@vakra-dev/reader 0.0.3 → 0.1.1

This diff shows the changes between package versions as published to one of the supported public registries. It is provided for informational purposes only and reflects the content of those versions exactly as they appear in their respective registries.
package/dist/index.d.ts CHANGED
@@ -90,6 +90,20 @@ interface IBrowserPool {
90
90
  healthCheck?(): Promise<HealthStatus>;
91
91
  }
92
92
 
93
+ /**
94
+ * Engine types for multi-engine scraping architecture
95
+ *
96
+ * Engine stack (in order of preference):
97
+ * 1. http - Native fetch, fastest, no browser
98
+ * 2. tlsclient - TLS fingerprinting via got-scraping
99
+ * 3. hero - Full browser with JavaScript execution
100
+ */
101
+
102
+ /**
103
+ * Available engine names
104
+ */
105
+ type EngineName = "http" | "tlsclient" | "hero";
106
+
93
107
  /**
94
108
  * Proxy configuration for Hero
95
109
  */
@@ -143,6 +157,8 @@ interface ScrapeOptions {
143
157
  formats?: Array<"markdown" | "html">;
144
158
  /** Custom user agent string */
145
159
  userAgent?: string;
160
+ /** Custom headers for requests */
161
+ headers?: Record<string, string>;
146
162
  /** Request timeout in milliseconds (default: 30000) */
147
163
  timeoutMs?: number;
148
164
  /** URL patterns to include (regex strings) */
@@ -187,6 +203,12 @@ interface ScrapeOptions {
187
203
  browserPool?: BrowserPoolConfig;
188
204
  /** Browser pool instance (internal, provided by ReaderClient) */
189
205
  pool?: IBrowserPool;
206
+ /** Engines to use in order (default: ['http', 'tlsclient', 'hero']) */
207
+ engines?: EngineName[];
208
+ /** Skip specific engines (e.g., ['http'] to skip native fetch) */
209
+ skipEngines?: EngineName[];
210
+ /** Force a specific engine, skipping the cascade */
211
+ forceEngine?: EngineName;
190
212
  }
191
213
  /**
192
214
  * Website metadata extracted from the base page
@@ -306,13 +328,17 @@ interface ScrapeResult {
306
328
  /**
307
329
  * Default scrape options
308
330
  */
309
- declare const DEFAULT_OPTIONS: Omit<Required<ScrapeOptions>, "proxy" | "waitForSelector" | "connectionToCore" | "userAgent" | "browserPool" | "pool"> & {
331
+ declare const DEFAULT_OPTIONS: Omit<Required<ScrapeOptions>, "proxy" | "waitForSelector" | "connectionToCore" | "userAgent" | "headers" | "browserPool" | "pool" | "engines" | "skipEngines" | "forceEngine"> & {
310
332
  proxy?: ProxyConfig;
311
333
  waitForSelector?: string;
312
334
  connectionToCore?: any;
313
335
  userAgent?: string;
336
+ headers?: Record<string, string>;
314
337
  browserPool?: BrowserPoolConfig;
315
338
  pool?: IBrowserPool;
339
+ engines?: EngineName[];
340
+ skipEngines?: EngineName[];
341
+ forceEngine?: EngineName;
316
342
  };
317
343
  /**
318
344
  * Format type guard
@@ -547,7 +573,6 @@ declare class ReaderClient {
547
573
  */
548
574
  declare class Scraper {
549
575
  private options;
550
- private pool;
551
576
  private logger;
552
577
  private robotsCache;
553
578
  constructor(options: ScrapeOptions);
@@ -570,13 +595,7 @@ declare class Scraper {
570
595
  */
571
596
  private scrapeSingleUrlWithRetry;
572
597
  /**
573
- * Wait for the final page to load after any Cloudflare redirects
574
- * Cloudflare often does silent redirects even when bypassed, we need to ensure
575
- * we're on the actual content page before scraping.
576
- */
577
- private waitForFinalPage;
578
- /**
579
- * Scrape a single URL
598
+ * Scrape a single URL using the engine orchestrator
580
599
  */
581
600
  private scrapeSingleUrl;
582
601
  /**
@@ -832,6 +851,8 @@ declare function isDaemonRunning(port?: number): Promise<boolean>;
832
851
  *
833
852
  * Simple conversion without any headers, metadata, or formatting wrappers.
834
853
  * Returns clean markdown content ready for LLM consumption.
854
+ *
855
+ * Uses supermarkdown (Rust-based) for high-performance conversion.
835
856
  */
836
857
  declare function htmlToMarkdown(html: string): string;
837
858
  /**