rezo 1.0.42 → 1.0.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/curl.cjs +131 -29
- package/dist/adapters/curl.js +131 -29
- package/dist/adapters/entries/curl.d.ts +65 -0
- package/dist/adapters/entries/fetch.d.ts +65 -0
- package/dist/adapters/entries/http.d.ts +65 -0
- package/dist/adapters/entries/http2.d.ts +65 -0
- package/dist/adapters/entries/react-native.d.ts +65 -0
- package/dist/adapters/entries/xhr.d.ts +65 -0
- package/dist/adapters/http2.cjs +209 -22
- package/dist/adapters/http2.js +209 -22
- package/dist/adapters/index.cjs +6 -6
- package/dist/cache/index.cjs +9 -13
- package/dist/cache/index.js +0 -2
- package/dist/core/rezo.cjs +7 -0
- package/dist/core/rezo.js +7 -0
- package/dist/crawler/addon/decodo/index.cjs +1 -0
- package/dist/crawler/addon/decodo/index.js +1 -0
- package/dist/crawler/crawler-options.cjs +1 -0
- package/dist/crawler/crawler-options.js +1 -0
- package/dist/crawler/crawler.cjs +1070 -0
- package/dist/crawler/crawler.js +1068 -0
- package/dist/crawler/index.cjs +40 -0
- package/dist/{plugin → crawler}/index.js +4 -2
- package/dist/crawler/plugin/file-cacher.cjs +19 -0
- package/dist/crawler/plugin/file-cacher.js +19 -0
- package/dist/crawler/plugin/index.cjs +1 -0
- package/dist/crawler/plugin/index.js +1 -0
- package/dist/crawler/plugin/navigation-history.cjs +43 -0
- package/dist/crawler/plugin/navigation-history.js +43 -0
- package/dist/crawler/plugin/robots-txt.cjs +2 -0
- package/dist/crawler/plugin/robots-txt.js +2 -0
- package/dist/crawler/plugin/url-store.cjs +18 -0
- package/dist/crawler/plugin/url-store.js +18 -0
- package/dist/crawler.d.ts +511 -183
- package/dist/entries/crawler.cjs +5 -5
- package/dist/entries/crawler.js +2 -2
- package/dist/index.cjs +27 -24
- package/dist/index.d.ts +73 -0
- package/dist/index.js +1 -0
- package/dist/internal/agents/base.cjs +113 -0
- package/dist/internal/agents/base.js +110 -0
- package/dist/internal/agents/http-proxy.cjs +89 -0
- package/dist/internal/agents/http-proxy.js +86 -0
- package/dist/internal/agents/https-proxy.cjs +176 -0
- package/dist/internal/agents/https-proxy.js +173 -0
- package/dist/internal/agents/index.cjs +10 -0
- package/dist/internal/agents/index.js +5 -0
- package/dist/internal/agents/socks-client.cjs +571 -0
- package/dist/internal/agents/socks-client.js +567 -0
- package/dist/internal/agents/socks-proxy.cjs +75 -0
- package/dist/internal/agents/socks-proxy.js +72 -0
- package/dist/platform/browser.d.ts +65 -0
- package/dist/platform/bun.d.ts +65 -0
- package/dist/platform/deno.d.ts +65 -0
- package/dist/platform/node.d.ts +65 -0
- package/dist/platform/react-native.d.ts +65 -0
- package/dist/platform/worker.d.ts +65 -0
- package/dist/proxy/index.cjs +18 -16
- package/dist/proxy/index.js +17 -12
- package/dist/queue/index.cjs +8 -8
- package/dist/responses/buildError.cjs +11 -2
- package/dist/responses/buildError.js +11 -2
- package/dist/responses/universal/index.cjs +11 -11
- package/dist/utils/curl.cjs +317 -0
- package/dist/utils/curl.js +314 -0
- package/package.json +2 -6
- package/dist/cache/file-cacher.cjs +0 -264
- package/dist/cache/file-cacher.js +0 -261
- package/dist/cache/url-store.cjs +0 -288
- package/dist/cache/url-store.js +0 -285
- package/dist/plugin/addon/decodo/index.cjs +0 -1
- package/dist/plugin/addon/decodo/index.js +0 -1
- package/dist/plugin/crawler-options.cjs +0 -1
- package/dist/plugin/crawler-options.js +0 -1
- package/dist/plugin/crawler.cjs +0 -519
- package/dist/plugin/crawler.js +0 -517
- package/dist/plugin/index.cjs +0 -36
- /package/dist/{plugin → crawler}/addon/decodo/options.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/decodo/options.js +0 -0
- /package/dist/{plugin → crawler}/addon/decodo/types.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/decodo/types.js +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/index.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/index.js +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/options.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/options.js +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/types.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/types.js +0 -0
- /package/dist/{plugin → crawler}/scraper.cjs +0 -0
- /package/dist/{plugin → crawler}/scraper.js +0 -0
package/dist/crawler.d.ts
CHANGED
@@ -6,43 +6,24 @@ import { SecureContext, TLSSocket } from 'node:tls';
 import { Cookie as TouchCookie, CookieJar as TouchCookieJar, CreateCookieOptions } from 'tough-cookie';
 
 /**
- *
+ * CrawlerCache - High-performance SQLite-based response caching for web crawlers
  *
- *
- *
+ * Optimized specifically for crawler workloads with:
+ * - WAL mode for high-throughput concurrent reads/writes
+ * - Batch operations for efficient bulk storage
+ * - Domain-based namespacing for organized cache management
+ * - Optional zstd compression for storage efficiency
  *
  * @module cache/file-cacher
  * @author Rezo HTTP Client Library
- *
- * @example
- * ```typescript
- * import { FileCacher } from 'rezo';
- *
- * // Create a file cacher instance
- * const cacher = await FileCacher.create({
- * cacheDir: './cache',
- * ttl: 3600000, // 1 hour
- * compression: true,
- * encryptNamespace: true
- * });
- *
- * // Store and retrieve data
- * await cacher.set('user:123', { name: 'John' }, 3600000, 'users');
- * const user = await cacher.get('user:123', 'users');
- *
- * // Check existence and cleanup
- * const exists = await cacher.has('user:123', 'users');
- * await cacher.delete('user:123', 'users');
- * await cacher.close();
- * ```
  */
 /**
- * Configuration options for
+ * Configuration options for CrawlerCache
  */
 export interface FileCacherOptions {
     /**
      * Directory path for storing cache databases
-     * @default '
+     * @default '/tmp/rezo-crawler/cache'
      */
     cacheDir?: string;
     /**
@@ -51,23 +32,18 @@ export interface FileCacherOptions {
      */
     ttl?: number;
     /**
-     * Enable zstd compression for stored values
+     * Enable zstd compression for stored values (Node.js 22.15+)
      * Reduces storage size but adds CPU overhead
      * @default false
      */
     compression?: boolean;
     /**
-     *
-     * @default false
-     */
-    softDelete?: boolean;
-    /**
-     * Hash namespace names for privacy/security
+     * Hash namespace names for privacy
      * @default false
      */
     encryptNamespace?: boolean;
     /**
-     * Maximum
+     * Maximum entries per namespace (0 = unlimited)
      * @default 0
      */
     maxEntries?: number;
@@ -77,157 +53,76 @@ declare class FileCacher {
     private readonly options;
     private readonly cacheDir;
     private closed;
-    /**
-     * Private constructor - use FileCacher.create() instead
-     */
     private constructor();
     /**
      * Create a new FileCacher instance
-     *
-     * @param options - Configuration options
-     * @returns Promise resolving to initialized FileCacher instance
-     *
-     * @example
-     * ```typescript
-     * const cacher = await FileCacher.create({
-     * cacheDir: './my-cache',
-     * ttl: 3600000,
-     * compression: true
-     * });
-     * ```
      */
     static create(options?: FileCacherOptions): Promise<FileCacher>;
     /**
-     * Get or create database for a namespace
+     * Get or create optimized database for a namespace (domain)
      */
     private getDatabase;
     /**
-     * Store a
-     *
-     * @param key - Unique key for the cached item
-     * @param value - Value to cache (will be JSON serialized)
-     * @param ttl - Time-to-live in milliseconds (uses default if not specified)
-     * @param namespace - Namespace for isolation (default: 'default')
-     * @returns Promise resolving when stored
-     *
-     * @example
-     * ```typescript
-     * // Store with default TTL
-     * await cacher.set('key1', { data: 'value' });
-     *
-     * // Store with custom TTL and namespace
-     * await cacher.set('key2', responseData, 3600000, 'api-responses');
-     * ```
+     * Store a response in the cache
      */
     set<T = any>(key: string, value: T, ttl?: number, namespace?: string): Promise<void>;
     /**
-     *
-
-
-
-
-
-
-
-     *
-     * if (data) {
-     * console.log('Cache hit:', data);
-     * }
-     * ```
+     * Store multiple responses in a single transaction (batch operation)
+     */
+    setMany<T = any>(entries: Array<{
+        key: string;
+        value: T;
+        ttl?: number;
+    }>, namespace?: string): Promise<void>;
+    /**
+     * Retrieve a cached response
      */
     get<T = any>(key: string, namespace?: string): Promise<T | null>;
     /**
-     * Check if a key exists
-     *
-     * @param key - Key to check
-     * @param namespace - Namespace to search in (default: 'default')
-     * @returns Promise resolving to true if key exists and is valid
-     *
-     * @example
-     * ```typescript
-     * if (await cacher.has('key1', 'my-namespace')) {
-     * const data = await cacher.get('key1', 'my-namespace');
-     * }
-     * ```
+     * Check if a key exists and is not expired
      */
     has(key: string, namespace?: string): Promise<boolean>;
+    /**
+     * Check multiple keys at once (batch operation)
+     */
+    hasMany(keys: string[], namespace?: string): Promise<Set<string>>;
     /**
      * Delete a key from the cache
-     *
-     * @param key - Key to delete
-     * @param namespace - Namespace to delete from (default: 'default')
-     * @returns Promise resolving to true if key was deleted
-     *
-     * @example
-     * ```typescript
-     * await cacher.delete('obsolete-key', 'my-namespace');
-     * ```
      */
     delete(key: string, namespace?: string): Promise<boolean>;
     /**
      * Clear all entries in a namespace
-     *
-     * @param namespace - Namespace to clear (default: 'default')
-     * @returns Promise resolving when cleared
-     *
-     * @example
-     * ```typescript
-     * // Clear all cached data for a domain
-     * await cacher.clear('example.com');
-     * ```
      */
     clear(namespace?: string): Promise<void>;
     /**
-     * Remove all expired entries
-     *
-     * @param namespace - Namespace to cleanup (default: 'default')
-     * @returns Promise resolving to number of entries removed
-     *
-     * @example
-     * ```typescript
-     * const removed = await cacher.cleanup('my-namespace');
-     * console.log(`Removed ${removed} expired entries`);
-     * ```
+     * Remove all expired entries
      */
     cleanup(namespace?: string): Promise<number>;
     /**
-     * Get statistics for a namespace
-     *
-     * @param namespace - Namespace to get stats for (default: 'default')
-     * @returns Promise resolving to cache statistics
-     *
-     * @example
-     * ```typescript
-     * const stats = await cacher.stats('my-namespace');
-     * console.log(`${stats.count} entries, ${stats.size} bytes`);
-     * ```
+     * Get cache statistics for a namespace
      */
     stats(namespace?: string): Promise<{
         count: number;
         expired: number;
-        deleted: number;
     }>;
     /**
-     * Close all database connections
-     *
-     * @returns Promise resolving when all connections are closed
-     *
-     * @example
-     * ```typescript
-     * // Always close when done
-     * await cacher.close();
-     * ```
+     * Close all database connections
      */
     close(): Promise<void>;
-    /**
-     * Check if the cacher has been closed
-     */
     get isClosed(): boolean;
-    /**
-     * Get the cache directory path
-     */
     get directory(): string;
 }
+export interface CrawlSession {
+    sessionId: string;
+    baseUrl: string;
+    startedAt: number;
+    lastActivityAt: number;
+    status: "running" | "paused" | "completed" | "failed";
+    urlsVisited: number;
+    urlsQueued: number;
+    urlsFailed: number;
+    metadata?: string;
+}
 export interface RezoHttpHeaders {
     accept?: string | undefined;
     "accept-encoding"?: string | undefined;
@@ -4464,6 +4359,71 @@ declare class Rezo {
      * @see {@link cookieJar} - Access the underlying RezoCookieJar for more control
      */
     clearCookies(): void;
+    /**
+     * Convert a Rezo request configuration to a cURL command string.
+     *
+     * Generates a valid cURL command that can be executed in a terminal to
+     * reproduce the same HTTP request. Useful for:
+     * - Debugging and sharing requests
+     * - Documentation and examples
+     * - Testing requests outside of Node.js
+     * - Exporting requests to other tools
+     *
+     * @param config - Request configuration object
+     * @returns A cURL command string
+     *
+     * @example
+     * ```typescript
+     * const curl = Rezo.toCurl({
+     * url: 'https://api.example.com/users',
+     * method: 'POST',
+     * headers: { 'Content-Type': 'application/json' },
+     * body: { name: 'John', email: 'john@example.com' }
+     * });
+     * // Output: curl -X POST -H 'content-type: application/json' --data-raw '{"name":"John","email":"john@example.com"}' -L --compressed 'https://api.example.com/users'
+     * ```
+     */
+    static toCurl(config: RezoRequestConfig | RezoRequestOptions): string;
+    /**
+     * Parse a cURL command string into a Rezo request configuration.
+     *
+     * Converts a cURL command into a configuration object that can be
+     * passed directly to Rezo request methods. Useful for:
+     * - Importing requests from browser DevTools
+     * - Converting curl examples from API documentation
+     * - Migrating scripts from curl to Rezo
+     *
+     * Supports common cURL options:
+     * - `-X, --request` - HTTP method
+     * - `-H, --header` - Request headers
+     * - `-d, --data, --data-raw, --data-binary` - Request body
+     * - `-u, --user` - Basic authentication
+     * - `-x, --proxy` - Proxy configuration
+     * - `--socks5, --socks4` - SOCKS proxy
+     * - `-L, --location` - Follow redirects
+     * - `--max-redirs` - Maximum redirects
+     * - `--max-time` - Request timeout
+     * - `-k, --insecure` - Skip TLS verification
+     * - `-A, --user-agent` - User agent header
+     *
+     * @param curlCommand - A cURL command string
+     * @returns A request configuration object
+     *
+     * @example
+     * ```typescript
+     * // From browser DevTools "Copy as cURL"
+     * const config = Rezo.fromCurl(`
+     * curl 'https://api.example.com/data' \\
+     * -H 'Authorization: Bearer token123' \\
+     * -H 'Content-Type: application/json'
+     * `);
+     *
+     * // Use with Rezo
+     * const rezo = new Rezo();
+     * const response = await rezo.request(config);
+     * ```
+     */
+    static fromCurl(curlCommand: string): RezoRequestOptions;
 }
 /**
  * Rezo HTTP Client - Core Types
@@ -6156,17 +6116,25 @@ declare class Decodo {
     /**
      * Create a new Decodo client instance
      *
-     * @param config - Decodo API configuration
-     * @throws Error if
+     * @param config - Decodo API configuration (supports username/password OR token auth)
+     * @throws Error if authentication credentials are missing
      *
      * @example
      * ```typescript
+     * // Username/password authentication
      * const decodo = new Decodo({
      * username: 'user',
      * password: 'password',
      * headless: 'html',
      * country: 'US'
      * });
+     *
+     * // Token authentication (alternative)
+     * const decodo = new Decodo({
+     * token: 'your_api_token',
+     * headless: 'html',
+     * country: 'US'
+     * });
      * ```
      */
     constructor(config: DecodoConfig);
@@ -6284,6 +6252,15 @@ declare class Decodo {
  * const regexDomain: Domain = '^(sub|api)\.example\.com$';
  */
 export type Domain = string[] | string | RegExp;
+/**
+ * Supported HTTP adapter types for crawler requests
+ * @description
+ * - 'http': Standard Node.js HTTP/HTTPS adapter (default)
+ * - 'http2': HTTP/2 adapter with session pooling
+ * - 'curl': cURL adapter for maximum compatibility
+ * - 'fetch': Browser-compatible Fetch API adapter
+ */
+export type CrawlerAdapterType = "http" | "http2" | "curl" | "fetch";
 /**
  * Configuration interface for the CrawlerOptions class
  * @description Defines all available options for configuring web crawler behavior,
@@ -6292,6 +6269,12 @@ export type Domain = string[] | string | RegExp;
 export interface ICrawlerOptions {
     /** Base URL for the crawler - the starting point for crawling operations */
     baseUrl: string;
+    /** HTTP adapter to use for requests (default: 'http') */
+    adapter?: CrawlerAdapterType;
+    /** Enable navigation history for resumable crawling (default: false) */
+    enableNavigationHistory?: boolean;
+    /** Session ID for navigation history - allows resuming specific crawl sessions */
+    sessionId?: string;
     /** Whether to reject unauthorized SSL certificates (default: true) */
     rejectUnauthorized?: boolean;
     /** Custom user agent string for HTTP requests */
@@ -6381,6 +6364,42 @@ export interface ICrawlerOptions {
     } | {
         enable: false;
     } | undefined | false;
+    /** Decodo proxy service configuration for specific domains or global use */
+    decodo?: {
+        enable: true;
+        labs: [
+            {
+                domain: Domain;
+                isGlobal?: boolean;
+                options: DecodoOptions;
+                queueOptions: queueOptions$1;
+            }
+        ];
+    } | {
+        enable: false;
+    } | undefined | false;
+    /** Maximum crawl depth from start URL (0 = unlimited, default: 0) */
+    maxDepth?: number;
+    /** Maximum total URLs to crawl (0 = unlimited, default: 0) */
+    maxUrls?: number;
+    /** Maximum response size in bytes to process (0 = unlimited, default: 0) */
+    maxResponseSize?: number;
+    /** Respect robots.txt rules (default: false) */
+    respectRobotsTxt?: boolean;
+    /** Follow rel="nofollow" links (default: false - ignores nofollow links) */
+    followNofollow?: boolean;
+    /** Enable automatic throttling based on server response times (default: true) */
+    autoThrottle?: boolean;
+    /** Target request delay in ms for AutoThrottle (default: 1000) */
+    autoThrottleTargetDelay?: number;
+    /** Minimum delay between requests in ms (default: 100) */
+    autoThrottleMinDelay?: number;
+    /** Maximum delay between requests in ms (default: 60000) */
+    autoThrottleMaxDelay?: number;
+    /** Maximum time to wait on 429 response in ms (default: 1800000 = 30 min) */
+    maxWaitOn429?: number;
+    /** Always wait on 429 regardless of time, shows warning (default: false) */
+    alwaysWaitOn429?: boolean;
 }
 /**
  * Advanced web crawler configuration class with support for domain-specific settings
@@ -6415,6 +6434,12 @@ export interface ICrawlerOptions {
 export declare class CrawlerOptions {
     /** Base URL for the crawler - the starting point for crawling operations */
     baseUrl: string;
+    /** HTTP adapter to use for requests */
+    adapter: CrawlerAdapterType;
+    /** Enable navigation history for resumable crawling */
+    enableNavigationHistory: boolean;
+    /** Session ID for navigation history - allows resuming specific crawl sessions */
+    sessionId: string;
     /** Whether to reject unauthorized SSL certificates */
     rejectUnauthorized?: boolean;
     /** Custom user agent string for HTTP requests */
@@ -6451,6 +6476,28 @@ export declare class CrawlerOptions {
     throwFatalError?: boolean;
     /** Enable debug logging */
     debug?: boolean;
+    /** Maximum crawl depth from start URL (0 = unlimited) */
+    maxDepth: number;
+    /** Maximum total URLs to crawl (0 = unlimited) */
+    maxUrls: number;
+    /** Maximum response size in bytes to process (0 = unlimited) */
+    maxResponseSize: number;
+    /** Respect robots.txt rules */
+    respectRobotsTxt: boolean;
+    /** Follow rel="nofollow" links */
+    followNofollow: boolean;
+    /** Enable automatic throttling based on server response times */
+    autoThrottle: boolean;
+    /** Target request delay in ms for AutoThrottle */
+    autoThrottleTargetDelay: number;
+    /** Minimum delay between requests in ms */
+    autoThrottleMinDelay: number;
+    /** Maximum delay between requests in ms */
+    autoThrottleMaxDelay: number;
+    /** Maximum time to wait on 429 response in ms */
+    maxWaitOn429: number;
+    /** Always wait on 429 regardless of time */
+    alwaysWaitOn429: boolean;
     /** Internal storage for Oxylabs configurations with domain mapping */
     oxylabs: {
         domain?: Domain;
@@ -6832,13 +6879,44 @@ export interface EmailDiscoveryEvent {
     discoveredAt: string;
     timestamp: Date;
 }
+interface RedirectEvent$1 {
+    originalUrl: string;
+    finalUrl: string;
+    redirectCount: number;
+    statusCode: number;
+}
+/**
+ * Export format options
+ */
+export type ExportFormat = "json" | "jsonl" | "csv";
 /**
- *
- *
+ * Handler with element bound to `this` context.
+ * Use `function` syntax (not arrow functions) to access `this`.
  *
- * @
+ * @example
+ * ```typescript
+ * crawler.onText('h1', async function(text) {
+ * console.log(text, this.tagName); // `this` is the element
+ * });
+ * ```
+ */
+export type ElementBoundHandler<TValue, TElement = Element> = (this: TElement, value: TValue) => Promise<void>;
+/**
+ * Handler for attribute extraction with element bound to `this`.
+ * Receives both the attribute value and attribute name.
+ */
+export type AttributeHandler = (this: Element, value: string, attributeName: string) => Promise<void>;
+/**
+ * Crawl statistics
  */
-export
+export interface CrawlStats {
+    urlsVisited: number;
+    urlsQueued: number;
+    urlsFailed: number;
+    startTime: number;
+    endTime?: number;
+    currentDepth: number;
+}
 /**
  * A powerful web crawler that provides event-driven HTML parsing and data extraction.
  * Supports caching, proxy rotation, retry mechanisms, and email lead discovery.
@@ -6886,29 +6964,126 @@ export declare class Crawler {
     private isStorageReady;
     private isCacheReady;
     private leadsFinder;
+    /** Navigation history for resumable crawling */
+    private navigationHistory;
+    private isNavigationHistoryReady;
+    private isSessionReady;
+    private currentSession;
+    private navigationHistoryInitPromise;
+    /** Adapter-specific request executor */
+    private adapterExecutor;
+    private adapterType;
+    /** Track pending execute() calls for proper done() behavior */
+    private pendingExecutions;
+    /** robots.txt parser and validator */
+    private robotsTxt;
+    /** AutoThrottle: track response times per domain for adaptive rate limiting */
+    private domainResponseTimes;
+    private domainCurrentDelay;
+    /** Crawl statistics */
+    private crawlStats;
+    /** URL depth tracking for maxDepth limit */
+    private urlDepthMap;
+    /** Lifecycle event handlers */
+    private startHandlers;
+    private finishHandlers;
+    private redirectHandlers;
+    /** Data collection for export */
+    private collectedData;
+    /** Flag to track if crawl has started */
+    private crawlStarted;
     /**
      * Creates a new Crawler instance with the specified configuration.
      *
-     * @param
-     * @param
+     * @param crawlerOptions - Crawler configuration options
+     * @param http - Optional Rezo HTTP client instance (creates default if not provided)
      *
      * @example
      * ```typescript
+     * // Basic usage (creates default Rezo instance)
      * const crawler = new Crawler({
-     *
-     * baseUrl: 'https://api.example.com',
-     * timeout: 30000,
+     * baseUrl: 'https://example.com',
      * enableCache: true,
      * cacheDir: './cache',
-     * socksProxies: [{ host: '127.0.0.1', port: 9050 }]
-     * }, {
-     * http: backupHttpClient,
-     * useProxy: false,
-     * concurrency: 5
      * });
+     *
+     * // With resumable crawling
+     * const crawler = new Crawler({
+     * baseUrl: 'https://example.com',
+     * enableNavigationHistory: true,
+     * sessionId: 'my-session',
+     * cacheDir: './cache',
+     * });
+     *
+     * // With custom Rezo instance
+     * const crawler = new Crawler({
+     * baseUrl: 'https://example.com',
+     * adapter: 'curl',
+     * }, myRezoInstance);
      * ```
      */
-    constructor(crawlerOptions: ICrawlerOptions, http
+    constructor(crawlerOptions: ICrawlerOptions, http?: Rezo);
+    /**
+     * Initialize the HTTP adapter based on configuration
+     */
+    private initializeAdapter;
+    /**
+     * Initialize navigation history and session
+     */
+    private initializeNavigationHistory;
+    /**
+     * Wait for navigation history and session to be ready
+     */
+    private waitForNavigationHistory;
+    /**
+     * Ensure navigation history is ready and return it (or null if not enabled)
+     * This is used by visit() and other methods that need to write to navigation history
+     */
+    private ensureNavigationHistoryReady;
+    /**
+     * Add URL to navigation history queue
+     */
+    private addToNavigationQueue;
+    /**
+     * Mark URL as visited in navigation history
+     */
+    private markUrlVisited;
+    /**
+     * Get the current crawl session
+     */
+    getSession(): CrawlSession | null;
+    /**
+     * Get the session ID
+     */
+    getSessionId(): string;
+    /**
+     * Resume a previous crawl session
+     * @param sessionId - Optional session ID to resume (uses current session if not provided)
+     * @returns Promise resolving to the Crawler instance for chaining
+     */
+    resume(sessionId?: string): Promise<Crawler>;
+    /**
+     * Get list of resumable sessions
+     * @returns Promise resolving to array of sessions that can be resumed
+     */
+    getResumableSessions(): Promise<CrawlSession[]>;
+    /**
+     * Pause the current crawl session
+     */
+    pause(): Promise<void>;
+    /**
+     * Mark the current session as completed
+     */
+    complete(): Promise<void>;
+    /**
+     * Get the current adapter type being used
+     */
+    getAdapterType(): CrawlerAdapterType;
+    /**
+     * Switch to a different adapter at runtime
+     * @param adapter - The adapter type to switch to
+     */
+    setAdapter(adapter: CrawlerAdapterType): Promise<void>;
     private rawResponseHandler;
     private waitForCache;
     private waitForStorage;
@@ -6985,6 +7160,54 @@ export declare class Crawler {
      * ```
      */
     onEmailLeads(handler: (emails: string[]) => Promise<void>): Crawler;
+    /**
+     * Registers a handler called before crawling starts.
+     * Useful for initialization, logging, or setup tasks.
+     *
+     * @param handler - Function to call before crawling begins
+     * @returns The crawler instance for method chaining
+     *
+     * @example
+     * ```typescript
+     * crawler.onStart(async () => {
+     * console.log('Crawl session started');
+     * await initializeDatabase();
+     * });
+     * ```
+     */
+    onStart(handler: () => Promise<void>): Crawler;
+    /**
+     * Registers a handler called when crawling finishes.
+     * Receives crawl statistics including URLs visited, failed, and timing.
+     *
+     * @param handler - Function to call when crawling completes
+     * @returns The crawler instance for method chaining
+     *
+     * @example
+     * ```typescript
+     * crawler.onFinish(async (stats) => {
+     * console.log(`Crawl completed: ${stats.urlsVisited} URLs in ${stats.endTime - stats.startTime}ms`);
+     * await generateReport(stats);
+     * });
+     * ```
+     */
+    onFinish(handler: (stats: CrawlStats) => Promise<void>): Crawler;
+    /**
+     * Registers a handler called when a redirect is followed.
+     * Provides information about the original URL, final URL, and redirect count.
+     *
+     * @param handler - Function to handle redirect events
+     * @returns The crawler instance for method chaining
+     *
+     * @example
+     * ```typescript
+     * crawler.onRedirect(async (event) => {
+     * console.log(`Redirect: ${event.originalUrl} -> ${event.finalUrl}`);
+     * trackRedirects(event);
+     * });
+     * ```
+     */
+    onRedirect(handler: (event: RedirectEvent$1) => Promise<void>): Crawler;
     /**
      * Registers a handler for raw response data.
      * Triggered for all responses, providing access to the raw Buffer data.
@@ -7080,21 +7303,23 @@ export declare class Crawler {
     /**
      * Registers a handler for href attributes from anchor and link elements.
      * Automatically resolves relative URLs to absolute URLs.
+     * Use `function` syntax (not arrow) to access `this` as the element.
      *
-     * @param handler - Function
+     * @param handler - Function receiving href string, with `this` bound to the element
      * @returns The crawler instance for method chaining
      *
      * @example
      * ```typescript
-     * crawler.onHref(async (href)
+     * crawler.onHref(async function(href) {
      * console.log('Found URL:', href);
+     * console.log('Link text:', this.textContent); // `this` is the anchor/link element
      * if (href.includes('/api/')) {
      * await crawler.visit(href);
      * }
      * });
      * ```
      */
-    onHref(handler:
+    onHref(handler: ElementBoundHandler<string, HTMLAnchorElement | HTMLLinkElement>): Crawler;
     /**
      * Registers a handler for elements matching a CSS selector.
      * Provides fine-grained control over which elements to process.
@@ -7136,55 +7361,57 @@ export declare class Crawler {
     /**
      * Registers a handler for HTML element attributes.
      * Can extract specific attributes from all elements or from elements matching a selector.
+     * Use `function` syntax (not arrow) to access `this` as the element.
      *
      * @param attribute - The attribute name to extract
-     * @param handler - Function
+     * @param handler - Function receiving (value, attrName), with `this` bound to element
      * @returns The crawler instance for method chaining
      *
      * @overload
      * @param selection - CSS selector to filter elements
      * @param attribute - The attribute name to extract
-     * @param handler - Function
+     * @param handler - Function receiving (value, attrName), with `this` bound to element
      * @returns The crawler instance for method chaining
      *
      * @example
      * ```typescript
      * // Extract all 'data-id' attributes
-     * crawler.onAttribute('data-id', async (value)
-     * console.log('Found
+     * crawler.onAttribute('data-id', async function(value, attrName) {
+     * console.log('Found', attrName, ':', value, 'on:', this.tagName);
      * });
      *
      * // Extract 'src' attributes from images only
-     * crawler.onAttribute('img', 'src', async (
-     * console.log('Image source:',
+     * crawler.onAttribute('img', 'src', async function(value) {
+     * console.log('Image source:', value, 'alt:', this.getAttribute('alt'));
      * });
      * ```
      */
-    onAttribute(attribute: string, handler:
-    onAttribute(selection: string, attribute: string, handler:
+    onAttribute(attribute: string, handler: AttributeHandler): Crawler;
+    onAttribute(selection: string, attribute: string, handler: AttributeHandler): Crawler;
     /**
      * Registers a handler for text content of elements matching a CSS selector.
      * Extracts and processes the textContent of matching elements.
+     * Use `function` syntax (not arrow) to access `this` as the element.
      *
      * @param selection - CSS selector to match elements
-     * @param handler - Function
+     * @param handler - Function receiving text string, with `this` bound to element
      * @returns The crawler instance for method chaining
      *
      * @example
      * ```typescript
-     * // Extract all heading text
-     * crawler.onText('h1, h2, h3', async (text)
-     * console.log('Heading:', text.trim());
+     * // Extract all heading text with element context
+     * crawler.onText('h1, h2, h3', async function(text) {
+     * console.log('Heading:', text.trim(), 'Tag:', this.tagName);
      * });
      *
-     * // Extract product prices
-     * crawler.onText('.price', async (
-     * const numericPrice = parseFloat(
-     * console.log('Price
+     * // Extract product prices with element context
+     * crawler.onText('.price', async function(text) {
+     * const numericPrice = parseFloat(text.replace(/[^\d.]/g, ''));
+     * console.log('Price:', numericPrice, 'Product:', this.closest('.product')?.id);
      * });
      * ```
      */
-    onText(selection: string, handler:
+    onText(selection: string, handler: ElementBoundHandler<string>): Crawler;
     private _onBody;
     private _onAttribute;
     private _onText;
@@ -7199,6 +7426,86 @@ export declare class Crawler {
     private _onEmailLeads;
     private _onRawResponse;
     private _onResponse;
+    /**
+     * Calculate adaptive delay based on server response times (AutoThrottle)
+     */
+    private calculateAutoThrottleDelay;
+    /**
+     * Get current AutoThrottle delay for a domain
+     */
+    private getAutoThrottleDelay;
+    /**
+     * Handle 429 Too Many Requests response with Retry-After header parsing
+     */
+    private handle429Response;
+    /**
+     * Check if URL passes all crawl limit checks
+     */
+    private checkCrawlLimits;
+    /**
+     * Check if a link should be followed based on nofollow rules
+     */
+    private shouldFollowLink;
+    /**
+     * Check response size against maxResponseSize limit
+     */
+    private checkResponseSize;
+    /**
+     * Collect data for later export
+     *
+     * @param data - Data to collect (will be added to export buffer)
+     * @returns The crawler instance for method chaining
+     *
+     * @example
+     * ```typescript
+     * crawler.onDocument(async (doc) => {
+     * crawler.collect({
+     * title: doc.title,
+     * url: doc.URL,
+     * h1: doc.querySelector('h1')?.textContent
+     * });
+     * });
+     * ```
+     */
+    collect(data: any): Crawler;
+    /**
+     * Get all collected data
+     */
+    getCollectedData(): any[];
+    /**
+     * Clear collected data
+     */
+    clearCollectedData(): Crawler;
+    /**
+     * Export collected data to a file
+     *
+     * @param filePath - Output file path
+     * @param format - Export format: 'json', 'jsonl', or 'csv'
+     *
+     * @example
+     * ```typescript
+     * await crawler.waitForAll();
+     * await crawler.exportData('./output.json', 'json');
+     * await crawler.exportData('./output.csv', 'csv');
+     * ```
+     */
+    exportData(filePath: string, format?: ExportFormat): Promise<void>;
+    /**
+     * Get current crawl statistics
+     */
+    getStats(): CrawlStats;
+    /**
+     * Trigger onStart handlers (called once on first visit)
+     */
+    private triggerStartHandlers;
+    /**
+     * Trigger onFinish handlers
+     */
+    private triggerFinishHandlers;
+    /**
+     * Trigger onRedirect handlers
+     */
+    private triggerRedirectHandlers;
     private buildUrl;
     /**
      * Visits a URL and processes it according to registered event handlers.
@@ -7303,7 +7610,28 @@ export declare class Crawler {
      * ```
      */
     waitForAll(): Promise<void>;
+    /**
+     * Alias for waitForAll() - waits for all crawling operations to complete.
+     * @returns Promise that resolves when done
+     * @example
+     * ```typescript
+     * crawler.visit('https://example.com');
+     * await crawler.done();
+     * ```
+     */
+    done(): Promise<void>;
     close(): Promise<void>;
+    /**
+     * Destroys the crawler instance and releases all resources.
+     * Clears all queued tasks, closes caches, and cleans up event handlers.
+     * @returns Promise that resolves when destruction is complete
+     * @example
+     * ```typescript
+     * await crawler.destroy();
+     * // Crawler is now fully cleaned up
+     * ```
+     */
+    destroy(): Promise<void>;
 }
 
 export {};