rezo 1.0.41 → 1.0.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/dist/adapters/curl.cjs +143 -32
  2. package/dist/adapters/curl.js +143 -32
  3. package/dist/adapters/entries/curl.d.ts +65 -0
  4. package/dist/adapters/entries/fetch.d.ts +65 -0
  5. package/dist/adapters/entries/http.d.ts +65 -0
  6. package/dist/adapters/entries/http2.d.ts +65 -0
  7. package/dist/adapters/entries/react-native.d.ts +65 -0
  8. package/dist/adapters/entries/xhr.d.ts +65 -0
  9. package/dist/adapters/fetch.cjs +98 -12
  10. package/dist/adapters/fetch.js +98 -12
  11. package/dist/adapters/http.cjs +26 -14
  12. package/dist/adapters/http.js +26 -14
  13. package/dist/adapters/http2.cjs +756 -227
  14. package/dist/adapters/http2.js +756 -227
  15. package/dist/adapters/index.cjs +6 -6
  16. package/dist/adapters/xhr.cjs +94 -2
  17. package/dist/adapters/xhr.js +94 -2
  18. package/dist/cache/dns-cache.cjs +5 -3
  19. package/dist/cache/dns-cache.js +5 -3
  20. package/dist/cache/file-cacher.cjs +7 -1
  21. package/dist/cache/file-cacher.js +7 -1
  22. package/dist/cache/index.cjs +15 -13
  23. package/dist/cache/index.js +1 -0
  24. package/dist/cache/navigation-history.cjs +298 -0
  25. package/dist/cache/navigation-history.js +296 -0
  26. package/dist/cache/url-store.cjs +7 -1
  27. package/dist/cache/url-store.js +7 -1
  28. package/dist/core/rezo.cjs +7 -0
  29. package/dist/core/rezo.js +7 -0
  30. package/dist/crawler.d.ts +196 -11
  31. package/dist/entries/crawler.cjs +5 -5
  32. package/dist/index.cjs +27 -24
  33. package/dist/index.d.ts +73 -0
  34. package/dist/index.js +1 -0
  35. package/dist/internal/agents/base.cjs +113 -0
  36. package/dist/internal/agents/base.js +110 -0
  37. package/dist/internal/agents/http-proxy.cjs +89 -0
  38. package/dist/internal/agents/http-proxy.js +86 -0
  39. package/dist/internal/agents/https-proxy.cjs +176 -0
  40. package/dist/internal/agents/https-proxy.js +173 -0
  41. package/dist/internal/agents/index.cjs +10 -0
  42. package/dist/internal/agents/index.js +5 -0
  43. package/dist/internal/agents/socks-client.cjs +571 -0
  44. package/dist/internal/agents/socks-client.js +567 -0
  45. package/dist/internal/agents/socks-proxy.cjs +75 -0
  46. package/dist/internal/agents/socks-proxy.js +72 -0
  47. package/dist/platform/browser.d.ts +65 -0
  48. package/dist/platform/bun.d.ts +65 -0
  49. package/dist/platform/deno.d.ts +65 -0
  50. package/dist/platform/node.d.ts +65 -0
  51. package/dist/platform/react-native.d.ts +65 -0
  52. package/dist/platform/worker.d.ts +65 -0
  53. package/dist/plugin/crawler-options.cjs +1 -1
  54. package/dist/plugin/crawler-options.js +1 -1
  55. package/dist/plugin/crawler.cjs +192 -1
  56. package/dist/plugin/crawler.js +192 -1
  57. package/dist/plugin/index.cjs +36 -36
  58. package/dist/proxy/index.cjs +18 -16
  59. package/dist/proxy/index.js +17 -12
  60. package/dist/queue/index.cjs +8 -8
  61. package/dist/responses/buildError.cjs +11 -2
  62. package/dist/responses/buildError.js +11 -2
  63. package/dist/responses/universal/index.cjs +11 -11
  64. package/dist/utils/agent-pool.cjs +1 -17
  65. package/dist/utils/agent-pool.js +1 -17
  66. package/dist/utils/curl.cjs +317 -0
  67. package/dist/utils/curl.js +314 -0
  68. package/package.json +1 -1
@@ -0,0 +1,296 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { createHash } from "node:crypto";
4
+ function detectRuntime() {
5
+ if (typeof globalThis.Bun !== "undefined")
6
+ return "bun";
7
+ if (typeof globalThis.Deno !== "undefined")
8
+ return "deno";
9
+ return "node";
10
+ }
11
+ async function createDatabase(dbPath) {
12
+ const runtime = detectRuntime();
13
+ if (runtime === "bun") {
14
+ const { Database } = await import("bun:sqlite");
15
+ const db = new Database(dbPath);
16
+ return {
17
+ run: (sql, ...params) => db.run(sql, ...params),
18
+ get: (sql, ...params) => db.query(sql).get(...params),
19
+ all: (sql, ...params) => db.query(sql).all(...params),
20
+ close: () => db.close()
21
+ };
22
+ }
23
+ if (runtime === "deno") {
24
+ try {
25
+ const { Database } = await import("node:sqlite");
26
+ const db = new Database(dbPath);
27
+ return {
28
+ run: (sql, ...params) => db.exec(sql, params),
29
+ get: (sql, ...params) => {
30
+ const stmt = db.prepare(sql);
31
+ return stmt.get(...params);
32
+ },
33
+ all: (sql, ...params) => {
34
+ const stmt = db.prepare(sql);
35
+ return stmt.all(...params);
36
+ },
37
+ close: () => db.close()
38
+ };
39
+ } catch {
40
+ throw new Error("Deno SQLite support requires Node.js compatibility mode");
41
+ }
42
+ }
43
+ const { DatabaseSync } = await import("node:sqlite");
44
+ const db = new DatabaseSync(dbPath);
45
+ return {
46
+ run: (sql, ...params) => {
47
+ const stmt = db.prepare(sql);
48
+ stmt.run(...params);
49
+ },
50
+ get: (sql, ...params) => {
51
+ const stmt = db.prepare(sql);
52
+ return stmt.get(...params);
53
+ },
54
+ all: (sql, ...params) => {
55
+ const stmt = db.prepare(sql);
56
+ return stmt.all(...params);
57
+ },
58
+ close: () => db.close()
59
+ };
60
+ }
61
+
62
+ export class NavigationHistory {
63
+ db = null;
64
+ options;
65
+ storeDir;
66
+ dbPath;
67
+ closed = false;
68
+ initPromise = null;
69
+ constructor(options = {}) {
70
+ this.options = {
71
+ storeDir: options.storeDir || "./navigation-history",
72
+ dbFileName: options.dbFileName || "navigation.db",
73
+ hashUrls: options.hashUrls ?? false
74
+ };
75
+ this.storeDir = path.resolve(this.options.storeDir);
76
+ this.dbPath = path.join(this.storeDir, this.options.dbFileName);
77
+ if (!fs.existsSync(this.storeDir)) {
78
+ fs.mkdirSync(this.storeDir, { recursive: true });
79
+ }
80
+ }
81
+ static async create(options = {}) {
82
+ const store = new NavigationHistory(options);
83
+ await store.initialize();
84
+ return store;
85
+ }
86
+ async initialize() {
87
+ if (this.initPromise)
88
+ return this.initPromise;
89
+ this.initPromise = (async () => {
90
+ this.db = await createDatabase(this.dbPath);
91
+ this.db.run(`
92
+ CREATE TABLE IF NOT EXISTS sessions (
93
+ sessionId TEXT PRIMARY KEY,
94
+ baseUrl TEXT NOT NULL,
95
+ startedAt INTEGER NOT NULL,
96
+ lastActivityAt INTEGER NOT NULL,
97
+ status TEXT DEFAULT 'running',
98
+ urlsVisited INTEGER DEFAULT 0,
99
+ urlsQueued INTEGER DEFAULT 0,
100
+ urlsFailed INTEGER DEFAULT 0,
101
+ metadata TEXT
102
+ )
103
+ `);
104
+ this.db.run(`
105
+ CREATE TABLE IF NOT EXISTS queue (
106
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
107
+ sessionId TEXT NOT NULL,
108
+ urlKey TEXT NOT NULL,
109
+ originalUrl TEXT NOT NULL,
110
+ method TEXT DEFAULT 'GET',
111
+ priority INTEGER DEFAULT 0,
112
+ body TEXT,
113
+ headers TEXT,
114
+ metadata TEXT,
115
+ addedAt INTEGER NOT NULL,
116
+ UNIQUE(sessionId, urlKey)
117
+ )
118
+ `);
119
+ this.db.run(`
120
+ CREATE TABLE IF NOT EXISTS visited (
121
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
122
+ sessionId TEXT NOT NULL,
123
+ urlKey TEXT NOT NULL,
124
+ originalUrl TEXT NOT NULL,
125
+ status INTEGER,
126
+ visitedAt INTEGER NOT NULL,
127
+ finalUrl TEXT,
128
+ contentType TEXT,
129
+ errorMessage TEXT,
130
+ UNIQUE(sessionId, urlKey)
131
+ )
132
+ `);
133
+ this.db.run("CREATE INDEX IF NOT EXISTS idx_queue_session ON queue(sessionId)");
134
+ this.db.run("CREATE INDEX IF NOT EXISTS idx_queue_priority ON queue(sessionId, priority DESC)");
135
+ this.db.run("CREATE INDEX IF NOT EXISTS idx_visited_session ON visited(sessionId)");
136
+ this.db.run("CREATE INDEX IF NOT EXISTS idx_sessions_status ON sessions(status)");
137
+ })();
138
+ return this.initPromise;
139
+ }
140
+ getUrlKey(url) {
141
+ if (this.options.hashUrls) {
142
+ return createHash("sha256").update(url).digest("hex");
143
+ }
144
+ return url;
145
+ }
146
+ async createSession(sessionId, baseUrl, metadata) {
147
+ if (this.closed || !this.db)
148
+ throw new Error("NavigationHistory is closed");
149
+ const now = Date.now();
150
+ const session = {
151
+ sessionId,
152
+ baseUrl,
153
+ startedAt: now,
154
+ lastActivityAt: now,
155
+ status: "running",
156
+ urlsVisited: 0,
157
+ urlsQueued: 0,
158
+ urlsFailed: 0,
159
+ metadata: metadata ? JSON.stringify(metadata) : undefined
160
+ };
161
+ this.db.run(`INSERT OR REPLACE INTO sessions (sessionId, baseUrl, startedAt, lastActivityAt, status, urlsVisited, urlsQueued, urlsFailed, metadata)
162
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, sessionId, baseUrl, now, now, "running", 0, 0, 0, session.metadata ?? null);
163
+ return session;
164
+ }
165
+ async getSession(sessionId) {
166
+ if (this.closed || !this.db)
167
+ throw new Error("NavigationHistory is closed");
168
+ return this.db.get("SELECT * FROM sessions WHERE sessionId = ?", sessionId);
169
+ }
170
+ async updateSessionStatus(sessionId, status) {
171
+ if (this.closed || !this.db)
172
+ throw new Error("NavigationHistory is closed");
173
+ this.db.run("UPDATE sessions SET status = ?, lastActivityAt = ? WHERE sessionId = ?", status, Date.now(), sessionId);
174
+ }
175
+ async updateSessionStats(sessionId, stats) {
176
+ if (this.closed || !this.db)
177
+ throw new Error("NavigationHistory is closed");
178
+ const updates = ["lastActivityAt = ?"];
179
+ const params = [Date.now()];
180
+ if (stats.urlsVisited !== undefined) {
181
+ updates.push("urlsVisited = ?");
182
+ params.push(stats.urlsVisited);
183
+ }
184
+ if (stats.urlsQueued !== undefined) {
185
+ updates.push("urlsQueued = ?");
186
+ params.push(stats.urlsQueued);
187
+ }
188
+ if (stats.urlsFailed !== undefined) {
189
+ updates.push("urlsFailed = ?");
190
+ params.push(stats.urlsFailed);
191
+ }
192
+ params.push(sessionId);
193
+ this.db.run(`UPDATE sessions SET ${updates.join(", ")} WHERE sessionId = ?`, ...params);
194
+ }
195
+ async addToQueue(sessionId, url, options = {}) {
196
+ if (this.closed || !this.db)
197
+ throw new Error("NavigationHistory is closed");
198
+ const urlKey = this.getUrlKey(url);
199
+ const existing = this.db.get("SELECT id FROM queue WHERE sessionId = ? AND urlKey = ?", sessionId, urlKey);
200
+ if (existing)
201
+ return false;
202
+ const isVisited = this.db.get("SELECT id FROM visited WHERE sessionId = ? AND urlKey = ?", sessionId, urlKey);
203
+ if (isVisited)
204
+ return false;
205
+ this.db.run(`INSERT INTO queue (sessionId, urlKey, originalUrl, method, priority, body, headers, metadata, addedAt)
206
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, sessionId, urlKey, url, options.method || "GET", options.priority || 0, options.body ? JSON.stringify(options.body) : null, options.headers ? JSON.stringify(options.headers) : null, options.metadata ? JSON.stringify(options.metadata) : null, Date.now());
207
+ return true;
208
+ }
209
+ async getNextFromQueue(sessionId) {
210
+ if (this.closed || !this.db)
211
+ throw new Error("NavigationHistory is closed");
212
+ const item = this.db.get("SELECT originalUrl as url, method, priority, body, headers, metadata, addedAt FROM queue WHERE sessionId = ? ORDER BY priority DESC, addedAt ASC LIMIT 1", sessionId);
213
+ return item;
214
+ }
215
+ async removeFromQueue(sessionId, url) {
216
+ if (this.closed || !this.db)
217
+ throw new Error("NavigationHistory is closed");
218
+ const urlKey = this.getUrlKey(url);
219
+ this.db.run("DELETE FROM queue WHERE sessionId = ? AND urlKey = ?", sessionId, urlKey);
220
+ return true;
221
+ }
222
+ async getQueueSize(sessionId) {
223
+ if (this.closed || !this.db)
224
+ throw new Error("NavigationHistory is closed");
225
+ const result = this.db.get("SELECT COUNT(*) as count FROM queue WHERE sessionId = ?", sessionId);
226
+ return result?.count || 0;
227
+ }
228
+ async markVisited(sessionId, url, result = {}) {
229
+ if (this.closed || !this.db)
230
+ throw new Error("NavigationHistory is closed");
231
+ const urlKey = this.getUrlKey(url);
232
+ this.db.run("DELETE FROM queue WHERE sessionId = ? AND urlKey = ?", sessionId, urlKey);
233
+ this.db.run(`INSERT OR REPLACE INTO visited (sessionId, urlKey, originalUrl, status, visitedAt, finalUrl, contentType, errorMessage)
234
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, sessionId, urlKey, url, result.status || 0, Date.now(), result.finalUrl ?? null, result.contentType ?? null, result.errorMessage ?? null);
235
+ }
236
+ async isVisited(sessionId, url) {
237
+ if (this.closed || !this.db)
238
+ throw new Error("NavigationHistory is closed");
239
+ const urlKey = this.getUrlKey(url);
240
+ const result = this.db.get("SELECT id FROM visited WHERE sessionId = ? AND urlKey = ?", sessionId, urlKey);
241
+ return !!result;
242
+ }
243
+ async getVisitedCount(sessionId) {
244
+ if (this.closed || !this.db)
245
+ throw new Error("NavigationHistory is closed");
246
+ const result = this.db.get("SELECT COUNT(*) as count FROM visited WHERE sessionId = ?", sessionId);
247
+ return result?.count || 0;
248
+ }
249
+ async getFailedUrls(sessionId) {
250
+ if (this.closed || !this.db)
251
+ throw new Error("NavigationHistory is closed");
252
+ return this.db.all("SELECT url, status, visitedAt, finalUrl, contentType, errorMessage FROM visited WHERE sessionId = ? AND (status >= 400 OR errorMessage IS NOT NULL)", sessionId);
253
+ }
254
+ async getAllQueuedUrls(sessionId) {
255
+ if (this.closed || !this.db)
256
+ throw new Error("NavigationHistory is closed");
257
+ return this.db.all("SELECT originalUrl as url, method, priority, body, headers, metadata, addedAt FROM queue WHERE sessionId = ? ORDER BY priority DESC, addedAt ASC", sessionId);
258
+ }
259
+ async clearQueue(sessionId) {
260
+ if (this.closed || !this.db)
261
+ throw new Error("NavigationHistory is closed");
262
+ this.db.run("DELETE FROM queue WHERE sessionId = ?", sessionId);
263
+ }
264
+ async clearVisited(sessionId) {
265
+ if (this.closed || !this.db)
266
+ throw new Error("NavigationHistory is closed");
267
+ this.db.run("DELETE FROM visited WHERE sessionId = ?", sessionId);
268
+ }
269
+ async deleteSession(sessionId) {
270
+ if (this.closed || !this.db)
271
+ throw new Error("NavigationHistory is closed");
272
+ this.db.run("DELETE FROM queue WHERE sessionId = ?", sessionId);
273
+ this.db.run("DELETE FROM visited WHERE sessionId = ?", sessionId);
274
+ this.db.run("DELETE FROM sessions WHERE sessionId = ?", sessionId);
275
+ }
276
+ async getResumableSessions() {
277
+ if (this.closed || !this.db)
278
+ throw new Error("NavigationHistory is closed");
279
+ return this.db.all("SELECT * FROM sessions WHERE status IN ('running', 'paused') ORDER BY lastActivityAt DESC");
280
+ }
281
+ async close() {
282
+ if (this.closed)
283
+ return;
284
+ this.closed = true;
285
+ if (this.db) {
286
+ this.db.close();
287
+ this.db = null;
288
+ }
289
+ }
290
+ get isClosed() {
291
+ return this.closed;
292
+ }
293
+ get databasePath() {
294
+ return this.dbPath;
295
+ }
296
+ }
@@ -45,7 +45,13 @@ async function createDatabase(dbPath) {
45
45
  const { DatabaseSync } = await import("node:sqlite");
46
46
  const db = new DatabaseSync(dbPath);
47
47
  return {
48
- run: (sql, ...params) => db.exec(sql),
48
+ run: (sql, ...params) => {
49
+ if (params.length === 0) {
50
+ db.exec(sql);
51
+ } else {
52
+ db.prepare(sql).run(...params);
53
+ }
54
+ },
49
55
  get: (sql, ...params) => {
50
56
  const stmt = db.prepare(sql);
51
57
  return stmt.get(...params);
@@ -45,7 +45,13 @@ async function createDatabase(dbPath) {
45
45
  const { DatabaseSync } = await import("node:sqlite");
46
46
  const db = new DatabaseSync(dbPath);
47
47
  return {
48
- run: (sql, ...params) => db.exec(sql),
48
+ run: (sql, ...params) => {
49
+ if (params.length === 0) {
50
+ db.exec(sql);
51
+ } else {
52
+ db.prepare(sql).run(...params);
53
+ }
54
+ },
49
55
  get: (sql, ...params) => {
50
56
  const stmt = db.prepare(sql);
51
57
  return stmt.get(...params);
@@ -7,6 +7,7 @@ const packageJson = require("../../package.json");
7
7
  const { createDefaultHooks, mergeHooks, runVoidHooksSync, runTransformHooks } = require('./hooks.cjs');
8
8
  const { ResponseCache, DNSCache } = require('../cache/index.cjs');
9
9
  const { ProxyManager } = require('../proxy/manager.cjs');
10
+ const { toCurl: toCurlUtil, fromCurl: fromCurlUtil } = require('../utils/curl.cjs');
10
11
  let globalAdapter = null;
11
12
  function setGlobalAdapter(adapter) {
12
13
  globalAdapter = adapter;
@@ -479,6 +480,12 @@ class Rezo {
479
480
  clearCookies() {
480
481
  this.jar?.removeAllCookiesSync();
481
482
  }
483
+ static toCurl(config) {
484
+ return toCurlUtil(config);
485
+ }
486
+ static fromCurl(curlCommand) {
487
+ return fromCurlUtil(curlCommand);
488
+ }
482
489
  }
483
490
  const defaultTransforms = exports.defaultTransforms = {
484
491
  request: [
package/dist/core/rezo.js CHANGED
@@ -7,6 +7,7 @@ import packageJson from "../../package.json" with { type: 'json' };
7
7
  import { createDefaultHooks, mergeHooks, runVoidHooksSync, runTransformHooks } from './hooks.js';
8
8
  import { ResponseCache, DNSCache } from '../cache/index.js';
9
9
  import { ProxyManager } from '../proxy/manager.js';
10
+ import { toCurl as toCurlUtil, fromCurl as fromCurlUtil } from '../utils/curl.js';
10
11
  let globalAdapter = null;
11
12
  export function setGlobalAdapter(adapter) {
12
13
  globalAdapter = adapter;
@@ -479,6 +480,12 @@ export class Rezo {
479
480
  clearCookies() {
480
481
  this.jar?.removeAllCookiesSync();
481
482
  }
483
+ static toCurl(config) {
484
+ return toCurlUtil(config);
485
+ }
486
+ static fromCurl(curlCommand) {
487
+ return fromCurlUtil(curlCommand);
488
+ }
482
489
  }
483
490
  export const defaultTransforms = {
484
491
  request: [
package/dist/crawler.d.ts CHANGED
@@ -228,6 +228,17 @@ declare class FileCacher {
228
228
  */
229
229
  get directory(): string;
230
230
  }
231
+ export interface CrawlSession {
232
+ sessionId: string;
233
+ baseUrl: string;
234
+ startedAt: number;
235
+ lastActivityAt: number;
236
+ status: "running" | "paused" | "completed" | "failed";
237
+ urlsVisited: number;
238
+ urlsQueued: number;
239
+ urlsFailed: number;
240
+ metadata?: string;
241
+ }
231
242
  export interface RezoHttpHeaders {
232
243
  accept?: string | undefined;
233
244
  "accept-encoding"?: string | undefined;
@@ -4464,6 +4475,71 @@ declare class Rezo {
4464
4475
  * @see {@link cookieJar} - Access the underlying RezoCookieJar for more control
4465
4476
  */
4466
4477
  clearCookies(): void;
4478
+ /**
4479
+ * Convert a Rezo request configuration to a cURL command string.
4480
+ *
4481
+ * Generates a valid cURL command that can be executed in a terminal to
4482
+ * reproduce the same HTTP request. Useful for:
4483
+ * - Debugging and sharing requests
4484
+ * - Documentation and examples
4485
+ * - Testing requests outside of Node.js
4486
+ * - Exporting requests to other tools
4487
+ *
4488
+ * @param config - Request configuration object
4489
+ * @returns A cURL command string
4490
+ *
4491
+ * @example
4492
+ * ```typescript
4493
+ * const curl = Rezo.toCurl({
4494
+ * url: 'https://api.example.com/users',
4495
+ * method: 'POST',
4496
+ * headers: { 'Content-Type': 'application/json' },
4497
+ * body: { name: 'John', email: 'john@example.com' }
4498
+ * });
4499
+ * // Output: curl -X POST -H 'content-type: application/json' --data-raw '{"name":"John","email":"john@example.com"}' -L --compressed 'https://api.example.com/users'
4500
+ * ```
4501
+ */
4502
+ static toCurl(config: RezoRequestConfig | RezoRequestOptions): string;
4503
+ /**
4504
+ * Parse a cURL command string into a Rezo request configuration.
4505
+ *
4506
+ * Converts a cURL command into a configuration object that can be
4507
+ * passed directly to Rezo request methods. Useful for:
4508
+ * - Importing requests from browser DevTools
4509
+ * - Converting curl examples from API documentation
4510
+ * - Migrating scripts from curl to Rezo
4511
+ *
4512
+ * Supports common cURL options:
4513
+ * - `-X, --request` - HTTP method
4514
+ * - `-H, --header` - Request headers
4515
+ * - `-d, --data, --data-raw, --data-binary` - Request body
4516
+ * - `-u, --user` - Basic authentication
4517
+ * - `-x, --proxy` - Proxy configuration
4518
+ * - `--socks5, --socks4` - SOCKS proxy
4519
+ * - `-L, --location` - Follow redirects
4520
+ * - `--max-redirs` - Maximum redirects
4521
+ * - `--max-time` - Request timeout
4522
+ * - `-k, --insecure` - Skip TLS verification
4523
+ * - `-A, --user-agent` - User agent header
4524
+ *
4525
+ * @param curlCommand - A cURL command string
4526
+ * @returns A request configuration object
4527
+ *
4528
+ * @example
4529
+ * ```typescript
4530
+ * // From browser DevTools "Copy as cURL"
4531
+ * const config = Rezo.fromCurl(`
4532
+ * curl 'https://api.example.com/data' \\
4533
+ * -H 'Authorization: Bearer token123' \\
4534
+ * -H 'Content-Type: application/json'
4535
+ * `);
4536
+ *
4537
+ * // Use with Rezo
4538
+ * const rezo = new Rezo();
4539
+ * const response = await rezo.request(config);
4540
+ * ```
4541
+ */
4542
+ static fromCurl(curlCommand: string): RezoRequestOptions;
4467
4543
  }
4468
4544
  /**
4469
4545
  * Rezo HTTP Client - Core Types
@@ -6284,6 +6360,15 @@ declare class Decodo {
6284
6360
  * const regexDomain: Domain = '^(sub|api)\.example\.com$';
6285
6361
  */
6286
6362
  export type Domain = string[] | string | RegExp;
6363
+ /**
6364
+ * Supported HTTP adapter types for crawler requests
6365
+ * @description
6366
+ * - 'http': Standard Node.js HTTP/HTTPS adapter (default)
6367
+ * - 'http2': HTTP/2 adapter with session pooling
6368
+ * - 'curl': cURL adapter for maximum compatibility
6369
+ * - 'fetch': Browser-compatible Fetch API adapter
6370
+ */
6371
+ export type CrawlerAdapterType = "http" | "http2" | "curl" | "fetch";
6287
6372
  /**
6288
6373
  * Configuration interface for the CrawlerOptions class
6289
6374
  * @description Defines all available options for configuring web crawler behavior,
@@ -6292,6 +6377,12 @@ export type Domain = string[] | string | RegExp;
6292
6377
  export interface ICrawlerOptions {
6293
6378
  /** Base URL for the crawler - the starting point for crawling operations */
6294
6379
  baseUrl: string;
6380
+ /** HTTP adapter to use for requests (default: 'http') */
6381
+ adapter?: CrawlerAdapterType;
6382
+ /** Enable navigation history for resumable crawling (default: false) */
6383
+ enableNavigationHistory?: boolean;
6384
+ /** Session ID for navigation history - allows resuming specific crawl sessions */
6385
+ sessionId?: string;
6295
6386
  /** Whether to reject unauthorized SSL certificates (default: true) */
6296
6387
  rejectUnauthorized?: boolean;
6297
6388
  /** Custom user agent string for HTTP requests */
@@ -6415,6 +6506,12 @@ export interface ICrawlerOptions {
6415
6506
  export declare class CrawlerOptions {
6416
6507
  /** Base URL for the crawler - the starting point for crawling operations */
6417
6508
  baseUrl: string;
6509
+ /** HTTP adapter to use for requests */
6510
+ adapter: CrawlerAdapterType;
6511
+ /** Enable navigation history for resumable crawling */
6512
+ enableNavigationHistory: boolean;
6513
+ /** Session ID for navigation history - allows resuming specific crawl sessions */
6514
+ sessionId: string;
6418
6515
  /** Whether to reject unauthorized SSL certificates */
6419
6516
  rejectUnauthorized?: boolean;
6420
6517
  /** Custom user agent string for HTTP requests */
@@ -6886,29 +6983,107 @@ export declare class Crawler {
6886
6983
  private isStorageReady;
6887
6984
  private isCacheReady;
6888
6985
  private leadsFinder;
6986
+ /** Navigation history for resumable crawling */
6987
+ private navigationHistory;
6988
+ private isNavigationHistoryReady;
6989
+ private isSessionReady;
6990
+ private currentSession;
6991
+ private navigationHistoryInitPromise;
6992
+ /** Adapter-specific request executor */
6993
+ private adapterExecutor;
6994
+ private adapterType;
6889
6995
  /**
6890
6996
  * Creates a new Crawler instance with the specified configuration.
6891
6997
  *
6892
- * @param option - Primary crawler configuration options
6893
- * @param backup - Optional backup HTTP client configuration for failover scenarios
6998
+ * @param crawlerOptions - Crawler configuration options
6999
+ * @param http - Optional Rezo HTTP client instance (creates default if not provided)
6894
7000
  *
6895
7001
  * @example
6896
7002
  * ```typescript
7003
+ * // Basic usage (creates default Rezo instance)
6897
7004
  * const crawler = new Crawler({
6898
- * http: primaryHttpClient,
6899
- * baseUrl: 'https://api.example.com',
6900
- * timeout: 30000,
7005
+ * baseUrl: 'https://example.com',
6901
7006
  * enableCache: true,
6902
7007
  * cacheDir: './cache',
6903
- * socksProxies: [{ host: '127.0.0.1', port: 9050 }]
6904
- * }, {
6905
- * http: backupHttpClient,
6906
- * useProxy: false,
6907
- * concurrency: 5
6908
7008
  * });
7009
+ *
7010
+ * // With resumable crawling
7011
+ * const crawler = new Crawler({
7012
+ * baseUrl: 'https://example.com',
7013
+ * enableNavigationHistory: true,
7014
+ * sessionId: 'my-session',
7015
+ * cacheDir: './cache',
7016
+ * });
7017
+ *
7018
+ * // With custom Rezo instance
7019
+ * const crawler = new Crawler({
7020
+ * baseUrl: 'https://example.com',
7021
+ * adapter: 'curl',
7022
+ * }, myRezoInstance);
6909
7023
  * ```
6910
7024
  */
6911
- constructor(crawlerOptions: ICrawlerOptions, http: Rezo);
7025
+ constructor(crawlerOptions: ICrawlerOptions, http?: Rezo);
7026
+ /**
7027
+ * Initialize the HTTP adapter based on configuration
7028
+ */
7029
+ private initializeAdapter;
7030
+ /**
7031
+ * Initialize navigation history and session
7032
+ */
7033
+ private initializeNavigationHistory;
7034
+ /**
7035
+ * Wait for navigation history and session to be ready
7036
+ */
7037
+ private waitForNavigationHistory;
7038
+ /**
7039
+ * Ensure navigation history is ready and return it (or null if not enabled)
7040
+ * This is used by visit() and other methods that need to write to navigation history
7041
+ */
7042
+ private ensureNavigationHistoryReady;
7043
+ /**
7044
+ * Add URL to navigation history queue
7045
+ */
7046
+ private addToNavigationQueue;
7047
+ /**
7048
+ * Mark URL as visited in navigation history
7049
+ */
7050
+ private markUrlVisited;
7051
+ /**
7052
+ * Get the current crawl session
7053
+ */
7054
+ getSession(): CrawlSession | null;
7055
+ /**
7056
+ * Get the session ID
7057
+ */
7058
+ getSessionId(): string;
7059
+ /**
7060
+ * Resume a previous crawl session
7061
+ * @param sessionId - Optional session ID to resume (uses current session if not provided)
7062
+ * @returns Promise resolving to the Crawler instance for chaining
7063
+ */
7064
+ resume(sessionId?: string): Promise<Crawler>;
7065
+ /**
7066
+ * Get list of resumable sessions
7067
+ * @returns Promise resolving to array of sessions that can be resumed
7068
+ */
7069
+ getResumableSessions(): Promise<CrawlSession[]>;
7070
+ /**
7071
+ * Pause the current crawl session
7072
+ */
7073
+ pause(): Promise<void>;
7074
+ /**
7075
+ * Mark the current session as completed
7076
+ */
7077
+ complete(): Promise<void>;
7078
+ /**
7079
+ * Get the current adapter type being used
7080
+ */
7081
+ getAdapterType(): CrawlerAdapterType;
7082
+ /**
7083
+ * Switch to a different adapter at runtime
7084
+ * @param adapter - The adapter type to switch to
7085
+ */
7086
+ setAdapter(adapter: CrawlerAdapterType): Promise<void>;
6912
7087
  private rawResponseHandler;
6913
7088
  private waitForCache;
6914
7089
  private waitForStorage;
@@ -7303,6 +7478,16 @@ export declare class Crawler {
7303
7478
  * ```
7304
7479
  */
7305
7480
  waitForAll(): Promise<void>;
7481
+ /**
7482
+ * Alias for waitForAll() - waits for all crawling operations to complete.
7483
+ * @returns Promise that resolves when done
7484
+ * @example
7485
+ * ```typescript
7486
+ * crawler.visit('https://example.com');
7487
+ * await crawler.done();
7488
+ * ```
7489
+ */
7490
+ done(): Promise<void>;
7306
7491
  close(): Promise<void>;
7307
7492
  }
7308
7493
 
@@ -1,5 +1,5 @@
1
- const _mod_7nublp = require('../plugin/crawler.cjs');
2
- exports.Crawler = _mod_7nublp.Crawler;;
3
- const _mod_dsncou = require('../plugin/crawler-options.cjs');
4
- exports.CrawlerOptions = _mod_dsncou.CrawlerOptions;
5
- exports.Domain = _mod_dsncou.Domain;;
1
+ const _mod_l31jyt = require('../plugin/crawler.cjs');
2
+ exports.Crawler = _mod_l31jyt.Crawler;;
3
+ const _mod_2ht78p = require('../plugin/crawler-options.cjs');
4
+ exports.CrawlerOptions = _mod_2ht78p.CrawlerOptions;
5
+ exports.Domain = _mod_2ht78p.Domain;;