rezo 1.0.41 → 1.0.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/dist/adapters/curl.cjs +143 -32
  2. package/dist/adapters/curl.js +143 -32
  3. package/dist/adapters/entries/curl.d.ts +65 -0
  4. package/dist/adapters/entries/fetch.d.ts +65 -0
  5. package/dist/adapters/entries/http.d.ts +65 -0
  6. package/dist/adapters/entries/http2.d.ts +65 -0
  7. package/dist/adapters/entries/react-native.d.ts +65 -0
  8. package/dist/adapters/entries/xhr.d.ts +65 -0
  9. package/dist/adapters/fetch.cjs +98 -12
  10. package/dist/adapters/fetch.js +98 -12
  11. package/dist/adapters/http.cjs +26 -14
  12. package/dist/adapters/http.js +26 -14
  13. package/dist/adapters/http2.cjs +756 -227
  14. package/dist/adapters/http2.js +756 -227
  15. package/dist/adapters/index.cjs +6 -6
  16. package/dist/adapters/xhr.cjs +94 -2
  17. package/dist/adapters/xhr.js +94 -2
  18. package/dist/cache/dns-cache.cjs +5 -3
  19. package/dist/cache/dns-cache.js +5 -3
  20. package/dist/cache/file-cacher.cjs +7 -1
  21. package/dist/cache/file-cacher.js +7 -1
  22. package/dist/cache/index.cjs +15 -13
  23. package/dist/cache/index.js +1 -0
  24. package/dist/cache/navigation-history.cjs +298 -0
  25. package/dist/cache/navigation-history.js +296 -0
  26. package/dist/cache/url-store.cjs +7 -1
  27. package/dist/cache/url-store.js +7 -1
  28. package/dist/core/rezo.cjs +7 -0
  29. package/dist/core/rezo.js +7 -0
  30. package/dist/crawler.d.ts +196 -11
  31. package/dist/entries/crawler.cjs +5 -5
  32. package/dist/index.cjs +27 -24
  33. package/dist/index.d.ts +73 -0
  34. package/dist/index.js +1 -0
  35. package/dist/internal/agents/base.cjs +113 -0
  36. package/dist/internal/agents/base.js +110 -0
  37. package/dist/internal/agents/http-proxy.cjs +89 -0
  38. package/dist/internal/agents/http-proxy.js +86 -0
  39. package/dist/internal/agents/https-proxy.cjs +176 -0
  40. package/dist/internal/agents/https-proxy.js +173 -0
  41. package/dist/internal/agents/index.cjs +10 -0
  42. package/dist/internal/agents/index.js +5 -0
  43. package/dist/internal/agents/socks-client.cjs +571 -0
  44. package/dist/internal/agents/socks-client.js +567 -0
  45. package/dist/internal/agents/socks-proxy.cjs +75 -0
  46. package/dist/internal/agents/socks-proxy.js +72 -0
  47. package/dist/platform/browser.d.ts +65 -0
  48. package/dist/platform/bun.d.ts +65 -0
  49. package/dist/platform/deno.d.ts +65 -0
  50. package/dist/platform/node.d.ts +65 -0
  51. package/dist/platform/react-native.d.ts +65 -0
  52. package/dist/platform/worker.d.ts +65 -0
  53. package/dist/plugin/crawler-options.cjs +1 -1
  54. package/dist/plugin/crawler-options.js +1 -1
  55. package/dist/plugin/crawler.cjs +192 -1
  56. package/dist/plugin/crawler.js +192 -1
  57. package/dist/plugin/index.cjs +36 -36
  58. package/dist/proxy/index.cjs +18 -16
  59. package/dist/proxy/index.js +17 -12
  60. package/dist/queue/index.cjs +8 -8
  61. package/dist/responses/buildError.cjs +11 -2
  62. package/dist/responses/buildError.js +11 -2
  63. package/dist/responses/universal/index.cjs +11 -11
  64. package/dist/utils/agent-pool.cjs +1 -17
  65. package/dist/utils/agent-pool.js +1 -17
  66. package/dist/utils/curl.cjs +317 -0
  67. package/dist/utils/curl.js +314 -0
  68. package/package.json +1 -1
@@ -1,11 +1,14 @@
1
1
  const fs = require("node:fs");
2
2
  const { FileCacher } = require('../cache/file-cacher.cjs');
3
3
  const { UrlStore } = require('../cache/url-store.cjs');
4
+ const { NavigationHistory } = require('../cache/navigation-history.cjs');
4
5
  const { parseHTML } = require("linkedom");
5
6
  const path = require("node:path");
7
+ const { Rezo } = require('../core/rezo.cjs');
6
8
  const { RezoQueue } = require('../queue/queue.cjs');
7
9
  const { Scraper } = require('./scraper.cjs');
8
10
  const { CrawlerOptions } = require('./crawler-options.cjs');
11
+ const { loadAdapter } = require('../adapters/picker.cjs');
9
12
  String.prototype.addBaseUrl = function(url) {
10
13
  url = url instanceof URL ? url.href : url;
11
14
  const html = this.replace(/<base\b[^>]*?>/gi, "");
@@ -44,12 +47,20 @@ class Crawler {
44
47
  isStorageReady = false;
45
48
  isCacheReady = false;
46
49
  leadsFinder;
47
- constructor(crawlerOptions, http) {
50
+ navigationHistory = null;
51
+ isNavigationHistoryReady = false;
52
+ isSessionReady = false;
53
+ currentSession = null;
54
+ navigationHistoryInitPromise = null;
55
+ adapterExecutor = null;
56
+ adapterType;
57
+ constructor(crawlerOptions, http = new Rezo) {
48
58
  this.http = http;
49
59
  this.queue = new RezoQueue({
50
60
  concurrency: 1000
51
61
  });
52
62
  this.config = new CrawlerOptions(crawlerOptions);
63
+ this.adapterType = this.config.adapter;
53
64
  const enableCache = this.config.enableCache;
54
65
  this.isCacheEnabled = enableCache;
55
66
  if (enableCache) {
@@ -91,8 +102,172 @@ class Crawler {
91
102
  this.isStorageReady = true;
92
103
  });
93
104
  }
105
+ if (this.config.enableNavigationHistory) {
106
+ const navHistoryDir = path.resolve(this.config.cacheDir, "navigation");
107
+ if (!fs.existsSync(navHistoryDir))
108
+ fs.mkdirSync(navHistoryDir, { recursive: true });
109
+ this.navigationHistoryInitPromise = this.initializeNavigationHistory(navHistoryDir);
110
+ }
111
+ this.initializeAdapter();
94
112
  this.leadsFinder = new Scraper(this.http, this.config, this._onEmailLeads.bind(this), this._onEmailDiscovered.bind(this), this.config.debug);
95
113
  }
114
+ async initializeAdapter() {
115
+ try {
116
+ const adapterModule = await loadAdapter(this.adapterType);
117
+ this.adapterExecutor = adapterModule.executeRequest.bind(adapterModule);
118
+ } catch (error) {
119
+ if (this.config.debug) {
120
+ console.warn(`[Crawler] Failed to load adapter '${this.adapterType}', falling back to http instance`);
121
+ }
122
+ }
123
+ }
124
+ async initializeNavigationHistory(navHistoryDir) {
125
+ try {
126
+ const history = await NavigationHistory.create({
127
+ storeDir: navHistoryDir,
128
+ dbFileName: "navigation.db"
129
+ });
130
+ this.navigationHistory = history;
131
+ this.isNavigationHistoryReady = true;
132
+ const session = await history.getSession(this.config.sessionId);
133
+ if (session && (session.status === "running" || session.status === "paused")) {
134
+ this.currentSession = session;
135
+ await history.updateSessionStatus(this.config.sessionId, "running");
136
+ } else if (!session) {
137
+ this.currentSession = await history.createSession(this.config.sessionId, this.config.baseUrl, { adapter: this.adapterType });
138
+ }
139
+ this.isSessionReady = true;
140
+ } catch (error) {
141
+ if (this.config.debug) {
142
+ console.error(`[Crawler] Failed to initialize navigation history:`, error);
143
+ }
144
+ this.isNavigationHistoryReady = false;
145
+ this.isSessionReady = false;
146
+ }
147
+ }
148
+ async waitForNavigationHistory() {
149
+ if (!this.config.enableNavigationHistory)
150
+ return;
151
+ if (this.navigationHistoryInitPromise) {
152
+ await this.navigationHistoryInitPromise;
153
+ }
154
+ }
155
+ async ensureNavigationHistoryReady() {
156
+ if (!this.config.enableNavigationHistory)
157
+ return null;
158
+ await this.waitForNavigationHistory();
159
+ return this.navigationHistory;
160
+ }
161
+ async addToNavigationQueue(url, method, body, headers) {
162
+ const history = await this.ensureNavigationHistoryReady();
163
+ if (!history || !this.currentSession)
164
+ return;
165
+ try {
166
+ await history.addToQueue(this.currentSession.sessionId, url, {
167
+ method,
168
+ body,
169
+ headers
170
+ });
171
+ } catch (error) {
172
+ if (this.config.debug) {
173
+ console.warn(`[Crawler] Failed to add URL to navigation queue: ${url}`, error);
174
+ }
175
+ }
176
+ }
177
+ async markUrlVisited(url, result) {
178
+ const history = await this.ensureNavigationHistoryReady();
179
+ if (!history || !this.currentSession)
180
+ return;
181
+ try {
182
+ await history.markVisited(this.currentSession.sessionId, url, result);
183
+ } catch (error) {
184
+ if (this.config.debug) {
185
+ console.warn(`[Crawler] Failed to mark URL as visited: ${url}`, error);
186
+ }
187
+ }
188
+ }
189
+ getSession() {
190
+ return this.currentSession;
191
+ }
192
+ getSessionId() {
193
+ return this.config.sessionId;
194
+ }
195
+ async resume(sessionId) {
196
+ if (!this.config.enableNavigationHistory) {
197
+ throw new Error("Navigation history is not enabled. Set enableNavigationHistory: true in options.");
198
+ }
199
+ await this.waitForNavigationHistory();
200
+ if (!this.navigationHistory) {
201
+ throw new Error("Navigation history failed to initialize.");
202
+ }
203
+ await this.waitForStorage();
204
+ if (this.isCacheEnabled) {
205
+ await this.waitForCache();
206
+ }
207
+ const targetSessionId = sessionId || this.config.sessionId;
208
+ const session = await this.navigationHistory.getSession(targetSessionId);
209
+ if (!session) {
210
+ throw new Error(`Session '${targetSessionId}' not found`);
211
+ }
212
+ if (session.status === "completed") {
213
+ throw new Error(`Session '${targetSessionId}' is already completed`);
214
+ }
215
+ this.currentSession = session;
216
+ await this.navigationHistory.updateSessionStatus(targetSessionId, "running");
217
+ const queuedUrls = await this.navigationHistory.getAllQueuedUrls(targetSessionId);
218
+ if (this.config.debug) {
219
+ console.log(`[Crawler] Resuming session '${targetSessionId}' with ${queuedUrls.length} queued URLs`);
220
+ }
221
+ const scheduledUrls = new Set;
222
+ for (const item of queuedUrls) {
223
+ if (scheduledUrls.has(item.url)) {
224
+ continue;
225
+ }
226
+ scheduledUrls.add(item.url);
227
+ const body = item.body ? JSON.parse(item.body) : undefined;
228
+ const headers = item.headers ? JSON.parse(item.headers) : undefined;
229
+ this.visit(item.url, {
230
+ method: item.method,
231
+ body,
232
+ headers,
233
+ forceRevisit: false
234
+ });
235
+ }
236
+ return this;
237
+ }
238
+ async getResumableSessions() {
239
+ if (!this.config.enableNavigationHistory) {
240
+ return [];
241
+ }
242
+ await this.waitForNavigationHistory();
243
+ if (!this.navigationHistory) {
244
+ return [];
245
+ }
246
+ return this.navigationHistory.getResumableSessions();
247
+ }
248
+ async pause() {
249
+ await this.waitForNavigationHistory();
250
+ if (!this.navigationHistory || !this.currentSession) {
251
+ return;
252
+ }
253
+ await this.navigationHistory.updateSessionStatus(this.currentSession.sessionId, "paused");
254
+ this.currentSession.status = "paused";
255
+ }
256
+ async complete() {
257
+ await this.waitForNavigationHistory();
258
+ if (!this.navigationHistory || !this.currentSession) {
259
+ return;
260
+ }
261
+ await this.navigationHistory.updateSessionStatus(this.currentSession.sessionId, "completed");
262
+ this.currentSession.status = "completed";
263
+ }
264
+ getAdapterType() {
265
+ return this.adapterType;
266
+ }
267
+ async setAdapter(adapter) {
268
+ this.adapterType = adapter;
269
+ await this.initializeAdapter();
270
+ }
96
271
  rawResponseHandler(data) {
97
272
  if (this.rawResponseEvents.length === 0)
98
273
  return;
@@ -390,6 +565,10 @@ class Crawler {
390
565
  };
391
566
  decodoInstanse = this.config.getAdapter(url, "decodo", false, useOxylabsRotation) || undefined;
392
567
  }
568
+ if (this.config.enableNavigationHistory) {
569
+ const headersObj = headers instanceof Headers ? Object.fromEntries(headers.entries()) : headers;
570
+ this.addToNavigationQueue(url, method, body, headersObj);
571
+ }
393
572
  if (deepEmailFinder) {
394
573
  this.execute2(method, url, body, _options, forceRevisit).then();
395
574
  return this;
@@ -444,6 +623,11 @@ class Crawler {
444
623
  await this.saveCache(url, res);
445
624
  if (!isVisited)
446
625
  await this.saveUrl(url);
626
+ this.markUrlVisited(url, {
627
+ status: res.status,
628
+ finalUrl: res.finalUrl,
629
+ contentType: res.contentType
630
+ });
447
631
  if (res.contentType && res.contentType.includes("/json")) {
448
632
  if (this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) {
449
633
  this.leadsFinder.extractEmails(JSON.stringify(res.data), res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
@@ -491,6 +675,10 @@ class Crawler {
491
675
  return await this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount + 1);
492
676
  }
493
677
  }
678
+ this.markUrlVisited(url, {
679
+ status: error?.response?.status || 0,
680
+ errorMessage: e.message || "Unknown error"
681
+ });
494
682
  if (this.config.throwFatalError)
495
683
  throw e;
496
684
  if (this.config.debug) {
@@ -506,6 +694,9 @@ class Crawler {
506
694
  async waitForAll() {
507
695
  await this.queue.onIdle();
508
696
  }
697
+ async done() {
698
+ return this.waitForAll();
699
+ }
509
700
  async close() {
510
701
  try {
511
702
  await this.cacher.close();
@@ -1,11 +1,14 @@
1
1
  import fs from "node:fs";
2
2
  import { FileCacher } from '../cache/file-cacher.js';
3
3
  import { UrlStore } from '../cache/url-store.js';
4
+ import { NavigationHistory } from '../cache/navigation-history.js';
4
5
  import { parseHTML } from "linkedom";
5
6
  import path from "node:path";
7
+ import { Rezo } from '../core/rezo.js';
6
8
  import { RezoQueue } from '../queue/queue.js';
7
9
  import { Scraper } from './scraper.js';
8
10
  import { CrawlerOptions } from './crawler-options.js';
11
+ import { loadAdapter } from '../adapters/picker.js';
9
12
  String.prototype.addBaseUrl = function(url) {
10
13
  url = url instanceof URL ? url.href : url;
11
14
  const html = this.replace(/<base\b[^>]*?>/gi, "");
@@ -44,12 +47,20 @@ export class Crawler {
44
47
  isStorageReady = false;
45
48
  isCacheReady = false;
46
49
  leadsFinder;
47
- constructor(crawlerOptions, http) {
50
+ navigationHistory = null;
51
+ isNavigationHistoryReady = false;
52
+ isSessionReady = false;
53
+ currentSession = null;
54
+ navigationHistoryInitPromise = null;
55
+ adapterExecutor = null;
56
+ adapterType;
57
+ constructor(crawlerOptions, http = new Rezo) {
48
58
  this.http = http;
49
59
  this.queue = new RezoQueue({
50
60
  concurrency: 1000
51
61
  });
52
62
  this.config = new CrawlerOptions(crawlerOptions);
63
+ this.adapterType = this.config.adapter;
53
64
  const enableCache = this.config.enableCache;
54
65
  this.isCacheEnabled = enableCache;
55
66
  if (enableCache) {
@@ -91,8 +102,172 @@ export class Crawler {
91
102
  this.isStorageReady = true;
92
103
  });
93
104
  }
105
+ if (this.config.enableNavigationHistory) {
106
+ const navHistoryDir = path.resolve(this.config.cacheDir, "navigation");
107
+ if (!fs.existsSync(navHistoryDir))
108
+ fs.mkdirSync(navHistoryDir, { recursive: true });
109
+ this.navigationHistoryInitPromise = this.initializeNavigationHistory(navHistoryDir);
110
+ }
111
+ this.initializeAdapter();
94
112
  this.leadsFinder = new Scraper(this.http, this.config, this._onEmailLeads.bind(this), this._onEmailDiscovered.bind(this), this.config.debug);
95
113
  }
114
+ async initializeAdapter() {
115
+ try {
116
+ const adapterModule = await loadAdapter(this.adapterType);
117
+ this.adapterExecutor = adapterModule.executeRequest.bind(adapterModule);
118
+ } catch (error) {
119
+ if (this.config.debug) {
120
+ console.warn(`[Crawler] Failed to load adapter '${this.adapterType}', falling back to http instance`);
121
+ }
122
+ }
123
+ }
124
+ async initializeNavigationHistory(navHistoryDir) {
125
+ try {
126
+ const history = await NavigationHistory.create({
127
+ storeDir: navHistoryDir,
128
+ dbFileName: "navigation.db"
129
+ });
130
+ this.navigationHistory = history;
131
+ this.isNavigationHistoryReady = true;
132
+ const session = await history.getSession(this.config.sessionId);
133
+ if (session && (session.status === "running" || session.status === "paused")) {
134
+ this.currentSession = session;
135
+ await history.updateSessionStatus(this.config.sessionId, "running");
136
+ } else if (!session) {
137
+ this.currentSession = await history.createSession(this.config.sessionId, this.config.baseUrl, { adapter: this.adapterType });
138
+ }
139
+ this.isSessionReady = true;
140
+ } catch (error) {
141
+ if (this.config.debug) {
142
+ console.error(`[Crawler] Failed to initialize navigation history:`, error);
143
+ }
144
+ this.isNavigationHistoryReady = false;
145
+ this.isSessionReady = false;
146
+ }
147
+ }
148
+ async waitForNavigationHistory() {
149
+ if (!this.config.enableNavigationHistory)
150
+ return;
151
+ if (this.navigationHistoryInitPromise) {
152
+ await this.navigationHistoryInitPromise;
153
+ }
154
+ }
155
+ async ensureNavigationHistoryReady() {
156
+ if (!this.config.enableNavigationHistory)
157
+ return null;
158
+ await this.waitForNavigationHistory();
159
+ return this.navigationHistory;
160
+ }
161
+ async addToNavigationQueue(url, method, body, headers) {
162
+ const history = await this.ensureNavigationHistoryReady();
163
+ if (!history || !this.currentSession)
164
+ return;
165
+ try {
166
+ await history.addToQueue(this.currentSession.sessionId, url, {
167
+ method,
168
+ body,
169
+ headers
170
+ });
171
+ } catch (error) {
172
+ if (this.config.debug) {
173
+ console.warn(`[Crawler] Failed to add URL to navigation queue: ${url}`, error);
174
+ }
175
+ }
176
+ }
177
+ async markUrlVisited(url, result) {
178
+ const history = await this.ensureNavigationHistoryReady();
179
+ if (!history || !this.currentSession)
180
+ return;
181
+ try {
182
+ await history.markVisited(this.currentSession.sessionId, url, result);
183
+ } catch (error) {
184
+ if (this.config.debug) {
185
+ console.warn(`[Crawler] Failed to mark URL as visited: ${url}`, error);
186
+ }
187
+ }
188
+ }
189
+ getSession() {
190
+ return this.currentSession;
191
+ }
192
+ getSessionId() {
193
+ return this.config.sessionId;
194
+ }
195
+ async resume(sessionId) {
196
+ if (!this.config.enableNavigationHistory) {
197
+ throw new Error("Navigation history is not enabled. Set enableNavigationHistory: true in options.");
198
+ }
199
+ await this.waitForNavigationHistory();
200
+ if (!this.navigationHistory) {
201
+ throw new Error("Navigation history failed to initialize.");
202
+ }
203
+ await this.waitForStorage();
204
+ if (this.isCacheEnabled) {
205
+ await this.waitForCache();
206
+ }
207
+ const targetSessionId = sessionId || this.config.sessionId;
208
+ const session = await this.navigationHistory.getSession(targetSessionId);
209
+ if (!session) {
210
+ throw new Error(`Session '${targetSessionId}' not found`);
211
+ }
212
+ if (session.status === "completed") {
213
+ throw new Error(`Session '${targetSessionId}' is already completed`);
214
+ }
215
+ this.currentSession = session;
216
+ await this.navigationHistory.updateSessionStatus(targetSessionId, "running");
217
+ const queuedUrls = await this.navigationHistory.getAllQueuedUrls(targetSessionId);
218
+ if (this.config.debug) {
219
+ console.log(`[Crawler] Resuming session '${targetSessionId}' with ${queuedUrls.length} queued URLs`);
220
+ }
221
+ const scheduledUrls = new Set;
222
+ for (const item of queuedUrls) {
223
+ if (scheduledUrls.has(item.url)) {
224
+ continue;
225
+ }
226
+ scheduledUrls.add(item.url);
227
+ const body = item.body ? JSON.parse(item.body) : undefined;
228
+ const headers = item.headers ? JSON.parse(item.headers) : undefined;
229
+ this.visit(item.url, {
230
+ method: item.method,
231
+ body,
232
+ headers,
233
+ forceRevisit: false
234
+ });
235
+ }
236
+ return this;
237
+ }
238
+ async getResumableSessions() {
239
+ if (!this.config.enableNavigationHistory) {
240
+ return [];
241
+ }
242
+ await this.waitForNavigationHistory();
243
+ if (!this.navigationHistory) {
244
+ return [];
245
+ }
246
+ return this.navigationHistory.getResumableSessions();
247
+ }
248
+ async pause() {
249
+ await this.waitForNavigationHistory();
250
+ if (!this.navigationHistory || !this.currentSession) {
251
+ return;
252
+ }
253
+ await this.navigationHistory.updateSessionStatus(this.currentSession.sessionId, "paused");
254
+ this.currentSession.status = "paused";
255
+ }
256
+ async complete() {
257
+ await this.waitForNavigationHistory();
258
+ if (!this.navigationHistory || !this.currentSession) {
259
+ return;
260
+ }
261
+ await this.navigationHistory.updateSessionStatus(this.currentSession.sessionId, "completed");
262
+ this.currentSession.status = "completed";
263
+ }
264
+ getAdapterType() {
265
+ return this.adapterType;
266
+ }
267
+ async setAdapter(adapter) {
268
+ this.adapterType = adapter;
269
+ await this.initializeAdapter();
270
+ }
96
271
  rawResponseHandler(data) {
97
272
  if (this.rawResponseEvents.length === 0)
98
273
  return;
@@ -390,6 +565,10 @@ export class Crawler {
390
565
  };
391
566
  decodoInstanse = this.config.getAdapter(url, "decodo", false, useOxylabsRotation) || undefined;
392
567
  }
568
+ if (this.config.enableNavigationHistory) {
569
+ const headersObj = headers instanceof Headers ? Object.fromEntries(headers.entries()) : headers;
570
+ this.addToNavigationQueue(url, method, body, headersObj);
571
+ }
393
572
  if (deepEmailFinder) {
394
573
  this.execute2(method, url, body, _options, forceRevisit).then();
395
574
  return this;
@@ -444,6 +623,11 @@ export class Crawler {
444
623
  await this.saveCache(url, res);
445
624
  if (!isVisited)
446
625
  await this.saveUrl(url);
626
+ this.markUrlVisited(url, {
627
+ status: res.status,
628
+ finalUrl: res.finalUrl,
629
+ contentType: res.contentType
630
+ });
447
631
  if (res.contentType && res.contentType.includes("/json")) {
448
632
  if (this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) {
449
633
  this.leadsFinder.extractEmails(JSON.stringify(res.data), res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
@@ -491,6 +675,10 @@ export class Crawler {
491
675
  return await this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount + 1);
492
676
  }
493
677
  }
678
+ this.markUrlVisited(url, {
679
+ status: error?.response?.status || 0,
680
+ errorMessage: e.message || "Unknown error"
681
+ });
494
682
  if (this.config.throwFatalError)
495
683
  throw e;
496
684
  if (this.config.debug) {
@@ -506,6 +694,9 @@ export class Crawler {
506
694
  async waitForAll() {
507
695
  await this.queue.onIdle();
508
696
  }
697
+ async done() {
698
+ return this.waitForAll();
699
+ }
509
700
  async close() {
510
701
  try {
511
702
  await this.cacher.close();
@@ -1,36 +1,36 @@
1
- const _mod_a2g0l2 = require('./crawler.cjs');
2
- exports.Crawler = _mod_a2g0l2.Crawler;;
3
- const _mod_bsn73l = require('./crawler-options.cjs');
4
- exports.CrawlerOptions = _mod_bsn73l.CrawlerOptions;;
5
- const _mod_ufcptn = require('../cache/file-cacher.cjs');
6
- exports.FileCacher = _mod_ufcptn.FileCacher;;
7
- const _mod_tj3jh8 = require('../cache/url-store.cjs');
8
- exports.UrlStore = _mod_tj3jh8.UrlStore;;
9
- const _mod_imciy5 = require('./addon/oxylabs/index.cjs');
10
- exports.Oxylabs = _mod_imciy5.Oxylabs;;
11
- const _mod_t0yjxb = require('./addon/oxylabs/options.cjs');
12
- exports.OXYLABS_BROWSER_TYPES = _mod_t0yjxb.OXYLABS_BROWSER_TYPES;
13
- exports.OXYLABS_COMMON_LOCALES = _mod_t0yjxb.OXYLABS_COMMON_LOCALES;
14
- exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_t0yjxb.OXYLABS_COMMON_GEO_LOCATIONS;
15
- exports.OXYLABS_US_STATES = _mod_t0yjxb.OXYLABS_US_STATES;
16
- exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_t0yjxb.OXYLABS_EUROPEAN_COUNTRIES;
17
- exports.OXYLABS_ASIAN_COUNTRIES = _mod_t0yjxb.OXYLABS_ASIAN_COUNTRIES;
18
- exports.getRandomOxylabsBrowserType = _mod_t0yjxb.getRandomBrowserType;
19
- exports.getRandomOxylabsLocale = _mod_t0yjxb.getRandomLocale;
20
- exports.getRandomOxylabsGeoLocation = _mod_t0yjxb.getRandomGeoLocation;;
21
- const _mod_z9e7mj = require('./addon/decodo/index.cjs');
22
- exports.Decodo = _mod_z9e7mj.Decodo;;
23
- const _mod_tcfz5k = require('./addon/decodo/options.cjs');
24
- exports.DECODO_DEVICE_TYPES = _mod_tcfz5k.DECODO_DEVICE_TYPES;
25
- exports.DECODO_HEADLESS_MODES = _mod_tcfz5k.DECODO_HEADLESS_MODES;
26
- exports.DECODO_COMMON_LOCALES = _mod_tcfz5k.DECODO_COMMON_LOCALES;
27
- exports.DECODO_COMMON_COUNTRIES = _mod_tcfz5k.DECODO_COMMON_COUNTRIES;
28
- exports.DECODO_EUROPEAN_COUNTRIES = _mod_tcfz5k.DECODO_EUROPEAN_COUNTRIES;
29
- exports.DECODO_ASIAN_COUNTRIES = _mod_tcfz5k.DECODO_ASIAN_COUNTRIES;
30
- exports.DECODO_US_STATES = _mod_tcfz5k.DECODO_US_STATES;
31
- exports.DECODO_COMMON_CITIES = _mod_tcfz5k.DECODO_COMMON_CITIES;
32
- exports.getRandomDecodoDeviceType = _mod_tcfz5k.getRandomDeviceType;
33
- exports.getRandomDecodoLocale = _mod_tcfz5k.getRandomLocale;
34
- exports.getRandomDecodoCountry = _mod_tcfz5k.getRandomCountry;
35
- exports.getRandomDecodoCity = _mod_tcfz5k.getRandomCity;
36
- exports.generateDecodoSessionId = _mod_tcfz5k.generateSessionId;;
1
+ const _mod_pwsonp = require('./crawler.cjs');
2
+ exports.Crawler = _mod_pwsonp.Crawler;;
3
+ const _mod_f0a514 = require('./crawler-options.cjs');
4
+ exports.CrawlerOptions = _mod_f0a514.CrawlerOptions;;
5
+ const _mod_0j6c45 = require('../cache/file-cacher.cjs');
6
+ exports.FileCacher = _mod_0j6c45.FileCacher;;
7
+ const _mod_0ys1f7 = require('../cache/url-store.cjs');
8
+ exports.UrlStore = _mod_0ys1f7.UrlStore;;
9
+ const _mod_bs9mae = require('./addon/oxylabs/index.cjs');
10
+ exports.Oxylabs = _mod_bs9mae.Oxylabs;;
11
+ const _mod_afta57 = require('./addon/oxylabs/options.cjs');
12
+ exports.OXYLABS_BROWSER_TYPES = _mod_afta57.OXYLABS_BROWSER_TYPES;
13
+ exports.OXYLABS_COMMON_LOCALES = _mod_afta57.OXYLABS_COMMON_LOCALES;
14
+ exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_afta57.OXYLABS_COMMON_GEO_LOCATIONS;
15
+ exports.OXYLABS_US_STATES = _mod_afta57.OXYLABS_US_STATES;
16
+ exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_afta57.OXYLABS_EUROPEAN_COUNTRIES;
17
+ exports.OXYLABS_ASIAN_COUNTRIES = _mod_afta57.OXYLABS_ASIAN_COUNTRIES;
18
+ exports.getRandomOxylabsBrowserType = _mod_afta57.getRandomBrowserType;
19
+ exports.getRandomOxylabsLocale = _mod_afta57.getRandomLocale;
20
+ exports.getRandomOxylabsGeoLocation = _mod_afta57.getRandomGeoLocation;;
21
+ const _mod_8woeb1 = require('./addon/decodo/index.cjs');
22
+ exports.Decodo = _mod_8woeb1.Decodo;;
23
+ const _mod_rpxa81 = require('./addon/decodo/options.cjs');
24
+ exports.DECODO_DEVICE_TYPES = _mod_rpxa81.DECODO_DEVICE_TYPES;
25
+ exports.DECODO_HEADLESS_MODES = _mod_rpxa81.DECODO_HEADLESS_MODES;
26
+ exports.DECODO_COMMON_LOCALES = _mod_rpxa81.DECODO_COMMON_LOCALES;
27
+ exports.DECODO_COMMON_COUNTRIES = _mod_rpxa81.DECODO_COMMON_COUNTRIES;
28
+ exports.DECODO_EUROPEAN_COUNTRIES = _mod_rpxa81.DECODO_EUROPEAN_COUNTRIES;
29
+ exports.DECODO_ASIAN_COUNTRIES = _mod_rpxa81.DECODO_ASIAN_COUNTRIES;
30
+ exports.DECODO_US_STATES = _mod_rpxa81.DECODO_US_STATES;
31
+ exports.DECODO_COMMON_CITIES = _mod_rpxa81.DECODO_COMMON_CITIES;
32
+ exports.getRandomDecodoDeviceType = _mod_rpxa81.getRandomDeviceType;
33
+ exports.getRandomDecodoLocale = _mod_rpxa81.getRandomLocale;
34
+ exports.getRandomDecodoCountry = _mod_rpxa81.getRandomCountry;
35
+ exports.getRandomDecodoCity = _mod_rpxa81.getRandomCity;
36
+ exports.generateDecodoSessionId = _mod_rpxa81.generateSessionId;;
@@ -1,11 +1,9 @@
1
- const { SocksProxyAgent: RezoSocksProxy } = require("socks-proxy-agent");
2
- const { HttpsProxyAgent: RezoHttpsSocks } = require("https-proxy-agent");
3
- const { HttpProxyAgent: RezoHttpSocks } = require("http-proxy-agent");
1
+ const { Agent, HttpProxyAgent, HttpsProxyAgent, SocksProxyAgent } = require('../internal/agents.cjs');
4
2
  const { parseProxyString } = require('./parse.cjs');
5
- const _mod_kx45pf = require('./manager.cjs');
6
- exports.ProxyManager = _mod_kx45pf.ProxyManager;;
7
- const _mod_2nb4l8 = require('./parse.cjs');
8
- exports.parseProxyString = _mod_2nb4l8.parseProxyString;;
3
+ const _mod_wpx4sa = require('./manager.cjs');
4
+ exports.ProxyManager = _mod_wpx4sa.ProxyManager;;
5
+ const _mod_138ems = require('./parse.cjs');
6
+ exports.parseProxyString = _mod_138ems.parseProxyString;;
9
7
  function createOptions(uri, opts) {
10
8
  if (uri instanceof URL || typeof uri === "string") {
11
9
  return {
@@ -26,29 +24,29 @@ function rezoProxy(uri, over, opts) {
26
24
  if (typeof over === "string") {
27
25
  const config = createOptions(uri, opts);
28
26
  if (over === "http") {
29
- return new RezoHttpSocks(config.uri, config.opts);
27
+ return new HttpProxyAgent(config.uri, config.opts);
30
28
  }
31
- return new RezoHttpsSocks(config.uri, { ...config.opts, rejectUnauthorized: config.opts?.rejectUnauthorized ?? false });
29
+ return new HttpsProxyAgent(config.uri, { ...config.opts, rejectUnauthorized: config.opts?.rejectUnauthorized ?? false });
32
30
  } else {
33
31
  const isHttp = uri.startsWith("http:");
34
32
  const isHttps = uri.startsWith("https:");
35
33
  const isSocks = uri.startsWith("sock");
36
34
  if (isSocks) {
37
35
  const config = createOptions(uri, over || opts);
38
- return new RezoSocksProxy(config.uri, config.opts);
36
+ return new SocksProxyAgent(config.uri, config.opts);
39
37
  }
40
38
  if (isHttp) {
41
39
  const config = createOptions(uri, over || opts);
42
- return new RezoHttpSocks(config.uri, config.opts);
40
+ return new HttpProxyAgent(config.uri, config.opts);
43
41
  }
44
42
  if (isHttps) {
45
43
  const config = createOptions(uri, over || opts);
46
- return new RezoHttpsSocks(config.uri, { ...config.opts, rejectUnauthorized: config.opts?.rejectUnauthorized ?? false });
44
+ return new HttpsProxyAgent(config.uri, { ...config.opts, rejectUnauthorized: config.opts?.rejectUnauthorized ?? false });
47
45
  }
48
46
  const proxy = parseProxyString(uri);
49
47
  if (proxy) {
50
48
  const config = createOptions(proxy, over || opts);
51
- return new RezoSocksProxy(config.uri, config.opts);
49
+ return new SocksProxyAgent(config.uri, config.opts);
52
50
  }
53
51
  throw new Error("Invalid proxy protocol");
54
52
  }
@@ -57,12 +55,16 @@ function rezoProxy(uri, over, opts) {
57
55
  delete uri.client;
58
56
  const config = createOptions(uri, opts);
59
57
  if (over === "http") {
60
- return new RezoHttpSocks(config.uri, config.opts);
58
+ return new HttpProxyAgent(config.uri, config.opts);
61
59
  }
62
- return new RezoHttpsSocks(config.uri, { ...config.opts, rejectUnauthorized: config.opts?.rejectUnauthorized ?? false });
60
+ return new HttpsProxyAgent(config.uri, { ...config.opts, rejectUnauthorized: config.opts?.rejectUnauthorized ?? false });
63
61
  }
64
62
  const config = createOptions(uri, opts);
65
- return new RezoSocksProxy(config.uri, config.opts);
63
+ return new SocksProxyAgent(config.uri, config.opts);
66
64
  }
67
65
 
66
+ exports.Agent = Agent;
67
+ exports.HttpProxyAgent = HttpProxyAgent;
68
+ exports.HttpsProxyAgent = HttpsProxyAgent;
69
+ exports.SocksProxyAgent = SocksProxyAgent;
68
70
  exports.rezoProxy = rezoProxy;