rezo 1.0.42 → 1.0.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/dist/adapters/curl.cjs +131 -29
  2. package/dist/adapters/curl.js +131 -29
  3. package/dist/adapters/entries/curl.d.ts +65 -0
  4. package/dist/adapters/entries/fetch.d.ts +65 -0
  5. package/dist/adapters/entries/http.d.ts +65 -0
  6. package/dist/adapters/entries/http2.d.ts +65 -0
  7. package/dist/adapters/entries/react-native.d.ts +65 -0
  8. package/dist/adapters/entries/xhr.d.ts +65 -0
  9. package/dist/adapters/http2.cjs +209 -22
  10. package/dist/adapters/http2.js +209 -22
  11. package/dist/adapters/index.cjs +6 -6
  12. package/dist/cache/index.cjs +9 -13
  13. package/dist/cache/index.js +0 -2
  14. package/dist/core/rezo.cjs +7 -0
  15. package/dist/core/rezo.js +7 -0
  16. package/dist/crawler/addon/decodo/index.cjs +1 -0
  17. package/dist/crawler/addon/decodo/index.js +1 -0
  18. package/dist/crawler/crawler-options.cjs +1 -0
  19. package/dist/crawler/crawler-options.js +1 -0
  20. package/dist/crawler/crawler.cjs +1070 -0
  21. package/dist/crawler/crawler.js +1068 -0
  22. package/dist/crawler/index.cjs +40 -0
  23. package/dist/{plugin → crawler}/index.js +4 -2
  24. package/dist/crawler/plugin/file-cacher.cjs +19 -0
  25. package/dist/crawler/plugin/file-cacher.js +19 -0
  26. package/dist/crawler/plugin/index.cjs +1 -0
  27. package/dist/crawler/plugin/index.js +1 -0
  28. package/dist/crawler/plugin/navigation-history.cjs +43 -0
  29. package/dist/crawler/plugin/navigation-history.js +43 -0
  30. package/dist/crawler/plugin/robots-txt.cjs +2 -0
  31. package/dist/crawler/plugin/robots-txt.js +2 -0
  32. package/dist/crawler/plugin/url-store.cjs +18 -0
  33. package/dist/crawler/plugin/url-store.js +18 -0
  34. package/dist/crawler.d.ts +511 -183
  35. package/dist/entries/crawler.cjs +5 -5
  36. package/dist/entries/crawler.js +2 -2
  37. package/dist/index.cjs +27 -24
  38. package/dist/index.d.ts +73 -0
  39. package/dist/index.js +1 -0
  40. package/dist/internal/agents/base.cjs +113 -0
  41. package/dist/internal/agents/base.js +110 -0
  42. package/dist/internal/agents/http-proxy.cjs +89 -0
  43. package/dist/internal/agents/http-proxy.js +86 -0
  44. package/dist/internal/agents/https-proxy.cjs +176 -0
  45. package/dist/internal/agents/https-proxy.js +173 -0
  46. package/dist/internal/agents/index.cjs +10 -0
  47. package/dist/internal/agents/index.js +5 -0
  48. package/dist/internal/agents/socks-client.cjs +571 -0
  49. package/dist/internal/agents/socks-client.js +567 -0
  50. package/dist/internal/agents/socks-proxy.cjs +75 -0
  51. package/dist/internal/agents/socks-proxy.js +72 -0
  52. package/dist/platform/browser.d.ts +65 -0
  53. package/dist/platform/bun.d.ts +65 -0
  54. package/dist/platform/deno.d.ts +65 -0
  55. package/dist/platform/node.d.ts +65 -0
  56. package/dist/platform/react-native.d.ts +65 -0
  57. package/dist/platform/worker.d.ts +65 -0
  58. package/dist/proxy/index.cjs +18 -16
  59. package/dist/proxy/index.js +17 -12
  60. package/dist/queue/index.cjs +8 -8
  61. package/dist/responses/buildError.cjs +11 -2
  62. package/dist/responses/buildError.js +11 -2
  63. package/dist/responses/universal/index.cjs +11 -11
  64. package/dist/utils/curl.cjs +317 -0
  65. package/dist/utils/curl.js +314 -0
  66. package/package.json +2 -6
  67. package/dist/cache/file-cacher.cjs +0 -264
  68. package/dist/cache/file-cacher.js +0 -261
  69. package/dist/cache/url-store.cjs +0 -288
  70. package/dist/cache/url-store.js +0 -285
  71. package/dist/plugin/addon/decodo/index.cjs +0 -1
  72. package/dist/plugin/addon/decodo/index.js +0 -1
  73. package/dist/plugin/crawler-options.cjs +0 -1
  74. package/dist/plugin/crawler-options.js +0 -1
  75. package/dist/plugin/crawler.cjs +0 -519
  76. package/dist/plugin/crawler.js +0 -517
  77. package/dist/plugin/index.cjs +0 -36
  78. /package/dist/{plugin → crawler}/addon/decodo/options.cjs +0 -0
  79. /package/dist/{plugin → crawler}/addon/decodo/options.js +0 -0
  80. /package/dist/{plugin → crawler}/addon/decodo/types.cjs +0 -0
  81. /package/dist/{plugin → crawler}/addon/decodo/types.js +0 -0
  82. /package/dist/{plugin → crawler}/addon/oxylabs/index.cjs +0 -0
  83. /package/dist/{plugin → crawler}/addon/oxylabs/index.js +0 -0
  84. /package/dist/{plugin → crawler}/addon/oxylabs/options.cjs +0 -0
  85. /package/dist/{plugin → crawler}/addon/oxylabs/options.js +0 -0
  86. /package/dist/{plugin → crawler}/addon/oxylabs/types.cjs +0 -0
  87. /package/dist/{plugin → crawler}/addon/oxylabs/types.js +0 -0
  88. /package/dist/{plugin → crawler}/scraper.cjs +0 -0
  89. /package/dist/{plugin → crawler}/scraper.js +0 -0
@@ -1,517 +0,0 @@
1
- import fs from "node:fs";
2
- import { FileCacher } from '../cache/file-cacher.js';
3
- import { UrlStore } from '../cache/url-store.js';
4
- import { parseHTML } from "linkedom";
5
- import path from "node:path";
6
- import { RezoQueue } from '../queue/queue.js';
7
- import { Scraper } from './scraper.js';
8
- import { CrawlerOptions } from './crawler-options.js';
9
- String.prototype.addBaseUrl = function(url) {
10
- url = url instanceof URL ? url.href : url;
11
- const html = this.replace(/<base\b[^>]*?>/gi, "");
12
- if (/<head[^>]*>/i.test(html)) {
13
- return html.replace(/<head[^>]*>/i, (match) => `${match}
14
- <base href="${url}">`);
15
- }
16
- const baseTag = `<head>
17
- <base href="${url}">
18
- </head>
19
- `;
20
- if (/<body[^>]*>/i.test(html)) {
21
- return html.replace(/<body[^>]*>/i, baseTag + "$&");
22
- }
23
- if (/<html[^>]*>/i.test(html)) {
24
- return html.replace(/<html[^>]*>/i, `$&
25
- ` + baseTag);
26
- }
27
- return this;
28
- };
29
-
30
- export class Crawler {
31
- http;
32
- events = [];
33
- jsonEvents = [];
34
- errorEvents = [];
35
- responseEvents = [];
36
- rawResponseEvents = [];
37
- emailDiscoveredEvents = [];
38
- emailLeadsEvents = [];
39
- cacher = null;
40
- queue;
41
- isCacheEnabled;
42
- config;
43
- urlStorage;
44
- isStorageReady = false;
45
- isCacheReady = false;
46
- leadsFinder;
47
- constructor(crawlerOptions, http) {
48
- this.http = http;
49
- this.queue = new RezoQueue({
50
- concurrency: 1000
51
- });
52
- this.config = new CrawlerOptions(crawlerOptions);
53
- const enableCache = this.config.enableCache;
54
- this.isCacheEnabled = enableCache;
55
- if (enableCache) {
56
- const cacheDir = this.config.cacheDir;
57
- const cacheTTL = this.config.cacheTTL;
58
- const dbUrl = cacheDir && (cacheDir.startsWith("./") || cacheDir.startsWith("/")) ? `${cacheDir}${cacheDir.endsWith("/") ? "" : "/"}` : cacheDir ? `./${cacheDir}${cacheDir.endsWith("/") ? "" : "/"}` : `./cache/`;
59
- if (!fs.existsSync(path.dirname(dbUrl)))
60
- fs.mkdirSync(path.dirname(dbUrl), { recursive: true });
61
- FileCacher.create({
62
- cacheDir: dbUrl,
63
- softDelete: false,
64
- ttl: cacheTTL,
65
- encryptNamespace: true
66
- }).then((storage) => {
67
- this.cacher = storage;
68
- this.isCacheReady = true;
69
- });
70
- const dit = path.resolve(cacheDir, "urls");
71
- if (!fs.existsSync(dit))
72
- fs.mkdirSync(dit, { recursive: true });
73
- UrlStore.create({
74
- storeDir: dit,
75
- dbFileName: ".url_cache.db",
76
- ttl: 1000 * 60 * 60 * 24 * 7
77
- }).then((storage) => {
78
- this.urlStorage = storage;
79
- this.isStorageReady = true;
80
- });
81
- } else {
82
- const dit = path.resolve(this.config.cacheDir, "./cache/urls");
83
- if (!fs.existsSync(dit))
84
- fs.mkdirSync(dit, { recursive: true });
85
- UrlStore.create({
86
- storeDir: dit,
87
- dbFileName: ".url_cache.db",
88
- ttl: 1000 * 60 * 60 * 24 * 7
89
- }).then((storage) => {
90
- this.urlStorage = storage;
91
- this.isStorageReady = true;
92
- });
93
- }
94
- this.leadsFinder = new Scraper(this.http, this.config, this._onEmailLeads.bind(this), this._onEmailDiscovered.bind(this), this.config.debug);
95
- }
96
- rawResponseHandler(data) {
97
- if (this.rawResponseEvents.length === 0)
98
- return;
99
- const isBuffer = Buffer.isBuffer(data);
100
- if (!isBuffer) {
101
- if (data instanceof ArrayBuffer) {
102
- data = Buffer.from(new Uint8Array(data));
103
- } else if (data instanceof Uint8Array) {
104
- data = Buffer.from(data);
105
- } else if (typeof data === "string") {
106
- data = Buffer.from(data, "utf8");
107
- } else if (typeof data === "object") {
108
- data = Buffer.from(JSON.stringify(data), "utf8");
109
- }
110
- }
111
- this.rawResponseEvents.forEach((e) => {
112
- const handler = e.attr[0];
113
- handler(data);
114
- });
115
- }
116
- async waitForCache() {
117
- if (this.isCacheReady)
118
- return;
119
- await this.sleep(this.rnd(50, 200));
120
- await this.waitForCache();
121
- }
122
- async waitForStorage() {
123
- if (this.isStorageReady)
124
- return;
125
- await this.sleep(this.rnd(50, 200));
126
- await this.waitForStorage();
127
- }
128
- async saveUrl(url) {
129
- await this.waitForStorage();
130
- await this.urlStorage.set(url);
131
- }
132
- async hasUrlInCache(url) {
133
- await this.waitForStorage();
134
- return await this.urlStorage.has(url);
135
- }
136
- async saveCache(url, value) {
137
- if (!this.isCacheEnabled)
138
- return;
139
- await this.waitForCache();
140
- return this.cacher.set(url, value, this.config.cacheTTL, this.getNamespace(url));
141
- }
142
- getNamespace(url) {
143
- try {
144
- return new URL(url).hostname;
145
- } catch {
146
- return;
147
- }
148
- }
149
- async hasCache(url) {
150
- if (!this.isCacheEnabled)
151
- return false;
152
- await this.waitForCache();
153
- return this.cacher.has(url, this.getNamespace(url));
154
- }
155
- async getCache(url) {
156
- if (!this.isCacheEnabled)
157
- return null;
158
- await this.waitForCache();
159
- return this.cacher.get(url, this.getNamespace(url));
160
- }
161
- sleep(ms) {
162
- return new Promise((resolve) => setTimeout(resolve, ms));
163
- }
164
- rnd(min = 0, max = Number.MAX_VALUE) {
165
- return Math.floor(Math.random() * (max - min + 1)) + min;
166
- }
167
- onError(handler) {
168
- this.errorEvents.push({
169
- handler: "_onError",
170
- attr: [handler]
171
- });
172
- return this;
173
- }
174
- onJson(handler) {
175
- this.jsonEvents.push({
176
- handler: "_onJson",
177
- attr: [handler]
178
- });
179
- return this;
180
- }
181
- onEmailDiscovered(handler) {
182
- this.emailDiscoveredEvents.push(handler);
183
- return this;
184
- }
185
- onEmailLeads(handler) {
186
- this.emailLeadsEvents.push(handler);
187
- return this;
188
- }
189
- onRawData(handler) {
190
- this.rawResponseEvents.push({
191
- handler: "_onRawResponse",
192
- attr: [handler]
193
- });
194
- return this;
195
- }
196
- onDocument(handler) {
197
- this.events.push({
198
- handler: "_onDocument",
199
- attr: [handler]
200
- });
201
- return this;
202
- }
203
- onBody(handler) {
204
- this.events.push({
205
- handler: "_onBody",
206
- attr: [handler]
207
- });
208
- return this;
209
- }
210
- onElement(handler) {
211
- this.events.push({
212
- handler: "_onElement",
213
- attr: [handler]
214
- });
215
- return this;
216
- }
217
- onAnchor(selection, handler) {
218
- this.events.push({
219
- handler: "_onAnchor",
220
- attr: [selection, handler]
221
- });
222
- return this;
223
- }
224
- onHref(handler) {
225
- this.events.push({
226
- handler: "_onHref",
227
- attr: [handler]
228
- });
229
- return this;
230
- }
231
- onSelection(selection, handler) {
232
- this.events.push({
233
- handler: "_onSelection",
234
- attr: [selection, handler]
235
- });
236
- return this;
237
- }
238
- onResponse(handler) {
239
- this.responseEvents.push({
240
- handler: "_onResponse",
241
- attr: [handler]
242
- });
243
- return this;
244
- }
245
- onAttribute(selection, attribute, handler) {
246
- this.events.push({
247
- handler: "_onAttribute",
248
- attr: [selection, attribute, handler]
249
- });
250
- return this;
251
- }
252
- onText(selection, handler) {
253
- this.events.push({
254
- handler: "_onText",
255
- attr: [selection, handler]
256
- });
257
- return this;
258
- }
259
- _onBody(handler, document) {
260
- this.queue.add(() => handler(document.body));
261
- }
262
- _onAttribute(selection, attribute, handler, document) {
263
- selection = typeof attribute === "function" ? selection : null;
264
- attribute = typeof attribute === "function" ? selection : attribute;
265
- handler = typeof attribute === "function" ? attribute : handler;
266
- selection = selection || `[${attribute}]`;
267
- const elements = document.querySelectorAll(selection);
268
- for (let i = 0;i < elements.length; i++) {
269
- if (elements[i].hasAttribute(attribute))
270
- this.queue.add(() => handler(elements[i].getAttribute(attribute)));
271
- }
272
- }
273
- _onText(selection, handler, document) {
274
- const elements = document.querySelectorAll(selection);
275
- for (let i = 0;i < elements.length; i++) {
276
- this.queue.add(() => handler(elements[i].textContent));
277
- }
278
- }
279
- _onSelection(selection, handler, document) {
280
- const elements = document.querySelectorAll(selection);
281
- for (let i = 0;i < elements.length; i++) {
282
- this.queue.add(() => handler(elements[i]));
283
- }
284
- }
285
- _onElement(handler, document) {
286
- const elements = document.querySelectorAll("*");
287
- for (let i = 0;i < elements.length; i++) {
288
- this.queue.add(() => handler(elements[i]));
289
- }
290
- }
291
- _onHref(handler, document) {
292
- const elements = document.querySelectorAll("a, link");
293
- for (let i = 0;i < elements.length; i++) {
294
- if (elements[i].hasAttribute("href"))
295
- this.queue.add(() => handler(new URL(elements[i].getAttribute("href"), document.URL).href));
296
- }
297
- }
298
- _onAnchor(selection, handler, document) {
299
- handler = typeof selection === "function" ? selection : handler;
300
- selection = typeof selection === "function" ? "a" : selection;
301
- const elements = document.querySelectorAll(selection);
302
- for (let i = 0;i < elements.length; i++) {
303
- if (elements[i]?.href && document.baseURI)
304
- elements[i].href = new URL(elements[i].getAttribute("href"), document.baseURI).href;
305
- this.queue.add(() => handler(elements[i]));
306
- }
307
- }
308
- _onDocument(handler, document) {
309
- this.queue.add(() => handler(document));
310
- }
311
- _onJson(handler, json) {
312
- this.queue.add(() => handler(json));
313
- }
314
- _onError(handler, error) {
315
- this.queue.add(() => handler(error));
316
- }
317
- async _onEmailDiscovered(handler, email) {
318
- await handler(email);
319
- }
320
- async _onEmailLeads(handler, emails) {
321
- await handler(emails);
322
- }
323
- _onRawResponse(handler, rawResponse) {
324
- this.queue.add(() => handler(rawResponse));
325
- }
326
- _onResponse(handler, response) {
327
- this.queue.add(() => handler(response));
328
- }
329
- buildUrl(url, params) {
330
- if (params) {
331
- const u = new URL(url, this.config.baseUrl);
332
- for (const [key, value] of Object.entries(params)) {
333
- u.searchParams.set(key, value.toString());
334
- }
335
- url = u.href;
336
- }
337
- return url;
338
- }
339
- visit(url, options) {
340
- if (this.config.baseUrl)
341
- url = new URL(url, this.config.baseUrl).href;
342
- if (options?.params && (options.useOxylabsScraperAi || this.config.hasDomain(url, "oxylabs"))) {
343
- url = this.buildUrl(url, options.params);
344
- }
345
- const {
346
- method = "GET",
347
- headers = new Headers,
348
- forceRevisit = this.config.forceRevisit,
349
- body = "",
350
- timeout = this.config.timeout,
351
- maxRedirects = this.config.maxRedirects,
352
- useProxy = this.config.hasDomain(url, "proxies", options?.useProxy),
353
- extractLeads = false,
354
- params,
355
- rejectUnauthorized,
356
- useQueue = false,
357
- deepEmailFinder = false,
358
- useOxylabsScraperAi = false,
359
- useOxylabsRotation = true,
360
- useDecodo = false
361
- } = options || {};
362
- const _options = {
363
- headers: this.config.pickHeaders(url, true, headers, true),
364
- timeout,
365
- maxRedirects,
366
- params,
367
- proxy: useProxy ? this.config.getAdapter(url, "proxies", true, true) || undefined : undefined,
368
- rejectUnauthorized: typeof rejectUnauthorized === "boolean" ? rejectUnauthorized : this.config.rejectUnauthorized,
369
- pqueue: this.config.getAdapter(url, "limiters", useQueue, useQueue) || undefined
370
- };
371
- let oxylabsOptions = {};
372
- let oxylabsInstanse = undefined;
373
- if (useOxylabsScraperAi && this.config.hasDomain(url, "oxylabs")) {
374
- oxylabsOptions = {
375
- method: method === "POST" ? "post" : "get",
376
- headers: this.config.pickHeaders(url, true, headers, true),
377
- pqueue: this.config.getAdapter(url, "limiters", useQueue, useQueue) || undefined,
378
- base64Body: typeof body === "string" ? Buffer.from(body).toString("base64") : undefined
379
- };
380
- oxylabsInstanse = this.config.getAdapter(url, "oxylabs", false, useOxylabsRotation) || undefined;
381
- }
382
- let decodoOptions = {};
383
- let decodoInstanse = undefined;
384
- if (useDecodo && this.config.hasDomain(url, "decodo")) {
385
- decodoOptions = {
386
- method: method === "POST" ? "post" : "get",
387
- headers: this.config.pickHeaders(url, true, headers, true),
388
- pqueue: this.config.getAdapter(url, "limiters", useQueue, useQueue) || undefined,
389
- base64Body: typeof body === "string" ? Buffer.from(body).toString("base64") : undefined
390
- };
391
- decodoInstanse = this.config.getAdapter(url, "decodo", false, useOxylabsRotation) || undefined;
392
- }
393
- if (deepEmailFinder) {
394
- this.execute2(method, url, body, _options, forceRevisit).then();
395
- return this;
396
- }
397
- this.execute(method, url, body, _options, extractLeads, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions).then();
398
- return this;
399
- }
400
- async execute(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions) {
401
- this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions)).then();
402
- }
403
- async execute2(method, url, body, options = {}, forceRevisit) {
404
- this.queue.add(() => this.leadsFinder.parseExternalWebsite(url, method, body, {
405
- httpConfig: options,
406
- saveCache: this.saveCache.bind(this),
407
- saveUrl: this.saveUrl.bind(this),
408
- getCache: this.getCache.bind(this),
409
- hasUrlInCache: this.hasUrlInCache.bind(this),
410
- onEmailDiscovered: this.emailDiscoveredEvents,
411
- onEmails: this.emailLeadsEvents,
412
- queue: this.queue,
413
- depth: 1,
414
- allowCrossDomainTravel: true
415
- }, forceRevisit, true)).then();
416
- }
417
- async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0) {
418
- try {
419
- console.log({
420
- oxylabsOptions: typeof oxylabsOptions,
421
- oxylabsInstanse: typeof oxylabsInstanse,
422
- decodoInstanse: typeof decodoInstanse,
423
- decodoOptions: typeof decodoOptions
424
- });
425
- const isVisited = forceRevisit ? false : await this.hasUrlInCache(url);
426
- const cache = await this.getCache(url);
427
- if (isVisited && !cache)
428
- return;
429
- if (isVisited && method !== "GET")
430
- return;
431
- const response = cache && method === "GET" ? cache : oxylabsInstanse && oxylabsOptions ? await oxylabsInstanse.scrape(url) : decodoInstanse && decodoOptions ? await decodoInstanse.scrape(url) : await (method === "GET" ? this.http.get(url, options) : method === "PATCH" ? this.http.patch(url, body, options) : method === "POST" ? this.http.post(url, body, options) : this.http.put(url, body, options));
432
- const res = {
433
- data: response.data || response.content || "",
434
- contentType: response.contentType || "",
435
- finalUrl: response.finalUrl || response.url || url,
436
- url: response?.urls?.[0] || response.url || this.buildUrl(url, options.params),
437
- headers: response.headers || {},
438
- status: response.status || response.statusCode || 200,
439
- statusText: response.statusText || "",
440
- cookies: response?.cookies?.serialized || response?.cookies,
441
- contentLength: response.contentLength || 0
442
- };
443
- if (!cache)
444
- await this.saveCache(url, res);
445
- if (!isVisited)
446
- await this.saveUrl(url);
447
- if (res.contentType && res.contentType.includes("/json")) {
448
- if (this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) {
449
- this.leadsFinder.extractEmails(JSON.stringify(res.data), res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
450
- }
451
- for (let i = 0;i < this.jsonEvents.length; i++) {
452
- const event = this.jsonEvents[i];
453
- this[event.handler](...event.attr, res.data);
454
- }
455
- }
456
- for (let i = 0;i < this.responseEvents.length; i++) {
457
- const event = this.responseEvents[i];
458
- this[event.handler](...event.attr, res);
459
- }
460
- this.rawResponseHandler(res.data);
461
- if (!res.contentType || !res.contentType.includes("/html") || typeof res.data !== "string")
462
- return;
463
- if ((this.emailDiscoveredEvents.length > 0 || this.emailLeadsEvents.length > 0) && isEmail) {
464
- this.leadsFinder.extractEmails(res.data, res.finalUrl, this.emailDiscoveredEvents, this.emailLeadsEvents, this.queue);
465
- }
466
- const { document } = parseHTML(res.data.addBaseUrl(res.finalUrl));
467
- document.URL = res.finalUrl;
468
- for (let i = 0;i < this.events.length; i++) {
469
- const event = this.events[i];
470
- this[event.handler](...event.attr, document);
471
- }
472
- } catch (e) {
473
- const error = e;
474
- if (error && error.response) {
475
- const status = error.response.status;
476
- const retryDelay = this.config.retryDelay || 1000;
477
- const maxRetryAttempts = this.config.maxRetryAttempts || 3;
478
- const maxRetryOnProxyError = this.config.maxRetryOnProxyError || 3;
479
- const retryWithoutProxyOnStatusCode = this.config.retryWithoutProxyOnStatusCode || undefined;
480
- const retryOnStatusCode = this.config.retryOnStatusCode || undefined;
481
- const retryOnProxyError = this.config.retryOnProxyError || undefined;
482
- if (retryWithoutProxyOnStatusCode && options.proxy && retryWithoutProxyOnStatusCode.includes(status) && retryCount < maxRetryAttempts) {
483
- await this.sleep(retryDelay);
484
- delete options.proxy;
485
- return await this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount + 1);
486
- } else if (retryOnStatusCode && options.proxy && retryOnStatusCode.includes(status) && retryCount < maxRetryAttempts) {
487
- await this.sleep(retryDelay);
488
- return await this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount + 1);
489
- } else if (retryOnProxyError && options.proxy && retryCount < maxRetryOnProxyError) {
490
- await this.sleep(retryDelay);
491
- return await this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount + 1);
492
- }
493
- }
494
- if (this.config.throwFatalError)
495
- throw e;
496
- if (this.config.debug) {
497
- console.log(`Error visiting ${url}: ${e.message}`);
498
- }
499
- console.log(error);
500
- for (let i = 0;i < this.errorEvents.length; i++) {
501
- const event = this.errorEvents[i];
502
- this[event.handler](...event.attr, e);
503
- }
504
- }
505
- }
506
- async waitForAll() {
507
- await this.queue.onIdle();
508
- }
509
- async close() {
510
- try {
511
- await this.cacher.close();
512
- } catch {}
513
- try {
514
- await this.urlStorage.close();
515
- } catch {}
516
- }
517
- }
@@ -1,36 +0,0 @@
1
- const _mod_no49zr = require('./crawler.cjs');
2
- exports.Crawler = _mod_no49zr.Crawler;;
3
- const _mod_6cwfk4 = require('./crawler-options.cjs');
4
- exports.CrawlerOptions = _mod_6cwfk4.CrawlerOptions;;
5
- const _mod_orelxz = require('../cache/file-cacher.cjs');
6
- exports.FileCacher = _mod_orelxz.FileCacher;;
7
- const _mod_1treoe = require('../cache/url-store.cjs');
8
- exports.UrlStore = _mod_1treoe.UrlStore;;
9
- const _mod_472eye = require('./addon/oxylabs/index.cjs');
10
- exports.Oxylabs = _mod_472eye.Oxylabs;;
11
- const _mod_d7toci = require('./addon/oxylabs/options.cjs');
12
- exports.OXYLABS_BROWSER_TYPES = _mod_d7toci.OXYLABS_BROWSER_TYPES;
13
- exports.OXYLABS_COMMON_LOCALES = _mod_d7toci.OXYLABS_COMMON_LOCALES;
14
- exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_d7toci.OXYLABS_COMMON_GEO_LOCATIONS;
15
- exports.OXYLABS_US_STATES = _mod_d7toci.OXYLABS_US_STATES;
16
- exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_d7toci.OXYLABS_EUROPEAN_COUNTRIES;
17
- exports.OXYLABS_ASIAN_COUNTRIES = _mod_d7toci.OXYLABS_ASIAN_COUNTRIES;
18
- exports.getRandomOxylabsBrowserType = _mod_d7toci.getRandomBrowserType;
19
- exports.getRandomOxylabsLocale = _mod_d7toci.getRandomLocale;
20
- exports.getRandomOxylabsGeoLocation = _mod_d7toci.getRandomGeoLocation;;
21
- const _mod_xok7vg = require('./addon/decodo/index.cjs');
22
- exports.Decodo = _mod_xok7vg.Decodo;;
23
- const _mod_mgqhmj = require('./addon/decodo/options.cjs');
24
- exports.DECODO_DEVICE_TYPES = _mod_mgqhmj.DECODO_DEVICE_TYPES;
25
- exports.DECODO_HEADLESS_MODES = _mod_mgqhmj.DECODO_HEADLESS_MODES;
26
- exports.DECODO_COMMON_LOCALES = _mod_mgqhmj.DECODO_COMMON_LOCALES;
27
- exports.DECODO_COMMON_COUNTRIES = _mod_mgqhmj.DECODO_COMMON_COUNTRIES;
28
- exports.DECODO_EUROPEAN_COUNTRIES = _mod_mgqhmj.DECODO_EUROPEAN_COUNTRIES;
29
- exports.DECODO_ASIAN_COUNTRIES = _mod_mgqhmj.DECODO_ASIAN_COUNTRIES;
30
- exports.DECODO_US_STATES = _mod_mgqhmj.DECODO_US_STATES;
31
- exports.DECODO_COMMON_CITIES = _mod_mgqhmj.DECODO_COMMON_CITIES;
32
- exports.getRandomDecodoDeviceType = _mod_mgqhmj.getRandomDeviceType;
33
- exports.getRandomDecodoLocale = _mod_mgqhmj.getRandomLocale;
34
- exports.getRandomDecodoCountry = _mod_mgqhmj.getRandomCountry;
35
- exports.getRandomDecodoCity = _mod_mgqhmj.getRandomCity;
36
- exports.generateDecodoSessionId = _mod_mgqhmj.generateSessionId;;
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes