rezo 1.0.66 → 1.0.68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/entries/curl.d.ts +5 -0
- package/dist/adapters/entries/fetch.d.ts +5 -0
- package/dist/adapters/entries/http.d.ts +5 -0
- package/dist/adapters/entries/http2.d.ts +5 -0
- package/dist/adapters/entries/react-native.d.ts +5 -0
- package/dist/adapters/entries/xhr.d.ts +5 -0
- package/dist/adapters/index.cjs +6 -6
- package/dist/cache/index.cjs +9 -9
- package/dist/crawler/crawler.cjs +26 -5
- package/dist/crawler/crawler.js +26 -5
- package/dist/crawler/index.cjs +40 -40
- package/dist/crawler.d.ts +10 -0
- package/dist/entries/crawler.cjs +4 -4
- package/dist/index.cjs +27 -27
- package/dist/index.d.ts +5 -0
- package/dist/internal/agents/index.cjs +10 -10
- package/dist/platform/browser.d.ts +5 -0
- package/dist/platform/bun.d.ts +5 -0
- package/dist/platform/deno.d.ts +5 -0
- package/dist/platform/node.d.ts +5 -0
- package/dist/platform/react-native.d.ts +5 -0
- package/dist/platform/worker.d.ts +5 -0
- package/dist/proxy/index.cjs +4 -4
- package/dist/proxy/manager.cjs +1 -1
- package/dist/proxy/manager.js +1 -1
- package/dist/queue/index.cjs +8 -8
- package/dist/queue/queue.cjs +3 -1
- package/dist/queue/queue.js +3 -1
- package/dist/responses/universal/index.cjs +11 -11
- package/dist/wget/asset-extractor.cjs +556 -0
- package/dist/wget/asset-extractor.js +553 -0
- package/dist/wget/asset-organizer.cjs +230 -0
- package/dist/wget/asset-organizer.js +227 -0
- package/dist/wget/download-cache.cjs +221 -0
- package/dist/wget/download-cache.js +218 -0
- package/dist/wget/downloader.cjs +607 -0
- package/dist/wget/downloader.js +604 -0
- package/dist/wget/file-writer.cjs +349 -0
- package/dist/wget/file-writer.js +346 -0
- package/dist/wget/filter-lists.cjs +1330 -0
- package/dist/wget/filter-lists.js +1330 -0
- package/dist/wget/index.cjs +633 -0
- package/dist/wget/index.d.ts +8486 -0
- package/dist/wget/index.js +614 -0
- package/dist/wget/link-converter.cjs +297 -0
- package/dist/wget/link-converter.js +294 -0
- package/dist/wget/progress.cjs +271 -0
- package/dist/wget/progress.js +266 -0
- package/dist/wget/resume.cjs +166 -0
- package/dist/wget/resume.js +163 -0
- package/dist/wget/robots.cjs +303 -0
- package/dist/wget/robots.js +300 -0
- package/dist/wget/types.cjs +200 -0
- package/dist/wget/types.js +197 -0
- package/dist/wget/url-filter.cjs +351 -0
- package/dist/wget/url-filter.js +348 -0
- package/package.json +6 -1
package/dist/adapters/entries/curl.d.ts
CHANGED
|
@@ -2128,6 +2128,8 @@ declare class RezoQueue<T = any> {
|
|
|
2128
2128
|
private readonly throughputWindowSize;
|
|
2129
2129
|
private idlePromise?;
|
|
2130
2130
|
private emptyPromise?;
|
|
2131
|
+
/** Tracks if queue has ever had work added - ensures onIdle waits for first task */
|
|
2132
|
+
private hasEverBeenActive;
|
|
2131
2133
|
readonly config: Required<QueueConfig>;
|
|
2132
2134
|
/**
|
|
2133
2135
|
* Create a new RezoQueue
|
|
@@ -2202,6 +2204,9 @@ declare class RezoQueue<T = any> {
|
|
|
2202
2204
|
}) => boolean): number;
|
|
2203
2205
|
/**
|
|
2204
2206
|
* Wait for queue to become idle (no running or pending tasks)
|
|
2207
|
+
*
|
|
2208
|
+
* Unlike a simple "isIdle" check, this properly waits for work to be added
|
|
2209
|
+
* and completed if called before any tasks are queued (matches p-queue behavior).
|
|
2205
2210
|
*/
|
|
2206
2211
|
onIdle(): Promise<void>;
|
|
2207
2212
|
/**
|
package/dist/adapters/entries/fetch.d.ts
CHANGED
|
@@ -2128,6 +2128,8 @@ declare class RezoQueue<T = any> {
|
|
|
2128
2128
|
private readonly throughputWindowSize;
|
|
2129
2129
|
private idlePromise?;
|
|
2130
2130
|
private emptyPromise?;
|
|
2131
|
+
/** Tracks if queue has ever had work added - ensures onIdle waits for first task */
|
|
2132
|
+
private hasEverBeenActive;
|
|
2131
2133
|
readonly config: Required<QueueConfig>;
|
|
2132
2134
|
/**
|
|
2133
2135
|
* Create a new RezoQueue
|
|
@@ -2202,6 +2204,9 @@ declare class RezoQueue<T = any> {
|
|
|
2202
2204
|
}) => boolean): number;
|
|
2203
2205
|
/**
|
|
2204
2206
|
* Wait for queue to become idle (no running or pending tasks)
|
|
2207
|
+
*
|
|
2208
|
+
* Unlike a simple "isIdle" check, this properly waits for work to be added
|
|
2209
|
+
* and completed if called before any tasks are queued (matches p-queue behavior).
|
|
2205
2210
|
*/
|
|
2206
2211
|
onIdle(): Promise<void>;
|
|
2207
2212
|
/**
|
package/dist/adapters/entries/http.d.ts
CHANGED
|
@@ -2128,6 +2128,8 @@ declare class RezoQueue<T = any> {
|
|
|
2128
2128
|
private readonly throughputWindowSize;
|
|
2129
2129
|
private idlePromise?;
|
|
2130
2130
|
private emptyPromise?;
|
|
2131
|
+
/** Tracks if queue has ever had work added - ensures onIdle waits for first task */
|
|
2132
|
+
private hasEverBeenActive;
|
|
2131
2133
|
readonly config: Required<QueueConfig>;
|
|
2132
2134
|
/**
|
|
2133
2135
|
* Create a new RezoQueue
|
|
@@ -2202,6 +2204,9 @@ declare class RezoQueue<T = any> {
|
|
|
2202
2204
|
}) => boolean): number;
|
|
2203
2205
|
/**
|
|
2204
2206
|
* Wait for queue to become idle (no running or pending tasks)
|
|
2207
|
+
*
|
|
2208
|
+
* Unlike a simple "isIdle" check, this properly waits for work to be added
|
|
2209
|
+
* and completed if called before any tasks are queued (matches p-queue behavior).
|
|
2205
2210
|
*/
|
|
2206
2211
|
onIdle(): Promise<void>;
|
|
2207
2212
|
/**
|
package/dist/adapters/entries/http2.d.ts
CHANGED
|
@@ -2128,6 +2128,8 @@ declare class RezoQueue<T = any> {
|
|
|
2128
2128
|
private readonly throughputWindowSize;
|
|
2129
2129
|
private idlePromise?;
|
|
2130
2130
|
private emptyPromise?;
|
|
2131
|
+
/** Tracks if queue has ever had work added - ensures onIdle waits for first task */
|
|
2132
|
+
private hasEverBeenActive;
|
|
2131
2133
|
readonly config: Required<QueueConfig>;
|
|
2132
2134
|
/**
|
|
2133
2135
|
* Create a new RezoQueue
|
|
@@ -2202,6 +2204,9 @@ declare class RezoQueue<T = any> {
|
|
|
2202
2204
|
}) => boolean): number;
|
|
2203
2205
|
/**
|
|
2204
2206
|
* Wait for queue to become idle (no running or pending tasks)
|
|
2207
|
+
*
|
|
2208
|
+
* Unlike a simple "isIdle" check, this properly waits for work to be added
|
|
2209
|
+
* and completed if called before any tasks are queued (matches p-queue behavior).
|
|
2205
2210
|
*/
|
|
2206
2211
|
onIdle(): Promise<void>;
|
|
2207
2212
|
/**
|
package/dist/adapters/entries/react-native.d.ts
CHANGED
|
@@ -2128,6 +2128,8 @@ declare class RezoQueue<T = any> {
|
|
|
2128
2128
|
private readonly throughputWindowSize;
|
|
2129
2129
|
private idlePromise?;
|
|
2130
2130
|
private emptyPromise?;
|
|
2131
|
+
/** Tracks if queue has ever had work added - ensures onIdle waits for first task */
|
|
2132
|
+
private hasEverBeenActive;
|
|
2131
2133
|
readonly config: Required<QueueConfig>;
|
|
2132
2134
|
/**
|
|
2133
2135
|
* Create a new RezoQueue
|
|
@@ -2202,6 +2204,9 @@ declare class RezoQueue<T = any> {
|
|
|
2202
2204
|
}) => boolean): number;
|
|
2203
2205
|
/**
|
|
2204
2206
|
* Wait for queue to become idle (no running or pending tasks)
|
|
2207
|
+
*
|
|
2208
|
+
* Unlike a simple "isIdle" check, this properly waits for work to be added
|
|
2209
|
+
* and completed if called before any tasks are queued (matches p-queue behavior).
|
|
2205
2210
|
*/
|
|
2206
2211
|
onIdle(): Promise<void>;
|
|
2207
2212
|
/**
|
package/dist/adapters/entries/xhr.d.ts
CHANGED
|
@@ -2128,6 +2128,8 @@ declare class RezoQueue<T = any> {
|
|
|
2128
2128
|
private readonly throughputWindowSize;
|
|
2129
2129
|
private idlePromise?;
|
|
2130
2130
|
private emptyPromise?;
|
|
2131
|
+
/** Tracks if queue has ever had work added - ensures onIdle waits for first task */
|
|
2132
|
+
private hasEverBeenActive;
|
|
2131
2133
|
readonly config: Required<QueueConfig>;
|
|
2132
2134
|
/**
|
|
2133
2135
|
* Create a new RezoQueue
|
|
@@ -2202,6 +2204,9 @@ declare class RezoQueue<T = any> {
|
|
|
2202
2204
|
}) => boolean): number;
|
|
2203
2205
|
/**
|
|
2204
2206
|
* Wait for queue to become idle (no running or pending tasks)
|
|
2207
|
+
*
|
|
2208
|
+
* Unlike a simple "isIdle" check, this properly waits for work to be added
|
|
2209
|
+
* and completed if called before any tasks are queued (matches p-queue behavior).
|
|
2205
2210
|
*/
|
|
2206
2211
|
onIdle(): Promise<void>;
|
|
2207
2212
|
/**
|
package/dist/adapters/index.cjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.detectRuntime =
|
|
3
|
-
exports.getAdapterCapabilities =
|
|
4
|
-
exports.buildAdapterContext =
|
|
5
|
-
exports.getAvailableAdapters =
|
|
6
|
-
exports.selectAdapter =
|
|
1
|
+
const _mod_mwtx05 = require('./picker.cjs');
|
|
2
|
+
exports.detectRuntime = _mod_mwtx05.detectRuntime;
|
|
3
|
+
exports.getAdapterCapabilities = _mod_mwtx05.getAdapterCapabilities;
|
|
4
|
+
exports.buildAdapterContext = _mod_mwtx05.buildAdapterContext;
|
|
5
|
+
exports.getAvailableAdapters = _mod_mwtx05.getAvailableAdapters;
|
|
6
|
+
exports.selectAdapter = _mod_mwtx05.selectAdapter;;
|
package/dist/cache/index.cjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.LRUCache =
|
|
3
|
-
const
|
|
4
|
-
exports.DNSCache =
|
|
5
|
-
exports.getGlobalDNSCache =
|
|
6
|
-
exports.resetGlobalDNSCache =
|
|
7
|
-
const
|
|
8
|
-
exports.ResponseCache =
|
|
9
|
-
exports.normalizeResponseCacheConfig =
|
|
1
|
+
const _mod_p96mxt = require('./lru-cache.cjs');
|
|
2
|
+
exports.LRUCache = _mod_p96mxt.LRUCache;;
|
|
3
|
+
const _mod_3suvou = require('./dns-cache.cjs');
|
|
4
|
+
exports.DNSCache = _mod_3suvou.DNSCache;
|
|
5
|
+
exports.getGlobalDNSCache = _mod_3suvou.getGlobalDNSCache;
|
|
6
|
+
exports.resetGlobalDNSCache = _mod_3suvou.resetGlobalDNSCache;;
|
|
7
|
+
const _mod_kytcf0 = require('./response-cache.cjs');
|
|
8
|
+
exports.ResponseCache = _mod_kytcf0.ResponseCache;
|
|
9
|
+
exports.normalizeResponseCacheConfig = _mod_kytcf0.normalizeResponseCacheConfig;;
|
package/dist/crawler/crawler.cjs
CHANGED
|
@@ -57,6 +57,7 @@ class Crawler {
|
|
|
57
57
|
adapterExecutor = null;
|
|
58
58
|
adapterType;
|
|
59
59
|
pendingExecutions = new Set;
|
|
60
|
+
pendingVisitCount = 0;
|
|
60
61
|
isDestroyed = false;
|
|
61
62
|
queueOptions = { concurrency: 1000 };
|
|
62
63
|
robotsTxt;
|
|
@@ -827,7 +828,6 @@ class Crawler {
|
|
|
827
828
|
maxRedirects = this.config.maxRedirects,
|
|
828
829
|
useProxy = this.config.hasDomain(url, "proxies", options?.useProxy),
|
|
829
830
|
extractLeads = false,
|
|
830
|
-
params,
|
|
831
831
|
rejectUnauthorized,
|
|
832
832
|
useQueue = false,
|
|
833
833
|
deepEmailFinder = false,
|
|
@@ -841,7 +841,6 @@ class Crawler {
|
|
|
841
841
|
headers: this.config.pickHeaders(url, true, headers, true),
|
|
842
842
|
timeout,
|
|
843
843
|
maxRedirects,
|
|
844
|
-
params,
|
|
845
844
|
proxy: useProxy ? this.config.getAdapter(url, "proxies", true, true) || undefined : undefined,
|
|
846
845
|
rejectUnauthorized: typeof rejectUnauthorized === "boolean" ? rejectUnauthorized : this.config.rejectUnauthorized,
|
|
847
846
|
queue: this.config.getAdapter(url, "limiters", useQueue, useQueue) || undefined
|
|
@@ -868,16 +867,21 @@ class Crawler {
|
|
|
868
867
|
};
|
|
869
868
|
decodoInstanse = this.config.getAdapter(url, "decodo", false, useOxylabsRotation) || undefined;
|
|
870
869
|
}
|
|
870
|
+
url = this.buildUrl(url, options?.params);
|
|
871
871
|
if (this.config.enableNavigationHistory) {
|
|
872
872
|
const headersObj = headers instanceof Headers ? Object.fromEntries(headers.entries()) : headers;
|
|
873
873
|
this.addToNavigationQueue(url, method, body, headersObj);
|
|
874
874
|
}
|
|
875
|
+
if (url.includes(`/www.yellowpages.com/search?`))
|
|
876
|
+
console.log("Visiting: ", url);
|
|
875
877
|
if (deepEmailFinder) {
|
|
878
|
+
this.pendingVisitCount++;
|
|
876
879
|
const p = this.execute2(method, url, body, _options, forceRevisit, emailMetadata);
|
|
877
880
|
this.pendingExecutions.add(p);
|
|
878
881
|
p.finally(() => this.pendingExecutions.delete(p));
|
|
879
882
|
return this;
|
|
880
883
|
}
|
|
884
|
+
this.pendingVisitCount++;
|
|
881
885
|
const p = this.execute(method, url, body, _options, extractLeads, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, skipCache, emailMetadata);
|
|
882
886
|
this.pendingExecutions.add(p);
|
|
883
887
|
p.finally(() => this.pendingExecutions.delete(p));
|
|
@@ -892,6 +896,7 @@ class Crawler {
|
|
|
892
896
|
await this.waitForNavigationHistory();
|
|
893
897
|
}
|
|
894
898
|
const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, 0, undefined, skipCache, emailMetadata));
|
|
899
|
+
this.pendingVisitCount--;
|
|
895
900
|
task.finally(() => this.pendingExecutions.delete(task));
|
|
896
901
|
}
|
|
897
902
|
async execute2(method, url, body, options = {}, forceRevisit, emailMetadata) {
|
|
@@ -915,6 +920,7 @@ class Crawler {
|
|
|
915
920
|
allowCrossDomainTravel: true,
|
|
916
921
|
emailMetadata
|
|
917
922
|
}, forceRevisit, true)).then();
|
|
923
|
+
this.pendingVisitCount--;
|
|
918
924
|
}
|
|
919
925
|
async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache, emailMetadata) {
|
|
920
926
|
try {
|
|
@@ -1066,10 +1072,25 @@ class Crawler {
|
|
|
1066
1072
|
}
|
|
1067
1073
|
}
|
|
1068
1074
|
async waitForAll() {
|
|
1069
|
-
if (this.pendingExecutions.size
|
|
1070
|
-
await
|
|
1075
|
+
if (this.pendingVisitCount === 0 && this.pendingExecutions.size === 0 && this.queue.size === 0 && this.queue.pending === 0 && !this.crawlStarted) {
|
|
1076
|
+
await this.triggerFinishHandlers();
|
|
1077
|
+
await this.destroy();
|
|
1078
|
+
return;
|
|
1079
|
+
}
|
|
1080
|
+
while (true) {
|
|
1081
|
+
while (this.pendingVisitCount > 0) {
|
|
1082
|
+
await new Promise((resolve) => setTimeout(resolve, 10));
|
|
1083
|
+
}
|
|
1084
|
+
if (this.pendingExecutions.size > 0) {
|
|
1085
|
+
await Promise.allSettled([...this.pendingExecutions]);
|
|
1086
|
+
}
|
|
1087
|
+
if (this.queue.size > 0 || this.queue.pending > 0) {
|
|
1088
|
+
await this.queue.onIdle();
|
|
1089
|
+
}
|
|
1090
|
+
if (this.pendingVisitCount === 0 && this.pendingExecutions.size === 0 && this.queue.size === 0 && this.queue.pending === 0) {
|
|
1091
|
+
break;
|
|
1092
|
+
}
|
|
1071
1093
|
}
|
|
1072
|
-
await this.queue.onIdle();
|
|
1073
1094
|
await this.triggerFinishHandlers();
|
|
1074
1095
|
await this.destroy();
|
|
1075
1096
|
}
|
package/dist/crawler/crawler.js
CHANGED
|
@@ -57,6 +57,7 @@ export class Crawler {
|
|
|
57
57
|
adapterExecutor = null;
|
|
58
58
|
adapterType;
|
|
59
59
|
pendingExecutions = new Set;
|
|
60
|
+
pendingVisitCount = 0;
|
|
60
61
|
isDestroyed = false;
|
|
61
62
|
queueOptions = { concurrency: 1000 };
|
|
62
63
|
robotsTxt;
|
|
@@ -827,7 +828,6 @@ export class Crawler {
|
|
|
827
828
|
maxRedirects = this.config.maxRedirects,
|
|
828
829
|
useProxy = this.config.hasDomain(url, "proxies", options?.useProxy),
|
|
829
830
|
extractLeads = false,
|
|
830
|
-
params,
|
|
831
831
|
rejectUnauthorized,
|
|
832
832
|
useQueue = false,
|
|
833
833
|
deepEmailFinder = false,
|
|
@@ -841,7 +841,6 @@ export class Crawler {
|
|
|
841
841
|
headers: this.config.pickHeaders(url, true, headers, true),
|
|
842
842
|
timeout,
|
|
843
843
|
maxRedirects,
|
|
844
|
-
params,
|
|
845
844
|
proxy: useProxy ? this.config.getAdapter(url, "proxies", true, true) || undefined : undefined,
|
|
846
845
|
rejectUnauthorized: typeof rejectUnauthorized === "boolean" ? rejectUnauthorized : this.config.rejectUnauthorized,
|
|
847
846
|
queue: this.config.getAdapter(url, "limiters", useQueue, useQueue) || undefined
|
|
@@ -868,16 +867,21 @@ export class Crawler {
|
|
|
868
867
|
};
|
|
869
868
|
decodoInstanse = this.config.getAdapter(url, "decodo", false, useOxylabsRotation) || undefined;
|
|
870
869
|
}
|
|
870
|
+
url = this.buildUrl(url, options?.params);
|
|
871
871
|
if (this.config.enableNavigationHistory) {
|
|
872
872
|
const headersObj = headers instanceof Headers ? Object.fromEntries(headers.entries()) : headers;
|
|
873
873
|
this.addToNavigationQueue(url, method, body, headersObj);
|
|
874
874
|
}
|
|
875
|
+
if (url.includes(`/www.yellowpages.com/search?`))
|
|
876
|
+
console.log("Visiting: ", url);
|
|
875
877
|
if (deepEmailFinder) {
|
|
878
|
+
this.pendingVisitCount++;
|
|
876
879
|
const p = this.execute2(method, url, body, _options, forceRevisit, emailMetadata);
|
|
877
880
|
this.pendingExecutions.add(p);
|
|
878
881
|
p.finally(() => this.pendingExecutions.delete(p));
|
|
879
882
|
return this;
|
|
880
883
|
}
|
|
884
|
+
this.pendingVisitCount++;
|
|
881
885
|
const p = this.execute(method, url, body, _options, extractLeads, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, skipCache, emailMetadata);
|
|
882
886
|
this.pendingExecutions.add(p);
|
|
883
887
|
p.finally(() => this.pendingExecutions.delete(p));
|
|
@@ -892,6 +896,7 @@ export class Crawler {
|
|
|
892
896
|
await this.waitForNavigationHistory();
|
|
893
897
|
}
|
|
894
898
|
const task = this.queue.add(() => this.executeHttp(method, url, body, options, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, 0, undefined, skipCache, emailMetadata));
|
|
899
|
+
this.pendingVisitCount--;
|
|
895
900
|
task.finally(() => this.pendingExecutions.delete(task));
|
|
896
901
|
}
|
|
897
902
|
async execute2(method, url, body, options = {}, forceRevisit, emailMetadata) {
|
|
@@ -915,6 +920,7 @@ export class Crawler {
|
|
|
915
920
|
allowCrossDomainTravel: true,
|
|
916
921
|
emailMetadata
|
|
917
922
|
}, forceRevisit, true)).then();
|
|
923
|
+
this.pendingVisitCount--;
|
|
918
924
|
}
|
|
919
925
|
async executeHttp(method, url, body, options = {}, isEmail, forceRevisit, oxylabsOptions, oxylabsInstanse, decodoInstanse, decodoOptions, retryCount = 0, parentUrl, skipCache, emailMetadata) {
|
|
920
926
|
try {
|
|
@@ -1066,10 +1072,25 @@ export class Crawler {
|
|
|
1066
1072
|
}
|
|
1067
1073
|
}
|
|
1068
1074
|
async waitForAll() {
|
|
1069
|
-
if (this.pendingExecutions.size
|
|
1070
|
-
await
|
|
1075
|
+
if (this.pendingVisitCount === 0 && this.pendingExecutions.size === 0 && this.queue.size === 0 && this.queue.pending === 0 && !this.crawlStarted) {
|
|
1076
|
+
await this.triggerFinishHandlers();
|
|
1077
|
+
await this.destroy();
|
|
1078
|
+
return;
|
|
1079
|
+
}
|
|
1080
|
+
while (true) {
|
|
1081
|
+
while (this.pendingVisitCount > 0) {
|
|
1082
|
+
await new Promise((resolve) => setTimeout(resolve, 10));
|
|
1083
|
+
}
|
|
1084
|
+
if (this.pendingExecutions.size > 0) {
|
|
1085
|
+
await Promise.allSettled([...this.pendingExecutions]);
|
|
1086
|
+
}
|
|
1087
|
+
if (this.queue.size > 0 || this.queue.pending > 0) {
|
|
1088
|
+
await this.queue.onIdle();
|
|
1089
|
+
}
|
|
1090
|
+
if (this.pendingVisitCount === 0 && this.pendingExecutions.size === 0 && this.queue.size === 0 && this.queue.pending === 0) {
|
|
1091
|
+
break;
|
|
1092
|
+
}
|
|
1071
1093
|
}
|
|
1072
|
-
await this.queue.onIdle();
|
|
1073
1094
|
await this.triggerFinishHandlers();
|
|
1074
1095
|
await this.destroy();
|
|
1075
1096
|
}
|
package/dist/crawler/index.cjs
CHANGED
|
@@ -1,40 +1,40 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Crawler =
|
|
3
|
-
const
|
|
4
|
-
exports.CrawlerOptions =
|
|
5
|
-
const
|
|
6
|
-
exports.RobotsTxt =
|
|
7
|
-
const
|
|
8
|
-
exports.FileCacher =
|
|
9
|
-
const
|
|
10
|
-
exports.UrlStore =
|
|
11
|
-
const
|
|
12
|
-
exports.NavigationHistory =
|
|
13
|
-
const
|
|
14
|
-
exports.Oxylabs =
|
|
15
|
-
const
|
|
16
|
-
exports.OXYLABS_BROWSER_TYPES =
|
|
17
|
-
exports.OXYLABS_COMMON_LOCALES =
|
|
18
|
-
exports.OXYLABS_COMMON_GEO_LOCATIONS =
|
|
19
|
-
exports.OXYLABS_US_STATES =
|
|
20
|
-
exports.OXYLABS_EUROPEAN_COUNTRIES =
|
|
21
|
-
exports.OXYLABS_ASIAN_COUNTRIES =
|
|
22
|
-
exports.getRandomOxylabsBrowserType =
|
|
23
|
-
exports.getRandomOxylabsLocale =
|
|
24
|
-
exports.getRandomOxylabsGeoLocation =
|
|
25
|
-
const
|
|
26
|
-
exports.Decodo =
|
|
27
|
-
const
|
|
28
|
-
exports.DECODO_DEVICE_TYPES =
|
|
29
|
-
exports.DECODO_HEADLESS_MODES =
|
|
30
|
-
exports.DECODO_COMMON_LOCALES =
|
|
31
|
-
exports.DECODO_COMMON_COUNTRIES =
|
|
32
|
-
exports.DECODO_EUROPEAN_COUNTRIES =
|
|
33
|
-
exports.DECODO_ASIAN_COUNTRIES =
|
|
34
|
-
exports.DECODO_US_STATES =
|
|
35
|
-
exports.DECODO_COMMON_CITIES =
|
|
36
|
-
exports.getRandomDecodoDeviceType =
|
|
37
|
-
exports.getRandomDecodoLocale =
|
|
38
|
-
exports.getRandomDecodoCountry =
|
|
39
|
-
exports.getRandomDecodoCity =
|
|
40
|
-
exports.generateDecodoSessionId =
|
|
1
|
+
const _mod_xfzq7o = require('./crawler.cjs');
|
|
2
|
+
exports.Crawler = _mod_xfzq7o.Crawler;;
|
|
3
|
+
const _mod_g57p41 = require('./crawler-options.cjs');
|
|
4
|
+
exports.CrawlerOptions = _mod_g57p41.CrawlerOptions;;
|
|
5
|
+
const _mod_4xdu8g = require('./plugin/robots-txt.cjs');
|
|
6
|
+
exports.RobotsTxt = _mod_4xdu8g.RobotsTxt;;
|
|
7
|
+
const _mod_rvaquk = require('./plugin/file-cacher.cjs');
|
|
8
|
+
exports.FileCacher = _mod_rvaquk.FileCacher;;
|
|
9
|
+
const _mod_dm8gds = require('./plugin/url-store.cjs');
|
|
10
|
+
exports.UrlStore = _mod_dm8gds.UrlStore;;
|
|
11
|
+
const _mod_42ekda = require('./plugin/navigation-history.cjs');
|
|
12
|
+
exports.NavigationHistory = _mod_42ekda.NavigationHistory;;
|
|
13
|
+
const _mod_im09o4 = require('./addon/oxylabs/index.cjs');
|
|
14
|
+
exports.Oxylabs = _mod_im09o4.Oxylabs;;
|
|
15
|
+
const _mod_8gc43u = require('./addon/oxylabs/options.cjs');
|
|
16
|
+
exports.OXYLABS_BROWSER_TYPES = _mod_8gc43u.OXYLABS_BROWSER_TYPES;
|
|
17
|
+
exports.OXYLABS_COMMON_LOCALES = _mod_8gc43u.OXYLABS_COMMON_LOCALES;
|
|
18
|
+
exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_8gc43u.OXYLABS_COMMON_GEO_LOCATIONS;
|
|
19
|
+
exports.OXYLABS_US_STATES = _mod_8gc43u.OXYLABS_US_STATES;
|
|
20
|
+
exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_8gc43u.OXYLABS_EUROPEAN_COUNTRIES;
|
|
21
|
+
exports.OXYLABS_ASIAN_COUNTRIES = _mod_8gc43u.OXYLABS_ASIAN_COUNTRIES;
|
|
22
|
+
exports.getRandomOxylabsBrowserType = _mod_8gc43u.getRandomBrowserType;
|
|
23
|
+
exports.getRandomOxylabsLocale = _mod_8gc43u.getRandomLocale;
|
|
24
|
+
exports.getRandomOxylabsGeoLocation = _mod_8gc43u.getRandomGeoLocation;;
|
|
25
|
+
const _mod_7tfb6z = require('./addon/decodo/index.cjs');
|
|
26
|
+
exports.Decodo = _mod_7tfb6z.Decodo;;
|
|
27
|
+
const _mod_pzy8jh = require('./addon/decodo/options.cjs');
|
|
28
|
+
exports.DECODO_DEVICE_TYPES = _mod_pzy8jh.DECODO_DEVICE_TYPES;
|
|
29
|
+
exports.DECODO_HEADLESS_MODES = _mod_pzy8jh.DECODO_HEADLESS_MODES;
|
|
30
|
+
exports.DECODO_COMMON_LOCALES = _mod_pzy8jh.DECODO_COMMON_LOCALES;
|
|
31
|
+
exports.DECODO_COMMON_COUNTRIES = _mod_pzy8jh.DECODO_COMMON_COUNTRIES;
|
|
32
|
+
exports.DECODO_EUROPEAN_COUNTRIES = _mod_pzy8jh.DECODO_EUROPEAN_COUNTRIES;
|
|
33
|
+
exports.DECODO_ASIAN_COUNTRIES = _mod_pzy8jh.DECODO_ASIAN_COUNTRIES;
|
|
34
|
+
exports.DECODO_US_STATES = _mod_pzy8jh.DECODO_US_STATES;
|
|
35
|
+
exports.DECODO_COMMON_CITIES = _mod_pzy8jh.DECODO_COMMON_CITIES;
|
|
36
|
+
exports.getRandomDecodoDeviceType = _mod_pzy8jh.getRandomDeviceType;
|
|
37
|
+
exports.getRandomDecodoLocale = _mod_pzy8jh.getRandomLocale;
|
|
38
|
+
exports.getRandomDecodoCountry = _mod_pzy8jh.getRandomCountry;
|
|
39
|
+
exports.getRandomDecodoCity = _mod_pzy8jh.getRandomCity;
|
|
40
|
+
exports.generateDecodoSessionId = _mod_pzy8jh.generateSessionId;;
|
package/dist/crawler.d.ts
CHANGED
|
@@ -2180,6 +2180,8 @@ declare class RezoQueue<T = any> {
|
|
|
2180
2180
|
private readonly throughputWindowSize;
|
|
2181
2181
|
private idlePromise?;
|
|
2182
2182
|
private emptyPromise?;
|
|
2183
|
+
/** Tracks if queue has ever had work added - ensures onIdle waits for first task */
|
|
2184
|
+
private hasEverBeenActive;
|
|
2183
2185
|
readonly config: Required<QueueConfig>;
|
|
2184
2186
|
/**
|
|
2185
2187
|
* Create a new RezoQueue
|
|
@@ -2254,6 +2256,9 @@ declare class RezoQueue<T = any> {
|
|
|
2254
2256
|
}) => boolean): number;
|
|
2255
2257
|
/**
|
|
2256
2258
|
* Wait for queue to become idle (no running or pending tasks)
|
|
2259
|
+
*
|
|
2260
|
+
* Unlike a simple "isIdle" check, this properly waits for work to be added
|
|
2261
|
+
* and completed if called before any tasks are queued (matches p-queue behavior).
|
|
2257
2262
|
*/
|
|
2258
2263
|
onIdle(): Promise<void>;
|
|
2259
2264
|
/**
|
|
@@ -7082,6 +7087,11 @@ export declare class Crawler {
|
|
|
7082
7087
|
private adapterType;
|
|
7083
7088
|
/** Track pending execute() calls for proper done() behavior */
|
|
7084
7089
|
private pendingExecutions;
|
|
7090
|
+
/**
|
|
7091
|
+
* Track pending visits that haven't yet added to the queue.
|
|
7092
|
+
* This prevents waitForAll() from exiting when execute() is still in its async preamble.
|
|
7093
|
+
*/
|
|
7094
|
+
private pendingVisitCount;
|
|
7085
7095
|
/** Track if the crawler has been destroyed */
|
|
7086
7096
|
private isDestroyed;
|
|
7087
7097
|
/** Original queue options for restoration */
|
package/dist/entries/crawler.cjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Crawler =
|
|
3
|
-
const
|
|
4
|
-
exports.CrawlerOptions =
|
|
1
|
+
const _mod_b5a7u5 = require('../crawler/crawler.cjs');
|
|
2
|
+
exports.Crawler = _mod_b5a7u5.Crawler;;
|
|
3
|
+
const _mod_lypmhz = require('../crawler/crawler-options.cjs');
|
|
4
|
+
exports.CrawlerOptions = _mod_lypmhz.CrawlerOptions;;
|
package/dist/index.cjs
CHANGED
|
@@ -1,30 +1,30 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Rezo =
|
|
3
|
-
exports.createRezoInstance =
|
|
4
|
-
exports.createDefaultInstance =
|
|
5
|
-
const
|
|
6
|
-
exports.RezoError =
|
|
7
|
-
exports.RezoErrorCode =
|
|
8
|
-
const
|
|
9
|
-
exports.RezoHeaders =
|
|
10
|
-
const
|
|
11
|
-
exports.RezoFormData =
|
|
12
|
-
const
|
|
13
|
-
exports.RezoCookieJar =
|
|
14
|
-
exports.Cookie =
|
|
15
|
-
const
|
|
16
|
-
exports.toCurl =
|
|
17
|
-
exports.fromCurl =
|
|
18
|
-
const
|
|
19
|
-
exports.createDefaultHooks =
|
|
20
|
-
exports.mergeHooks =
|
|
21
|
-
const
|
|
22
|
-
exports.ProxyManager =
|
|
23
|
-
const
|
|
24
|
-
exports.RezoQueue =
|
|
25
|
-
exports.HttpQueue =
|
|
26
|
-
exports.Priority =
|
|
27
|
-
exports.HttpMethodPriority =
|
|
1
|
+
const _mod_lagae6 = require('./core/rezo.cjs');
|
|
2
|
+
exports.Rezo = _mod_lagae6.Rezo;
|
|
3
|
+
exports.createRezoInstance = _mod_lagae6.createRezoInstance;
|
|
4
|
+
exports.createDefaultInstance = _mod_lagae6.createDefaultInstance;;
|
|
5
|
+
const _mod_4jxc85 = require('./errors/rezo-error.cjs');
|
|
6
|
+
exports.RezoError = _mod_4jxc85.RezoError;
|
|
7
|
+
exports.RezoErrorCode = _mod_4jxc85.RezoErrorCode;;
|
|
8
|
+
const _mod_sbjvga = require('./utils/headers.cjs');
|
|
9
|
+
exports.RezoHeaders = _mod_sbjvga.RezoHeaders;;
|
|
10
|
+
const _mod_go9l84 = require('./utils/form-data.cjs');
|
|
11
|
+
exports.RezoFormData = _mod_go9l84.RezoFormData;;
|
|
12
|
+
const _mod_wmoaa7 = require('./utils/cookies.cjs');
|
|
13
|
+
exports.RezoCookieJar = _mod_wmoaa7.RezoCookieJar;
|
|
14
|
+
exports.Cookie = _mod_wmoaa7.Cookie;;
|
|
15
|
+
const _mod_fqhfcz = require('./utils/curl.cjs');
|
|
16
|
+
exports.toCurl = _mod_fqhfcz.toCurl;
|
|
17
|
+
exports.fromCurl = _mod_fqhfcz.fromCurl;;
|
|
18
|
+
const _mod_dv2dqm = require('./core/hooks.cjs');
|
|
19
|
+
exports.createDefaultHooks = _mod_dv2dqm.createDefaultHooks;
|
|
20
|
+
exports.mergeHooks = _mod_dv2dqm.mergeHooks;;
|
|
21
|
+
const _mod_e63fae = require('./proxy/manager.cjs');
|
|
22
|
+
exports.ProxyManager = _mod_e63fae.ProxyManager;;
|
|
23
|
+
const _mod_l303ep = require('./queue/index.cjs');
|
|
24
|
+
exports.RezoQueue = _mod_l303ep.RezoQueue;
|
|
25
|
+
exports.HttpQueue = _mod_l303ep.HttpQueue;
|
|
26
|
+
exports.Priority = _mod_l303ep.Priority;
|
|
27
|
+
exports.HttpMethodPriority = _mod_l303ep.HttpMethodPriority;;
|
|
28
28
|
const { RezoError } = require('./errors/rezo-error.cjs');
|
|
29
29
|
const isRezoError = exports.isRezoError = RezoError.isRezoError;
|
|
30
30
|
const Cancel = exports.Cancel = RezoError;
|
package/dist/index.d.ts
CHANGED
|
@@ -2254,6 +2254,8 @@ export declare class RezoQueue<T = any> {
|
|
|
2254
2254
|
private readonly throughputWindowSize;
|
|
2255
2255
|
private idlePromise?;
|
|
2256
2256
|
private emptyPromise?;
|
|
2257
|
+
/** Tracks if queue has ever had work added - ensures onIdle waits for first task */
|
|
2258
|
+
private hasEverBeenActive;
|
|
2257
2259
|
readonly config: Required<QueueConfig>;
|
|
2258
2260
|
/**
|
|
2259
2261
|
* Create a new RezoQueue
|
|
@@ -2328,6 +2330,9 @@ export declare class RezoQueue<T = any> {
|
|
|
2328
2330
|
}) => boolean): number;
|
|
2329
2331
|
/**
|
|
2330
2332
|
* Wait for queue to become idle (no running or pending tasks)
|
|
2333
|
+
*
|
|
2334
|
+
* Unlike a simple "isIdle" check, this properly waits for work to be added
|
|
2335
|
+
* and completed if called before any tasks are queued (matches p-queue behavior).
|
|
2331
2336
|
*/
|
|
2332
2337
|
onIdle(): Promise<void>;
|
|
2333
2338
|
/**
|
package/dist/internal/agents/index.cjs
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.Agent =
|
|
3
|
-
const
|
|
4
|
-
exports.HttpProxyAgent =
|
|
5
|
-
const
|
|
6
|
-
exports.HttpsProxyAgent =
|
|
7
|
-
const
|
|
8
|
-
exports.SocksProxyAgent =
|
|
9
|
-
const
|
|
10
|
-
exports.SocksClient =
|
|
1
|
+
const _mod_4lksq3 = require('./base.cjs');
|
|
2
|
+
exports.Agent = _mod_4lksq3.Agent;;
|
|
3
|
+
const _mod_6k5yy2 = require('./http-proxy.cjs');
|
|
4
|
+
exports.HttpProxyAgent = _mod_6k5yy2.HttpProxyAgent;;
|
|
5
|
+
const _mod_p934a4 = require('./https-proxy.cjs');
|
|
6
|
+
exports.HttpsProxyAgent = _mod_p934a4.HttpsProxyAgent;;
|
|
7
|
+
const _mod_c7shok = require('./socks-proxy.cjs');
|
|
8
|
+
exports.SocksProxyAgent = _mod_c7shok.SocksProxyAgent;;
|
|
9
|
+
const _mod_uqiinv = require('./socks-client.cjs');
|
|
10
|
+
exports.SocksClient = _mod_uqiinv.SocksClient;;
|
package/dist/platform/browser.d.ts
CHANGED
|
@@ -2128,6 +2128,8 @@ declare class RezoQueue<T = any> {
|
|
|
2128
2128
|
private readonly throughputWindowSize;
|
|
2129
2129
|
private idlePromise?;
|
|
2130
2130
|
private emptyPromise?;
|
|
2131
|
+
/** Tracks if queue has ever had work added - ensures onIdle waits for first task */
|
|
2132
|
+
private hasEverBeenActive;
|
|
2131
2133
|
readonly config: Required<QueueConfig>;
|
|
2132
2134
|
/**
|
|
2133
2135
|
* Create a new RezoQueue
|
|
@@ -2202,6 +2204,9 @@ declare class RezoQueue<T = any> {
|
|
|
2202
2204
|
}) => boolean): number;
|
|
2203
2205
|
/**
|
|
2204
2206
|
* Wait for queue to become idle (no running or pending tasks)
|
|
2207
|
+
*
|
|
2208
|
+
* Unlike a simple "isIdle" check, this properly waits for work to be added
|
|
2209
|
+
* and completed if called before any tasks are queued (matches p-queue behavior).
|
|
2205
2210
|
*/
|
|
2206
2211
|
onIdle(): Promise<void>;
|
|
2207
2212
|
/**
|
package/dist/platform/bun.d.ts
CHANGED
|
@@ -2128,6 +2128,8 @@ declare class RezoQueue<T = any> {
|
|
|
2128
2128
|
private readonly throughputWindowSize;
|
|
2129
2129
|
private idlePromise?;
|
|
2130
2130
|
private emptyPromise?;
|
|
2131
|
+
/** Tracks if queue has ever had work added - ensures onIdle waits for first task */
|
|
2132
|
+
private hasEverBeenActive;
|
|
2131
2133
|
readonly config: Required<QueueConfig>;
|
|
2132
2134
|
/**
|
|
2133
2135
|
* Create a new RezoQueue
|
|
@@ -2202,6 +2204,9 @@ declare class RezoQueue<T = any> {
|
|
|
2202
2204
|
}) => boolean): number;
|
|
2203
2205
|
/**
|
|
2204
2206
|
* Wait for queue to become idle (no running or pending tasks)
|
|
2207
|
+
*
|
|
2208
|
+
* Unlike a simple "isIdle" check, this properly waits for work to be added
|
|
2209
|
+
* and completed if called before any tasks are queued (matches p-queue behavior).
|
|
2205
2210
|
*/
|
|
2206
2211
|
onIdle(): Promise<void>;
|
|
2207
2212
|
/**
|
package/dist/platform/deno.d.ts
CHANGED
|
@@ -2128,6 +2128,8 @@ declare class RezoQueue<T = any> {
|
|
|
2128
2128
|
private readonly throughputWindowSize;
|
|
2129
2129
|
private idlePromise?;
|
|
2130
2130
|
private emptyPromise?;
|
|
2131
|
+
/** Tracks if queue has ever had work added - ensures onIdle waits for first task */
|
|
2132
|
+
private hasEverBeenActive;
|
|
2131
2133
|
readonly config: Required<QueueConfig>;
|
|
2132
2134
|
/**
|
|
2133
2135
|
* Create a new RezoQueue
|
|
@@ -2202,6 +2204,9 @@ declare class RezoQueue<T = any> {
|
|
|
2202
2204
|
}) => boolean): number;
|
|
2203
2205
|
/**
|
|
2204
2206
|
* Wait for queue to become idle (no running or pending tasks)
|
|
2207
|
+
*
|
|
2208
|
+
* Unlike a simple "isIdle" check, this properly waits for work to be added
|
|
2209
|
+
* and completed if called before any tasks are queued (matches p-queue behavior).
|
|
2205
2210
|
*/
|
|
2206
2211
|
onIdle(): Promise<void>;
|
|
2207
2212
|
/**
|
package/dist/platform/node.d.ts
CHANGED
|
@@ -2128,6 +2128,8 @@ declare class RezoQueue<T = any> {
|
|
|
2128
2128
|
private readonly throughputWindowSize;
|
|
2129
2129
|
private idlePromise?;
|
|
2130
2130
|
private emptyPromise?;
|
|
2131
|
+
/** Tracks if queue has ever had work added - ensures onIdle waits for first task */
|
|
2132
|
+
private hasEverBeenActive;
|
|
2131
2133
|
readonly config: Required<QueueConfig>;
|
|
2132
2134
|
/**
|
|
2133
2135
|
* Create a new RezoQueue
|
|
@@ -2202,6 +2204,9 @@ declare class RezoQueue<T = any> {
|
|
|
2202
2204
|
}) => boolean): number;
|
|
2203
2205
|
/**
|
|
2204
2206
|
* Wait for queue to become idle (no running or pending tasks)
|
|
2207
|
+
*
|
|
2208
|
+
* Unlike a simple "isIdle" check, this properly waits for work to be added
|
|
2209
|
+
* and completed if called before any tasks are queued (matches p-queue behavior).
|
|
2205
2210
|
*/
|
|
2206
2211
|
onIdle(): Promise<void>;
|
|
2207
2212
|
/**
|
package/dist/platform/react-native.d.ts
CHANGED
|
@@ -2128,6 +2128,8 @@ declare class RezoQueue<T = any> {
|
|
|
2128
2128
|
private readonly throughputWindowSize;
|
|
2129
2129
|
private idlePromise?;
|
|
2130
2130
|
private emptyPromise?;
|
|
2131
|
+
/** Tracks if queue has ever had work added - ensures onIdle waits for first task */
|
|
2132
|
+
private hasEverBeenActive;
|
|
2131
2133
|
readonly config: Required<QueueConfig>;
|
|
2132
2134
|
/**
|
|
2133
2135
|
* Create a new RezoQueue
|
|
@@ -2202,6 +2204,9 @@ declare class RezoQueue<T = any> {
|
|
|
2202
2204
|
}) => boolean): number;
|
|
2203
2205
|
/**
|
|
2204
2206
|
* Wait for queue to become idle (no running or pending tasks)
|
|
2207
|
+
*
|
|
2208
|
+
* Unlike a simple "isIdle" check, this properly waits for work to be added
|
|
2209
|
+
* and completed if called before any tasks are queued (matches p-queue behavior).
|
|
2205
2210
|
*/
|
|
2206
2211
|
onIdle(): Promise<void>;
|
|
2207
2212
|
/**
|