@crawlee/core 3.13.9-beta.8 → 3.13.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/enqueue_links/enqueue_links.d.ts +6 -7
- package/enqueue_links/enqueue_links.d.ts.map +1 -1
- package/enqueue_links/enqueue_links.js +27 -17
- package/enqueue_links/enqueue_links.js.map +1 -1
- package/enqueue_links/shared.d.ts +8 -2
- package/enqueue_links/shared.d.ts.map +1 -1
- package/enqueue_links/shared.js +14 -10
- package/enqueue_links/shared.js.map +1 -1
- package/package.json +5 -5
|
@@ -1,13 +1,9 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { BatchAddRequestsResult, Dictionary } from '@crawlee/types';
|
|
2
2
|
import { type RobotsTxtFile } from '@crawlee/utils';
|
|
3
3
|
import type { SetRequired } from 'type-fest';
|
|
4
4
|
import type { Request } from '../request';
|
|
5
5
|
import type { AddRequestsBatchedOptions, AddRequestsBatchedResult, RequestProvider, RequestQueueOperationOptions } from '../storages';
|
|
6
|
-
import type { GlobInput, PseudoUrlInput, RegExpInput, RequestTransform } from './shared';
|
|
7
|
-
export type SkippedRequestCallback = (args: {
|
|
8
|
-
url: string;
|
|
9
|
-
reason: 'robotsTxt' | 'limit';
|
|
10
|
-
}) => Awaitable<void>;
|
|
6
|
+
import type { GlobInput, PseudoUrlInput, RegExpInput, RequestTransform, SkippedRequestCallback } from './shared';
|
|
11
7
|
export interface EnqueueLinksOptions extends RequestQueueOperationOptions {
|
|
12
8
|
/** Limit the amount of actually enqueued URLs to this number. Useful for testing across the entire crawling scope. */
|
|
13
9
|
limit?: number;
|
|
@@ -142,7 +138,10 @@ export interface EnqueueLinksOptions extends RequestQueueOperationOptions {
|
|
|
142
138
|
robotsTxtFile?: Pick<RobotsTxtFile, 'isAllowed'>;
|
|
143
139
|
/**
|
|
144
140
|
* When a request is skipped for some reason, you can use this callback to act on it.
|
|
145
|
-
* This is currently fired
|
|
141
|
+
* This is currently fired for requests skipped
|
|
142
|
+
* 1. based on robots.txt file,
|
|
143
|
+
* 2. because they don't match enqueueLinks filters,
|
|
144
|
+
* 3. or because the maxRequestsPerCrawl limit has been reached
|
|
146
145
|
*/
|
|
147
146
|
onSkippedRequest?: SkippedRequestCallback;
|
|
148
147
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"enqueue_links.d.ts","sourceRoot":"","sources":["../../src/enqueue_links/enqueue_links.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"enqueue_links.d.ts","sourceRoot":"","sources":["../../src/enqueue_links/enqueue_links.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,sBAAsB,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACzE,OAAO,EAAE,KAAK,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAGpD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAI7C,OAAO,KAAK,EAAE,OAAO,EAAkB,MAAM,YAAY,CAAC;AAC1D,OAAO,KAAK,EACR,yBAAyB,EACzB,wBAAwB,EACxB,eAAe,EACf,4BAA4B,EAC/B,MAAM,aAAa,CAAC;AACrB,OAAO,KAAK,EACR,SAAS,EACT,cAAc,EACd,WAAW,EACX,gBAAgB,EAChB,sBAAsB,EAGzB,MAAM,UAAU,CAAC;AAUlB,MAAM,WAAW,mBAAoB,SAAQ,4BAA4B;IACrE,sHAAsH;IACtH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf,mCAAmC;IACnC,IAAI,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAEzB,0DAA0D;IAC1D,YAAY,CAAC,EAAE,eAAe,CAAC;IAE/B,oDAAoD;IACpD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,oEAAoE;IACpE,QAAQ,CAAC,EAAE,UAAU,CAAC;IAEtB,iEAAiE;IACjE,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;;OAGG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;;;;;;;;;;OAYG;IACH,KAAK,CAAC,EAAE,SAAS,SAAS,EAAE,CAAC;IAE7B;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,SAAS,CAAC,SAAS,GAAG,WAAW,CAAC,EAAE,CAAC;IAE/C;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,SAAS,WAAW,EAAE,CAAC;IAEjC;;;;;;;;;;;;;;;;;OAiBG;IACH,UAAU,CAAC,EAAE,SAAS,cAAc,EAAE,CAAC;IAEvC;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACH,wBAAwB,CAAC,EAAE,gBAAgB,CAAC;IAE5C;;;;;;;;;;;;;;;;;OAiBG;IACH,QAAQ,CAAC,EAAE,eAAe,GAAG,KAAK,GAAG,aAAa,GAAG,eAAe,GAAG,aAAa,CAAC;IAErF;;;OAGG;IACH,2BAA2B,CAAC,EAAE,OAAO,CAAC;IAEtC;;;OAGG;IACH,aAAa,CAAC,EAAE,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC;IAEjD;;;;;;OAMG;IACH,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;CAC7C;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,oBAAY,eAAe;IACvB;;OAEG;IACH,GAAG,QAAQ;IAEX;;;;;;OAMG;IACH,YAAY,kBAAkB;IAE9B;;;;;;OAMG;IACH,UAAU,gBAAgB;IAE1B;;;;;;OAMG;IACH,UAAU,gBAAgB;CAC7B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAsB,YAAY,CAC9B,OAAO,EAAE,WAAW,CAAC,IAAI,CAAC,mBAAmB,EAAE,cAAc,CAAC,EAAE,MAAM,CAAC,GAAG;IACtE,YAAY,EAAE;QACV,kBAAkB,EAAE,CAChB,QAAQ,EAAE,OAAO,CAAC,UAAU,CAAC,EAAE,EAC/B,OAAO,EAAE,yBAAyB,KACjC,OAAO,CAAC,wBAAwB,CAAC,CAAC;KAC1C,CAAC;CACL,GACF,OAAO,CAAC,sBAAsB,CAAC,CAsNjC;AAED;;;
;;;;GAOG;AACH,wBAAgB,sCAAsC,CAAC,EACnD,eAAe,EACf,eAAe,EACf,kBAAkB,EAClB,mBAAmB,GACtB,EAAE,cAAc,sBA+BhB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC3B,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,eAAe,CAAC,EAAE,mBAAmB,CAAC,UAAU,CAAC,CAAC;IAClD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B"}
|
|
@@ -173,6 +173,13 @@ async function enqueueLinks(options) {
|
|
|
173
173
|
break;
|
|
174
174
|
}
|
|
175
175
|
}
|
|
176
|
+
/**
 * Forwards every skipped request to the `onSkippedRequest` callback taken from the
 * enclosing scope.
 *
 * Nothing happens when no callback is set or when the list has no entries. Otherwise the
 * callback is invoked once per entry and all returned values are awaited together.
 *
 * @param skippedRequests Entries exposing the `url` of each request that was dropped.
 * @param reason Reason passed through unchanged to the callback.
 */
async function reportSkippedRequests(skippedRequests, reason) {
    // Guard clause: bail out unless there is both a listener and something to report.
    if (!onSkippedRequest || !(skippedRequests.length > 0)) {
        return;
    }
    const notifications = skippedRequests.map((skipped) => onSkippedRequest({ url: skipped.url, reason }));
    await Promise.all(notifications);
}
|
|
176
183
|
let requestOptions = (0, shared_1.createRequestOptions)(urls, options);
|
|
177
184
|
if (robotsTxtFile) {
|
|
178
185
|
const skippedRequests = [];
|
|
@@ -183,34 +190,37 @@ async function enqueueLinks(options) {
|
|
|
183
190
|
skippedRequests.push(request);
|
|
184
191
|
return false;
|
|
185
192
|
});
|
|
186
|
-
|
|
187
|
-
await Promise.all(skippedRequests.map((request) => {
|
|
188
|
-
return onSkippedRequest({ url: request.url, reason: 'robotsTxt' });
|
|
189
|
-
}));
|
|
190
|
-
}
|
|
193
|
+
await reportSkippedRequests(skippedRequests, 'robotsTxt');
|
|
191
194
|
}
|
|
192
195
|
if (transformRequestFunction) {
|
|
196
|
+
const skippedRequests = [];
|
|
193
197
|
requestOptions = requestOptions
|
|
194
|
-
.map((request) =>
|
|
195
|
-
|
|
198
|
+
.map((request) => {
|
|
199
|
+
const transformedRequest = transformRequestFunction(request);
|
|
200
|
+
if (!transformedRequest) {
|
|
201
|
+
skippedRequests.push(request);
|
|
202
|
+
}
|
|
203
|
+
return transformedRequest;
|
|
204
|
+
})
|
|
205
|
+
.filter((r) => Boolean(r));
|
|
206
|
+
await reportSkippedRequests(skippedRequests, 'filters');
|
|
196
207
|
}
|
|
197
|
-
function createFilteredRequests() {
|
|
208
|
+
async function createFilteredRequests() {
|
|
209
|
+
const skippedRequests = [];
|
|
198
210
|
// No user provided patterns means we can skip an extra filtering step
|
|
199
211
|
if (urlPatternObjects.length === 0) {
|
|
200
|
-
return (0, shared_1.createRequests)(requestOptions, enqueueStrategyPatterns, urlExcludePatternObjects, options.strategy);
|
|
212
|
+
return (0, shared_1.createRequests)(requestOptions, enqueueStrategyPatterns, urlExcludePatternObjects, options.strategy, (url) => skippedRequests.push(url));
|
|
201
213
|
}
|
|
202
214
|
// Generate requests based on the user patterns first
|
|
203
|
-
const generatedRequestsFromUserFilters = (0, shared_1.createRequests)(requestOptions, urlPatternObjects, urlExcludePatternObjects, options.strategy);
|
|
215
|
+
const generatedRequestsFromUserFilters = (0, shared_1.createRequests)(requestOptions, urlPatternObjects, urlExcludePatternObjects, options.strategy, (url) => skippedRequests.push(url));
|
|
204
216
|
// ...then filter them by the enqueue links strategy (making this an AND check)
|
|
205
|
-
|
|
217
|
+
const filtered = (0, shared_1.filterRequestsByPatterns)(generatedRequestsFromUserFilters, enqueueStrategyPatterns, (url) => skippedRequests.push(url));
|
|
218
|
+
await reportSkippedRequests(skippedRequests.map((url) => ({ url })), 'filters');
|
|
219
|
+
return filtered;
|
|
206
220
|
}
|
|
207
|
-
let requests = createFilteredRequests();
|
|
221
|
+
let requests = await createFilteredRequests();
|
|
208
222
|
if (limit && limit < requests.length) {
|
|
209
|
-
|
|
210
|
-
for (const request of requests.slice(limit)) {
|
|
211
|
-
await onSkippedRequest({ url: request.url, reason: 'limit' });
|
|
212
|
-
}
|
|
213
|
-
}
|
|
223
|
+
await reportSkippedRequests(requests.slice(limit), 'limit');
|
|
214
224
|
requests = requests.slice(0, limit);
|
|
215
225
|
}
|
|
216
226
|
const { addedRequests } = await requestQueue.addRequestsBatched(requests, {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"enqueue_links.js","sourceRoot":"","sources":["../../src/enqueue_links/enqueue_links.ts"],"names":[],"mappings":";;;
|
|
1
|
+
{"version":3,"file":"enqueue_links.js","sourceRoot":"","sources":["../../src/enqueue_links/enqueue_links.ts"],"names":[],"mappings":";;;AA8QA,oCA+NC;AAUD,wFAoCC;;AAzhBD,oDAAoB;AACpB,iCAAkC;AAGlC,6DAA6B;AAkB7B,qCAOkB;AAgKlB;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,IAAY,eAgCX;AAhCD,WAAY,eAAe;IACvB;;OAEG;IACH,8BAAW,CAAA;IAEX;;;;;;OAMG;IACH,iDAA8B,CAAA;IAE9B;;;;;;OAMG;IACH,6CAA0B,CAAA;IAE1B;;;;;;OAMG;IACH,6CAA0B,CAAA;AAC9B,CAAC,EAhCW,eAAe,+BAAf,eAAe,QAgC1B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACI,KAAK,UAAU,YAAY,CAC9B,OAOC;IAED,IAAI,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChD,MAAM,IAAI,UAAU,CAChB;YACI,4JAA4J;YAC5J,kHAAkH;SACrH,CAAC,IAAI,CAAC,IAAI,CAAC,CACf,CAAC;IACN,CAAC;IAED,IAAA,YAAE,EACE,OAAO,EACP,YAAE,CAAC,MAAM,CAAC,UAAU,CAAC;QACjB,IAAI,EAAE,YAAE,CAAC,KAAK,CAAC,MAAM,CAAC,YAAE,CAAC,MAAM,CAAC;QAChC,YAAY,EAAE,YAAE,CAAC,MAAM,CAAC,OAAO,CAAC,oBAAoB,CAAC;QACrD,aAAa,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC;QACtD,gBAAgB,EAAE,YAAE,CAAC,QAAQ,CAAC,QAAQ;QACtC,SAAS,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QAC9B,cAAc,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;QACnC,KAAK,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QACzB,QAAQ,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QAC5B,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QAC3B,QAAQ,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QAC5B,KAAK,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;QACzB,UAAU,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,YAAE,CAAC,GAAG,CAAC,YAAE,CAAC,MAAM,EAAE,YAAE,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;QAClF,KAAK,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,YAAE,CAAC,GAAG,CAAC,YAAE,CAAC,MAAM,EAAE,YAAE,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;QAC7E,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAC7B,YAAE,CAAC,GAAG,CAAC,YAAE,CAAC,MAAM,EAAE,YAAE,CAAC,MAAM,EAAE,YAAE,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,YAAE,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CACvF;QACD,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,YAAE,CAAC,GAAG,CAAC,YAAE,CAAC,MAAM,EAAE,YAAE,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;QACjF,wBAAwB,EAAE,YAAE,CAAC,QAAQ,CAAC,QAAQ;QAC9C,QAAQ,EAAE,YAAE,CAAC,QAA
Q,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;QAClE,2BAA2B,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;KACnD,CAAC,CACL,CAAC;IAEF,MAAM,EACF,YAAY,EACZ,KAAK,EACL,IAAI,EACJ,UAAU,EACV,OAAO,EACP,KAAK,EACL,OAAO,EACP,wBAAwB,EACxB,SAAS,EACT,2BAA2B,EAC3B,aAAa,EACb,gBAAgB,GACnB,GAAG,OAAO,CAAC;IAEZ,MAAM,wBAAwB,GAAuB,EAAE,CAAC;IACxD,MAAM,iBAAiB,GAAuB,EAAE,CAAC;IAEjD,IAAI,OAAO,EAAE,MAAM,EAAE,CAAC;QAClB,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;YACzB,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,MAAM,IAAI,IAAI,EAAE,CAAC;gBAC7C,wBAAwB,CAAC,IAAI,CAAC,GAAG,IAAA,sCAA6B,EAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAC5E,CAAC;iBAAM,IAAI,IAAI,YAAY,MAAM,IAAI,QAAQ,IAAI,IAAI,EAAE,CAAC;gBACpD,wBAAwB,CAAC,IAAI,CAAC,GAAG,IAAA,0CAAiC,EAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAChF,CAAC;QACL,CAAC;IACL,CAAC;IAED,IAAI,UAAU,EAAE,MAAM,EAAE,CAAC;QACrB,aAAG,CAAC,UAAU,CAAC,qEAAqE,CAAC,CAAC;QACtF,iBAAiB,CAAC,IAAI,CAAC,GAAG,IAAA,6CAAoC,EAAC,UAAU,CAAC,CAAC,CAAC;IAChF,CAAC;IAED,IAAI,KAAK,EAAE,MAAM,EAAE,CAAC;QAChB,iBAAiB,CAAC,IAAI,CAAC,GAAG,IAAA,sCAA6B,EAAC,KAAK,CAAC,CAAC,CAAC;IACpE,CAAC;IAED,IAAI,OAAO,EAAE,MAAM,EAAE,CAAC;QAClB,iBAAiB,CAAC,IAAI,CAAC,GAAG,IAAA,0CAAiC,EAAC,OAAO,CAAC,CAAC,CAAC;IAC1E,CAAC;IAED,IAAI,CAAC,iBAAiB,CAAC,MAAM,EAAE,CAAC;QAC5B,OAAO,CAAC,QAAQ,KAAhB,OAAO,CAAC,QAAQ,GAAK,eAAe,CAAC,YAAY,EAAC;IACtD,CAAC;IAED,MAAM,uBAAuB,GAAuB,EAAE,CAAC;IAEvD,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QAClB,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAErC,QAAQ,OAAO,CAAC,QAAQ,EAAE,CAAC;YACvB,KAAK,eAAe,CAAC,YAAY;gBAC7B,sFAAsF;gBACtF,uFAAuF;gBACvF,yCAAyC;gBACzC,uBAAuB,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,gBAAgB,CAAC,GAAG,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC,CAAC;gBAC7E,MAAM;YACV,KAAK,eAAe,CAAC,UAAU,CAAC,CAAC,CAAC;gBAC9B,4CAA4C;gBAC5C,MAAM,eAAe,GAAG,IAAA,iBAAS,EAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC,CAAC;gBAExE,IAAI,eAAe,EAAE,CAAC;oBAClB,gHAAgH;oBAChH,GAAG,CAAC,QAAQ,GAAG,eAAe,CAAC;oBAC/B,uBAAuB,CAAC,IAAI,CACxB,EAAE,IAAI,EAAE,gBAAgB,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,eAAe,EAAE,KAAK,eAAe,EAAE,CAAC,KAAK,CAAC,EAAE,EAC/F,EAAE,IAAI,EAAE,gBAAgB,CA
AC,GAAG,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CACjD,CAAC;gBACN,CAAC;qBAAM,CAAC;oBACJ,6FAA6F;oBAC7F,4BAA4B;oBAC5B,uBAAuB,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,gBAAgB,CAAC,GAAG,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC,CAAC;gBACjF,CAAC;gBAED,MAAM;YACV,CAAC;YACD,KAAK,eAAe,CAAC,UAAU,CAAC,CAAC,CAAC;gBAC9B,4EAA4E;gBAC5E,uBAAuB,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,GAAG,CAAC,MAAM,KAAK,EAAE,CAAC,CAAC;gBAC3D,MAAM;YACV,CAAC;YACD,KAAK,eAAe,CAAC,GAAG,CAAC;YACzB;gBACI,uBAAuB,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC,CAAC;gBACxD,MAAM;QACd,CAAC;IACL,CAAC;IAED,KAAK,UAAU,qBAAqB,CAAC,eAAkC,EAAE,MAA4B;QACjG,IAAI,gBAAgB,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjD,MAAM,OAAO,CAAC,GAAG,CACb,eAAe,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE;gBAC5B,OAAO,gBAAgB,CAAC,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;YAC1D,CAAC,CAAC,CACL,CAAC;QACN,CAAC;IACL,CAAC;IAED,IAAI,cAAc,GAAG,IAAA,6BAAoB,EAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAEzD,IAAI,aAAa,EAAE,CAAC;QAChB,MAAM,eAAe,GAAqB,EAAE,CAAC;QAE7C,cAAc,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE;YAC/C,IAAI,aAAa,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;gBACvC,OAAO,IAAI,CAAC;YAChB,CAAC;YAED,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC9B,OAAO,KAAK,CAAC;QACjB,CAAC,CAAC,CAAC;QAEH,MAAM,qBAAqB,CAAC,eAAe,EAAE,WAAW,CAAC,CAAC;IAC9D,CAAC;IAED,IAAI,wBAAwB,EAAE,CAAC;QAC3B,MAAM,eAAe,GAAqB,EAAE,CAAC;QAE7C,cAAc,GAAG,cAAc;aAC1B,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE;YACb,MAAM,kBAAkB,GAAG,wBAAwB,CAAC,OAAO,CAAC,CAAC;YAC7D,IAAI,CAAC,kBAAkB,EAAE,CAAC;gBACtB,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAClC,CAAC;YACD,OAAO,kBAAkB,CAAC;QAC9B,CAAC,CAAC;aACD,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,CAAqB,CAAC;QAEnD,MAAM,qBAAqB,CAAC,eAAe,EAAE,SAAS,CAAC,CAAC;IAC5D,CAAC;IAED,KAAK,UAAU,sBAAsB;QACjC,MAAM,eAAe,GAAa,EAAE,CAAC;QAErC,sEAAsE;QACtE,IAAI,iBAAiB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACjC,OAAO,IAAA,uBAAc,EACjB,cAAc,EACd,uBAAuB,EACvB,wBAAwB,EACxB,OAAO,CAAC,QAAQ,EAChB,CAAC,GAAG,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,CACrC,CAAC;QACN,CAAC;QAED,qDAAqD;QACrD,MAAM,gCAAgC,GAAG,IAAA,uBAAc,EACnD,cAAc,EACd,iBAAiB,EACjB,
wBAAwB,EACxB,OAAO,CAAC,QAAQ,EAChB,CAAC,GAAG,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,CACrC,CAAC;QACF,+EAA+E;QAC/E,MAAM,QAAQ,GAAG,IAAA,iCAAwB,EAAC,gCAAgC,EAAE,uBAAuB,EAAE,CAAC,GAAG,EAAE,EAAE,CACzG,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,CAC5B,CAAC;QAEF,MAAM,qBAAqB,CACvB,eAAe,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,EACvC,SAAS,CACZ,CAAC;QAEF,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED,IAAI,QAAQ,GAAG,MAAM,sBAAsB,EAAE,CAAC;IAC9C,IAAI,KAAK,IAAI,KAAK,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;QACnC,MAAM,qBAAqB,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,OAAO,CAAC,CAAC;QAC5D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IACxC,CAAC;IAED,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,YAAY,CAAC,kBAAkB,CAAC,QAAQ,EAAE;QACtE,SAAS;QACT,2BAA2B;KAC9B,CAAC,CAAC;IAEH,OAAO,EAAE,iBAAiB,EAAE,aAAa,EAAE,mBAAmB,EAAE,EAAE,EAAE,CAAC;AACzE,CAAC;AAED;;;;;;;GAOG;AACH,SAAgB,sCAAsC,CAAC,EACnD,eAAe,EACf,eAAe,EACf,kBAAkB,EAClB,mBAAmB,GACN;IACb,wCAAwC;IACxC,IAAI,mBAAmB,EAAE,CAAC;QACtB,OAAO,mBAAmB,CAAC;IAC/B,CAAC;IAED,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,kBAAkB,CAAC,CAAC,MAAM,CAAC;IAC7D,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,eAAe,IAAI,kBAAkB,CAAC,CAAC,MAAM,CAAC;IAE7E,6DAA6D;IAC7D,IAAI,eAAe,KAAK,eAAe,CAAC,GAAG,EAAE,CAAC;QAC1C,OAAO,cAAc,CAAC;IAC1B,CAAC;IAED,0HAA0H;IAC1H,+IAA+I;IAC/I,4DAA4D;IAC5D,IAAI,eAAe,KAAK,eAAe,CAAC,UAAU,EAAE,CAAC;QACjD,MAAM,gBAAgB,GAAG,IAAA,iBAAS,EAAC,iBAAiB,EAAE,EAAE,WAAW,EAAE,KAAK,EAAE,CAAE,CAAC;QAC/E,MAAM,aAAa,GAAG,IAAA,iBAAS,EAAC,cAAc,EAAE,EAAE,WAAW,EAAE,KAAK,EAAE,CAAE,CAAC;QAEzE,IAAI,gBAAgB,KAAK,aAAa,EAAE,CAAC;YACrC,OAAO,cAAc,CAAC;QAC1B,CAAC;QAED,OAAO,SAAS,CAAC;IACrB,CAAC;IAED,2JAA2J;IAC3J,mCAAmC;IACnC,OAAO,iBAAiB,CAAC;AAC7B,CAAC;AAYD;;GAEG;AACH,SAAS,gBAAgB,CAAC,OAAe;IACrC,OAAO,OAAO,CAAC,OAAO,CAAC,gBAAgB,EAAE,aAAa,CAAC,CAAC;AAC5D,CAAC"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { Awaitable } from '@crawlee/types';
|
|
1
2
|
import type { RequestOptions } from '../request';
|
|
2
3
|
import { Request } from '../request';
|
|
3
4
|
import type { EnqueueLinksOptions } from './enqueue_links';
|
|
@@ -18,6 +19,11 @@ export type RegExpObject = {
|
|
|
18
19
|
regexp: RegExp;
|
|
19
20
|
} & Pick<RequestOptions, 'method' | 'payload' | 'label' | 'userData' | 'headers'>;
|
|
20
21
|
export type RegExpInput = RegExp | RegExpObject;
|
|
22
|
+
/**
 * Why a URL was skipped instead of being enqueued.
 *
 * As used by `enqueueLinks` in this package:
 * - `'robotsTxt'` - the URL is disallowed by the provided robots.txt file,
 * - `'filters'` - the URL was dropped by `transformRequestFunction` or by the URL patterns,
 * - `'limit'` - the URL fell beyond the `limit` of requests to enqueue.
 *
 * NOTE(review): `'redirect'` is not produced by `enqueueLinks`; presumably it is reported by
 * crawler code elsewhere - confirm against its callers.
 */
export type SkippedRequestReason = 'robotsTxt' | 'limit' | 'filters' | 'redirect';
|
|
23
|
+
/**
 * Callback invoked for a request that was skipped rather than enqueued.
 *
 * Receives the skipped `url` together with the `reason` it was skipped. It may return a
 * promise; `enqueueLinks` awaits the result before continuing.
 */
export type SkippedRequestCallback = (args: {
|
|
24
|
+
url: string;
|
|
25
|
+
reason: SkippedRequestReason;
|
|
26
|
+
}) => Awaitable<void>;
|
|
21
27
|
/**
|
|
22
28
|
* @ignore
|
|
23
29
|
*/
|
|
@@ -47,8 +53,8 @@ export declare function constructRegExpObjectsFromRegExps(regexps: readonly RegE
|
|
|
47
53
|
/**
|
|
48
54
|
* @ignore
|
|
49
55
|
*/
|
|
50
|
-
export declare function createRequests(requestOptions: (string | RequestOptions)[], urlPatternObjects?: UrlPatternObject[], excludePatternObjects?: UrlPatternObject[], strategy?: EnqueueLinksOptions['strategy']): Request[];
|
|
51
|
-
export declare function filterRequestsByPatterns(requests: Request[], patterns?: UrlPatternObject[]): Request[];
|
|
56
|
+
export declare function createRequests(requestOptions: (string | RequestOptions)[], urlPatternObjects?: UrlPatternObject[], excludePatternObjects?: UrlPatternObject[], strategy?: EnqueueLinksOptions['strategy'], onSkippedUrl?: (url: string) => void): Request[];
|
|
57
|
+
export declare function filterRequestsByPatterns(requests: Request[], patterns?: UrlPatternObject[], onSkippedUrl?: (url: string) => void): Request[];
|
|
52
58
|
/**
|
|
53
59
|
* @ignore
|
|
54
60
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"shared.d.ts","sourceRoot":"","sources":["../../src/enqueue_links/shared.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"shared.d.ts","sourceRoot":"","sources":["../../src/enqueue_links/shared.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAKhD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AACrC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AAE3D,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAYhD,MAAM,MAAM,gBAAgB,GAAG;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;CACnB,GAAG,IAAI,CAAC,cAAc,EAAE,QAAQ,GAAG,SAAS,GAAG,OAAO,GAAG,UAAU,GAAG,SAAS,CAAC,CAAC;AAElF,MAAM,MAAM,eAAe,GAAG;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CACjD,cAAc,EACd,QAAQ,GAAG,SAAS,GAAG,OAAO,GAAG,UAAU,GAAG,SAAS,CAC1D,CAAC;AAEF,MAAM,MAAM,cAAc,GAAG,MAAM,GAAG,eAAe,CAAC;AAEtD,MAAM,MAAM,UAAU,GAAG;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAC5C,cAAc,EACd,QAAQ,GAAG,SAAS,GAAG,OAAO,GAAG,UAAU,GAAG,SAAS,CAC1D,CAAC;AAEF,MAAM,MAAM,SAAS,GAAG,MAAM,GAAG,UAAU,CAAC;AAE5C,MAAM,MAAM,YAAY,GAAG;IAAE,MAAM,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAChD,cAAc,EACd,QAAQ,GAAG,SAAS,GAAG,OAAO,GAAG,UAAU,GAAG,SAAS,CAC1D,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,YAAY,CAAC;AAEhD,MAAM,MAAM,oBAAoB,GAAG,WAAW,GAAG,OAAO,GAAG,SAAS,GAAG,UAAU,CAAC;AAElF,MAAM,MAAM,sBAAsB,GAAG,CAAC,IAAI,EAAE;IACxC,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,oBAAoB,CAAC;CAChC,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAEtB;;GAEG;AACH,wBAAgB,8BAA8B,CAC1C,IAAI,EAAE,SAAS,GAAG,WAAW,GAAG,cAAc,EAC9C,OAAO,EAAE,YAAY,GAAG,UAAU,GACnC,IAAI,CAMN;AAED;;;;GAIG;AACH,wBAAgB,oCAAoC,CAAC,UAAU,EAAE,SAAS,cAAc,EAAE,GAAG,YAAY,EAAE,CAiB1G;AAED;;;;GAIG;AACH,wBAAgB,6BAA6B,CAAC,KAAK,EAAE,SAAS,SAAS,EAAE,GAAG,UAAU,EAAE,CAkCvF;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAKxD;AAED;;;;GAIG;AACH,wBAAgB,iCAAiC,CAAC,OAAO,EAAE,SAAS,WAAW,EAAE,GAAG,YAAY,EAAE,CAgBjG;AAED;;GAEG;AACH,wBAAgB,cAAc,CAC1B,cAAc,EAAE,CAAC,MAAM,GAAG,cAAc,CAAC,EAAE,EAC3C,iBAAiB,CAAC,EAAE,gBAAgB,EAAE,EACtC,qBAAqB,GAAE,gBAAgB,EAAO,EAC9C,QAAQ,CAAC,EAAE,mBAAmB,CAAC,UAAU,CAAC,EAC1C,YAAY,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,GACrC,OAAO,EAAE,CAqCX;AAED,wBAAgB,wBAAwB,CACpC,QAAQ,EAAE,O
AAO,EAAE,EACnB,QAAQ,CAAC,EAAE,gBAAgB,EAAE,EAC7B,YAAY,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,GACrC,OAAO,EAAE,CAqBX;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAChC,OAAO,EAAE,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,EAAE,EAC7C,OAAO,GAAE,IAAI,CAAC,mBAAmB,EAAE,OAAO,GAAG,UAAU,GAAG,SAAS,GAAG,gBAAgB,GAAG,UAAU,CAAM,GAC1G,cAAc,EAAE,CA+BlB;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC7B;;;OAGG;IACH,CAAC,QAAQ,EAAE,cAAc,GAAG,cAAc,GAAG,KAAK,GAAG,SAAS,GAAG,IAAI,CAAC;CACzE"}
|
package/enqueue_links/shared.js
CHANGED
|
@@ -124,14 +124,18 @@ function constructRegExpObjectsFromRegExps(regexps) {
|
|
|
124
124
|
/**
|
|
125
125
|
* @ignore
|
|
126
126
|
*/
|
|
127
|
-
function createRequests(requestOptions, urlPatternObjects, excludePatternObjects = [], strategy) {
|
|
127
|
+
function createRequests(requestOptions, urlPatternObjects, excludePatternObjects = [], strategy, onSkippedUrl) {
|
|
128
128
|
return requestOptions
|
|
129
129
|
.map((opts) => ({ url: typeof opts === 'string' ? opts : opts.url, opts }))
|
|
130
130
|
.filter(({ url }) => {
|
|
131
|
-
|
|
131
|
+
const matchesExcludePatterns = excludePatternObjects.some((excludePatternObject) => {
|
|
132
132
|
const { regexp, glob } = excludePatternObject;
|
|
133
133
|
return (regexp && url.match(regexp)) || (glob && (0, minimatch_1.minimatch)(url, glob, { nocase: true }));
|
|
134
134
|
});
|
|
135
|
+
if (matchesExcludePatterns) {
|
|
136
|
+
onSkippedUrl?.(url);
|
|
137
|
+
}
|
|
138
|
+
return !matchesExcludePatterns;
|
|
135
139
|
})
|
|
136
140
|
.map(({ url, opts }) => {
|
|
137
141
|
if (!urlPatternObjects || !urlPatternObjects.length) {
|
|
@@ -147,23 +151,23 @@ function createRequests(requestOptions, urlPatternObjects, excludePatternObjects
|
|
|
147
151
|
}
|
|
148
152
|
}
|
|
149
153
|
// didn't match any positive pattern
|
|
154
|
+
onSkippedUrl?.(url);
|
|
150
155
|
return null;
|
|
151
156
|
})
|
|
152
157
|
.filter((request) => request);
|
|
153
158
|
}
|
|
154
|
-
function filterRequestsByPatterns(requests, patterns) {
|
|
159
|
+
function filterRequestsByPatterns(requests, patterns, onSkippedUrl) {
|
|
155
160
|
if (!patterns?.length) {
|
|
156
161
|
return requests;
|
|
157
162
|
}
|
|
158
163
|
const filtered = [];
|
|
159
164
|
for (const request of requests) {
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
}
|
|
165
|
+
const matchingPattern = patterns.find(({ regexp, glob }) => (regexp && request.url.match(regexp)) || (glob && (0, minimatch_1.minimatch)(request.url, glob, { nocase: true })));
|
|
166
|
+
if (matchingPattern !== undefined) {
|
|
167
|
+
filtered.push(request);
|
|
168
|
+
}
|
|
169
|
+
else {
|
|
170
|
+
onSkippedUrl?.(request.url);
|
|
167
171
|
}
|
|
168
172
|
}
|
|
169
173
|
return filtered;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"shared.js","sourceRoot":"","sources":["../../src/enqueue_links/shared.ts"],"names":[],"mappings":";;;
|
|
1
|
+
{"version":3,"file":"shared.js","sourceRoot":"","sources":["../../src/enqueue_links/shared.ts"],"names":[],"mappings":";;;AA2DA,wEASC;AAOD,oFAiBC;AAOD,sEAkCC;AAKD,kDAKC;AAOD,8EAgBC;AAKD,wCA2CC;AAED,4DAyBC;AAKD,oDAkCC;AAxRD,uCAA+B;AAG/B,yCAAsC;AAEtC,kDAAiD;AAGjD,wCAAqC;AAGrC,wCAAgD;AAAvC,uGAAA,cAAc,OAAA;AAEvB,MAAM,4BAA4B,GAAG,IAAI,CAAC;AAE1C;;;;;GAKG;AACH,MAAM,wBAAwB,GAAG,IAAI,GAAG,EAAE,CAAC;AAmC3C;;GAEG;AACH,SAAgB,8BAA8B,CAC1C,IAA8C,EAC9C,OAAkC;IAElC,wBAAwB,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC5C,IAAI,wBAAwB,CAAC,IAAI,GAAG,4BAA4B,EAAE,CAAC;QAC/D,MAAM,GAAG,GAAG,wBAAwB,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;QACzD,wBAAwB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IACzC,CAAC;AACL,CAAC;AAED;;;;GAIG;AACH,SAAgB,oCAAoC,CAAC,UAAqC;IACtF,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QAC3B,mCAAmC;QACnC,IAAI,YAAY,GAAG,wBAAwB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACtD,IAAI,YAAY;YAAE,OAAO,YAAY,CAAC;QAEtC,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC3B,YAAY,GAAG,EAAE,MAAM,EAAE,IAAA,yBAAY,EAAC,IAAI,CAAC,EAAE,CAAC;QAClD,CAAC;aAAM,CAAC;YACJ,MAAM,EAAE,IAAI,EAAE,GAAG,cAAc,EAAE,GAAG,IAAI,CAAC;YACzC,YAAY,GAAG,EAAE,MAAM,EAAE,IAAA,yBAAY,EAAC,IAAI,CAAC,EAAE,GAAG,cAAc,EAAE,CAAC;QACrE,CAAC;QAED,8BAA8B,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;QAEnD,OAAO,YAAY,CAAC;IACxB,CAAC,CAAC,CAAC;AACP,CAAC;AAED;;;;GAIG;AACH,SAAgB,6BAA6B,CAAC,KAA2B;IACrE,OAAO,KAAK;SACP,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;QACb,uCAAuC;QACvC,IAAI,CAAC,IAAI,EAAE,CAAC;YACR,OAAO,KAAK,CAAC;QACjB,CAAC;QAED,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC3B,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;QAClC,CAAC;QAED,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACZ,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;QACvC,CAAC;QAED,OAAO,KAAK,CAAC;IACjB,CAAC,CAAC;SACD,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACV,8BAA8B;QAC9B,IAAI,UAAU,GAAG,wBAAwB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACpD,IAAI,UAAU;YAAE,OAAO,UAAU,CAAC;QAElC,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC3B,UAAU,GAAG,EAAE,IAAI,EAAE,mBAAmB,CAAC,IAAI,CAAC,EAAE,CAAC;QACrD,CAAC;aAAM,CAAC;YACJ,MAAM,EAAE,IAAI,EAAE,GAAG,cAAc,EAAE,GAAG,IAAI,CAA
C;YACzC,UAAU,GAAG,EAAE,IAAI,EAAE,mBAAmB,CAAC,IAAI,CAAC,EAAE,GAAG,cAAc,EAAE,CAAC;QACxE,CAAC;QAED,8BAA8B,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;QAEjD,OAAO,UAAU,CAAC;IACtB,CAAC,CAAC,CAAC;AACX,CAAC;AAED;;GAEG;AACH,SAAgB,mBAAmB,CAAC,IAAY;IAC5C,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAChC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QACxB,MAAM,IAAI,KAAK,CAAC,8BAA8B,WAAW,mCAAmC,CAAC,CAAC;IAClG,OAAO,WAAW,CAAC;AACvB,CAAC;AAED;;;;GAIG;AACH,SAAgB,iCAAiC,CAAC,OAA+B;IAC7E,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,gCAAgC;QAChC,IAAI,YAAY,GAAG,wBAAwB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACtD,IAAI,YAAY;YAAE,OAAO,YAAY,CAAC;QAEtC,IAAI,IAAI,YAAY,MAAM,EAAE,CAAC;YACzB,YAAY,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;QACpC,CAAC;aAAM,CAAC;YACJ,YAAY,GAAG,IAAI,CAAC;QACxB,CAAC;QAED,8BAA8B,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;QAEnD,OAAO,YAAY,CAAC;IACxB,CAAC,CAAC,CAAC;AACP,CAAC;AAED;;GAEG;AACH,SAAgB,cAAc,CAC1B,cAA2C,EAC3C,iBAAsC,EACtC,wBAA4C,EAAE,EAC9C,QAA0C,EAC1C,YAAoC;IAEpC,OAAO,cAAc;SAChB,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;SAC1E,MAAM,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,EAAE;QAChB,MAAM,sBAAsB,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC,oBAAoB,EAAE,EAAE;YAC/E,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,oBAAoB,CAAC;YAC9C,OAAO,CAAC,MAAM,IAAI,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,IAAI,IAAA,qBAAS,EAAC,GAAG,EAAE,IAAI,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;QAC7F,CAAC,CAAC,CAAC;QAEH,IAAI,sBAAsB,EAAE,CAAC;YACzB,YAAY,EAAE,CAAC,GAAG,CAAC,CAAC;QACxB,CAAC;QAED,OAAO,CAAC,sBAAsB,CAAC;IACnC,CAAC,CAAC;SACD,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE,EAAE;QACnB,IAAI,CAAC,iBAAiB,IAAI,CAAC,iBAAiB,CAAC,MAAM,EAAE,CAAC;YAClD,OAAO,IAAI,iBAAO,CAAC,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,eAAe,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,GAAG,IAAI,EAAE,CAAC,CAAC;QAC1G,CAAC;QAED,KAAK,MAAM,gBAAgB,IAAI,iBAAiB,EAAE,CAAC;YAC/C,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,oBAAoB,EAAE,GAAG,gBAAgB,CAAC;YACnE,IAAI,CAAC,MAAM,IAAI,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC
,CAAC,IAAI,CAAC,IAAI,IAAI,IAAA,qBAAS,EAAC,GAAG,EAAE,IAAI,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;gBACpF,MAAM,OAAO,GACT,OAAO,IAAI,KAAK,QAAQ;oBACpB,CAAC,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,oBAAoB,EAAE,eAAe,EAAE,QAAQ,EAAE;oBACnE,CAAC,CAAC,EAAE,GAAG,IAAI,EAAE,GAAG,oBAAoB,EAAE,eAAe,EAAE,QAAQ,EAAE,CAAC;gBAE1E,OAAO,IAAI,iBAAO,CAAC,OAAO,CAAC,CAAC;YAChC,CAAC;QACL,CAAC;QAED,oCAAoC;QACpC,YAAY,EAAE,CAAC,GAAG,CAAC,CAAC;QACpB,OAAO,IAAI,CAAC;IAChB,CAAC,CAAC;SACD,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAc,CAAC;AACnD,CAAC;AAED,SAAgB,wBAAwB,CACpC,QAAmB,EACnB,QAA6B,EAC7B,YAAoC;IAEpC,IAAI,CAAC,QAAQ,EAAE,MAAM,EAAE,CAAC;QACpB,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED,MAAM,QAAQ,GAAc,EAAE,CAAC;IAE/B,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC7B,MAAM,eAAe,GAAG,QAAQ,CAAC,IAAI,CACjC,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,EAAE,EAAE,CACjB,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,IAAI,IAAA,qBAAS,EAAC,OAAO,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CACxG,CAAC;QAEF,IAAI,eAAe,KAAK,SAAS,EAAE,CAAC;YAChC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC3B,CAAC;aAAM,CAAC;YACJ,YAAY,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAChC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,SAAgB,oBAAoB,CAChC,OAA6C,EAC7C,UAAuG,EAAE;IAEzG,OAAO,OAAO;SACT,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CACT,OAAO,GAAG,KAAK,QAAQ;QACnB,CAAC,CAAC,EAAE,GAAG,EAAE,GAAG,EAAE,eAAe,EAAE,OAAO,CAAC,QAAQ,EAAE;QACjD,CAAC,CAAE,EAAE,GAAG,GAAG,EAAE,eAAe,EAAE,OAAO,CAAC,QAAQ,EAAqB,CAC1E;SACA,MAAM,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,EAAE;QAChB,IAAI,CAAC;YACD,OAAO,IAAI,cAAG,CAAC,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC;QAC9C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,OAAO,KAAK,CAAC;QACjB,CAAC;IACL,CAAC,CAAC;SACD,GAAG,CAAC,CAAC,cAAc,EAAE,EAAE;QACpB,cAAc,CAAC,GAAG,GAAG,IAAI,cAAG,CAAC,cAAc,CAAC,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC;QACvE,cAAc,CAAC,QAAQ,KAAvB,cAAc,CAAC,QAAQ,GAAK,OAAO,CAAC,QAAQ,IAAI,EAAE,EAAC;QAEnD,IAAI,OAAO,OAAO,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACpC,cAAc,CAAC,QAAQ,GAAG;gBACtB,GAAG,cAAc,CAAC,QAAQ;gBAC1B,KAAK,EAAE,OAAO,C
AAC,KAAK;aACvB,CAAC;QACN,CAAC;QAED,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC;YACzB,cAAc,CAAC,cAAc,GAAG,IAAI,CAAC;QACzC,CAAC;QAED,OAAO,cAAc,CAAC;IAC1B,CAAC,CAAC,CAAC;AACX,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/core",
|
|
3
|
-
"version": "3.13.9
|
|
3
|
+
"version": "3.13.9",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=16.0.0"
|
|
@@ -59,9 +59,9 @@
|
|
|
59
59
|
"@apify/pseudo_url": "^2.0.30",
|
|
60
60
|
"@apify/timeout": "^0.3.0",
|
|
61
61
|
"@apify/utilities": "^2.7.10",
|
|
62
|
-
"@crawlee/memory-storage": "3.13.9
|
|
63
|
-
"@crawlee/types": "3.13.9
|
|
64
|
-
"@crawlee/utils": "3.13.9
|
|
62
|
+
"@crawlee/memory-storage": "3.13.9",
|
|
63
|
+
"@crawlee/types": "3.13.9",
|
|
64
|
+
"@crawlee/utils": "3.13.9",
|
|
65
65
|
"@sapphire/async-queue": "^1.5.1",
|
|
66
66
|
"@vladfrangu/async_event_emitter": "^2.2.2",
|
|
67
67
|
"csv-stringify": "^6.2.0",
|
|
@@ -83,5 +83,5 @@
|
|
|
83
83
|
}
|
|
84
84
|
}
|
|
85
85
|
},
|
|
86
|
-
"gitHead": "
|
|
86
|
+
"gitHead": "371eab1afca23ed0619cf7d32134b8c33d17dfe0"
|
|
87
87
|
}
|