@crawlee/core 3.5.5-beta.7 → 3.5.5-beta.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/enqueue_links/enqueue_links.d.ts +2 -2
- package/enqueue_links/enqueue_links.d.ts.map +1 -1
- package/index.mjs +2 -0
- package/package.json +5 -5
- package/request.d.ts +2 -0
- package/request.d.ts.map +1 -1
- package/request.js +5 -6
- package/request.js.map +1 -1
- package/storages/index.d.ts +2 -0
- package/storages/index.d.ts.map +1 -1
- package/storages/index.js +2 -0
- package/storages/index.js.map +1 -1
- package/storages/request_provider.d.ts +266 -0
- package/storages/request_provider.d.ts.map +1 -0
- package/storages/request_provider.js +602 -0
- package/storages/request_provider.js.map +1 -0
- package/storages/request_queue.d.ts +16 -299
- package/storages/request_queue.d.ts.map +1 -1
- package/storages/request_queue.js +60 -646
- package/storages/request_queue.js.map +1 -1
- package/storages/request_queue_v2.d.ts +40 -0
- package/storages/request_queue_v2.d.ts.map +1 -0
- package/storages/request_queue_v2.js +247 -0
- package/storages/request_queue_v2.js.map +1 -0
- package/storages/storage_manager.d.ts.map +1 -1
- package/storages/storage_manager.js.map +1 -1
- package/storages/utils.d.ts +34 -0
- package/storages/utils.d.ts.map +1 -1
- package/storages/utils.js +45 -1
- package/storages/utils.js.map +1 -1
- package/tsconfig.build.tsbuildinfo +1 -1
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
import type { BatchAddRequestsResult, Dictionary } from '@crawlee/types';
|
|
2
2
|
import type { SetRequired } from 'type-fest';
|
|
3
3
|
import type { GlobInput, PseudoUrlInput, RegExpInput, RequestTransform } from './shared';
|
|
4
|
-
import type {
|
|
4
|
+
import type { RequestProvider, RequestQueueOperationOptions } from '../storages';
|
|
5
5
|
export interface EnqueueLinksOptions extends RequestQueueOperationOptions {
|
|
6
6
|
/** Limit the amount of actually enqueued URLs to this number. Useful for testing across the entire crawling scope. */
|
|
7
7
|
limit?: number;
|
|
8
8
|
/** An array of URLs to enqueue. */
|
|
9
9
|
urls?: string[];
|
|
10
10
|
/** A request queue to which the URLs will be enqueued. */
|
|
11
|
-
requestQueue?:
|
|
11
|
+
requestQueue?: RequestProvider;
|
|
12
12
|
/** A CSS selector matching links to be enqueued. */
|
|
13
13
|
selector?: string;
|
|
14
14
|
/** Sets {@apilink Request.userData} for newly enqueued requests. */
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"enqueue_links.d.ts","sourceRoot":"","sources":["../../src/enqueue_links/enqueue_links.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,sBAAsB,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGzE,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAE7C,OAAO,KAAK,EAAE,SAAS,EAAE,cAAc,EAAE,WAAW,EAAE,gBAAgB,EAAoB,MAAM,UAAU,CAAC;AAU3G,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"enqueue_links.d.ts","sourceRoot":"","sources":["../../src/enqueue_links/enqueue_links.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,sBAAsB,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGzE,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAE7C,OAAO,KAAK,EAAE,SAAS,EAAE,cAAc,EAAE,WAAW,EAAE,gBAAgB,EAAoB,MAAM,UAAU,CAAC;AAU3G,OAAO,KAAK,EAAE,eAAe,EAAE,4BAA4B,EAAE,MAAM,aAAa,CAAC;AAEjF,MAAM,WAAW,mBAAoB,SAAQ,4BAA4B;IACrE,sHAAsH;IACtH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf,mCAAmC;IACnC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAEhB,0DAA0D;IAC1D,YAAY,CAAC,EAAE,eAAe,CAAC;IAE/B,oDAAoD;IACpD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,oEAAoE;IACpE,QAAQ,CAAC,EAAE,UAAU,CAAC;IAEtB,iEAAiE;IACjE,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;;;;;;;;;;OAYG;IACH,KAAK,CAAC,EAAE,SAAS,EAAE,CAAC;IAEpB;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,CAAC,SAAS,GAAG,WAAW,CAAC,EAAE,CAAC;IAEtC;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,WAAW,EAAE,CAAC;IAExB;;;;;;;;;;;;;;;;;OAiBG;IACH,UAAU,CAAC,EAAE,cAAc,EAAE,CAAC;IAE9B;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACH,wBAAwB,CAAC,EAAE,gBAAgB,CAAC;IAE5C;;;;;;;;;;;;;;;;;OAiBG;IACH,QAAQ,CAAC,EAAE,eAAe,GAAG,KAAK,GAAG,aAAa,GAAG,eAAe,GAAG,aAAa,CAAC;CACxF;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,oBAAY,eAAe;IACvB;;OAEG;IACH,GAAG,QAAQ;IAEX;;;;;;OAMG;IACH,YAAY,kBAAkB;IAE9B;;;;;;OAMG;IACH,UAAU,gBAAgB;IAE1B;;;;;;OAMG;IACH,UAAU,gBAAgB;CAC7B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAsB,YAAY,CAAC,OAAO,EAAE,WAAW,CAAC,mBAAmB,EAAE,cAAc,GAAG,MAAM,CAAC,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAoJtI;AAED;;;;;;;GAOG;AACH,wBAAgB,sCAAsC,CAAC,EACnD,eAAe,EACf,eAAe,EACf,kBAAkB,EAClB,mBAAmB,GACtB,EAAE,cAAc,sBA+BhB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC3B,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,eAAe,CAAC,EAAE,mBAAmB,CAAC,UAAU,CAAC,CAAC;IAClD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B"}
|
package/index.mjs
CHANGED
|
@@ -30,7 +30,9 @@ export const QUERY_HEAD_MIN_LENGTH = mod.QUERY_HEAD_MIN_LENGTH;
|
|
|
30
30
|
export const REQUESTS_PERSISTENCE_KEY = mod.REQUESTS_PERSISTENCE_KEY;
|
|
31
31
|
export const Request = mod.Request;
|
|
32
32
|
export const RequestList = mod.RequestList;
|
|
33
|
+
export const RequestProvider = mod.RequestProvider;
|
|
33
34
|
export const RequestQueue = mod.RequestQueue;
|
|
35
|
+
export const RequestQueueV2 = mod.RequestQueueV2;
|
|
34
36
|
export const RequestState = mod.RequestState;
|
|
35
37
|
export const RetryRequestError = mod.RetryRequestError;
|
|
36
38
|
export const Router = mod.Router;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/core",
|
|
3
|
-
"version": "3.5.5-beta.
|
|
3
|
+
"version": "3.5.5-beta.9",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=16.0.0"
|
|
@@ -59,9 +59,9 @@
|
|
|
59
59
|
"@apify/pseudo_url": "^2.0.30",
|
|
60
60
|
"@apify/timeout": "^0.3.0",
|
|
61
61
|
"@apify/utilities": "^2.7.10",
|
|
62
|
-
"@crawlee/memory-storage": "3.5.5-beta.
|
|
63
|
-
"@crawlee/types": "3.5.5-beta.
|
|
64
|
-
"@crawlee/utils": "3.5.5-beta.
|
|
62
|
+
"@crawlee/memory-storage": "3.5.5-beta.9",
|
|
63
|
+
"@crawlee/types": "3.5.5-beta.9",
|
|
64
|
+
"@crawlee/utils": "3.5.5-beta.9",
|
|
65
65
|
"@sapphire/async-queue": "^1.5.0",
|
|
66
66
|
"@types/tough-cookie": "^4.0.2",
|
|
67
67
|
"@vladfrangu/async_event_emitter": "^2.2.2",
|
|
@@ -84,5 +84,5 @@
|
|
|
84
84
|
}
|
|
85
85
|
}
|
|
86
86
|
},
|
|
87
|
-
"gitHead": "
|
|
87
|
+
"gitHead": "75e00d23f481181a10b947e020eecbc319d46341"
|
|
88
88
|
}
|
package/request.d.ts
CHANGED
package/request.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"request.d.ts","sourceRoot":"","sources":["../src/request.ts"],"names":[],"mappings":";;AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAK9C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAKjD,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AA2BrD,oBAAY,YAAY;IACpB,WAAW,IAAA;IACX,UAAU,IAAA;IACV,SAAS,IAAA;IACT,eAAe,IAAA;IACf,IAAI,IAAA;IACJ,aAAa,IAAA;IACb,KAAK,IAAA;CACR;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,qBAAa,OAAO,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU;IACzD,iBAAiB;IACjB,EAAE,CAAC,EAAE,MAAM,CAAC;IAEZ,oCAAoC;IACpC,GAAG,EAAE,MAAM,CAAC;IAEZ;;;;;;;OAOG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAC;IAElB,yCAAyC;IACzC,MAAM,EAAE,kBAAkB,CAAC;IAE3B,oDAAoD;IACpD,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB,8FAA8F;IAC9F,OAAO,EAAE,OAAO,CAAC;IAEjB,2FAA2F;IAC3F,UAAU,EAAE,MAAM,CAAC;IAEnB,0DAA0D;IAC1D,aAAa,EAAE,MAAM,EAAE,CAAC;IAExB,wEAAwE;IACxE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAEjC,sEAAsE;IACtE,OAAO,CAAC,SAAS,CAA2B;IAE5C,gDAAgD;IAChD,QAAQ,EAAE,QAAQ,CAAkB;IAEpC;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;OAEG;gBACS,OAAO,EAAE,cAAc,CAAC,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"request.d.ts","sourceRoot":"","sources":["../src/request.ts"],"names":[],"mappings":";;AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAK9C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAKjD,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AA2BrD,oBAAY,YAAY;IACpB,WAAW,IAAA;IACX,UAAU,IAAA;IACV,SAAS,IAAA;IACT,eAAe,IAAA;IACf,IAAI,IAAA;IACJ,aAAa,IAAA;IACb,KAAK,IAAA;CACR;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,qBAAa,OAAO,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU;IACzD,iBAAiB;IACjB,EAAE,CAAC,EAAE,MAAM,CAAC;IAEZ,oCAAoC;IACpC,GAAG,EAAE,MAAM,CAAC;IAEZ;;;;;;;OAOG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAC;IAElB,yCAAyC;IACzC,MAAM,EAAE,kBAAkB,CAAC;IAE3B,oDAAoD;IACpD,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB,8FAA8F;IAC9F,OAAO,EAAE,OAAO,CAAC;IAEjB,2FAA2F;IAC3F,UAAU,EAAE,MAAM,CAAC;IAEnB,0DAA0D;IAC1D,aAAa,EAAE,MAAM,EAAE,CAAC;IAExB,wEAAwE;IACxE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAEjC,sEAAsE;IACtE,OAAO,CAAC,SAAS,CAA2B;IAE5C,gDAAgD;IAChD,QAAQ,EAAE,QAAQ,CAAkB;IAEpC;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;OAEG;gBACS,OAAO,EAAE,cAAc,CAAC,QAAQ,CAAC;IAmH7C,yGAAyG;IACzG,IAAI,cAAc,IAAI,OAAO,CAE5B;IAED,yGAAyG;IACzG,IAAI,cAAc,CAAC,KAAK,EAAE,OAAO,EAMhC;IAED,2HAA2H;IAC3H,IAAI,oBAAoB,IAAI,MAAM,CAEjC;IAED,2HAA2H;IAC3H,IAAI,oBAAoB,CAAC,KAAK,EAAE,MAAM,EAMrC;IAED,oDAAoD;IACpD,IAAI,KAAK,IAAI,MAAM,GAAG,SAAS,CAE9B;IAED,oDAAoD;IACpD,IAAI,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,EAElC;IAED,8HAA8H;IAC9H,IAAI,UAAU,IAAI,MAAM,GAAG,SAAS,CAEnC;IAED,8HAA8H;IAC9H,IAAI,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,EAMvC;IAED,uDAAuD;IACvD,IAAI,KAAK,IAAI,YAAY,CAExB;IAED,uDAAuD;IACvD,IAAI,KAAK,CAAC,KAAK,EAAE,YAAY,EAM5B;IAED;;;;;;;;;;;;OAYG;IACH,gBAAgB,CAAC,cAAc,EAAE,OAAO,EAAE,OAAO,GAAE,uBAA4B,GAAG,IAAI;IAgCtF,SAAS,CAAC,iBAAiB,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,EAAE,oBAAoB,EAAE,EAAE,uBAAuB;IAgBpH,SAAS,CAAC,YAAY,CAAC,OAAO,EAAE,UAAU,GAAG,MAAM;CAQtD;AAED;;GAEG;AACH,MAAM,WAAW,cAAc,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU;IAEpE,mEAAmE;IACnE,GAAG,EAAE,MAAM,CAAC;IAEZ;;;;;;;;;;;;;;;;OAgBG;IACH,SAAS,CAAC,EAAE,
MAAM,CAAC;IAEnB,qBAAqB;IACrB,MAAM,CAAC,EAAE,kBAAkB,GAAG,SAAS,CAAC,kBAAkB,CAAC,CAAC;IAE5D,oDAAoD;IACpD,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;;;;;;OAQG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAEjC;;;OAGG;IACH,QAAQ,CAAC,EAAE,QAAQ,CAAC;IAEpB;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;;;;;OAMG;IACH,eAAe,CAAC,EAAE,OAAO,CAAC;IAE1B;;;;;OAKG;IACH,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAE/B;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAElB;;;;OAIG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;OAEG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,gBAAgB;IAChB,EAAE,CAAC,EAAE,MAAM,CAAC;IAEZ,gBAAgB;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,gBAAgB;IAChB,aAAa,CAAC,EAAE,IAAI,CAAC;CAExB;AAED,MAAM,WAAW,uBAAuB;IACpC;;;OAGG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;CACvB;AAED,UAAU,uBAAuB;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,kBAAkB,CAAC;IAC3B,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;IAC1B,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,oBAAoB,CAAC,EAAE,OAAO,CAAC;CAClC;AAED,MAAM,MAAM,MAAM,GAAG,CAAC,OAAO,CAAC,cAAc,CAAC,GAAG;IAAE,eAAe,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,GAAG,OAAO,CAAC;AAExG,gBAAgB;AAChB,MAAM,WAAW,cAAc;IAC3B,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,MAAM,CAAC;CAClB"}
|
package/request.js
CHANGED
|
@@ -5,7 +5,7 @@ const tslib_1 = require("tslib");
|
|
|
5
5
|
const node_crypto_1 = tslib_1.__importDefault(require("node:crypto"));
|
|
6
6
|
const node_util_1 = tslib_1.__importDefault(require("node:util"));
|
|
7
7
|
const utilities_1 = require("@apify/utilities");
|
|
8
|
-
const ow_1 = tslib_1.
|
|
8
|
+
const ow_1 = tslib_1.__importDefault(require("ow"));
|
|
9
9
|
const log_1 = require("./log");
|
|
10
10
|
const typedefs_1 = require("./typedefs");
|
|
11
11
|
// new properties on the Request object breaks serialization
|
|
@@ -188,15 +188,14 @@ class Request {
|
|
|
188
188
|
// properties and speeds up the validation cca 3-fold.
|
|
189
189
|
// See https://github.com/sindresorhus/ow/issues/193
|
|
190
190
|
(0, typedefs_1.keys)(options).forEach((prop) => {
|
|
191
|
+
// skip url, because it is validated above
|
|
192
|
+
if (prop === 'url') {
|
|
193
|
+
return;
|
|
194
|
+
}
|
|
191
195
|
const predicate = requestOptionalPredicates[prop];
|
|
192
196
|
const value = options[prop];
|
|
193
197
|
if (predicate) {
|
|
194
198
|
(0, ow_1.default)(value, `RequestOptions.${prop}`, predicate);
|
|
195
|
-
// 'url' is checked above because it's not optional
|
|
196
|
-
}
|
|
197
|
-
else if (prop !== 'url') {
|
|
198
|
-
const msg = `Did not expect property \`${prop}\` to exist, got \`${value}\` in object \`RequestOptions\``;
|
|
199
|
-
throw new ow_1.ArgumentError(msg, this.constructor);
|
|
200
199
|
}
|
|
201
200
|
});
|
|
202
201
|
const { id, url, loadedUrl, uniqueKey, payload, noRetry = false, retryCount = 0, sessionRotationCount = 0, maxRetries, errorMessages = [], headers = {}, userData = {}, label, handledAt, keepUrlFragment = false, useExtendedUniqueKey = false, skipNavigation, } = options;
|
package/request.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"request.js","sourceRoot":"","sources":["../src/request.ts"],"names":[],"mappings":";;;;AACA,sEAAiC;AACjC,kEAA6B;AAE7B,gDAAgD;AAGhD,
|
|
1
|
+
{"version":3,"file":"request.js","sourceRoot":"","sources":["../src/request.ts"],"names":[],"mappings":";;;;AACA,sEAAiC;AACjC,kEAA6B;AAE7B,gDAAgD;AAGhD,oDAAoB;AAEpB,+BAA0C;AAE1C,yCAAkC;AAElC,4DAA4D;AAC5D,MAAM,GAAG,GAAG,SAAU,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;AAEpD,MAAM,yBAAyB,GAAG;IAC9B,EAAE,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;IACtB,SAAS,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG;IACjC,SAAS,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;IAC7B,MAAM,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;IAC1B,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,YAAE,CAAC,MAAM,EAAE,YAAE,CAAC,MAAM,CAAC;IAC9C,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;IAC5B,UAAU,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;IAC9B,oBAAoB,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;IACxC,UAAU,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;IAC9B,aAAa,EAAE,YAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,YAAE,CAAC,MAAM,CAAC;IAClD,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;IAC3B,QAAQ,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;IAC5B,KAAK,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;IACzB,SAAS,EAAE,YAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,YAAE,CAAC,MAAM,CAAC,IAAI,EAAE,YAAE,CAAC,IAAI,CAAC;IACnD,eAAe,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;IACpC,oBAAoB,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;IACzC,cAAc,EAAE,YAAE,CAAC,QAAQ,CAAC,OAAO;IACnC,KAAK,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC;CACrE,CAAC;AAEF,IAAY,YAQX;AARD,WAAY,YAAY;IACpB,6DAAW,CAAA;IACX,2DAAU,CAAA;IACV,yDAAS,CAAA;IACT,qEAAe,CAAA;IACf,+CAAI,CAAA;IACJ,iEAAa,CAAA;IACb,iDAAK,CAAA;AACT,CAAC,EARW,YAAY,4BAAZ,YAAY,QAQvB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,MAAa,OAAO;IAqDhB;;OAEG;IACH,YAAY,OAAiC;QAvD7C,iBAAiB;QACjB;;;;;WAAY;QAEZ,oCAAoC;QACpC;;;;;WAAY;QAEZ;;;;;;;WAOG;QACH;;;;;WAAmB;QAEnB;;;WAGG;QACH;;;;;WAAkB;QAElB,yCAAyC;QACzC;;;;;WAA2B;QAE3B,oDAAoD;QACpD;;;;;WAAiB;QAEjB,8FAA8F;QAC9F;;;;;WAAiB;QAEjB,2FAA2F;QAC3F;;;;;WAAmB;QAEnB,0DAA0D;QAC1D;;;;;WAAwB;QAExB,wEAAwE;QACxE;;;;;WAAiC;QAEjC,sEAAsE;QAC9D;;;;mBAAiC,EAAE;WAAC;QAE5C,gDAAgD;QAChD;;;;mBAAqB,EAAc;WAAC;QAEpC;;;WAGG;QACH;;;;;WAAmB;QAMf,IAAA,YAAE,EAAC,OAAO,EAAE,gBAAgB,EAAE,YAAE,CAAC,MAAM,CAAC,CAAC;QACzC,IAAA,YAAE,EAAC,OAAO,C
AAC,GAAG,EAAE,oBAAoB,EAAE,YAAE,CAAC,MAAM,CAAC,CAAC;QACjD,4DAA4D;QAC5D,oDAAoD;QACpD,0DAA0D;QAC1D,sDAAsD;QACtD,oDAAoD;QACpD,IAAA,eAAI,EAAC,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE;YAC3B,0CAA0C;YAC1C,IAAI,IAAI,KAAK,KAAK,EAAE;gBAChB,OAAO;aACV;YAED,MAAM,SAAS,GAAG,yBAAyB,CAAC,IAA8C,CAAC,CAAC;YAC5F,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;YAC5B,IAAI,SAAS,EAAE;gBACX,IAAA,YAAE,EAAC,KAAK,EAAE,kBAAkB,IAAI,EAAE,EAAE,SAA0B,CAAC,CAAC;aACnE;QACL,CAAC,CAAC,CAAC;QAEH,MAAM,EACF,EAAE,EACF,GAAG,EACH,SAAS,EACT,SAAS,EACT,OAAO,EACP,OAAO,GAAG,KAAK,EACf,UAAU,GAAG,CAAC,EACd,oBAAoB,GAAG,CAAC,EACxB,UAAU,EACV,aAAa,GAAG,EAAE,EAClB,OAAO,GAAG,EAAE,EACZ,QAAQ,GAAG,EAAE,EACb,KAAK,EACL,SAAS,EACT,eAAe,GAAG,KAAK,EACvB,oBAAoB,GAAG,KAAK,EAC5B,cAAc,GACjB,GAAG,OAMH,CAAC;QAEF,IAAI,EACA,MAAM,GAAG,KAAK,GACjB,GAAG,OAAO,CAAC;QAEZ,MAAM,GAAG,MAAM,CAAC,WAAW,EAAwB,CAAC;QAEpD,IAAI,MAAM,KAAK,KAAK,IAAI,OAAO;YAAE,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;QAEnG,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC;QACb,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC;QACf,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,SAAS,GAAG,SAAS,IAAI,IAAI,CAAC,iBAAiB,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,EAAE,oBAAoB,EAAE,CAAC,CAAC;QACtH,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,oBAAoB,GAAG,oBAAoB,CAAC;QACjD,IAAI,CAAC,aAAa,GAAG,CAAC,GAAG,aAAa,CAAC,CAAC;QACxC,IAAI,CAAC,OAAO,GAAG,EAAE,GAAG,OAAO,EAAE,CAAC;QAC9B,IAAI,CAAC,SAAS,GAAG,SAAoB,YAAY,IAAI,CAAC,CAAC,CAAE,SAAkB,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,SAAU,CAAC;QAEvG,IAAI,KAAK,EAAE;YACP,QAAQ,CAAC,KAAK,GAAG,KAAK,CAAC;SAC1B;QAED,MAAM,CAAC,gBAAgB,CAAC,IAAI,EAAE;YAC1B,SAAS,EAAE;gBACP,KAAK,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,GAAG,QAAQ,EAAE;gBACrC,UAAU,EAAE,KAAK;gBACjB,QAAQ,EAAE,IAAI;aACjB;YACD,QAAQ,EAAE;gBACN,GAAG,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS;gBACzB,GAAG,EAAE,CAAC,KAA0B,EAAE,EAAE;oBAChC,MAAM,CAAC,gBAAgB,CAAC,KAAK,EAAE;wBAC3B,SAAS,EAAE;4BACP,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS;4BAC/B,UAAU,EAAE,KAAK;4BACjB,QAAQ,EAAE,IAAI;yBACjB;
wBACD,MAAM,EAAE;4BACJ,KAAK,EAAE,GAAG,EAAE;gCACR,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE;oCAClD,OAAO,CAAC;wCACJ,GAAG,IAAI,CAAC,SAAS;wCACjB,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS;qCACtC,CAAC,CAAC;iCACN;gCAED,OAAO,IAAI,CAAC,SAAS,CAAC;4BAC1B,CAAC;4BACD,UAAU,EAAE,KAAK;4BACjB,QAAQ,EAAE,IAAI;yBACjB;qBACJ,CAAC,CAAC;oBACH,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;gBAC3B,CAAC;gBACD,UAAU,EAAE,IAAI;aACnB;SACJ,CAAC,CAAC;QAEH,4EAA4E;QAC5E,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QAEzB,IAAI,cAAc,IAAI,IAAI;YAAE,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QACjE,IAAI,UAAU,IAAI,IAAI;YAAE,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IACzD,CAAC;IAED,yGAAyG;IACzG,IAAI,cAAc;QACd,OAAO,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,cAAc,IAAI,KAAK,CAAC;IAC5D,CAAC;IAED,yGAAyG;IACzG,IAAI,cAAc,CAAC,KAAc;QAC7B,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE;YACzB,IAAI,CAAC,QAAuB,CAAC,SAAS,GAAG,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC;SACvE;aAAM;YACH,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,cAAc,GAAG,KAAK,CAAC;SAClD;IACL,CAAC;IAED,2HAA2H;IAC3H,IAAI,oBAAoB;QACpB,OAAO,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,oBAAoB,IAAI,KAAK,CAAC;IAClE,CAAC;IAED,2HAA2H;IAC3H,IAAI,oBAAoB,CAAC,KAAa;QAClC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE;YACzB,IAAI,CAAC,QAAuB,CAAC,SAAS,GAAG,EAAE,oBAAoB,EAAE,KAAK,EAAE,CAAC;SAC7E;aAAM;YACH,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,oBAAoB,GAAG,KAAK,CAAC;SACxD;IACL,CAAC;IAED,oDAAoD;IACpD,IAAI,KAAK;QACL,OAAO,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;IAC/B,CAAC;IAED,oDAAoD;IACpD,IAAI,KAAK,CAAC,KAAyB;QAC9B,IAAI,CAAC,QAAuB,CAAC,KAAK,GAAG,KAAK,CAAC;IAChD,CAAC;IAED,8HAA8H;IAC9H,IAAI,UAAU;QACV,OAAO,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,UAAU,CAAC;IAC/C,CAAC;IAED,8HAA8H;IAC9H,IAAI,UAAU,CAAC,KAAyB;QACpC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE;YACzB,IAAI,CAAC,QAAuB,CAAC,SAAS,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;SACnE;aAAM;YACH,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,UAAU,GAAG,KAAK,CAAC;SAC9C;IACL,CAAC;IAED,uDAAuD;IACvD,IAAI,KAAK;QACL,OAAO,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,KAAK,IAAI,YAAY,CAAC,WAAW,CAAC;IACtE,CAAC;IAED,uDAAuD;IACvD,IAAI,KAAK,CAAC,KAAmB;QACzB,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE;YACzB,IAAI,CAAC,
QAAuB,CAAC,SAAS,GAAG,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;SAC9D;aAAM;YACH,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,KAAK,GAAG,KAAK,CAAC;SACzC;IACL,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,gBAAgB,CAAC,cAAuB,EAAE,UAAmC,EAAE;QAC3E,MAAM,EAAE,SAAS,EAAE,GAAG,OAAO,CAAC;QAC9B,IAAI,OAAO,CAAC;QACZ,MAAM,IAAI,GAAG,OAAO,cAAc,CAAC;QACnC,IAAI,IAAI,KAAK,QAAQ,EAAE;YACnB,IAAI,CAAC,cAAc,EAAE;gBACjB,OAAO,GAAG,MAAM,CAAC;aACpB;iBAAM,IAAI,cAAc,YAAY,KAAK,EAAE;gBACxC,OAAO,GAAG,SAAS;oBACf,CAAC,CAAC,cAAc,CAAC,OAAO;oBACxB,8BAA8B;oBAC9B,CAAC,CAAC,cAAc,CAAC,KAAK,CAAC;aAC9B;iBAAM,IAAI,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,SAAS,CAAC,EAAE;gBACvD,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,SAAS,CAAC,CAAC;aAC5D;iBAAM,IAAK,cAAyB,CAAC,QAAQ,EAAE,KAAK,iBAAiB,EAAE;gBACpE,OAAO,GAAI,cAAyB,CAAC,QAAQ,EAAE,CAAC;aACnD;iBAAM;gBACH,IAAI;oBACA,OAAO,GAAG,mBAAI,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;iBAC1C;gBAAC,OAAO,GAAG,EAAE;oBACV,OAAO,GAAG,yDAAyD,CAAC;iBACvE;aACJ;SACJ;aAAM,IAAI,IAAI,KAAK,WAAW,EAAE;YAC7B,OAAO,GAAG,WAAW,CAAC;SACzB;aAAM;YACH,OAAO,GAAI,cAAyB,CAAC,QAAQ,EAAE,CAAC;SACnD;QAED,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACrC,CAAC;IAES,iBAAiB,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,EAAE,oBAAoB,EAA2B;QAChH,MAAM,gBAAgB,GAAG,MAAM,CAAC,WAAW,EAAE,CAAC;QAC9C,MAAM,aAAa,GAAG,IAAA,wBAAY,EAAC,GAAG,EAAE,eAAe,CAAC,IAAI,GAAG,CAAC,CAAC,6DAA6D;QAC9H,IAAI,CAAC,oBAAoB,EAAE;YACvB,IAAI,gBAAgB,KAAK,KAAK,IAAI,OAAO,EAAE;gBACvC,8EAA8E;gBAC9E,GAAG,CAAC,UAAU,CAAC,uBAAuB,gBAAgB,2BAA2B;sBAC3E,kFAAkF;sBAClF,iHAAiH,CAAC,CAAC;aAC5H;YACD,OAAO,aAAa,CAAC;SACxB;QACD,MAAM,WAAW,GAAG,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC9D,OAAO,GAAG,gBAAgB,IAAI,WAAW,KAAK,aAAa,EAAE,CAAC;IAClE,CAAC;IAES,YAAY,CAAC,OAAmB;QACtC,OAAO,qBAAM;aACR,UAAU,CAAC,QAAQ,CAAC;aACpB,MAAM,CAAC,OAAO,CAAC;aACf,MAAM,CAAC,QAAQ,CAAC;aAChB,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;aACrB,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACzB,CAAC;CACJ;AAlTD,0BAkTC"}
|
package/storages/index.d.ts
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
export * from './dataset';
|
|
2
2
|
export * from './key_value_store';
|
|
3
3
|
export * from './request_list';
|
|
4
|
+
export * from './request_provider';
|
|
4
5
|
export * from './request_queue';
|
|
6
|
+
export * from './request_queue_v2';
|
|
5
7
|
export * from './storage_manager';
|
|
6
8
|
export * from './utils';
|
|
7
9
|
//# sourceMappingURL=index.d.ts.map
|
package/storages/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/storages/index.ts"],"names":[],"mappings":"AAAA,cAAc,WAAW,CAAC;AAC1B,cAAc,mBAAmB,CAAC;AAClC,cAAc,gBAAgB,CAAC;AAC/B,cAAc,iBAAiB,CAAC;AAChC,cAAc,mBAAmB,CAAC;AAClC,cAAc,SAAS,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/storages/index.ts"],"names":[],"mappings":"AAAA,cAAc,WAAW,CAAC;AAC1B,cAAc,mBAAmB,CAAC;AAClC,cAAc,gBAAgB,CAAC;AAC/B,cAAc,oBAAoB,CAAC;AACnC,cAAc,iBAAiB,CAAC;AAChC,cAAc,oBAAoB,CAAC;AACnC,cAAc,mBAAmB,CAAC;AAClC,cAAc,SAAS,CAAC"}
|
package/storages/index.js
CHANGED
|
@@ -4,7 +4,9 @@ const tslib_1 = require("tslib");
|
|
|
4
4
|
tslib_1.__exportStar(require("./dataset"), exports);
|
|
5
5
|
tslib_1.__exportStar(require("./key_value_store"), exports);
|
|
6
6
|
tslib_1.__exportStar(require("./request_list"), exports);
|
|
7
|
+
tslib_1.__exportStar(require("./request_provider"), exports);
|
|
7
8
|
tslib_1.__exportStar(require("./request_queue"), exports);
|
|
9
|
+
tslib_1.__exportStar(require("./request_queue_v2"), exports);
|
|
8
10
|
tslib_1.__exportStar(require("./storage_manager"), exports);
|
|
9
11
|
tslib_1.__exportStar(require("./utils"), exports);
|
|
10
12
|
//# sourceMappingURL=index.js.map
|
package/storages/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/storages/index.ts"],"names":[],"mappings":";;;AAAA,oDAA0B;AAC1B,4DAAkC;AAClC,yDAA+B;AAC/B,0DAAgC;AAChC,4DAAkC;AAClC,kDAAwB"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/storages/index.ts"],"names":[],"mappings":";;;AAAA,oDAA0B;AAC1B,4DAAkC;AAClC,yDAA+B;AAC/B,6DAAmC;AACnC,0DAAgC;AAChC,6DAAmC;AACnC,4DAAkC;AAClC,kDAAwB"}
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
import { ListDictionary, LruCache } from '@apify/datastructures';
|
|
2
|
+
import type { Log } from '@apify/log';
|
|
3
|
+
import type { BatchAddRequestsResult, Dictionary, ProcessedRequest, QueueOperationInfo, RequestQueueClient, RequestQueueInfo, StorageClient } from '@crawlee/types';
|
|
4
|
+
import type { IStorage, StorageManagerOptions } from './storage_manager';
|
|
5
|
+
import { Configuration } from '../configuration';
|
|
6
|
+
import type { ProxyConfiguration } from '../proxy_configuration';
|
|
7
|
+
import { Request } from '../request';
|
|
8
|
+
import type { RequestOptions, InternalSource, Source } from '../request';
|
|
9
|
+
export declare abstract class RequestProvider implements IStorage {
|
|
10
|
+
readonly config: Configuration;
|
|
11
|
+
id: string;
|
|
12
|
+
name?: string;
|
|
13
|
+
timeoutSecs: number;
|
|
14
|
+
clientKey: string;
|
|
15
|
+
client: RequestQueueClient;
|
|
16
|
+
protected proxyConfiguration?: ProxyConfiguration;
|
|
17
|
+
log: Log;
|
|
18
|
+
internalTimeoutMillis: number;
|
|
19
|
+
requestLockSecs: number;
|
|
20
|
+
assumedTotalCount: number;
|
|
21
|
+
assumedHandledCount: number;
|
|
22
|
+
protected queueHeadIds: ListDictionary<string>;
|
|
23
|
+
protected requestCache: LruCache<RequestLruItem>;
|
|
24
|
+
/** @internal */
|
|
25
|
+
inProgress: Set<string>;
|
|
26
|
+
protected recentlyHandledRequestsCache: LruCache<boolean>;
|
|
27
|
+
protected queuePausedForMigration: boolean;
|
|
28
|
+
constructor(options: InternalRequestProviderOptions, config?: Configuration);
|
|
29
|
+
/**
|
|
30
|
+
* @ignore
|
|
31
|
+
*/
|
|
32
|
+
inProgressCount(): number;
|
|
33
|
+
/**
|
|
34
|
+
* Adds a request to the queue.
|
|
35
|
+
*
|
|
36
|
+
* If a request with the same `uniqueKey` property is already present in the queue,
|
|
37
|
+
* it will not be updated. You can find out whether this happened from the resulting
|
|
38
|
+
* {@apilink QueueOperationInfo} object.
|
|
39
|
+
*
|
|
40
|
+
* To add multiple requests to the queue by extracting links from a webpage,
|
|
41
|
+
* see the {@apilink enqueueLinks} helper function.
|
|
42
|
+
*
|
|
43
|
+
* @param requestLike {@apilink Request} object or vanilla object with request data.
|
|
44
|
+
* Note that the function sets the `uniqueKey` and `id` fields to the passed Request.
|
|
45
|
+
* @param [options] Request queue operation options.
|
|
46
|
+
*/
|
|
47
|
+
addRequest(requestLike: Source, options?: RequestQueueOperationOptions): Promise<RequestQueueOperationInfo>;
|
|
48
|
+
/**
|
|
49
|
+
* Adds requests to the queue in batches of 25.
|
|
50
|
+
*
|
|
51
|
+
* If a request that is passed in is already present due to its `uniqueKey` property being the same,
|
|
52
|
+
* it will not be updated. You can find out whether this happened by finding the request in the resulting
|
|
53
|
+
* {@apilink BatchAddRequestsResult} object.
|
|
54
|
+
*
|
|
55
|
+
* @param requestsLike {@apilink Request} objects or vanilla objects with request data.
|
|
56
|
+
* Note that the function sets the `uniqueKey` and `id` fields to the passed requests if missing.
|
|
57
|
+
* @param [options] Request queue operation options.
|
|
58
|
+
*/
|
|
59
|
+
addRequests(requestsLike: Source[], options?: RequestQueueOperationOptions): Promise<BatchAddRequestsResult>;
|
|
60
|
+
/**
|
|
61
|
+
* Adds requests to the queue in batches. By default, it will resolve after the initial batch is added, and continue
|
|
62
|
+
* adding the rest in background. You can configure the batch size via `batchSize` option and the sleep time in between
|
|
63
|
+
* the batches via `waitBetweenBatchesMillis`. If you want to wait for all batches to be added to the queue, you can use
|
|
64
|
+
* the `waitForAllRequestsToBeAdded` promise you get in the response object.
|
|
65
|
+
*
|
|
66
|
+
* @param requests The requests to add
|
|
67
|
+
* @param options Options for the request queue
|
|
68
|
+
*/
|
|
69
|
+
addRequestsBatched(requests: (string | Source)[], options?: AddRequestsBatchedOptions): Promise<AddRequestsBatchedResult>;
|
|
70
|
+
/**
|
|
71
|
+
* Gets the request from the queue specified by ID.
|
|
72
|
+
*
|
|
73
|
+
* @param id ID of the request.
|
|
74
|
+
* @returns Returns the request object, or `null` if it was not found.
|
|
75
|
+
*/
|
|
76
|
+
getRequest<T extends Dictionary = Dictionary>(id: string): Promise<Request<T> | null>;
|
|
77
|
+
abstract fetchNextRequest<T extends Dictionary = Dictionary>(options?: RequestOptions): Promise<Request<T> | null>;
|
|
78
|
+
/**
|
|
79
|
+
* Marks a request that was previously returned by the
|
|
80
|
+
* {@apilink RequestQueue.fetchNextRequest}
|
|
81
|
+
* function as handled after successful processing.
|
|
82
|
+
* Handled requests will never again be returned by the `fetchNextRequest` function.
|
|
83
|
+
*/
|
|
84
|
+
markRequestHandled(request: Request): Promise<RequestQueueOperationInfo | null>;
|
|
85
|
+
/**
|
|
86
|
+
* Reclaims a failed request back to the queue, so that it can be returned for processing later again
|
|
87
|
+
* by another call to {@apilink RequestQueue.fetchNextRequest}.
|
|
88
|
+
* The request record in the queue is updated using the provided `request` parameter.
|
|
89
|
+
* For example, this lets you store the number of retries or error messages for the request.
|
|
90
|
+
*/
|
|
91
|
+
reclaimRequest(request: Request, options?: RequestQueueOperationOptions): Promise<RequestQueueOperationInfo | null>;
|
|
92
|
+
protected abstract ensureHeadIsNonEmpty(): Promise<void>;
|
|
93
|
+
/**
|
|
94
|
+
* Resolves to `true` if the next call to {@apilink RequestQueue.fetchNextRequest}
|
|
95
|
+
* would return `null`, otherwise it resolves to `false`.
|
|
96
|
+
* Note that even if the queue is empty, there might be some pending requests currently being processed.
|
|
97
|
+
* If you need to ensure that there is no activity in the queue, use {@apilink RequestQueue.isFinished}.
|
|
98
|
+
*/
|
|
99
|
+
isEmpty(): Promise<boolean>;
|
|
100
|
+
/**
|
|
101
|
+
* Resolves to `true` if all requests were already handled and there are no more left.
|
|
102
|
+
* Due to the nature of distributed storage used by the queue,
|
|
103
|
+
* the function might occasionally return a false negative,
|
|
104
|
+
* but it will never return a false positive.
|
|
105
|
+
*/
|
|
106
|
+
isFinished(): Promise<boolean>;
|
|
107
|
+
protected _reset(): void;
|
|
108
|
+
/**
|
|
109
|
+
* Caches information about request to beware of unneeded addRequest() calls.
|
|
110
|
+
*/
|
|
111
|
+
protected _cacheRequest(cacheKey: string, queueOperationInfo: RequestQueueOperationInfo): void;
|
|
112
|
+
/**
|
|
113
|
+
* Adds a request straight to the queueHeadDict, to improve performance.
|
|
114
|
+
*/
|
|
115
|
+
protected _maybeAddRequestToQueueHead(requestId: string, forefront: boolean): void;
|
|
116
|
+
/**
|
|
117
|
+
* Removes the queue either from the Apify Cloud storage or from the local database,
|
|
118
|
+
* depending on the mode of operation.
|
|
119
|
+
*/
|
|
120
|
+
drop(): Promise<void>;
|
|
121
|
+
/**
|
|
122
|
+
* Returns the number of handled requests.
|
|
123
|
+
*
|
|
124
|
+
* This function is just a convenient shortcut for:
|
|
125
|
+
*
|
|
126
|
+
* ```javascript
|
|
127
|
+
* const { handledRequestCount } = await queue.getInfo();
|
|
128
|
+
* ```
|
|
129
|
+
*/
|
|
130
|
+
handledCount(): Promise<number>;
|
|
131
|
+
/**
|
|
132
|
+
* Returns an object containing general information about the request queue.
|
|
133
|
+
*
|
|
134
|
+
* The function returns the same object as the Apify API Client's
|
|
135
|
+
* [getQueue](https://docs.apify.com/api/apify-client-js/latest#ApifyClient-requestQueues)
|
|
136
|
+
* function, which in turn calls the
|
|
137
|
+
* [Get request queue](https://apify.com/docs/api/v2#/reference/request-queues/queue/get-request-queue)
|
|
138
|
+
* API endpoint.
|
|
139
|
+
*
|
|
140
|
+
* **Example:**
|
|
141
|
+
* ```
|
|
142
|
+
* {
|
|
143
|
+
* id: "WkzbQMuFYuamGv3YF",
|
|
144
|
+
* name: "my-queue",
|
|
145
|
+
* userId: "wRsJZtadYvn4mBZmm",
|
|
146
|
+
* createdAt: new Date("2015-12-12T07:34:14.202Z"),
|
|
147
|
+
* modifiedAt: new Date("2015-12-13T08:36:13.202Z"),
|
|
148
|
+
* accessedAt: new Date("2015-12-14T08:36:13.202Z"),
|
|
149
|
+
* totalRequestCount: 25,
|
|
150
|
+
* handledRequestCount: 5,
|
|
151
|
+
* pendingRequestCount: 20,
|
|
152
|
+
* }
|
|
153
|
+
* ```
|
|
154
|
+
*/
|
|
155
|
+
getInfo(): Promise<RequestQueueInfo | undefined>;
|
|
156
|
+
/**
|
|
157
|
+
* Fetches URLs from requestsFromUrl and returns them in format of list of requests
|
|
158
|
+
*/
|
|
159
|
+
protected _fetchRequestsFromUrl(source: InternalSource): Promise<RequestOptions[]>;
|
|
160
|
+
/**
|
|
161
|
+
* Adds all fetched requests from a URL from a remote resource.
|
|
162
|
+
*/
|
|
163
|
+
protected _addFetchedRequests(source: InternalSource, fetchedRequests: RequestOptions[], options: RequestQueueOperationOptions): Promise<ProcessedRequest[]>;
|
|
164
|
+
/**
|
|
165
|
+
* @internal wraps public utility for mocking purposes
|
|
166
|
+
*/
|
|
167
|
+
private _downloadListOfUrls;
|
|
168
|
+
/**
|
|
169
|
+
* Opens a request queue and returns a promise resolving to an instance
|
|
170
|
+
* of the {@apilink RequestQueue} class.
|
|
171
|
+
*
|
|
172
|
+
* {@apilink RequestQueue} represents a queue of URLs to crawl, which is stored either on the local filesystem or in the cloud.
|
|
173
|
+
* The queue is used for deep crawling of websites, where you start with several URLs and then
|
|
174
|
+
* recursively follow links to other pages. The data structure supports both breadth-first
|
|
175
|
+
* and depth-first crawling orders.
|
|
176
|
+
*
|
|
177
|
+
* For more details and code examples, see the {@apilink RequestQueue} class.
|
|
178
|
+
*
|
|
179
|
+
* @param [queueIdOrName]
|
|
180
|
+
* ID or name of the request queue to be opened. If `null` or `undefined`,
|
|
181
|
+
* the function returns the default request queue associated with the crawler run.
|
|
182
|
+
* @param [options] Open Request Queue options.
|
|
183
|
+
*/
|
|
184
|
+
static open(queueIdOrName?: string | null, options?: StorageManagerOptions): Promise<BuiltRequestProvider>;
|
|
185
|
+
}
|
|
186
|
+
/**
* Non-exported declaration of a `RequestProvider` subclass that declares
* `fetchNextRequest` and `ensureHeadIsNonEmpty`.
*
* It is the type that the static `open()` declaration of `RequestProvider` resolves to.
*/
declare class BuiltRequestProvider extends RequestProvider {
|
|
187
|
+
fetchNextRequest<T extends Dictionary = Dictionary>(options?: RequestOptions<Dictionary> | undefined): Promise<Request<T> | null>;
|
|
188
|
+
protected ensureHeadIsNonEmpty(): Promise<void>;
|
|
189
|
+
}
|
|
190
|
+
/**
* Non-exported record describing a single request: its `uniqueKey`, `id`, handled flag,
* an optional `Request` instance and an optional lock expiry.
*
* NOTE(review): the name suggests this is the entry shape of an LRU cache kept by the
* provider — confirm against the implementation in request_provider.ts.
*/
interface RequestLruItem {
|
|
191
|
+
uniqueKey: string;
|
|
192
|
+
isHandled: boolean;
|
|
193
|
+
id: string;
|
|
194
|
+
hydrated: Request | null; // NOTE(review): presumably the full Request when it has been loaded, otherwise null — confirm
|
|
195
|
+
lockExpiresAt: number | null; // NOTE(review): presumably a timestamp, null when the request is not locked — confirm units
|
|
196
|
+
}
|
|
197
|
+
/**
* Options identifying a request provider's storage: its `id`, an optional `name`,
* the storage `client`, and an optional proxy configuration.
*
* NOTE(review): presumably the public part of the provider's constructor options
* (see `InternalRequestProviderOptions`, which extends this) — confirm.
*/
export interface RequestProviderOptions {
|
|
198
|
+
id: string;
|
|
199
|
+
name?: string;
|
|
200
|
+
client: StorageClient; // NOTE(review): presumably the client used to reach the underlying storage — confirm
|
|
201
|
+
/**
|
|
202
|
+
* Used to pass the proxy configuration for the `requestsFromUrl` objects.
|
|
203
|
+
* Takes advantage of the internal address rotation and authentication process.
|
|
204
|
+
* If undefined, the `requestsFromUrl` requests will be made without proxy.
|
|
205
|
+
*/
|
|
206
|
+
proxyConfiguration?: ProxyConfiguration;
|
|
207
|
+
}
|
|
208
|
+
/**
* `RequestProviderOptions` extended with three required settings: a log prefix and
* two maximum sizes.
*
* NOTE(review): the names suggest the sizes bound a request cache and a cache of
* recently handled requests — confirm against the implementation.
*/
export interface InternalRequestProviderOptions extends RequestProviderOptions {
|
|
209
|
+
logPrefix: string; // NOTE(review): presumably prepended to the provider's log messages — confirm
|
|
210
|
+
requestCacheMaxSize: number;
|
|
211
|
+
recentlyHandledRequestsMaxSize: number;
|
|
212
|
+
}
|
|
213
|
+
/**
* Options for queue operations that add or reclaim a request; the only setting,
* `forefront`, controls where in the queue the request is placed.
*/
export interface RequestQueueOperationOptions {
|
|
214
|
+
/**
|
|
215
|
+
* If set to `true`:
|
|
216
|
+
* - while adding the request to the queue: the request will be added to the foremost position in the queue.
|
|
217
|
+
* - while reclaiming the request: the request will be placed at the beginning of the queue, so that it's returned
|
|
218
|
+
* in the next call to {@apilink RequestQueue.fetchNextRequest}.
|
|
219
|
+
* By default, it's put at the end of the queue.
|
|
220
|
+
* @default false
|
|
221
|
+
*/
|
|
222
|
+
forefront?: boolean;
|
|
223
|
+
}
|
|
224
|
+
/**
|
|
225
|
+
* `QueueOperationInfo` extended with the `uniqueKey` of the request the operation concerned.
*
* @internal
|
|
226
|
+
*/
|
|
227
|
+
export interface RequestQueueOperationInfo extends QueueOperationInfo {
|
|
228
|
+
uniqueKey: string;
|
|
229
|
+
}
|
|
230
|
+
/**
* Options for adding requests in batches. Inherits `forefront` from
* `RequestQueueOperationOptions` and adds batching controls.
*/
export interface AddRequestsBatchedOptions extends RequestQueueOperationOptions {
|
|
231
|
+
/**
|
|
232
|
+
* Whether to wait for all the provided requests to be added, instead of waiting just for the initial batch of up to `batchSize`.
|
|
233
|
+
* @default false
|
|
234
|
+
*/
|
|
235
|
+
waitForAllRequestsToBeAdded?: boolean;
|
|
236
|
+
/**
|
|
237
|
+
* Upper bound on the number of requests in the initial batch.
* NOTE(review): presumably also the size of every subsequent batch — confirm.
* @default 1000
|
|
238
|
+
*/
|
|
239
|
+
batchSize?: number;
|
|
240
|
+
/**
|
|
241
|
+
* NOTE(review): presumably the delay, in milliseconds, between adding consecutive batches — confirm.
* @default 1000
|
|
242
|
+
*/
|
|
243
|
+
waitBetweenBatchesMillis?: number;
|
|
244
|
+
}
|
|
245
|
+
/**
* Result of adding requests in batches: the requests available immediately in
* `addedRequests`, plus a promise for the remainder.
*/
export interface AddRequestsBatchedResult {
|
|
246
|
+
addedRequests: ProcessedRequest[]; // NOTE(review): presumably the requests processed in the initial batch — confirm
|
|
247
|
+
/**
|
|
248
|
+
* A promise which will resolve with the rest of the requests that were added to the queue.
|
|
249
|
+
*
|
|
250
|
+
* Alternatively, we can set {@apilink AddRequestsBatchedOptions.waitForAllRequestsToBeAdded|`waitForAllRequestsToBeAdded`} to `true`
|
|
251
|
+
* in the {@apilink BasicCrawler.addRequests|`crawler.addRequests()`} options.
|
|
252
|
+
*
|
|
253
|
+
* **Example:**
|
|
254
|
+
*
|
|
255
|
+
* ```ts
|
|
256
|
+
* // Assuming `requests` is a list of requests.
|
|
257
|
+
* const result = await crawler.addRequests(requests);
|
|
258
|
+
*
|
|
259
|
+
* // If we want to wait for the rest of the requests to be added to the queue:
|
|
260
|
+
* await result.waitForAllRequestsToBeAdded;
|
|
261
|
+
* ```
|
|
262
|
+
*/
|
|
263
|
+
waitForAllRequestsToBeAdded: Promise<ProcessedRequest[]>;
|
|
264
|
+
}
|
|
265
|
+
export {};
|
|
266
|
+
//# sourceMappingURL=request_provider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"request_provider.d.ts","sourceRoot":"","sources":["../../src/storages/request_provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AACjE,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,YAAY,CAAC;AAEtC,OAAO,KAAK,EACR,sBAAsB,EACtB,UAAU,EACV,gBAAgB,EAChB,kBAAkB,EAClB,kBAAkB,EAClB,gBAAgB,EAChB,aAAa,EAChB,MAAM,gBAAgB,CAAC;AAIxB,OAAO,KAAK,EAAE,QAAQ,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAGzE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AACrC,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAGzE,8BAAsB,eAAgB,YAAW,QAAQ;IAyBA,QAAQ,CAAC,MAAM;IAxBpE,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,SAAM;IACjB,SAAS,SAA0B;IACnC,MAAM,EAAE,kBAAkB,CAAC;IAC3B,SAAS,CAAC,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAElD,GAAG,EAAE,GAAG,CAAC;IACT,qBAAqB,SAAc;IACnC,eAAe,SAAU;IAIzB,iBAAiB,SAAK;IACtB,mBAAmB,SAAK;IAExB,SAAS,CAAC,YAAY,yBAAgC;IACtD,SAAS,CAAC,YAAY,EAAE,QAAQ,CAAC,cAAc,CAAC,CAAC;IACjD,gBAAgB;IAChB,UAAU,cAAqB;IAC/B,SAAS,CAAC,4BAA4B,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IAE1D,SAAS,CAAC,uBAAuB,UAAS;gBAE9B,OAAO,EAAE,8BAA8B,EAAW,MAAM,gBAAkC;IAqBtG;;OAEG;IACH,eAAe;IAIf;;;;;;;;;;;;;OAaG;IACG,UAAU,CAAC,WAAW,EAAE,MAAM,EAAE,OAAO,GAAE,4BAAiC,GAAG,OAAO,CAAC,yBAAyB,CAAC;IAuDrH;;;;;;;;;;OAUG;IACG,WAAW,CACb,YAAY,EAAE,MAAM,EAAE,EACtB,OAAO,GAAE,4BAAiC,GAC3C,OAAO,CAAC,sBAAsB,CAAC;IA0FlC;;;;;;;;OAQG;IACG,kBAAkB,CAAC,QAAQ,EAAE,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,EAAE,OAAO,GAAE,yBAA8B,GAAG,OAAO,CAAC,wBAAwB,CAAC;IAiFnI;;;;;OAKG;IACG,UAAU,CAAC,CAAC,SAAS,UAAU,GAAG,UAAU,EAAE,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;IAS3F,QAAQ,CAAC,gBAAgB,CAAC,CAAC,SAAS,UAAU,GAAG,UAAU,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;IAElH;;;;;OAKG;IACG,kBAAkB,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC;IA6BrF;;;;;OAKG;IACG,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,GAAE,4BAAiC,GAAG,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC;IAuC7H,SAAS,CAAC,QAAQ,CAAC,oBAAoB,IAAI,OAAO,C
AAC,IAAI,CAAC;IAExD;;;;;OAKG;IACG,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC;IAKjC;;;;;OAKG;IACG,UAAU,IAAI,OAAO,CAAC,OAAO,CAAC;IAOpC,SAAS,CAAC,MAAM;IAShB;;OAEG;IACH,SAAS,CAAC,aAAa,CAAC,QAAQ,EAAE,MAAM,EAAE,kBAAkB,EAAE,yBAAyB,GAAG,IAAI;IAU9F;;OAEG;IACH,SAAS,CAAC,2BAA2B,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,GAAG,IAAI;IAQlF;;;OAGG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAM3B;;;;;;;;OAQG;IACG,YAAY,IAAI,OAAO,CAAC,MAAM,CAAC;IAMrC;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACG,OAAO,IAAI,OAAO,CAAC,gBAAgB,GAAG,SAAS,CAAC;IAItD;;OAEG;cACa,qBAAqB,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IAoBxF;;OAEG;cACa,mBAAmB,CAAC,MAAM,EAAE,cAAc,EAAE,eAAe,EAAE,cAAc,EAAE,EAAE,OAAO,EAAE,4BAA4B;IAgBpI;;OAEG;YACW,mBAAmB;IAIjC;;;;;;;;;;;;;;;OAeG;WACU,IAAI,CAAC,aAAa,CAAC,EAAE,MAAM,GAAG,IAAI,EAAE,OAAO,GAAE,qBAA0B,GAAG,OAAO,CAAC,oBAAoB,CAAC;CAmBvH;AAED,OAAO,OAAO,oBAAqB,SAAQ,eAAe;IAC7C,gBAAgB,CAAC,CAAC,SAAS,UAAU,GAAG,UAAU,EAAE,OAAO,CAAC,EAAE,cAAc,CAAC,UAAU,CAAC,GAAG,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;cACvH,oBAAoB,IAAI,OAAO,CAAC,IAAI,CAAC;CAC3D;AAED,UAAU,cAAc;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,OAAO,GAAG,IAAI,CAAC;IACzB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;CAChC;AAED,MAAM,WAAW,sBAAsB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,aAAa,CAAC;IAEtB;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;CAC3C;AAED,MAAM,WAAW,8BAA+B,SAAQ,sBAAsB;IAC1E,SAAS,EAAE,MAAM,CAAC;IAClB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,8BAA8B,EAAE,MAAM,CAAC;CAC1C;AAED,MAAM,WAAW,4BAA4B;IACzC;;;;;;;OAOG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,yBAA0B,SAAQ,kBAAkB;IACjE,SAAS,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,yBAA0B,SAAQ,4BAA4B;IAC3E;;;OAGG;IACH,2BAA2B,CAAC,EAAE,OAAO,CAAC;IAEtC;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;OAEG;IACH,wBAAwB,CAAC,EAAE,MAAM,CAAC;CACrC;AAED,MAAM,WAAW,wBAAwB;IACrC,aAAa,EAAE,gBAAgB,EAAE,CAAC;IAClC;;;;;;;;;;;;;;;OAeG;IACH,2BAA2B,EAAE,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAAC;CAC5D"}
|