@crawlee/core 4.0.0-beta.4 → 4.0.0-beta.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -5
- package/autoscaling/autoscaled_pool.d.ts +3 -5
- package/autoscaling/autoscaled_pool.d.ts.map +1 -1
- package/autoscaling/autoscaled_pool.js +3 -9
- package/autoscaling/autoscaled_pool.js.map +1 -1
- package/autoscaling/snapshotter.d.ts +3 -13
- package/autoscaling/snapshotter.d.ts.map +1 -1
- package/autoscaling/snapshotter.js +15 -29
- package/autoscaling/snapshotter.js.map +1 -1
- package/autoscaling/system_status.d.ts +0 -3
- package/autoscaling/system_status.d.ts.map +1 -1
- package/autoscaling/system_status.js +2 -3
- package/autoscaling/system_status.js.map +1 -1
- package/configuration.d.ts +5 -78
- package/configuration.d.ts.map +1 -1
- package/configuration.js +6 -102
- package/configuration.js.map +1 -1
- package/cookie_utils.d.ts +1 -1
- package/cookie_utils.d.ts.map +1 -1
- package/cookie_utils.js +8 -8
- package/cookie_utils.js.map +1 -1
- package/crawlers/context_pipeline.d.ts +71 -0
- package/crawlers/context_pipeline.d.ts.map +1 -0
- package/crawlers/context_pipeline.js +121 -0
- package/crawlers/context_pipeline.js.map +1 -0
- package/crawlers/crawler_commons.d.ts +15 -23
- package/crawlers/crawler_commons.d.ts.map +1 -1
- package/crawlers/crawler_commons.js +0 -8
- package/crawlers/crawler_commons.js.map +1 -1
- package/crawlers/error_snapshotter.d.ts +3 -2
- package/crawlers/error_snapshotter.d.ts.map +1 -1
- package/crawlers/error_snapshotter.js +2 -2
- package/crawlers/error_snapshotter.js.map +1 -1
- package/crawlers/error_tracker.d.ts +2 -1
- package/crawlers/error_tracker.d.ts.map +1 -1
- package/crawlers/error_tracker.js.map +1 -1
- package/crawlers/index.d.ts +1 -1
- package/crawlers/index.d.ts.map +1 -1
- package/crawlers/index.js +1 -1
- package/crawlers/index.js.map +1 -1
- package/crawlers/internals/types.d.ts +8 -0
- package/crawlers/internals/types.d.ts.map +1 -0
- package/crawlers/internals/types.js +2 -0
- package/crawlers/internals/types.js.map +1 -0
- package/crawlers/statistics.d.ts +15 -15
- package/crawlers/statistics.d.ts.map +1 -1
- package/crawlers/statistics.js +21 -18
- package/crawlers/statistics.js.map +1 -1
- package/enqueue_links/enqueue_links.d.ts +30 -18
- package/enqueue_links/enqueue_links.d.ts.map +1 -1
- package/enqueue_links/enqueue_links.js +41 -23
- package/enqueue_links/enqueue_links.js.map +1 -1
- package/enqueue_links/shared.d.ts +24 -7
- package/enqueue_links/shared.d.ts.map +1 -1
- package/enqueue_links/shared.js +66 -37
- package/enqueue_links/shared.js.map +1 -1
- package/errors.d.ts +18 -0
- package/errors.d.ts.map +1 -1
- package/errors.js +35 -0
- package/errors.js.map +1 -1
- package/events/event_manager.d.ts +8 -5
- package/events/event_manager.d.ts.map +1 -1
- package/events/event_manager.js +7 -9
- package/events/event_manager.js.map +1 -1
- package/events/local_event_manager.d.ts +14 -4
- package/events/local_event_manager.d.ts.map +1 -1
- package/events/local_event_manager.js +27 -39
- package/events/local_event_manager.js.map +1 -1
- package/index.d.ts +2 -1
- package/index.d.ts.map +1 -1
- package/index.js +2 -1
- package/index.js.map +1 -1
- package/log.d.ts +146 -2
- package/log.d.ts.map +1 -1
- package/log.js +102 -0
- package/log.js.map +1 -1
- package/package.json +6 -7
- package/proxy_configuration.d.ts +17 -94
- package/proxy_configuration.d.ts.map +1 -1
- package/proxy_configuration.js +18 -54
- package/proxy_configuration.js.map +1 -1
- package/recoverable_state.d.ts +121 -0
- package/recoverable_state.d.ts.map +1 -0
- package/recoverable_state.js +137 -0
- package/recoverable_state.js.map +1 -0
- package/request.d.ts +48 -6
- package/request.d.ts.map +1 -1
- package/request.js +62 -16
- package/request.js.map +1 -1
- package/service_locator.d.ts +130 -0
- package/service_locator.d.ts.map +1 -0
- package/service_locator.js +249 -0
- package/service_locator.js.map +1 -0
- package/session_pool/session.d.ts +9 -31
- package/session_pool/session.d.ts.map +1 -1
- package/session_pool/session.js +17 -21
- package/session_pool/session.js.map +1 -1
- package/session_pool/session_pool.d.ts +27 -54
- package/session_pool/session_pool.d.ts.map +1 -1
- package/session_pool/session_pool.js +54 -69
- package/session_pool/session_pool.js.map +1 -1
- package/storages/dataset.d.ts +53 -3
- package/storages/dataset.d.ts.map +1 -1
- package/storages/dataset.js +78 -6
- package/storages/dataset.js.map +1 -1
- package/storages/index.d.ts +2 -0
- package/storages/index.d.ts.map +1 -1
- package/storages/index.js +2 -0
- package/storages/index.js.map +1 -1
- package/storages/key_value_store.d.ts +71 -1
- package/storages/key_value_store.d.ts.map +1 -1
- package/storages/key_value_store.js +95 -12
- package/storages/key_value_store.js.map +1 -1
- package/storages/request_list.d.ts +9 -9
- package/storages/request_list.d.ts.map +1 -1
- package/storages/request_list.js +11 -8
- package/storages/request_list.js.map +1 -1
- package/storages/request_list_adapter.d.ts +58 -0
- package/storages/request_list_adapter.d.ts.map +1 -0
- package/storages/request_list_adapter.js +81 -0
- package/storages/request_list_adapter.js.map +1 -0
- package/storages/request_manager_tandem.d.ts +68 -0
- package/storages/request_manager_tandem.d.ts.map +1 -0
- package/storages/request_manager_tandem.js +124 -0
- package/storages/request_manager_tandem.js.map +1 -0
- package/storages/request_provider.d.ts +76 -9
- package/storages/request_provider.d.ts.map +1 -1
- package/storages/request_provider.js +92 -54
- package/storages/request_provider.js.map +1 -1
- package/storages/request_queue.d.ts +1 -3
- package/storages/request_queue.d.ts.map +1 -1
- package/storages/request_queue.js +2 -4
- package/storages/request_queue.js.map +1 -1
- package/storages/request_queue_v2.d.ts +3 -3
- package/storages/request_queue_v2.d.ts.map +1 -1
- package/storages/request_queue_v2.js +4 -5
- package/storages/request_queue_v2.js.map +1 -1
- package/storages/sitemap_request_list.d.ts +5 -5
- package/storages/sitemap_request_list.d.ts.map +1 -1
- package/storages/sitemap_request_list.js +8 -7
- package/storages/sitemap_request_list.js.map +1 -1
- package/storages/storage_manager.d.ts +10 -8
- package/storages/storage_manager.d.ts.map +1 -1
- package/storages/storage_manager.js +12 -22
- package/storages/storage_manager.js.map +1 -1
- package/storages/utils.d.ts.map +1 -1
- package/storages/utils.js +4 -3
- package/storages/utils.js.map +1 -1
- package/typedefs.d.ts +1 -1
- package/typedefs.d.ts.map +1 -1
- package/crawlers/crawler_extension.d.ts +0 -12
- package/crawlers/crawler_extension.d.ts.map +0 -1
- package/crawlers/crawler_extension.js +0 -14
- package/crawlers/crawler_extension.js.map +0 -1
- package/http_clients/base-http-client.d.ts +0 -134
- package/http_clients/base-http-client.d.ts.map +0 -1
- package/http_clients/base-http-client.js +0 -33
- package/http_clients/base-http-client.js.map +0 -1
- package/http_clients/form-data-like.d.ts +0 -67
- package/http_clients/form-data-like.d.ts.map +0 -1
- package/http_clients/form-data-like.js +0 -5
- package/http_clients/form-data-like.js.map +0 -1
- package/http_clients/got-scraping-http-client.d.ts +0 -15
- package/http_clients/got-scraping-http-client.d.ts.map +0 -1
- package/http_clients/got-scraping-http-client.js +0 -69
- package/http_clients/got-scraping-http-client.js.map +0 -1
- package/http_clients/index.d.ts +0 -3
- package/http_clients/index.d.ts.map +0 -1
- package/http_clients/index.js +0 -3
- package/http_clients/index.js.map +0 -1
- package/tsconfig.build.tsbuildinfo +0 -1
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"form-data-like.d.ts","sourceRoot":"","sources":["../../src/http_clients/form-data-like.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,UAAU,QAAQ;IACd;;OAEG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB;;OAEG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB;;OAEG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB;;OAEG;IACH,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B;;OAEG;IACH,MAAM,IAAI,cAAc,CAAC,UAAU,CAAC,GAAG,aAAa,CAAC,UAAU,CAAC,CAAC;IACjE,QAAQ,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,EAAE,MAAM,CAAC;CAC1C;AAED;;GAEG;AACH,KAAK,kBAAkB,GAAG,MAAM,GAAG,QAAQ,CAAC;AAC5C;;GAEG;AACH,MAAM,WAAW,YAAY;IACzB;;;;;;;;;;OAUG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9D;;;;;;OAMG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,kBAAkB,EAAE,CAAC;IAC3C;;;OAGG;IACH,OAAO,IAAI,gBAAgB,CAAC,CAAC,MAAM,EAAE,kBAAkB,CAAC,CAAC,CAAC;IAC1D;;OAEG;IACH,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,gBAAgB,CAAC,CAAC,MAAM,EAAE,kBAAkB,CAAC,CAAC,CAAC;IACpE,QAAQ,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,EAAE,MAAM,CAAC;CAC1C"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"form-data-like.js","sourceRoot":"","sources":["../../src/http_clients/form-data-like.ts"],"names":[],"mappings":"AAAA;;GAEG"}
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
import type { BaseHttpClient, HttpRequest, HttpResponse, RedirectHandler, ResponseTypes, StreamingHttpResponse } from './base-http-client.js';
|
|
2
|
-
/**
|
|
3
|
-
* A HTTP client implementation based on the `got-scraping` library.
|
|
4
|
-
*/
|
|
5
|
-
export declare class GotScrapingHttpClient implements BaseHttpClient {
|
|
6
|
-
/**
|
|
7
|
-
* @inheritDoc
|
|
8
|
-
*/
|
|
9
|
-
sendRequest<TResponseType extends keyof ResponseTypes>(request: HttpRequest<TResponseType>): Promise<HttpResponse<TResponseType>>;
|
|
10
|
-
/**
|
|
11
|
-
* @inheritDoc
|
|
12
|
-
*/
|
|
13
|
-
stream(request: HttpRequest, handleRedirect?: RedirectHandler): Promise<StreamingHttpResponse>;
|
|
14
|
-
}
|
|
15
|
-
//# sourceMappingURL=got-scraping-http-client.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"got-scraping-http-client.d.ts","sourceRoot":"","sources":["../../src/http_clients/got-scraping-http-client.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACR,cAAc,EACd,WAAW,EACX,YAAY,EACZ,eAAe,EACf,aAAa,EACb,qBAAqB,EACxB,MAAM,uBAAuB,CAAC;AAE/B;;GAEG;AACH,qBAAa,qBAAsB,YAAW,cAAc;IACxD;;OAEG;IACG,WAAW,CAAC,aAAa,SAAS,MAAM,aAAa,EACvD,OAAO,EAAE,WAAW,CAAC,aAAa,CAAC,GACpC,OAAO,CAAC,YAAY,CAAC,aAAa,CAAC,CAAC;IAgBvC;;OAEG;IACG,MAAM,CAAC,OAAO,EAAE,WAAW,EAAE,cAAc,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,qBAAqB,CAAC;CAmDvG"}
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
import { gotScraping } from 'got-scraping';
|
|
2
|
-
/**
|
|
3
|
-
* A HTTP client implementation based on the `got-scraping` library.
|
|
4
|
-
*/
|
|
5
|
-
export class GotScrapingHttpClient {
|
|
6
|
-
/**
|
|
7
|
-
* @inheritDoc
|
|
8
|
-
*/
|
|
9
|
-
async sendRequest(request) {
|
|
10
|
-
const gotResult = await gotScraping({
|
|
11
|
-
...request,
|
|
12
|
-
retry: {
|
|
13
|
-
limit: 0,
|
|
14
|
-
...request.retry,
|
|
15
|
-
},
|
|
16
|
-
});
|
|
17
|
-
return {
|
|
18
|
-
...gotResult,
|
|
19
|
-
body: gotResult.body,
|
|
20
|
-
request: { url: request.url, ...gotResult.request },
|
|
21
|
-
};
|
|
22
|
-
}
|
|
23
|
-
/**
|
|
24
|
-
* @inheritDoc
|
|
25
|
-
*/
|
|
26
|
-
async stream(request, handleRedirect) {
|
|
27
|
-
// eslint-disable-next-line no-async-promise-executor
|
|
28
|
-
return new Promise(async (resolve, reject) => {
|
|
29
|
-
const stream = gotScraping({ ...request, isStream: true });
|
|
30
|
-
stream.on('redirect', (updatedOptions, redirectResponse) => {
|
|
31
|
-
handleRedirect?.(redirectResponse, updatedOptions);
|
|
32
|
-
});
|
|
33
|
-
// We need to end the stream for DELETE requests, otherwise it will hang.
|
|
34
|
-
if (request.method && ['DELETE', 'delete'].includes(request.method)) {
|
|
35
|
-
stream.end();
|
|
36
|
-
}
|
|
37
|
-
stream.on('error', reject);
|
|
38
|
-
stream.on('response', (response) => {
|
|
39
|
-
const result = {
|
|
40
|
-
stream,
|
|
41
|
-
request,
|
|
42
|
-
redirectUrls: response.redirectUrls,
|
|
43
|
-
url: response.url,
|
|
44
|
-
ip: response.ip,
|
|
45
|
-
statusCode: response.statusCode,
|
|
46
|
-
headers: response.headers,
|
|
47
|
-
trailers: response.trailers,
|
|
48
|
-
complete: response.complete,
|
|
49
|
-
get downloadProgress() {
|
|
50
|
-
return stream.downloadProgress;
|
|
51
|
-
},
|
|
52
|
-
get uploadProgress() {
|
|
53
|
-
return stream.uploadProgress;
|
|
54
|
-
},
|
|
55
|
-
};
|
|
56
|
-
Object.assign(result, response); // TODO BC - remove in 4.0
|
|
57
|
-
resolve(result);
|
|
58
|
-
stream.on('end', () => {
|
|
59
|
-
result.complete = response.complete;
|
|
60
|
-
result.trailers ??= {};
|
|
61
|
-
Object.assign(result.trailers, response.trailers);
|
|
62
|
-
result.rawTrailers ??= []; // TODO BC - remove in 4.0
|
|
63
|
-
Object.assign(result.rawTrailers, response.rawTrailers);
|
|
64
|
-
});
|
|
65
|
-
});
|
|
66
|
-
});
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
//# sourceMappingURL=got-scraping-http-client.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"got-scraping-http-client.js","sourceRoot":"","sources":["../../src/http_clients/got-scraping-http-client.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAW3C;;GAEG;AACH,MAAM,OAAO,qBAAqB;IAC9B;;OAEG;IACH,KAAK,CAAC,WAAW,CACb,OAAmC;QAEnC,MAAM,SAAS,GAAG,MAAM,WAAW,CAAC;YAChC,GAAG,OAAO;YACV,KAAK,EAAE;gBACH,KAAK,EAAE,CAAC;gBACR,GAAI,OAAO,CAAC,KAA6C;aAC5D;SACJ,CAAC,CAAC;QAEH,OAAO;YACH,GAAG,SAAS;YACZ,IAAI,EAAE,SAAS,CAAC,IAAoC;YACpD,OAAO,EAAE,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,OAAO,EAAE;SACtD,CAAC;IACN,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,OAAoB,EAAE,cAAgC;QAC/D,qDAAqD;QACrD,OAAO,IAAI,OAAO,CAAC,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE;YACzC,MAAM,MAAM,GAAG,WAAW,CAAC,EAAE,GAAG,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;YAE3D,MAAM,CAAC,EAAE,CAAC,UAAU,EAAE,CAAC,cAAuB,EAAE,gBAA+B,EAAE,EAAE;gBAC/E,cAAc,EAAE,CAAC,gBAAgB,EAAE,cAAc,CAAC,CAAC;YACvD,CAAC,CAAC,CAAC;YAEH,yEAAyE;YACzE,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;gBAClE,MAAM,CAAC,GAAG,EAAE,CAAC;YACjB,CAAC;YAED,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YAE3B,MAAM,CAAC,EAAE,CAAC,UAAU,EAAE,CAAC,QAAuB,EAAE,EAAE;gBAC9C,MAAM,MAAM,GAA0B;oBAClC,MAAM;oBACN,OAAO;oBACP,YAAY,EAAE,QAAQ,CAAC,YAAY;oBACnC,GAAG,EAAE,QAAQ,CAAC,GAAG;oBACjB,EAAE,EAAE,QAAQ,CAAC,EAAE;oBACf,UAAU,EAAE,QAAQ,CAAC,UAAU;oBAC/B,OAAO,EAAE,QAAQ,CAAC,OAAO;oBACzB,QAAQ,EAAE,QAAQ,CAAC,QAAQ;oBAC3B,QAAQ,EAAE,QAAQ,CAAC,QAAQ;oBAC3B,IAAI,gBAAgB;wBAChB,OAAO,MAAM,CAAC,gBAAgB,CAAC;oBACnC,CAAC;oBACD,IAAI,cAAc;wBACd,OAAO,MAAM,CAAC,cAAc,CAAC;oBACjC,CAAC;iBACJ,CAAC;gBAEF,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,0BAA0B;gBAE3D,OAAO,CAAC,MAAM,CAAC,CAAC;gBAEhB,MAAM,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;oBAClB,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC;oBAEpC,MAAM,CAAC,QAAQ,KAAK,EAAE,CAAC;oBACvB,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,QAAQ,EAAE,QAAQ,CAAC,QAAQ,CAAC,CAAC;oBAEjD,MAAc,CAAC,WAAW,KAAK,EAAE,CAAC,CAAC,0BAA0B;oBAC9D,MAAM,CAAC,MAAM,CAAE,MAAc,CAAC,WAAW,EAAE,QAAQ,CAAC,WAAW,CAAC,CAAC;gBACrE,CAAC,CAAC,CAAC;YACP,CAAC,CAAC,CAAC;QACP,CAAC,CAAC,CAAC;IACP,CAAC;CACJ"}
|
package/http_clients/index.d.ts
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/http_clients/index.ts"],"names":[],"mappings":"AAAA,cAAc,uBAAuB,CAAC;AACtC,cAAc,+BAA+B,CAAC"}
|
package/http_clients/index.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/http_clients/index.ts"],"names":[],"mappings":"AAAA,cAAc,uBAAuB,CAAC;AACtC,cAAc,+BAA+B,CAAC"}
|