@crawlee/core 4.0.0-beta.6 → 4.0.0-beta.61
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -5
- package/autoscaling/autoscaled_pool.d.ts +3 -5
- package/autoscaling/autoscaled_pool.d.ts.map +1 -1
- package/autoscaling/autoscaled_pool.js +3 -9
- package/autoscaling/autoscaled_pool.js.map +1 -1
- package/autoscaling/snapshotter.d.ts +3 -13
- package/autoscaling/snapshotter.d.ts.map +1 -1
- package/autoscaling/snapshotter.js +18 -29
- package/autoscaling/snapshotter.js.map +1 -1
- package/autoscaling/system_status.d.ts +0 -3
- package/autoscaling/system_status.d.ts.map +1 -1
- package/autoscaling/system_status.js +2 -3
- package/autoscaling/system_status.js.map +1 -1
- package/configuration.d.ts +85 -227
- package/configuration.d.ts.map +1 -1
- package/configuration.js +159 -223
- package/configuration.js.map +1 -1
- package/cookie_utils.d.ts +4 -2
- package/cookie_utils.d.ts.map +1 -1
- package/cookie_utils.js +18 -12
- package/cookie_utils.js.map +1 -1
- package/crawlers/context_pipeline.d.ts +71 -0
- package/crawlers/context_pipeline.d.ts.map +1 -0
- package/crawlers/context_pipeline.js +123 -0
- package/crawlers/context_pipeline.js.map +1 -0
- package/crawlers/crawler_commons.d.ts +19 -28
- package/crawlers/crawler_commons.d.ts.map +1 -1
- package/crawlers/crawler_commons.js +12 -20
- package/crawlers/crawler_commons.js.map +1 -1
- package/crawlers/crawler_utils.d.ts +2 -2
- package/crawlers/crawler_utils.d.ts.map +1 -1
- package/crawlers/crawler_utils.js +1 -1
- package/crawlers/crawler_utils.js.map +1 -1
- package/crawlers/error_snapshotter.d.ts +3 -2
- package/crawlers/error_snapshotter.d.ts.map +1 -1
- package/crawlers/error_snapshotter.js +2 -2
- package/crawlers/error_snapshotter.js.map +1 -1
- package/crawlers/error_tracker.d.ts +2 -1
- package/crawlers/error_tracker.d.ts.map +1 -1
- package/crawlers/error_tracker.js.map +1 -1
- package/crawlers/index.d.ts +1 -1
- package/crawlers/index.d.ts.map +1 -1
- package/crawlers/index.js +1 -1
- package/crawlers/index.js.map +1 -1
- package/crawlers/internals/types.d.ts +8 -0
- package/crawlers/internals/types.d.ts.map +1 -0
- package/crawlers/internals/types.js +2 -0
- package/crawlers/internals/types.js.map +1 -0
- package/crawlers/statistics.d.ts +15 -15
- package/crawlers/statistics.d.ts.map +1 -1
- package/crawlers/statistics.js +21 -24
- package/crawlers/statistics.js.map +1 -1
- package/enqueue_links/enqueue_links.d.ts +32 -18
- package/enqueue_links/enqueue_links.d.ts.map +1 -1
- package/enqueue_links/enqueue_links.js +45 -24
- package/enqueue_links/enqueue_links.js.map +1 -1
- package/enqueue_links/shared.d.ts +25 -8
- package/enqueue_links/shared.d.ts.map +1 -1
- package/enqueue_links/shared.js +69 -37
- package/enqueue_links/shared.js.map +1 -1
- package/errors.d.ts +33 -3
- package/errors.d.ts.map +1 -1
- package/errors.js +48 -4
- package/errors.js.map +1 -1
- package/events/event_manager.d.ts +8 -5
- package/events/event_manager.d.ts.map +1 -1
- package/events/event_manager.js +7 -9
- package/events/event_manager.js.map +1 -1
- package/events/local_event_manager.d.ts +14 -4
- package/events/local_event_manager.d.ts.map +1 -1
- package/events/local_event_manager.js +33 -39
- package/events/local_event_manager.js.map +1 -1
- package/index.d.ts +3 -2
- package/index.d.ts.map +1 -1
- package/index.js +2 -1
- package/index.js.map +1 -1
- package/log.d.ts +82 -2
- package/log.d.ts.map +1 -1
- package/log.js +102 -0
- package/log.js.map +1 -1
- package/package.json +9 -10
- package/proxy_configuration.d.ts +14 -148
- package/proxy_configuration.d.ts.map +1 -1
- package/proxy_configuration.js +19 -167
- package/proxy_configuration.js.map +1 -1
- package/recoverable_state.d.ts +121 -0
- package/recoverable_state.d.ts.map +1 -0
- package/recoverable_state.js +142 -0
- package/recoverable_state.js.map +1 -0
- package/request.d.ts +74 -10
- package/request.d.ts.map +1 -1
- package/request.js +85 -23
- package/request.js.map +1 -1
- package/router.d.ts.map +1 -1
- package/router.js.map +1 -1
- package/serialization.js +1 -1
- package/serialization.js.map +1 -1
- package/service_locator.d.ts +157 -0
- package/service_locator.d.ts.map +1 -0
- package/service_locator.js +234 -0
- package/service_locator.js.map +1 -0
- package/session_pool/index.d.ts +0 -1
- package/session_pool/index.d.ts.map +1 -1
- package/session_pool/index.js +0 -1
- package/session_pool/index.js.map +1 -1
- package/session_pool/session.d.ts +26 -72
- package/session_pool/session.d.ts.map +1 -1
- package/session_pool/session.js +36 -98
- package/session_pool/session.js.map +1 -1
- package/session_pool/session_pool.d.ts +65 -71
- package/session_pool/session_pool.d.ts.map +1 -1
- package/session_pool/session_pool.js +101 -100
- package/session_pool/session_pool.js.map +1 -1
- package/storages/dataset.d.ts +90 -46
- package/storages/dataset.d.ts.map +1 -1
- package/storages/dataset.js +149 -121
- package/storages/dataset.js.map +1 -1
- package/storages/index.d.ts +3 -1
- package/storages/index.d.ts.map +1 -1
- package/storages/index.js +3 -1
- package/storages/index.js.map +1 -1
- package/storages/key_value_store.d.ts +104 -22
- package/storages/key_value_store.d.ts.map +1 -1
- package/storages/key_value_store.js +166 -51
- package/storages/key_value_store.js.map +1 -1
- package/storages/request_list.d.ts +9 -9
- package/storages/request_list.d.ts.map +1 -1
- package/storages/request_list.js +13 -8
- package/storages/request_list.js.map +1 -1
- package/storages/request_list_adapter.d.ts +58 -0
- package/storages/request_list_adapter.d.ts.map +1 -0
- package/storages/request_list_adapter.js +81 -0
- package/storages/request_list_adapter.js.map +1 -0
- package/storages/request_manager_tandem.d.ts +68 -0
- package/storages/request_manager_tandem.d.ts.map +1 -0
- package/storages/request_manager_tandem.js +124 -0
- package/storages/request_manager_tandem.js.map +1 -0
- package/storages/request_provider.d.ts +87 -22
- package/storages/request_provider.d.ts.map +1 -1
- package/storages/request_provider.js +127 -77
- package/storages/request_provider.js.map +1 -1
- package/storages/request_queue.d.ts +1 -3
- package/storages/request_queue.d.ts.map +1 -1
- package/storages/request_queue.js +2 -4
- package/storages/request_queue.js.map +1 -1
- package/storages/request_queue_v2.d.ts +3 -3
- package/storages/request_queue_v2.d.ts.map +1 -1
- package/storages/request_queue_v2.js +4 -5
- package/storages/request_queue_v2.js.map +1 -1
- package/storages/sitemap_request_list.d.ts +5 -5
- package/storages/sitemap_request_list.d.ts.map +1 -1
- package/storages/sitemap_request_list.js +10 -7
- package/storages/sitemap_request_list.js.map +1 -1
- package/storages/storage_instance_manager.d.ts +91 -0
- package/storages/storage_instance_manager.d.ts.map +1 -0
- package/storages/storage_instance_manager.js +236 -0
- package/storages/storage_instance_manager.js.map +1 -0
- package/storages/utils.d.ts +47 -1
- package/storages/utils.d.ts.map +1 -1
- package/storages/utils.js +57 -5
- package/storages/utils.js.map +1 -1
- package/typedefs.d.ts +1 -1
- package/typedefs.d.ts.map +1 -1
- package/validators.d.ts +4 -0
- package/validators.d.ts.map +1 -1
- package/validators.js +4 -0
- package/validators.js.map +1 -1
- package/crawlers/crawler_extension.d.ts +0 -12
- package/crawlers/crawler_extension.d.ts.map +0 -1
- package/crawlers/crawler_extension.js +0 -14
- package/crawlers/crawler_extension.js.map +0 -1
- package/http_clients/base-http-client.d.ts +0 -134
- package/http_clients/base-http-client.d.ts.map +0 -1
- package/http_clients/base-http-client.js +0 -33
- package/http_clients/base-http-client.js.map +0 -1
- package/http_clients/form-data-like.d.ts +0 -67
- package/http_clients/form-data-like.d.ts.map +0 -1
- package/http_clients/form-data-like.js +0 -5
- package/http_clients/form-data-like.js.map +0 -1
- package/http_clients/got-scraping-http-client.d.ts +0 -15
- package/http_clients/got-scraping-http-client.d.ts.map +0 -1
- package/http_clients/got-scraping-http-client.js +0 -69
- package/http_clients/got-scraping-http-client.js.map +0 -1
- package/http_clients/index.d.ts +0 -3
- package/http_clients/index.d.ts.map +0 -1
- package/http_clients/index.js +0 -3
- package/http_clients/index.js.map +0 -1
- package/session_pool/events.d.ts +0 -3
- package/session_pool/events.d.ts.map +0 -1
- package/session_pool/events.js +0 -3
- package/session_pool/events.js.map +0 -1
- package/storages/storage_manager.d.ts +0 -58
- package/storages/storage_manager.d.ts.map +0 -1
- package/storages/storage_manager.js +0 -105
- package/storages/storage_manager.js.map +0 -1
- package/tsconfig.build.tsbuildinfo +0 -1
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import type { Awaitable } from '@crawlee/types';
|
|
2
|
+
/**
|
|
3
|
+
* Represents a middleware step in the context pipeline.
|
|
4
|
+
*
|
|
5
|
+
* @template TCrawlingContext - The input context type for this middleware
|
|
6
|
+
* @template TCrawlingContextExtension - The enhanced output context type
|
|
7
|
+
*/
|
|
8
|
+
export interface ContextMiddleware<TCrawlingContext, TCrawlingContextExtension> {
|
|
9
|
+
/** The main middleware function that enhances the context */
|
|
10
|
+
action: (context: TCrawlingContext) => Awaitable<TCrawlingContextExtension>;
|
|
11
|
+
/** Optional cleanup function called after the consumer finishes or fails */
|
|
12
|
+
cleanup?: (context: TCrawlingContext & TCrawlingContextExtension, error?: unknown) => Awaitable<void>;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Encapsulates the logic of gradually enhancing the crawling context with additional information and utilities.
|
|
16
|
+
*
|
|
17
|
+
* The enhancement is done by a chain of middlewares that are added to the pipeline after its creation.
|
|
18
|
+
* This class provides a type-safe way to build a pipeline of context transformations where each step
|
|
19
|
+
* can enhance the context with additional properties or utilities.
|
|
20
|
+
*
|
|
21
|
+
* @template TContextBase - The base context type that serves as the starting point
|
|
22
|
+
* @template TCrawlingContext - The final context type after all middleware transformations
|
|
23
|
+
*/
|
|
24
|
+
export declare abstract class ContextPipeline<TContextBase, TCrawlingContext extends TContextBase> {
|
|
25
|
+
/**
|
|
26
|
+
* Creates a new empty context pipeline.
|
|
27
|
+
*
|
|
28
|
+
* @template TContextBase - The base context type for the pipeline
|
|
29
|
+
* @returns A new ContextPipeline instance with no transformations
|
|
30
|
+
*/
|
|
31
|
+
static create<TContextBase>(): ContextPipeline<TContextBase, TContextBase>;
|
|
32
|
+
/**
|
|
33
|
+
* Adds a middleware to the pipeline, creating a new pipeline instance.
|
|
34
|
+
*
|
|
35
|
+
* This method provides a fluent interface for building context transformation pipelines.
|
|
36
|
+
* Each middleware can enhance the context with additional properties or utilities.
|
|
37
|
+
*
|
|
38
|
+
* @template TCrawlingContextExtension - The enhanced context type produced by this middleware
|
|
39
|
+
* @param middleware - The middleware to add to the pipeline
|
|
40
|
+
* @returns A new ContextPipeline instance with the added middleware
|
|
41
|
+
*/
|
|
42
|
+
abstract compose<TCrawlingContextExtension>(middleware: ContextMiddleware<TCrawlingContext, TCrawlingContextExtension>): ContextPipeline<TContextBase, TCrawlingContext & TCrawlingContextExtension>;
|
|
43
|
+
/**
|
|
44
|
+
* Chains another pipeline onto this one. The other pipeline's base context must match
|
|
45
|
+
* this pipeline's output context. Returns a new pipeline that runs this pipeline's
|
|
46
|
+
* middlewares first, then the other pipeline's middlewares.
|
|
47
|
+
*
|
|
48
|
+
* @template TFinalContext - The final context type after the chained pipeline's transformations
|
|
49
|
+
* @param other - The pipeline to append after this one
|
|
50
|
+
* @returns A new ContextPipeline combining both pipelines' middlewares
|
|
51
|
+
*/
|
|
52
|
+
abstract chain<TFinalContext extends TCrawlingContext>(other: ContextPipeline<TCrawlingContext, TFinalContext>): ContextPipeline<TContextBase, TFinalContext>;
|
|
53
|
+
/**
|
|
54
|
+
* Executes the middleware pipeline and passes the final context to a consumer function.
|
|
55
|
+
*
|
|
56
|
+
* This method runs the crawling context through the entire middleware chain, enhancing it
|
|
57
|
+
* at each step, and then passes the final enhanced context to the provided consumer function.
|
|
58
|
+
* Proper cleanup is performed even if exceptions occur during processing.
|
|
59
|
+
*
|
|
60
|
+
* @param crawlingContext - The initial context to process through the pipeline
|
|
61
|
+
* @param finalContextConsumer - The function that will receive the final enhanced context
|
|
62
|
+
*
|
|
63
|
+
* @throws {ContextPipelineInitializationError} When a middleware fails during initialization
|
|
64
|
+
* @throws {ContextPipelineInterruptedError} When the pipeline is intentionally interrupted during initialization
|
|
65
|
+
* @throws {RequestHandlerError} When the final context consumer throws an exception
|
|
66
|
+
* @throws {ContextPipelineCleanupError} When cleanup operations fail
|
|
67
|
+
* @throws {SessionError} Session errors are re-thrown as-is for special handling
|
|
68
|
+
*/
|
|
69
|
+
abstract call(crawlingContext: TContextBase, finalContextConsumer: (finalContext: TCrawlingContext) => Awaitable<unknown>): Promise<void>;
|
|
70
|
+
}
|
|
71
|
+
//# sourceMappingURL=context_pipeline.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"context_pipeline.d.ts","sourceRoot":"","sources":["../../src/crawlers/context_pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAWhD;;;;;GAKG;AACH,MAAM,WAAW,iBAAiB,CAAC,gBAAgB,EAAE,yBAAyB;IAC1E,6DAA6D;IAC7D,MAAM,EAAE,CAAC,OAAO,EAAE,gBAAgB,KAAK,SAAS,CAAC,yBAAyB,CAAC,CAAC;IAC5E,4EAA4E;IAC5E,OAAO,CAAC,EAAE,CAAC,OAAO,EAAE,gBAAgB,GAAG,yBAAyB,EAAE,KAAK,CAAC,EAAE,OAAO,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;CACzG;AAED;;;;;;;;;GASG;AACH,8BAAsB,eAAe,CAAC,YAAY,EAAE,gBAAgB,SAAS,YAAY;IACrF;;;;;OAKG;IACH,MAAM,CAAC,MAAM,CAAC,YAAY,KAAK,eAAe,CAAC,YAAY,EAAE,YAAY,CAAC;IAI1E;;;;;;;;;OASG;IACH,QAAQ,CAAC,OAAO,CAAC,yBAAyB,EACtC,UAAU,EAAE,iBAAiB,CAAC,gBAAgB,EAAE,yBAAyB,CAAC,GAC3E,eAAe,CAAC,YAAY,EAAE,gBAAgB,GAAG,yBAAyB,CAAC;IAE9E;;;;;;;;OAQG;IACH,QAAQ,CAAC,KAAK,CAAC,aAAa,SAAS,gBAAgB,EACjD,KAAK,EAAE,eAAe,CAAC,gBAAgB,EAAE,aAAa,CAAC,GACxD,eAAe,CAAC,YAAY,EAAE,aAAa,CAAC;IAE/C;;;;;;;;;;;;;;;OAeG;IACH,QAAQ,CAAC,IAAI,CACT,eAAe,EAAE,YAAY,EAC7B,oBAAoB,EAAE,CAAC,YAAY,EAAE,gBAAgB,KAAK,SAAS,CAAC,OAAO,CAAC,GAC7E,OAAO,CAAC,IAAI,CAAC;CACnB"}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { ContextPipelineCleanupError, ContextPipelineInitializationError, ContextPipelineInterruptedError, RequestHandlerError, SessionError, } from '../errors.js';
|
|
2
|
+
import { serviceLocator } from '../service_locator.js';
|
|
3
|
+
/**
|
|
4
|
+
* Encapsulates the logic of gradually enhancing the crawling context with additional information and utilities.
|
|
5
|
+
*
|
|
6
|
+
* The enhancement is done by a chain of middlewares that are added to the pipeline after its creation.
|
|
7
|
+
* This class provides a type-safe way to build a pipeline of context transformations where each step
|
|
8
|
+
* can enhance the context with additional properties or utilities.
|
|
9
|
+
*
|
|
10
|
+
* @template TContextBase - The base context type that serves as the starting point
|
|
11
|
+
* @template TCrawlingContext - The final context type after all middleware transformations
|
|
12
|
+
*/
|
|
13
|
+
export class ContextPipeline {
|
|
14
|
+
/**
|
|
15
|
+
* Creates a new empty context pipeline.
|
|
16
|
+
*
|
|
17
|
+
* @template TContextBase - The base context type for the pipeline
|
|
18
|
+
* @returns A new ContextPipeline instance with no transformations
|
|
19
|
+
*/
|
|
20
|
+
static create() {
|
|
21
|
+
return new ContextPipelineImpl({ action: async (context) => context });
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Implementation of the `ContextPipeline` logic. This hides implementation details such as the `middleware` and `parent`
|
|
26
|
+
* properties from the `ContextPipeline` interface, making type checking more reliable.
|
|
27
|
+
*/
|
|
28
|
+
class ContextPipelineImpl extends ContextPipeline {
|
|
29
|
+
middleware;
|
|
30
|
+
parent;
|
|
31
|
+
constructor(middleware, parent) {
|
|
32
|
+
super();
|
|
33
|
+
this.middleware = middleware;
|
|
34
|
+
this.parent = parent;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* @inheritdoc
|
|
38
|
+
*/
|
|
39
|
+
compose(middleware) {
|
|
40
|
+
return new ContextPipelineImpl(middleware, this);
|
|
41
|
+
}
|
|
42
|
+
chain(other) {
|
|
43
|
+
const otherMiddlewares = Array.from(other.middlewareChain()).reverse();
|
|
44
|
+
let result = this;
|
|
45
|
+
for (const middleware of otherMiddlewares) {
|
|
46
|
+
result = result.compose(middleware);
|
|
47
|
+
}
|
|
48
|
+
return result;
|
|
49
|
+
}
|
|
50
|
+
*middlewareChain() {
|
|
51
|
+
let step = this;
|
|
52
|
+
while (step !== undefined) {
|
|
53
|
+
yield step.middleware;
|
|
54
|
+
step = step.parent;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* @inheritdoc
|
|
59
|
+
*/
|
|
60
|
+
async call(crawlingContext, finalContextConsumer) {
|
|
61
|
+
const middlewares = Array.from(this.middlewareChain()).reverse();
|
|
62
|
+
const cleanupStack = [];
|
|
63
|
+
let consumerException;
|
|
64
|
+
try {
|
|
65
|
+
for (const { action, cleanup } of middlewares) {
|
|
66
|
+
try {
|
|
67
|
+
const contextExtension = await action(crawlingContext);
|
|
68
|
+
const extensionNames = [
|
|
69
|
+
...Object.getOwnPropertyNames(contextExtension),
|
|
70
|
+
...Object.getOwnPropertySymbols(contextExtension),
|
|
71
|
+
];
|
|
72
|
+
for (const key of extensionNames) {
|
|
73
|
+
try {
|
|
74
|
+
if (Object.getOwnPropertyDescriptor(crawlingContext, key)?.configurable !== false) {
|
|
75
|
+
Object.defineProperty(crawlingContext, key, Object.getOwnPropertyDescriptor(contextExtension, key));
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
catch (error) {
|
|
79
|
+
serviceLocator
|
|
80
|
+
.getLogger()
|
|
81
|
+
.debug(`Context pipeline failed to define property ${key.toString()}:`, error);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
if (cleanup) {
|
|
85
|
+
cleanupStack.push(cleanup);
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
catch (exception) {
|
|
89
|
+
if (exception instanceof SessionError) {
|
|
90
|
+
throw exception; // Session errors are re-thrown as-is
|
|
91
|
+
}
|
|
92
|
+
if (exception instanceof ContextPipelineInterruptedError) {
|
|
93
|
+
throw exception;
|
|
94
|
+
}
|
|
95
|
+
throw new ContextPipelineInitializationError(exception);
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
try {
|
|
99
|
+
await finalContextConsumer(crawlingContext);
|
|
100
|
+
}
|
|
101
|
+
catch (exception) {
|
|
102
|
+
if (exception instanceof SessionError) {
|
|
103
|
+
consumerException = exception;
|
|
104
|
+
throw exception; // Session errors are re-thrown as-is
|
|
105
|
+
}
|
|
106
|
+
consumerException = exception;
|
|
107
|
+
throw new RequestHandlerError(exception);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
finally {
|
|
111
|
+
try {
|
|
112
|
+
for (const cleanup of cleanupStack.reverse()) {
|
|
113
|
+
await cleanup(crawlingContext, consumerException);
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
catch (exception) {
|
|
117
|
+
// eslint-disable-next-line no-unsafe-finally
|
|
118
|
+
throw new ContextPipelineCleanupError(exception);
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
//# sourceMappingURL=context_pipeline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"context_pipeline.js","sourceRoot":"","sources":["../../src/crawlers/context_pipeline.ts"],"names":[],"mappings":"AAEA,OAAO,EACH,2BAA2B,EAC3B,kCAAkC,EAClC,+BAA+B,EAC/B,mBAAmB,EACnB,YAAY,GACf,MAAM,cAAc,CAAC;AACtB,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAevD;;;;;;;;;GASG;AACH,MAAM,OAAgB,eAAe;IACjC;;;;;OAKG;IACH,MAAM,CAAC,MAAM;QACT,OAAO,IAAI,mBAAmB,CAA6B,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,OAAO,EAAE,CAAC,CAAC;IACvG,CAAC;CAiDJ;AAED;;;GAGG;AACH,MAAM,mBAAyE,SAAQ,eAGtF;IAEe;IACA;IAFZ,YACY,UAA6D,EAC7D,MAAwD;QAEhE,KAAK,EAAE,CAAC;QAHA,eAAU,GAAV,UAAU,CAAmD;QAC7D,WAAM,GAAN,MAAM,CAAkD;IAGpE,CAAC;IAED;;OAEG;IACH,OAAO,CACH,UAA0E;QAE1E,OAAO,IAAI,mBAAmB,CAC1B,UAAiB,EACjB,IAAW,CACd,CAAC;IACN,CAAC;IAED,KAAK,CACD,KAAuD;QAEvD,MAAM,gBAAgB,GAAG,KAAK,CAAC,IAAI,CAC9B,KAAa,CAAC,eAAe,EAA2C,CAC5E,CAAC,OAAO,EAAE,CAAC;QAEZ,IAAI,MAAM,GAAuC,IAAW,CAAC;QAC7D,KAAK,MAAM,UAAU,IAAI,gBAAgB,EAAE,CAAC;YACxC,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,UAAiB,CAAC,CAAC;QAC/C,CAAC;QAED,OAAO,MAAsD,CAAC;IAClE,CAAC;IAEO,CAAC,eAAe;QACpB,IAAI,IAAI,GAAgE,IAAW,CAAC;QAEpF,OAAO,IAAI,KAAK,SAAS,EAAE,CAAC;YACxB,MAAM,IAAI,CAAC,UAAU,CAAC;YACtB,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC;QACvB,CAAC;IACL,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CACN,eAA6B,EAC7B,oBAA0E;QAE1E,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC;QACjE,MAAM,YAAY,GAAG,EAAE,CAAC;QACxB,IAAI,iBAAsC,CAAC;QAE3C,IAAI,CAAC;YACD,KAAK,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,WAAW,EAAE,CAAC;gBAC5C,IAAI,CAAC;oBACD,MAAM,gBAAgB,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;oBAEvD,MAAM,cAAc,GAAG;wBACnB,GAAG,MAAM,CAAC,mBAAmB,CAAC,gBAAgB,CAAC;wBAC/C,GAAG,MAAM,CAAC,qBAAqB,CAAC,gBAAgB,CAAC;qBACpD,CAAC;oBAEF,KAAK,MAAM,GAAG,IAAI,cAAc,EAAE,CAAC;wBAC/B,IAAI,CAAC;4BACD,IAAI,MAAM,CAAC,wBAAwB,CAAC,eAAe,EAAE,GAAG,CAAC,EAAE,YAAY,KAAK,KAAK,EAAE,CAAC;gCAChF,MAAM,CAAC,cAAc,CACjB,eAAe,EACf,GAAG,EACH,MAAM,CAAC,wBAAwB,CAAC,gBAAgB,EAAE,GAAG,CAAE,CAC1D,CAAC;4BACN,CAAC;wBACL,CAAC;wBAAC,OAAO,KAAU,EAAE,CAAC;4BAClB,cAAc;iCACT,SAAS,EAAE;iCACX,KAAK,CAAC,8CAA8C,GAAG,CAAC,QAAQ,EAAE,GAAG,EAAE,KAAK,CAAC,CAAC;wBACvF,CAAC;oBACL,CAAC;oBAED,IAAI,OAAO,EAAE,CAAC;wBACV,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;oBAC/B,CAAC;gBACL,CAAC;gBAAC,OAAO,SAAkB,EAAE,CAAC;oBAC1B,IAAI,SAAS,YAAY,YAAY,EAAE,CAAC;wBACpC,MAAM,SAAS,CAAC,CAAC,qCAAqC;oBAC1D,CAAC;oBACD,IAAI,SAAS,YAAY,+BAA+B,EAAE,CAAC;wBACvD,MAAM,SAAS,CAAC;oBACpB,CAAC;oBAED,MAAM,IAAI,kCAAkC,CAAC,SAAS,CAAC,CAAC;gBAC5D,CAAC;YACL,CAAC;YAED,IAAI,CAAC;gBACD,MAAM,oBAAoB,CAAC,eAAmC,CAAC,CAAC;YACpE,CAAC;YAAC,OAAO,SAAkB,EAAE,CAAC;gBAC1B,IAAI,SAAS,YAAY,YAAY,EAAE,CAAC;oBACpC,iBAAiB,GAAG,SAAS,CAAC;oBAC9B,MAAM,SAAS,CAAC,CAAC,qCAAqC;gBAC1D,CAAC;gBACD,iBAAiB,GAAG,SAAS,CAAC;gBAC9B,MAAM,IAAI,mBAAmB,CAAC,SAAS,CAAC,CAAC;YAC7C,CAAC;QACL,CAAC;gBAAS,CAAC;YACP,IAAI,CAAC;gBACD,KAAK,MAAM,OAAO,IAAI,YAAY,CAAC,OAAO,EAAE,EAAE,CAAC;oBAC3C,MAAM,OAAO,CAAC,eAAe,EAAE,iBAAiB,CAAC,CAAC;gBACtD,CAAC;YACL,CAAC;YAAC,OAAO,SAAkB,EAAE,CAAC;gBAC1B,6CAA6C;gBAC7C,MAAM,IAAI,2BAA2B,CAAC,SAAS,CAAC,CAAC;YACrD,CAAC;QACL,CAAC;IACL,CAAC;CACJ"}
|
|
@@ -1,16 +1,13 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
|
|
3
|
-
import type { OptionsInit, Response as GotResponse } from 'got-scraping';
|
|
4
|
-
import type { ReadonlyDeep } from 'type-fest';
|
|
1
|
+
import type { Dictionary, HttpRequestOptions, ISession, ProxyInfo, SendRequestOptions } from '@crawlee/types';
|
|
2
|
+
import type { ReadonlyDeep, SetRequired } from 'type-fest';
|
|
5
3
|
import type { Configuration } from '../configuration.js';
|
|
6
4
|
import type { EnqueueLinksOptions } from '../enqueue_links/enqueue_links.js';
|
|
7
|
-
import type {
|
|
8
|
-
import type { ProxyInfo } from '../proxy_configuration.js';
|
|
5
|
+
import type { CrawleeLogger } from '../log.js';
|
|
9
6
|
import type { Request, Source } from '../request.js';
|
|
10
|
-
import type { Session } from '../session_pool/session.js';
|
|
11
7
|
import type { Dataset } from '../storages/dataset.js';
|
|
12
8
|
import { KeyValueStore, type RecordOptions } from '../storages/key_value_store.js';
|
|
13
9
|
import type { RequestQueueOperationOptions } from '../storages/request_provider.js';
|
|
10
|
+
import type { StorageIdentifier } from '../storages/storage_instance_manager.js';
|
|
14
11
|
/** @internal */
|
|
15
12
|
export type IsAny<T> = 0 extends 1 & T ? true : false;
|
|
16
13
|
/** @internal */
|
|
@@ -22,9 +19,9 @@ export type LoadedRequest<R extends Request> = WithRequired<R, 'id' | 'loadedUrl
|
|
|
22
19
|
export type LoadedContext<Context extends RestrictedCrawlingContext> = IsAny<Context> extends true ? Context : {
|
|
23
20
|
request: LoadedRequest<Context['request']>;
|
|
24
21
|
} & Omit<Context, 'request'>;
|
|
25
|
-
export interface RestrictedCrawlingContext<UserData extends Dictionary = Dictionary>
|
|
22
|
+
export interface RestrictedCrawlingContext<UserData extends Dictionary = Dictionary> {
|
|
26
23
|
id: string;
|
|
27
|
-
session
|
|
24
|
+
session: ISession;
|
|
28
25
|
/**
|
|
29
26
|
* An object with information about currently used proxy by the crawler
|
|
30
27
|
* and configured by the {@link ProxyConfiguration} class.
|
|
@@ -41,7 +38,7 @@ export interface RestrictedCrawlingContext<UserData extends Dictionary = Diction
|
|
|
41
38
|
*
|
|
42
39
|
* @param [data] Data to be pushed to the default dataset.
|
|
43
40
|
*/
|
|
44
|
-
pushData(data: ReadonlyDeep<Parameters<Dataset['pushData']>[0]>,
|
|
41
|
+
pushData(data: ReadonlyDeep<Parameters<Dataset['pushData']>[0]>, datasetIdentifier?: string | StorageIdentifier): Promise<void>;
|
|
45
42
|
/**
|
|
46
43
|
* This function automatically finds and enqueues links from the current page, adding them to the {@link RequestQueue}
|
|
47
44
|
* currently used by the crawler.
|
|
@@ -66,7 +63,7 @@ export interface RestrictedCrawlingContext<UserData extends Dictionary = Diction
|
|
|
66
63
|
*
|
|
67
64
|
* @param [options] All `enqueueLinks()` parameters are passed via an options object.
|
|
68
65
|
*/
|
|
69
|
-
enqueueLinks: (options
|
|
66
|
+
enqueueLinks: (options: ReadonlyDeep<Omit<SetRequired<EnqueueLinksOptions, 'urls'>, 'requestQueue' | 'robotsTxtFile'>>) => Promise<unknown>;
|
|
70
67
|
/**
|
|
71
68
|
* Add requests directly to the request queue.
|
|
72
69
|
*
|
|
@@ -81,14 +78,13 @@ export interface RestrictedCrawlingContext<UserData extends Dictionary = Diction
|
|
|
81
78
|
/**
|
|
82
79
|
* Get a key-value store with given name or id, or the default one for the crawler.
|
|
83
80
|
*/
|
|
84
|
-
getKeyValueStore: (
|
|
81
|
+
getKeyValueStore: (identifier?: string | StorageIdentifier) => Promise<Pick<KeyValueStore, 'id' | 'name' | 'getValue' | 'getAutoSavedValue' | 'setValue' | 'getPublicUrl'>>;
|
|
85
82
|
/**
|
|
86
83
|
* A preconfigured logger for the request handler.
|
|
87
84
|
*/
|
|
88
|
-
log:
|
|
85
|
+
log: CrawleeLogger;
|
|
89
86
|
}
|
|
90
|
-
export interface CrawlingContext<
|
|
91
|
-
crawler: Crawler;
|
|
87
|
+
export interface CrawlingContext<UserData extends Dictionary = Dictionary> extends RestrictedCrawlingContext<UserData> {
|
|
92
88
|
/**
|
|
93
89
|
* This function automatically finds and enqueues links from the current page, adding them to the {@link RequestQueue}
|
|
94
90
|
* currently used by the crawler.
|
|
@@ -114,14 +110,9 @@ export interface CrawlingContext<Crawler = unknown, UserData extends Dictionary
|
|
|
114
110
|
* @param [options] All `enqueueLinks()` parameters are passed via an options object.
|
|
115
111
|
* @returns Promise that resolves to {@link BatchAddRequestsResult} object.
|
|
116
112
|
*/
|
|
117
|
-
enqueueLinks(options
|
|
113
|
+
enqueueLinks(options: ReadonlyDeep<Omit<SetRequired<EnqueueLinksOptions, 'urls'>, 'requestQueue' | 'robotsTxtFile'>> & Pick<EnqueueLinksOptions, 'requestQueue' | 'robotsTxtFile'>): Promise<unknown>;
|
|
118
114
|
/**
|
|
119
|
-
*
|
|
120
|
-
*/
|
|
121
|
-
getKeyValueStore: (idOrName?: string) => Promise<KeyValueStore>;
|
|
122
|
-
/**
|
|
123
|
-
* Fires HTTP request via [`got-scraping`](https://crawlee.dev/js/docs/guides/got-scraping), allowing to override the request
|
|
124
|
-
* options on the fly.
|
|
115
|
+
* Fires HTTP request via the internal HTTP client, allowing to override the request options on the fly.
|
|
125
116
|
*
|
|
126
117
|
* This is handy when you work with a browser crawler but want to execute some requests outside it (e.g. API requests).
|
|
127
118
|
* Check the [Skipping navigations for certain requests](https://crawlee.dev/js/docs/examples/skip-navigation) example for
|
|
@@ -136,7 +127,11 @@ export interface CrawlingContext<Crawler = unknown, UserData extends Dictionary
|
|
|
136
127
|
* },
|
|
137
128
|
* ```
|
|
138
129
|
*/
|
|
139
|
-
sendRequest
|
|
130
|
+
sendRequest: (requestOverrides?: Partial<HttpRequestOptions>, optionsOverrides?: SendRequestOptions) => Promise<Response>;
|
|
131
|
+
/**
|
|
132
|
+
* Register a function to be called at the very end of the request handling process. This is useful for resources that should be accessible to error handlers, for instance.
|
|
133
|
+
*/
|
|
134
|
+
registerDeferredCleanup(cleanup: () => Promise<unknown>): void;
|
|
140
135
|
}
|
|
141
136
|
/**
|
|
142
137
|
* A partial implementation of {@link RestrictedCrawlingContext} that stores parameters of calls to context methods for later inspection.
|
|
@@ -149,7 +144,6 @@ export declare class RequestHandlerResult {
|
|
|
149
144
|
private _keyValueStoreChanges;
|
|
150
145
|
private pushDataCalls;
|
|
151
146
|
private addRequestsCalls;
|
|
152
|
-
private enqueueLinksCalls;
|
|
153
147
|
constructor(config: Configuration, crawleeStateKey: string);
|
|
154
148
|
/**
|
|
155
149
|
* A record of calls to {@link RestrictedCrawlingContext.pushData}, {@link RestrictedCrawlingContext.addRequests}, {@link RestrictedCrawlingContext.enqueueLinks} made by a request handler.
|
|
@@ -157,7 +151,6 @@ export declare class RequestHandlerResult {
|
|
|
157
151
|
get calls(): ReadonlyDeep<{
|
|
158
152
|
pushData: Parameters<RestrictedCrawlingContext['pushData']>[];
|
|
159
153
|
addRequests: Parameters<RestrictedCrawlingContext['addRequests']>[];
|
|
160
|
-
enqueueLinks: Parameters<RestrictedCrawlingContext['enqueueLinks']>[];
|
|
161
154
|
}>;
|
|
162
155
|
/**
|
|
163
156
|
* A record of changes made to key-value stores by a request handler.
|
|
@@ -171,7 +164,7 @@ export declare class RequestHandlerResult {
|
|
|
171
164
|
*/
|
|
172
165
|
get datasetItems(): ReadonlyDeep<{
|
|
173
166
|
item: Dictionary;
|
|
174
|
-
|
|
167
|
+
datasetIdentifier?: string | StorageIdentifier;
|
|
175
168
|
}[]>;
|
|
176
169
|
/**
|
|
177
170
|
* URLs enqueued to the request queue by a request handler, either via {@link RestrictedCrawlingContext.addRequests} or {@link RestrictedCrawlingContext.enqueueLinks}
|
|
@@ -188,11 +181,9 @@ export declare class RequestHandlerResult {
|
|
|
188
181
|
label?: string;
|
|
189
182
|
}[]>;
|
|
190
183
|
pushData: RestrictedCrawlingContext['pushData'];
|
|
191
|
-
enqueueLinks: RestrictedCrawlingContext['enqueueLinks'];
|
|
192
184
|
addRequests: RestrictedCrawlingContext['addRequests'];
|
|
193
185
|
useState: RestrictedCrawlingContext['useState'];
|
|
194
186
|
getKeyValueStore: RestrictedCrawlingContext['getKeyValueStore'];
|
|
195
|
-
private idOrDefault;
|
|
196
187
|
private getKeyValueStoreChangedValue;
|
|
197
188
|
private setKeyValueStoreChangedValue;
|
|
198
189
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"crawler_commons.d.ts","sourceRoot":"","sources":["../../src/crawlers/crawler_commons.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"crawler_commons.d.ts","sourceRoot":"","sources":["../../src/crawlers/crawler_commons.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,kBAAkB,EAAE,QAAQ,EAAE,SAAS,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAC9G,OAAO,KAAK,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAE3D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,mCAAmC,CAAC;AAC7E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC/C,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,KAAK,aAAa,EAAE,MAAM,gCAAgC,CAAC;AACnF,OAAO,KAAK,EAAE,4BAA4B,EAAE,MAAM,iCAAiC,CAAC;AACpF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,yCAAyC,CAAC;AAEjF,gBAAgB;AAChB,MAAM,MAAM,KAAK,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,KAAK,CAAC;AAEtD,gBAAgB;AAChB,MAAM,MAAM,YAAY,CAAC,CAAC,EAAE,CAAC,SAAS,MAAM,CAAC,IAAI,CAAC,GAAG;KAAG,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;CAAE,CAAC;AAE1E,MAAM,MAAM,aAAa,CAAC,CAAC,SAAS,OAAO,IAAI,YAAY,CAAC,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC,CAAC;AAEnF,gBAAgB;AAChB,MAAM,MAAM,aAAa,CAAC,OAAO,SAAS,yBAAyB,IAC/D,KAAK,CAAC,OAAO,CAAC,SAAS,IAAI,GACrB,OAAO,GACP;IACI,OAAO,EAAE,aAAa,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC;CAC9C,GAAG,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;AAEvC,MAAM,WAAW,yBAAyB,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU;IAC/E,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,QAAQ,CAAC;IAElB;;;OAGG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IAEtB;;OAEG;IACH,OAAO,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IAE3B;;;;;;OAMG;IACH,QAAQ,CACJ,IAAI,EAAE,YAAY,CAAC,UAAU,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EACtD,iBAAiB,CAAC,EAAE,MAAM,GAAG,iBAAiB,GAC/C,OAAO,CAAC,IAAI,CAAC,CAAC;IAEjB;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACH,YAAY,EAAE,CACV,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,mBAAmB,EAAE,MAAM,CAAC,EAAE,cAAc,GAAG,eAAe,CAAC,CAAC,KACtG,OAAO,CAAC,OAAO,CAAC,CAAC;IAEtB;;;;;OAKG;IACH,WAAW,EAAE,CACT,YAAY,EAAE,YAAY,CAAC,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,CAAC,EAC/C,OAAO,CAAC,EAAE,YAAY,CAAC,4BAA4B,CAAC,KACnD,OAAO,CAAC,IAAI,CAAC,CAAC;IAEnB;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,SAAS,UAAU,GAAG,UAAU,EAAE,YAAY,CAAC,EAAE,KAAK,KAAK,OAAO,CAAC,KAAK,CAAC,CAAC;IAE1F;;OAEG;IACH,gBAAgB,EAAE,CACd,UAAU,CAAC,EAAE,MAAM,GAAG,iBAAiB,KACtC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,GAAG,MAAM,GAAG,UAAU,GAAG,mBAAmB,GAAG,UAAU,GAAG,cAAc,CAAC,CAAC,CAAC;IAElH;;OAEG;IACH,GAAG,EAAE,aAAa,CAAC;CACtB;AAED,MAAM,WAAW,eAAe,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU,CAAE,SAAQ,yBAAyB,CAAC,QAAQ,CAAC;IAClH;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACH,YAAY,CACR,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,mBAAmB,EAAE,MAAM,CAAC,EAAE,cAAc,GAAG,eAAe,CAAC,CAAC,GACnG,IAAI,CAAC,mBAAmB,EAAE,cAAc,GAAG,eAAe,CAAC,GAChE,OAAO,CAAC,OAAO,CAAC,CAAC;IAEpB;;;;;;;;;;;;;;;OAeG;IACH,WAAW,EAAE,CACT,gBAAgB,CAAC,EAAE,OAAO,CAAC,kBAAkB,CAAC,EAC9C,gBAAgB,CAAC,EAAE,kBAAkB,KACpC,OAAO,CAAC,QAAQ,CAAC,CAAC;IAEvB;;OAEG;IACH,uBAAuB,CAAC,OAAO,EAAE,MAAM,OAAO,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC;CAClE;AAED;;;;GAIG;AACH,qBAAa,oBAAoB;IASzB,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,eAAe;IAT3B,OAAO,CAAC,qBAAqB,CACtB;IAEP,OAAO,CAAC,aAAa,CAA2D;IAEhF,OAAO,CAAC,gBAAgB,CAA8D;gBAG1E,MAAM,EAAE,aAAa,EACrB,eAAe,EAAE,MAAM;IAGnC;;OAEG;IACH,IAAI,KAAK,IAAI,YAAY,CAAC;QACtB,QAAQ,EAAE,UAAU,CAAC,yBAAyB,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC;QAC9D,WAAW,EAAE,UAAU,CAAC,yBAAyB,CAAC,aAAa,CAAC,CAAC,EAAE,CAAC;KACvE,CAAC,CAKD;IAED;;OAEG;IACH,IAAI,oBAAoB,IAAI,YAAY,CACpC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE;QAAE,YAAY,EAAE,OAAO,CAAC;QAAC,OAAO,CAAC,EAAE,aAAa,CAAA;KAAE,CAAC,CAAC,CACrF,CAEA;IAED;;OAEG;IACH,IAAI,YAAY,IAAI,YAAY,CAAC;QAAE,IAAI,EAAE,UAAU,CAAC;QAAC,iBAAiB,CAAC,EAAE,MAAM,GAAG,iBAAiB,CAAA;KAAE,EAAE,CAAC,CAIvG;IAED;;OAEG;IACH,IAAI,YAAY,IAAI,YAAY,CAAC;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC,CAkBlE;IAED;;OAEG;IACH,IAAI,gBAAgB,IAAI,YAAY,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC,CAgB1E;IAED,QAAQ,EAAE,yBAAyB,CAAC,UAAU,CAAC,CAE7C;IAEF,WAAW,EAAE,yBAAyB,CAAC,aAAa,CAAC,CAEnD;IAEF,QAAQ,EAAE,yBAAyB,CAAC,UAAU,CAAC,CAG7C;IAEF,gBAAgB,EAAE,yBAAyB,CAAC,kBAAkB,CAAC,CAc7D;IAEF,OAAO,CAAC,4BAA4B,CAIlC;IAEF,OAAO,CAAC,4BAA4B,CASlC;CACL"}
|
|
@@ -10,7 +10,6 @@ export class RequestHandlerResult {
|
|
|
10
10
|
_keyValueStoreChanges = {};
|
|
11
11
|
pushDataCalls = [];
|
|
12
12
|
addRequestsCalls = [];
|
|
13
|
-
enqueueLinksCalls = [];
|
|
14
13
|
constructor(config, crawleeStateKey) {
|
|
15
14
|
this.config = config;
|
|
16
15
|
this.crawleeStateKey = crawleeStateKey;
|
|
@@ -22,7 +21,6 @@ export class RequestHandlerResult {
|
|
|
22
21
|
return {
|
|
23
22
|
pushData: this.pushDataCalls,
|
|
24
23
|
addRequests: this.addRequestsCalls,
|
|
25
|
-
enqueueLinks: this.enqueueLinksCalls,
|
|
26
24
|
};
|
|
27
25
|
}
|
|
28
26
|
/**
|
|
@@ -35,16 +33,13 @@ export class RequestHandlerResult {
|
|
|
35
33
|
* Items added to datasets by a request handler.
|
|
36
34
|
*/
|
|
37
35
|
get datasetItems() {
|
|
38
|
-
return this.pushDataCalls.flatMap(([data,
|
|
36
|
+
return this.pushDataCalls.flatMap(([data, datasetIdentifier]) => (Array.isArray(data) ? data : [data]).map((item) => ({ item, datasetIdentifier })));
|
|
39
37
|
}
|
|
40
38
|
/**
|
|
41
39
|
* URLs enqueued to the request queue by a request handler, either via {@link RestrictedCrawlingContext.addRequests} or {@link RestrictedCrawlingContext.enqueueLinks}
|
|
42
40
|
*/
|
|
43
41
|
get enqueuedUrls() {
|
|
44
42
|
const result = [];
|
|
45
|
-
for (const [options] of this.enqueueLinksCalls) {
|
|
46
|
-
result.push(...(options?.urls?.map((url) => ({ url, label: options?.label })) ?? []));
|
|
47
|
-
}
|
|
48
43
|
for (const [requests] of this.addRequestsCalls) {
|
|
49
44
|
for (const request of requests) {
|
|
50
45
|
if (typeof request === 'object' &&
|
|
@@ -78,9 +73,6 @@ export class RequestHandlerResult {
|
|
|
78
73
|
pushData = async (data, datasetIdOrName) => {
|
|
79
74
|
this.pushDataCalls.push([data, datasetIdOrName]);
|
|
80
75
|
};
|
|
81
|
-
enqueueLinks = async (options) => {
|
|
82
|
-
this.enqueueLinksCalls.push([options]);
|
|
83
|
-
};
|
|
84
76
|
addRequests = async (requests, options = {}) => {
|
|
85
77
|
this.addRequestsCalls.push([requests, options]);
|
|
86
78
|
};
|
|
@@ -88,27 +80,27 @@ export class RequestHandlerResult {
|
|
|
88
80
|
const store = await this.getKeyValueStore(undefined);
|
|
89
81
|
return await store.getAutoSavedValue(this.crawleeStateKey, defaultValue);
|
|
90
82
|
};
|
|
91
|
-
getKeyValueStore = async (
|
|
92
|
-
const store = await KeyValueStore.open(
|
|
83
|
+
getKeyValueStore = async (identifier) => {
|
|
84
|
+
const store = await KeyValueStore.open(identifier, { config: this.config });
|
|
85
|
+
const storeId = store.id;
|
|
93
86
|
return {
|
|
94
|
-
id: this.
|
|
95
|
-
name:
|
|
96
|
-
getValue: async (key) => this.getKeyValueStoreChangedValue(
|
|
87
|
+
id: storeId ?? this.config.defaultKeyValueStoreId,
|
|
88
|
+
name: store.name,
|
|
89
|
+
getValue: async (key) => this.getKeyValueStoreChangedValue(storeId, key) ?? (await store.getValue(key)),
|
|
97
90
|
setValue: async (key, value, options) => {
|
|
98
|
-
this.setKeyValueStoreChangedValue(
|
|
91
|
+
this.setKeyValueStoreChangedValue(storeId, key, value, options);
|
|
99
92
|
},
|
|
100
93
|
getAutoSavedValue: store.getAutoSavedValue.bind(store),
|
|
101
94
|
getPublicUrl: store.getPublicUrl.bind(store),
|
|
102
95
|
};
|
|
103
96
|
};
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
const id = this.idOrDefault(idOrName);
|
|
97
|
+
getKeyValueStoreChangedValue = (storeKey, key) => {
|
|
98
|
+
const id = storeKey ?? this.config.defaultKeyValueStoreId;
|
|
107
99
|
this._keyValueStoreChanges[id] ??= {};
|
|
108
100
|
return this.keyValueStoreChanges[id][key]?.changedValue ?? null;
|
|
109
101
|
};
|
|
110
|
-
setKeyValueStoreChangedValue = (
|
|
111
|
-
const id = this.
|
|
102
|
+
setKeyValueStoreChangedValue = (storeKey, key, changedValue, options) => {
|
|
103
|
+
const id = storeKey ?? this.config.defaultKeyValueStoreId;
|
|
112
104
|
this._keyValueStoreChanges[id] ??= {};
|
|
113
105
|
this._keyValueStoreChanges[id][key] = { changedValue, options };
|
|
114
106
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"crawler_commons.js","sourceRoot":"","sources":["../../src/crawlers/crawler_commons.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"crawler_commons.js","sourceRoot":"","sources":["../../src/crawlers/crawler_commons.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,aAAa,EAAsB,MAAM,gCAAgC,CAAC;AAkKnF;;;;GAIG;AACH,MAAM,OAAO,oBAAoB;IASjB;IACA;IATJ,qBAAqB,GACzB,EAAE,CAAC;IAEC,aAAa,GAAwD,EAAE,CAAC;IAExE,gBAAgB,GAA2D,EAAE,CAAC;IAEtF,YACY,MAAqB,EACrB,eAAuB;QADvB,WAAM,GAAN,MAAM,CAAe;QACrB,oBAAe,GAAf,eAAe,CAAQ;IAChC,CAAC;IAEJ;;OAEG;IACH,IAAI,KAAK;QAIL,OAAO;YACH,QAAQ,EAAE,IAAI,CAAC,aAAa;YAC5B,WAAW,EAAE,IAAI,CAAC,gBAAgB;SACrC,CAAC;IACN,CAAC;IAED;;OAEG;IACH,IAAI,oBAAoB;QAGpB,OAAO,IAAI,CAAC,qBAAqB,CAAC;IACtC,CAAC;IAED;;OAEG;IACH,IAAI,YAAY;QACZ,OAAO,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,iBAAiB,CAAC,EAAE,EAAE,CAC5D,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,iBAAiB,EAAE,CAAC,CAAC,CACrF,CAAC;IACN,CAAC;IAED;;OAEG;IACH,IAAI,YAAY;QACZ,MAAM,MAAM,GAAsC,EAAE,CAAC;QAErD,KAAK,MAAM,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC7C,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IACI,OAAO,OAAO,KAAK,QAAQ;oBAC3B,CAAC,CAAC,CAAC,iBAAiB,IAAI,OAAO,CAAC,IAAI,OAAO,CAAC,eAAe,KAAK,SAAS,CAAC;oBAC1E,OAAO,CAAC,GAAG,KAAK,SAAS,EAC3B,CAAC;oBACC,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC5D,CAAC;qBAAM,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;oBACrC,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;gBAClC,CAAC;YACL,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,IAAI,gBAAgB;QAChB,MAAM,MAAM,GAA0C,EAAE,CAAC;QAEzD,KAAK,MAAM,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC7C,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IACI,OAAO,OAAO,KAAK,QAAQ;oBAC3B,iBAAiB,IAAI,OAAO;oBAC5B,OAAO,CAAC,eAAe,KAAK,SAAS,EACvC,CAAC;oBACC,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,eAAe,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC5E,CAAC;YACL,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAED,QAAQ,GAA0C,KAAK,EAAE,IAAI,EAAE,eAAe,EAAE,EAAE;QAC9E,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,eAAe,CAAC,CAAC,CAAC;IACrD,CAAC,CAAC;IAEF,WAAW,GAA6C,KAAK,EAAE,QAAQ,EAAE,OAAO,GAAG,EAAE,EAAE,EAAE;QACrF,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;IACpD,CAAC,CAAC;IAEF,QAAQ,GAA0C,KAAK,EAAE,YAAY,EAAE,EAAE;QACrE,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;QACrD,OAAO,MAAM,KAAK,CAAC,iBAAiB,CAAC,IAAI,CAAC,eAAe,EAAE,YAAY,CAAC,CAAC;IAC7E,CAAC,CAAC;IAEF,gBAAgB,GAAkD,KAAK,EAAE,UAAU,EAAE,EAAE;QACnF,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,IAAI,CAAC,UAAU,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QAC5E,MAAM,OAAO,GAAG,KAAK,CAAC,EAAE,CAAC;QAEzB,OAAO;YACH,EAAE,EAAE,OAAO,IAAI,IAAI,CAAC,MAAM,CAAC,sBAAsB;YACjD,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,QAAQ,EAAE,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,4BAA4B,CAAC,OAAO,EAAE,GAAG,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;YACvG,QAAQ,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;gBACpC,IAAI,CAAC,4BAA4B,CAAC,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;YACpE,CAAC;YACD,iBAAiB,EAAE,KAAK,CAAC,iBAAiB,CAAC,IAAI,CAAC,KAAK,CAAC;YACtD,YAAY,EAAE,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC;SAC/C,CAAC;IACN,CAAC,CAAC;IAEM,4BAA4B,GAAG,CAAC,QAA4B,EAAE,GAAW,EAAE,EAAE;QACjF,MAAM,EAAE,GAAG,QAAQ,IAAI,IAAI,CAAC,MAAM,CAAC,sBAAsB,CAAC;QAC1D,IAAI,CAAC,qBAAqB,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC;QACtC,OAAO,IAAI,CAAC,oBAAoB,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,YAAY,IAAI,IAAI,CAAC;IACpE,CAAC,CAAC;IAEM,4BAA4B,GAAG,CACnC,QAA4B,EAC5B,GAAW,EACX,YAAqB,EACrB,OAAuB,EACzB,EAAE;QACA,MAAM,EAAE,GAAG,QAAQ,IAAI,IAAI,CAAC,MAAM,CAAC,sBAAsB,CAAC;QAC1D,IAAI,CAAC,qBAAqB,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC;QACtC,IAAI,CAAC,qBAAqB,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;IACpE,CAAC,CAAC;CACL"}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { ISession } from '@crawlee/types';
|
|
2
2
|
/**
|
|
3
3
|
* Handles timeout request
|
|
4
4
|
* @internal
|
|
5
5
|
*/
|
|
6
6
|
export declare function handleRequestTimeout({ session, errorMessage }: {
|
|
7
|
-
session?:
|
|
7
|
+
session?: ISession;
|
|
8
8
|
errorMessage: string;
|
|
9
9
|
}): void;
|
|
10
10
|
//# sourceMappingURL=crawler_utils.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"crawler_utils.d.ts","sourceRoot":"","sources":["../../src/crawlers/crawler_utils.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"crawler_utils.d.ts","sourceRoot":"","sources":["../../src/crawlers/crawler_utils.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,gBAAgB,CAAC;AAI/C;;;GAGG;AACH,wBAAgB,oBAAoB,CAAC,EAAE,OAAO,EAAE,YAAY,EAAE,EAAE;IAAE,OAAO,CAAC,EAAE,QAAQ,CAAC;IAAC,YAAY,EAAE,MAAM,CAAA;CAAE,QAK3G"}
|
|
@@ -5,7 +5,7 @@ import { TimeoutError } from '@apify/timeout';
|
|
|
5
5
|
*/
|
|
6
6
|
export function handleRequestTimeout({ session, errorMessage }) {
|
|
7
7
|
session?.markBad();
|
|
8
|
-
const timeoutMillis =
|
|
8
|
+
const timeoutMillis = /(\d+)\s?ms/.exec(errorMessage)?.[1]; // first capturing group
|
|
9
9
|
const timeoutSecs = Number(timeoutMillis) / 1000;
|
|
10
10
|
throw new TimeoutError(`Navigation timed out after ${timeoutSecs} seconds.`);
|
|
11
11
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"crawler_utils.js","sourceRoot":"","sources":["../../src/crawlers/crawler_utils.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"crawler_utils.js","sourceRoot":"","sources":["../../src/crawlers/crawler_utils.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAE9C;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAAC,EAAE,OAAO,EAAE,YAAY,EAAgD;IACxG,OAAO,EAAE,OAAO,EAAE,CAAC;IACnB,MAAM,aAAa,GAAG,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,wBAAwB;IACpF,MAAM,WAAW,GAAG,MAAM,CAAC,aAAa,CAAC,GAAG,IAAI,CAAC;IACjD,MAAM,IAAI,YAAY,CAAC,8BAA8B,WAAW,WAAW,CAAC,CAAC;AACjF,CAAC"}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { CrawlingContext } from '../crawlers/crawler_commons.js';
|
|
2
2
|
import type { KeyValueStore } from '../storages/key_value_store.js';
|
|
3
3
|
import type { ErrnoException } from './error_tracker.js';
|
|
4
|
+
import type { SnapshottableProperties } from './internals/types.js';
|
|
4
5
|
interface BrowserCrawlingContext {
|
|
5
6
|
saveSnapshot: (options: {
|
|
6
7
|
key: string;
|
|
@@ -39,7 +40,7 @@ export declare class ErrorSnapshotter {
|
|
|
39
40
|
/**
|
|
40
41
|
* Capture a snapshot of the error context.
|
|
41
42
|
*/
|
|
42
|
-
captureSnapshot(error: ErrnoException, context: CrawlingContext): Promise<ErrorSnapshot>;
|
|
43
|
+
captureSnapshot(error: ErrnoException, context: CrawlingContext & SnapshottableProperties): Promise<ErrorSnapshot>;
|
|
43
44
|
/**
|
|
44
45
|
* Captures a snapshot of the current page using the context.saveSnapshot function.
|
|
45
46
|
* This function is applicable for browser contexts only.
|
|
@@ -49,7 +50,7 @@ export declare class ErrorSnapshotter {
|
|
|
49
50
|
/**
|
|
50
51
|
* Save the HTML snapshot of the page, and return the fileName with the extension.
|
|
51
52
|
*/
|
|
52
|
-
saveHTMLSnapshot(html: string, keyValueStore: KeyValueStore, fileName: string): Promise<string | undefined>;
|
|
53
|
+
saveHTMLSnapshot(html: string, keyValueStore: Pick<KeyValueStore, 'setValue'>, fileName: string): Promise<string | undefined>;
|
|
53
54
|
/**
|
|
54
55
|
* Generate a unique fileName for each error snapshot.
|
|
55
56
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"error_snapshotter.d.ts","sourceRoot":"","sources":["../../src/crawlers/error_snapshotter.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,gCAAgC,CAAC;AACtE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"error_snapshotter.d.ts","sourceRoot":"","sources":["../../src/crawlers/error_snapshotter.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,gCAAgC,CAAC;AACtE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,sBAAsB,CAAC;AAGpE,UAAU,sBAAsB;IAC5B,YAAY,EAAE,CAAC,OAAO,EAAE;QAAE,GAAG,EAAE,MAAM,CAAA;KAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;CAC7D;AAED,MAAM,WAAW,cAAc;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,YAAY,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,UAAU,aAAa;IACnB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;;;;;;;;;;;;GAaG;AACH,qBAAa,gBAAgB;IACzB,MAAM,CAAC,QAAQ,CAAC,oBAAoB,MAAM;IAC1C,MAAM,CAAC,QAAQ,CAAC,eAAe,MAAM;IACrC,MAAM,CAAC,QAAQ,CAAC,mBAAmB,OAAO;IAC1C,MAAM,CAAC,QAAQ,CAAC,YAAY,uBAAuB;IACnD,MAAM,CAAC,QAAQ,CAAC,eAAe,oBAAoB;IAEnD;;OAEG;IACG,eAAe,CACjB,KAAK,EAAE,cAAc,EACrB,OAAO,EAAE,eAAe,GAAG,uBAAuB,GACnD,OAAO,CAAC,aAAa,CAAC;IAgDzB;;;;OAIG;IACG,sBAAsB,CACxB,OAAO,EAAE,sBAAsB,EAC/B,QAAQ,EAAE,MAAM,GACjB,OAAO,CAAC,cAAc,GAAG,SAAS,CAAC;IAYtC;;OAEG;IACG,gBAAgB,CAClB,IAAI,EAAE,MAAM,EACZ,aAAa,EAAE,IAAI,CAAC,aAAa,EAAE,UAAU,CAAC,EAC9C,QAAQ,EAAE,MAAM,GACjB,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;IAS9B;;OAEG;IACH,gBAAgB,CAAC,KAAK,EAAE,cAAc,GAAG,MAAM;CAyBlD"}
|
|
@@ -52,9 +52,9 @@ export class ErrorSnapshotter {
|
|
|
52
52
|
}
|
|
53
53
|
return {
|
|
54
54
|
screenshotFileName,
|
|
55
|
-
screenshotFileUrl: screenshotFileName && keyValueStore.getPublicUrl(screenshotFileName),
|
|
55
|
+
screenshotFileUrl: screenshotFileName && (await keyValueStore.getPublicUrl(screenshotFileName)),
|
|
56
56
|
htmlFileName,
|
|
57
|
-
htmlFileUrl: htmlFileName && keyValueStore.getPublicUrl(htmlFileName),
|
|
57
|
+
htmlFileUrl: htmlFileName && (await keyValueStore.getPublicUrl(htmlFileName)),
|
|
58
58
|
};
|
|
59
59
|
}
|
|
60
60
|
catch {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"error_snapshotter.js","sourceRoot":"","sources":["../../src/crawlers/error_snapshotter.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"error_snapshotter.js","sourceRoot":"","sources":["../../src/crawlers/error_snapshotter.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,aAAa,CAAC;AAwBjC;;;;;;;;;;;;;GAaG;AACH,MAAM,OAAO,gBAAgB;IACzB,MAAM,CAAU,oBAAoB,GAAG,EAAE,CAAC;IAC1C,MAAM,CAAU,eAAe,GAAG,EAAE,CAAC;IACrC,MAAM,CAAU,mBAAmB,GAAG,GAAG,CAAC;IAC1C,MAAM,CAAU,YAAY,GAAG,mBAAmB,CAAC;IACnD,MAAM,CAAU,eAAe,GAAG,gBAAgB,CAAC;IAEnD;;OAEG;IACH,KAAK,CAAC,eAAe,CACjB,KAAqB,EACrB,OAAkD;QAElD,IAAI,CAAC;YACD,MAAM,IAAI,GAAG,OAAO,EAAE,IAAI,CAAC;YAC3B,MAAM,IAAI,GAAG,OAAO,EAAE,IAAI,CAAC;YAE3B,MAAM,aAAa,GAAG,MAAM,OAAO,EAAE,gBAAgB,EAAE,CAAC;YACxD,0GAA0G;YAC1G,IAAI,CAAC,aAAa,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBACrC,OAAO,EAAE,CAAC;YACd,CAAC;YAED,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;YAE9C,IAAI,kBAAsC,CAAC;YAC3C,IAAI,YAAgC,CAAC;YAErC,IAAI,IAAI,EAAE,CAAC;gBACP,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,sBAAsB,CACnD,OAA4C,EAC5C,QAAQ,CACX,CAAC;gBAEF,IAAI,aAAa,EAAE,CAAC;oBAChB,kBAAkB,GAAG,aAAa,CAAC,kBAAkB,CAAC;oBACtD,YAAY,GAAG,aAAa,CAAC,YAAY,CAAC;gBAC9C,CAAC;gBAED,mGAAmG;gBACnG,IAAI,CAAC,YAAY,EAAE,CAAC;oBAChB,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;oBAClC,YAAY,GAAG,IAAI,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,aAAa,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;gBACjG,CAAC;YACL,CAAC;iBAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAClC,2BAA2B;gBAC3B,YAAY,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,aAAa,EAAE,QAAQ,CAAC,CAAC;YAC9E,CAAC;YAED,OAAO;gBACH,kBAAkB;gBAClB,iBAAiB,EAAE,kBAAkB,IAAI,CAAC,MAAM,aAAa,CAAC,YAAY,CAAC,kBAAkB,CAAC,CAAC;gBAC/F,YAAY;gBACZ,WAAW,EAAE,YAAY,IAAI,CAAC,MAAM,aAAa,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;aAChF,CAAC;QACN,CAAC;QAAC,MAAM,CAAC;YACL,OAAO,EAAE,CAAC;QACd,CAAC;IACL,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,sBAAsB,CACxB,OAA+B,EAC/B,QAAgB;QAEhB,IAAI,CAAC;YACD,MAAM,OAAO,CAAC,YAAY,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,CAAC;YAC9C,OAAO;gBACH,kBAAkB,EAAE,GAAG,QAAQ,MAAM;gBACrC,YAAY,EAAE,GAAG,QAAQ,OAAO;aACnC,CAAC;QACN,CAAC;QAAC,MAAM,CAAC;YACL,OAAO,SAAS,CAAC;QACrB,CAAC;IACL,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,gBAAgB,CAClB,IAAY,EACZ,aAA8C,EAC9C,QAAgB;QAEhB,IAAI,CAAC;YACD,MAAM,aAAa,CAAC,QAAQ,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE,WAAW,EAAE,WAAW,EAAE,CAAC,CAAC;YAC3E,OAAO,GAAG,QAAQ,OAAO,CAAC;QAC9B,CAAC;QAAC,MAAM,CAAC;YACL,OAAO,SAAS,CAAC;QACrB,CAAC;IACL,CAAC;IAED;;OAEG;IACH,gBAAgB,CAAC,KAAqB;QAClC,MAAM,EAAE,eAAe,EAAE,YAAY,EAAE,eAAe,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,GAC/F,gBAAgB,CAAC;QACrB,yCAAyC;QACzC,MAAM,cAAc,GAAG,MAAM;aACxB,UAAU,CAAC,MAAM,CAAC;aAClB,MAAM,CAAC,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,OAAO,IAAI,EAAE,CAAC;aAC1C,MAAM,CAAC,KAAK,CAAC;aACb,KAAK,CAAC,CAAC,EAAE,eAAe,CAAC,CAAC;QAC/B,MAAM,kBAAkB,GAAG,CAAC,KAAK,CAAC,OAAO,IAAI,YAAY,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,oBAAoB,CAAC,CAAC,IAAI,EAAE,CAAC;QAEjG;;WAEG;QACH,MAAM,cAAc,GAAG,CAAC,GAAW,EAAU,EAAE;YAC3C,OAAO,GAAG,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;QACzC,CAAC,CAAC;QAEF,qDAAqD;QACrD,MAAM,QAAQ,GAAG,GAAG,eAAe,IAAI,cAAc,CAAC,cAAc,CAAC,IAAI,cAAc,CAAC,kBAAkB,CAAC,EAAE;aACxG,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,0CAA0C;aAC/D,KAAK,CAAC,CAAC,EAAE,mBAAmB,CAAC,CAAC;QAEnC,OAAO,QAAQ,CAAC;IACpB,CAAC"}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { CrawlingContext } from '../crawlers/crawler_commons.js';
|
|
2
2
|
import { ErrorSnapshotter } from './error_snapshotter.js';
|
|
3
|
+
import type { SnapshottableProperties } from './internals/types.js';
|
|
3
4
|
/**
|
|
4
5
|
* Node.js Error interface
|
|
5
6
|
*/
|
|
@@ -48,7 +49,7 @@ export declare class ErrorTracker {
|
|
|
48
49
|
addAsync(error: ErrnoException, context?: CrawlingContext): Promise<void>;
|
|
49
50
|
getUniqueErrorCount(): number;
|
|
50
51
|
getMostPopularErrors(count: number): [number, string[]][];
|
|
51
|
-
captureSnapshot(storage: Record<string, unknown>, error: ErrnoException, context: CrawlingContext): Promise<void>;
|
|
52
|
+
captureSnapshot(storage: Record<string, unknown>, error: ErrnoException, context: CrawlingContext & SnapshottableProperties): Promise<void>;
|
|
52
53
|
reset(): void;
|
|
53
54
|
}
|
|
54
55
|
//# sourceMappingURL=error_tracker.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"error_tracker.d.ts","sourceRoot":"","sources":["../../src/crawlers/error_tracker.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,gCAAgC,CAAC;AACtE,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;
|
|
1
|
+
{"version":3,"file":"error_tracker.d.ts","sourceRoot":"","sources":["../../src/crawlers/error_tracker.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,gCAAgC,CAAC;AACtE,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,sBAAsB,CAAC;AAEpE;;GAEG;AACH,MAAM,WAAW,cAAe,SAAQ,KAAK;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;IACvB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,GAAG,CAAC;CACf;AAED,MAAM,WAAW,mBAAmB;IAChC,aAAa,EAAE,OAAO,CAAC;IACvB,aAAa,EAAE,OAAO,CAAC;IACvB,cAAc,EAAE,OAAO,CAAC;IACxB,aAAa,EAAE,OAAO,CAAC;IACvB,gBAAgB,EAAE,OAAO,CAAC;IAC1B,eAAe,EAAE,OAAO,CAAC;IACzB,kBAAkB,EAAE,OAAO,CAAC;CAC/B;AAuPD;;;;;;;;;;;;;GAaG;AACH,qBAAa,YAAY;;IAGrB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAEhC,KAAK,EAAE,MAAM,CAAC;IAEd,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;gBAExB,OAAO,GAAE,OAAO,CAAC,mBAAmB,CAAM;IAoBtD,OAAO,CAAC,WAAW;IAwBnB,GAAG,CAAC,KAAK,EAAE,cAAc;IAUzB;;;OAGG;IACG,QAAQ,CAAC,KAAK,EAAE,cAAc,EAAE,OAAO,CAAC,EAAE,eAAe;IAe/D,mBAAmB;IAoBnB,oBAAoB,CAAC,KAAK,EAAE,MAAM;IAoB5B,eAAe,CACjB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAChC,KAAK,EAAE,cAAc,EACrB,OAAO,EAAE,eAAe,GAAG,uBAAuB;IAYtD,KAAK;CAOR"}
|