@crawlee/core 3.13.3-beta.9 → 3.13.4-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/autoscaling/autoscaled_pool.d.ts +16 -16
- package/autoscaling/autoscaled_pool.js +13 -13
- package/autoscaling/snapshotter.d.ts +1 -1
- package/autoscaling/snapshotter.js +1 -1
- package/autoscaling/system_status.d.ts +12 -12
- package/autoscaling/system_status.js +11 -11
- package/configuration.d.ts +10 -10
- package/configuration.js +4 -4
- package/crawlers/crawler_commons.d.ts +12 -12
- package/crawlers/crawler_commons.d.ts.map +1 -1
- package/crawlers/crawler_commons.js +5 -12
- package/crawlers/crawler_commons.js.map +1 -1
- package/crawlers/statistics.d.ts +5 -17
- package/crawlers/statistics.d.ts.map +1 -1
- package/crawlers/statistics.js +9 -20
- package/crawlers/statistics.js.map +1 -1
- package/enqueue_links/enqueue_links.d.ts +14 -14
- package/enqueue_links/enqueue_links.js +5 -5
- package/enqueue_links/shared.d.ts +2 -2
- package/http_clients/base-http-client.d.ts +7 -7
- package/http_clients/base-http-client.js +1 -1
- package/package.json +5 -5
- package/proxy_configuration.d.ts +11 -11
- package/proxy_configuration.js +8 -8
- package/request.d.ts +3 -3
- package/request.js +2 -2
- package/session_pool/session.d.ts +1 -1
- package/session_pool/session_pool.d.ts +12 -12
- package/session_pool/session_pool.js +10 -10
- package/storages/dataset.d.ts +15 -15
- package/storages/dataset.js +9 -9
- package/storages/key_value_store.d.ts +32 -32
- package/storages/key_value_store.js +22 -22
- package/storages/request_list.d.ts +35 -35
- package/storages/request_list.js +19 -19
- package/storages/request_provider.d.ts +19 -19
- package/storages/request_provider.js +12 -12
- package/storages/request_queue.d.ts +16 -16
- package/storages/request_queue.js +16 -16
- package/storages/request_queue_v2.d.ts +7 -7
- package/storages/request_queue_v2.js +7 -7
- package/storages/utils.d.ts +2 -2
- package/tsconfig.build.tsbuildinfo +1 -1
|
@@ -20,7 +20,7 @@ export interface AutoscaledPoolOptions {
|
|
|
20
20
|
* If it resolves to `true` then the pool's run finishes. Being called only
|
|
21
21
|
* when there are no tasks being processed means that as long as `isTaskReadyFunction()`
|
|
22
22
|
* keeps resolving to `true`, `isFinishedFunction()` will never be called.
|
|
23
|
-
* To abort a run, use the {@
|
|
23
|
+
* To abort a run, use the {@link AutoscaledPool.abort} method.
|
|
24
24
|
*/
|
|
25
25
|
isFinishedFunction?: () => Promise<boolean>;
|
|
26
26
|
/**
|
|
@@ -84,12 +84,12 @@ export interface AutoscaledPoolOptions {
|
|
|
84
84
|
*/
|
|
85
85
|
taskTimeoutSecs?: number;
|
|
86
86
|
/**
|
|
87
|
-
* Options to be passed down to the {@
|
|
87
|
+
* Options to be passed down to the {@link Snapshotter} constructor. This is useful for fine-tuning
|
|
88
88
|
* the snapshot intervals and history.
|
|
89
89
|
*/
|
|
90
90
|
snapshotterOptions?: SnapshotterOptions;
|
|
91
91
|
/**
|
|
92
|
-
* Options to be passed down to the {@
|
|
92
|
+
* Options to be passed down to the {@link SystemStatus} constructor. This is useful for fine-tuning
|
|
93
93
|
* the system status reports. If a custom snapshotter is set in the options, it will be used
|
|
94
94
|
* by the pool.
|
|
95
95
|
*/
|
|
@@ -106,22 +106,22 @@ export interface AutoscaledPoolOptions {
|
|
|
106
106
|
* The pool only starts new tasks if there is enough free CPU and memory available
|
|
107
107
|
* and the Javascript event loop is not blocked.
|
|
108
108
|
*
|
|
109
|
-
* The information about the CPU and memory usage is obtained by the {@
|
|
109
|
+
* The information about the CPU and memory usage is obtained by the {@link Snapshotter} class,
|
|
110
110
|
* which makes regular snapshots of system resources that may be either local
|
|
111
111
|
* or from the Apify cloud infrastructure in case the process is running on the Apify platform.
|
|
112
|
-
* Meaningful data gathered from these snapshots is provided to `AutoscaledPool` by the {@
|
|
112
|
+
* Meaningful data gathered from these snapshots is provided to `AutoscaledPool` by the {@link SystemStatus} class.
|
|
113
113
|
*
|
|
114
114
|
* Before running the pool, you need to implement the following three functions:
|
|
115
|
-
* {@
|
|
116
|
-
* {@
|
|
117
|
-
* {@
|
|
115
|
+
* {@link AutoscaledPoolOptions.runTaskFunction},
|
|
116
|
+
* {@link AutoscaledPoolOptions.isTaskReadyFunction} and
|
|
117
|
+
* {@link AutoscaledPoolOptions.isFinishedFunction}.
|
|
118
118
|
*
|
|
119
|
-
* The auto-scaled pool is started by calling the {@
|
|
120
|
-
* The pool periodically queries the {@
|
|
119
|
+
* The auto-scaled pool is started by calling the {@link AutoscaledPool.run} function.
|
|
120
|
+
* The pool periodically queries the {@link AutoscaledPoolOptions.isTaskReadyFunction} function
|
|
121
121
|
* for more tasks, managing optimal concurrency, until the function resolves to `false`. The pool then queries
|
|
122
|
-
* the {@
|
|
122
|
+
* the {@link AutoscaledPoolOptions.isFinishedFunction}. If it resolves to `true`, the run finishes after all running tasks complete.
|
|
123
123
|
* If it resolves to `false`, it assumes there will be more tasks available later and keeps periodically querying for tasks.
|
|
124
|
-
* If any of the tasks throws then the {@
|
|
124
|
+
* If any of the tasks throws then the {@link AutoscaledPool.run} function rejects the promise with an error.
|
|
125
125
|
*
|
|
126
126
|
* The pool evaluates whether it should start a new task every time one of the tasks finishes
|
|
127
127
|
* and also in the interval set by the `options.maybeRunIntervalSecs` parameter.
|
|
@@ -220,7 +220,7 @@ export declare class AutoscaledPool {
|
|
|
220
220
|
run(): Promise<void>;
|
|
221
221
|
/**
|
|
222
222
|
* Aborts the run of the auto-scaled pool and destroys it. The promise returned from
|
|
223
|
-
* the {@
|
|
223
|
+
* the {@link AutoscaledPool.run} function will immediately resolve, no more new tasks
|
|
224
224
|
* will be spawned and all running tasks will be left in their current state.
|
|
225
225
|
*
|
|
226
226
|
* Due to the nature of the tasks, auto-scaled pool cannot reliably guarantee abortion
|
|
@@ -232,19 +232,19 @@ export declare class AutoscaledPool {
|
|
|
232
232
|
abort(): Promise<void>;
|
|
233
233
|
/**
|
|
234
234
|
* Prevents the auto-scaled pool from starting new tasks, but allows the running ones to finish
|
|
235
|
-
* (unlike abort, which terminates them). Used together with {@
|
|
235
|
+
* (unlike abort, which terminates them). Used together with {@link AutoscaledPool.resume}
|
|
236
236
|
*
|
|
237
237
|
* The function's promise will resolve once all running tasks have completed and the pool
|
|
238
238
|
* is effectively idle. If the `timeoutSecs` argument is provided, the promise will reject
|
|
239
239
|
* with a timeout error after the `timeoutSecs` seconds.
|
|
240
240
|
*
|
|
241
|
-
* The promise returned from the {@
|
|
241
|
+
* The promise returned from the {@link AutoscaledPool.run} function will not resolve
|
|
242
242
|
* when `.pause()` is invoked (unlike abort, which resolves it).
|
|
243
243
|
*/
|
|
244
244
|
pause(timeoutSecs?: number): Promise<void>;
|
|
245
245
|
/**
|
|
246
246
|
* Resumes the operation of the autoscaled-pool by allowing more tasks to be run.
|
|
247
|
-
* Used together with {@
|
|
247
|
+
* Used together with {@link AutoscaledPool.pause}
|
|
248
248
|
*
|
|
249
249
|
* Tasks will automatically start running again in `options.maybeRunIntervalSecs`.
|
|
250
250
|
*/
|
|
@@ -15,22 +15,22 @@ const system_status_1 = require("./system_status");
|
|
|
15
15
|
* The pool only starts new tasks if there is enough free CPU and memory available
|
|
16
16
|
* and the Javascript event loop is not blocked.
|
|
17
17
|
*
|
|
18
|
-
* The information about the CPU and memory usage is obtained by the {@
|
|
18
|
+
* The information about the CPU and memory usage is obtained by the {@link Snapshotter} class,
|
|
19
19
|
* which makes regular snapshots of system resources that may be either local
|
|
20
20
|
* or from the Apify cloud infrastructure in case the process is running on the Apify platform.
|
|
21
|
-
* Meaningful data gathered from these snapshots is provided to `AutoscaledPool` by the {@
|
|
21
|
+
* Meaningful data gathered from these snapshots is provided to `AutoscaledPool` by the {@link SystemStatus} class.
|
|
22
22
|
*
|
|
23
23
|
* Before running the pool, you need to implement the following three functions:
|
|
24
|
-
* {@
|
|
25
|
-
* {@
|
|
26
|
-
* {@
|
|
24
|
+
* {@link AutoscaledPoolOptions.runTaskFunction},
|
|
25
|
+
* {@link AutoscaledPoolOptions.isTaskReadyFunction} and
|
|
26
|
+
* {@link AutoscaledPoolOptions.isFinishedFunction}.
|
|
27
27
|
*
|
|
28
|
-
* The auto-scaled pool is started by calling the {@
|
|
29
|
-
* The pool periodically queries the {@
|
|
28
|
+
* The auto-scaled pool is started by calling the {@link AutoscaledPool.run} function.
|
|
29
|
+
* The pool periodically queries the {@link AutoscaledPoolOptions.isTaskReadyFunction} function
|
|
30
30
|
* for more tasks, managing optimal concurrency, until the function resolves to `false`. The pool then queries
|
|
31
|
-
* the {@
|
|
31
|
+
* the {@link AutoscaledPoolOptions.isFinishedFunction}. If it resolves to `true`, the run finishes after all running tasks complete.
|
|
32
32
|
* If it resolves to `false`, it assumes there will be more tasks available later and keeps periodically querying for tasks.
|
|
33
|
-
* If any of the tasks throws then the {@
|
|
33
|
+
* If any of the tasks throws then the {@link AutoscaledPool.run} function rejects the promise with an error.
|
|
34
34
|
*
|
|
35
35
|
* The pool evaluates whether it should start a new task every time one of the tasks finishes
|
|
36
36
|
* and also in the interval set by the `options.maybeRunIntervalSecs` parameter.
|
|
@@ -373,7 +373,7 @@ class AutoscaledPool {
|
|
|
373
373
|
}
|
|
374
374
|
/**
|
|
375
375
|
* Aborts the run of the auto-scaled pool and destroys it. The promise returned from
|
|
376
|
-
* the {@
|
|
376
|
+
* the {@link AutoscaledPool.run} function will immediately resolve, no more new tasks
|
|
377
377
|
* will be spawned and all running tasks will be left in their current state.
|
|
378
378
|
*
|
|
379
379
|
* Due to the nature of the tasks, auto-scaled pool cannot reliably guarantee abortion
|
|
@@ -391,13 +391,13 @@ class AutoscaledPool {
|
|
|
391
391
|
}
|
|
392
392
|
/**
|
|
393
393
|
* Prevents the auto-scaled pool from starting new tasks, but allows the running ones to finish
|
|
394
|
-
* (unlike abort, which terminates them). Used together with {@
|
|
394
|
+
* (unlike abort, which terminates them). Used together with {@link AutoscaledPool.resume}
|
|
395
395
|
*
|
|
396
396
|
* The function's promise will resolve once all running tasks have completed and the pool
|
|
397
397
|
* is effectively idle. If the `timeoutSecs` argument is provided, the promise will reject
|
|
398
398
|
* with a timeout error after the `timeoutSecs` seconds.
|
|
399
399
|
*
|
|
400
|
-
* The promise returned from the {@
|
|
400
|
+
* The promise returned from the {@link AutoscaledPool.run} function will not resolve
|
|
401
401
|
* when `.pause()` is invoked (unlike abort, which resolves it).
|
|
402
402
|
*/
|
|
403
403
|
async pause(timeoutSecs) {
|
|
@@ -426,7 +426,7 @@ class AutoscaledPool {
|
|
|
426
426
|
}
|
|
427
427
|
/**
|
|
428
428
|
* Resumes the operation of the autoscaled-pool by allowing more tasks to be run.
|
|
429
|
-
* Used together with {@
|
|
429
|
+
* Used together with {@link AutoscaledPool.pause}
|
|
430
430
|
*
|
|
431
431
|
* Tasks will automatically start running again in `options.maybeRunIntervalSecs`.
|
|
432
432
|
*/
|
|
@@ -75,7 +75,7 @@ interface ClientSnapshot {
|
|
|
75
75
|
* Creates snapshots of system resources at given intervals and marks the resource
|
|
76
76
|
* as either overloaded or not during the last interval. Keeps a history of the snapshots.
|
|
77
77
|
* It tracks the following resources: Memory, EventLoop, API and CPU.
|
|
78
|
-
* The class is used by the {@
|
|
78
|
+
* The class is used by the {@link AutoscaledPool} class.
|
|
79
79
|
*
|
|
80
80
|
* When running on the Apify platform, the CPU and memory statistics are provided by the platform,
|
|
81
81
|
* as collected from the running Docker container. When running locally, `Snapshotter`
|
|
@@ -14,7 +14,7 @@ const CRITICAL_OVERLOAD_RATE_LIMIT_MILLIS = 10000;
|
|
|
14
14
|
* Creates snapshots of system resources at given intervals and marks the resource
|
|
15
15
|
* as either overloaded or not during the last interval. Keeps a history of the snapshots.
|
|
16
16
|
* It tracks the following resources: Memory, EventLoop, API and CPU.
|
|
17
|
-
* The class is used by the {@
|
|
17
|
+
* The class is used by the {@link AutoscaledPool} class.
|
|
18
18
|
*
|
|
19
19
|
* When running on the Apify platform, the CPU and memory statistics are provided by the platform,
|
|
20
20
|
* as collected from the running Docker container. When running locally, `Snapshotter`
|
|
@@ -29,7 +29,7 @@ export interface SystemInfo {
|
|
|
29
29
|
}
|
|
30
30
|
export interface SystemStatusOptions {
|
|
31
31
|
/**
|
|
32
|
-
* Defines max age of snapshots used in the {@
|
|
32
|
+
* Defines max age of snapshots used in the {@link SystemStatus.getCurrentStatus} measurement.
|
|
33
33
|
* @default 5
|
|
34
34
|
*/
|
|
35
35
|
currentHistorySecs?: number;
|
|
@@ -82,25 +82,25 @@ export interface FinalStatistics {
|
|
|
82
82
|
crawlerRuntimeMillis: number;
|
|
83
83
|
}
|
|
84
84
|
/**
|
|
85
|
-
* Provides a simple interface to reading system status from a {@
|
|
86
|
-
* It only exposes two functions {@
|
|
87
|
-
* and {@
|
|
85
|
+
* Provides a simple interface to reading system status from a {@link Snapshotter} instance.
|
|
86
|
+
* It only exposes two functions {@link SystemStatus.getCurrentStatus}
|
|
87
|
+
* and {@link SystemStatus.getHistoricalStatus}.
|
|
88
88
|
* The system status is calculated using a weighted average of overloaded
|
|
89
89
|
* messages in the snapshots, with the weights being the time intervals
|
|
90
90
|
* between the snapshots. Each resource is calculated separately
|
|
91
91
|
* and the system is overloaded whenever at least one resource is overloaded.
|
|
92
|
-
* The class is used by the {@
|
|
92
|
+
* The class is used by the {@link AutoscaledPool} class.
|
|
93
93
|
*
|
|
94
|
-
* {@
|
|
94
|
+
* {@link SystemStatus.getCurrentStatus}
|
|
95
95
|
* returns a boolean that represents the current status of the system.
|
|
96
96
|
* The length of the current timeframe in seconds is configurable
|
|
97
97
|
* by the `currentHistorySecs` option and represents the max age
|
|
98
98
|
* of snapshots to be considered for the calculation.
|
|
99
99
|
*
|
|
100
|
-
* {@
|
|
100
|
+
* {@link SystemStatus.getHistoricalStatus}
|
|
101
101
|
* returns a boolean that represents the long-term status
|
|
102
102
|
* of the system. It considers the full snapshot history available
|
|
103
|
-
* in the {@
|
|
103
|
+
* in the {@link Snapshotter} instance.
|
|
104
104
|
* @category Scaling
|
|
105
105
|
*/
|
|
106
106
|
export declare class SystemStatus {
|
|
@@ -112,7 +112,7 @@ export declare class SystemStatus {
|
|
|
112
112
|
private readonly snapshotter;
|
|
113
113
|
constructor(options?: SystemStatusOptions);
|
|
114
114
|
/**
|
|
115
|
-
* Returns an {@
|
|
115
|
+
* Returns an {@link SystemInfo} object with the following structure:
|
|
116
116
|
*
|
|
117
117
|
* ```javascript
|
|
118
118
|
* {
|
|
@@ -129,7 +129,7 @@ export declare class SystemStatus {
|
|
|
129
129
|
*/
|
|
130
130
|
getCurrentStatus(): SystemInfo;
|
|
131
131
|
/**
|
|
132
|
-
* Returns an {@
|
|
132
|
+
* Returns an {@link SystemInfo} object with the following structure:
|
|
133
133
|
*
|
|
134
134
|
* ```javascript
|
|
135
135
|
* {
|
|
@@ -141,8 +141,8 @@ export declare class SystemStatus {
|
|
|
141
141
|
* ```
|
|
142
142
|
*
|
|
143
143
|
* Where the `isSystemIdle` property is set to `false` if the system
|
|
144
|
-
* has been overloaded in the full history of the {@
|
|
145
|
-
* (which is configurable in the {@
|
|
144
|
+
* has been overloaded in the full history of the {@link Snapshotter}
|
|
145
|
+
* (which is configurable in the {@link Snapshotter}) and `true` otherwise.
|
|
146
146
|
*/
|
|
147
147
|
getHistoricalStatus(): SystemInfo;
|
|
148
148
|
/**
|
|
@@ -6,25 +6,25 @@ const utils_1 = require("@crawlee/utils");
|
|
|
6
6
|
const ow_1 = tslib_1.__importDefault(require("ow"));
|
|
7
7
|
const snapshotter_1 = require("./snapshotter");
|
|
8
8
|
/**
|
|
9
|
-
* Provides a simple interface to reading system status from a {@
|
|
10
|
-
* It only exposes two functions {@
|
|
11
|
-
* and {@
|
|
9
|
+
* Provides a simple interface to reading system status from a {@link Snapshotter} instance.
|
|
10
|
+
* It only exposes two functions {@link SystemStatus.getCurrentStatus}
|
|
11
|
+
* and {@link SystemStatus.getHistoricalStatus}.
|
|
12
12
|
* The system status is calculated using a weighted average of overloaded
|
|
13
13
|
* messages in the snapshots, with the weights being the time intervals
|
|
14
14
|
* between the snapshots. Each resource is calculated separately
|
|
15
15
|
* and the system is overloaded whenever at least one resource is overloaded.
|
|
16
|
-
* The class is used by the {@
|
|
16
|
+
* The class is used by the {@link AutoscaledPool} class.
|
|
17
17
|
*
|
|
18
|
-
* {@
|
|
18
|
+
* {@link SystemStatus.getCurrentStatus}
|
|
19
19
|
* returns a boolean that represents the current status of the system.
|
|
20
20
|
* The length of the current timeframe in seconds is configurable
|
|
21
21
|
* by the `currentHistorySecs` option and represents the max age
|
|
22
22
|
* of snapshots to be considered for the calculation.
|
|
23
23
|
*
|
|
24
|
-
* {@
|
|
24
|
+
* {@link SystemStatus.getHistoricalStatus}
|
|
25
25
|
* returns a boolean that represents the long-term status
|
|
26
26
|
* of the system. It considers the full snapshot history available
|
|
27
|
-
* in the {@
|
|
27
|
+
* in the {@link Snapshotter} instance.
|
|
28
28
|
* @category Scaling
|
|
29
29
|
*/
|
|
30
30
|
class SystemStatus {
|
|
@@ -83,7 +83,7 @@ class SystemStatus {
|
|
|
83
83
|
this.snapshotter = snapshotter || new snapshotter_1.Snapshotter({ config });
|
|
84
84
|
}
|
|
85
85
|
/**
|
|
86
|
-
* Returns an {@
|
|
86
|
+
* Returns an {@link SystemInfo} object with the following structure:
|
|
87
87
|
*
|
|
88
88
|
* ```javascript
|
|
89
89
|
* {
|
|
@@ -102,7 +102,7 @@ class SystemStatus {
|
|
|
102
102
|
return this._isSystemIdle(this.currentHistoryMillis);
|
|
103
103
|
}
|
|
104
104
|
/**
|
|
105
|
-
* Returns an {@
|
|
105
|
+
* Returns an {@link SystemInfo} object with the following structure:
|
|
106
106
|
*
|
|
107
107
|
* ```javascript
|
|
108
108
|
* {
|
|
@@ -114,8 +114,8 @@ class SystemStatus {
|
|
|
114
114
|
* ```
|
|
115
115
|
*
|
|
116
116
|
* Where the `isSystemIdle` property is set to `false` if the system
|
|
117
|
-
* has been overloaded in the full history of the {@
|
|
118
|
-
* (which is configurable in the {@
|
|
117
|
+
* has been overloaded in the full history of the {@link Snapshotter}
|
|
118
|
+
* (which is configurable in the {@link Snapshotter}) and `true` otherwise.
|
|
119
119
|
*/
|
|
120
120
|
getHistoricalStatus() {
|
|
121
121
|
return this._isSystemIdle();
|
package/configuration.d.ts
CHANGED
|
@@ -9,17 +9,17 @@ import { type Constructor } from './typedefs';
|
|
|
9
9
|
export interface ConfigurationOptions {
|
|
10
10
|
/**
|
|
11
11
|
* Defines storage client to be used.
|
|
12
|
-
* @default {@
|
|
12
|
+
* @default {@link MemoryStorage}
|
|
13
13
|
*/
|
|
14
14
|
storageClient?: StorageClient;
|
|
15
15
|
/**
|
|
16
16
|
* Defines the Event Manager to be used.
|
|
17
|
-
* @default {@
|
|
17
|
+
* @default {@link EventManager}
|
|
18
18
|
*/
|
|
19
19
|
eventManager?: EventManager;
|
|
20
20
|
/**
|
|
21
21
|
* Could be used to adjust the storage client behavior
|
|
22
|
-
* e.g. {@
|
|
22
|
+
* e.g. {@link MemoryStorageOptions} could be used to adjust the {@link MemoryStorage} behavior.
|
|
23
23
|
*/
|
|
24
24
|
storageClientOptions?: Dictionary;
|
|
25
25
|
/**
|
|
@@ -57,7 +57,7 @@ export interface ConfigurationOptions {
|
|
|
57
57
|
*/
|
|
58
58
|
maxUsedCpuRatio?: number;
|
|
59
59
|
/**
|
|
60
|
-
* Sets the ratio, defining the amount of system memory that could be used by the {@
|
|
60
|
+
* Sets the ratio, defining the amount of system memory that could be used by the {@link AutoscaledPool}.
|
|
61
61
|
* When the memory usage is more than the provided ratio, the memory is considered overloaded.
|
|
62
62
|
*
|
|
63
63
|
* Alternative to `CRAWLEE_AVAILABLE_MEMORY_RATIO` environment variable.
|
|
@@ -65,7 +65,7 @@ export interface ConfigurationOptions {
|
|
|
65
65
|
*/
|
|
66
66
|
availableMemoryRatio?: number;
|
|
67
67
|
/**
|
|
68
|
-
* Sets the amount of system memory in megabytes to be used by the {@
|
|
68
|
+
* Sets the amount of system memory in megabytes to be used by the {@link AutoscaledPool}.
|
|
69
69
|
* By default, the maximum memory is set to one quarter of total system memory.
|
|
70
70
|
*
|
|
71
71
|
* Alternative to `CRAWLEE_MEMORY_MBYTES` environment variable.
|
|
@@ -85,7 +85,7 @@ export interface ConfigurationOptions {
|
|
|
85
85
|
systemInfoIntervalMillis?: number;
|
|
86
86
|
/**
|
|
87
87
|
* Defines the default input key, i.e. the key that is used to get the crawler input value
|
|
88
|
-
* from the default {@
|
|
88
|
+
* from the default {@link KeyValueStore} associated with the current crawler run.
|
|
89
89
|
*
|
|
90
90
|
* Alternative to `CRAWLEE_INPUT_KEY` environment variable.
|
|
91
91
|
* @default 'INPUT'
|
|
@@ -257,8 +257,8 @@ export declare class Configuration {
|
|
|
257
257
|
*/
|
|
258
258
|
static set(key: keyof ConfigurationOptions, value?: any): void;
|
|
259
259
|
/**
|
|
260
|
-
* Returns cached instance of {@
|
|
261
|
-
* this {@
|
|
260
|
+
* Returns cached instance of {@link StorageClient} using options as defined in the environment variables or in
|
|
261
|
+
* this {@link Configuration} instance. Only first call of this method will create the client, following calls will
|
|
262
262
|
* return the same client instance.
|
|
263
263
|
*
|
|
264
264
|
* Caching works based on the `storageClientOptions`, so calling this method with different options will return
|
|
@@ -280,11 +280,11 @@ export declare class Configuration {
|
|
|
280
280
|
*/
|
|
281
281
|
static getGlobalConfig(): Configuration;
|
|
282
282
|
/**
|
|
283
|
-
* Gets default {@
|
|
283
|
+
* Gets default {@link StorageClient} instance.
|
|
284
284
|
*/
|
|
285
285
|
static getStorageClient(): StorageClient;
|
|
286
286
|
/**
|
|
287
|
-
* Gets default {@
|
|
287
|
+
* Gets default {@link EventManager} instance.
|
|
288
288
|
*/
|
|
289
289
|
static getEventManager(): EventManager;
|
|
290
290
|
/**
|
package/configuration.js
CHANGED
|
@@ -169,8 +169,8 @@ class Configuration {
|
|
|
169
169
|
this.getGlobalConfig().set(key, value);
|
|
170
170
|
}
|
|
171
171
|
/**
|
|
172
|
-
* Returns cached instance of {@
|
|
173
|
-
* this {@
|
|
172
|
+
* Returns cached instance of {@link StorageClient} using options as defined in the environment variables or in
|
|
173
|
+
* this {@link Configuration} instance. Only first call of this method will create the client, following calls will
|
|
174
174
|
* return the same client instance.
|
|
175
175
|
*
|
|
176
176
|
* Caching works based on the `storageClientOptions`, so calling this method with different options will return
|
|
@@ -232,13 +232,13 @@ class Configuration {
|
|
|
232
232
|
return Configuration.globalConfig;
|
|
233
233
|
}
|
|
234
234
|
/**
|
|
235
|
-
* Gets default {@
|
|
235
|
+
* Gets default {@link StorageClient} instance.
|
|
236
236
|
*/
|
|
237
237
|
static getStorageClient() {
|
|
238
238
|
return this.getGlobalConfig().getStorageClient();
|
|
239
239
|
}
|
|
240
240
|
/**
|
|
241
|
-
* Gets default {@
|
|
241
|
+
* Gets default {@link EventManager} instance.
|
|
242
242
|
*/
|
|
243
243
|
static getEventManager() {
|
|
244
244
|
return this.getGlobalConfig().getEventManager();
|
|
@@ -26,15 +26,15 @@ export interface RestrictedCrawlingContext<UserData extends Dictionary = Diction
|
|
|
26
26
|
session?: Session;
|
|
27
27
|
/**
|
|
28
28
|
* An object with information about currently used proxy by the crawler
|
|
29
|
-
* and configured by the {@
|
|
29
|
+
* and configured by the {@link ProxyConfiguration} class.
|
|
30
30
|
*/
|
|
31
31
|
proxyInfo?: ProxyInfo;
|
|
32
32
|
/**
|
|
33
|
-
* The original {@
|
|
33
|
+
* The original {@link Request} object.
|
|
34
34
|
*/
|
|
35
35
|
request: Request<UserData>;
|
|
36
36
|
/**
|
|
37
|
-
* This function allows you to push data to a {@
|
|
37
|
+
* This function allows you to push data to a {@link Dataset} specified by name, or the one currently used by the crawler.
|
|
38
38
|
*
|
|
39
39
|
* Shortcut for `crawler.pushData()`.
|
|
40
40
|
*
|
|
@@ -42,11 +42,11 @@ export interface RestrictedCrawlingContext<UserData extends Dictionary = Diction
|
|
|
42
42
|
*/
|
|
43
43
|
pushData(data: ReadonlyDeep<Parameters<Dataset['pushData']>[0]>, datasetIdOrName?: string): Promise<void>;
|
|
44
44
|
/**
|
|
45
|
-
* This function automatically finds and enqueues links from the current page, adding them to the {@
|
|
45
|
+
* This function automatically finds and enqueues links from the current page, adding them to the {@link RequestQueue}
|
|
46
46
|
* currently used by the crawler.
|
|
47
47
|
*
|
|
48
48
|
* Optionally, the function allows you to filter the target links' URLs using an array of globs or regular expressions
|
|
49
|
-
* and override settings of the enqueued {@
|
|
49
|
+
* and override settings of the enqueued {@link Request} objects.
|
|
50
50
|
*
|
|
51
51
|
* Check out the [Crawl a website with relative links](https://crawlee.dev/js/docs/examples/crawl-relative-links) example
|
|
52
52
|
* for more details regarding its usage.
|
|
@@ -89,11 +89,11 @@ export interface RestrictedCrawlingContext<UserData extends Dictionary = Diction
|
|
|
89
89
|
export interface CrawlingContext<Crawler = unknown, UserData extends Dictionary = Dictionary> extends RestrictedCrawlingContext<UserData> {
|
|
90
90
|
crawler: Crawler;
|
|
91
91
|
/**
|
|
92
|
-
* This function automatically finds and enqueues links from the current page, adding them to the {@
|
|
92
|
+
* This function automatically finds and enqueues links from the current page, adding them to the {@link RequestQueue}
|
|
93
93
|
* currently used by the crawler.
|
|
94
94
|
*
|
|
95
95
|
* Optionally, the function allows you to filter the target links' URLs using an array of globs or regular expressions
|
|
96
|
-
* and override settings of the enqueued {@
|
|
96
|
+
* and override settings of the enqueued {@link Request} objects.
|
|
97
97
|
*
|
|
98
98
|
* Check out the [Crawl a website with relative links](https://crawlee.dev/js/docs/examples/crawl-relative-links) example
|
|
99
99
|
* for more details regarding its usage.
|
|
@@ -111,7 +111,7 @@ export interface CrawlingContext<Crawler = unknown, UserData extends Dictionary
|
|
|
111
111
|
* ```
|
|
112
112
|
*
|
|
113
113
|
* @param [options] All `enqueueLinks()` parameters are passed via an options object.
|
|
114
|
-
* @returns Promise that resolves to {@
|
|
114
|
+
* @returns Promise that resolves to {@link BatchAddRequestsResult} object.
|
|
115
115
|
*/
|
|
116
116
|
enqueueLinks(options?: ReadonlyDeep<Omit<EnqueueLinksOptions, 'requestQueue'>> & Pick<EnqueueLinksOptions, 'requestQueue'>): Promise<BatchAddRequestsResult>;
|
|
117
117
|
/**
|
|
@@ -138,7 +138,7 @@ export interface CrawlingContext<Crawler = unknown, UserData extends Dictionary
|
|
|
138
138
|
sendRequest<Response = string>(overrideOptions?: Partial<OptionsInit>): Promise<GotResponse<Response>>;
|
|
139
139
|
}
|
|
140
140
|
/**
|
|
141
|
-
* A partial implementation of {@
|
|
141
|
+
* A partial implementation of {@link RestrictedCrawlingContext} that stores parameters of calls to context methods for later inspection.
|
|
142
142
|
*
|
|
143
143
|
* @experimental
|
|
144
144
|
*/
|
|
@@ -151,7 +151,7 @@ export declare class RequestHandlerResult {
|
|
|
151
151
|
private enqueueLinksCalls;
|
|
152
152
|
constructor(config: Configuration, crawleeStateKey: string);
|
|
153
153
|
/**
|
|
154
|
-
* A record of calls to {@
|
|
154
|
+
* A record of calls to {@link RestrictedCrawlingContext.pushData}, {@link RestrictedCrawlingContext.addRequests}, {@link RestrictedCrawlingContext.enqueueLinks} made by a request handler.
|
|
155
155
|
*/
|
|
156
156
|
get calls(): ReadonlyDeep<{
|
|
157
157
|
pushData: Parameters<RestrictedCrawlingContext['pushData']>[];
|
|
@@ -173,14 +173,14 @@ export declare class RequestHandlerResult {
|
|
|
173
173
|
datasetIdOrName?: string;
|
|
174
174
|
}[]>;
|
|
175
175
|
/**
|
|
176
|
-
* URLs enqueued to the request queue by a request handler, either via {@
|
|
176
|
+
* URLs enqueued to the request queue by a request handler, either via {@link RestrictedCrawlingContext.addRequests} or {@link RestrictedCrawlingContext.enqueueLinks}
|
|
177
177
|
*/
|
|
178
178
|
get enqueuedUrls(): ReadonlyDeep<{
|
|
179
179
|
url: string;
|
|
180
180
|
label?: string;
|
|
181
181
|
}[]>;
|
|
182
182
|
/**
|
|
183
|
-
* URL lists enqueued to the request queue by a request handler via {@
|
|
183
|
+
* URL lists enqueued to the request queue by a request handler via {@link RestrictedCrawlingContext.addRequests} using the `requestsFromUrl` option.
|
|
184
184
|
*/
|
|
185
185
|
get enqueuedUrlLists(): ReadonlyDeep<{
|
|
186
186
|
listUrl: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"crawler_commons.d.ts","sourceRoot":"","sources":["../../src/crawlers/crawler_commons.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,sBAAsB,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEzE,OAAO,KAAK,EAAE,WAAW,EAAE,QAAQ,IAAI,WAAW,EAAE,MAAM,cAAc,CAAC;AACzE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAE9C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,gCAAgC,CAAC;AAC1E,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,QAAQ,CAAC;AAClC,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AACxD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAClD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,KAAK,EAAE,OAAO,EAAE,aAAa,EAAE,4BAA4B,EAAE,MAAM,aAAa,CAAC;AACxF,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,gBAAgB;AAChB,MAAM,MAAM,KAAK,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,KAAK,CAAC;AAEtD,gBAAgB;AAChB,MAAM,MAAM,YAAY,CAAC,CAAC,EAAE,CAAC,SAAS,MAAM,CAAC,IAAI,CAAC,GAAG;KAAG,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;CAAE,CAAC;AAE1E,MAAM,MAAM,aAAa,CAAC,CAAC,SAAS,OAAO,IAAI,YAAY,CAAC,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC,CAAC;AAEnF,gBAAgB;AAChB,MAAM,MAAM,aAAa,CAAC,OAAO,SAAS,yBAAyB,IAAI,KAAK,CAAC,OAAO,CAAC,SAAS,IAAI,GAC5F,OAAO,GACP;IACI,OAAO,EAAE,aAAa,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC;CAC9C,GAAG,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;AAEnC,MAAM,WAAW,yBAAyB,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU,CAE/E,SAAQ,MAAM,CAAC,MAAM,GAAG,EAAE,EAAE,OAAO,CAAC;IACpC,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,CAAC,EAAE,OAAO,CAAC;IAElB;;;OAGG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IAEtB;;OAEG;IACH,OAAO,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IAE3B;;;;;;OAMG;IACH,QAAQ,CAAC,IAAI,EAAE,YAAY,CAAC,UAAU,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,eAAe,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAE1G;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACH,YAAY,EAAE,CAAC,OAAO,CAAC,EAAE,YAAY,CAAC,IAAI,CAAC,mBAAmB,EAAE,cAAc,CAAC,CAAC,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAEtG;;;;;OAKG;IACH,WAAW,EAAE,CACT,YAAY,EAAE,YAAY,CAAC,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,CAAC,EAC/C,OAAO,CAAC,EAAE,YAAY,CAAC,4BAA4B,CAAC,KACnD,OAAO,CAAC,IAAI,CAAC,CAAC;IAEnB;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,SAAS,UAAU,GAAG,UAAU,EAAE,YAAY,CAAC,EAAE,KAAK,KAAK,OAAO,CAAC,KAAK,CAAC,CAAC;IAE1F;;OAEG;IACH,gBAAgB,EAAE,CACd,QAAQ,CAAC,EAAE,MAAM,KAChB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,GAAG,MAAM,GAAG,UAAU,GAAG,mBAAmB,GAAG,UAAU,GAAG,cAAc,CAAC,CAAC,CAAC;IAElH;;OAEG;IACH,GAAG,EAAE,GAAG,CAAC;CACZ;AAED,MAAM,WAAW,eAAe,CAAC,OAAO,GAAG,OAAO,EAAE,QAAQ,SAAS,UAAU,GAAG,UAAU,CACxF,SAAQ,yBAAyB,CAAC,QAAQ,CAAC;IAC3C,OAAO,EAAE,OAAO,CAAC;IAEjB;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACH,YAAY,CACR,OAAO,CAAC,EAAE,YAAY,CAAC,IAAI,CAAC,mBAAmB,EAAE,cAAc,CAAC,CAAC,GAAG,IAAI,CAAC,mBAAmB,EAAE,cAAc,CAAC,GAC9G,OAAO,CAAC,sBAAsB,CAAC,CAAC;IAEnC;;OAEG;IACH,gBAAgB,EAAE,CAAC,QAAQ,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,aAAa,CAAC,CAAC;IAEhE;;;;;;;;;;;;;;;;OAgBG;IACH,WAAW,CAAC,QAAQ,GAAG,MAAM,EAAE,eAAe,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,CAAC;CAC1G;AAED;;;;GAIG;AACH,qBAAa,oBAAoB;IAWzB,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,eAAe;IAX3B,OAAO,CAAC,qBAAqB,CACtB;IAEP,OAAO,CAAC,aAAa,CAA2D;IAEhF,OAAO,CAAC,gBAAgB,CAA8D;IAEtF,OAAO,CAAC,iBAAiB,CAA+D;gBAG5E,MAAM,EAAE,aAAa,EACrB,eAAe,EAAE,MAAM;IAGnC;;OAEG;IACH,IAAI,KAAK,IAAI,YAAY,CAAC;QACtB,QAAQ,EAAE,UAAU,CAAC,yBAAyB,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC;QAC9D,WAAW,EAAE,UAAU,CAAC,yBAAyB,CAAC,aAAa,CAAC,CAAC,EAAE,CAAC;QACpE,YAAY,EAAE,UAAU,CAAC,yBAAyB,CAAC,cAAc,CAAC,CAAC,EAAE,CAAC;KACzE,CAAC,CAMD;IAED;;OAEG;IACH,IAAI,oBAAoB,IAAI,YAAY,CACpC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE;QAAE,YAAY,EAAE,OAAO,CAAC;QAAC,OAAO,CAAC,EAAE,aAAa,CAAA;KAAE,CAAC,CAAC,CACrF,CAEA;IAED;;OAEG;IACH,IAAI,YAAY,IAAI,YAAY,CAAC;QAAE,IAAI,EAAE,UAAU,CAAC;QAAC,eAAe,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC,CAIjF;IAED;;OAEG;IACH,IAAI,YAAY,IAAI,YAAY,CAAC;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC,CAsBlE;IAED;;OAEG;IACH,IAAI,gBAAgB,IAAI,YAAY,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC,CAgB1E;IAED,QAAQ,EAAE,yBAAyB,CAAC,UAAU,CAAC,CAE7C;IAEF,YAAY,EAAE,yBAAyB,CAAC,cAAc,CAAC,CAErD;IAEF,WAAW,EAAE,yBAAyB,CAAC,aAAa,CAAC,CAEnD;IAEF,QAAQ,EAAE,yBAAyB,CAAC,UAAU,CAAC,CAG7C;IAEF,gBAAgB,EAAE,yBAAyB,CAAC,kBAAkB,CAAC,
|
|
1
|
+
{"version":3,"file":"crawler_commons.d.ts","sourceRoot":"","sources":["../../src/crawlers/crawler_commons.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,sBAAsB,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEzE,OAAO,KAAK,EAAE,WAAW,EAAE,QAAQ,IAAI,WAAW,EAAE,MAAM,cAAc,CAAC;AACzE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAE9C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,gCAAgC,CAAC;AAC1E,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,QAAQ,CAAC;AAClC,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AACxD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAClD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,KAAK,EAAE,OAAO,EAAE,aAAa,EAAE,4BAA4B,EAAE,MAAM,aAAa,CAAC;AACxF,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,gBAAgB;AAChB,MAAM,MAAM,KAAK,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,KAAK,CAAC;AAEtD,gBAAgB;AAChB,MAAM,MAAM,YAAY,CAAC,CAAC,EAAE,CAAC,SAAS,MAAM,CAAC,IAAI,CAAC,GAAG;KAAG,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;CAAE,CAAC;AAE1E,MAAM,MAAM,aAAa,CAAC,CAAC,SAAS,OAAO,IAAI,YAAY,CAAC,CAAC,EAAE,IAAI,GAAG,WAAW,CAAC,CAAC;AAEnF,gBAAgB;AAChB,MAAM,MAAM,aAAa,CAAC,OAAO,SAAS,yBAAyB,IAAI,KAAK,CAAC,OAAO,CAAC,SAAS,IAAI,GAC5F,OAAO,GACP;IACI,OAAO,EAAE,aAAa,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC;CAC9C,GAAG,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;AAEnC,MAAM,WAAW,yBAAyB,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU,CAE/E,SAAQ,MAAM,CAAC,MAAM,GAAG,EAAE,EAAE,OAAO,CAAC;IACpC,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,CAAC,EAAE,OAAO,CAAC;IAElB;;;OAGG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IAEtB;;OAEG;IACH,OAAO,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IAE3B;;;;;;OAMG;IACH,QAAQ,CAAC,IAAI,EAAE,YAAY,CAAC,UAAU,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,eAAe,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAE1G;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACH,YAAY,EAAE,CAAC,OAAO,CAAC,EAAE,YAAY,CAAC,IAAI,CAAC,mBAAmB,EAAE,cAAc,CAAC,CAAC,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAEtG;;;;;OAKG;IACH,WAAW,EAAE,CACT,YAAY,EAAE,YAAY,CAAC,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,CAAC,EAC/C,OAAO,CAAC,EAAE,YAAY,CAAC,4BAA4B,CAAC,KACnD,OAAO,CAAC,IAAI,CAAC,CAAC;IAEnB;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,SAAS,UAAU,GAAG,UAAU,EAAE,YAAY,CAAC,EAAE,KAAK,KAAK,OAAO,CAAC,KAAK,CAAC,CAAC;IAE1F;;OAEG;IACH,gBAAgB,EAAE,CACd,QAAQ,CAAC,EAAE,MAAM,KAChB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,GAAG,MAAM,GAAG,UAAU,GAAG,mBAAmB,GAAG,UAAU,GAAG,cAAc,CAAC,CAAC,CAAC;IAElH;;OAEG;IACH,GAAG,EAAE,GAAG,CAAC;CACZ;AAED,MAAM,WAAW,eAAe,CAAC,OAAO,GAAG,OAAO,EAAE,QAAQ,SAAS,UAAU,GAAG,UAAU,CACxF,SAAQ,yBAAyB,CAAC,QAAQ,CAAC;IAC3C,OAAO,EAAE,OAAO,CAAC;IAEjB;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACH,YAAY,CACR,OAAO,CAAC,EAAE,YAAY,CAAC,IAAI,CAAC,mBAAmB,EAAE,cAAc,CAAC,CAAC,GAAG,IAAI,CAAC,mBAAmB,EAAE,cAAc,CAAC,GAC9G,OAAO,CAAC,sBAAsB,CAAC,CAAC;IAEnC;;OAEG;IACH,gBAAgB,EAAE,CAAC,QAAQ,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,aAAa,CAAC,CAAC;IAEhE;;;;;;;;;;;;;;;;OAgBG;IACH,WAAW,CAAC,QAAQ,GAAG,MAAM,EAAE,eAAe,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,CAAC;CAC1G;AAED;;;;GAIG;AACH,qBAAa,oBAAoB;IAWzB,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,eAAe;IAX3B,OAAO,CAAC,qBAAqB,CACtB;IAEP,OAAO,CAAC,aAAa,CAA2D;IAEhF,OAAO,CAAC,gBAAgB,CAA8D;IAEtF,OAAO,CAAC,iBAAiB,CAA+D;gBAG5E,MAAM,EAAE,aAAa,EACrB,eAAe,EAAE,MAAM;IAGnC;;OAEG;IACH,IAAI,KAAK,IAAI,YAAY,CAAC;QACtB,QAAQ,EAAE,UAAU,CAAC,yBAAyB,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC;QAC9D,WAAW,EAAE,UAAU,CAAC,yBAAyB,CAAC,aAAa,CAAC,CAAC,EAAE,CAAC;QACpE,YAAY,EAAE,UAAU,CAAC,yBAAyB,CAAC,cAAc,CAAC,CAAC,EAAE,CAAC;KACzE,CAAC,CAMD;IAED;;OAEG;IACH,IAAI,oBAAoB,IAAI,YAAY,CACpC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE;QAAE,YAAY,EAAE,OAAO,CAAC;QAAC,OAAO,CAAC,EAAE,aAAa,CAAA;KAAE,CAAC,CAAC,CACrF,CAEA;IAED;;OAEG;IACH,IAAI,YAAY,IAAI,YAAY,CAAC;QAAE,IAAI,EAAE,UAAU,CAAC;QAAC,eAAe,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC,CAIjF;IAED;;OAEG;IACH,IAAI,YAAY,IAAI,YAAY,CAAC;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC,CAsBlE;IAED;;OAEG;IACH,IAAI,gBAAgB,IAAI,YAAY,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC,CAgB1E;IAED,QAAQ,EAAE,yBAAyB,CAAC,UAAU,CAAC,CAE7C;IAEF,YAAY,EAAE,yBAAyB,CAAC,cAAc,CAAC,CAErD;IAEF,WAAW,EAAE,yBAAyB,CAAC,aAAa,CAAC,CAEnD;IAEF,QAAQ,EAAE,yBAAyB,CAAC,UAAU,CAAC,CAG7C;IAEF,gBAAgB,EAAE,yBAAyB,CAAC,kBAAkB,CAAC,CAa7D;IAEF,OAAO,CAAC,WAAW,CAAwF;IAE3G,OAAO,CAAC,4BAA4B,CAIlC;IAEF,OAAO,CAAC,4BAA4B,CASlC;CACL"}
|
|
@@ -3,7 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.RequestHandlerResult = void 0;
|
|
4
4
|
const storages_1 = require("../storages");
|
|
5
5
|
/**
|
|
6
|
-
* A partial implementation of {@
|
|
6
|
+
* A partial implementation of {@link RestrictedCrawlingContext} that stores parameters of calls to context methods for later inspection.
|
|
7
7
|
*
|
|
8
8
|
* @experimental
|
|
9
9
|
*/
|
|
@@ -88,17 +88,10 @@ class RequestHandlerResult {
|
|
|
88
88
|
id: this.idOrDefault(idOrName),
|
|
89
89
|
name: idOrName,
|
|
90
90
|
getValue: async (key) => this.getKeyValueStoreChangedValue(idOrName, key) ?? (await store.getValue(key)),
|
|
91
|
-
getAutoSavedValue: async (key, defaultValue = {}) => {
|
|
92
|
-
let value = this.getKeyValueStoreChangedValue(idOrName, key);
|
|
93
|
-
if (value === null) {
|
|
94
|
-
value = (await store.getValue(key)) ?? defaultValue;
|
|
95
|
-
this.setKeyValueStoreChangedValue(idOrName, key, value);
|
|
96
|
-
}
|
|
97
|
-
return value;
|
|
98
|
-
},
|
|
99
91
|
setValue: async (key, value, options) => {
|
|
100
92
|
this.setKeyValueStoreChangedValue(idOrName, key, value, options);
|
|
101
93
|
},
|
|
94
|
+
getAutoSavedValue: store.getAutoSavedValue.bind(store),
|
|
102
95
|
getPublicUrl: store.getPublicUrl.bind(store),
|
|
103
96
|
};
|
|
104
97
|
}
|
|
@@ -133,7 +126,7 @@ class RequestHandlerResult {
|
|
|
133
126
|
});
|
|
134
127
|
}
|
|
135
128
|
/**
|
|
136
|
-
* A record of calls to {@
|
|
129
|
+
* A record of calls to {@link RestrictedCrawlingContext.pushData}, {@link RestrictedCrawlingContext.addRequests}, {@link RestrictedCrawlingContext.enqueueLinks} made by a request handler.
|
|
137
130
|
*/
|
|
138
131
|
get calls() {
|
|
139
132
|
return {
|
|
@@ -155,7 +148,7 @@ class RequestHandlerResult {
|
|
|
155
148
|
return this.pushDataCalls.flatMap(([data, datasetIdOrName]) => (Array.isArray(data) ? data : [data]).map((item) => ({ item, datasetIdOrName })));
|
|
156
149
|
}
|
|
157
150
|
/**
|
|
158
|
-
* URLs enqueued to the request queue by a request handler, either via {@
|
|
151
|
+
* URLs enqueued to the request queue by a request handler, either via {@link RestrictedCrawlingContext.addRequests} or {@link RestrictedCrawlingContext.enqueueLinks}
|
|
159
152
|
*/
|
|
160
153
|
get enqueuedUrls() {
|
|
161
154
|
const result = [];
|
|
@@ -177,7 +170,7 @@ class RequestHandlerResult {
|
|
|
177
170
|
return result;
|
|
178
171
|
}
|
|
179
172
|
/**
|
|
180
|
-
* URL lists enqueued to the request queue by a request handler via {@
|
|
173
|
+
* URL lists enqueued to the request queue by a request handler via {@link RestrictedCrawlingContext.addRequests} using the `requestsFromUrl` option.
|
|
181
174
|
*/
|
|
182
175
|
get enqueuedUrlLists() {
|
|
183
176
|
const result = [];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"crawler_commons.js","sourceRoot":"","sources":["../../src/crawlers/crawler_commons.ts"],"names":[],"mappings":";;;AAYA,0CAA4C;AA4J5C;;;;GAIG;AACH,MAAa,oBAAoB;IAU7B,YACY,MAAqB,EACrB,eAAuB;QAD/B;;;;mBAAQ,MAAM;WAAe;QAC7B;;;;mBAAQ,eAAe;WAAQ;QAX3B;;;;mBACJ,EAAE;WAAC;QAEC;;;;mBAAqE,EAAE;WAAC;QAExE;;;;mBAA2E,EAAE;WAAC;QAE9E;;;;mBAA6E,EAAE;WAAC;QAwFxF;;;;mBAAkD,KAAK,EAAE,IAAI,EAAE,eAAe,EAAE,EAAE;gBAC9E,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,eAAe,CAAC,CAAC,CAAC;YACrD,CAAC;WAAC;QAEF;;;;mBAA0D,KAAK,EAAE,OAAO,EAAE,EAAE;gBACxE,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;YAC3C,CAAC;WAAC;QAEF;;;;mBAAwD,KAAK,EAAE,QAAQ,EAAE,OAAO,GAAG,EAAE,EAAE,EAAE;gBACrF,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;YACpD,CAAC;WAAC;QAEF;;;;mBAAkD,KAAK,EAAE,YAAY,EAAE,EAAE;gBACrE,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;gBACrD,OAAO,MAAM,KAAK,CAAC,iBAAiB,CAAC,IAAI,CAAC,eAAe,EAAE,YAAY,CAAC,CAAC;YAC7E,CAAC;WAAC;QAEF;;;;mBAAkE,KAAK,EAAE,QAAQ,EAAE,EAAE;gBACjF,MAAM,KAAK,GAAG,MAAM,wBAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;gBAE1E,OAAO;oBACH,EAAE,EAAE,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC;oBAC9B,IAAI,EAAE,QAAQ;oBACd,QAAQ,EAAE,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,4BAA4B,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;oBACxG,
|
|
1
|
+
{"version":3,"file":"crawler_commons.js","sourceRoot":"","sources":["../../src/crawlers/crawler_commons.ts"],"names":[],"mappings":";;;AAYA,0CAA4C;AA4J5C;;;;GAIG;AACH,MAAa,oBAAoB;IAU7B,YACY,MAAqB,EACrB,eAAuB;QAD/B;;;;mBAAQ,MAAM;WAAe;QAC7B;;;;mBAAQ,eAAe;WAAQ;QAX3B;;;;mBACJ,EAAE;WAAC;QAEC;;;;mBAAqE,EAAE;WAAC;QAExE;;;;mBAA2E,EAAE;WAAC;QAE9E;;;;mBAA6E,EAAE;WAAC;QAwFxF;;;;mBAAkD,KAAK,EAAE,IAAI,EAAE,eAAe,EAAE,EAAE;gBAC9E,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,eAAe,CAAC,CAAC,CAAC;YACrD,CAAC;WAAC;QAEF;;;;mBAA0D,KAAK,EAAE,OAAO,EAAE,EAAE;gBACxE,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;YAC3C,CAAC;WAAC;QAEF;;;;mBAAwD,KAAK,EAAE,QAAQ,EAAE,OAAO,GAAG,EAAE,EAAE,EAAE;gBACrF,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;YACpD,CAAC;WAAC;QAEF;;;;mBAAkD,KAAK,EAAE,YAAY,EAAE,EAAE;gBACrE,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC;gBACrD,OAAO,MAAM,KAAK,CAAC,iBAAiB,CAAC,IAAI,CAAC,eAAe,EAAE,YAAY,CAAC,CAAC;YAC7E,CAAC;WAAC;QAEF;;;;mBAAkE,KAAK,EAAE,QAAQ,EAAE,EAAE;gBACjF,MAAM,KAAK,GAAG,MAAM,wBAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;gBAE1E,OAAO;oBACH,EAAE,EAAE,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC;oBAC9B,IAAI,EAAE,QAAQ;oBACd,QAAQ,EAAE,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,4BAA4B,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;oBACxG,QAAQ,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;wBACpC,IAAI,CAAC,4BAA4B,CAAC,QAAQ,EAAE,GAAG,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;oBACrE,CAAC;oBACD,iBAAiB,EAAE,KAAK,CAAC,iBAAiB,CAAC,IAAI,CAAC,KAAK,CAAC;oBACtD,YAAY,EAAE,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC;iBAC/C,CAAC;YACN,CAAC;WAAC;QAEM;;;;mBAAc,CAAC,QAAiB,EAAU,EAAE,CAAC,QAAQ,IAAI,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,wBAAwB,CAAC;WAAC;QAEnG;;;;mBAA+B,CAAC,QAA4B,EAAE,GAAW,EAAE,EAAE;;gBACjF,MAAM,EAAE,GAAG,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;gBACtC,MAAA,IAAI,CAAC,qBAAqB,EAAC,EAAE,SAAF,EAAE,IAAM,EAAE,EAAC;gBACtC,OAAO,IAAI,CAAC,oBAAoB,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,YAAY,IAAI,IAAI,CAAC;YACpE,CAAC;WAAC;QAEM;;;;mBAA+B,CACnC,QAA4B,EAC5B,GAAW,EACX,YAAqB,EACrB,OAAuB,EACzB,EAAE;;gBACA,MAAM,EAAE,GAAG,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;gBACtC,MAAA,IAAI,CAAC,qBAAqB,EAAC,EAAE,SAAF,EAAE,IAAM,EAAE,EAAC;gBACtC,IAAI,CAAC,qBAAqB,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;YACpE,CAAC;WAAC;IApIC,CAAC;IAEJ;;OAEG;IACH,IAAI,KAAK;QAKL,OAAO;YACH,QAAQ,EAAE,IAAI,CAAC,aAAa;YAC5B,WAAW,EAAE,IAAI,CAAC,gBAAgB;YAClC,YAAY,EAAE,IAAI,CAAC,iBAAiB;SACvC,CAAC;IACN,CAAC;IAED;;OAEG;IACH,IAAI,oBAAoB;QAGpB,OAAO,IAAI,CAAC,qBAAqB,CAAC;IACtC,CAAC;IAED;;OAEG;IACH,IAAI,YAAY;QACZ,OAAO,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,eAAe,CAAC,EAAE,EAAE,CAC1D,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC,CAAC,CACnF,CAAC;IACN,CAAC;IAED;;OAEG;IACH,IAAI,YAAY;QACZ,MAAM,MAAM,GAAsC,EAAE,CAAC;QAErD,KAAK,MAAM,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,iBAAiB,EAAE,CAAC;YAC7C,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC1F,CAAC;QAED,KAAK,MAAM,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC7C,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IACI,OAAO,OAAO,KAAK,QAAQ;oBAC3B,CAAC,CAAC,CAAC,iBAAiB,IAAI,OAAO,CAAC,IAAI,OAAO,CAAC,eAAe,KAAK,SAAS,CAAC;oBAC1E,OAAO,CAAC,GAAG,KAAK,SAAS,EAC3B,CAAC;oBACC,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC5D,CAAC;qBAAM,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;oBACrC,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;gBAClC,CAAC;YACL,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,IAAI,gBAAgB;QAChB,MAAM,MAAM,GAA0C,EAAE,CAAC;QAEzD,KAAK,MAAM,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC7C,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IACI,OAAO,OAAO,KAAK,QAAQ;oBAC3B,iBAAiB,IAAI,OAAO;oBAC5B,OAAO,CAAC,eAAe,KAAK,SAAS,EACvC,CAAC;oBACC,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,eAAe,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC5E,CAAC;YACL,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;CAoDJ;AAlJD,oDAkJC"}
|