@crawlee/core 4.0.0-beta.4 → 4.0.0-beta.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -5
- package/autoscaling/autoscaled_pool.d.ts +3 -5
- package/autoscaling/autoscaled_pool.d.ts.map +1 -1
- package/autoscaling/autoscaled_pool.js +3 -9
- package/autoscaling/autoscaled_pool.js.map +1 -1
- package/autoscaling/snapshotter.d.ts +3 -13
- package/autoscaling/snapshotter.d.ts.map +1 -1
- package/autoscaling/snapshotter.js +18 -29
- package/autoscaling/snapshotter.js.map +1 -1
- package/autoscaling/system_status.d.ts +0 -3
- package/autoscaling/system_status.d.ts.map +1 -1
- package/autoscaling/system_status.js +2 -3
- package/autoscaling/system_status.js.map +1 -1
- package/configuration.d.ts +5 -78
- package/configuration.d.ts.map +1 -1
- package/configuration.js +6 -102
- package/configuration.js.map +1 -1
- package/cookie_utils.d.ts +1 -1
- package/cookie_utils.d.ts.map +1 -1
- package/cookie_utils.js +8 -8
- package/cookie_utils.js.map +1 -1
- package/crawlers/context_pipeline.d.ts +71 -0
- package/crawlers/context_pipeline.d.ts.map +1 -0
- package/crawlers/context_pipeline.js +123 -0
- package/crawlers/context_pipeline.js.map +1 -0
- package/crawlers/crawler_commons.d.ts +15 -23
- package/crawlers/crawler_commons.d.ts.map +1 -1
- package/crawlers/crawler_commons.js +0 -8
- package/crawlers/crawler_commons.js.map +1 -1
- package/crawlers/error_snapshotter.d.ts +3 -2
- package/crawlers/error_snapshotter.d.ts.map +1 -1
- package/crawlers/error_snapshotter.js +2 -2
- package/crawlers/error_snapshotter.js.map +1 -1
- package/crawlers/error_tracker.d.ts +2 -1
- package/crawlers/error_tracker.d.ts.map +1 -1
- package/crawlers/error_tracker.js.map +1 -1
- package/crawlers/index.d.ts +1 -1
- package/crawlers/index.d.ts.map +1 -1
- package/crawlers/index.js +1 -1
- package/crawlers/index.js.map +1 -1
- package/crawlers/internals/types.d.ts +8 -0
- package/crawlers/internals/types.d.ts.map +1 -0
- package/crawlers/internals/types.js +2 -0
- package/crawlers/internals/types.js.map +1 -0
- package/crawlers/statistics.d.ts +15 -15
- package/crawlers/statistics.d.ts.map +1 -1
- package/crawlers/statistics.js +21 -18
- package/crawlers/statistics.js.map +1 -1
- package/enqueue_links/enqueue_links.d.ts +30 -18
- package/enqueue_links/enqueue_links.d.ts.map +1 -1
- package/enqueue_links/enqueue_links.js +41 -23
- package/enqueue_links/enqueue_links.js.map +1 -1
- package/enqueue_links/shared.d.ts +24 -7
- package/enqueue_links/shared.d.ts.map +1 -1
- package/enqueue_links/shared.js +66 -37
- package/enqueue_links/shared.js.map +1 -1
- package/errors.d.ts +18 -0
- package/errors.d.ts.map +1 -1
- package/errors.js +35 -0
- package/errors.js.map +1 -1
- package/events/event_manager.d.ts +8 -5
- package/events/event_manager.d.ts.map +1 -1
- package/events/event_manager.js +7 -9
- package/events/event_manager.js.map +1 -1
- package/events/local_event_manager.d.ts +14 -4
- package/events/local_event_manager.d.ts.map +1 -1
- package/events/local_event_manager.js +33 -39
- package/events/local_event_manager.js.map +1 -1
- package/index.d.ts +2 -1
- package/index.d.ts.map +1 -1
- package/index.js +2 -1
- package/index.js.map +1 -1
- package/log.d.ts +82 -2
- package/log.d.ts.map +1 -1
- package/log.js +102 -0
- package/log.js.map +1 -1
- package/package.json +6 -7
- package/proxy_configuration.d.ts +17 -94
- package/proxy_configuration.d.ts.map +1 -1
- package/proxy_configuration.js +18 -54
- package/proxy_configuration.js.map +1 -1
- package/recoverable_state.d.ts +121 -0
- package/recoverable_state.d.ts.map +1 -0
- package/recoverable_state.js +137 -0
- package/recoverable_state.js.map +1 -0
- package/request.d.ts +48 -6
- package/request.d.ts.map +1 -1
- package/request.js +62 -16
- package/request.js.map +1 -1
- package/service_locator.d.ts +162 -0
- package/service_locator.d.ts.map +1 -0
- package/service_locator.js +247 -0
- package/service_locator.js.map +1 -0
- package/session_pool/session.d.ts +9 -31
- package/session_pool/session.d.ts.map +1 -1
- package/session_pool/session.js +17 -21
- package/session_pool/session.js.map +1 -1
- package/session_pool/session_pool.d.ts +27 -54
- package/session_pool/session_pool.d.ts.map +1 -1
- package/session_pool/session_pool.js +54 -69
- package/session_pool/session_pool.js.map +1 -1
- package/storages/dataset.d.ts +53 -3
- package/storages/dataset.d.ts.map +1 -1
- package/storages/dataset.js +78 -6
- package/storages/dataset.js.map +1 -1
- package/storages/index.d.ts +2 -0
- package/storages/index.d.ts.map +1 -1
- package/storages/index.js +2 -0
- package/storages/index.js.map +1 -1
- package/storages/key_value_store.d.ts +71 -1
- package/storages/key_value_store.d.ts.map +1 -1
- package/storages/key_value_store.js +95 -12
- package/storages/key_value_store.js.map +1 -1
- package/storages/request_list.d.ts +9 -9
- package/storages/request_list.d.ts.map +1 -1
- package/storages/request_list.js +11 -8
- package/storages/request_list.js.map +1 -1
- package/storages/request_list_adapter.d.ts +58 -0
- package/storages/request_list_adapter.d.ts.map +1 -0
- package/storages/request_list_adapter.js +81 -0
- package/storages/request_list_adapter.js.map +1 -0
- package/storages/request_manager_tandem.d.ts +68 -0
- package/storages/request_manager_tandem.d.ts.map +1 -0
- package/storages/request_manager_tandem.js +124 -0
- package/storages/request_manager_tandem.js.map +1 -0
- package/storages/request_provider.d.ts +76 -9
- package/storages/request_provider.d.ts.map +1 -1
- package/storages/request_provider.js +92 -54
- package/storages/request_provider.js.map +1 -1
- package/storages/request_queue.d.ts +1 -3
- package/storages/request_queue.d.ts.map +1 -1
- package/storages/request_queue.js +2 -4
- package/storages/request_queue.js.map +1 -1
- package/storages/request_queue_v2.d.ts +3 -3
- package/storages/request_queue_v2.d.ts.map +1 -1
- package/storages/request_queue_v2.js +4 -5
- package/storages/request_queue_v2.js.map +1 -1
- package/storages/sitemap_request_list.d.ts +5 -5
- package/storages/sitemap_request_list.d.ts.map +1 -1
- package/storages/sitemap_request_list.js +10 -7
- package/storages/sitemap_request_list.js.map +1 -1
- package/storages/storage_manager.d.ts +10 -8
- package/storages/storage_manager.d.ts.map +1 -1
- package/storages/storage_manager.js +12 -22
- package/storages/storage_manager.js.map +1 -1
- package/storages/utils.d.ts.map +1 -1
- package/storages/utils.js +4 -3
- package/storages/utils.js.map +1 -1
- package/typedefs.d.ts +1 -1
- package/typedefs.d.ts.map +1 -1
- package/crawlers/crawler_extension.d.ts +0 -12
- package/crawlers/crawler_extension.d.ts.map +0 -1
- package/crawlers/crawler_extension.js +0 -14
- package/crawlers/crawler_extension.js.map +0 -1
- package/http_clients/base-http-client.d.ts +0 -134
- package/http_clients/base-http-client.d.ts.map +0 -1
- package/http_clients/base-http-client.js +0 -33
- package/http_clients/base-http-client.js.map +0 -1
- package/http_clients/form-data-like.d.ts +0 -67
- package/http_clients/form-data-like.d.ts.map +0 -1
- package/http_clients/form-data-like.js +0 -5
- package/http_clients/form-data-like.js.map +0 -1
- package/http_clients/got-scraping-http-client.d.ts +0 -15
- package/http_clients/got-scraping-http-client.d.ts.map +0 -1
- package/http_clients/got-scraping-http-client.js +0 -69
- package/http_clients/got-scraping-http-client.js.map +0 -1
- package/http_clients/index.d.ts +0 -3
- package/http_clients/index.d.ts.map +0 -1
- package/http_clients/index.js +0 -3
- package/http_clients/index.js.map +0 -1
- package/tsconfig.build.tsbuildinfo +0 -1
|
@@ -1,12 +1,25 @@
|
|
|
1
|
-
import os from 'node:os';
|
|
2
|
-
import { getCurrentCpuTicksV2, getMemoryInfo, getMemoryInfoV2, isContainerized } from '@crawlee/utils';
|
|
3
|
-
import log from '@apify/log';
|
|
4
1
|
import { betterClearInterval, betterSetInterval } from '@apify/utilities';
|
|
2
|
+
import { serviceLocator } from '../service_locator.js';
|
|
5
3
|
import { EventManager } from './event_manager.js';
|
|
6
4
|
export class LocalEventManager extends EventManager {
|
|
7
|
-
|
|
5
|
+
systemInfoIntervalMillis;
|
|
6
|
+
constructor(options) {
|
|
7
|
+
super(options);
|
|
8
|
+
this.systemInfoIntervalMillis = options.systemInfoIntervalMillis;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Creates a new `LocalEventManager` based on the provided `Configuration`.
|
|
12
|
+
* Uses the global configuration from the service locator if none is provided.
|
|
13
|
+
*/
|
|
14
|
+
static fromConfig(config) {
|
|
15
|
+
const resolvedConfig = config ?? serviceLocator.getConfiguration();
|
|
16
|
+
return new LocalEventManager({
|
|
17
|
+
persistStateIntervalMillis: resolvedConfig.get('persistStateIntervalMillis'),
|
|
18
|
+
systemInfoIntervalMillis: resolvedConfig.get('systemInfoIntervalMillis'),
|
|
19
|
+
});
|
|
20
|
+
}
|
|
8
21
|
/**
|
|
9
|
-
* Initializes the EventManager and sets up periodic `systemInfo`
|
|
22
|
+
* Initializes the EventManager and sets up periodic `systemInfo` events.
|
|
10
23
|
* This is automatically called at the beginning of `crawler.run()`.
|
|
11
24
|
*/
|
|
12
25
|
async init() {
|
|
@@ -14,9 +27,8 @@ export class LocalEventManager extends EventManager {
|
|
|
14
27
|
return;
|
|
15
28
|
}
|
|
16
29
|
await super.init();
|
|
17
|
-
const systemInfoIntervalMillis = this.config.get('systemInfoIntervalMillis');
|
|
18
30
|
this.emitSystemInfoEvent = this.emitSystemInfoEvent.bind(this);
|
|
19
|
-
this.intervals.systemInfo = betterSetInterval(this.emitSystemInfoEvent.bind(this), systemInfoIntervalMillis);
|
|
31
|
+
this.intervals.systemInfo = betterSetInterval(this.emitSystemInfoEvent.bind(this), this.systemInfoIntervalMillis);
|
|
20
32
|
}
|
|
21
33
|
/**
|
|
22
34
|
* @inheritDoc
|
|
@@ -33,7 +45,7 @@ export class LocalEventManager extends EventManager {
|
|
|
33
45
|
*/
|
|
34
46
|
async emitSystemInfoEvent(intervalCallback) {
|
|
35
47
|
const info = await this.createSystemInfo({
|
|
36
|
-
maxUsedCpuRatio:
|
|
48
|
+
maxUsedCpuRatio: serviceLocator.getConfiguration().get('maxUsedCpuRatio'),
|
|
37
49
|
});
|
|
38
50
|
this.events.emit("systemInfo" /* EventType.SYSTEM_INFO */, info);
|
|
39
51
|
intervalCallback();
|
|
@@ -42,17 +54,8 @@ export class LocalEventManager extends EventManager {
|
|
|
42
54
|
* @internal
|
|
43
55
|
*/
|
|
44
56
|
async isContainerizedWrapper() {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
getCurrentCpuTicks() {
|
|
48
|
-
const cpus = os.cpus();
|
|
49
|
-
return cpus.reduce((acc, cpu) => {
|
|
50
|
-
const cpuTimes = Object.values(cpu.times);
|
|
51
|
-
return {
|
|
52
|
-
idle: acc.idle + cpu.times.idle,
|
|
53
|
-
total: acc.total + cpuTimes.reduce((sum, num) => sum + num),
|
|
54
|
-
};
|
|
55
|
-
}, { idle: 0, total: 0 });
|
|
57
|
+
const { isContainerized } = await import('@crawlee/utils');
|
|
58
|
+
return serviceLocator.getConfiguration().get('containerized', await isContainerized());
|
|
56
59
|
}
|
|
57
60
|
/**
|
|
58
61
|
* Creates a SystemInfo object based on local metrics.
|
|
@@ -65,18 +68,11 @@ export class LocalEventManager extends EventManager {
|
|
|
65
68
|
};
|
|
66
69
|
}
|
|
67
70
|
async createCpuInfo(options) {
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
};
|
|
74
|
-
}
|
|
75
|
-
const ticks = this.getCurrentCpuTicks();
|
|
76
|
-
const idleTicksDelta = ticks.idle - this.previousTicks.idle;
|
|
77
|
-
const totalTicksDelta = ticks.total - this.previousTicks.total;
|
|
78
|
-
const usedCpuRatio = totalTicksDelta ? 1 - idleTicksDelta / totalTicksDelta : 0;
|
|
79
|
-
Object.assign(this.previousTicks, ticks);
|
|
71
|
+
const { getCurrentCpuTicksV2 } = await import('@crawlee/utils');
|
|
72
|
+
const usedCpuRatio = await getCurrentCpuTicksV2({
|
|
73
|
+
containerized: await this.isContainerizedWrapper(),
|
|
74
|
+
logger: serviceLocator.getLogger(),
|
|
75
|
+
});
|
|
80
76
|
return {
|
|
81
77
|
cpuCurrentUsage: usedCpuRatio * 100,
|
|
82
78
|
isCpuOverloaded: usedCpuRatio > options.maxUsedCpuRatio,
|
|
@@ -84,19 +80,17 @@ export class LocalEventManager extends EventManager {
|
|
|
84
80
|
}
|
|
85
81
|
async createMemoryInfo() {
|
|
86
82
|
try {
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
}
|
|
93
|
-
const memInfo = await getMemoryInfo();
|
|
83
|
+
const { getMemoryInfo } = await import('@crawlee/utils');
|
|
84
|
+
const memInfo = await getMemoryInfo({
|
|
85
|
+
containerized: await this.isContainerizedWrapper(),
|
|
86
|
+
logger: serviceLocator.getLogger(),
|
|
87
|
+
});
|
|
94
88
|
return {
|
|
95
89
|
memCurrentBytes: memInfo.mainProcessBytes + memInfo.childProcessesBytes,
|
|
96
90
|
};
|
|
97
91
|
}
|
|
98
92
|
catch (err) {
|
|
99
|
-
log.exception(err, 'Memory snapshot failed.');
|
|
93
|
+
this.log.exception(err, 'Memory snapshot failed.');
|
|
100
94
|
return {};
|
|
101
95
|
}
|
|
102
96
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"local_event_manager.js","sourceRoot":"","sources":["../../src/events/local_event_manager.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"local_event_manager.js","sourceRoot":"","sources":["../../src/events/local_event_manager.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAI1E,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,YAAY,EAAuC,MAAM,oBAAoB,CAAC;AAOvF,MAAM,OAAO,iBAAkB,SAAQ,YAAY;IACvC,wBAAwB,CAAS;IAEzC,YAAY,OAAiC;QACzC,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,wBAAwB,GAAG,OAAO,CAAC,wBAAwB,CAAC;IACrE,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,UAAU,CAAC,MAAsB;QACpC,MAAM,cAAc,GAAG,MAAM,IAAI,cAAc,CAAC,gBAAgB,EAAE,CAAC;QAEnE,OAAO,IAAI,iBAAiB,CAAC;YACzB,0BAA0B,EAAE,cAAc,CAAC,GAAG,CAAC,4BAA4B,CAAC;YAC5E,wBAAwB,EAAE,cAAc,CAAC,GAAG,CAAC,0BAA0B,CAAC;SAC3E,CAAC,CAAC;IACP,CAAC;IAED;;;OAGG;IACM,KAAK,CAAC,IAAI;QACf,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACnB,OAAO;QACX,CAAC;QAED,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;QAEnB,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/D,IAAI,CAAC,SAAS,CAAC,UAAU,GAAG,iBAAiB,CACzC,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,EACnC,IAAI,CAAC,wBAAwB,CAChC,CAAC;IACN,CAAC;IAED;;OAEG;IACM,KAAK,CAAC,KAAK;QAChB,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACpB,OAAO;QACX,CAAC;QAED,MAAM,KAAK,CAAC,KAAK,EAAE,CAAC;QACpB,mBAAmB,CAAC,IAAI,CAAC,SAAS,CAAC,UAAW,CAAC,CAAC;IACpD,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,mBAAmB,CAAC,gBAA+B;QACrD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC;YACrC,eAAe,EAAE,cAAc,CAAC,gBAAgB,EAAE,CAAC,GAAG,CAAC,iBAAiB,CAAC;SAC5E,CAAC,CAAC;QACH,IAAI,CAAC,MAAM,CAAC,IAAI,2CAAwB,IAAI,CAAC,CAAC;QAC9C,gBAAgB,EAAE,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,sBAAsB;QACxB,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,gBAAgB,CAAC,CAAC;QAC3D,OAAO,cAAc,CAAC,gBAAgB,EAAE,CAAC,GAAG,CAAC,eAAe,EAAE,MAAM,eAAe,EAAE,CAAC,CAAC;IAC3F,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,gBAAgB,CAAC,OAAoC;QAC/D,OAAO;YACH,SAAS,EAAE,IAAI,IAAI,EAAE;YACrB,GAAG,CAAC,MAAM,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;YACtC,GAAG,CAAC,MAAM,IAAI,CAAC,gBAAgB,EAAE,CAAC;SACvB,CAAC;IACpB,CAAC;IAEO,KAAK,CAAC,aAAa,CAAC,OAAoC;QAC5D,MAAM,EAAE,oBAAoB,EAAE,GAAG,MAAM,MAAM,CAAC,gBAAgB,CAAC,CAAC;QAChE,MAAM,YAAY,GAAG,MAAM,oBAAoB,CAAC;YAC5C,aAAa,EAAE,MAAM,IAAI,CAAC,sBAAsB,EAAE;YAClD,MAAM,EAAE,cAAc,CAAC,SAAS,EAAE;SACrC,CAAC,CAAC;QACH,OAAO;YACH,eAAe,EAAE,YAAY,GAAG,GAAG;YACnC,eAAe,EAAE,YAAY,GAAG,OAAO,CAAC,eAAe;SAC1D,CAAC;IACN,CAAC;IAEO,KAAK,CAAC,gBAAgB;QAC1B,IAAI,CAAC;YACD,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,gBAAgB,CAAC,CAAC;YACzD,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC;gBAChC,aAAa,EAAE,MAAM,IAAI,CAAC,sBAAsB,EAAE;gBAClD,MAAM,EAAE,cAAc,CAAC,SAAS,EAAE;aACrC,CAAC,CAAC;YACH,OAAO;gBACH,eAAe,EAAE,OAAO,CAAC,gBAAgB,GAAG,OAAO,CAAC,mBAAmB;aAC1E,CAAC;QACN,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,GAAY,EAAE,yBAAyB,CAAC,CAAC;YAC5D,OAAO,EAAE,CAAC;QACd,CAAC;IACL,CAAC;CACJ"}
|
package/index.d.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
export * from './errors.js';
|
|
2
2
|
export * from './autoscaling/index.js';
|
|
3
3
|
export * from './configuration.js';
|
|
4
|
+
export * from './service_locator.js';
|
|
4
5
|
export * from './crawlers/index.js';
|
|
5
6
|
export * from './enqueue_links/index.js';
|
|
6
7
|
export * from './events/index.js';
|
|
7
|
-
export * from './http_clients/index.js';
|
|
8
8
|
export * from './log.js';
|
|
9
9
|
export * from './proxy_configuration.js';
|
|
10
10
|
export * from './request.js';
|
|
@@ -14,6 +14,7 @@ export * from './session_pool/index.js';
|
|
|
14
14
|
export * from './storages/index.js';
|
|
15
15
|
export * from './validators.js';
|
|
16
16
|
export * from './cookie_utils.js';
|
|
17
|
+
export * from './recoverable_state.js';
|
|
17
18
|
export { PseudoUrl } from '@apify/pseudo_url';
|
|
18
19
|
export { Dictionary, Awaitable, Constructor, StorageClient, Cookie, QueueOperationInfo } from '@crawlee/types';
|
|
19
20
|
//# sourceMappingURL=index.d.ts.map
|
package/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC;AAC5B,cAAc,wBAAwB,CAAC;AACvC,cAAc,oBAAoB,CAAC;AACnC,cAAc,qBAAqB,CAAC;AACpC,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC;AAC5B,cAAc,wBAAwB,CAAC;AACvC,cAAc,oBAAoB,CAAC;AACnC,cAAc,sBAAsB,CAAC;AACrC,cAAc,qBAAqB,CAAC;AACpC,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,UAAU,CAAC;AACzB,cAAc,0BAA0B,CAAC;AACzC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,oBAAoB,CAAC;AACnC,cAAc,yBAAyB,CAAC;AACxC,cAAc,qBAAqB,CAAC;AACpC,cAAc,iBAAiB,CAAC;AAChC,cAAc,mBAAmB,CAAC;AAClC,cAAc,wBAAwB,CAAC;AACvC,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC"}
|
package/index.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
export * from './errors.js';
|
|
2
2
|
export * from './autoscaling/index.js';
|
|
3
3
|
export * from './configuration.js';
|
|
4
|
+
export * from './service_locator.js';
|
|
4
5
|
export * from './crawlers/index.js';
|
|
5
6
|
export * from './enqueue_links/index.js';
|
|
6
7
|
export * from './events/index.js';
|
|
7
|
-
export * from './http_clients/index.js';
|
|
8
8
|
export * from './log.js';
|
|
9
9
|
export * from './proxy_configuration.js';
|
|
10
10
|
export * from './request.js';
|
|
@@ -14,5 +14,6 @@ export * from './session_pool/index.js';
|
|
|
14
14
|
export * from './storages/index.js';
|
|
15
15
|
export * from './validators.js';
|
|
16
16
|
export * from './cookie_utils.js';
|
|
17
|
+
export * from './recoverable_state.js';
|
|
17
18
|
export { PseudoUrl } from '@apify/pseudo_url';
|
|
18
19
|
//# sourceMappingURL=index.js.map
|
package/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC;AAC5B,cAAc,wBAAwB,CAAC;AACvC,cAAc,oBAAoB,CAAC;AACnC,cAAc,qBAAqB,CAAC;AACpC,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC;AAC5B,cAAc,wBAAwB,CAAC;AACvC,cAAc,oBAAoB,CAAC;AACnC,cAAc,sBAAsB,CAAC;AACrC,cAAc,qBAAqB,CAAC;AACpC,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,UAAU,CAAC;AACzB,cAAc,0BAA0B,CAAC;AACzC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,oBAAoB,CAAC;AACnC,cAAc,yBAAyB,CAAC;AACxC,cAAc,qBAAqB,CAAC;AACpC,cAAc,iBAAiB,CAAC;AAChC,cAAc,mBAAmB,CAAC;AAClC,cAAc,wBAAwB,CAAC;AACvC,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC"}
|
package/log.d.ts
CHANGED
|
@@ -1,3 +1,83 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
1
|
+
import type { CrawleeLogger, CrawleeLoggerOptions } from '@crawlee/types';
|
|
2
|
+
import type { LoggerOptions } from '@apify/log';
|
|
3
|
+
import log, { Log, Logger, LoggerJson, LoggerText, LogLevel } from '@apify/log';
|
|
4
|
+
export type { CrawleeLogger, CrawleeLoggerOptions };
|
|
5
|
+
/**
|
|
6
|
+
* Abstract base class for custom Crawlee logger implementations.
|
|
7
|
+
*
|
|
8
|
+
* Subclasses must implement two methods:
|
|
9
|
+
* - {@link BaseCrawleeLogger.logWithLevel} — the core logging dispatch
|
|
10
|
+
* - {@link BaseCrawleeLogger.createChild} — how to create a child logger instance
|
|
11
|
+
*
|
|
12
|
+
* All other `CrawleeLogger` methods (`error`, `warning`, `info`, `debug`, etc.)
|
|
13
|
+
* are derived automatically. Level filtering is entirely the responsibility of the
|
|
14
|
+
* underlying library — `logWithLevel()` is called for every message.
|
|
15
|
+
*
|
|
16
|
+
* **Example — Winston adapter:**
|
|
17
|
+
* ```typescript
|
|
18
|
+
* const CRAWLEE_TO_WINSTON = { 1: 'error', 2: 'warn', 3: 'warn', 4: 'info', 5: 'debug', 6: 'debug' };
|
|
19
|
+
*
|
|
20
|
+
* class WinstonAdapter extends BaseCrawleeLogger {
|
|
21
|
+
* constructor(private logger: winston.Logger, options?: Partial<CrawleeLoggerOptions>) {
|
|
22
|
+
* super(options);
|
|
23
|
+
* }
|
|
24
|
+
*
|
|
25
|
+
* logWithLevel(level: number, message: string, data?: Record<string, unknown>): void {
|
|
26
|
+
* this.logger.log(CRAWLEE_TO_WINSTON[level] ?? 'info', message, data);
|
|
27
|
+
* }
|
|
28
|
+
*
|
|
29
|
+
* protected createChild(options: Partial<CrawleeLoggerOptions>): CrawleeLogger {
|
|
30
|
+
* return new WinstonAdapter(this.logger.child({ prefix: options.prefix }), { ...this.getOptions(), ...options });
|
|
31
|
+
* }
|
|
32
|
+
* }
|
|
33
|
+
* ```
|
|
34
|
+
*/
|
|
35
|
+
export declare abstract class BaseCrawleeLogger implements CrawleeLogger {
|
|
36
|
+
private options;
|
|
37
|
+
private readonly warningsLogged;
|
|
38
|
+
constructor(options?: Partial<CrawleeLoggerOptions>);
|
|
39
|
+
/**
|
|
40
|
+
* Core logging method. Subclasses must implement this to dispatch log messages
|
|
41
|
+
* to the underlying logger (Winston, Pino, console, etc.).
|
|
42
|
+
*
|
|
43
|
+
* Level filtering is the responsibility of the underlying library — this method
|
|
44
|
+
* is called for every message regardless of the current level.
|
|
45
|
+
*
|
|
46
|
+
* @param level Crawlee log level (use {@link LogLevel} constants)
|
|
47
|
+
* @param message The log message
|
|
48
|
+
* @param data Optional structured data to attach to the log entry
|
|
49
|
+
*/
|
|
50
|
+
abstract logWithLevel(level: number, message: string, data?: Record<string, unknown>): void;
|
|
51
|
+
/**
|
|
52
|
+
* Creates a child logger instance. Subclasses must implement this to define
|
|
53
|
+
* how child loggers are created for the underlying logger.
|
|
54
|
+
*/
|
|
55
|
+
protected abstract createChild(options: Partial<CrawleeLoggerOptions>): CrawleeLogger;
|
|
56
|
+
getOptions(): CrawleeLoggerOptions;
|
|
57
|
+
setOptions(options: Partial<CrawleeLoggerOptions>): void;
|
|
58
|
+
child(options: Partial<CrawleeLoggerOptions>): CrawleeLogger;
|
|
59
|
+
error(message: string, data?: Record<string, unknown>): void;
|
|
60
|
+
exception(exception: Error, message: string, data?: Record<string, unknown>): void;
|
|
61
|
+
softFail(message: string, data?: Record<string, unknown>): void;
|
|
62
|
+
warning(message: string, data?: Record<string, unknown>): void;
|
|
63
|
+
warningOnce(message: string): void;
|
|
64
|
+
info(message: string, data?: Record<string, unknown>): void;
|
|
65
|
+
debug(message: string, data?: Record<string, unknown>): void;
|
|
66
|
+
perf(message: string, data?: Record<string, unknown>): void;
|
|
67
|
+
deprecated(message: string): void;
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Adapter that wraps `@apify/log`'s {@link Log} instance to implement the {@link CrawleeLogger} interface.
|
|
71
|
+
*
|
|
72
|
+
* This is the default logger used by Crawlee when no custom logger is configured.
|
|
73
|
+
* Users who want to use a different logging library should implement {@link BaseCrawleeLogger} directly.
|
|
74
|
+
*/
|
|
75
|
+
export declare class ApifyLogAdapter extends BaseCrawleeLogger {
|
|
76
|
+
private readonly apifyLog;
|
|
77
|
+
constructor(apifyLog: Log, options?: Partial<CrawleeLoggerOptions>);
|
|
78
|
+
logWithLevel(level: number, message: string, data?: Record<string, unknown>): void;
|
|
79
|
+
protected createChild(options: Partial<CrawleeLoggerOptions>): CrawleeLogger;
|
|
80
|
+
}
|
|
81
|
+
export { log, Log, LogLevel, Logger, LoggerJson, LoggerText };
|
|
82
|
+
export type { LoggerOptions };
|
|
3
83
|
//# sourceMappingURL=log.d.ts.map
|
package/log.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"log.d.ts","sourceRoot":"","sources":["../src/log.ts"],"names":[],"mappings":"AAAA,OAAO,GAAG,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,aAAa,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"log.d.ts","sourceRoot":"","sources":["../src/log.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AAE1E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,GAAG,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEhF,YAAY,EAAE,aAAa,EAAE,oBAAoB,EAAE,CAAC;AAEpD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH,8BAAsB,iBAAkB,YAAW,aAAa;IAC5D,OAAO,CAAC,OAAO,CAAuB;IACtC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAqB;gBAExC,OAAO,GAAE,OAAO,CAAC,oBAAoB,CAAM;IAIvD;;;;;;;;;;OAUG;IACH,QAAQ,CAAC,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI;IAE3F;;;OAGG;IACH,SAAS,CAAC,QAAQ,CAAC,WAAW,CAAC,OAAO,EAAE,OAAO,CAAC,oBAAoB,CAAC,GAAG,aAAa;IAErF,UAAU,IAAI,oBAAoB;IAIlC,UAAU,CAAC,OAAO,EAAE,OAAO,CAAC,oBAAoB,CAAC,GAAG,IAAI;IAIxD,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,oBAAoB,CAAC,GAAG,aAAa;IAI5D,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI;IAI5D,SAAS,CAAC,SAAS,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI;IAQlF,QAAQ,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI;IAI/D,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI;IAI9D,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAOlC,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI;IAI3D,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI;IAI5D,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI;IAI3D,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;CAGpC;AAED;;;;;GAKG;AACH,qBAAa,eAAgB,SAAQ,iBAAiB;IAE9C,OAAO,CAAC,QAAQ,CAAC,QAAQ;gBAAR,QAAQ,EAAE,GAAG,EAC9B,OAAO,CAAC,EAAE,OAAO,CAAC,oBAAoB,CAAC;IAK3C,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI;IAIlF,SAAS,CAAC,WAAW,CAAC,OAAO,EAAE,OAAO,CAAC,oBAAoB,CAAC,GAAG,aAAa;CAM/E;AAED,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,CAAC;AAC9D,YAAY,EAAE,aAAa,EAAE,CAAC"}
|
package/log.js
CHANGED
|
@@ -1,3 +1,105 @@
|
|
|
1
1
|
import log, { Log, Logger, LoggerJson, LoggerText, LogLevel } from '@apify/log';
|
|
2
|
+
/**
|
|
3
|
+
* Abstract base class for custom Crawlee logger implementations.
|
|
4
|
+
*
|
|
5
|
+
* Subclasses must implement two methods:
|
|
6
|
+
* - {@link BaseCrawleeLogger.logWithLevel} — the core logging dispatch
|
|
7
|
+
* - {@link BaseCrawleeLogger.createChild} — how to create a child logger instance
|
|
8
|
+
*
|
|
9
|
+
* All other `CrawleeLogger` methods (`error`, `warning`, `info`, `debug`, etc.)
|
|
10
|
+
* are derived automatically. Level filtering is entirely the responsibility of the
|
|
11
|
+
* underlying library — `logWithLevel()` is called for every message.
|
|
12
|
+
*
|
|
13
|
+
* **Example — Winston adapter:**
|
|
14
|
+
* ```typescript
|
|
15
|
+
* const CRAWLEE_TO_WINSTON = { 1: 'error', 2: 'warn', 3: 'warn', 4: 'info', 5: 'debug', 6: 'debug' };
|
|
16
|
+
*
|
|
17
|
+
* class WinstonAdapter extends BaseCrawleeLogger {
|
|
18
|
+
* constructor(private logger: winston.Logger, options?: Partial<CrawleeLoggerOptions>) {
|
|
19
|
+
* super(options);
|
|
20
|
+
* }
|
|
21
|
+
*
|
|
22
|
+
* logWithLevel(level: number, message: string, data?: Record<string, unknown>): void {
|
|
23
|
+
* this.logger.log(CRAWLEE_TO_WINSTON[level] ?? 'info', message, data);
|
|
24
|
+
* }
|
|
25
|
+
*
|
|
26
|
+
* protected createChild(options: Partial<CrawleeLoggerOptions>): CrawleeLogger {
|
|
27
|
+
* return new WinstonAdapter(this.logger.child({ prefix: options.prefix }), { ...this.getOptions(), ...options });
|
|
28
|
+
* }
|
|
29
|
+
* }
|
|
30
|
+
* ```
|
|
31
|
+
*/
|
|
32
|
+
export class BaseCrawleeLogger {
|
|
33
|
+
options;
|
|
34
|
+
warningsLogged = new Set();
|
|
35
|
+
constructor(options = {}) {
|
|
36
|
+
this.options = options;
|
|
37
|
+
}
|
|
38
|
+
getOptions() {
|
|
39
|
+
return this.options;
|
|
40
|
+
}
|
|
41
|
+
setOptions(options) {
|
|
42
|
+
this.options = { ...this.options, ...options };
|
|
43
|
+
}
|
|
44
|
+
child(options) {
|
|
45
|
+
return this.createChild(options);
|
|
46
|
+
}
|
|
47
|
+
error(message, data) {
|
|
48
|
+
this.logWithLevel(LogLevel.ERROR, message, data);
|
|
49
|
+
}
|
|
50
|
+
exception(exception, message, data) {
|
|
51
|
+
this.logWithLevel(LogLevel.ERROR, `${message}: ${exception.message}`, {
|
|
52
|
+
...data,
|
|
53
|
+
stack: exception.stack,
|
|
54
|
+
exception,
|
|
55
|
+
});
|
|
56
|
+
}
|
|
57
|
+
softFail(message, data) {
|
|
58
|
+
this.logWithLevel(LogLevel.SOFT_FAIL, message, data);
|
|
59
|
+
}
|
|
60
|
+
warning(message, data) {
|
|
61
|
+
this.logWithLevel(LogLevel.WARNING, message, data);
|
|
62
|
+
}
|
|
63
|
+
warningOnce(message) {
|
|
64
|
+
if (!this.warningsLogged.has(message)) {
|
|
65
|
+
this.warningsLogged.add(message);
|
|
66
|
+
this.warning(message);
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
info(message, data) {
|
|
70
|
+
this.logWithLevel(LogLevel.INFO, message, data);
|
|
71
|
+
}
|
|
72
|
+
debug(message, data) {
|
|
73
|
+
this.logWithLevel(LogLevel.DEBUG, message, data);
|
|
74
|
+
}
|
|
75
|
+
perf(message, data) {
|
|
76
|
+
this.logWithLevel(LogLevel.PERF, `[PERF] ${message}`, data);
|
|
77
|
+
}
|
|
78
|
+
deprecated(message) {
|
|
79
|
+
this.warningOnce(`[DEPRECATED] ${message}`);
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
/**
|
|
83
|
+
* Adapter that wraps `@apify/log`'s {@link Log} instance to implement the {@link CrawleeLogger} interface.
|
|
84
|
+
*
|
|
85
|
+
* This is the default logger used by Crawlee when no custom logger is configured.
|
|
86
|
+
* Users who want to use a different logging library should implement {@link BaseCrawleeLogger} directly.
|
|
87
|
+
*/
|
|
88
|
+
export class ApifyLogAdapter extends BaseCrawleeLogger {
|
|
89
|
+
apifyLog;
|
|
90
|
+
constructor(apifyLog, options) {
|
|
91
|
+
super(options ?? {});
|
|
92
|
+
this.apifyLog = apifyLog;
|
|
93
|
+
}
|
|
94
|
+
logWithLevel(level, message, data) {
|
|
95
|
+
this.apifyLog.internal(level, message, data);
|
|
96
|
+
}
|
|
97
|
+
createChild(options) {
|
|
98
|
+
return new ApifyLogAdapter(this.apifyLog.child({ prefix: options.prefix ?? null }), {
|
|
99
|
+
...this.getOptions(),
|
|
100
|
+
...options,
|
|
101
|
+
});
|
|
102
|
+
}
|
|
103
|
+
}
|
|
2
104
|
export { log, Log, LogLevel, Logger, LoggerJson, LoggerText };
|
|
3
105
|
//# sourceMappingURL=log.js.map
|
package/log.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"log.js","sourceRoot":"","sources":["../src/log.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"log.js","sourceRoot":"","sources":["../src/log.ts"],"names":[],"mappings":"AAGA,OAAO,GAAG,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAIhF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH,MAAM,OAAgB,iBAAiB;IAC3B,OAAO,CAAuB;IACrB,cAAc,GAAG,IAAI,GAAG,EAAU,CAAC;IAEpD,YAAY,UAAyC,EAAE;QACnD,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IAC3B,CAAC;IAqBD,UAAU;QACN,OAAO,IAAI,CAAC,OAAO,CAAC;IACxB,CAAC;IAED,UAAU,CAAC,OAAsC;QAC7C,IAAI,CAAC,OAAO,GAAG,EAAE,GAAG,IAAI,CAAC,OAAO,EAAE,GAAG,OAAO,EAAE,CAAC;IACnD,CAAC;IAED,KAAK,CAAC,OAAsC;QACxC,OAAO,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IACrC,CAAC;IAED,KAAK,CAAC,OAAe,EAAE,IAA8B;QACjD,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,KAAK,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IACrD,CAAC;IAED,SAAS,CAAC,SAAgB,EAAE,OAAe,EAAE,IAA8B;QACvE,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,KAAK,EAAE,GAAG,OAAO,KAAK,SAAS,CAAC,OAAO,EAAE,EAAE;YAClE,GAAG,IAAI;YACP,KAAK,EAAE,SAAS,CAAC,KAAK;YACtB,SAAS;SACZ,CAAC,CAAC;IACP,CAAC;IAED,QAAQ,CAAC,OAAe,EAAE,IAA8B;QACpD,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,SAAS,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IACzD,CAAC;IAED,OAAO,CAAC,OAAe,EAAE,IAA8B;QACnD,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IACvD,CAAC;IAED,WAAW,CAAC,OAAe;QACvB,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YACpC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACjC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAC1B,CAAC;IACL,CAAC;IAED,IAAI,CAAC,OAAe,EAAE,IAA8B;QAChD,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IACpD,CAAC;IAED,KAAK,CAAC,OAAe,EAAE,IAA8B;QACjD,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,KAAK,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IACrD,CAAC;IAED,IAAI,CAAC,OAAe,EAAE,IAA8B;QAChD,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,UAAU,OAAO,EAAE,EAAE,IAAI,CAAC,CAAC;IAChE,CAAC;IAED,UAAU,CAAC,OAAe;QACtB,IAAI,CAAC,WAAW,CAAC,gBAAgB,OAAO,EAAE,CAAC,CAAC;IAChD,CAAC;CACJ;AAED;;;;;GAKG;AACH,MAAM,OAAO,eAAgB,SAAQ,iBAAiB;IAE7B;IADrB,YACqB,QAAa,EAC9B,OAAuC;QAEvC,KAAK,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;QAHJ,aAAQ,GAAR,QAAQ,CAAK;IAIlC,CAAC;IAED,YAAY,CAAC,KAAa,EAAE,OAAe,EAAE,IAA8B;QACvE,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,KAAiB,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IAC7D,CAAC;IAES,WAAW,CAAC,OAAsC;QACxD,OAAO,IAAI,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,IAAI,EAAE,CAAC,EAAE;YAChF,GAAG,IAAI,CAAC,UAAU,EAAE;YACpB,GAAG,OAAO;SACb,CAAC,CAAC;IACP,CAAC;CACJ;AAED,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/core",
|
|
3
|
-
"version": "4.0.0-beta.
|
|
3
|
+
"version": "4.0.0-beta.41",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=22.0.0"
|
|
@@ -53,20 +53,19 @@
|
|
|
53
53
|
"@apify/pseudo_url": "^2.0.59",
|
|
54
54
|
"@apify/timeout": "^0.3.2",
|
|
55
55
|
"@apify/utilities": "^2.15.5",
|
|
56
|
-
"@crawlee/memory-storage": "4.0.0-beta.
|
|
57
|
-
"@crawlee/types": "4.0.0-beta.
|
|
58
|
-
"@crawlee/utils": "4.0.0-beta.
|
|
56
|
+
"@crawlee/memory-storage": "4.0.0-beta.41",
|
|
57
|
+
"@crawlee/types": "4.0.0-beta.41",
|
|
58
|
+
"@crawlee/utils": "4.0.0-beta.41",
|
|
59
59
|
"@sapphire/async-queue": "^1.5.5",
|
|
60
60
|
"@vladfrangu/async_event_emitter": "^2.4.6",
|
|
61
61
|
"csv-stringify": "^6.5.2",
|
|
62
62
|
"fs-extra": "^11.3.0",
|
|
63
|
-
"got-scraping": "^4.1.1",
|
|
64
63
|
"json5": "^2.2.3",
|
|
65
64
|
"minimatch": "^10.0.1",
|
|
66
65
|
"ow": "^2.0.0",
|
|
67
66
|
"stream-json": "^1.9.1",
|
|
68
67
|
"tldts": "^7.0.6",
|
|
69
|
-
"tough-cookie": "^
|
|
68
|
+
"tough-cookie": "^6.0.0",
|
|
70
69
|
"tslib": "^2.8.1",
|
|
71
70
|
"type-fest": "^4.41.0"
|
|
72
71
|
},
|
|
@@ -77,5 +76,5 @@
|
|
|
77
76
|
}
|
|
78
77
|
}
|
|
79
78
|
},
|
|
80
|
-
"gitHead": "
|
|
79
|
+
"gitHead": "0f3d1a433e1a27647d99ad5dc81a605e82505d1a"
|
|
81
80
|
}
|
package/proxy_configuration.d.ts
CHANGED
|
@@ -1,18 +1,20 @@
|
|
|
1
|
+
import type { ProxyInfo } from '@crawlee/types';
|
|
1
2
|
import type { Request } from './request.js';
|
|
2
3
|
export interface ProxyConfigurationFunction {
|
|
3
|
-
(
|
|
4
|
+
(options?: {
|
|
4
5
|
request?: Request;
|
|
5
6
|
}): string | null | Promise<string | null>;
|
|
6
7
|
}
|
|
8
|
+
type UrlList = (string | null)[];
|
|
7
9
|
export interface ProxyConfigurationOptions {
|
|
8
10
|
/**
|
|
9
11
|
* An array of custom proxy URLs to be rotated.
|
|
10
12
|
* Custom proxies are not compatible with Apify Proxy and an attempt to use both
|
|
11
13
|
* configuration options will cause an error to be thrown on initialize.
|
|
12
14
|
*/
|
|
13
|
-
proxyUrls?:
|
|
15
|
+
proxyUrls?: UrlList;
|
|
14
16
|
/**
|
|
15
|
-
* Custom function that allows you to generate the new proxy URL dynamically. It gets
|
|
17
|
+
* Custom function that allows you to generate the new proxy URL dynamically. It gets an optional parameter with the `Request` object when applicable.
|
|
16
18
|
* Can return either stringified proxy URL or `null` if the proxy should not be used. Can be asynchronous.
|
|
17
19
|
*
|
|
18
20
|
* This function is used to generate the URL when {@link ProxyConfiguration.newUrl} or {@link ProxyConfiguration.newProxyInfo} is called.
|
|
@@ -29,72 +31,12 @@ export interface ProxyConfigurationOptions {
|
|
|
29
31
|
*
|
|
30
32
|
* Use `null` as a proxy URL to disable the proxy for the given tier.
|
|
31
33
|
*/
|
|
32
|
-
tieredProxyUrls?:
|
|
34
|
+
tieredProxyUrls?: UrlList[];
|
|
33
35
|
}
|
|
34
36
|
export interface TieredProxy {
|
|
35
37
|
proxyUrl: string | null;
|
|
36
38
|
proxyTier?: number;
|
|
37
39
|
}
|
|
38
|
-
/**
|
|
39
|
-
* The main purpose of the ProxyInfo object is to provide information
|
|
40
|
-
* about the current proxy connection used by the crawler for the request.
|
|
41
|
-
* Outside of crawlers, you can get this object by calling {@link ProxyConfiguration.newProxyInfo}.
|
|
42
|
-
*
|
|
43
|
-
* **Example usage:**
|
|
44
|
-
*
|
|
45
|
-
* ```javascript
|
|
46
|
-
* const proxyConfiguration = new ProxyConfiguration({
|
|
47
|
-
* proxyUrls: ['...', '...'] // List of Proxy URLs to rotate
|
|
48
|
-
* });
|
|
49
|
-
*
|
|
50
|
-
* // Getting proxyInfo object by calling class method directly
|
|
51
|
-
* const proxyInfo = await proxyConfiguration.newProxyInfo();
|
|
52
|
-
*
|
|
53
|
-
* // In crawler
|
|
54
|
-
* const crawler = new CheerioCrawler({
|
|
55
|
-
* // ...
|
|
56
|
-
* proxyConfiguration,
|
|
57
|
-
* requestHandler({ proxyInfo }) {
|
|
58
|
-
* // Getting used proxy URL
|
|
59
|
-
* const proxyUrl = proxyInfo.url;
|
|
60
|
-
*
|
|
61
|
-
* // Getting ID of used Session
|
|
62
|
-
* const sessionIdentifier = proxyInfo.sessionId;
|
|
63
|
-
* }
|
|
64
|
-
* })
|
|
65
|
-
*
|
|
66
|
-
* ```
|
|
67
|
-
*/
|
|
68
|
-
export interface ProxyInfo {
|
|
69
|
-
/**
|
|
70
|
-
* The identifier of used {@link Session}, if used.
|
|
71
|
-
*/
|
|
72
|
-
sessionId?: string;
|
|
73
|
-
/**
|
|
74
|
-
* The URL of the proxy.
|
|
75
|
-
*/
|
|
76
|
-
url: string;
|
|
77
|
-
/**
|
|
78
|
-
* Username for the proxy.
|
|
79
|
-
*/
|
|
80
|
-
username?: string;
|
|
81
|
-
/**
|
|
82
|
-
* User's password for the proxy.
|
|
83
|
-
*/
|
|
84
|
-
password: string;
|
|
85
|
-
/**
|
|
86
|
-
* Hostname of your proxy.
|
|
87
|
-
*/
|
|
88
|
-
hostname: string;
|
|
89
|
-
/**
|
|
90
|
-
* Proxy port.
|
|
91
|
-
*/
|
|
92
|
-
port: number | string;
|
|
93
|
-
/**
|
|
94
|
-
* Proxy tier for the current proxy, if applicable (only for `tieredProxyUrls`).
|
|
95
|
-
*/
|
|
96
|
-
proxyTier?: number;
|
|
97
|
-
}
|
|
98
40
|
interface TieredProxyOptions {
|
|
99
41
|
request?: Request;
|
|
100
42
|
proxyTier?: number;
|
|
@@ -156,12 +98,12 @@ declare class ProxyTierTracker {
|
|
|
156
98
|
export declare class ProxyConfiguration {
|
|
157
99
|
isManInTheMiddle: boolean;
|
|
158
100
|
protected nextCustomUrlIndex: number;
|
|
159
|
-
protected proxyUrls?:
|
|
160
|
-
protected tieredProxyUrls?:
|
|
161
|
-
protected usedProxyUrls: Map<string, string>;
|
|
101
|
+
protected proxyUrls?: UrlList;
|
|
102
|
+
protected tieredProxyUrls?: UrlList[];
|
|
103
|
+
protected usedProxyUrls: Map<string, string | null>;
|
|
162
104
|
protected newUrlFunction?: ProxyConfigurationFunction;
|
|
163
105
|
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
164
|
-
protected log: import("@
|
|
106
|
+
protected log: import("@crawlee/types").CrawleeLogger;
|
|
165
107
|
protected domainTiers: Map<string, ProxyTierTracker>;
|
|
166
108
|
/**
|
|
167
109
|
* Creates a {@link ProxyConfiguration} instance based on the provided options. Proxy servers are used to prevent target websites from
|
|
@@ -190,25 +132,16 @@ export declare class ProxyConfiguration {
|
|
|
190
132
|
* the currently used proxy via the requestHandler parameter `proxyInfo`.
|
|
191
133
|
* Use it if you want to work with a rich representation of a proxy URL.
|
|
192
134
|
* If you need the URL string only, use {@link ProxyConfiguration.newUrl}.
|
|
193
|
-
* @param [sessionId]
|
|
194
|
-
* Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
|
|
195
|
-
* you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
|
|
196
|
-
* When the provided sessionId is a number, it's converted to a string. Property sessionId of
|
|
197
|
-
* {@link ProxyInfo} is always returned as a type string.
|
|
198
135
|
*
|
|
199
|
-
* All the HTTP requests going through the proxy with the same session identifier
|
|
200
|
-
* will use the same target proxy server (i.e. the same IP address).
|
|
201
|
-
* The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
|
|
202
136
|
* @return Represents information about used proxy and its configuration.
|
|
203
137
|
*/
|
|
204
|
-
newProxyInfo(
|
|
138
|
+
newProxyInfo(options?: TieredProxyOptions): Promise<ProxyInfo | undefined>;
|
|
205
139
|
/**
|
|
206
|
-
* Given a
|
|
207
|
-
* @param _sessionId Session identifier
|
|
140
|
+
* Given a request / proxy tier, this function returns a new proxy URL based on the provided configuration options.
|
|
208
141
|
* @param options Options for the tiered proxy rotation
|
|
209
142
|
* @returns An object with the proxy URL and the proxy tier used.
|
|
210
143
|
*/
|
|
211
|
-
protected _handleTieredUrl(
|
|
144
|
+
protected _handleTieredUrl(options?: TieredProxyOptions): TieredProxy;
|
|
212
145
|
/**
|
|
213
146
|
* Given a `Request` object, this function returns the tier of the proxy that should be used for the request.
|
|
214
147
|
*
|
|
@@ -216,27 +149,17 @@ export declare class ProxyConfiguration {
|
|
|
216
149
|
*/
|
|
217
150
|
protected predictProxyTier(request: Request): number | null;
|
|
218
151
|
/**
|
|
219
|
-
* Returns a new proxy URL based on provided configuration options
|
|
220
|
-
* @param [sessionId]
|
|
221
|
-
* Represents the identifier of user {@link Session} that can be managed by the {@link SessionPool} or
|
|
222
|
-
* you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
|
|
223
|
-
* When the provided sessionId is a number, it's converted to a string.
|
|
152
|
+
* Returns a new proxy URL based on provided configuration options.
|
|
224
153
|
*
|
|
225
|
-
* All the HTTP requests going through the proxy with the same session identifier
|
|
226
|
-
* will use the same target proxy server (i.e. the same IP address).
|
|
227
|
-
* The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
|
|
228
154
|
* @return A string with a proxy URL, including authentication credentials and port number.
|
|
229
155
|
* For example, `http://bob:password123@proxy.example.com:8000`
|
|
230
156
|
*/
|
|
231
|
-
newUrl(
|
|
232
|
-
|
|
233
|
-
* Handles custom url rotation with session
|
|
234
|
-
*/
|
|
235
|
-
protected _handleCustomUrl(sessionId?: string): string;
|
|
157
|
+
newUrl(options?: TieredProxyOptions): Promise<string | undefined>;
|
|
158
|
+
protected _handleProxyUrlsList(): string | null;
|
|
236
159
|
/**
|
|
237
160
|
* Calls the custom newUrlFunction and checks format of its return value
|
|
238
161
|
*/
|
|
239
|
-
protected _callNewUrlFunction(
|
|
162
|
+
protected _callNewUrlFunction(options?: {
|
|
240
163
|
request?: Request;
|
|
241
164
|
}): Promise<string | null>;
|
|
242
165
|
protected _throwCannotCombineCustomMethods(): never;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"proxy_configuration.d.ts","sourceRoot":"","sources":["../src/proxy_configuration.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"proxy_configuration.d.ts","sourceRoot":"","sources":["../src/proxy_configuration.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAc,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAG5D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAG5C,MAAM,WAAW,0BAA0B;IACvC,CAAC,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,MAAM,GAAG,IAAI,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;CAC7E;AAED,KAAK,OAAO,GAAG,CAAC,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC;AAEjC,MAAM,WAAW,yBAAyB;IACtC;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,0BAA0B,CAAC;IAE5C;;;;;;;;;;OAUG;IACH,eAAe,CAAC,EAAE,OAAO,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,WAAW;IACxB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,UAAU,kBAAkB;IACxB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;GAIG;AACH,cAAM,gBAAgB;IAClB,OAAO,CAAC,SAAS,CAAW;IAC5B,OAAO,CAAC,WAAW,CAAS;gBAEhB,eAAe,EAAE,CAAC,MAAM,GAAG,IAAI,CAAC,EAAE,EAAE;IAKhD;;OAEG;IACH,OAAO,CAAC,WAAW;IAgBnB;;;;;OAKG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM;IAIrB;;;OAGG;IACH,WAAW;CAId;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,qBAAa,kBAAkB;IAC3B,gBAAgB,UAAS;IACzB,SAAS,CAAC,kBAAkB,SAAK;IACjC,SAAS,CAAC,SAAS,CAAC,EAAE,OAAO,CAAC;IAC9B,SAAS,CAAC,eAAe,CAAC,EAAE,OAAO,EAAE,CAAC;IACtC,SAAS,CAAC,aAAa,6BAAoC;IAC3D,SAAS,CAAC,cAAc,CAAC,EAAE,0BAA0B,CAAC;IACtD,SAAS,CAAC,GAAG,yCAAsE;IACnF,SAAS,CAAC,WAAW,gCAAuC;IAE5D;;;;;;;;;;;;;;;;;;;OAmBG;gBACS,OAAO,GAAE,yBAA8B;IAwBnD;;;;;;;;OAQG;IACG,YAAY,CAAC,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,SAAS,GAAG,SAAS,CAAC;IAyBhF;;;;OAIG;IACH,SAAS,CAAC,gBAAgB,CAAC,OAAO,CAAC,EAAE,kBAAkB,GAAG,WAAW;IAwBrE;;;;OAIG;IACH,SAAS,CAAC,gBAAgB,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM,GAAG,IAAI;IAiC3D;;;;;OAKG;IACG,MAAM,CAAC,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;IAYvE,SAAS,CAAC,oBAAoB,IAAI,MAAM,GAAG,IAAI;IAI/C;;OAEG;cACa,mBAAmB,CAAC,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,OAAO,CAAA;KAAE;IAcnE,SAAS,CAAC,gCAAgC,IAAI,KAAK;IAMnD,SAAS,CAAC,uBAAuB,IAAI,KAAK;CAG7C"}
|