apify 2.3.1-beta.4 → 3.0.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -5
- package/package.json +69 -128
- package/build/actor.d.ts +0 -113
- package/build/actor.d.ts.map +0 -1
- package/build/actor.js +0 -582
- package/build/actor.js.map +0 -1
- package/build/apify.d.ts +0 -752
- package/build/apify.d.ts.map +0 -1
- package/build/apify.js +0 -877
- package/build/apify.js.map +0 -1
- package/build/autoscaling/autoscaled_pool.d.ts +0 -384
- package/build/autoscaling/autoscaled_pool.d.ts.map +0 -1
- package/build/autoscaling/autoscaled_pool.js +0 -557
- package/build/autoscaling/autoscaled_pool.js.map +0 -1
- package/build/autoscaling/snapshotter.d.ts +0 -278
- package/build/autoscaling/snapshotter.d.ts.map +0 -1
- package/build/autoscaling/snapshotter.js +0 -447
- package/build/autoscaling/snapshotter.js.map +0 -1
- package/build/autoscaling/system_status.d.ts +0 -224
- package/build/autoscaling/system_status.d.ts.map +0 -1
- package/build/autoscaling/system_status.js +0 -228
- package/build/autoscaling/system_status.js.map +0 -1
- package/build/browser_launchers/browser_launcher.d.ts +0 -154
- package/build/browser_launchers/browser_launcher.d.ts.map +0 -1
- package/build/browser_launchers/browser_launcher.js +0 -160
- package/build/browser_launchers/browser_launcher.js.map +0 -1
- package/build/browser_launchers/browser_plugin.d.ts +0 -23
- package/build/browser_launchers/browser_plugin.d.ts.map +0 -1
- package/build/browser_launchers/browser_plugin.js +0 -25
- package/build/browser_launchers/browser_plugin.js.map +0 -1
- package/build/browser_launchers/playwright_launcher.d.ts +0 -131
- package/build/browser_launchers/playwright_launcher.d.ts.map +0 -1
- package/build/browser_launchers/playwright_launcher.js +0 -150
- package/build/browser_launchers/playwright_launcher.js.map +0 -1
- package/build/browser_launchers/puppeteer_launcher.d.ts +0 -153
- package/build/browser_launchers/puppeteer_launcher.d.ts.map +0 -1
- package/build/browser_launchers/puppeteer_launcher.js +0 -197
- package/build/browser_launchers/puppeteer_launcher.js.map +0 -1
- package/build/cache_container.d.ts +0 -31
- package/build/cache_container.d.ts.map +0 -1
- package/build/cache_container.js +0 -48
- package/build/cache_container.js.map +0 -1
- package/build/configuration.d.ts +0 -226
- package/build/configuration.d.ts.map +0 -1
- package/build/configuration.js +0 -325
- package/build/configuration.js.map +0 -1
- package/build/constants.d.ts +0 -37
- package/build/constants.d.ts.map +0 -1
- package/build/constants.js +0 -41
- package/build/constants.js.map +0 -1
- package/build/crawlers/basic_crawler.d.ts +0 -443
- package/build/crawlers/basic_crawler.d.ts.map +0 -1
- package/build/crawlers/basic_crawler.js +0 -664
- package/build/crawlers/basic_crawler.js.map +0 -1
- package/build/crawlers/browser_crawler.d.ts +0 -512
- package/build/crawlers/browser_crawler.d.ts.map +0 -1
- package/build/crawlers/browser_crawler.js +0 -540
- package/build/crawlers/browser_crawler.js.map +0 -1
- package/build/crawlers/cheerio_crawler.d.ts +0 -931
- package/build/crawlers/cheerio_crawler.d.ts.map +0 -1
- package/build/crawlers/cheerio_crawler.js +0 -913
- package/build/crawlers/cheerio_crawler.js.map +0 -1
- package/build/crawlers/crawler_extension.d.ts +0 -10
- package/build/crawlers/crawler_extension.d.ts.map +0 -1
- package/build/crawlers/crawler_extension.js +0 -19
- package/build/crawlers/crawler_extension.js.map +0 -1
- package/build/crawlers/crawler_utils.d.ts +0 -34
- package/build/crawlers/crawler_utils.d.ts.map +0 -1
- package/build/crawlers/crawler_utils.js +0 -87
- package/build/crawlers/crawler_utils.js.map +0 -1
- package/build/crawlers/playwright_crawler.d.ts +0 -448
- package/build/crawlers/playwright_crawler.d.ts.map +0 -1
- package/build/crawlers/playwright_crawler.js +0 -299
- package/build/crawlers/playwright_crawler.js.map +0 -1
- package/build/crawlers/puppeteer_crawler.d.ts +0 -425
- package/build/crawlers/puppeteer_crawler.d.ts.map +0 -1
- package/build/crawlers/puppeteer_crawler.js +0 -299
- package/build/crawlers/puppeteer_crawler.js.map +0 -1
- package/build/crawlers/statistics.d.ts +0 -185
- package/build/crawlers/statistics.d.ts.map +0 -1
- package/build/crawlers/statistics.js +0 -331
- package/build/crawlers/statistics.js.map +0 -1
- package/build/enqueue_links/click_elements.d.ts +0 -179
- package/build/enqueue_links/click_elements.d.ts.map +0 -1
- package/build/enqueue_links/click_elements.js +0 -434
- package/build/enqueue_links/click_elements.js.map +0 -1
- package/build/enqueue_links/enqueue_links.d.ts +0 -117
- package/build/enqueue_links/enqueue_links.d.ts.map +0 -1
- package/build/enqueue_links/enqueue_links.js +0 -163
- package/build/enqueue_links/enqueue_links.js.map +0 -1
- package/build/enqueue_links/shared.d.ts +0 -42
- package/build/enqueue_links/shared.d.ts.map +0 -1
- package/build/enqueue_links/shared.js +0 -121
- package/build/enqueue_links/shared.js.map +0 -1
- package/build/errors.d.ts +0 -29
- package/build/errors.d.ts.map +0 -1
- package/build/errors.js +0 -38
- package/build/errors.js.map +0 -1
- package/build/events.d.ts +0 -11
- package/build/events.d.ts.map +0 -1
- package/build/events.js +0 -147
- package/build/events.js.map +0 -1
- package/build/index.d.ts +0 -4
- package/build/index.d.ts.map +0 -1
- package/build/index.js +0 -7
- package/build/index.js.map +0 -1
- package/build/main.d.ts +0 -179
- package/build/main.d.ts.map +0 -1
- package/build/main.js +0 -81
- package/build/main.js.map +0 -1
- package/build/playwright_utils.d.ts +0 -9
- package/build/playwright_utils.d.ts.map +0 -1
- package/build/playwright_utils.js +0 -90
- package/build/playwright_utils.js.map +0 -1
- package/build/proxy_configuration.d.ts +0 -411
- package/build/proxy_configuration.d.ts.map +0 -1
- package/build/proxy_configuration.js +0 -517
- package/build/proxy_configuration.js.map +0 -1
- package/build/pseudo_url.d.ts +0 -86
- package/build/pseudo_url.d.ts.map +0 -1
- package/build/pseudo_url.js +0 -153
- package/build/pseudo_url.js.map +0 -1
- package/build/puppeteer_request_interception.d.ts +0 -8
- package/build/puppeteer_request_interception.d.ts.map +0 -1
- package/build/puppeteer_request_interception.js +0 -235
- package/build/puppeteer_request_interception.js.map +0 -1
- package/build/puppeteer_utils.d.ts +0 -250
- package/build/puppeteer_utils.d.ts.map +0 -1
- package/build/puppeteer_utils.js +0 -551
- package/build/puppeteer_utils.js.map +0 -1
- package/build/request.d.ts +0 -180
- package/build/request.d.ts.map +0 -1
- package/build/request.js +0 -261
- package/build/request.js.map +0 -1
- package/build/request_list.d.ts +0 -581
- package/build/request_list.d.ts.map +0 -1
- package/build/request_list.js +0 -826
- package/build/request_list.js.map +0 -1
- package/build/serialization.d.ts +0 -5
- package/build/serialization.d.ts.map +0 -1
- package/build/serialization.js +0 -139
- package/build/serialization.js.map +0 -1
- package/build/session_pool/errors.d.ts +0 -11
- package/build/session_pool/errors.d.ts.map +0 -1
- package/build/session_pool/errors.js +0 -18
- package/build/session_pool/errors.js.map +0 -1
- package/build/session_pool/events.d.ts +0 -5
- package/build/session_pool/events.d.ts.map +0 -1
- package/build/session_pool/events.js +0 -6
- package/build/session_pool/events.js.map +0 -1
- package/build/session_pool/session.d.ts +0 -286
- package/build/session_pool/session.d.ts.map +0 -1
- package/build/session_pool/session.js +0 -355
- package/build/session_pool/session.js.map +0 -1
- package/build/session_pool/session_pool.d.ts +0 -280
- package/build/session_pool/session_pool.d.ts.map +0 -1
- package/build/session_pool/session_pool.js +0 -393
- package/build/session_pool/session_pool.js.map +0 -1
- package/build/session_pool/session_utils.d.ts +0 -4
- package/build/session_pool/session_utils.d.ts.map +0 -1
- package/build/session_pool/session_utils.js +0 -24
- package/build/session_pool/session_utils.js.map +0 -1
- package/build/stealth/hiding_tricks.d.ts +0 -22
- package/build/stealth/hiding_tricks.d.ts.map +0 -1
- package/build/stealth/hiding_tricks.js +0 -308
- package/build/stealth/hiding_tricks.js.map +0 -1
- package/build/stealth/stealth.d.ts +0 -56
- package/build/stealth/stealth.d.ts.map +0 -1
- package/build/stealth/stealth.js +0 -125
- package/build/stealth/stealth.js.map +0 -1
- package/build/storages/dataset.d.ts +0 -288
- package/build/storages/dataset.d.ts.map +0 -1
- package/build/storages/dataset.js +0 -480
- package/build/storages/dataset.js.map +0 -1
- package/build/storages/key_value_store.d.ts +0 -243
- package/build/storages/key_value_store.d.ts.map +0 -1
- package/build/storages/key_value_store.js +0 -462
- package/build/storages/key_value_store.js.map +0 -1
- package/build/storages/request_queue.d.ts +0 -318
- package/build/storages/request_queue.d.ts.map +0 -1
- package/build/storages/request_queue.js +0 -636
- package/build/storages/request_queue.js.map +0 -1
- package/build/storages/storage_manager.d.ts +0 -87
- package/build/storages/storage_manager.d.ts.map +0 -1
- package/build/storages/storage_manager.js +0 -150
- package/build/storages/storage_manager.js.map +0 -1
- package/build/tsconfig.tsbuildinfo +0 -1
- package/build/typedefs.d.ts +0 -146
- package/build/typedefs.d.ts.map +0 -1
- package/build/typedefs.js +0 -88
- package/build/typedefs.js.map +0 -1
- package/build/utils.d.ts +0 -175
- package/build/utils.d.ts.map +0 -1
- package/build/utils.js +0 -731
- package/build/utils.js.map +0 -1
- package/build/utils_log.d.ts +0 -41
- package/build/utils_log.d.ts.map +0 -1
- package/build/utils_log.js +0 -192
- package/build/utils_log.js.map +0 -1
- package/build/utils_request.d.ts +0 -77
- package/build/utils_request.d.ts.map +0 -1
- package/build/utils_request.js +0 -385
- package/build/utils_request.js.map +0 -1
- package/build/utils_social.d.ts +0 -210
- package/build/utils_social.d.ts.map +0 -1
- package/build/utils_social.js +0 -787
- package/build/utils_social.js.map +0 -1
- package/build/validators.d.ts +0 -23
- package/build/validators.d.ts.map +0 -1
- package/build/validators.js +0 -29
- package/build/validators.js.map +0 -1
|
@@ -1,331 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
const tslib_1 = require("tslib");
|
|
4
|
-
/* eslint-disable max-classes-per-file */
|
|
5
|
-
const ow_1 = (0, tslib_1.__importDefault)(require("ow"));
|
|
6
|
-
const key_value_store_1 = require("../storages/key_value_store");
|
|
7
|
-
const constants_1 = require("../constants");
|
|
8
|
-
const utils_log_1 = (0, tslib_1.__importDefault)(require("../utils_log"));
|
|
9
|
-
const events_1 = (0, tslib_1.__importDefault)(require("../events"));
|
|
10
|
-
/**
|
|
11
|
-
* @ignore
|
|
12
|
-
*/
|
|
13
|
-
class Job {
|
|
14
|
-
constructor() {
|
|
15
|
-
this.lastRunAt = null;
|
|
16
|
-
this.runs = 0;
|
|
17
|
-
}
|
|
18
|
-
run() {
|
|
19
|
-
this.lastRunAt = Date.now();
|
|
20
|
-
return ++this.runs;
|
|
21
|
-
}
|
|
22
|
-
finish() {
|
|
23
|
-
this.durationMillis = Date.now() - this.lastRunAt;
|
|
24
|
-
return this.durationMillis;
|
|
25
|
-
}
|
|
26
|
-
retryCount() {
|
|
27
|
-
return Math.max(0, this.runs - 1);
|
|
28
|
-
}
|
|
29
|
-
}
|
|
30
|
-
/**
|
|
31
|
-
* The statistics class provides an interface to collecting and logging run
|
|
32
|
-
* statistics for requests.
|
|
33
|
-
*
|
|
34
|
-
* All statistic information is saved on key value store
|
|
35
|
-
* under the key SDK_CRAWLER_STATISTICS_*, persists between
|
|
36
|
-
* migrations and abort/resurrect
|
|
37
|
-
*
|
|
38
|
-
* @property {StatisticState} state
|
|
39
|
-
* Current statistic state used for doing calculations on {@link Statistics#calculate} calls
|
|
40
|
-
* @property {number} id
|
|
41
|
-
* Statistic instance id
|
|
42
|
-
* @property {number[]} requestRetryHistogram
|
|
43
|
-
* Contains the current retries histogram.
|
|
44
|
-
* Index 0 means 0 retries, index 2, 2 retries,
|
|
45
|
-
* and so on
|
|
46
|
-
*/
|
|
47
|
-
class Statistics {
|
|
48
|
-
/**
|
|
49
|
-
* @param {StatisticsOptions} [options]
|
|
50
|
-
* @hideconstructor
|
|
51
|
-
*/
|
|
52
|
-
constructor(options = {}) {
|
|
53
|
-
(0, ow_1.default)(options, ow_1.default.object.exactShape({
|
|
54
|
-
logIntervalSecs: ow_1.default.optional.number,
|
|
55
|
-
logMessage: ow_1.default.optional.string,
|
|
56
|
-
}));
|
|
57
|
-
const { logIntervalSecs = 60, logMessage = 'Statistics', } = options;
|
|
58
|
-
this.log = utils_log_1.default.child({ prefix: 'Statistics' });
|
|
59
|
-
this.logIntervalMillis = logIntervalSecs * 1000;
|
|
60
|
-
this.logMessage = logMessage;
|
|
61
|
-
this.keyValueStore = null;
|
|
62
|
-
// assign an id while incrementing so it can be saved/restored from KV
|
|
63
|
-
this.id = Statistics.id++;
|
|
64
|
-
this.persistStateKey = `SDK_CRAWLER_STATISTICS_${this.id}`;
|
|
65
|
-
this.listener = this.persistState.bind(this);
|
|
66
|
-
this.requestRetryHistogram = [];
|
|
67
|
-
/**
|
|
68
|
-
* @private
|
|
69
|
-
* @type {Object<string|number, Job>}
|
|
70
|
-
*/
|
|
71
|
-
this.requestsInProgress = new Map();
|
|
72
|
-
// initialize by "resetting"
|
|
73
|
-
this.reset();
|
|
74
|
-
}
|
|
75
|
-
/**
|
|
76
|
-
* Set the current statistic instance to pristine values
|
|
77
|
-
*/
|
|
78
|
-
reset() {
|
|
79
|
-
this.state = {
|
|
80
|
-
requestsFinished: 0,
|
|
81
|
-
requestsFailed: 0,
|
|
82
|
-
requestsRetries: 0,
|
|
83
|
-
requestsFailedPerMinute: 0,
|
|
84
|
-
requestsFinishedPerMinute: 0,
|
|
85
|
-
requestMinDurationMillis: Infinity,
|
|
86
|
-
requestMaxDurationMillis: 0,
|
|
87
|
-
requestTotalFailedDurationMillis: 0,
|
|
88
|
-
requestTotalFinishedDurationMillis: 0,
|
|
89
|
-
crawlerStartedAt: null,
|
|
90
|
-
crawlerFinishedAt: null,
|
|
91
|
-
statsPersistedAt: null,
|
|
92
|
-
crawlerRuntimeMillis: 0,
|
|
93
|
-
};
|
|
94
|
-
this.requestRetryHistogram.length = 0;
|
|
95
|
-
this.requestsInProgress.clear();
|
|
96
|
-
this.instanceStart = Date.now();
|
|
97
|
-
this._teardown();
|
|
98
|
-
}
|
|
99
|
-
/**
|
|
100
|
-
* Starts a job
|
|
101
|
-
*
|
|
102
|
-
* @param {number|string} id
|
|
103
|
-
* @ignore
|
|
104
|
-
*/
|
|
105
|
-
startJob(id) {
|
|
106
|
-
let job = this.requestsInProgress.get(id);
|
|
107
|
-
if (!job)
|
|
108
|
-
job = new Job();
|
|
109
|
-
job.run();
|
|
110
|
-
this.requestsInProgress.set(id, job);
|
|
111
|
-
}
|
|
112
|
-
/**
|
|
113
|
-
* Mark job as finished and sets the state
|
|
114
|
-
*
|
|
115
|
-
* @param {number|string} id
|
|
116
|
-
* @ignore
|
|
117
|
-
*/
|
|
118
|
-
finishJob(id) {
|
|
119
|
-
const job = this.requestsInProgress.get(id);
|
|
120
|
-
if (!job)
|
|
121
|
-
return;
|
|
122
|
-
const jobDurationMillis = job.finish();
|
|
123
|
-
this.state.requestsFinished++;
|
|
124
|
-
this.state.requestTotalFinishedDurationMillis += jobDurationMillis;
|
|
125
|
-
this._saveRetryCountForJob(job);
|
|
126
|
-
if (jobDurationMillis < this.state.requestMinDurationMillis)
|
|
127
|
-
this.state.requestMinDurationMillis = jobDurationMillis;
|
|
128
|
-
if (jobDurationMillis > this.state.requestMaxDurationMillis)
|
|
129
|
-
this.state.requestMaxDurationMillis = jobDurationMillis;
|
|
130
|
-
this.requestsInProgress.delete(id);
|
|
131
|
-
}
|
|
132
|
-
/**
|
|
133
|
-
* Mark job as failed and sets the state
|
|
134
|
-
*
|
|
135
|
-
* @param {number|string} id
|
|
136
|
-
* @ignore
|
|
137
|
-
*/
|
|
138
|
-
failJob(id) {
|
|
139
|
-
const job = this.requestsInProgress.get(id);
|
|
140
|
-
if (!job)
|
|
141
|
-
return;
|
|
142
|
-
this.state.requestTotalFailedDurationMillis += job.finish();
|
|
143
|
-
this.state.requestsFailed++;
|
|
144
|
-
this._saveRetryCountForJob(job);
|
|
145
|
-
this.requestsInProgress.delete(id);
|
|
146
|
-
}
|
|
147
|
-
/**
|
|
148
|
-
* Calculate the current statistics
|
|
149
|
-
*/
|
|
150
|
-
calculate() {
|
|
151
|
-
const { requestsFailed, requestsFinished, requestTotalFailedDurationMillis, requestTotalFinishedDurationMillis, } = this.state;
|
|
152
|
-
const totalMillis = Date.now() - this.instanceStart;
|
|
153
|
-
const totalMinutes = totalMillis / 1000 / 60;
|
|
154
|
-
return {
|
|
155
|
-
requestAvgFailedDurationMillis: Math.round(requestTotalFailedDurationMillis / requestsFailed) || Infinity,
|
|
156
|
-
requestAvgFinishedDurationMillis: Math.round(requestTotalFinishedDurationMillis / requestsFinished) || Infinity,
|
|
157
|
-
requestsFinishedPerMinute: Math.round(requestsFinished / totalMinutes) || 0,
|
|
158
|
-
requestsFailedPerMinute: Math.floor(requestsFailed / totalMinutes) || 0,
|
|
159
|
-
requestTotalDurationMillis: requestTotalFinishedDurationMillis + requestTotalFailedDurationMillis,
|
|
160
|
-
requestsTotal: requestsFailed + requestsFinished,
|
|
161
|
-
crawlerRuntimeMillis: totalMillis,
|
|
162
|
-
};
|
|
163
|
-
}
|
|
164
|
-
/**
|
|
165
|
-
* Initializes the key value store for persisting the statistics,
|
|
166
|
-
* displaying the current state in predefined intervals
|
|
167
|
-
*/
|
|
168
|
-
async startCapturing() {
|
|
169
|
-
this.keyValueStore = await (0, key_value_store_1.openKeyValueStore)();
|
|
170
|
-
await this._maybeLoadStatistics();
|
|
171
|
-
if (this.state.crawlerStartedAt === null) {
|
|
172
|
-
this.state.crawlerStartedAt = new Date();
|
|
173
|
-
}
|
|
174
|
-
events_1.default.on(constants_1.ACTOR_EVENT_NAMES_EX.PERSIST_STATE, this.listener);
|
|
175
|
-
this.logInterval = setInterval(() => {
|
|
176
|
-
this.log.info(this.logMessage, {
|
|
177
|
-
...this.calculate(),
|
|
178
|
-
retryHistogram: this.requestRetryHistogram,
|
|
179
|
-
});
|
|
180
|
-
}, this.logIntervalMillis);
|
|
181
|
-
}
|
|
182
|
-
/**
|
|
183
|
-
* Stops logging and remove event listeners, then persist
|
|
184
|
-
*/
|
|
185
|
-
async stopCapturing() {
|
|
186
|
-
this._teardown();
|
|
187
|
-
this.state.crawlerFinishedAt = new Date();
|
|
188
|
-
await this.persistState();
|
|
189
|
-
}
|
|
190
|
-
/**
|
|
191
|
-
* @param {Job} job
|
|
192
|
-
* @ignore
|
|
193
|
-
* @protected
|
|
194
|
-
* @internal
|
|
195
|
-
*/
|
|
196
|
-
_saveRetryCountForJob(job) {
|
|
197
|
-
const retryCount = job.retryCount();
|
|
198
|
-
if (retryCount > 0)
|
|
199
|
-
this.state.requestsRetries++;
|
|
200
|
-
this.requestRetryHistogram[retryCount] = this.requestRetryHistogram[retryCount]
|
|
201
|
-
? this.requestRetryHistogram[retryCount] + 1
|
|
202
|
-
: 1;
|
|
203
|
-
}
|
|
204
|
-
/**
|
|
205
|
-
* Persist internal state to the key value store
|
|
206
|
-
*/
|
|
207
|
-
async persistState() {
|
|
208
|
-
// this might be called before startCapturing was called without using await, should not crash
|
|
209
|
-
if (!this.keyValueStore) {
|
|
210
|
-
return;
|
|
211
|
-
}
|
|
212
|
-
this.log.debug('Persisting state', { persistStateKey: this.persistStateKey });
|
|
213
|
-
await this.keyValueStore.setValue(this.persistStateKey, this.toJSON());
|
|
214
|
-
}
|
|
215
|
-
/**
|
|
216
|
-
* Loads the current statistic from the key value store if any
|
|
217
|
-
* @ignore
|
|
218
|
-
* @protected
|
|
219
|
-
* @internal
|
|
220
|
-
*/
|
|
221
|
-
async _maybeLoadStatistics() {
|
|
222
|
-
// this might be called before startCapturing was called without using await, should not crash
|
|
223
|
-
if (!this.keyValueStore) {
|
|
224
|
-
return;
|
|
225
|
-
}
|
|
226
|
-
const savedState = await this.keyValueStore.getValue(this.persistStateKey);
|
|
227
|
-
if (!savedState)
|
|
228
|
-
return;
|
|
229
|
-
// We saw a run where the requestRetryHistogram was not iterable and crashed
|
|
230
|
-
// the actor. Adding some logging to monitor this problem in the future.
|
|
231
|
-
if (!Array.isArray(savedState.requestRetryHistogram)) {
|
|
232
|
-
this.log.warning('Received invalid state from Key-value store.', {
|
|
233
|
-
persistStateKey: this.persistStateKey,
|
|
234
|
-
state: savedState,
|
|
235
|
-
});
|
|
236
|
-
}
|
|
237
|
-
this.log.debug('Recreating state from KeyValueStore', { persistStateKey: this.persistStateKey });
|
|
238
|
-
this.requestRetryHistogram.push(...savedState.requestRetryHistogram);
|
|
239
|
-
this.state.requestsFinished = savedState.requestsFinished;
|
|
240
|
-
this.state.requestsFailed = savedState.requestsFailed;
|
|
241
|
-
this.state.requestsRetries = savedState.requestsRetries;
|
|
242
|
-
this.state.requestTotalFailedDurationMillis = savedState.requestTotalFailedDurationMillis;
|
|
243
|
-
this.state.requestTotalFinishedDurationMillis = savedState.requestTotalFinishedDurationMillis;
|
|
244
|
-
this.state.requestMinDurationMillis = savedState.requestMinDurationMillis;
|
|
245
|
-
this.state.requestMaxDurationMillis = savedState.requestMaxDurationMillis;
|
|
246
|
-
// persisted state uses ISO date strings
|
|
247
|
-
this.state.crawlerFinishedAt = savedState.crawlerFinishedAt ? new Date(savedState.crawlerFinishedAt) : null;
|
|
248
|
-
this.state.crawlerStartedAt = savedState.crawlerStartedAt ? new Date(savedState.crawlerStartedAt) : null;
|
|
249
|
-
this.state.statsPersistedAt = savedState.statsPersistedAt ? new Date(savedState.statsPersistedAt) : null;
|
|
250
|
-
this.state.crawlerRuntimeMillis = savedState.crawlerRuntimeMillis;
|
|
251
|
-
this.instanceStart = Date.now() - (this.state.statsPersistedAt - savedState.crawlerLastStartTimestamp);
|
|
252
|
-
this.log.debug('Loaded from KeyValueStore');
|
|
253
|
-
}
|
|
254
|
-
/**
|
|
255
|
-
* @ignore
|
|
256
|
-
* @protected
|
|
257
|
-
* @internal
|
|
258
|
-
*/
|
|
259
|
-
_teardown() {
|
|
260
|
-
// this can be called before a call to startCapturing happens (or in a 'finally' block)
|
|
261
|
-
events_1.default.removeListener(constants_1.ACTOR_EVENT_NAMES_EX.PERSIST_STATE, this.listener);
|
|
262
|
-
if (this.logInterval) {
|
|
263
|
-
clearInterval(this.logInterval);
|
|
264
|
-
this.logInterval = null;
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
/**
|
|
268
|
-
* Make this class serializable when called with `JSON.stringify(statsInstance)` directly
|
|
269
|
-
* or through `keyValueStore.setValue('KEY', statsInstance)`
|
|
270
|
-
*
|
|
271
|
-
* @returns {StatisticPersistedState & StatisticState}
|
|
272
|
-
*/
|
|
273
|
-
toJSON() {
|
|
274
|
-
// merge all the current state information that can be used from the outside
|
|
275
|
-
// without the need to reconstruct for the sake of stats.calculate()
|
|
276
|
-
// omit duplicated information
|
|
277
|
-
return {
|
|
278
|
-
...this.state,
|
|
279
|
-
crawlerLastStartTimestamp: this.instanceStart,
|
|
280
|
-
crawlerFinishedAt: this.state.crawlerFinishedAt ? new Date(this.state.crawlerFinishedAt).toISOString() : null,
|
|
281
|
-
crawlerStartedAt: this.state.crawlerStartedAt ? new Date(this.state.crawlerStartedAt).toISOString() : null,
|
|
282
|
-
requestRetryHistogram: this.requestRetryHistogram,
|
|
283
|
-
statsId: this.id,
|
|
284
|
-
statsPersistedAt: new Date().toISOString(),
|
|
285
|
-
...this.calculate(),
|
|
286
|
-
};
|
|
287
|
-
}
|
|
288
|
-
}
|
|
289
|
-
Statistics.id = 0;
|
|
290
|
-
exports.default = Statistics;
|
|
291
|
-
/**
|
|
292
|
-
* @ignore
|
|
293
|
-
* @typedef StatisticsOptions
|
|
294
|
-
* @property {number} [logIntervalSecs]
|
|
295
|
-
* @property {string} [logMessage]
|
|
296
|
-
*/
|
|
297
|
-
/**
|
|
298
|
-
* Format of the persisted stats
|
|
299
|
-
*
|
|
300
|
-
* @typedef StatisticPersistedState
|
|
301
|
-
* @property {number[]} requestRetryHistogram
|
|
302
|
-
* @property {number} statsId
|
|
303
|
-
* @property {number} requestAvgFailedDurationMillis
|
|
304
|
-
* @property {number} requestAvgFinishedDurationMillis
|
|
305
|
-
* @property {number} requestsFinishedPerMinute
|
|
306
|
-
* @property {number} requestsFailedPerMinute
|
|
307
|
-
* @property {number} requestTotalDurationMillis
|
|
308
|
-
* @property {number} requestsTotal
|
|
309
|
-
* @property {number} crawlerRuntimeMillis
|
|
310
|
-
* @property {number} crawlerLastStartTimestamp
|
|
311
|
-
* @property {string} statsPersistedAt
|
|
312
|
-
*/
|
|
313
|
-
/**
|
|
314
|
-
* Contains the statistics state
|
|
315
|
-
*
|
|
316
|
-
* @typedef StatisticState
|
|
317
|
-
* @property {number} requestsFinished
|
|
318
|
-
* @property {number} requestsFailed
|
|
319
|
-
* @property {number} requestsRetries
|
|
320
|
-
* @property {number} requestsFailedPerMinute
|
|
321
|
-
* @property {number} requestsFinishedPerMinute
|
|
322
|
-
* @property {number} requestMinDurationMillis
|
|
323
|
-
* @property {number} requestMaxDurationMillis
|
|
324
|
-
* @property {number} requestTotalFailedDurationMillis
|
|
325
|
-
* @property {number} requestTotalFinishedDurationMillis
|
|
326
|
-
* @property {Date|string|null} crawlerStartedAt
|
|
327
|
-
* @property {Date|string|null} crawlerFinishedAt
|
|
328
|
-
* @property {number} crawlerRuntimeMillis
|
|
329
|
-
* @property {Date|string|null} statsPersistedAt
|
|
330
|
-
*/
|
|
331
|
-
//# sourceMappingURL=statistics.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"statistics.js","sourceRoot":"","sources":["../../src/crawlers/statistics.js"],"names":[],"mappings":";;;AAAA,yCAAyC;AACzC,yDAAoB;AACpB,iEAAgE;AAChE,4CAAoD;AACpD,0EAAsC;AACtC,oEAA+B;AAE/B;;GAEG;AACH,MAAM,GAAG;IACL;QACI,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACtB,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC;IAClB,CAAC;IAED,GAAG;QACC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC5B,OAAO,EAAE,IAAI,CAAC,IAAI,CAAC;IACvB,CAAC;IAED,MAAM;QACF,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC;QAClD,OAAO,IAAI,CAAC,cAAc,CAAC;IAC/B,CAAC;IAED,UAAU;QACN,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC;IACtC,CAAC;CACJ;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,UAAU;IACZ;;;OAGG;IACH,YAAY,OAAO,GAAG,EAAE;QACpB,IAAA,YAAE,EAAC,OAAO,EAAE,YAAE,CAAC,MAAM,CAAC,UAAU,CAAC;YAC7B,eAAe,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;YACnC,UAAU,EAAE,YAAE,CAAC,QAAQ,CAAC,MAAM;SACjC,CAAC,CAAC,CAAC;QAEJ,MAAM,EACF,eAAe,GAAG,EAAE,EACpB,UAAU,GAAG,YAAY,GAC5B,GAAG,OAAO,CAAC;QAEZ,IAAI,CAAC,GAAG,GAAG,mBAAU,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,CAAC;QACtD,IAAI,CAAC,iBAAiB,GAAG,eAAe,GAAG,IAAI,CAAC;QAChD,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;QAC1B,sEAAsE;QACtE,IAAI,CAAC,EAAE,GAAG,UAAU,CAAC,EAAE,EAAE,CAAC;QAC1B,IAAI,CAAC,eAAe,GAAG,0BAA0B,IAAI,CAAC,EAAE,EAAE,CAAC;QAC3D,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7C,IAAI,CAAC,qBAAqB,GAAG,EAAE,CAAC;QAEhC;;;WAGG;QACH,IAAI,CAAC,kBAAkB,GAAG,IAAI,GAAG,EAAE,CAAC;QAEpC,4BAA4B;QAC5B,IAAI,CAAC,KAAK,EAAE,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK;QACD,IAAI,CAAC,KAAK,GAAG;YACT,gBAAgB,EAAE,CAAC;YACnB,cAAc,EAAE,CAAC;YACjB,eAAe,EAAE,CAAC;YAClB,uBAAuB,EAAE,CAAC;YAC1B,yBAAyB,EAAE,CAAC;YAC5B,wBAAwB,EAAE,QAAQ;YAClC,wBAAwB,EAAE,CAAC;YAC3B,gCAAgC,EAAE,CAAC;YACnC,kCAAkC,EAAE,CAAC;YACrC,gBAAgB,EAAE,IAAI;YACtB,iBAAiB,EAAE,IAAI;YACvB,gBAAgB,EAAE,IAAI;YACtB,oBAAoB,EAAE,CAAC;SAC1B,CAAC;QAEF,IAAI,CAAC,qBAAqB,CAAC,MAAM,GAAG,CAAC,CAAC;QACtC,IAAI,CAAC,kBAAkB,CAAC,KAAK,EAAE,CAAC;QAChC,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;Q
AEhC,IAAI,CAAC,SAAS,EAAE,CAAC;IACrB,CAAC;IAED;;;;;OAKG;IACH,QAAQ,CAAC,EAAE;QACP,IAAI,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAC1C,IAAI,CAAC,GAAG;YAAE,GAAG,GAAG,IAAI,GAAG,EAAE,CAAC;QAC1B,GAAG,CAAC,GAAG,EAAE,CAAC;QACV,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IACzC,CAAC;IAED;;;;;OAKG;IACH,SAAS,CAAC,EAAE;QACR,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAC5C,IAAI,CAAC,GAAG;YAAE,OAAO;QACjB,MAAM,iBAAiB,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;QACvC,IAAI,CAAC,KAAK,CAAC,gBAAgB,EAAE,CAAC;QAC9B,IAAI,CAAC,KAAK,CAAC,kCAAkC,IAAI,iBAAiB,CAAC;QACnE,IAAI,CAAC,qBAAqB,CAAC,GAAG,CAAC,CAAC;QAChC,IAAI,iBAAiB,GAAG,IAAI,CAAC,KAAK,CAAC,wBAAwB;YAAE,IAAI,CAAC,KAAK,CAAC,wBAAwB,GAAG,iBAAiB,CAAC;QACrH,IAAI,iBAAiB,GAAG,IAAI,CAAC,KAAK,CAAC,wBAAwB;YAAE,IAAI,CAAC,KAAK,CAAC,wBAAwB,GAAG,iBAAiB,CAAC;QACrH,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IACvC,CAAC;IAED;;;;;OAKG;IACH,OAAO,CAAC,EAAE;QACN,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAC5C,IAAI,CAAC,GAAG;YAAE,OAAO;QACjB,IAAI,CAAC,KAAK,CAAC,gCAAgC,IAAI,GAAG,CAAC,MAAM,EAAE,CAAC;QAC5D,IAAI,CAAC,KAAK,CAAC,cAAc,EAAE,CAAC;QAC5B,IAAI,CAAC,qBAAqB,CAAC,GAAG,CAAC,CAAC;QAChC,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,SAAS;QACL,MAAM,EACF,cAAc,EACd,gBAAgB,EAChB,gCAAgC,EAChC,kCAAkC,GACrC,GAAG,IAAI,CAAC,KAAK,CAAC;QACf,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;QACpD,MAAM,YAAY,GAAG,WAAW,GAAG,IAAI,GAAG,EAAE,CAAC;QAE7C,OAAO;YACH,8BAA8B,EAAE,IAAI,CAAC,KAAK,CAAC,gCAAgC,GAAG,cAAc,CAAC,IAAI,QAAQ;YACzG,gCAAgC,EAAE,IAAI,CAAC,KAAK,CAAC,kCAAkC,GAAG,gBAAgB,CAAC,IAAI,QAAQ;YAC/G,yBAAyB,EAAE,IAAI,CAAC,KAAK,CAAC,gBAAgB,GAAG,YAAY,CAAC,IAAI,CAAC;YAC3E,uBAAuB,EAAE,IAAI,CAAC,KAAK,CAAC,cAAc,GAAG,YAAY,CAAC,IAAI,CAAC;YACvE,0BAA0B,EAAE,kCAAkC,GAAG,gCAAgC;YACjG,aAAa,EAAE,cAAc,GAAG,gBAAgB;YAChD,oBAAoB,EAAE,WAAW;SACpC,CAAC;IACN,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,cAAc;QAChB,IAAI,CAAC,aAAa,GAAG,MAAM,IAAA,mCAAiB,GAAE,CAAC;QAE/C,MAAM,IAAI,CAAC,oBAAoB,EAAE,CAAC;QAElC,IAAI,IAAI,CAAC,KAA
K,CAAC,gBAAgB,KAAK,IAAI,EAAE;YACtC,IAAI,CAAC,KAAK,CAAC,gBAAgB,GAAG,IAAI,IAAI,EAAE,CAAC;SAC5C;QAED,gBAAM,CAAC,EAAE,CAAC,gCAAoB,CAAC,aAAa,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QAE7D,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC,GAAG,EAAE;YAChC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE;gBAC3B,GAAG,IAAI,CAAC,SAAS,EAAE;gBACnB,cAAc,EAAE,IAAI,CAAC,qBAAqB;aAC7C,CAAC,CAAC;QACP,CAAC,EAAE,IAAI,CAAC,iBAAiB,CAAC,CAAC;IAC/B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,aAAa;QACf,IAAI,CAAC,SAAS,EAAE,CAAC;QAEjB,IAAI,CAAC,KAAK,CAAC,iBAAiB,GAAG,IAAI,IAAI,EAAE,CAAC;QAE1C,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;IAC9B,CAAC;IAED;;;;;OAKG;IACH,qBAAqB,CAAC,GAAG;QACrB,MAAM,UAAU,GAAG,GAAG,CAAC,UAAU,EAAE,CAAC;QACpC,IAAI,UAAU,GAAG,CAAC;YAAE,IAAI,CAAC,KAAK,CAAC,eAAe,EAAE,CAAC;QACjD,IAAI,CAAC,qBAAqB,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC,qBAAqB,CAAC,UAAU,CAAC;YAC3E,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,UAAU,CAAC,GAAG,CAAC;YAC5C,CAAC,CAAC,CAAC,CAAC;IACZ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,YAAY;QACd,8FAA8F;QAC9F,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE;YACrB,OAAO;SACV;QAED,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,kBAAkB,EAAE,EAAE,eAAe,EAAE,IAAI,CAAC,eAAe,EAAE,CAAC,CAAC;QAE9E,MAAM,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,eAAe,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;IAC3E,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,oBAAoB;QACtB,8FAA8F;QAC9F,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE;YACrB,OAAO;SACV;QAED,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAE3E,IAAI,CAAC,UAAU;YAAE,OAAO;QAExB,4EAA4E;QAC5E,wEAAwE;QACxE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,qBAAqB,CAAC,EAAE;YAClD,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,8CAA8C,EAAE;gBAC7D,eAAe,EAAE,IAAI,CAAC,eAAe;gBACrC,KAAK,EAAE,UAAU;aACpB,CAAC,CAAC;SACN;QAED,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,qCAAqC,EAAE,EAAE,eAAe,EAAE,IAAI,CAAC,eAAe,EAAE,CAAC,CAAC;QAEjG,IAAI,CAAC,qBAAqB,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,qBAAqB,CAAC,CAAC;QACrE,IAAI,CAAC,KAAK,CAAC,gBAAgB,GAAG,UAAU,CAAC,gBAAgB,CAAC;QAC1D,IAAI,CAAC,KAAK,CAAC,cAAc,GAAG,UAAU,CAAC,cAAc,CAAC;QACtD,IAAI,CAAC,KAAK,CAAC,eAAe,GAAG,UAAU,CAAC,eAAe,CAAC;QAExD,IAAI,CAAC,KAAK,CAAC,gCAAgC,GAAG,UAAU,CAAC,gCAAgC,CAAC;QAC1F,I
AAI,CAAC,KAAK,CAAC,kCAAkC,GAAG,UAAU,CAAC,kCAAkC,CAAC;QAC9F,IAAI,CAAC,KAAK,CAAC,wBAAwB,GAAG,UAAU,CAAC,wBAAwB,CAAC;QAC1E,IAAI,CAAC,KAAK,CAAC,wBAAwB,GAAG,UAAU,CAAC,wBAAwB,CAAC;QAC1E,wCAAwC;QACxC,IAAI,CAAC,KAAK,CAAC,iBAAiB,GAAG,UAAU,CAAC,iBAAiB,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,iBAAiB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAC5G,IAAI,CAAC,KAAK,CAAC,gBAAgB,GAAG,UAAU,CAAC,gBAAgB,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QACzG,IAAI,CAAC,KAAK,CAAC,gBAAgB,GAAG,UAAU,CAAC,gBAAgB,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QACzG,IAAI,CAAC,KAAK,CAAC,oBAAoB,GAAG,UAAU,CAAC,oBAAoB,CAAC;QAClE,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,GAAG,UAAU,CAAC,yBAAyB,CAAC,CAAC;QAEvG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;IAChD,CAAC;IAED;;;;OAIG;IACH,SAAS;QACL,uFAAuF;QACvF,gBAAM,CAAC,cAAc,CAAC,gCAAoB,CAAC,aAAa,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QAEzE,IAAI,IAAI,CAAC,WAAW,EAAE;YAClB,aAAa,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAChC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;SAC3B;IACL,CAAC;IAED;;;;;OAKG;IACH,MAAM;QACF,4EAA4E;QAC5E,oEAAoE;QACpE,8BAA8B;QAC9B,OAAO;YACH,GAAG,IAAI,CAAC,KAAK;YACb,yBAAyB,EAAE,IAAI,CAAC,aAAa;YAC7C,iBAAiB,EAAE,IAAI,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI;YAC7G,gBAAgB,EAAE,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI;YAC1G,qBAAqB,EAAE,IAAI,CAAC,qBAAqB;YACjD,OAAO,EAAE,IAAI,CAAC,EAAE;YAChB,gBAAgB,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YAC1C,GAAG,IAAI,CAAC,SAAS,EAAE;SACtB,CAAC;IACN,CAAC;CACJ;AAED,UAAU,CAAC,EAAE,GAAG,CAAC,CAAC;AAElB,kBAAe,UAAU,CAAC;AAE1B;;;;;GAKG;AAEH;;;;;;;;;;;;;;;GAeG;AAEH;;;;;;;;;;;;;;;;;GAiBG"}
|
|
@@ -1,179 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* The function finds elements matching a specific CSS selector in a Puppeteer page,
|
|
3
|
-
* clicks all those elements using a mouse move and a left mouse button click and intercepts
|
|
4
|
-
* all the navigation requests that are subsequently produced by the page. The intercepted
|
|
5
|
-
* requests, including their methods, headers and payloads are then enqueued to a provided
|
|
6
|
-
* {@link RequestQueue}. This is useful to crawl JavaScript heavy pages where links are not available
|
|
7
|
-
* in `href` attributes, but rather navigations are triggered in click handlers.
|
|
8
|
-
* If you're looking to find URLs in `href` attributes of the page, see {@link utils#enqueueLinks}.
|
|
9
|
-
*
|
|
10
|
-
* Optionally, the function allows you to filter the target links' URLs using an array of {@link PseudoUrl} objects
|
|
11
|
-
* and override settings of the enqueued {@link Request} objects.
|
|
12
|
-
*
|
|
13
|
-
* **IMPORTANT**: To be able to do this, this function uses various mutations on the page,
|
|
14
|
-
* such as changing the Z-index of elements being clicked and their visibility. Therefore,
|
|
15
|
-
* it is recommended to only use this function as the last operation in the page.
|
|
16
|
-
*
|
|
17
|
-
* **USING HEADFUL BROWSER**: When using a headful browser, this function will only be able to click elements
|
|
18
|
-
* in the focused tab, effectively limiting concurrency to 1. In headless mode, full concurrency can be achieved.
|
|
19
|
-
*
|
|
20
|
-
* **PERFORMANCE**: Clicking elements with a mouse and intercepting requests is not a low level operation
|
|
21
|
-
* that takes nanoseconds. It's not very CPU intensive, but it takes time. We strongly recommend limiting
|
|
22
|
-
* the scope of the clicking as much as possible by using a specific selector that targets only the elements
|
|
23
|
-
* that you assume or know will produce a navigation. You can certainly click everything by using
|
|
24
|
-
* the `*` selector, but be prepared to wait minutes to get results on a large and complex page.
|
|
25
|
-
*
|
|
26
|
-
* **Example usage**
|
|
27
|
-
*
|
|
28
|
-
* ```javascript
|
|
29
|
-
* await Apify.utils.puppeteer.enqueueLinksByClickingElements({
|
|
30
|
-
* page,
|
|
31
|
-
* requestQueue,
|
|
32
|
-
* selector: 'a.product-detail',
|
|
33
|
-
* pseudoUrls: [
|
|
34
|
-
* 'https://www.example.com/handbags/[.*]',
|
|
35
|
-
* 'https://www.example.com/purses/[.*]'
|
|
36
|
-
* ],
|
|
37
|
-
* });
|
|
38
|
-
* ```
|
|
39
|
-
* @param {object} options
|
|
40
|
-
* All `enqueueLinksByClickingElements()` parameters are passed
|
|
41
|
-
* via an options object with the following keys:
|
|
42
|
-
* @param {Page} options.page
|
|
43
|
-
* Puppeteer [`Page`](https://pptr.dev/#?product=Puppeteer&show=api-class-page) object.
|
|
44
|
-
* @param {RequestQueue} options.requestQueue
|
|
45
|
-
* A request queue to which the URLs will be enqueued.
|
|
46
|
-
* @param {string} options.selector
|
|
47
|
-
* A CSS selector matching elements to be clicked on. Unlike in {@link utils#enqueueLinks}, there is no default
|
|
48
|
-
* value. This is to prevent suboptimal use of this function by using it too broadly.
|
|
49
|
-
* @param {Array<(string|RegExp|Object<string, *>)>} [options.pseudoUrls]
|
|
50
|
-
* An array of {@link PseudoUrl}s matching the URLs to be enqueued,
|
|
51
|
-
* or an array of strings or RegExps or plain Objects from which the {@link PseudoUrl}s can be constructed.
|
|
52
|
-
*
|
|
53
|
-
* The plain objects must include at least the `purl` property, which holds the pseudo-URL string or RegExp.
|
|
54
|
-
* All remaining keys will be used as the `requestTemplate` argument of the {@link PseudoUrl} constructor,
|
|
55
|
-
* which lets you specify special properties for the enqueued {@link Request} objects.
|
|
56
|
-
*
|
|
57
|
-
* If `pseudoUrls` is an empty array, `null` or `undefined`, then the function
|
|
58
|
-
* enqueues all links found on the page.
|
|
59
|
-
* @param {object} [options.clickOptions]
|
|
60
|
-
* click options for use in Puppeteer's click handler
|
|
61
|
-
* @param {number} [options.clickOptions.clickCount]
|
|
62
|
-
* Number of clicks to be executed. Defaults to 1
|
|
63
|
-
* @param {number} [options.clickOptions.delay]
|
|
64
|
-
* Time to wait between mousedown and mouseup in milliseconds. Defaults to 0
|
|
65
|
-
* @param {RequestTransform} [options.transformRequestFunction]
|
|
66
|
-
* Just before a new {@link Request} is constructed and enqueued to the {@link RequestQueue}, this function can be used
|
|
67
|
-
* to remove it or modify its contents such as `userData`, `payload` or, most importantly, `uniqueKey`. This is useful
|
|
68
|
-
* when you need to enqueue multiple `Requests` to the queue that share the same URL, but differ in methods or payloads,
|
|
69
|
-
* or to dynamically update or create `userData`.
|
|
70
|
-
*
|
|
71
|
-
* For example: by adding `useExtendedUniqueKey: true` to the `request` object, `uniqueKey` will be computed from
|
|
72
|
-
* a combination of `url`, `method` and `payload` which enables crawling of websites that navigate using form submits
|
|
73
|
-
* (POST requests).
|
|
74
|
-
*
|
|
75
|
-
* **Example:**
|
|
76
|
-
* ```javascript
|
|
77
|
-
* {
|
|
78
|
-
* transformRequestFunction: (request) => {
|
|
79
|
-
* request.userData.foo = 'bar';
|
|
80
|
-
* request.useExtendedUniqueKey = true;
|
|
81
|
-
* return request;
|
|
82
|
-
* }
|
|
83
|
-
* }
|
|
84
|
-
* ```
|
|
85
|
-
* @param {number} [options.waitForPageIdleSecs=1]
|
|
86
|
-
* Clicking in the page triggers various asynchronous operations that lead to new URLs being shown
|
|
87
|
-
* by the browser. It could be a simple JavaScript redirect or opening of a new tab in the browser.
|
|
88
|
-
* These events often happen only some time after the actual click. Requests typically take milliseconds
|
|
89
|
-
* while new tabs open in hundreds of milliseconds.
|
|
90
|
-
*
|
|
91
|
-
* To be able to capture all those events, the `enqueueLinksByClickingElements()` function repeatedly waits
|
|
92
|
-
* for the `waitForPageIdleSecs`. By repeatedly we mean that whenever a relevant event is triggered, the timer
|
|
93
|
-
* is restarted. As long as new events keep coming, the function will not return, unless
|
|
94
|
-
* the below `maxWaitForPageIdleSecs` timeout is reached.
|
|
95
|
-
*
|
|
96
|
-
* You may want to reduce this for example when you're sure that your clicks do not open new tabs,
|
|
97
|
-
* or increase it when you're not getting all the expected URLs.
|
|
98
|
-
* @param {number} [options.maxWaitForPageIdleSecs=5]
|
|
99
|
-
* This is the maximum period for which the function will keep tracking events, even if more events keep coming.
|
|
100
|
-
* Its purpose is to prevent a deadlock in the page by periodic events, often unrelated to the clicking itself.
|
|
101
|
-
* See `waitForPageIdleSecs` above for an explanation.
|
|
102
|
-
* @return {Promise<Array<QueueOperationInfo>>}
|
|
103
|
-
* Promise that resolves to an array of {@link QueueOperationInfo} objects.
|
|
104
|
-
* @memberOf puppeteer
|
|
105
|
-
* @name enqueueLinksByClickingElements
|
|
106
|
-
* @function
|
|
107
|
-
*/
|
|
108
|
-
export function enqueueLinksByClickingElements(options: {
|
|
109
|
-
page: Page;
|
|
110
|
-
requestQueue: RequestQueue;
|
|
111
|
-
selector: string;
|
|
112
|
-
pseudoUrls?: (string | RegExp | {
|
|
113
|
-
[x: string]: any;
|
|
114
|
-
})[] | undefined;
|
|
115
|
-
clickOptions?: {
|
|
116
|
-
clickCount?: number | undefined;
|
|
117
|
-
delay?: number | undefined;
|
|
118
|
-
} | undefined;
|
|
119
|
-
transformRequestFunction?: RequestTransform | undefined;
|
|
120
|
-
waitForPageIdleSecs?: number | undefined;
|
|
121
|
-
maxWaitForPageIdleSecs?: number | undefined;
|
|
122
|
-
}): Promise<Array<QueueOperationInfo>>;
|
|
123
|
-
/**
|
|
124
|
-
* Clicks all elements of given page matching given selector.
|
|
125
|
-
* Catches and intercepts all initiated navigation requests and opened pages.
|
|
126
|
-
* Returns a list of all target URLs.
|
|
127
|
-
*
|
|
128
|
-
* @param {object} options
|
|
129
|
-
* @param {Page} options.page
|
|
130
|
-
* @param {string} options.selector
|
|
131
|
-
* @param {number} [options.waitForPageIdleMillis]
|
|
132
|
-
* @param {number} [options.maxWaitForPageIdleMillis]
|
|
133
|
-
* @param {object} [clickOptions]
|
|
134
|
-
* @param {number} [clickOptions.clickCount]
|
|
135
|
-
* @param {number} [clickOptions.delay]
|
|
136
|
-
* @return {Promise<Array<*>>}
|
|
137
|
-
* @ignore
|
|
138
|
-
*/
|
|
139
|
-
export function clickElementsAndInterceptNavigationRequests(options: {
|
|
140
|
-
page: Page;
|
|
141
|
-
selector: string;
|
|
142
|
-
waitForPageIdleMillis?: number | undefined;
|
|
143
|
-
maxWaitForPageIdleMillis?: number | undefined;
|
|
144
|
-
}): Promise<Array<any>>;
|
|
145
|
-
/**
|
|
146
|
-
* We're only interested in pages created by the page we're currently clicking in.
|
|
147
|
-
* There will generally be a lot of other targets being created in the browser.
|
|
148
|
-
* @param {Page} page
|
|
149
|
-
* @param {Target} target
|
|
150
|
-
* @return {boolean}
|
|
151
|
-
*/
|
|
152
|
-
export function isTargetRelevant(page: Page, target: Target): boolean;
|
|
153
|
-
/**
|
|
154
|
-
* Click all elements matching the given selector. To be able to do this using
|
|
155
|
-
* Puppeteer's `.click()` we need to make sure the elements are reachable by mouse,
|
|
156
|
-
* so we first move them to the top of the page's stacking context and then click.
|
|
157
|
-
* We do it all in series to prevent elements from hiding one another. Therefore,
|
|
158
|
-
* for large element sets, this will take a considerable amount of time.
|
|
159
|
-
*
|
|
160
|
-
* @param {Page} page
|
|
161
|
-
* @param {string} selector
|
|
162
|
-
* @param {object} [clickOptions]
|
|
163
|
-
* @param {number} [clickOptions.clickCount]
|
|
164
|
-
* @param {number} [clickOptions.delay]
|
|
165
|
-
* @return {Promise<void>}
|
|
166
|
-
* @ignore
|
|
167
|
-
*/
|
|
168
|
-
export function clickElements(page: Page, selector: string, clickOptions?: {
|
|
169
|
-
clickCount?: number | undefined;
|
|
170
|
-
delay?: number | undefined;
|
|
171
|
-
} | undefined): Promise<void>;
|
|
172
|
-
// @ts-ignore optional peer dependency
|
|
173
|
-
import { Page } from "puppeteer";
|
|
174
|
-
import { RequestQueue } from "../storages/request_queue";
|
|
175
|
-
import { RequestTransform } from "./shared";
|
|
176
|
-
import { QueueOperationInfo } from "../storages/request_queue";
|
|
177
|
-
// @ts-ignore optional peer dependency
|
|
178
|
-
import { Target } from "puppeteer";
|
|
179
|
-
//# sourceMappingURL=click_elements.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"click_elements.d.ts","sourceRoot":"","sources":["../../src/enqueue_links/click_elements.js"],"names":[],"mappings":"AAkBA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0GG;AACH;IAlEyB,IAAI,EAAlB,IAAI;IAEkB,YAAY,EAAlC,YAAY;IAEI,QAAQ,EAAxB,MAAM;IAG6C,UAAU;;;IAU5C,YAAY;;;;IAMF,wBAAwB;IAoBlC,mBAAmB;IAanB,sBAAsB;IAItC,QAAQ,MAAM,kBAAkB,CAAC,CAAC,CA8C7C;AAED;;;;;;;;;;;;;;;GAeG;AACH;IAVyB,IAAI,EAAlB,IAAI;IACY,QAAQ,EAAxB,MAAM;IACW,qBAAqB;IACrB,wBAAwB;IAIxC,QAAQ,UAAQ,CAAC,CAoC5B;AA6DD;;;;;;GAMG;AACH,uCAJW,IAAI,UACJ,MAAM,GACL,OAAO,CAKlB;AA2CD;;;;;;;;;;;;;;GAcG;AACH,oCARW,IAAI,YACJ,MAAM;;;gBAIL,QAAQ,IAAI,CAAC,CAyBxB"}
|