apify 2.3.1-beta.4 → 3.0.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -5
- package/package.json +69 -128
- package/build/actor.d.ts +0 -113
- package/build/actor.d.ts.map +0 -1
- package/build/actor.js +0 -582
- package/build/actor.js.map +0 -1
- package/build/apify.d.ts +0 -752
- package/build/apify.d.ts.map +0 -1
- package/build/apify.js +0 -877
- package/build/apify.js.map +0 -1
- package/build/autoscaling/autoscaled_pool.d.ts +0 -384
- package/build/autoscaling/autoscaled_pool.d.ts.map +0 -1
- package/build/autoscaling/autoscaled_pool.js +0 -557
- package/build/autoscaling/autoscaled_pool.js.map +0 -1
- package/build/autoscaling/snapshotter.d.ts +0 -278
- package/build/autoscaling/snapshotter.d.ts.map +0 -1
- package/build/autoscaling/snapshotter.js +0 -447
- package/build/autoscaling/snapshotter.js.map +0 -1
- package/build/autoscaling/system_status.d.ts +0 -224
- package/build/autoscaling/system_status.d.ts.map +0 -1
- package/build/autoscaling/system_status.js +0 -228
- package/build/autoscaling/system_status.js.map +0 -1
- package/build/browser_launchers/browser_launcher.d.ts +0 -154
- package/build/browser_launchers/browser_launcher.d.ts.map +0 -1
- package/build/browser_launchers/browser_launcher.js +0 -160
- package/build/browser_launchers/browser_launcher.js.map +0 -1
- package/build/browser_launchers/browser_plugin.d.ts +0 -23
- package/build/browser_launchers/browser_plugin.d.ts.map +0 -1
- package/build/browser_launchers/browser_plugin.js +0 -25
- package/build/browser_launchers/browser_plugin.js.map +0 -1
- package/build/browser_launchers/playwright_launcher.d.ts +0 -131
- package/build/browser_launchers/playwright_launcher.d.ts.map +0 -1
- package/build/browser_launchers/playwright_launcher.js +0 -150
- package/build/browser_launchers/playwright_launcher.js.map +0 -1
- package/build/browser_launchers/puppeteer_launcher.d.ts +0 -153
- package/build/browser_launchers/puppeteer_launcher.d.ts.map +0 -1
- package/build/browser_launchers/puppeteer_launcher.js +0 -197
- package/build/browser_launchers/puppeteer_launcher.js.map +0 -1
- package/build/cache_container.d.ts +0 -31
- package/build/cache_container.d.ts.map +0 -1
- package/build/cache_container.js +0 -48
- package/build/cache_container.js.map +0 -1
- package/build/configuration.d.ts +0 -226
- package/build/configuration.d.ts.map +0 -1
- package/build/configuration.js +0 -325
- package/build/configuration.js.map +0 -1
- package/build/constants.d.ts +0 -37
- package/build/constants.d.ts.map +0 -1
- package/build/constants.js +0 -41
- package/build/constants.js.map +0 -1
- package/build/crawlers/basic_crawler.d.ts +0 -443
- package/build/crawlers/basic_crawler.d.ts.map +0 -1
- package/build/crawlers/basic_crawler.js +0 -664
- package/build/crawlers/basic_crawler.js.map +0 -1
- package/build/crawlers/browser_crawler.d.ts +0 -512
- package/build/crawlers/browser_crawler.d.ts.map +0 -1
- package/build/crawlers/browser_crawler.js +0 -540
- package/build/crawlers/browser_crawler.js.map +0 -1
- package/build/crawlers/cheerio_crawler.d.ts +0 -931
- package/build/crawlers/cheerio_crawler.d.ts.map +0 -1
- package/build/crawlers/cheerio_crawler.js +0 -913
- package/build/crawlers/cheerio_crawler.js.map +0 -1
- package/build/crawlers/crawler_extension.d.ts +0 -10
- package/build/crawlers/crawler_extension.d.ts.map +0 -1
- package/build/crawlers/crawler_extension.js +0 -19
- package/build/crawlers/crawler_extension.js.map +0 -1
- package/build/crawlers/crawler_utils.d.ts +0 -34
- package/build/crawlers/crawler_utils.d.ts.map +0 -1
- package/build/crawlers/crawler_utils.js +0 -87
- package/build/crawlers/crawler_utils.js.map +0 -1
- package/build/crawlers/playwright_crawler.d.ts +0 -448
- package/build/crawlers/playwright_crawler.d.ts.map +0 -1
- package/build/crawlers/playwright_crawler.js +0 -299
- package/build/crawlers/playwright_crawler.js.map +0 -1
- package/build/crawlers/puppeteer_crawler.d.ts +0 -425
- package/build/crawlers/puppeteer_crawler.d.ts.map +0 -1
- package/build/crawlers/puppeteer_crawler.js +0 -299
- package/build/crawlers/puppeteer_crawler.js.map +0 -1
- package/build/crawlers/statistics.d.ts +0 -185
- package/build/crawlers/statistics.d.ts.map +0 -1
- package/build/crawlers/statistics.js +0 -331
- package/build/crawlers/statistics.js.map +0 -1
- package/build/enqueue_links/click_elements.d.ts +0 -179
- package/build/enqueue_links/click_elements.d.ts.map +0 -1
- package/build/enqueue_links/click_elements.js +0 -434
- package/build/enqueue_links/click_elements.js.map +0 -1
- package/build/enqueue_links/enqueue_links.d.ts +0 -117
- package/build/enqueue_links/enqueue_links.d.ts.map +0 -1
- package/build/enqueue_links/enqueue_links.js +0 -163
- package/build/enqueue_links/enqueue_links.js.map +0 -1
- package/build/enqueue_links/shared.d.ts +0 -42
- package/build/enqueue_links/shared.d.ts.map +0 -1
- package/build/enqueue_links/shared.js +0 -121
- package/build/enqueue_links/shared.js.map +0 -1
- package/build/errors.d.ts +0 -29
- package/build/errors.d.ts.map +0 -1
- package/build/errors.js +0 -38
- package/build/errors.js.map +0 -1
- package/build/events.d.ts +0 -11
- package/build/events.d.ts.map +0 -1
- package/build/events.js +0 -147
- package/build/events.js.map +0 -1
- package/build/index.d.ts +0 -4
- package/build/index.d.ts.map +0 -1
- package/build/index.js +0 -7
- package/build/index.js.map +0 -1
- package/build/main.d.ts +0 -179
- package/build/main.d.ts.map +0 -1
- package/build/main.js +0 -81
- package/build/main.js.map +0 -1
- package/build/playwright_utils.d.ts +0 -9
- package/build/playwright_utils.d.ts.map +0 -1
- package/build/playwright_utils.js +0 -90
- package/build/playwright_utils.js.map +0 -1
- package/build/proxy_configuration.d.ts +0 -411
- package/build/proxy_configuration.d.ts.map +0 -1
- package/build/proxy_configuration.js +0 -517
- package/build/proxy_configuration.js.map +0 -1
- package/build/pseudo_url.d.ts +0 -86
- package/build/pseudo_url.d.ts.map +0 -1
- package/build/pseudo_url.js +0 -153
- package/build/pseudo_url.js.map +0 -1
- package/build/puppeteer_request_interception.d.ts +0 -8
- package/build/puppeteer_request_interception.d.ts.map +0 -1
- package/build/puppeteer_request_interception.js +0 -235
- package/build/puppeteer_request_interception.js.map +0 -1
- package/build/puppeteer_utils.d.ts +0 -250
- package/build/puppeteer_utils.d.ts.map +0 -1
- package/build/puppeteer_utils.js +0 -551
- package/build/puppeteer_utils.js.map +0 -1
- package/build/request.d.ts +0 -180
- package/build/request.d.ts.map +0 -1
- package/build/request.js +0 -261
- package/build/request.js.map +0 -1
- package/build/request_list.d.ts +0 -581
- package/build/request_list.d.ts.map +0 -1
- package/build/request_list.js +0 -826
- package/build/request_list.js.map +0 -1
- package/build/serialization.d.ts +0 -5
- package/build/serialization.d.ts.map +0 -1
- package/build/serialization.js +0 -139
- package/build/serialization.js.map +0 -1
- package/build/session_pool/errors.d.ts +0 -11
- package/build/session_pool/errors.d.ts.map +0 -1
- package/build/session_pool/errors.js +0 -18
- package/build/session_pool/errors.js.map +0 -1
- package/build/session_pool/events.d.ts +0 -5
- package/build/session_pool/events.d.ts.map +0 -1
- package/build/session_pool/events.js +0 -6
- package/build/session_pool/events.js.map +0 -1
- package/build/session_pool/session.d.ts +0 -286
- package/build/session_pool/session.d.ts.map +0 -1
- package/build/session_pool/session.js +0 -355
- package/build/session_pool/session.js.map +0 -1
- package/build/session_pool/session_pool.d.ts +0 -280
- package/build/session_pool/session_pool.d.ts.map +0 -1
- package/build/session_pool/session_pool.js +0 -393
- package/build/session_pool/session_pool.js.map +0 -1
- package/build/session_pool/session_utils.d.ts +0 -4
- package/build/session_pool/session_utils.d.ts.map +0 -1
- package/build/session_pool/session_utils.js +0 -24
- package/build/session_pool/session_utils.js.map +0 -1
- package/build/stealth/hiding_tricks.d.ts +0 -22
- package/build/stealth/hiding_tricks.d.ts.map +0 -1
- package/build/stealth/hiding_tricks.js +0 -308
- package/build/stealth/hiding_tricks.js.map +0 -1
- package/build/stealth/stealth.d.ts +0 -56
- package/build/stealth/stealth.d.ts.map +0 -1
- package/build/stealth/stealth.js +0 -125
- package/build/stealth/stealth.js.map +0 -1
- package/build/storages/dataset.d.ts +0 -288
- package/build/storages/dataset.d.ts.map +0 -1
- package/build/storages/dataset.js +0 -480
- package/build/storages/dataset.js.map +0 -1
- package/build/storages/key_value_store.d.ts +0 -243
- package/build/storages/key_value_store.d.ts.map +0 -1
- package/build/storages/key_value_store.js +0 -462
- package/build/storages/key_value_store.js.map +0 -1
- package/build/storages/request_queue.d.ts +0 -318
- package/build/storages/request_queue.d.ts.map +0 -1
- package/build/storages/request_queue.js +0 -636
- package/build/storages/request_queue.js.map +0 -1
- package/build/storages/storage_manager.d.ts +0 -87
- package/build/storages/storage_manager.d.ts.map +0 -1
- package/build/storages/storage_manager.js +0 -150
- package/build/storages/storage_manager.js.map +0 -1
- package/build/tsconfig.tsbuildinfo +0 -1
- package/build/typedefs.d.ts +0 -146
- package/build/typedefs.d.ts.map +0 -1
- package/build/typedefs.js +0 -88
- package/build/typedefs.js.map +0 -1
- package/build/utils.d.ts +0 -175
- package/build/utils.d.ts.map +0 -1
- package/build/utils.js +0 -731
- package/build/utils.js.map +0 -1
- package/build/utils_log.d.ts +0 -41
- package/build/utils_log.d.ts.map +0 -1
- package/build/utils_log.js +0 -192
- package/build/utils_log.js.map +0 -1
- package/build/utils_request.d.ts +0 -77
- package/build/utils_request.d.ts.map +0 -1
- package/build/utils_request.js +0 -385
- package/build/utils_request.js.map +0 -1
- package/build/utils_social.d.ts +0 -210
- package/build/utils_social.d.ts.map +0 -1
- package/build/utils_social.js +0 -787
- package/build/utils_social.js.map +0 -1
- package/build/validators.d.ts +0 -23
- package/build/validators.d.ts.map +0 -1
- package/build/validators.js +0 -29
- package/build/validators.js.map +0 -1
package/build/pseudo_url.js
DELETED
|
@@ -1,153 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
const tslib_1 = require("tslib");
|
|
4
|
-
const ow_1 = (0, tslib_1.__importDefault)(require("ow"));
|
|
5
|
-
const underscore_1 = (0, tslib_1.__importDefault)(require("underscore"));
|
|
6
|
-
const utils_log_1 = (0, tslib_1.__importDefault)(require("./utils_log"));
|
|
7
|
-
const request_1 = (0, tslib_1.__importDefault)(require("./request")); // eslint-disable-line import/named,no-unused-vars
|
|
8
|
-
/**
|
|
9
|
-
* Parses PURL into Regex string.
|
|
10
|
-
* @ignore
|
|
11
|
-
*/
|
|
12
|
-
const parsePurl = (purl) => {
|
|
13
|
-
const trimmedPurl = purl.trim();
|
|
14
|
-
if (trimmedPurl.length === 0)
|
|
15
|
-
throw new Error(`Cannot parse PURL '${trimmedPurl}': it must be an non-empty string`);
|
|
16
|
-
let regex = '^';
|
|
17
|
-
try {
|
|
18
|
-
let openBrackets = 0;
|
|
19
|
-
for (let i = 0; i < trimmedPurl.length; i++) {
|
|
20
|
-
const ch = trimmedPurl.charAt(i);
|
|
21
|
-
if (ch === '[' && ++openBrackets === 1) {
|
|
22
|
-
// Beginning of '[regex]' section
|
|
23
|
-
// Enclose regex in () brackets to enforce operator priority
|
|
24
|
-
regex += '(';
|
|
25
|
-
}
|
|
26
|
-
else if (ch === ']' && openBrackets > 0 && --openBrackets === 0) {
|
|
27
|
-
// End of '[regex]' section
|
|
28
|
-
regex += ')';
|
|
29
|
-
}
|
|
30
|
-
else if (openBrackets > 0) {
|
|
31
|
-
// Inside '[regex]' section
|
|
32
|
-
regex += ch;
|
|
33
|
-
}
|
|
34
|
-
else {
|
|
35
|
-
// Outside '[regex]' section, parsing the URL part
|
|
36
|
-
const code = ch.charCodeAt(0);
|
|
37
|
-
if ((code >= 48 && code <= 57) || (code >= 65 && code <= 90) || (code >= 97 && code <= 122)) {
|
|
38
|
-
// Alphanumeric character => copy it.
|
|
39
|
-
regex += ch;
|
|
40
|
-
}
|
|
41
|
-
else {
|
|
42
|
-
// Special character => escape it
|
|
43
|
-
const hex = code < 16 ? `0${code.toString(16)}` : code.toString(16);
|
|
44
|
-
regex += `\\x${hex}`;
|
|
45
|
-
}
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
regex += '$';
|
|
49
|
-
}
|
|
50
|
-
catch (err) {
|
|
51
|
-
throw new Error(`Cannot parse PURL '${purl}': ${err}`);
|
|
52
|
-
}
|
|
53
|
-
return regex;
|
|
54
|
-
};
|
|
55
|
-
/**
|
|
56
|
-
* Represents a pseudo-URL (PURL) - an URL pattern used by web crawlers
|
|
57
|
-
* to specify which URLs should the crawler visit.
|
|
58
|
-
* This class is used by the {@link utils#enqueueLinks} function.
|
|
59
|
-
*
|
|
60
|
-
* A PURL is simply a URL with special directives enclosed in `[]` brackets.
|
|
61
|
-
* Currently, the only supported directive is `[RegExp]`,
|
|
62
|
-
* which defines a JavaScript-style regular expression to match against the URL.
|
|
63
|
-
*
|
|
64
|
-
* The `PseudoUrl` class can be constructed either using a pseudo-URL string
|
|
65
|
-
* or a regular expression (an instance of the `RegExp` object).
|
|
66
|
-
* With a pseudo-URL string, the matching is always case-insensitive.
|
|
67
|
-
* If you need case-sensitive matching, use an appropriate `RegExp` object.
|
|
68
|
-
*
|
|
69
|
-
* For example, a PURL `http://www.example.com/pages/[(\w|-)*]` will match all of the following URLs:
|
|
70
|
-
*
|
|
71
|
-
* - `http://www.example.com/pages/`
|
|
72
|
-
* - `http://www.example.com/pages/my-awesome-page`
|
|
73
|
-
* - `http://www.example.com/pages/something`
|
|
74
|
-
*
|
|
75
|
-
* Be careful to correctly escape special characters in the pseudo-URL string.
|
|
76
|
-
* If either `[` or `]` is part of the normal query string, it must be encoded as `[\x5B]` or `[\x5D]`,
|
|
77
|
-
* respectively. For example, the following PURL:
|
|
78
|
-
* ```http
|
|
79
|
-
* http://www.example.com/search?do[\x5B]load[\x5D]=1
|
|
80
|
-
* ```
|
|
81
|
-
* will match the URL:
|
|
82
|
-
* ```http
|
|
83
|
-
* http://www.example.com/search?do[load]=1
|
|
84
|
-
* ```
|
|
85
|
-
*
|
|
86
|
-
* If the regular expression in the pseudo-URL contains a backslash character (\),
|
|
87
|
-
* you need to escape it with another back backslash, as shown in the example below.
|
|
88
|
-
*
|
|
89
|
-
* **Example usage:**
|
|
90
|
-
*
|
|
91
|
-
* ```javascript
|
|
92
|
-
* // Using a pseudo-URL string
|
|
93
|
-
* const purl = new Apify.PseudoUrl('http://www.example.com/pages/[(\\w|-)+]', {
|
|
94
|
-
* userData: { foo: 'bar' },
|
|
95
|
-
* });
|
|
96
|
-
*
|
|
97
|
-
* // Using a regular expression
|
|
98
|
-
* const purl2 = new Apify.PseudoUrl(/http:\/\/www\.example\.com\/pages\/(\w|-)+/);
|
|
99
|
-
*
|
|
100
|
-
* if (purl.matches('http://www.example.com/pages/my-awesome-page')) console.log('Match!');
|
|
101
|
-
* ```
|
|
102
|
-
*/
|
|
103
|
-
class PseudoUrl {
|
|
104
|
-
/**
|
|
105
|
-
* @param {(string|RegExp)} purl
|
|
106
|
-
* A pseudo-URL string or a regular expression object.
|
|
107
|
-
* Using a `RegExp` instance enables more granular control,
|
|
108
|
-
* such as making the matching case sensitive.
|
|
109
|
-
* @param {RequestOptions} requestTemplate
|
|
110
|
-
* Options for the new {@link Request} instances created for matching URLs
|
|
111
|
-
* by the {@link utils#enqueueLinks} function.
|
|
112
|
-
*/
|
|
113
|
-
constructor(purl, requestTemplate = {}) {
|
|
114
|
-
(0, ow_1.default)(purl, ow_1.default.any(ow_1.default.string, ow_1.default.regExp));
|
|
115
|
-
(0, ow_1.default)(requestTemplate, ow_1.default.object);
|
|
116
|
-
if (purl instanceof RegExp) {
|
|
117
|
-
this.regex = purl;
|
|
118
|
-
}
|
|
119
|
-
else {
|
|
120
|
-
const regex = parsePurl(purl);
|
|
121
|
-
utils_log_1.default.debug('PURL parsed', { purl, regex });
|
|
122
|
-
this.regex = new RegExp(regex, 'i');
|
|
123
|
-
}
|
|
124
|
-
this.requestTemplate = requestTemplate;
|
|
125
|
-
}
|
|
126
|
-
/**
|
|
127
|
-
* Determines whether a URL matches this pseudo-URL pattern.
|
|
128
|
-
*
|
|
129
|
-
* @param {string} url URL to be matched.
|
|
130
|
-
* @return {boolean} Returns `true` if given URL matches pseudo-URL.
|
|
131
|
-
*/
|
|
132
|
-
matches(url) {
|
|
133
|
-
return underscore_1.default.isString(url) && url.match(this.regex) !== null;
|
|
134
|
-
}
|
|
135
|
-
/**
|
|
136
|
-
* Creates a Request object from a provided `requestTemplate` and a given URL
|
|
137
|
-
* or an object that specifies ${@link Request} properties. In case of a collision
|
|
138
|
-
* the properties will override the template, except for `userData`, which will
|
|
139
|
-
* be merged together, with the `userData` property having preference over the template.
|
|
140
|
-
* This enables dynamic overriding of the template.
|
|
141
|
-
*
|
|
142
|
-
* @param {(string|Object<string, *>)} urlOrProps
|
|
143
|
-
* @return {Request}
|
|
144
|
-
*/
|
|
145
|
-
createRequest(urlOrProps) {
|
|
146
|
-
const props = typeof urlOrProps === 'string' ? { url: urlOrProps } : urlOrProps;
|
|
147
|
-
props.userData = { ...this.requestTemplate.userData, ...props.userData };
|
|
148
|
-
const options = { ...this.requestTemplate, ...props }; // props.userData will override template with merged data.
|
|
149
|
-
return new request_1.default(options);
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
exports.default = PseudoUrl;
|
|
153
|
-
//# sourceMappingURL=pseudo_url.js.map
|
package/build/pseudo_url.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pseudo_url.js","sourceRoot":"","sources":["../src/pseudo_url.js"],"names":[],"mappings":";;;AAAA,yDAAoB;AACpB,yEAA2B;AAC3B,yEAA8B;AAC9B,qEAAoD,CAAC,kDAAkD;AAEvG;;;GAGG;AACH,MAAM,SAAS,GAAG,CAAC,IAAI,EAAE,EAAE;IACvB,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAChC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,sBAAsB,WAAW,mCAAmC,CAAC,CAAC;IAEpH,IAAI,KAAK,GAAG,GAAG,CAAC;IAEhB,IAAI;QACA,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YACzC,MAAM,EAAE,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;YAEjC,IAAI,EAAE,KAAK,GAAG,IAAI,EAAE,YAAY,KAAK,CAAC,EAAE;gBACpC,iCAAiC;gBACjC,4DAA4D;gBAC5D,KAAK,IAAI,GAAG,CAAC;aAChB;iBAAM,IAAI,EAAE,KAAK,GAAG,IAAI,YAAY,GAAG,CAAC,IAAI,EAAE,YAAY,KAAK,CAAC,EAAE;gBAC/D,2BAA2B;gBAC3B,KAAK,IAAI,GAAG,CAAC;aAChB;iBAAM,IAAI,YAAY,GAAG,CAAC,EAAE;gBACzB,2BAA2B;gBAC3B,KAAK,IAAI,EAAE,CAAC;aACf;iBAAM;gBACH,kDAAkD;gBAClD,MAAM,IAAI,GAAG,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;gBAC9B,IAAI,CAAC,IAAI,IAAI,EAAE,IAAI,IAAI,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,IAAI,IAAI,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,IAAI,IAAI,IAAI,GAAG,CAAC,EAAE;oBACzF,qCAAqC;oBACrC,KAAK,IAAI,EAAE,CAAC;iBACf;qBAAM;oBACH,iCAAiC;oBACjC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;oBACpE,KAAK,IAAI,MAAM,GAAG,EAAE,CAAC;iBACxB;aACJ;SACJ;QACD,KAAK,IAAI,GAAG,CAAC;KAChB;IAAC,OAAO,GAAG,EAAE;QACV,MAAM,IAAI,KAAK,CAAC,sBAAsB,IAAI,MAAM,GAAG,EAAE,CAAC,CAAC;KAC1D;IAED,OAAO,KAAK,CAAC;AACjB,CAAC,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+CG;AACH,MAAM,SAAS;IACX;;;;;;;;OAQG;IACH,YAAY,IAAI,EAAE,eAAe,GAAG,EAAE;QAClC,IAAA,YAAE,EAAC,IAAI,EAAE,YAAE,CAAC,GAAG,CAAC,YAAE,CAAC,MAAM,EAAE,YAAE,CAAC,MAAM,CAAC,CAAC,CAAC;QACvC,IAAA,YAAE,EAAC,eAAe,EAAE,YAAE,CAAC,MAAM,CAAC,CAAC;QAE/B,IAAI,IAAI,YAAY,MAAM,EAAE;YACxB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;SACrB;aAAM;YACH,MAAM,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;YAC9B,mBAAG,CAAC,KAAK,CAAC,aAAa,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;YAC1C,IAAI,CAAC,KAAK,GAAG,IAAI,MAAM,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;SACvC;QAED,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IAC3C,CAAC;IAED;;;;;OAKG;IACH,OAAO,CAAC,GAAG;QACP,OAAO,oBAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,IAAI,CAAC;IAC7D,CAAC;IAED;;;;;;;;;OASG;IACH,aAAa,CAAC,UAAU;QACpB,MAAM,KAAK,GAAG,OAAO,UAAU,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC;QAChF,KAAK,CAAC,QAAQ,GAAG,EAAE,GAAG,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,GAAG,KAAK,CAAC,QAAQ,EAAE,CAAC;QACzE,MAAM,OAAO,GAAG,EAAE,GAAG,IAAI,CAAC,eAAe,EAAE,GAAG,KAAK,EAAE,CAAC,CAAC,0DAA0D;QACjH,OAAO,IAAI,iBAAO,CAAC,OAAO,CAAC,CAAC;IAChC,CAAC;CACJ;AAED,kBAAe,SAAS,CAAC"}
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
export function addInterceptRequestHandler(page: Page, handler: InterceptHandler): Promise<void>;
|
|
2
|
-
export function removeInterceptRequestHandler(page: Page, handler: InterceptHandler): Promise<void>;
|
|
3
|
-
export type InterceptHandler = (request: PuppeteerRequest) => any;
|
|
4
|
-
// @ts-ignore optional peer dependency
|
|
5
|
-
import { Page } from "puppeteer";
|
|
6
|
-
// @ts-ignore optional peer dependency
|
|
7
|
-
import { HTTPRequest as PuppeteerRequest } from "puppeteer";
|
|
8
|
-
//# sourceMappingURL=puppeteer_request_interception.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"puppeteer_request_interception.d.ts","sourceRoot":"","sources":["../src/puppeteer_request_interception.js"],"names":[],"mappings":"AA2KO,iDAPI,IAAI,WAEJ,gBAAgB,GACf,QAAQ,IAAI,CAAC,CAsCxB;AAYM,oDAPI,IAAI,WAEJ,gBAAgB,GACf,QAAQ,IAAI,CAAC,CAmCxB;yCA/MU,gBAAgB"}
|
|
@@ -1,235 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.removeInterceptRequestHandler = exports.addInterceptRequestHandler = void 0;
|
|
4
|
-
const tslib_1 = require("tslib");
|
|
5
|
-
const events_1 = require("events");
|
|
6
|
-
const ow_1 = (0, tslib_1.__importDefault)(require("ow"));
|
|
7
|
-
const underscore_1 = (0, tslib_1.__importDefault)(require("underscore"));
|
|
8
|
-
const utils_log_1 = (0, tslib_1.__importDefault)(require("./utils_log"));
|
|
9
|
-
// We use weak maps here so that the content gets discarted after page gets closed.
|
|
10
|
-
const pageInterceptRequestHandlersMap = new WeakMap(); // Maps page to an array of request interception handlers.
|
|
11
|
-
const pageInterceptRequestMasterHandlerMap = new WeakMap(); // Maps page to master request interception handler.
|
|
12
|
-
const pageInterceptedRequestsMap = new WeakMap(); // Maps page to a set of its pending intercepted requests.
|
|
13
|
-
/**
|
|
14
|
-
* Enables observation of changes of internal state
|
|
15
|
-
* to be able to queue other actions based on it.
|
|
16
|
-
* @ignore
|
|
17
|
-
*/
|
|
18
|
-
class ObservableSet extends events_1.EventEmitter {
|
|
19
|
-
constructor() {
|
|
20
|
-
super();
|
|
21
|
-
this.set = new Set();
|
|
22
|
-
}
|
|
23
|
-
add(value) {
|
|
24
|
-
this.set.add(value);
|
|
25
|
-
this.emit('add', value);
|
|
26
|
-
return this.set;
|
|
27
|
-
}
|
|
28
|
-
delete(value) {
|
|
29
|
-
const success = this.set.delete(value);
|
|
30
|
-
this.emit('delete', value);
|
|
31
|
-
return success;
|
|
32
|
-
}
|
|
33
|
-
get size() {
|
|
34
|
-
return this.set.size;
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
/**
|
|
38
|
-
* @callback InterceptHandler
|
|
39
|
-
* @param {PuppeteerRequest} request
|
|
40
|
-
*/
|
|
41
|
-
/**
|
|
42
|
-
* Makes all request headers capitalized to more look like in browser
|
|
43
|
-
* @param {Object<string, string>} headers
|
|
44
|
-
* @returns {Object<string, string>}
|
|
45
|
-
*/
|
|
46
|
-
const browserifyHeaders = (headers) => {
|
|
47
|
-
const finalHeaders = {};
|
|
48
|
-
// eslint-disable-next-line prefer-const
|
|
49
|
-
for (let [key, value] of Object.entries(headers)) {
|
|
50
|
-
key = key.toLowerCase()
|
|
51
|
-
.split('-')
|
|
52
|
-
.map((str) => str.charAt(0).toUpperCase() + str.slice(1))
|
|
53
|
-
.join('-');
|
|
54
|
-
finalHeaders[key] = value;
|
|
55
|
-
}
|
|
56
|
-
return finalHeaders;
|
|
57
|
-
};
|
|
58
|
-
/**
|
|
59
|
-
* Executes an array for given intercept request handlers for a given request object.
|
|
60
|
-
*
|
|
61
|
-
* @param {PuppeteerRequest} request Puppeteer's Request object.
|
|
62
|
-
* @param {Array<InterceptHandler>} interceptRequestHandlers An array of intercept request handlers.
|
|
63
|
-
* @ignore
|
|
64
|
-
*/
|
|
65
|
-
const handleRequest = async (request, interceptRequestHandlers) => {
|
|
66
|
-
// If there are no intercept handlers, it means that request interception is not enabled (anymore)
|
|
67
|
-
// and therefore .abort() .respond() and .continue() would throw and crash the process.
|
|
68
|
-
if (!interceptRequestHandlers.length)
|
|
69
|
-
return;
|
|
70
|
-
let wasAborted = false;
|
|
71
|
-
let wasResponded = false;
|
|
72
|
-
let wasContinued = false;
|
|
73
|
-
const accumulatedOverrides = {
|
|
74
|
-
headers: browserifyHeaders(request.headers()),
|
|
75
|
-
};
|
|
76
|
-
const originalContinue = request.continue.bind(request);
|
|
77
|
-
request.continue = (overrides = {}) => {
|
|
78
|
-
wasContinued = true;
|
|
79
|
-
const headers = browserifyHeaders({ ...accumulatedOverrides.headers, ...overrides.headers });
|
|
80
|
-
Object.assign(accumulatedOverrides, overrides, { headers });
|
|
81
|
-
};
|
|
82
|
-
request.abort = underscore_1.default.wrap(request.abort.bind(request), (abort, ...args) => {
|
|
83
|
-
wasAborted = true;
|
|
84
|
-
return abort(...args);
|
|
85
|
-
});
|
|
86
|
-
request.respond = underscore_1.default.wrap(request.respond.bind(request), (respond, ...args) => {
|
|
87
|
-
wasResponded = true;
|
|
88
|
-
return respond(...args);
|
|
89
|
-
});
|
|
90
|
-
for (const handler of interceptRequestHandlers) {
|
|
91
|
-
wasContinued = false;
|
|
92
|
-
await handler(request);
|
|
93
|
-
// Check that one of the functions was called.
|
|
94
|
-
if (!wasAborted && !wasResponded && !wasContinued) {
|
|
95
|
-
throw new Error('Intercept request handler must call one of request.continue|respond|abort() methods!');
|
|
96
|
-
}
|
|
97
|
-
// If request was aborted or responded then we can finish immediately.
|
|
98
|
-
if (wasAborted || wasResponded)
|
|
99
|
-
return;
|
|
100
|
-
}
|
|
101
|
-
return originalContinue(accumulatedOverrides);
|
|
102
|
-
};
|
|
103
|
-
/**
|
|
104
|
-
* Adds request interception handler in similar to `page.on('request', handler);` but in addition to that
|
|
105
|
-
* supports multiple parallel handlers.
|
|
106
|
-
*
|
|
107
|
-
* All the handlers are executed sequentially in the order as they were added.
|
|
108
|
-
* Each of the handlers must call one of `request.continue()`, `request.abort()` and `request.respond()`.
|
|
109
|
-
* In addition to that any of the handlers may modify the request object (method, postData, headers)
|
|
110
|
-
* by passing its overrides to `request.continue()`.
|
|
111
|
-
* If multiple handlers modify same property then the last one wins. Headers are merged separately so you can
|
|
112
|
-
* override only a value of specific header.
|
|
113
|
-
*
|
|
114
|
-
* If one the handlers calls `request.abort()` or `request.respond()` then request is not propagated further
|
|
115
|
-
* to any of the remaining handlers.
|
|
116
|
-
*
|
|
117
|
-
*
|
|
118
|
-
* **Example usage:**
|
|
119
|
-
*
|
|
120
|
-
* ```javascript
|
|
121
|
-
* // Replace images with placeholder.
|
|
122
|
-
* await addInterceptRequestHandler(page, (request) => {
|
|
123
|
-
* if (request.resourceType() === 'image') {
|
|
124
|
-
* return request.respond({
|
|
125
|
-
* statusCode: 200,
|
|
126
|
-
* contentType: 'image/jpeg',
|
|
127
|
-
* body: placeholderImageBuffer,
|
|
128
|
-
* });
|
|
129
|
-
* }
|
|
130
|
-
* return request.continue();
|
|
131
|
-
* });
|
|
132
|
-
*
|
|
133
|
-
* // Abort all the scripts.
|
|
134
|
-
* await addInterceptRequestHandler(page, (request) => {
|
|
135
|
-
* if (request.resourceType() === 'script') return request.abort();
|
|
136
|
-
* return request.continue();
|
|
137
|
-
* });
|
|
138
|
-
*
|
|
139
|
-
* // Change requests to post.
|
|
140
|
-
* await addInterceptRequestHandler(page, (request) => {
|
|
141
|
-
* return request.continue({
|
|
142
|
-
* method: 'POST',
|
|
143
|
-
* });
|
|
144
|
-
* });
|
|
145
|
-
*
|
|
146
|
-
* await page.goto('http://example.com');
|
|
147
|
-
* ```
|
|
148
|
-
*
|
|
149
|
-
* @param {Page} page
|
|
150
|
-
* Puppeteer [`Page`](https://pptr.dev/#?product=Puppeteer&show=api-class-page) object.
|
|
151
|
-
* @param {InterceptHandler} handler Request interception handler.
|
|
152
|
-
* @return {Promise<void>}
|
|
153
|
-
* @memberOf puppeteer
|
|
154
|
-
* @name addInterceptRequestHandler
|
|
155
|
-
*/
|
|
156
|
-
const addInterceptRequestHandler = async (page, handler) => {
|
|
157
|
-
(0, ow_1.default)(page, ow_1.default.object.hasKeys('goto', 'evaluate'));
|
|
158
|
-
(0, ow_1.default)(handler, ow_1.default.function);
|
|
159
|
-
if (!pageInterceptRequestHandlersMap.has(page)) {
|
|
160
|
-
pageInterceptRequestHandlersMap.set(page, []);
|
|
161
|
-
}
|
|
162
|
-
if (!pageInterceptedRequestsMap.has(page)) {
|
|
163
|
-
pageInterceptedRequestsMap.set(page, new ObservableSet());
|
|
164
|
-
}
|
|
165
|
-
const handlersArray = pageInterceptRequestHandlersMap.get(page);
|
|
166
|
-
handlersArray.push(handler);
|
|
167
|
-
// First handler was just added at this point so we need to set up request interception.
|
|
168
|
-
if (handlersArray.length === 1) {
|
|
169
|
-
await page.setRequestInterception(true);
|
|
170
|
-
// This is a handler that gets set in page.on('request', ...) and that executes all the user
|
|
171
|
-
// added custom handlers.
|
|
172
|
-
const masterHandler = async (request) => {
|
|
173
|
-
const interceptedRequests = pageInterceptedRequestsMap.get(page);
|
|
174
|
-
interceptedRequests.add(request);
|
|
175
|
-
const interceptHandlers = pageInterceptRequestHandlersMap.get(page);
|
|
176
|
-
try {
|
|
177
|
-
await handleRequest(request, interceptHandlers);
|
|
178
|
-
}
|
|
179
|
-
finally {
|
|
180
|
-
interceptedRequests.delete(request);
|
|
181
|
-
}
|
|
182
|
-
};
|
|
183
|
-
pageInterceptRequestMasterHandlerMap.set(page, masterHandler);
|
|
184
|
-
page.on('request', masterHandler);
|
|
185
|
-
}
|
|
186
|
-
};
|
|
187
|
-
exports.addInterceptRequestHandler = addInterceptRequestHandler;
|
|
188
|
-
/**
|
|
189
|
-
* Removes request interception handler for given page.
|
|
190
|
-
*
|
|
191
|
-
* @param {Page} page
|
|
192
|
-
* Puppeteer [`Page`](https://pptr.dev/#?product=Puppeteer&show=api-class-page) object.
|
|
193
|
-
* @param {InterceptHandler} handler Request interception handler.
|
|
194
|
-
* @return {Promise<void>}
|
|
195
|
-
* @memberOf puppeteer
|
|
196
|
-
* @name removeInterceptRequestHandler
|
|
197
|
-
*/
|
|
198
|
-
const removeInterceptRequestHandler = async (page, handler) => {
|
|
199
|
-
(0, ow_1.default)(page, ow_1.default.object.hasKeys('goto', 'evaluate'));
|
|
200
|
-
(0, ow_1.default)(handler, ow_1.default.function);
|
|
201
|
-
const handlersArray = pageInterceptRequestHandlersMap
|
|
202
|
-
.get(page)
|
|
203
|
-
.filter((item) => item !== handler);
|
|
204
|
-
pageInterceptRequestHandlersMap.set(page, handlersArray);
|
|
205
|
-
if (handlersArray.length === 0) {
|
|
206
|
-
const interceptedRequestsInProgress = pageInterceptedRequestsMap.get(page);
|
|
207
|
-
// Since handlers can be async, we can't simply turn off request interception
|
|
208
|
-
// when there are no handlers, because some handlers could still
|
|
209
|
-
// be in progress and request.abort|respond|continue() would throw.
|
|
210
|
-
if (interceptedRequestsInProgress.size === 0) {
|
|
211
|
-
await disableRequestInterception(page);
|
|
212
|
-
}
|
|
213
|
-
else {
|
|
214
|
-
const onDelete = async () => {
|
|
215
|
-
if (interceptedRequestsInProgress.size === 0) {
|
|
216
|
-
try {
|
|
217
|
-
await disableRequestInterception(page);
|
|
218
|
-
interceptedRequestsInProgress.removeListener('delete', onDelete);
|
|
219
|
-
}
|
|
220
|
-
catch (error) {
|
|
221
|
-
utils_log_1.default.debug('Error while disabling request interception', { error });
|
|
222
|
-
}
|
|
223
|
-
}
|
|
224
|
-
};
|
|
225
|
-
interceptedRequestsInProgress.on('delete', onDelete);
|
|
226
|
-
}
|
|
227
|
-
}
|
|
228
|
-
};
|
|
229
|
-
exports.removeInterceptRequestHandler = removeInterceptRequestHandler;
|
|
230
|
-
async function disableRequestInterception(page) {
|
|
231
|
-
await page.setRequestInterception(false);
|
|
232
|
-
const requestHandler = pageInterceptRequestMasterHandlerMap.get(page);
|
|
233
|
-
page.removeListener('request', requestHandler);
|
|
234
|
-
}
|
|
235
|
-
//# sourceMappingURL=puppeteer_request_interception.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"puppeteer_request_interception.js","sourceRoot":"","sources":["../src/puppeteer_request_interception.js"],"names":[],"mappings":";;;;AAAA,mCAAsC;AACtC,yDAAoB;AACpB,yEAA2B;AAE3B,yEAA8B;AAE9B,mFAAmF;AACnF,MAAM,+BAA+B,GAAG,IAAI,OAAO,EAAE,CAAC,CAAC,0DAA0D;AACjH,MAAM,oCAAoC,GAAG,IAAI,OAAO,EAAE,CAAC,CAAC,oDAAoD;AAChH,MAAM,0BAA0B,GAAG,IAAI,OAAO,EAAE,CAAC,CAAC,0DAA0D;AAE5G;;;;GAIG;AACH,MAAM,aAAc,SAAQ,qBAAY;IACpC;QACI,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,GAAG,GAAG,IAAI,GAAG,EAAE,CAAC;IACzB,CAAC;IAED,GAAG,CAAC,KAAK;QACL,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QACpB,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QACxB,OAAO,IAAI,CAAC,GAAG,CAAC;IACpB,CAAC;IAED,MAAM,CAAC,KAAK;QACR,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAC3B,OAAO,OAAO,CAAC;IACnB,CAAC;IAED,IAAI,IAAI;QACJ,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;IACzB,CAAC;CACJ;AAED;;;GAGG;AAEH;;;;GAIG;AACH,MAAM,iBAAiB,GAAG,CAAC,OAAO,EAAE,EAAE;IAClC,MAAM,YAAY,GAAG,EAAE,CAAC;IACxB,wCAAwC;IACxC,KAAK,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;QAC9C,GAAG,GAAG,GAAG,CAAC,WAAW,EAAE;aAClB,KAAK,CAAC,GAAG,CAAC;aACV,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;aACxD,IAAI,CAAC,GAAG,CAAC,CAAC;QAEf,YAAY,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;KAC7B;IAED,OAAO,YAAY,CAAC;AACxB,CAAC,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,aAAa,GAAG,KAAK,EAAE,OAAO,EAAE,wBAAwB,EAAE,EAAE;IAC9D,kGAAkG;IAClG,uFAAuF;IACvF,IAAI,CAAC,wBAAwB,CAAC,MAAM;QAAE,OAAO;IAE7C,IAAI,UAAU,GAAG,KAAK,CAAC;IACvB,IAAI,YAAY,GAAG,KAAK,CAAC;IACzB,IAAI,YAAY,GAAG,KAAK,CAAC;IACzB,MAAM,oBAAoB,GAAG;QACzB,OAAO,EAAE,iBAAiB,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;KAChD,CAAC;IAEF,MAAM,gBAAgB,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACxD,OAAO,CAAC,QAAQ,GAAG,CAAC,SAAS,GAAG,EAAE,EAAE,EAAE;QAClC,YAAY,GAAG,IAAI,CAAC;QACpB,MAAM,OAAO,GAAG,iBAAiB,CAAC,EAAE,GAAG,oBAAoB,CAAC,OAAO,EAAE,GAAG,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC;QAC7F,MAAM,CAAC,MAAM,CAAC,oBAAoB,EAAE,SAAS,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;IAChE,CAAC,CAAC;IAEF,OAAO,CAAC,KAAK,GAAG,oBAAC,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,IAAI,EAAE,EAAE;QACnE,UAAU,GAAG,IAAI,CAAC;QAElB,OAAO,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC;IAC1B,CAAC,CAAC,CAAC;IAEH,OAAO,CAAC,OAAO,GAAG,oBAAC,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,IAAI,EAAE,EAAE;QACzE,YAAY,GAAG,IAAI,CAAC;QAEpB,OAAO,OAAO,CAAC,GAAG,IAAI,CAAC,CAAC;IAC5B,CAAC,CAAC,CAAC;IAEH,KAAK,MAAM,OAAO,IAAI,wBAAwB,EAAE;QAC5C,YAAY,GAAG,KAAK,CAAC;QAErB,MAAM,OAAO,CAAC,OAAO,CAAC,CAAC;QACvB,8CAA8C;QAC9C,IAAI,CAAC,UAAU,IAAI,CAAC,YAAY,IAAI,CAAC,YAAY,EAAE;YAC/C,MAAM,IAAI,KAAK,CAAC,sFAAsF,CAAC,CAAC;SAC3G;QAED,sEAAsE;QACtE,IAAI,UAAU,IAAI,YAAY;YAAE,OAAO;KAC1C;IAED,OAAO,gBAAgB,CAAC,oBAAoB,CAAC,CAAC;AAClD,CAAC,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoDG;AACI,MAAM,0BAA0B,GAAG,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE;IAC9D,IAAA,YAAE,EAAC,IAAI,EAAE,YAAE,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC;IAChD,IAAA,YAAE,EAAC,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,CAAC;IAEzB,IAAI,CAAC,+BAA+B,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;QAC5C,+BAA+B,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;KACjD;IACD,IAAI,CAAC,0BAA0B,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;QACvC,0BAA0B,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,aAAa,EAAE,CAAC,CAAC;KAC7D;IAED,MAAM,aAAa,GAAG,+BAA+B,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAChE,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAE5B,wFAAwF;IACxF,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE;QAC5B,MAAM,IAAI,CAAC,sBAAsB,CAAC,IAAI,CAAC,CAAC;QAExC,4FAA4F;QAC5F,yBAAyB;QACzB,MAAM,aAAa,GAAG,KAAK,EAAE,OAAO,EAAE,EAAE;YACpC,MAAM,mBAAmB,GAAG,0BAA0B,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACjE,mBAAmB,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACjC,MAAM,iBAAiB,GAAG,+BAA+B,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACpE,IAAI;gBACA,MAAM,aAAa,CAAC,OAAO,EAAE,iBAAiB,CAAC,CAAC;aACnD;oBAAS;gBACN,mBAAmB,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;aACvC;QACL,CAAC,CAAC;QAEF,oCAAoC,CAAC,GAAG,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;QAC9D,IAAI,CAAC,EAAE,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;KACrC;AACL,CAAC,CAAC;AAlCW,QAAA,0BAA0B,8BAkCrC;AAEF;;;;;;;;;GASG;AACI,MAAM,6BAA6B,GAAG,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE;IACjE,IAAA,YAAE,EAAC,IAAI,EAAE,YAAE,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC;IAChD,IAAA,YAAE,EAAC,OAAO,EAAE,YAAE,CAAC,QAAQ,CAAC,CAAC;IAEzB,MAAM,aAAa,GAAG,+BAA+B;SAChD,GAAG,CAAC,IAAI,CAAC;SACT,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,OAAO,CAAC,CAAC;IAExC,+BAA+B,CAAC,GAAG,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;IAEzD,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE;QAC5B,MAAM,6BAA6B,GAAG,0BAA0B,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC3E,6EAA6E;QAC7E,gEAAgE;QAChE,mEAAmE;QACnE,IAAI,6BAA6B,CAAC,IAAI,KAAK,CAAC,EAAE;YAC1C,MAAM,0BAA0B,CAAC,IAAI,CAAC,CAAC;SAC1C;aAAM;YACH,MAAM,QAAQ,GAAG,KAAK,IAAI,EAAE;gBACxB,IAAI,6BAA6B,CAAC,IAAI,KAAK,CAAC,EAAE;oBAC1C,IAAI;wBACA,MAAM,0BAA0B,CAAC,IAAI,CAAC,CAAC;wBACvC,6BAA6B,CAAC,cAAc,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;qBACpE;oBAAC,OAAO,KAAK,EAAE;wBACZ,mBAAG,CAAC,KAAK,CAAC,4CAA4C,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;qBACtE;iBACJ;YACL,CAAC,CAAC;YACF,6BAA6B,CAAC,EAAE,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;SACxD;KACJ;AACL,CAAC,CAAC;AA/BW,QAAA,6BAA6B,iCA+BxC;AAEF,KAAK,UAAU,0BAA0B,CAAC,IAAI;IAC1C,MAAM,IAAI,CAAC,sBAAsB,CAAC,KAAK,CAAC,CAAC;IACzC,MAAM,cAAc,GAAG,oCAAoC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACtE,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,cAAc,CAAC,CAAC;AACnD,CAAC"}
|
|
@@ -1,250 +0,0 @@
|
|
|
1
|
-
export function gotoExtended(page: Page, request: Request, gotoOptions?: any): Promise<(Response | null)>;
|
|
2
|
-
export function infiniteScroll(page: Page, options?: {
|
|
3
|
-
timeoutSecs?: number | undefined;
|
|
4
|
-
waitForSecs?: number | undefined;
|
|
5
|
-
scrollDownAndUp?: boolean | undefined;
|
|
6
|
-
buttonSelector?: string | undefined;
|
|
7
|
-
stopScrollCallback?: Function | undefined;
|
|
8
|
-
} | undefined): Promise<void>;
|
|
9
|
-
export namespace puppeteerUtils {
|
|
10
|
-
export { injectFile };
|
|
11
|
-
export { injectJQuery };
|
|
12
|
-
export { injectUnderscore };
|
|
13
|
-
export { enqueueLinksByClickingElements };
|
|
14
|
-
export { blockRequests };
|
|
15
|
-
export { blockResources };
|
|
16
|
-
export { cacheResponses };
|
|
17
|
-
export { compileScript };
|
|
18
|
-
export { gotoExtended };
|
|
19
|
-
export { addInterceptRequestHandler };
|
|
20
|
-
export { removeInterceptRequestHandler };
|
|
21
|
-
export { infiniteScroll };
|
|
22
|
-
export { saveSnapshot };
|
|
23
|
-
}
|
|
24
|
-
export type CompiledScriptParams = {
|
|
25
|
-
page: Page;
|
|
26
|
-
request: Request;
|
|
27
|
-
};
|
|
28
|
-
export type CompiledScriptFunction = (params: CompiledScriptParams) => Promise<any>;
|
|
29
|
-
// @ts-ignore optional peer dependency
|
|
30
|
-
import { Page } from "puppeteer";
|
|
31
|
-
/**
|
|
32
|
-
* Injects a JavaScript file into a Puppeteer page.
|
|
33
|
-
* Unlike Puppeteer's `addScriptTag` function, this function works on pages
|
|
34
|
-
* with arbitrary Cross-Origin Resource Sharing (CORS) policies.
|
|
35
|
-
*
|
|
36
|
-
* File contents are cached for up to 10 files to limit file system access.
|
|
37
|
-
*
|
|
38
|
-
* @param {Page} page
|
|
39
|
-
* Puppeteer [`Page`](https://pptr.dev/#?product=Puppeteer&show=api-class-page) object.
|
|
40
|
-
* @param {string} filePath File path
|
|
41
|
-
* @param {object} [options]
|
|
42
|
-
* @param {boolean} [options.surviveNavigations]
|
|
43
|
-
* Enables the injected script to survive page navigations and reloads without need to be re-injected manually.
|
|
44
|
-
* This does not mean, however, that internal state will be preserved. Just that it will be automatically
|
|
45
|
-
* re-injected on each navigation before any other scripts get the chance to execute.
|
|
46
|
-
* @return {Promise<*>}
|
|
47
|
-
* @memberOf puppeteer
|
|
48
|
-
*/
|
|
49
|
-
declare function injectFile(page: Page, filePath: string, options?: {
|
|
50
|
-
surviveNavigations?: boolean | undefined;
|
|
51
|
-
} | undefined): Promise<any>;
|
|
52
|
-
/**
|
|
53
|
-
* Injects the [jQuery](https://jquery.com/) library into a Puppeteer page.
|
|
54
|
-
* jQuery is often useful for various web scraping and crawling tasks.
|
|
55
|
-
* For example, it can help extract text from HTML elements using CSS selectors.
|
|
56
|
-
*
|
|
57
|
-
* Beware that the injected jQuery object will be set to the `window.$` variable and thus it might cause conflicts with
|
|
58
|
-
* other libraries included by the page that use the same variable name (e.g. another version of jQuery).
|
|
59
|
-
* This can affect functionality of page's scripts.
|
|
60
|
-
*
|
|
61
|
-
* The injected jQuery will survive page navigations and reloads.
|
|
62
|
-
*
|
|
63
|
-
* **Example usage:**
|
|
64
|
-
* ```javascript
|
|
65
|
-
* await Apify.utils.puppeteer.injectJQuery(page);
|
|
66
|
-
* const title = await page.evaluate(() => {
|
|
67
|
-
* return $('head title').text();
|
|
68
|
-
* });
|
|
69
|
-
* ```
|
|
70
|
-
*
|
|
71
|
-
* Note that `injectJQuery()` does not affect the Puppeteer's
|
|
72
|
-
* [`page.$()`](https://pptr.dev/#?product=Puppeteer&show=api-pageselector)
|
|
73
|
-
* function in any way.
|
|
74
|
-
*
|
|
75
|
-
* @param {Page} page
|
|
76
|
-
* Puppeteer [`Page`](https://pptr.dev/#?product=Puppeteer&show=api-class-page) object.
|
|
77
|
-
* @return {Promise<*>}
|
|
78
|
-
* @memberOf puppeteer
|
|
79
|
-
*/
|
|
80
|
-
declare function injectJQuery(page: Page): Promise<any>;
|
|
81
|
-
/**
|
|
82
|
-
* Injects the [Underscore](https://underscorejs.org/) library into a Puppeteer page.
|
|
83
|
-
*
|
|
84
|
-
* Beware that the injected Underscore object will be set to the `window._` variable and thus it might cause conflicts with
|
|
85
|
-
* libraries included by the page that use the same variable name.
|
|
86
|
-
* This can affect functionality of page's scripts.
|
|
87
|
-
*
|
|
88
|
-
* The injected Underscore will survive page navigations and reloads.
|
|
89
|
-
*
|
|
90
|
-
* **Example usage:**
|
|
91
|
-
* ```javascript
|
|
92
|
-
* await Apify.utils.puppeteer.injectUnderscore(page);
|
|
93
|
-
* const escapedHtml = await page.evaluate(() => {
|
|
94
|
-
* return _.escape('<h1>Hello</h1>');
|
|
95
|
-
* });
|
|
96
|
-
* ```
|
|
97
|
-
*
|
|
98
|
-
* @param {Page} page Puppeteer [Page](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#class-page) object.
|
|
99
|
-
* @return {Promise<*>}
|
|
100
|
-
* @memberOf puppeteer
|
|
101
|
-
*/
|
|
102
|
-
declare function injectUnderscore(page: Page): Promise<any>;
|
|
103
|
-
import { enqueueLinksByClickingElements } from "./enqueue_links/click_elements";
|
|
104
|
-
/**
|
|
105
|
-
* Forces the Puppeteer browser tab to block loading URLs that match a provided pattern.
|
|
106
|
-
* This is useful to speed up crawling of websites, since it reduces the amount
|
|
107
|
-
* of data that needs to be downloaded from the web, but it may break some websites
|
|
108
|
-
* or unexpectedly prevent loading of resources.
|
|
109
|
-
*
|
|
110
|
-
* By default, the function will block all URLs including the following patterns:
|
|
111
|
-
*
|
|
112
|
-
* ```json
|
|
113
|
-
* [".css", ".jpg", ".jpeg", ".png", ".svg", ".gif", ".woff", ".pdf", ".zip"]
|
|
114
|
-
* ```
|
|
115
|
-
*
|
|
116
|
-
* If you want to extend this list further, use the `extraUrlPatterns` option,
|
|
117
|
-
* which will keep blocking the default patterns, as well as add your custom ones.
|
|
118
|
-
* If you would like to block only specific patterns, use the `urlPatterns` option,
|
|
119
|
-
* which will override the defaults and block only URLs with your custom patterns.
|
|
120
|
-
*
|
|
121
|
-
* This function does not use Puppeteer's request interception and therefore does not interfere
|
|
122
|
-
* with browser cache. It's also faster than blocking requests using interception,
|
|
123
|
-
* because the blocking happens directly in the browser without the round-trip to Node.js,
|
|
124
|
-
* but it does not provide the extra benefits of request interception.
|
|
125
|
-
*
|
|
126
|
-
* The function will never block main document loads and their respective redirects.
|
|
127
|
-
*
|
|
128
|
-
* **Example usage**
|
|
129
|
-
* ```javascript
|
|
130
|
-
* const Apify = require('apify');
|
|
131
|
-
*
|
|
132
|
-
* const browser = await Apify.launchPuppeteer();
|
|
133
|
-
* const page = await browser.newPage();
|
|
134
|
-
*
|
|
135
|
-
* // Block all requests to URLs that include `adsbygoogle.js` and also all defaults.
|
|
136
|
-
* await Apify.utils.puppeteer.blockRequests(page, {
|
|
137
|
-
* extraUrlPatterns: ['adsbygoogle.js'],
|
|
138
|
-
* });
|
|
139
|
-
*
|
|
140
|
-
* await page.goto('https://cnn.com');
|
|
141
|
-
* ```
|
|
142
|
-
*
|
|
143
|
-
* @param {Page} page
|
|
144
|
-
* Puppeteer [`Page`](https://pptr.dev/#?product=Puppeteer&show=api-class-page) object.
|
|
145
|
-
* @param {object} [options]
|
|
146
|
-
* @param {string[]} [options.urlPatterns]
|
|
147
|
-
* The patterns of URLs to block from being loaded by the browser.
|
|
148
|
-
* Only `*` can be used as a wildcard. It is also automatically added to the beginning
|
|
149
|
-
* and end of the pattern. This limitation is enforced by the DevTools protocol.
|
|
150
|
-
* `.png` is the same as `*.png*`.
|
|
151
|
-
* @param {string[]} [options.extraUrlPatterns]
|
|
152
|
-
* If you just want to append to the default blocked patterns, use this property.
|
|
153
|
-
* @return {Promise<void>}
|
|
154
|
-
* @memberOf puppeteer
|
|
155
|
-
*/
|
|
156
|
-
declare function blockRequests(page: Page, options?: {
|
|
157
|
-
urlPatterns?: string[] | undefined;
|
|
158
|
-
extraUrlPatterns?: string[] | undefined;
|
|
159
|
-
} | undefined): Promise<void>;
|
|
160
|
-
/**
|
|
161
|
-
* `blockResources()` has a high impact on performance in recent versions of Puppeteer.
|
|
162
|
-
* 'Until this resolves, please use `Apify.utils.puppeteer.blockRequests()`.
|
|
163
|
-
* @deprecated
|
|
164
|
-
*/
|
|
165
|
-
declare function blockResources(page: any, resourceTypes?: string[]): Promise<void>;
|
|
166
|
-
/**
|
|
167
|
-
* *NOTE:* In recent versions of Puppeteer using this function entirely disables browser cache which resolves in sub-optimal
|
|
168
|
-
* performance. Until this resolves, we suggest just relying on the in-browser cache unless absolutely necessary.
|
|
169
|
-
*
|
|
170
|
-
* Enables caching of intercepted responses into a provided object. Automatically enables request interception in Puppeteer.
|
|
171
|
-
* *IMPORTANT*: Caching responses stores them to memory, so too loose rules could cause memory leaks for longer running crawlers.
|
|
172
|
-
* This issue should be resolved or atleast mitigated in future iterations of this feature.
|
|
173
|
-
* @param {Page} page
|
|
174
|
-
* Puppeteer [`Page`](https://pptr.dev/#?product=Puppeteer&show=api-class-page) object.
|
|
175
|
-
* @param {Object<string, *>} cache
|
|
176
|
-
* Object in which responses are stored
|
|
177
|
-
* @param {Array<(string|RegExp)>} responseUrlRules
|
|
178
|
-
* List of rules that are used to check if the response should be cached.
|
|
179
|
-
* String rules are compared as page.url().includes(rule) while RegExp rules are evaluated as rule.test(page.url()).
|
|
180
|
-
* @return {Promise<void>}
|
|
181
|
-
* @memberOf puppeteer
|
|
182
|
-
* @deprecated
|
|
183
|
-
*/
|
|
184
|
-
declare function cacheResponses(page: Page, cache: {
|
|
185
|
-
[x: string]: any;
|
|
186
|
-
}, responseUrlRules: Array<(string | RegExp)>): Promise<void>;
|
|
187
|
-
/**
|
|
188
|
-
* Compiles a Puppeteer script into an async function that may be executed at any time
|
|
189
|
-
* by providing it with the following object:
|
|
190
|
-
* ```
|
|
191
|
-
* {
|
|
192
|
-
* page: Page,
|
|
193
|
-
* request: Request,
|
|
194
|
-
* }
|
|
195
|
-
* ```
|
|
196
|
-
* Where `page` is a Puppeteer [`Page`](https://pptr.dev/#?product=Puppeteer&show=api-class-page)
|
|
197
|
-
* and `request` is a {@link Request}.
|
|
198
|
-
*
|
|
199
|
-
* The function is compiled by using the `scriptString` parameter as the function's body,
|
|
200
|
-
* so any limitations to function bodies apply. Return value of the compiled function
|
|
201
|
-
* is the return value of the function body = the `scriptString` parameter.
|
|
202
|
-
*
|
|
203
|
-
* As a security measure, no globals such as `process` or `require` are accessible
|
|
204
|
-
* from within the function body. Note that the function does not provide a safe
|
|
205
|
-
* sandbox and even though globals are not easily accessible, malicious code may
|
|
206
|
-
* still execute in the main process via prototype manipulation. Therefore you
|
|
207
|
-
* should only use this function to execute sanitized or safe code.
|
|
208
|
-
*
|
|
209
|
-
* Custom context may also be provided using the `context` parameter. To improve security,
|
|
210
|
-
* make sure to only pass the really necessary objects to the context. Preferably making
|
|
211
|
-
* secured copies beforehand.
|
|
212
|
-
*
|
|
213
|
-
* @param {string} scriptString
|
|
214
|
-
* @param {Object<string, *>} context
|
|
215
|
-
* @return {CompiledScriptFunction}
|
|
216
|
-
* @memberOf puppeteer
|
|
217
|
-
*/
|
|
218
|
-
declare function compileScript(scriptString: string, context?: {
|
|
219
|
-
[x: string]: any;
|
|
220
|
-
}): CompiledScriptFunction;
|
|
221
|
-
import { addInterceptRequestHandler } from "./puppeteer_request_interception";
|
|
222
|
-
import { removeInterceptRequestHandler } from "./puppeteer_request_interception";
|
|
223
|
-
/**
|
|
224
|
-
* Saves a full screenshot and HTML of the current page into a Key-Value store.
|
|
225
|
-
* @param {Page} page
|
|
226
|
-
* Puppeteer [`Page`](https://pptr.dev/#?product=Puppeteer&show=api-class-page) object.
|
|
227
|
-
* @param {object} [options]
|
|
228
|
-
* @param {string} [options.key=SNAPSHOT]
|
|
229
|
-
* Key under which the screenshot and HTML will be saved. `.jpg` will be appended for screenshot and `.html` for HTML.
|
|
230
|
-
* @param {number} [options.screenshotQuality=50]
|
|
231
|
-
* The quality of the image, between 0-100. Higher quality images have bigger size and require more storage.
|
|
232
|
-
* @param {boolean} [options.saveScreenshot=true]
|
|
233
|
-
* If true, it will save a full screenshot of the current page as a record with `key` appended by `.jpg`.
|
|
234
|
-
* @param {boolean} [options.saveHtml=true]
|
|
235
|
-
* If true, it will save a full HTML of the current page as a record with `key` appended by `.html`.
|
|
236
|
-
* @param {string|null} [options.keyValueStoreName=null]
|
|
237
|
-
* Name or id of the Key-Value store where snapshot is saved. By default it is saved to default Key-Value store.
|
|
238
|
-
* @returns {Promise<void>}
|
|
239
|
-
* @memberOf puppeteer
|
|
240
|
-
* @name saveSnapshot
|
|
241
|
-
*/
|
|
242
|
-
declare function saveSnapshot(page: Page, options?: {
|
|
243
|
-
key?: string | undefined;
|
|
244
|
-
screenshotQuality?: number | undefined;
|
|
245
|
-
saveScreenshot?: boolean | undefined;
|
|
246
|
-
saveHtml?: boolean | undefined;
|
|
247
|
-
keyValueStoreName?: string | null | undefined;
|
|
248
|
-
} | undefined): Promise<void>;
|
|
249
|
-
export {};
|
|
250
|
-
//# sourceMappingURL=puppeteer_utils.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"puppeteer_utils.d.ts","sourceRoot":"","sources":["../src/puppeteer_utils.js"],"names":[],"mappings":"AAsVO,mCATI,IAAI,WAEJ,OAAO,sBAEN,QAAQ,CAAC,WAAS,IAAI,CAAC,CAAC,CA2CnC;AAsBM,qCAjBI,IAAI;;;;;;gBAaF,QAAQ,IAAI,CAAC,CAyGzB;;;;;;;;;;;;;;;;;UAhea,IAAI;aACJ,OAAO;;8CAIV,oBAAoB,KAClB,YAAU;;AAOvB;;;;;;;;;;;;;;;;;GAiBG;AACH,kCAXW,IAAI,YAEJ,MAAM;;gBAML,YAAU,CAmBrB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,oCALW,IAAI,GAEH,YAAU,CAMrB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wCAJW,IAAI,GACH,YAAU,CAMrB;;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmDG;AACH,qCAbW,IAAI;;;gBAUH,QAAQ,IAAI,CAAC,CAkBxB;AAED;;;;GAIG;AACH,oFAQC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,sCAXW,IAAI;QAEG,MAAM;qBAEb,MAAM,CAAC,MAAM,GAAC,MAAM,CAAC,CAAC,GAGrB,QAAQ,IAAI,CAAC,CA+CxB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,6CALW,MAAM;QACC,MAAM;IACZ,sBAAsB,CAiBjC;;;AAsLD;;;;;;;;;;;;;;;;;;GAkBG;AACH,oCAjBW,IAAI;;;;;;gBAaF,QAAQ,IAAI,CAAC,CAsCzB"}
|