@opentermsarchive/engine 9.1.2 → 9.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import puppeteer from 'puppeteer-extra';
|
|
2
2
|
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
3
3
|
|
|
4
|
+
import { resolveProxyConfiguration, extractProxyCredentials } from './proxyUtils.js';
|
|
5
|
+
|
|
4
6
|
puppeteer.use(stealthPlugin());
|
|
5
7
|
|
|
6
8
|
let browser;
|
|
@@ -25,6 +27,10 @@ export default async function fetch(url, cssSelectors, config) {
|
|
|
25
27
|
|
|
26
28
|
await client.send('Network.clearBrowserCookies'); // Clear cookies to ensure clean state between fetches and prevent session persistence across different URLs
|
|
27
29
|
|
|
30
|
+
if (browser.proxyCredentials?.username && browser.proxyCredentials?.password) {
|
|
31
|
+
await page.authenticate(browser.proxyCredentials);
|
|
32
|
+
}
|
|
33
|
+
|
|
28
34
|
response = await page.goto(url, { waitUntil: 'load' }); // Using `load` instead of `networkidle0` as it's more reliable and faster. The 'load' event fires when the page and all its resources (stylesheets, scripts, images) have finished loading. `networkidle0` can be problematic as it waits for 500ms of network inactivity, which may never occur on dynamic pages and then triggers a navigation timeout.
|
|
29
35
|
|
|
30
36
|
if (!response) {
|
|
@@ -86,7 +92,34 @@ export async function launchHeadlessBrowser() {
|
|
|
86
92
|
return browser;
|
|
87
93
|
}
|
|
88
94
|
|
|
89
|
-
|
|
95
|
+
const options = {
|
|
96
|
+
args: [],
|
|
97
|
+
headless: !process.env.OTA_ENGINE_FETCHER_NO_HEADLESS,
|
|
98
|
+
};
|
|
99
|
+
|
|
100
|
+
const { httpProxy, httpsProxy } = resolveProxyConfiguration();
|
|
101
|
+
|
|
102
|
+
let proxyCredentials = null;
|
|
103
|
+
|
|
104
|
+
if (httpProxy) {
|
|
105
|
+
const httpProxyUrl = new URL(httpProxy);
|
|
106
|
+
const httpsProxyUrl = new URL(httpsProxy);
|
|
107
|
+
|
|
108
|
+
proxyCredentials = extractProxyCredentials(httpProxy, httpsProxy);
|
|
109
|
+
|
|
110
|
+
options.args.push(`--proxy-server=http=${httpProxyUrl.host};https=${httpsProxyUrl.host}`);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
if (process.env.OTA_ENGINE_FETCHER_NO_SANDBOX) {
|
|
114
|
+
options.args.push('--no-sandbox');
|
|
115
|
+
options.args.push('--disable-setuid-sandbox');
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
browser = await puppeteer.launch(options);
|
|
119
|
+
|
|
120
|
+
if (proxyCredentials) {
|
|
121
|
+
browser.proxyCredentials = proxyCredentials;
|
|
122
|
+
}
|
|
90
123
|
|
|
91
124
|
return browser;
|
|
92
125
|
}
|
|
@@ -4,6 +4,8 @@ import HttpProxyAgent from 'http-proxy-agent';
|
|
|
4
4
|
import HttpsProxyAgent from 'https-proxy-agent';
|
|
5
5
|
import nodeFetch, { AbortError } from 'node-fetch';
|
|
6
6
|
|
|
7
|
+
import { resolveProxyConfiguration } from './proxyUtils.js';
|
|
8
|
+
|
|
7
9
|
export default async function fetch(url, config) {
|
|
8
10
|
const controller = new AbortController();
|
|
9
11
|
const timeout = setTimeout(() => controller.abort(), config.navigationTimeout);
|
|
@@ -14,10 +16,12 @@ export default async function fetch(url, config) {
|
|
|
14
16
|
headers: { 'Accept-Language': config.language },
|
|
15
17
|
};
|
|
16
18
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
nodeFetchOptions.agent = new
|
|
19
|
+
const { httpProxy, httpsProxy } = resolveProxyConfiguration();
|
|
20
|
+
|
|
21
|
+
if (url.startsWith('https:') && httpsProxy) {
|
|
22
|
+
nodeFetchOptions.agent = new HttpsProxyAgent(httpsProxy);
|
|
23
|
+
} else if (url.startsWith('http:') && httpProxy) {
|
|
24
|
+
nodeFetchOptions.agent = new HttpProxyAgent(httpProxy);
|
|
21
25
|
}
|
|
22
26
|
|
|
23
27
|
let response;
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
 * Reads the proxy settings from the process environment.
 *
 * For each protocol the lowercase variable is consulted first and the
 * uppercase one second. When no HTTPS-specific variable is set, the HTTPS
 * proxy falls back to whatever was resolved for HTTP.
 *
 * @returns {{ httpProxy: (string|undefined), httpsProxy: (string|undefined) }}
 *   The resolved proxy URLs, as found in the environment.
 */
export function resolveProxyConfiguration() {
  const {
    http_proxy: lowercaseHttp,
    HTTP_PROXY: uppercaseHttp,
    https_proxy: lowercaseHttps,
    HTTPS_PROXY: uppercaseHttps,
  } = process.env;

  const configuration = { httpProxy: lowercaseHttp || uppercaseHttp };

  // No dedicated HTTPS proxy: reuse the HTTP one.
  configuration.httpsProxy = lowercaseHttps || uppercaseHttps || configuration.httpProxy;

  return configuration;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Extracts the username and password embedded in the proxy URLs.
 *
 * The credentials are read from the HTTP proxy URL and must match the ones of
 * the HTTPS proxy URL, since a single pair of credentials is returned for both.
 *
 * @param {string} [httpProxy] - URL of the HTTP proxy, e.g. `http://user:pass@host:8080`.
 * @param {string} [httpsProxy] - URL of the HTTPS proxy; defaults to `httpProxy` when omitted.
 * @returns {{ username: string, password: string } | null} The decoded
 *   credentials, or `null` when no HTTP proxy is given or when it carries no
 *   complete username/password pair.
 * @throws {TypeError} When one of the proxy URLs cannot be parsed.
 * @throws {Error} When the HTTP and HTTPS proxies carry different credentials.
 */
export function extractProxyCredentials(httpProxy, httpsProxy) {
  if (!httpProxy) {
    return null;
  }

  // `URL` exposes the userinfo percent-encoded (e.g. `p%40ss` for `p@ss`);
  // consumers need the raw value. Keep the value untouched when it contains
  // a stray `%` that is not a valid escape sequence.
  const decode = value => {
    try {
      return decodeURIComponent(value);
    } catch {
      return value;
    }
  };

  const httpProxyUrl = new URL(httpProxy);
  // Same fallback as the environment resolution: without a dedicated HTTPS proxy, the HTTP one is used.
  const httpsProxyUrl = new URL(httpsProxy || httpProxy);

  const username = decode(httpProxyUrl.username);
  const password = decode(httpProxyUrl.password);

  if (!username || !password) {
    return null;
  }

  // Compare decoded values so that equivalent credentials encoded differently are not rejected.
  if (username !== decode(httpsProxyUrl.username) || password !== decode(httpsProxyUrl.password)) {
    throw new Error('Unsupported proxies specified, http and https proxy should have the same credentials.');
  }

  return { username, password };
}
|