@opentermsarchive/engine 10.2.0 → 10.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@opentermsarchive/engine",
3
- "version": "10.2.0",
3
+ "version": "10.3.1",
4
4
  "description": "Tracks and makes visible changes to the terms of online services",
5
5
  "homepage": "https://opentermsarchive.org",
6
6
  "bugs": {
@@ -3,12 +3,14 @@ import stealthPlugin from 'puppeteer-extra-plugin-stealth';
3
3
 
4
4
  import { resolveProxyConfiguration, extractProxyCredentials } from './proxyUtils.js';
5
5
 
6
- puppeteer.use(stealthPlugin());
7
-
8
6
  let browser;
9
7
 
10
8
  export default async function fetch(url, cssSelectors, config) {
9
+ puppeteer.use(stealthPlugin({ locale: config.language }));
10
+
11
+ let context;
11
12
  let page;
13
+ let client;
12
14
  let response;
13
15
  const selectors = [].concat(cssSelectors);
14
16
 
@@ -17,15 +19,20 @@ export default async function fetch(url, cssSelectors, config) {
17
19
  }
18
20
 
19
21
  try {
20
- page = await browser.newPage();
22
+ context = await browser.createBrowserContext(); // Create an isolated browser context to ensure complete isolation between fetches (cookies, localStorage, sessionStorage, IndexedDB, cache)
23
+ page = await context.newPage();
21
24
 
25
+ await page.setViewport({ width: 1920, height: 1080 }); // Set a realistic viewport size to avoid detection based on default Puppeteer dimensions (800x600)
22
26
  await page.setDefaultNavigationTimeout(config.navigationTimeout);
23
27
  await page.setExtraHTTPHeaders({ 'Accept-Language': config.language });
24
28
 
25
- await page.setCacheEnabled(false); // Disable cache to ensure fresh content on each fetch and prevent stale data from previous requests
26
- const client = await page.target().createCDPSession();
29
+ // Use CDP to ensure the browser language is set correctly (most reliable method, see https://zirkelc.dev/posts/puppeteer-language-experiment)
30
+ client = await page.createCDPSession();
27
31
 
28
- await client.send('Network.clearBrowserCookies'); // Clear cookies to ensure clean state between fetches and prevent session persistence across different URLs
32
+ await client.send('Network.setUserAgentOverride', {
33
+ userAgent: await browser.userAgent(),
34
+ acceptLanguage: config.language,
35
+ });
29
36
 
30
37
  if (browser.proxyCredentials?.username && browser.proxyCredentials?.password) {
31
38
  await page.authenticate(browser.proxyCredentials);
@@ -75,9 +82,15 @@ export default async function fetch(url, cssSelectors, config) {
75
82
  }
76
83
  throw new Error(error.message);
77
84
  } finally {
85
+ if (client) {
86
+ await client.detach();
87
+ }
78
88
  if (page) {
79
89
  await page.close();
80
90
  }
91
+ if (context) {
92
+ await context.close(); // Close the isolated context to free resources and ensure complete cleanup
93
+ }
81
94
  }
82
95
  }
83
96