@opentermsarchive/engine 5.6.0 → 5.7.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@opentermsarchive/engine",
3
- "version": "5.6.0",
3
+ "version": "5.7.0",
4
4
  "description": "Tracks and makes visible changes to the terms of online services",
5
5
  "homepage": "https://opentermsarchive.org",
6
6
  "bugs": {
@@ -20,6 +20,11 @@ export default async function fetch(url, cssSelectors, config) {
20
20
  await page.setDefaultNavigationTimeout(config.navigationTimeout);
21
21
  await page.setExtraHTTPHeaders({ 'Accept-Language': config.language });
22
22
 
23
+ await page.setCacheEnabled(false); // Disable cache to ensure fresh content on each fetch and prevent stale data from previous requests
24
+ const client = await page.target().createCDPSession();
25
+
26
+ await client.send('Network.clearBrowserCookies'); // Clear cookies to ensure clean state between fetches and prevent session persistence across different URLs
27
+
23
28
  response = await page.goto(url, { waitUntil: 'load' }); // Using `load` instead of `networkidle0` as it's more reliable and faster. The 'load' event fires when the page and all its resources (stylesheets, scripts, images) have finished loading. `networkidle0` can be problematic as it waits for 500ms of network inactivity, which may never occur on dynamic pages and then triggers a navigation timeout.
24
29
 
25
30
  if (!response) {
@@ -48,15 +48,11 @@ export default async function fetch({
48
48
  executeClientScripts,
49
49
  };
50
50
 
51
- try {
52
- if (executeClientScripts) {
53
- return await fetchWithFullDom(url, cssSelectors, fetcherConfig);
54
- }
55
-
56
- return await fetchWithFallback(url, cssSelectors, fetcherConfig);
57
- } catch (error) {
58
- throw new FetchDocumentError(error.message);
51
+ if (executeClientScripts) {
52
+ return fetchWithFullDom(url, cssSelectors, fetcherConfig);
59
53
  }
54
+
55
+ return fetchWithFallback(url, cssSelectors, fetcherConfig);
60
56
  }
61
57
 
62
58
  async function fetchWithFallback(url, cssSelectors, fetcherConfig) {
@@ -17,8 +17,8 @@ const ERROR_MESSAGE_TO_ISSUE_LABEL_MAP = {
17
17
  'HTTP code 502': '502',
18
18
  'HTTP code 503': '503',
19
19
  'Timed out after': 'timeout',
20
- 'getaddrinfo EAI_AGAIN': 'EAI_AGAIN',
21
- 'getaddrinfo ENOTFOUND': 'ENOTFOUND',
20
+ EAI_AGAIN: 'EAI_AGAIN',
21
+ ENOTFOUND: 'ENOTFOUND',
22
22
  'Response is empty': 'empty response',
23
23
  'unable to verify the first certificate': 'first certificate',
24
24
  'certificate has expired': 'certificate expired',