@fanboynz/network-scanner 1.0.88 → 1.0.90
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/evaldocument.js +193 -0
- package/nwss.js +60 -157
- package/package.json +1 -1
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Module for handling evaluateOnNewDocument functionality
|
|
3
|
+
* Provides Fetch/XHR interception and page protection mechanisms
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Races a promise against a timeout and always releases the timer afterwards.
 *
 * A bare `Promise.race([op, timeoutPromise])` leaves the timeout's timer
 * pending after `op` wins, which keeps the Node event loop alive for the
 * remainder of the timeout. Clearing it in `finally` avoids that.
 *
 * @param {Promise<*>} operation - Promise to wait for
 * @param {number} ms - Timeout in milliseconds
 * @param {string} timeoutMessage - Message of the Error used when the timeout wins
 * @returns {Promise<*>} Settles like `operation`, or rejects with the timeout Error
 */
function raceWithTimeout(operation, ms, timeoutMessage) {
  let timer;
  const deadline = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error(timeoutMessage)), ms);
  });
  return Promise.race([operation, deadline]).finally(() => clearTimeout(timer));
}

/**
 * Applies evaluateOnNewDocument script injection to a page.
 *
 * Steps, in order:
 *   1. Return false immediately unless injection is requested for this page
 *      (site config flag strictly `true`, or the global flag truthy).
 *   2. Probe the browser with `page.browser().version()` (3 s limit).
 *   3. If the probe succeeds, try the full interception script (8 s limit).
 *   4. If that fails, fall back to the minimal fetch-only script (3 s limit).
 *
 * Failures are never thrown to the caller; they are reported through the
 * return value and, when every strategy fails, a single warning.
 *
 * @param {import('puppeteer').Page} page - Puppeteer page instance
 * @param {string} currentUrl - Current URL being processed (used in log messages only)
 * @param {Object} siteConfig - Site configuration; may be null/undefined
 * @param {boolean} globalEvalOnDoc - Global eval-on-doc flag
 * @param {boolean} forceDebug - Debug logging flag
 * @param {Function} formatLogMessage - Log formatting function, called as (level, message)
 * @returns {Promise<boolean>} Success status of injection
 */
async function applyEvaluateOnNewDocument(page, currentUrl, siteConfig, globalEvalOnDoc, forceDebug, formatLogMessage) {
  // Optional chaining: a missing site config means "not requested by the site"
  // rather than a TypeError.
  const shouldInjectEvalForPage = siteConfig?.evaluateOnNewDocument === true || globalEvalOnDoc;
  if (!shouldInjectEvalForPage) {
    return false;
  }

  const debug = (message) => {
    if (forceDebug) {
      console.log(formatLogMessage('debug', message));
    }
  };

  if (globalEvalOnDoc) {
    debug(`[evalOnDoc] Global Fetch/XHR interception enabled, applying to: ${currentUrl}`);
  } else {
    debug(`[evalOnDoc] Site-specific Fetch/XHR interception enabled for: ${currentUrl}`);
  }

  // Strategy 1: quick responsiveness probe before attempting any injection
  let browserResponsive = false;
  try {
    await raceWithTimeout(page.browser().version(), 3000, 'Browser health check timeout');
    browserResponsive = true;
  } catch (healthErr) {
    debug(`[evalOnDoc] Browser health check failed: ${healthErr.message}`);
  }

  let evalOnDocSuccess = false;

  if (!browserResponsive) {
    debug(`[evalOnDoc] Browser unresponsive, skipping injection for ${currentUrl}`);
  } else {
    // Strategy 2: full injection (loop protection + fetch + XHR)
    try {
      await raceWithTimeout(
        page.evaluateOnNewDocument(createFullInterceptionScript()),
        8000,
        'Injection timeout'
      );
      evalOnDocSuccess = true;
      debug(`[evalOnDoc] Full injection successful for ${currentUrl}`);
    } catch (fullInjectionErr) {
      debug(`[evalOnDoc] Full injection failed: ${fullInjectionErr.message}, trying simplified fallback`);

      // Strategy 3: fallback - minimal injection (just fetch monitoring)
      try {
        await raceWithTimeout(
          page.evaluateOnNewDocument(createMinimalInterceptionScript()),
          3000,
          'Minimal injection timeout'
        );
        evalOnDocSuccess = true;
        debug(`[evalOnDoc] Minimal injection successful for ${currentUrl}`);
      } catch (minimalInjectionErr) {
        debug(`[evalOnDoc] Minimal injection also failed: ${minimalInjectionErr.message}`);
      }
    }
  }

  // Final status logging (emitted regardless of forceDebug)
  if (!evalOnDocSuccess) {
    console.warn(formatLogMessage('warn', `[evalOnDoc] All injection strategies failed for ${currentUrl} - continuing with standard request monitoring only`));
  }

  return evalOnDocSuccess;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Creates the full interception script with all protections.
 *
 * The returned function is meant to be handed to `page.evaluateOnNewDocument`.
 * Per Puppeteer's documented behaviour that function is serialized and run
 * inside the page, so it must stay self-contained: it may only reference page
 * globals (`window`, `XMLHttpRequest`, `console`) and its own locals.
 *
 * What the script installs:
 *   - reload / same-URL replace / same-URL assign guards sharing one counter
 *   - a `window.fetch` wrapper that logs the request and any rejection
 *   - an `XMLHttpRequest.prototype.open` wrapper that logs the URL and errors
 *
 * @returns {Function} Script function for evaluateOnNewDocument
 */
function createFullInterceptionScript() {
  return () => {
    // --- Loop protection ---------------------------------------------------
    // NOTE(review): reloadCount lives in this script's closure, so it
    // presumably restarts at 0 for every new document; it can only count
    // attempts made within a single document lifetime - confirm intent.
    // NOTE(review): browsers define Location members as unforgeable, so these
    // assignments may be silently ignored (or throw in strict code) in a real
    // page. The try/catch keeps a throw here from aborting the fetch/XHR
    // hooks installed further down.
    let reloadCount = 0;
    const MAX_RELOADS = 2;
    try {
      const originalReload = window.location.reload;
      const originalReplace = window.location.replace;
      const originalAssign = window.location.assign;
      const currentHref = window.location.href;

      window.location.reload = function() {
        if (++reloadCount > MAX_RELOADS) {
          console.log('[loop-protection] Blocked excessive reload attempt');
          return;
        }
        return originalReload.apply(this, arguments);
      };

      // Navigations to the current URL count against the same budget.
      window.location.replace = function(url) {
        if (url === currentHref && ++reloadCount > MAX_RELOADS) {
          console.log('[loop-protection] Blocked same-page replace attempt');
          return;
        }
        return originalReplace.apply(this, arguments);
      };

      window.location.assign = function(url) {
        if (url === currentHref && ++reloadCount > MAX_RELOADS) {
          console.log('[loop-protection] Blocked same-page assign attempt');
          return;
        }
        return originalAssign.apply(this, arguments);
      };
    } catch (guardErr) {
      console.log('[loop-protection] Unable to install navigation guards:', guardErr.message);
    }

    // --- Fetch interception ------------------------------------------------
    // Logs Fetch requests from within the page context at the earliest
    // possible moment. Skipped entirely when the page has no fetch.
    if (typeof window.fetch === 'function') {
      const originalFetch = window.fetch;
      window.fetch = (...args) => {
        // Tracks whether the real fetch was already invoked, so the fallback
        // below never issues the same request twice.
        let requestIssued = false;
        try {
          console.log('[evalOnDoc][fetch]', args[0]); // Log fetch requests
          requestIssued = true;
          return originalFetch.apply(window, args).catch((fetchErr) => {
            // `?.` because a rejection reason may be null/undefined.
            console.log('[evalOnDoc][fetch-error]', args[0], fetchErr?.message);
            throw fetchErr; // Re-throw to maintain normal error flow
          });
        } catch (fetchWrapperErr) {
          console.log('[evalOnDoc][fetch-wrapper-error]', fetchWrapperErr.message);
          if (requestIssued) {
            throw fetchWrapperErr; // the real call itself failed; surface it as-is
          }
          // Only the logging failed: still perform the request exactly once.
          return originalFetch.apply(window, args);
        }
      };
    }

    // --- XHR interception --------------------------------------------------
    if (typeof XMLHttpRequest !== 'undefined') {
      const originalXHROpen = XMLHttpRequest.prototype.open;
      XMLHttpRequest.prototype.open = function (method, xhrUrl) {
        // Same single-invocation rule as the fetch wrapper above.
        let openAttempted = false;
        try {
          console.log('[evalOnDoc][xhr]', xhrUrl); // Log XHR requests

          // Report network-level failures for this request
          this.addEventListener('error', function() {
            console.log('[evalOnDoc][xhr-error]', xhrUrl, 'Network error occurred');
          });

          openAttempted = true;
          return originalXHROpen.apply(this, arguments);
        } catch (xhrOpenErr) {
          console.log('[evalOnDoc][xhr-open-error]', xhrOpenErr.message);
          if (openAttempted) {
            throw xhrOpenErr; // open() itself threw; do not call it again
          }
          return originalXHROpen.apply(this, arguments);
        }
      };
    }
  };
}
|
|
169
|
+
|
|
170
|
+
/**
 * Creates the minimal interception script (fetch only).
 *
 * Used as the fallback when the full script could not be injected. The
 * returned function is meant for `page.evaluateOnNewDocument`, so it only
 * references page globals (`window`, `console`) and its own locals.
 *
 * @returns {Function} Script function for evaluateOnNewDocument
 */
function createMinimalInterceptionScript() {
  return () => {
    // Minimal injection - just fetch monitoring; nothing to do without fetch
    if (!window.fetch) {
      return;
    }

    const originalFetch = window.fetch;
    window.fetch = (...args) => {
      try {
        console.log('[evalOnDoc][fetch-minimal]', args[0]);
      } catch (logErr) {
        // Deliberately ignored: a logging failure must never block the request.
      }
      // Called outside the try so a synchronous failure of the real fetch is
      // surfaced once instead of the request being issued a second time.
      return originalFetch.apply(window, args);
    };
  };
}
|
|
190
|
+
|
|
191
|
+
module.exports = {
|
|
192
|
+
applyEvaluateOnNewDocument
|
|
193
|
+
};
|
package/nwss.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// === Network scanner script (nwss.js) v1.0.
|
|
1
|
+
// === Network scanner script (nwss.js) v1.0.90 ===
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
@@ -45,6 +45,9 @@ const { createGlobalHelpers, getTotalDomainsSkipped, getDetectedDomainsCount } =
|
|
|
45
45
|
const { createSmartCache } = require('./lib/smart-cache'); // Smart cache system
|
|
46
46
|
const { clearPersistentCache } = require('./lib/smart-cache');
|
|
47
47
|
|
|
48
|
+
// Evaluate on new document functionality
|
|
49
|
+
const { applyEvaluateOnNewDocument } = require('./lib/evaldocument');
|
|
50
|
+
|
|
48
51
|
// Fast setTimeout helper for Puppeteer 22.x compatibility
|
|
49
52
|
// Uses standard Promise constructor for better performance than node:timers/promises
|
|
50
53
|
function fastTimeout(ms) {
|
|
@@ -125,7 +128,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
|
|
|
125
128
|
const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive } = require('./lib/browserhealth');
|
|
126
129
|
|
|
127
130
|
// --- Script Configuration & Constants ---
|
|
128
|
-
const VERSION = '1.0.
|
|
131
|
+
const VERSION = '1.0.90'; // Script version
|
|
129
132
|
|
|
130
133
|
// get startTime
|
|
131
134
|
const startTime = Date.now();
|
|
@@ -1460,21 +1463,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1460
1463
|
];
|
|
1461
1464
|
|
|
1462
1465
|
try {
|
|
1463
|
-
|
|
1464
|
-
const isHealthy = await isBrowserHealthy(browserInstance);
|
|
1465
|
-
if (!isHealthy) {
|
|
1466
|
-
if (forceDebug) {
|
|
1467
|
-
console.log(formatLogMessage('debug', `Browser health degraded before processing ${currentUrl} - forcing immediate restart`));
|
|
1468
|
-
}
|
|
1469
|
-
// Return special code to trigger immediate browser restart
|
|
1470
|
-
return {
|
|
1471
|
-
url: currentUrl,
|
|
1472
|
-
rules: [],
|
|
1473
|
-
success: false,
|
|
1474
|
-
needsImmediateRestart: true,
|
|
1475
|
-
error: 'Browser health degraded - restart required'
|
|
1476
|
-
};
|
|
1477
|
-
}
|
|
1466
|
+
|
|
1478
1467
|
// Check for Protocol timeout errors that indicate browser is broken
|
|
1479
1468
|
if (browserInstance.process() && browserInstance.process().killed) {
|
|
1480
1469
|
throw new Error('Browser process was killed - restart required');
|
|
@@ -1486,22 +1475,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1486
1475
|
throw new Error('Failed to create valid page instance');
|
|
1487
1476
|
}
|
|
1488
1477
|
|
|
1489
|
-
|
|
1490
|
-
const stillHealthy = await isQuicklyResponsive(browserInstance, 3000);
|
|
1491
|
-
|
|
1492
|
-
if (!stillHealthy) {
|
|
1493
|
-
if (forceDebug) {
|
|
1494
|
-
console.log(formatLogMessage('debug', `Browser unresponsive during page setup for ${currentUrl} - triggering restart`));
|
|
1495
|
-
}
|
|
1496
|
-
return {
|
|
1497
|
-
url: currentUrl,
|
|
1498
|
-
rules: [],
|
|
1499
|
-
success: false,
|
|
1500
|
-
needsImmediateRestart: true,
|
|
1501
|
-
error: 'Browser became unresponsive during page setup - restart required'
|
|
1502
|
-
};
|
|
1503
|
-
}
|
|
1504
|
-
|
|
1478
|
+
|
|
1505
1479
|
// Set aggressive timeouts for problematic operations
|
|
1506
1480
|
// Optimized timeouts for Puppeteer 23.x responsiveness
|
|
1507
1481
|
page.setDefaultTimeout(Math.min(timeout, TIMEOUTS.DEFAULT_PAGE_REDUCED));
|
|
@@ -1581,107 +1555,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1581
1555
|
|
|
1582
1556
|
// --- START: evaluateOnNewDocument for Fetch/XHR Interception (Moved and Fixed) ---
|
|
1583
1557
|
// This script is injected if --eval-on-doc is used or siteConfig.evaluateOnNewDocument is true.
|
|
1584
|
-
const
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
if (globalEvalOnDoc) {
|
|
1588
|
-
console.log(formatLogMessage('debug', `[evalOnDoc] Global Fetch/XHR interception enabled, applying to: ${currentUrl}`));
|
|
1589
|
-
} else { // siteConfig.evaluateOnNewDocument must be true
|
|
1590
|
-
console.log(formatLogMessage('debug', `[evalOnDoc] Site-specific Fetch/XHR interception enabled for: ${currentUrl}`));
|
|
1591
|
-
}
|
|
1592
|
-
}
|
|
1593
|
-
// Quick browser health check before script injection
|
|
1594
|
-
let browserResponsive = false;
|
|
1595
|
-
try {
|
|
1596
|
-
await Promise.race([
|
|
1597
|
-
browserInstance.version(), // Quick responsiveness test
|
|
1598
|
-
new Promise((_, reject) =>
|
|
1599
|
-
setTimeout(() => reject(new Error('Browser health check timeout')), 5000)
|
|
1600
|
-
)
|
|
1601
|
-
]);
|
|
1602
|
-
browserResponsive = true;
|
|
1603
|
-
} catch (healthErr) {
|
|
1604
|
-
console.warn(formatLogMessage('warn', `[evalOnDoc] Browser unresponsive for ${currentUrl}: ${healthErr.message} - skipping script injection`));
|
|
1605
|
-
browserResponsive = false;
|
|
1606
|
-
}
|
|
1607
|
-
|
|
1608
|
-
if (browserResponsive) {
|
|
1609
|
-
try {
|
|
1610
|
-
await page.evaluateOnNewDocument(() => {
|
|
1611
|
-
// Prevent infinite reload loops
|
|
1612
|
-
let reloadCount = 0;
|
|
1613
|
-
const MAX_RELOADS = 2;
|
|
1614
|
-
const originalReload = window.location.reload;
|
|
1615
|
-
const originalReplace = window.location.replace;
|
|
1616
|
-
const originalAssign = window.location.assign;
|
|
1617
|
-
|
|
1618
|
-
window.location.reload = function() {
|
|
1619
|
-
if (++reloadCount > MAX_RELOADS) {
|
|
1620
|
-
console.log('[loop-protection] Blocked excessive reload attempt');
|
|
1621
|
-
return;
|
|
1622
|
-
}
|
|
1623
|
-
return originalReload.apply(this, arguments);
|
|
1624
|
-
};
|
|
1625
|
-
|
|
1626
|
-
// Also protect against location.replace/assign to same URL
|
|
1627
|
-
const currentHref = window.location.href;
|
|
1628
|
-
window.location.replace = function(url) {
|
|
1629
|
-
if (url === currentHref && ++reloadCount > MAX_RELOADS) {
|
|
1630
|
-
console.log('[loop-protection] Blocked same-page replace attempt');
|
|
1631
|
-
return;
|
|
1632
|
-
}
|
|
1633
|
-
return originalReplace.apply(this, arguments);
|
|
1634
|
-
};
|
|
1635
|
-
|
|
1636
|
-
// This script intercepts and logs Fetch and XHR requests
|
|
1637
|
-
// from within the page context at the earliest possible moment.
|
|
1638
|
-
const originalFetch = window.fetch;
|
|
1639
|
-
window.fetch = (...args) => {
|
|
1640
|
-
try {
|
|
1641
|
-
console.log('[evalOnDoc][fetch]', args[0]); // Log fetch requests
|
|
1642
|
-
const fetchPromise = originalFetch.apply(this, args);
|
|
1643
|
-
|
|
1644
|
-
// Add network error handling to prevent page errors
|
|
1645
|
-
return fetchPromise.catch(fetchErr => {
|
|
1646
|
-
console.log('[evalOnDoc][fetch-error]', args[0], fetchErr.message);
|
|
1647
|
-
throw fetchErr; // Re-throw to maintain normal error flow
|
|
1648
|
-
});
|
|
1649
|
-
} catch (fetchWrapperErr) {
|
|
1650
|
-
console.log('[evalOnDoc][fetch-wrapper-error]', fetchWrapperErr.message);
|
|
1651
|
-
return originalFetch.apply(this, args);
|
|
1652
|
-
}
|
|
1653
|
-
};
|
|
1654
|
-
|
|
1655
|
-
const originalXHROpen = XMLHttpRequest.prototype.open;
|
|
1656
|
-
XMLHttpRequest.prototype.open = function (method, xhrUrl) {
|
|
1657
|
-
try {
|
|
1658
|
-
console.log('[evalOnDoc][xhr]', xhrUrl); // Log XHR requests
|
|
1659
|
-
|
|
1660
|
-
// Add error handling for XHR
|
|
1661
|
-
this.addEventListener('error', function(event) {
|
|
1662
|
-
console.log('[evalOnDoc][xhr-error]', xhrUrl, 'Network error occurred');
|
|
1663
|
-
});
|
|
1664
|
-
|
|
1665
|
-
return originalXHROpen.apply(this, arguments);
|
|
1666
|
-
} catch (xhrOpenErr) {
|
|
1667
|
-
console.log('[evalOnDoc][xhr-open-error]', xhrOpenErr.message);
|
|
1668
|
-
return originalXHROpen.apply(this, arguments);
|
|
1669
|
-
}
|
|
1670
|
-
};
|
|
1671
|
-
});
|
|
1672
|
-
} catch (evalErr) {
|
|
1673
|
-
if (evalErr.message.includes('timed out') || evalErr.message.includes('ProtocolError')) {
|
|
1674
|
-
console.warn(formatLogMessage('warn', `[evalOnDoc] Script injection protocol timeout for ${currentUrl} - continuing without XHR/Fetch interception`));
|
|
1675
|
-
} else {
|
|
1676
|
-
console.warn(formatLogMessage('warn', `[evalOnDoc] Failed to set up Fetch/XHR interception for ${currentUrl}: ${evalErr.message}`));
|
|
1677
|
-
}
|
|
1678
|
-
}
|
|
1679
|
-
} else {
|
|
1680
|
-
if (forceDebug) {
|
|
1681
|
-
console.log(formatLogMessage('debug', `[evalOnDoc] Continuing ${currentUrl} without XHR/Fetch interception due to browser health`));
|
|
1682
|
-
}
|
|
1683
|
-
}
|
|
1684
|
-
}
|
|
1558
|
+
const evalOnDocSuccess = await applyEvaluateOnNewDocument(
|
|
1559
|
+
page, currentUrl, siteConfig, globalEvalOnDoc, forceDebug, formatLogMessage
|
|
1560
|
+
);
|
|
1685
1561
|
// --- END: evaluateOnNewDocument for Fetch/XHR Interception ---
|
|
1686
1562
|
|
|
1687
1563
|
// --- CSS Element Blocking Setup ---
|
|
@@ -1732,7 +1608,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1732
1608
|
await Promise.race([
|
|
1733
1609
|
page.setRequestInterception(true),
|
|
1734
1610
|
new Promise((_, reject) =>
|
|
1735
|
-
setTimeout(() => reject(new Error('Network.enable timeout')),
|
|
1611
|
+
setTimeout(() => reject(new Error('Network.enable timeout')), 8000)
|
|
1736
1612
|
)
|
|
1737
1613
|
]);
|
|
1738
1614
|
|
|
@@ -1742,7 +1618,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1742
1618
|
} catch (networkErr) {
|
|
1743
1619
|
if (networkErr.message.includes('timed out') ||
|
|
1744
1620
|
networkErr.message.includes('Network.enable') ||
|
|
1745
|
-
networkErr.message.includes('timeout')
|
|
1621
|
+
networkErr.message.includes('timeout') ||
|
|
1622
|
+
networkErr.constructor.name === 'ProtocolError' ||
|
|
1623
|
+
networkErr.name === 'ProtocolError') {
|
|
1746
1624
|
console.warn(formatLogMessage('warn', `Network setup failed for ${currentUrl}: ${networkErr.message} - triggering browser restart`));
|
|
1747
1625
|
return {
|
|
1748
1626
|
url: currentUrl,
|
|
@@ -3132,20 +3010,31 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3132
3010
|
const batchEnd = Math.min(batchStart + RESOURCE_CLEANUP_INTERVAL, totalUrls);
|
|
3133
3011
|
const currentBatch = allTasks.slice(batchStart, batchEnd);
|
|
3134
3012
|
|
|
3135
|
-
//
|
|
3136
|
-
|
|
3137
|
-
|
|
3138
|
-
|
|
3139
|
-
|
|
3140
|
-
|
|
3141
|
-
|
|
3142
|
-
|
|
3143
|
-
|
|
3144
|
-
|
|
3145
|
-
|
|
3146
|
-
|
|
3147
|
-
|
|
3148
|
-
|
|
3013
|
+
// IMPROVED: Only check health if we have indicators of problems
|
|
3014
|
+
let healthCheck = { shouldRestart: false, reason: null };
|
|
3015
|
+
const recentResults = results.slice(-8); // Check more results for better pattern detection
|
|
3016
|
+
const recentFailureRate = recentResults.length > 0 ?
|
|
3017
|
+
recentResults.filter(r => !r.success).length / recentResults.length : 0;
|
|
3018
|
+
const hasHighFailureRate = recentFailureRate > 0.75; // 75% failure threshold (more conservative)
|
|
3019
|
+
const hasCriticalErrors = recentResults.filter(r => r.needsImmediateRestart).length > 2;
|
|
3020
|
+
|
|
3021
|
+
// Only run health checks when we have STRONG indicators of problems
|
|
3022
|
+
if (urlsSinceLastCleanup > 15 && (
|
|
3023
|
+
(hasHighFailureRate && recentResults.length >= 5) || // Need sufficient sample size
|
|
3024
|
+
hasCriticalErrors ||
|
|
3025
|
+
urlsSinceLastCleanup > RESOURCE_CLEANUP_INTERVAL * 0.9 // Very close to cleanup limit
|
|
3026
|
+
)) {
|
|
3027
|
+
healthCheck = await monitorBrowserHealth(browser, {}, {
|
|
3028
|
+
siteIndex: Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL),
|
|
3029
|
+
totalSites: Math.ceil(totalUrls / RESOURCE_CLEANUP_INTERVAL),
|
|
3030
|
+
urlsSinceCleanup: urlsSinceLastCleanup,
|
|
3031
|
+
cleanupInterval: RESOURCE_CLEANUP_INTERVAL,
|
|
3032
|
+
forceDebug,
|
|
3033
|
+
silentMode
|
|
3034
|
+
});
|
|
3035
|
+
} else if (forceDebug && urlsSinceLastCleanup > 10) {
|
|
3036
|
+
console.log(formatLogMessage('debug', `Skipping health check: failure rate ${Math.round(recentFailureRate * 100)}%, critical errors: ${hasCriticalErrors ? 'yes' : 'no'}`));
|
|
3037
|
+
}
|
|
3149
3038
|
|
|
3150
3039
|
const batchSize = currentBatch.length;
|
|
3151
3040
|
|
|
@@ -3155,17 +3044,21 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3155
3044
|
// Check if processing this entire site would exceed cleanup interval OR health check suggests restart
|
|
3156
3045
|
const wouldExceedLimit = urlsSinceLastCleanup + batchSize >= Math.min(RESOURCE_CLEANUP_INTERVAL, 100);
|
|
3157
3046
|
const isNotLastBatch = batchEnd < totalUrls;
|
|
3047
|
+
// IMPROVED: More restrictive health-based restart conditions
|
|
3048
|
+
const shouldRestartFromHealth = healthCheck.shouldRestart &&
|
|
3049
|
+
!healthCheck.reason?.includes('Scheduled cleanup') &&
|
|
3050
|
+
(healthCheck.reason?.includes('Critical') || healthCheck.reason?.includes('disconnected'));
|
|
3158
3051
|
|
|
3159
3052
|
// Restart browser if we've processed enough URLs, health check suggests it, and this isn't the last site
|
|
3160
|
-
if ((wouldExceedLimit ||
|
|
3053
|
+
if ((wouldExceedLimit || shouldRestartFromHealth || (hasHighFailureRate && recentResults.length >= 6)) && urlsSinceLastCleanup > 8 && isNotLastBatch) {
|
|
3161
3054
|
|
|
3162
3055
|
let restartReason = 'Unknown';
|
|
3163
|
-
if (
|
|
3056
|
+
if (shouldRestartFromHealth) {
|
|
3164
3057
|
restartReason = healthCheck.reason;
|
|
3165
|
-
} else if (
|
|
3166
|
-
restartReason =
|
|
3058
|
+
} else if (hasHighFailureRate) {
|
|
3059
|
+
restartReason = `High failure rate: ${Math.round(recentFailureRate * 100)}% in recent batch`;
|
|
3167
3060
|
} else if (wouldExceedLimit) {
|
|
3168
|
-
restartReason = `Processed ${urlsSinceLastCleanup} URLs`;
|
|
3061
|
+
restartReason = `Processed ${urlsSinceLastCleanup} URLs (scheduled maintenance)`;
|
|
3169
3062
|
}
|
|
3170
3063
|
|
|
3171
3064
|
if (!silentMode) {
|
|
@@ -3233,8 +3126,18 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3233
3126
|
const batchTasks = currentBatch.map(task => originalLimit(() => processUrl(task.url, task.config, browser)));
|
|
3234
3127
|
const batchResults = await Promise.all(batchTasks);
|
|
3235
3128
|
|
|
3236
|
-
//
|
|
3237
|
-
const
|
|
3129
|
+
// IMPROVED: Much more conservative emergency restart logic
|
|
3130
|
+
const criticalRestartCount = batchResults.filter(r => r.needsImmediateRestart).length;
|
|
3131
|
+
// Require either:
|
|
3132
|
+
// - More than 50% of batch has critical errors, OR
|
|
3133
|
+
// - At least 3 critical errors in any size batch
|
|
3134
|
+
const restartThreshold = Math.max(3, Math.floor(batchSize * 0.5)); // 50% of batch or min 3
|
|
3135
|
+
const needsImmediateRestart = criticalRestartCount >= restartThreshold && criticalRestartCount >= 2;
|
|
3136
|
+
|
|
3137
|
+
// Log restart decision for debugging
|
|
3138
|
+
if (forceDebug && criticalRestartCount > 0) {
|
|
3139
|
+
console.log(formatLogMessage('debug', `Emergency restart decision: ${criticalRestartCount}/${batchSize} critical errors (threshold: ${restartThreshold}, restart: ${needsImmediateRestart ? 'YES' : 'NO'})`));
|
|
3140
|
+
}
|
|
3238
3141
|
|
|
3239
3142
|
// Log completion of concurrent processing
|
|
3240
3143
|
if (forceDebug) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "1.0.88",
|
|
3
|
+
"version": "1.0.90",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|