@fanboynz/network-scanner 2.0.8 → 2.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -248,6 +248,7 @@ When a page redirects to a new domain, first-party/third-party detection is base
248
248
  | `isBrave` | Boolean | `false` | Spoof Brave browser detection |
249
249
  | `evaluateOnNewDocument` | Boolean | `false` | Inject fetch/XHR interceptor in page |
250
250
  | `cdp` | Boolean | `false` | Enable CDP logging for this site |
251
+ | `cdp_specific` | Array | - | Enable CDP logging only for specific domains in the URL list |
251
252
  | `css_blocked` | Array | - | CSS selectors to hide elements |
252
253
  | `source` | Boolean | `false` | Save page source HTML after load |
253
254
  | `screenshot` | Boolean | `false` | Capture screenshot on load failure |
package/lib/cdp.js CHANGED
@@ -116,6 +116,7 @@ async function setRequestInterceptionWithTimeout(page, timeout = 15000) {
116
116
  * @param {boolean} options.enableCDP - Global CDP flag (from --cdp command line)
117
117
  * @param {boolean} options.siteSpecificCDP - Site-specific CDP flag (from config)
118
118
  * @param {boolean} options.forceDebug - Debug logging flag
119
+ * @param {string} currentUrl - Current URL for domain-specific CDP decisions (positional argument, not part of options)
119
120
  * @returns {Promise<object>} CDP session object with cleanup method
120
121
  */
121
122
  async function createCDPSession(page, currentUrl, options = {}) {
@@ -132,10 +133,14 @@ async function createCDPSession(page, currentUrl, options = {}) {
132
133
 
133
134
  // Log which CDP mode is being used
134
135
  if (forceDebug) {
136
+ const urlHostname = (() => {
137
+ try { return new URL(currentUrl).hostname; } catch { return 'unknown'; }
138
+ })();
139
+
135
140
  if (enableCDP) {
136
- console.log(formatLogMessage('debug', `CDP logging globally enabled by --cdp, applying to page: ${currentUrl}`));
141
+ console.log(formatLogMessage('debug', `[cdp] Global CDP enabled by --cdp flag for ${urlHostname}`));
137
142
  } else if (siteSpecificCDP === true) {
138
- console.log(formatLogMessage('debug', `CDP logging enabled for page ${currentUrl} via site-specific 'cdp: true' config.`));
143
+ console.log(formatLogMessage('debug', `[cdp] Site-specific CDP enabled for ${urlHostname} (via cdp: true or cdp_specific domain match)`));
139
144
  }
140
145
  }
141
146
 
@@ -163,14 +168,22 @@ async function createCDPSession(page, currentUrl, options = {}) {
163
168
  // Extract hostname for logging context (handles URL parsing errors gracefully)
164
169
  let hostnameForLog = 'unknown-host';
165
170
  try {
166
- hostnameForLog = new URL(currentUrl).hostname;
171
+ const currentHostname = new URL(currentUrl).hostname;
172
+ const requestHostname = new URL(requestUrl).hostname;
173
+ // Show both hostnames if different (cross-domain requests)
174
+ if (currentHostname !== requestHostname) {
175
+ hostnameForLog = `${currentHostname}→${requestHostname}`;
176
+ } else {
177
+ hostnameForLog = currentHostname;
178
+ }
167
179
  } catch (_) {
168
180
  // Ignore URL parsing errors for logging context
169
181
  }
170
182
 
171
- // Log the request with context - customize this for your needs
172
- // Format: [cdp][hostname] METHOD url (initiator: type)
173
- console.log(formatLogMessage('debug', `[cdp][${hostnameForLog}] ${method} ${requestUrl} (initiator: ${initiator})`));
183
+ // Log the request with context only if debug mode is enabled
184
+ if (forceDebug) {
185
+ console.log(formatLogMessage('debug', `[cdp][${hostnameForLog}] ${method} ${requestUrl} (initiator: ${initiator})`));
186
+ }
174
187
  });
175
188
 
176
189
  if (forceDebug) {
@@ -200,6 +213,15 @@ async function createCDPSession(page, currentUrl, options = {}) {
200
213
  } catch (cdpErr) {
201
214
  cdpSession = null; // Reset on failure
202
215
 
216
+ // Enhanced error context for CDP domain-specific debugging
217
+ const urlContext = (() => {
218
+ try {
219
+ return new URL(currentUrl).hostname;
220
+ } catch {
221
+ return currentUrl.substring(0, 50) + '...';
222
+ }
223
+ })();
224
+
203
225
  // Categorize CDP errors for proper handling
204
226
  // Enhanced error handling for Puppeteer 20+ error patterns
205
227
  if (cdpErr.message.includes('Network.enable timed out') ||
@@ -236,9 +258,10 @@ async function createCDPSession(page, currentUrl, options = {}) {
236
258
  *
237
259
  * @param {object} siteConfig - Site configuration object
238
260
  * @param {boolean} globalCDP - Global CDP flag
261
+ * @param {Array} cdpSpecificDomains - Array of domains for cdp_specific feature
239
262
  * @returns {object} Validation result with recommendations
240
263
  */
241
- function validateCDPConfig(siteConfig, globalCDP) {
264
+ function validateCDPConfig(siteConfig, globalCDP, cdpSpecificDomains = []) {
242
265
  const warnings = [];
243
266
  const recommendations = [];
244
267
 
@@ -247,8 +270,25 @@ function validateCDPConfig(siteConfig, globalCDP) {
247
270
  warnings.push('Site-specific CDP disabled but global CDP is enabled - global setting will override');
248
271
  }
249
272
 
273
+ // Validate cdp_specific configuration
274
+ if (siteConfig.cdp_specific) {
275
+ if (!Array.isArray(siteConfig.cdp_specific)) {
276
+ warnings.push('cdp_specific must be an array of domain strings');
277
+ } else if (siteConfig.cdp_specific.length === 0) {
278
+ warnings.push('cdp_specific is empty - no domains will have CDP enabled');
279
+ } else {
280
+ // Validate domain format
281
+ const invalidDomains = siteConfig.cdp_specific.filter(domain => {
282
+ return typeof domain !== 'string' || domain.trim() === '';
283
+ });
284
+ if (invalidDomains.length > 0) {
285
+ warnings.push(`cdp_specific contains invalid domains: ${invalidDomains.join(', ')}`);
286
+ }
287
+ }
288
+ }
289
+
250
290
  // Performance recommendations
251
- if (globalCDP || siteConfig.cdp === true) {
291
+ if (globalCDP || siteConfig.cdp === true || (siteConfig.cdp_specific && siteConfig.cdp_specific.length > 0)) {
252
292
  recommendations.push('CDP logging enabled - this may impact performance for high-traffic sites');
253
293
 
254
294
  if (siteConfig.timeout && siteConfig.timeout < 30000) {
package/nwss.1 CHANGED
@@ -282,7 +282,6 @@ Array of CSS selectors to hide elements on the page.
282
282
  .B userAgent
283
283
  Spoof User-Agent: \fB"chrome"\fR, \fB"chrome_mac"\fR, \fB"chrome_linux"\fR, \fB"firefox"\fR, \fB"firefox_mac"\fR, \fB"firefox_linux"\fR, or \fB"safari"\fR.
284
284
 
285
-
286
285
  .TP
287
286
  .B interact
288
287
  Boolean. Simulate mouse movements and clicks.
@@ -442,6 +441,10 @@ Boolean. Inject Fetch/XHR interceptor scripts into page context.
442
441
  .B cdp
443
442
  Boolean. Enable Chrome DevTools Protocol logging for this specific site.
444
443
 
444
+ .TP
445
+ .B cdp_specific
446
+ Array of domain names. Enable Chrome DevTools Protocol logging only for URLs matching these specific domains within a multi-URL site configuration. Takes precedence over \fBcdp: false\fR but is ignored if \fBcdp: true\fR is set. Supports exact hostname matching and subdomain matching (e.g., "example.com" matches both "example.com" and "subdomain.example.com"). Useful for selective debugging of network requests on specific domains while avoiding CDP overhead on others.
447
+
445
448
  .TP
446
449
  .B source
447
450
  Boolean. Save page source HTML after loading.
@@ -880,6 +883,26 @@ node nwss.js --ignore-cache --debug -o rules.txt
880
883
  }
881
884
  .EE
882
885
 
886
+ .SS Selective CDP logging for specific domains:
887
+ .EX
888
+ {
889
+ "url": [
890
+ "https://site1.com/page1",
891
+ "https://debug-target.com/page2",
892
+ "https://site2.com/page3"
893
+ ],
894
+ "filterRegex": "\\\\.(space|website)\\\\b",
895
+ "cdp_specific": ["debug-target.com"],
896
+ "resourceTypes": ["script", "fetch"],
897
+ "comments": [
898
+ "CDP enabled only for debug-target.com",
899
+ "Other URLs run without CDP overhead"
900
+ ]
901
+ }
902
+ .EE
903
+
904
+ Note: If \fBcdp: true\fR is also set, \fBcdp_specific\fR is ignored and CDP is enabled for all URLs.
905
+
883
906
  .SS FlowProxy protection handling:
884
907
  .EX
885
908
  {
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v2.0.8 ===
1
+ // === Network scanner script (nwss.js) v2.0.9 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -129,7 +129,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
129
129
  const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive, performGroupWindowCleanup, performRealtimeWindowCleanup, trackPageForRealtime, updatePageUsage } = require('./lib/browserhealth');
130
130
 
131
131
  // --- Script Configuration & Constants ---
132
- const VERSION = '2.0.8'; // Script version
132
+ const VERSION = '2.0.9'; // Script version
133
133
 
134
134
  // get startTime
135
135
  const startTime = Date.now();
@@ -555,6 +555,7 @@ FlowProxy Protection Options:
555
555
  Advanced Options:
556
556
  evaluateOnNewDocument: true/false Inject fetch/XHR interceptor in page (for this site)
557
557
  cdp: true/false Enable CDP logging for this site
558
+ cdp_specific: ["domain1.com", "domain2.com"] Enable CDP logging only for specific domains in the URL list
558
559
  interact_duration: <milliseconds> Duration of interaction simulation (default: 2000)
559
560
  interact_scrolling: true/false Enable scrolling simulation (default: true)
560
561
  interact_clicks: true/false Enable element clicking simulation (default: false)
@@ -923,6 +924,30 @@ function safeGetDomain(url, getFullHostname = false) {
923
924
  }
924
925
  }
925
926
 
927
+ /**
928
+ * Checks if a URL matches any domain in the cdp_specific list
929
+ * @param {string} url - The URL to check
930
+ * @param {Array} cdpSpecificList - Array of domains that should have CDP enabled
931
+ * @returns {boolean} True if URL matches a domain in the list
932
+ */
933
+ function shouldEnableCDPForUrl(url, cdpSpecificList) {
934
+ if (!cdpSpecificList || !Array.isArray(cdpSpecificList) || cdpSpecificList.length === 0) {
935
+ return false;
936
+ }
937
+
938
+ try {
939
+ const urlHostname = new URL(url).hostname;
940
+ return cdpSpecificList.some(domain => {
941
+ // Remove protocol if present and clean domain
942
+ const cleanDomain = domain.replace(/^https?:\/\//, '').replace(/\/.*$/, '');
943
+ // Match exact hostname or subdomain
944
+ return urlHostname === cleanDomain || urlHostname.endsWith('.' + cleanDomain);
945
+ });
946
+ } catch (urlErr) {
947
+ return false;
948
+ }
949
+ }
950
+
926
951
  /**
927
952
  * Outputs dry run results to console with formatted display
928
953
  * If outputFile is specified, also captures output for file writing
@@ -1475,6 +1500,24 @@ function setupFrameHandling(page, forceDebug) {
1475
1500
  return { url: currentUrl, rules: [], success: false, skipped: true };
1476
1501
  }
1477
1502
 
1503
+ // Determine CDP enablement based on cdp_specific or traditional cdp setting
1504
+ let shouldEnableCDPForThisUrl = false;
1505
+ if (siteConfig.cdp === true) {
1506
+ // If cdp: true is set, enable CDP for all URLs and ignore cdp_specific
1507
+ shouldEnableCDPForThisUrl = true;
1508
+ if (forceDebug && siteConfig.cdp_specific) {
1509
+ console.log(formatLogMessage('debug', `CDP enabled for all URLs via cdp: true - ignoring cdp_specific for ${currentUrl}`));
1510
+ }
1511
+ } else if (siteConfig.cdp_specific && Array.isArray(siteConfig.cdp_specific)) {
1512
+ // Only use cdp_specific if cdp is not explicitly set to true
1513
+ shouldEnableCDPForThisUrl = shouldEnableCDPForUrl(currentUrl, siteConfig.cdp_specific);
1514
+ if (forceDebug && shouldEnableCDPForThisUrl) {
1515
+ console.log(formatLogMessage('debug', `CDP enabled for ${currentUrl} via cdp_specific domain match`));
1516
+ }
1517
+ } else {
1518
+ shouldEnableCDPForThisUrl = false;
1519
+ }
1520
+
1478
1521
  let page = null;
1479
1522
  let cdpSession = null;
1480
1523
  let cdpSessionManager = null;
@@ -1884,7 +1927,7 @@ function setupFrameHandling(page, forceDebug) {
1884
1927
  try {
1885
1928
  cdpSessionManager = await createCDPSession(page, currentUrl, {
1886
1929
  enableCDP,
1887
- siteSpecificCDP: siteConfig.cdp,
1930
+ siteSpecificCDP: shouldEnableCDPForThisUrl,
1888
1931
  forceDebug
1889
1932
  });
1890
1933
  } catch (cdpErr) {
@@ -3120,12 +3163,18 @@ function setupFrameHandling(page, forceDebug) {
3120
3163
  if (useForceReload && !reloadSuccess) {
3121
3164
  // Attempt force reload: disable cache, reload, re-enable cache
3122
3165
  try {
3123
- // Add timeout protection for setCacheEnabled operations
3166
+ // Timeout-protected cache disable
3124
3167
  await Promise.race([
3125
3168
  page.setCacheEnabled(false),
3126
- new Promise((_, reject) =>
3127
- setTimeout(() => reject(new Error('setCacheEnabled(false) timeout')), 5000)
3128
- )
3169
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Cache disable timeout')), 8000))
3170
+ ]);
3171
+
3172
+ await page.reload({ waitUntil: 'domcontentloaded', timeout: Math.min(timeout, 12000) });
3173
+
3174
+ // Timeout-protected cache enable
3175
+ await Promise.race([
3176
+ page.setCacheEnabled(true),
3177
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Cache enable timeout')), 8000))
3129
3178
  ]);
3130
3179
 
3131
3180
  await page.reload({ waitUntil: 'domcontentloaded', timeout: Math.min(timeout, 12000) });
@@ -3349,7 +3398,7 @@ function setupFrameHandling(page, forceDebug) {
3349
3398
  urlsToProcess.forEach(url => {
3350
3399
  allTasks.push({
3351
3400
  url,
3352
- config: site,
3401
+ config: { ...site, _originalUrl: url }, // Preserve original URL for CDP domain checking
3353
3402
  taskId: allTasks.length // For tracking
3354
3403
  });
3355
3404
  });
@@ -3867,4 +3916,4 @@ function setupFrameHandling(page, forceDebug) {
3867
3916
  if (forceDebug) console.log(formatLogMessage('debug', `About to exit process...`));
3868
3917
  process.exit(0);
3869
3918
 
3870
- })();
3919
+ })();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "2.0.8",
3
+ "version": "2.0.9",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {