@fanboynz/network-scanner 1.0.83 → 1.0.85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/output.js CHANGED
@@ -18,13 +18,35 @@ function matchesIgnoreDomain(domain, ignorePatterns) {
18
18
 
19
19
  return ignorePatterns.some(pattern => {
20
20
  if (pattern.includes('*')) {
21
- // Convert wildcard pattern to regex
22
- const regexPattern = pattern
23
- .replace(/\./g, '\\.') // Escape dots
24
- .replace(/\*/g, '.*'); // Convert * to .*
25
- return new RegExp(`^${regexPattern}$`).test(domain);
21
+ // Enhanced wildcard pattern handling
22
+ if (pattern.startsWith('*.')) {
23
+ // Pattern: *.example.com
24
+ const wildcardDomain = pattern.substring(2); // Remove "*."
25
+ const wildcardRoot = extractDomainFromRule(`||${wildcardDomain}^`) || wildcardDomain;
26
+ const domainRoot = extractDomainFromRule(`||${domain}^`) || domain;
27
+
28
+ // Use basic root domain comparison for output filtering
29
+ const getSimpleRoot = (d) => {
30
+ const parts = d.split('.');
31
+ return parts.length >= 2 ? parts.slice(-2).join('.') : d;
32
+ };
33
+
34
+ return getSimpleRoot(domainRoot) === getSimpleRoot(wildcardRoot);
35
+ } else if (pattern.endsWith('.*')) {
36
+ // Pattern: example.*
37
+ const baseDomain = pattern.slice(0, -2); // Remove ".*"
38
+ return domain.startsWith(baseDomain + '.');
39
+ } else {
40
+ // Complex wildcard pattern
41
+ const regexPattern = pattern
42
+ .replace(/\./g, '\\.') // Escape dots
43
+ .replace(/\*/g, '.*'); // Convert * to .*
44
+ return new RegExp(`^${regexPattern}$`).test(domain);
45
+ }
46
+ } else {
47
+ // Exact pattern matching
48
+ return domain === pattern || domain.endsWith('.' + pattern);
26
49
  }
27
- return domain.endsWith(pattern);
28
50
  });
29
51
  }
30
52
 
@@ -316,6 +316,77 @@ function cleanupIgnoreDomains(results, ignoreDomains, options = {}) {
316
316
  return cleanedResults;
317
317
  }
318
318
 
319
/**
 * Extracts the domain targeted by a single blocking rule.
 *
 * Recognised rule formats, checked in this order:
 *   - adblock:        ||domain.com^
 *   - hosts file:     127.0.0.1 domain.com  /  0.0.0.0 domain.com
 *   - dnsmasq:        local=/domain.com/
 *   - dnsmasq (old):  server=/domain.com/
 *   - unbound:        local-zone: "domain.com." always_null
 *   - privoxy:        { +block } .domain.com
 *   - pi-hole regex:  (^|\.)domain\.com$
 * Anything else falls back to the first domain-looking token in the string.
 *
 * @param {string} rule - Rule string in one of the formats above
 * @returns {string|null} Extracted domain, or null if the input is not a
 *   non-empty string or no domain could be found
 */
function extractDomainFromRule(rule) {
  if (!rule || typeof rule !== 'string') {
    return null;
  }

  try {
    let extractedDomain = null;

    if (rule.startsWith('||') && rule.includes('^')) {
      // ||domain.com^ format (adblock): take everything up to the first "/" or "^"
      const match = rule.match(/^\|\|([^/\^]+)/);
      if (match) {
        extractedDomain = match[1];
      }
    } else if (rule.startsWith('127.0.0.1 ') || rule.startsWith('0.0.0.0 ')) {
      // hosts file format: the domain is the second whitespace-separated field
      const parts = rule.split(/\s+/);
      if (parts.length >= 2) {
        extractedDomain = parts[1];
      }
    } else if (rule.includes('local=/')) {
      // dnsmasq format: local=/domain.com/
      const match = rule.match(/local=\/([^/]+)\//);
      if (match) {
        extractedDomain = match[1];
      }
    } else if (rule.includes('server=/')) {
      // dnsmasq old format: server=/domain.com/
      const match = rule.match(/server=\/([^/]+)\//);
      if (match) {
        extractedDomain = match[1];
      }
    } else if (rule.includes('local-zone:') && rule.includes('always_null')) {
      // unbound format: local-zone: "domain.com." always_null
      // The capture is lazy so the optional trailing root dot is consumed by
      // `\.?` instead of ending up inside the extracted domain.
      const match = rule.match(/local-zone:\s*"([^"]+?)\.?"/);
      if (match) {
        extractedDomain = match[1];
      }
    } else if (rule.includes('+block') && rule.includes('.')) {
      // privoxy format: { +block } .domain.com (leading dot is dropped)
      const match = rule.match(/\{\s*\+block\s*\}\s*\.?([^\s]+)/);
      if (match) {
        extractedDomain = match[1];
      }
    } else if (rule.match(/^\(\^\|\\\.\).*\\\.\w+\$$/)) {
      // pi-hole regex format: (^|\.)domain\.com$
      // The capture group must include the final "\.tld" part; otherwise the
      // TLD is silently dropped and "domain\.com" is reported as "domain".
      const match = rule.match(/^\(\^\|\\\.\)(.+\\\.\w+)\$$/);
      if (match) {
        // Unescape the dots: "domain\.com" -> "domain.com"
        extractedDomain = match[1].replace(/\\\./g, '.');
      }
    } else {
      // Fallback: first token that looks like a domain (label(s) + alphabetic TLD)
      const domainMatch = rule.match(/([a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,})/);
      if (domainMatch) {
        extractedDomain = domainMatch[1];
      }
    }

    return extractedDomain;
  } catch (parseErr) {
    // Best-effort helper: an unparseable rule is reported as "no domain"
    return null;
  }
}
389
+
319
390
  /**
320
391
  * Post-scan cleanup function to remove first-party domains from results
321
392
  * Only processes sites that have firstParty: false in their configuration
@@ -564,6 +635,111 @@ function validateScanResults(results, options = {}) {
564
635
  return validatedResults;
565
636
  }
566
637
 
638
+
639
/**
 * Final validation check for firstParty: false violations.
 *
 * For every result whose site configuration has `firstParty === false`, each
 * rule's domain is extracted with extractDomainFromRule() and compared to the
 * scanned site's domain via shouldRemoveAsFirstParty(). Rules flagged for
 * removal are dropped from the result and counted as violations. Rules from
 * which no domain can be extracted are kept. Results for other sites, results
 * without rules, and results whose URL yields no domain are passed through
 * unchanged (same object reference).
 *
 * @param {Array} results - Array of scan results ({ url, rules, ... })
 * @param {Array} sites - Array of site configurations; `url` may be a string
 *   or an array of strings. A missing or non-array value is treated as empty.
 * @param {Object} options - Options object
 * @param {boolean} [options.forceDebug=false] - Emit debug log lines
 * @param {boolean} [options.silentMode=false] - Suppress violation messages
 * @returns {Array} Results with any remaining first-party domains removed
 */
function finalFirstPartyValidation(results, sites, options = {}) {
  const { forceDebug = false, silentMode = false } = options;

  if (!results || results.length === 0) {
    return results;
  }

  // Map every configured URL to its site config. Guard against a missing
  // site list (or null entries) so this safety-net step cannot crash the
  // whole post-processing pipeline.
  const urlToSiteConfig = new Map();
  const siteList = Array.isArray(sites) ? sites : [];
  siteList.forEach(site => {
    if (!site) {
      return;
    }
    const urls = Array.isArray(site.url) ? site.url : [site.url];
    urls.forEach(url => {
      urlToSiteConfig.set(url, site);
    });
  });

  const finalResults = [];
  let totalViolationsFound = 0;
  let sitesWithViolations = 0;

  results.forEach(result => {
    const siteConfig = urlToSiteConfig.get(result.url);

    // Only validate sites with firstParty: false
    const shouldValidate = siteConfig && siteConfig.firstParty === false;

    if (!shouldValidate || !result.rules || result.rules.length === 0) {
      finalResults.push(result);
      return;
    }

    const scannedDomain = safeGetDomain(result.url, false);
    if (!scannedDomain) {
      // Without a domain for the scanned URL there is nothing to compare against
      finalResults.push(result);
      return;
    }

    const cleanedRules = [];
    const violatingRules = [];

    result.rules.forEach(rule => {
      const extractedDomain = extractDomainFromRule(rule);
      if (extractedDomain) {
        const matchResult = shouldRemoveAsFirstParty(extractedDomain, scannedDomain, forceDebug);

        if (matchResult.shouldRemove) {
          violatingRules.push({
            rule: rule,
            domain: extractedDomain,
            reason: `VALIDATION FAILURE: ${matchResult.reason}`
          });
          totalViolationsFound++;
          return;
        }
      }
      // Kept: either no domain could be extracted or it is not first-party
      cleanedRules.push(rule);
    });

    if (violatingRules.length > 0) {
      sitesWithViolations++;

      if (!silentMode) {
        const errorMessage = `? CONFIG VIOLATION: Found ${violatingRules.length} first-party rule(s) in ${scannedDomain} (firstParty: false)`;
        if (messageColors && messageColors.error) {
          console.log(messageColors.error(errorMessage));
        } else {
          console.log(errorMessage);
        }
      }

      if (forceDebug) {
        console.log(formatLogMessage('debug', `[final-validation] Violations found for ${result.url}:`));
        violatingRules.forEach((violation, idx) => {
          console.log(formatLogMessage('debug', ` [${idx + 1}] ${violation.rule} -> ${violation.domain}`));
        });
      }
    }

    finalResults.push({ ...result, rules: cleanedRules });
  });

  // Summary. The "no violations" debug line is only emitted when there really
  // were none; silentMode merely suppresses the failure summary, it must not
  // turn a failure into a success message.
  if (totalViolationsFound > 0) {
    if (!silentMode) {
      const summaryMessage = `\n? SCAN FILTERING FAILURE: Removed ${totalViolationsFound} first-party rules from ${sitesWithViolations} site(s) in post-processing`;
      console.log(summaryMessage);
      console.log('?? This indicates firstParty: false filtering failed during scan - consider investigating root cause.');
    }
  } else if (forceDebug) {
    console.log(formatLogMessage('debug', '[final-validation] No first-party violations found - filtering working correctly'));
  }

  return finalResults;
}
742
+
567
743
  /**
568
744
  * Main post-processing function that runs all cleanup and validation steps
569
745
  *
@@ -587,8 +763,11 @@ function processResults(results, sites, options = {}) {
587
763
 
588
764
  // Step 2: Clean up ignoreDomains (final safety net)
589
765
  processedResults = cleanupIgnoreDomains(processedResults, options.ignoreDomains || [], options);
766
+
767
+ // Step 3: Final validation for firstParty: false configurations
768
+ processedResults = finalFirstPartyValidation(processedResults, sites, options);
590
769
 
591
- // Step 3: Validate results
770
+ // Step 4: Validate results
592
771
  processedResults = validateScanResults(processedResults, options);
593
772
 
594
773
  if (forceDebug) {
@@ -602,6 +781,8 @@ function processResults(results, sites, options = {}) {
602
781
  module.exports = {
603
782
  cleanupFirstPartyDomains,
604
783
  cleanupIgnoreDomains,
784
+ finalFirstPartyValidation,
785
+ extractDomainFromRule,
605
786
  validateScanResults,
606
787
  processResults
607
788
  };
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v1.0.83 ===
1
+ // === Network scanner script (nwss.js) v1.0.85 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -123,7 +123,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
123
123
  const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
124
124
 
125
125
  // --- Script Configuration & Constants ---
126
- const VERSION = '1.0.83'; // Script version
126
+ const VERSION = '1.0.85'; // Script version
127
127
 
128
128
  // get startTime
129
129
  const startTime = Date.now();
@@ -1435,6 +1435,9 @@ function setupFrameHandling(page, forceDebug) {
1435
1435
  const originalRootDomain = safeGetDomain(currentUrl, false);
1436
1436
  if (originalRootDomain) {
1437
1437
  firstPartyDomains.add(originalRootDomain);
1438
+ if (forceDebug) {
1439
+ console.log(formatLogMessage('debug', `Initial first-party domain: ${originalRootDomain} for ${currentUrl}`));
1440
+ }
1438
1441
  }
1439
1442
 
1440
1443
  // Track redirect domains to exclude from matching
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "1.0.83",
3
+ "version": "1.0.85",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {
package/regex-samples.md CHANGED
@@ -1,10 +1,130 @@
1
- Just examples of usage, always review the output before using publicly
1
+ # EasyPrivacy Generic Strings - JSON Regex Conversion
2
2
 
3
- Validate using;
3
+ Tracking and analytics patterns converted to JSON-safe regex. Always validate before using.
4
+
5
+ Test patterns at:
4
6
  * https://regex101.com/
5
7
  * https://regexr.com/
6
8
 
7
- | Domain | JSON Regex |
9
+ ## Event & Impression Tracking
10
+
11
+ | Pattern | JSON Regex |
12
+ |:--------|:-----------|
13
+ | `&EventType=DataDealImpression&` | `&EventType=DataDealImpression&` |
14
+ | `?logType=impression&` | `\\?logType=impression&` |
15
+ | `?groupType=engagement&eventType=CLICK&` | `\\?groupType=engagement&eventType=CLICK&` |
16
+ | `?type=page&event=` | `\\?type=page&event=` |
17
+ | `?event=performancelogger:` | `\\?event=performancelogger:` |
18
+ | `*view*pixel&` | `.*view.*pixel&` |
19
+
20
+ ## Script Files & Resources
21
+
22
+ | Pattern | JSON Regex |
23
+ |:--------|:-----------|
24
+ | `-adobeDatalayer_bridge.js` | `-adobeDatalayer_bridge\\.js` |
25
+ | `_chartbeat.js` | `_chartbeat\\.js` |
26
+ | `/owa.tracker-combined-min.js` | `\\/owa\\.tracker-combined-min\\.js` |
27
+ | `.v4.analytics.` | `\\.v4\\.analytics\\.` |
28
+ | `/vli-platform/adb-analytics@` | `\\/vli-platform\\/adb-analytics@` |
29
+
30
+ ## Tracking Endpoints
31
+
32
+ | Pattern | JSON Regex |
33
+ |:--------|:-----------|
34
+ | `.com/track?v=` | `\\.com\\/track\\?v=` |
35
+ | `/track?cb=` | `\\/track\\?cb=` |
36
+ | `/track.gif?data=` | `\\/track\\.gif\\?data=` |
37
+ | `/track_framework_metrics?` | `\\/track_framework_metrics\\?` |
38
+ | `/track/pageview?` | `\\/track\\/pageview\\?` |
39
+ | `-click-tracker.` | `-click-tracker\\.` |
40
+
41
+ ## Service & Tracking IDs
42
+
43
+ | Pattern | JSON Regex |
44
+ |:--------|:-----------|
45
+ | `.svc/?tracking_id=` | `\\.svc\\/\\?tracking_id=` |
46
+ | `/get_site_data?requestUUID=` | `\\/get_site_data\\?requestUUID=` |
47
+ | `/ns.html?id=GTM-` | `\\/ns\\.html\\?id=GTM-` |
48
+ | `/tag/proxy?id=G-` | `\\/tag\\/proxy\\?id=G-` |
49
+ | `/sk-park.php?pid=` | `\\/sk-park\\.php\\?pid=` |
50
+
51
+ ## PHP Action Parameters
52
+
53
+ | Pattern | JSON Regex |
54
+ |:--------|:-----------|
55
+ | `.php?action=browse&` | `\\.php\\?action=browse&` |
56
+ | `.php?action_name=` | `\\.php\\?action_name=` |
57
+ | `_stat.php?referer=` | `_stat\\.php\\?referer=` |
58
+ | `/pagelogger/connector.php?` | `\\/pagelogger\\/connector\\.php\\?` |
59
+
60
+ ## Analytics & Performance
61
+
62
+ | Pattern | JSON Regex |
63
+ |:--------|:-----------|
64
+ | `/?essb_counter_` | `\\/\\?essb_counter_` |
65
+ | `/_i?referral_url=` | `\\/_i\\?referral_url=` |
66
+ | `/?livehit=` | `\\/\\?livehit=` |
67
+ | `/__ssobj/rum?` | `\\/__ssobj\\/rum\\?` |
68
+ | `/analytics/?event=` | `\\/analytics\\/\\?event=` |
69
+ | `/analytics/visit.php` | `\\/analytics\\/visit\\.php` |
70
+ | `/wpstatistics/v1/hit?` | `\\/wpstatistics\\/v1\\/hit\\?` |
71
+
72
+ ## Logging & Experiments
73
+
74
+ | Pattern | JSON Regex |
75
+ |:--------|:-----------|
76
+ | `/?log=experiment&` | `\\/\\?log=experiment&` |
77
+ | `/?log=performance-` | `\\/\\?log=performance-` |
78
+ | `?log=stats-` | `\\?log=stats-` |
79
+ | `?log=xhl-widgets-events&` | `\\?log=xhl-widgets-events&` |
80
+ | `/_/_/logClientError/` | `\\/_\\/_\\/logClientError\\/` |
81
+ | `/hits/logger?` | `\\/hits\\/logger\\?` |
82
+
83
+ ## Pixel & Image Tracking
84
+
85
+ | Pattern | JSON Regex |
86
+ |:--------|:-----------|
87
+ | `/anonymous_user_guid.gif?` | `\\/anonymous_user_guid\\.gif\\?` |
88
+ | `/0.gif?` | `\\/0\\.gif\\?` |
89
+ | `_c.gif?c=` | `_c\\.gif\\?c=` |
90
+ | `/urchin.html?` | `\\/urchin\\.html\\?` |
91
+
92
+ ## Counters & Views
93
+
94
+ | Pattern | JSON Regex |
95
+ |:--------|:-----------|
96
+ | `/ViewCounter/` | `\\/ViewCounter\\/` |
97
+ | `/prod/ping?` | `\\/prod\\/ping\\?` |
98
+
99
+ ## DataLayer & Google Analytics
100
+
101
+ | Pattern | JSON Regex |
102
+ |:--------|:-----------|
103
+ | `?[AQB]&ndh=1&t=` | `\\?[AQB]&ndh=1&t=` |
104
+ | `&l=dataLayer&cx=c` | `&l=dataLayer&cx=c` |
105
+
106
+ ## Common Variations & Wildcards
107
+
108
+ | Pattern Type | Example | JSON Regex |
109
+ |:-------------|:--------|:-----------|
110
+ | **Any tracking script** | `*track*.js` | `.*track.*\\.js` |
111
+ | **Any analytics path** | `/*/analytics/*` | `\\/.*\\/analytics\\/.*` |
112
+ | **Any pixel tracking** | `*.gif?*` | `.*\\.gif\\?.*` |
113
+ | **Any log parameter** | `?*log*=*` | `\\?.*log.*=.*` |
114
+ | **Any event parameter** | `?*event*=*` | `\\?.*event.*=.*` |
115
+
116
+ ## Advanced Patterns
117
+
118
+ | Use Case | Pattern | JSON Regex |
119
+ |:---------|:--------|:-----------|
120
+ | **Multiple parameters** | `?param1=value&param2=` | `\\?param1=value&param2=` |
121
+ | **Optional parameters** | `/path?optional_param` | `\\/path\\?.*optional_param` |
122
+ | **Subdomain tracking** | `tracking.*.com/` | `tracking\\..+\\.com\\/` |
123
+ | **Version in path** | `/v1/track` or `/v2/track` | `\\/v[0-9]+\\/track` |
124
+ | **Hash-based IDs** | `/track?id=abc123` | `\\/track\\?id=[a-zA-Z0-9]+` |
125
+
126
+
127
+ | Pattern | JSON Regex |
8
128
  |:---------------------------|:------------|
9
129
  | `/api/test/` | `\\/api\\/test\\/` |
10
130
  | `/rto.js` | `\\/rto\\.js` |
@@ -18,10 +138,16 @@ Validate using;
18
138
  | `abcdefghjk.top/` | `^https?:\\/\\/[a-z]{8,19}\\.top\\/$` |
19
139
  | `abcdefghjk.top/*` | `^https?:\\/\\/[a-z]{8,19}\\.top\\/.*$` |
20
140
  | `abcdefghjk.top/com` | `^https?:\\/\\/[a-z]{8,19}\\.(top\|com)\\/$` |
21
- | `abcdefghjk.top com/*` | `^https?:\\/\\/[a-z]{8,19}\\.(top\|com)\\/.*$` |
22
141
  | `.net/bar/` | `\\.net\\/bar\\/` |
23
142
  | `&test_me=` | `&test_me=` |
24
143
  | `/new/` `/test/` | `\\/(new\|test)\\/` |
25
- | `.com` or `.net` | `\\.(com\|net)\\/` |
26
-
144
+ | `.com` or `.net` | `\\.(com\|net)\\/` |
27
145
 
146
+ ## Notes
147
+ - Use `.*` for wildcard matching (matches any characters)
148
+ - Escape dots: `.` → `\\.`
149
+ - Escape slashes: `/` → `\\/`
150
+ - Escape question marks: `?` → `\\?`
151
+ - Use `[a-zA-Z0-9]` for alphanumeric characters
152
+ - Use `+` for one or more repetitions of the preceding token (e.g. `[0-9]+`)
153
+ - Use `*` after `.` for zero or more characters