@fanboynz/network-scanner 2.0.46 → 2.0.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/adblock.js CHANGED
@@ -62,6 +62,7 @@ function parseAdblockRules(filePath, options = {}) {
62
62
  domainMap: new Map(), // ||domain.com^ - Exact domains for O(1) lookup
63
63
  domainRules: [], // ||*.domain.com^ - Wildcard domains (fallback)
64
64
  thirdPartyRules: [], // ||domain.com^$third-party
65
+ firstPartyRules: [],
65
66
  pathRules: [], // /ads/*
66
67
  scriptRules: [], // .js$script
67
68
  regexRules: [], // /regex/
@@ -73,6 +74,7 @@ function parseAdblockRules(filePath, options = {}) {
73
74
  domain: 0,
74
75
  domainMapEntries: 0, // Exact domain matches in Map
75
76
  thirdParty: 0,
77
+ firstParty: 0,
76
78
  path: 0,
77
79
  script: 0,
78
80
  regex: 0,
@@ -137,6 +139,9 @@ function parseAdblockRules(filePath, options = {}) {
137
139
  if (parsedRule.isThirdParty) {
138
140
  rules.thirdPartyRules.push(parsedRule);
139
141
  rules.stats.thirdParty++;
142
+ } else if (parsedRule.isFirstParty) {
143
+ rules.firstPartyRules.push(parsedRule);
144
+ rules.stats.firstParty++;
140
145
  } else if (parsedRule.isDomain) {
141
146
  // Store exact domains in Map for O(1) lookup, wildcards in array
142
147
  if (parsedRule.domain && !parsedRule.domain.includes('*')) {
@@ -170,6 +175,7 @@ function parseAdblockRules(filePath, options = {}) {
170
175
  console.log(` • Exact matches (Map): ${rules.stats.domainMapEntries}`);
171
176
  console.log(` • Wildcard patterns (Array): ${rules.domainRules.length}`);
172
177
  console.log(` - Third-party rules: ${rules.stats.thirdParty}`);
178
+ console.log(` - First-party rules: ${rules.stats.firstParty}`);
173
179
  console.log(` - Path rules: ${rules.stats.path}`);
174
180
  console.log(` - Script rules: ${rules.stats.script}`);
175
181
  console.log(` - Regex rules: ${rules.stats.regex}`);
@@ -193,7 +199,10 @@ function parseRule(rule, isWhitelist) {
193
199
  isWhitelist,
194
200
  isDomain: false,
195
201
  isThirdParty: false,
202
+ isFirstParty: false,
196
203
  isScript: false,
204
+ resourceTypes: null, // Array of allowed resource types, null = all types
205
+ excludedResourceTypes: null, // Array of excluded resource types ($~script, $~image)
197
206
  isRegex: false,
198
207
  domainRestrictions: null, // { include: ['site.com'], exclude: ['~site.com'] }
199
208
  pattern: '',
@@ -233,10 +242,47 @@ function parseRule(rule, isWhitelist) {
233
242
  parsed.isThirdParty = true;
234
243
  }
235
244
 
236
- // Check for script option
237
- if (parsed.options['script']) {
238
- parsed.isScript = true;
245
+ // Check for first-party option ($first-party, $1p, $~third-party)
246
+ if (parsed.options['first-party'] || parsed.options['1p'] || parsed.options['~third-party']) {
247
+ parsed.isFirstParty = true;
239
248
  }
249
+
250
+ // Parse resource type options
251
+ const TYPE_MAP = {
252
+ 'script': 'script',
253
+ 'stylesheet': 'stylesheet',
254
+ 'css': 'stylesheet',
255
+ 'image': 'image',
256
+ 'xmlhttprequest': 'xhr',
257
+ 'xhr': 'xhr',
258
+ 'font': 'font',
259
+ 'media': 'media',
260
+ 'websocket': 'websocket',
261
+ 'subdocument': 'subdocument',
262
+ 'document': 'document',
263
+ 'ping': 'ping',
264
+ 'other': 'other'
265
+ };
266
+
267
+ const matchedTypes = Object.keys(parsed.options)
268
+ .filter(key => TYPE_MAP[key])
269
+ .map(key => TYPE_MAP[key]);
270
+
271
+ const excludedTypes = Object.keys(parsed.options)
272
+ .filter(key => key.startsWith('~') && TYPE_MAP[key.substring(1)])
273
+ .map(key => TYPE_MAP[key.substring(1)]);
274
+
275
+ if (matchedTypes.length > 0) {
276
+ parsed.resourceTypes = matchedTypes;
277
+ if (parsed.options['script']) {
278
+ parsed.isScript = true;
279
+ }
280
+ }
281
+
282
+ if (excludedTypes.length > 0) {
283
+ parsed.excludedResourceTypes = excludedTypes;
284
+ }
285
+
240
286
  // Parse domain option: $domain=site1.com|site2.com|~excluded.com
241
287
  if (parsed.options['domain']) {
242
288
  const domainList = parsed.options['domain'];
@@ -261,15 +307,6 @@ function parseRule(rule, isWhitelist) {
261
307
  exclude: exclude.length > 0 ? exclude : null
262
308
  };
263
309
 
264
- // For debugging
265
- if (enableLogging && parsed.domainRestrictions) {
266
- if (parsed.domainRestrictions.include) {
267
- // console.log(`[Adblock] Rule includes domains: ${parsed.domainRestrictions.include.join(', ')}`);
268
- }
269
- if (parsed.domainRestrictions.exclude) {
270
- // console.log(`[Adblock] Rule excludes domains: ${parsed.domainRestrictions.exclude.join(', ')}`);
271
- }
272
- }
273
310
  }
274
311
  }
275
312
 
@@ -284,7 +321,8 @@ function parseRule(rule, isWhitelist) {
284
321
  else if (pattern.startsWith('/') && pattern.endsWith('/')) {
285
322
  parsed.isRegex = true;
286
323
  const regexPattern = pattern.substring(1, pattern.length - 1);
287
- parsed.matcher = new RegExp(regexPattern, 'i');
324
+ const regex = new RegExp(regexPattern, 'i');
325
+ parsed.matcher = (url) => regex.test(url);
288
326
  }
289
327
  // Path/wildcard rules: /ads/* or ad.js
290
328
  else {
@@ -344,6 +382,7 @@ function createMatcher(rules, options = {}) {
344
382
  const urlCache = new URLCache(1000);
345
383
  let cacheHits = 0;
346
384
  let cacheMisses = 0;
385
+ const hasPartyRules = rules.thirdPartyRules.length > 0 || rules.firstPartyRules.length > 0;
347
386
 
348
387
  return {
349
388
  rules,
@@ -378,22 +417,21 @@ function createMatcher(rules, options = {}) {
378
417
  cacheMisses++;
379
418
  }
380
419
 
381
- // OPTIMIZATION #1: Only calculate third-party status if we have third-party rules to check
382
- // Avoids expensive URL parsing (2x new URL() calls) when no third-party rules exist
383
- const isThirdParty = (sourceUrl && rules.thirdPartyRules.length > 0)
384
- ? isThirdPartyRequest(url, sourceUrl)
385
- : false;
386
-
387
- // OPTIMIZATION #2: Calculate hostname parts once and reuse (avoid duplicate split operations)
420
+ // Calculate hostname parts once and reuse
388
421
  const hostnameParts = lowerHostname.split('.');
422
+
423
+ // Precompute parent domains once, reused for whitelist and block checks
424
+ const parentDomains = [];
425
+ const partsLen = hostnameParts.length;
426
+ for (let i = 1; i < partsLen; i++) {
427
+ parentDomains.push(hostnameParts.slice(i).join('.'));
428
+ }
389
429
 
390
- // V8 OPT: Extract and cache source page domain for $domain option checking
430
+ // Extract and cache source page domain for $domain and third-party checks
391
431
  let sourceDomain = null;
392
- let cachedSourceData = null;
393
432
 
394
433
  if (sourceUrl) {
395
- // Check if sourceUrl is in cache (avoid duplicate URL parsing)
396
- cachedSourceData = urlCache.get(sourceUrl);
434
+ const cachedSourceData = urlCache.get(sourceUrl);
397
435
 
398
436
  if (cachedSourceData) {
399
437
  sourceDomain = cachedSourceData.lowerHostname;
@@ -416,6 +454,11 @@ function createMatcher(rules, options = {}) {
416
454
  }
417
455
  }
418
456
 
457
+ // Calculate third-party status using already-parsed hostnames
458
+ const isThirdParty = (sourceDomain && hasPartyRules)
459
+ ? getBaseDomain(lowerHostname) !== getBaseDomain(sourceDomain)
460
+ : false;
461
+
419
462
  // === WHITELIST CHECK (exception rules take precedence) ===
420
463
 
421
464
  // Fast path: Check exact domain in Map (O(1))
@@ -430,10 +473,8 @@ function createMatcher(rules, options = {}) {
430
473
  }
431
474
 
432
475
  // Check parent domains for subdomain matches (e.g., sub.example.com -> example.com)
433
- const partsLen = hostnameParts.length; // V8: Cache array length
434
- for (let i = 1; i < partsLen; i++) {
435
- const parentDomain = hostnameParts.slice(i).join('.');
436
- rule = rules.whitelistMap.get(parentDomain); // V8: Single Map lookup
476
+ for (let i = 0; i < parentDomains.length; i++) {
477
+ rule = rules.whitelistMap.get(parentDomains[i]);
437
478
  if (rule) {
438
479
  if (enableLogging) {
439
480
  console.log(`[Adblock] Whitelisted: ${url} (${rule.raw})`);
@@ -470,9 +511,8 @@ function createMatcher(rules, options = {}) {
470
511
  }
471
512
 
472
513
  // Check parent domains for subdomain matches (e.g., ads.example.com -> example.com)
473
- for (let i = 1; i < partsLen; i++) { // V8: Reuse cached length
474
- const parentDomain = hostnameParts.slice(i).join('.');
475
- rule = rules.domainMap.get(parentDomain); // V8: Single Map lookup
514
+ for (let i = 0; i < parentDomains.length; i++) {
515
+ rule = rules.domainMap.get(parentDomains[i]);
476
516
  if (rule) {
477
517
  if (enableLogging) {
478
518
  console.log(`[Adblock] Blocked domain: ${url} (${rule.raw})`);
@@ -513,6 +553,24 @@ function createMatcher(rules, options = {}) {
513
553
  }
514
554
  }
515
555
 
556
+ // Check first-party rules
557
+ if (!isThirdParty) {
558
+ const firstPartyLen = rules.firstPartyRules.length;
559
+ for (let i = 0; i < firstPartyLen; i++) {
560
+ const rule = rules.firstPartyRules[i];
561
+ if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
562
+ if (enableLogging) {
563
+ console.log(`[Adblock] Blocked first-party: ${url} (${rule.raw})`);
564
+ }
565
+ return {
566
+ blocked: true,
567
+ rule: rule.raw,
568
+ reason: 'first_party_rule'
569
+ };
570
+ }
571
+ }
572
+ }
573
+
516
574
  // Check script rules
517
575
  if (resourceType === 'script' || url.endsWith('.js')) {
518
576
  const scriptRulesLen = rules.scriptRules.length; // V8: Cache length
@@ -675,6 +733,14 @@ function matchesDomainRestrictions(rule, sourceDomain) {
675
733
  return true;
676
734
  }
677
735
 
736
+ // Module-level constant for resource type normalization (hot path)
737
+ const RESOURCE_TYPE_ALIASES = {
738
+ 'script': 'script', 'stylesheet': 'stylesheet', 'image': 'image',
739
+ 'xhr': 'xhr', 'fetch': 'xhr', 'font': 'font', 'media': 'media',
740
+ 'websocket': 'websocket', 'subdocument': 'subdocument',
741
+ 'document': 'document', 'ping': 'ping', 'other': 'other'
742
+ };
743
+
678
744
  /**
679
745
  * Check if rule matches the given URL
680
746
  * @param {Object} rule - Parsed rule object
@@ -685,6 +751,7 @@ function matchesDomainRestrictions(rule, sourceDomain) {
685
751
  * @param {string|null} sourceDomain - Source page domain (for $domain option)
686
752
  * @returns {boolean} True if rule matches
687
753
  */
754
+
688
755
  function matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain) {
689
756
  // Check domain restrictions first
690
757
  if (!matchesDomainRestrictions(rule, sourceDomain)) {
@@ -695,11 +762,34 @@ function matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDoma
695
762
  return false;
696
763
  }
697
764
 
698
- // Check script option
699
- if (rule.isScript && resourceType !== 'script' && !url.endsWith('.js')) {
765
+ // Check first-party option
766
+ if (rule.isFirstParty && isThirdParty) {
700
767
  return false;
701
768
  }
702
769
 
770
+ // Check resource type restrictions
771
+ if (rule.resourceTypes) {
772
+ if (!resourceType) {
773
+ // No resource type info available — allow match for safety
774
+ } else {
775
+ // Normalize Puppeteer resource types to match our type names
776
+ const normalizedType = RESOURCE_TYPE_ALIASES[resourceType] || resourceType;
777
+ if (!rule.resourceTypes.includes(normalizedType)) {
778
+ return false;
779
+ }
780
+ }
781
+ }
782
+
783
+ // Check negated resource type restrictions ($~script, $~image, etc.)
784
+ if (rule.excludedResourceTypes) {
785
+ if (resourceType) {
786
+ const normalizedType = RESOURCE_TYPE_ALIASES[resourceType] || resourceType;
787
+ if (rule.excludedResourceTypes.includes(normalizedType)) {
788
+ return false;
789
+ }
790
+ }
791
+ }
792
+
703
793
  // Apply matcher function
704
794
  if (rule.isDomain) {
705
795
  return rule.matcher(url, hostname);
@@ -708,27 +798,6 @@ function matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDoma
708
798
  }
709
799
  }
710
800
 
711
- /**
712
- * Determine if request is third-party
713
- * @param {string} requestUrl - URL being requested
714
- * @param {string} sourceUrl - URL of the page making the request
715
- * @returns {boolean} True if third-party request
716
- */
717
- function isThirdPartyRequest(requestUrl, sourceUrl) {
718
- try {
719
- const requestHostname = new URL(requestUrl).hostname;
720
- const sourceHostname = new URL(sourceUrl).hostname;
721
-
722
- // Extract base domain (handle subdomains)
723
- const requestDomain = getBaseDomain(requestHostname);
724
- const sourceDomain = getBaseDomain(sourceHostname);
725
-
726
- return requestDomain !== sourceDomain;
727
- } catch (err) {
728
- return false;
729
- }
730
- }
731
-
732
801
  /**
733
802
  * Extract base domain from hostname
734
803
  * @param {string} hostname - Full hostname
@@ -745,6 +814,5 @@ function getBaseDomain(hostname) {
745
814
 
746
815
  module.exports = {
747
816
  parseAdblockRules,
748
- isThirdPartyRequest,
749
817
  getBaseDomain
750
818
  };
package/lib/grep.js CHANGED
@@ -3,9 +3,6 @@
3
3
 
4
4
  const fs = require('fs');
5
5
  const { spawnSync } = require('child_process');
6
- const crypto = require('crypto');
7
- const path = require('path');
8
- const os = require('os');
9
6
  const { colorize, colors, messageColors, tags, formatLogMessage } = require('./colorize');
10
7
 
11
8
  // === Constants ===
@@ -21,53 +18,9 @@ const GREP_DEFAULTS = {
21
18
  GREP_SUCCESS_STATUS: 0,
22
19
  GREP_NOT_FOUND_STATUS: 1,
23
20
  CURL_SUCCESS_STATUS: 0,
24
- VERSION_LINE_INDEX: 0,
25
- RANDOM_STRING_LENGTH: 9,
26
- TEMP_DIR_PREFIX: 'grep_search_'
21
+ VERSION_LINE_INDEX: 0
27
22
  };
28
23
 
29
- /**
30
- * Creates a temporary directory and file with content for grep processing
31
- * Uses mkdtempSync to avoid race conditions from filename collisions
32
- * @param {string} content - The content to write to temp file
33
- * @param {string} prefix - Prefix for temp filename
34
- * @returns {object} Object containing tempDir and tempFile paths
35
- */
36
- function createTempFile(content, prefix = 'scanner_grep') {
37
- const tempDir = os.tmpdir();
38
-
39
- // Create a unique temporary directory to avoid race conditions
40
- const uniqueTempDir = fs.mkdtempSync(path.join(tempDir, GREP_DEFAULTS.TEMP_DIR_PREFIX));
41
-
42
- // Use cryptographically secure random ID for additional uniqueness
43
- const uniqueId = crypto.randomBytes(8).toString('hex');
44
- const tempFile = path.join(uniqueTempDir, `${prefix}_${uniqueId}.tmp`);
45
-
46
- try {
47
- // Write atomically with error handling
48
- fs.writeFileSync(tempFile, content, {
49
- encoding: 'utf8',
50
- mode: 0o600 // Restrict permissions for security
51
- });
52
-
53
- return { tempDir: uniqueTempDir, tempFile };
54
- } catch (error) {
55
- // Clean up temp directory on write failure
56
- try {
57
- if (fs.existsSync(tempFile)) {
58
- fs.unlinkSync(tempFile);
59
- }
60
- if (fs.existsSync(uniqueTempDir)) {
61
- fs.rmdirSync(uniqueTempDir);
62
- }
63
- } catch (cleanupErr) {
64
- // Ignore cleanup errors, report original error
65
- }
66
-
67
- throw new Error(`Failed to create temp file: ${error.message}`);
68
- }
69
- }
70
-
71
24
  /**
72
25
  * Searches content using grep with the provided patterns
73
26
  * @param {string} content - The content to search
@@ -86,14 +39,8 @@ async function grepContent(content, searchPatterns, options = {}) {
86
39
  if (!content || searchPatterns.length === 0) {
87
40
  return { found: false, matchedPattern: null, allMatches: [] };
88
41
  }
89
-
90
- let tempFile = null;
91
-
42
+
92
43
  try {
93
- // Create temporary directory and file with content
94
- const tempResult = createTempFile(content, 'grep_search');
95
- tempDir = tempResult.tempDir;
96
- tempFile = tempResult.tempFile;
97
44
 
98
45
  const allMatches = [];
99
46
  let firstMatch = null;
@@ -110,12 +57,12 @@ async function grepContent(content, searchPatterns, options = {}) {
110
57
  if (wholeWord) grepArgs.push('-w');
111
58
  if (!regex) grepArgs.push('-F'); // Fixed strings (literal)
112
59
 
113
- // Add pattern and file
114
- grepArgs.push(pattern, tempFile);
60
+ grepArgs.push(pattern);
115
61
 
116
62
  try {
117
63
  const result = spawnSync('grep', grepArgs, {
118
64
  encoding: 'utf8',
65
+ input: content,
119
66
  timeout: GREP_DEFAULTS.GREP_TIMEOUT,
120
67
  maxBuffer: GREP_DEFAULTS.MAX_BUFFER_SIZE
121
68
  });
@@ -146,22 +93,6 @@ async function grepContent(content, searchPatterns, options = {}) {
146
93
 
147
94
  } catch (error) {
148
95
  throw new Error(`Grep search failed: ${error.message}`);
149
- } finally {
150
- // Clean up temporary file and directory
151
- if (tempFile) {
152
- try {
153
- fs.unlinkSync(tempFile);
154
- } catch (cleanupErr) {
155
- console.warn(formatLogMessage('warn', `[grep] Failed to cleanup temp file ${tempFile}: ${cleanupErr.message}`));
156
- }
157
- }
158
- if (tempDir) {
159
- try {
160
- fs.rmdirSync(tempDir);
161
- } catch (cleanupErr) {
162
- console.warn(formatLogMessage('warn', `[grep] Failed to cleanup temp directory ${tempDir}: ${cleanupErr.message}`));
163
- }
164
- }
165
96
  }
166
97
  }
167
98
 
@@ -417,6 +348,5 @@ module.exports = {
417
348
  grepContent,
418
349
  downloadAndGrep,
419
350
  createGrepHandler,
420
- validateGrepAvailability,
421
- createTempFile
351
+ validateGrepAvailability
422
352
  };
package/nwss.js CHANGED
@@ -1790,10 +1790,6 @@ function setupFrameHandling(page, forceDebug) {
1790
1790
  }
1791
1791
  });
1792
1792
 
1793
- page.on('response', (response) => {
1794
- // Response handler - removed incorrect error logging
1795
- });
1796
-
1797
1793
  // Apply flowProxy timeouts if detection is enabled
1798
1794
  if (flowproxyDetection) {
1799
1795
  const flowproxyTimeouts = getFlowProxyTimeouts(siteConfig);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "2.0.46",
3
+ "version": "2.0.48",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {