@fanboynz/network-scanner 2.0.66 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,7 +14,8 @@
14
14
  // INSTALL:
15
15
  // npm install ghost-cursor (optional dependency)
16
16
 
17
- const { formatLogMessage } = require('./colorize');
17
+ const { formatLogMessage, messageColors } = require('./colorize');
18
+ const GHOST_CURSOR_TAG = messageColors.processing('[ghost-cursor]');
18
19
 
19
20
  let ghostCursorModule = null;
20
21
  let ghostCursorAvailable = false;
@@ -61,7 +62,7 @@ function createGhostCursor(page, options = {}) {
61
62
  return cursor;
62
63
  } catch (err) {
63
64
  if (forceDebug) {
64
- console.log(formatLogMessage('debug', `[ghost-cursor] Failed to create cursor: ${err.message}`));
65
+ console.log(formatLogMessage('debug', `${GHOST_CURSOR_TAG} Failed to create cursor: ${err.message}`));
65
66
  }
66
67
  return null;
67
68
  }
@@ -103,13 +104,13 @@ async function ghostMove(cursor, toX, toY, options = {}) {
103
104
  await cursor.moveTo({ x: toX, y: toY }, moveOpts);
104
105
 
105
106
  if (forceDebug) {
106
- console.log(formatLogMessage('debug', `[ghost-cursor] Moved to (${Math.round(toX)}, ${Math.round(toY)})`));
107
+ console.log(formatLogMessage('debug', `${GHOST_CURSOR_TAG} Moved to (${Math.round(toX)}, ${Math.round(toY)})`));
107
108
  }
108
109
 
109
110
  return true;
110
111
  } catch (err) {
111
112
  if (forceDebug) {
112
- console.log(formatLogMessage('debug', `[ghost-cursor] Move failed: ${err.message}`));
113
+ console.log(formatLogMessage('debug', `${GHOST_CURSOR_TAG} Move failed: ${err.message}`));
113
114
  }
114
115
  return false;
115
116
  }
@@ -162,13 +163,13 @@ async function ghostClick(cursor, target, options = {}) {
162
163
 
163
164
  if (forceDebug) {
164
165
  const label = typeof target === 'string' ? target : `(${Math.round(target.x)}, ${Math.round(target.y)})`;
165
- console.log(formatLogMessage('debug', `[ghost-cursor] Clicked ${label}`));
166
+ console.log(formatLogMessage('debug', `${GHOST_CURSOR_TAG} Clicked ${label}`));
166
167
  }
167
168
 
168
169
  return true;
169
170
  } catch (err) {
170
171
  if (forceDebug) {
171
- console.log(formatLogMessage('debug', `[ghost-cursor] Click failed: ${err.message}`));
172
+ console.log(formatLogMessage('debug', `${GHOST_CURSOR_TAG} Click failed: ${err.message}`));
172
173
  }
173
174
  return false;
174
175
  }
@@ -193,7 +194,7 @@ async function ghostRandomMove(cursor, options = {}) {
193
194
  return true;
194
195
  } catch (err) {
195
196
  if (options.forceDebug) {
196
- console.log(formatLogMessage('debug', `[ghost-cursor] Random move failed: ${err.message}`));
197
+ console.log(formatLogMessage('debug', `${GHOST_CURSOR_TAG} Random move failed: ${err.message}`));
197
198
  }
198
199
  return false;
199
200
  }
package/lib/grep.js CHANGED
@@ -2,33 +2,85 @@
2
2
  // Alternative to searchstring.js using grep for pattern matching
3
3
 
4
4
  const fs = require('fs');
5
+ // spawnSync only used for validateGrepAvailability (runs once at
6
+ // startup). Production grep + curl paths go through runProcess (async).
5
7
  const { spawnSync } = require('child_process');
6
- const { colorize, colors, messageColors, tags, formatLogMessage } = require('./colorize');
8
+ const { runProcess } = require('./spawn-async');
9
+ const { messageColors, formatLogMessage } = require('./colorize');
10
+ const GREP_TAG = messageColors.processing('[grep]');
7
11
 
8
12
  // === Constants ===
9
13
  const GREP_DEFAULTS = {
10
14
  TIMEOUT_SECONDS: 30,
11
15
  MAX_REDIRECTS: 5,
12
- MAX_SIZE_BYTES: 10 * 1024 * 1024, // 10MB
13
- VALIDATION_TIMEOUT: 5000, // 5 seconds
14
- SPAWN_TIMEOUT_MULTIPLIER: 1000, // Convert seconds to milliseconds
15
- GREP_TIMEOUT: 10000, // 10 seconds for grep operations
16
- MAX_BUFFER_SIZE: 1024 * 1024, // 1MB max buffer
16
+ // 50MB to match lib/curl.js and lib/searchstring.js — the three
17
+ // download paths previously had two different caps (10MB here, 50MB
18
+ // there) so the same URL could succeed via one path and fail via
19
+ // another.
20
+ MAX_SIZE_BYTES: 50 * 1024 * 1024,
21
+ // Cap grep's stdout collection at the input size — output can in
22
+ // theory exceed input (overlapping match contexts) but in practice
23
+ // matching lines from 50MB of content max out around that. Replaces
24
+ // the old 1MB MAX_BUFFER_SIZE that silently killed grep with ENOBUFS
25
+ // on pages with many matching lines, making the pattern silently
26
+ // report "not found" despite thousands of matches.
27
+ MAX_GREP_OUTPUT_BYTES: 50 * 1024 * 1024,
28
+ VALIDATION_TIMEOUT: 5000,
29
+ GREP_TIMEOUT: 10000,
17
30
  DEFAULT_MAX_MATCHES: 1000,
18
31
  GREP_SUCCESS_STATUS: 0,
19
- GREP_NOT_FOUND_STATUS: 1,
20
32
  CURL_SUCCESS_STATUS: 0,
21
33
  VERSION_LINE_INDEX: 0
22
34
  };
23
35
 
24
36
  /**
25
- * Searches content using grep with the provided patterns
37
+ * Run a single grep pattern against `content`, returning the result
38
+ * asynchronously. Uses spawn (NOT spawnSync) — same rationale as
39
+ * downloadAndGrep — and handles stdout buffering ourselves so we can
40
+ * accept output up to MAX_GREP_OUTPUT_BYTES instead of being capped
41
+ * at spawnSync's `maxBuffer` (which silently killed grep with ENOBUFS
42
+ * on pages with many matching lines).
43
+ *
44
+ * @param {string} content - Stdin content for grep
45
+ * @param {string} pattern - The pattern to search for
46
+ * @param {string[]} baseArgs - Pre-computed grep flags (-i, -F, etc.)
47
+ * @returns {Promise<{status: number|null, stdout: string, truncated: boolean, signal: string|null, error?: string}>}
48
+ */
49
+ async function grepOne(content, pattern, baseArgs) {
50
+ // Shared async-spawn helper handles stdout cap, kill timer, error/close
51
+ // wiring, and stdin EPIPE swallowing. We just adapt the return shape
52
+ // to what grepContent expects (string stdout, status alias for code).
53
+ const result = await runProcess('grep', [...baseArgs, pattern], {
54
+ timeout: GREP_DEFAULTS.GREP_TIMEOUT,
55
+ maxStdout: GREP_DEFAULTS.MAX_GREP_OUTPUT_BYTES,
56
+ input: content,
57
+ collectStderr: false // grep's stderr isn't used by callers
58
+ });
59
+ return {
60
+ status: result.error ? -1 : result.code,
61
+ stdout: result.stdout.toString('utf8'),
62
+ truncated: result.truncated,
63
+ signal: result.signal,
64
+ error: result.error
65
+ };
66
+ }
67
+
68
+ /**
69
+ * Searches content using grep with the provided patterns.
70
+ *
71
+ * Async — runs one spawn per pattern (sequential, not concurrent, to
72
+ * avoid spiking memory with N copies of `content` on grep's stdin
73
+ * simultaneously). The previous spawnSync-per-pattern implementation
74
+ * blocked the event loop for the duration of every grep call; the
75
+ * outer downloadAndGrep's switch to async spawn was undone by this
76
+ * sync inner step.
77
+ *
26
78
  * @param {string} content - The content to search
27
79
  * @param {Array<string>} searchPatterns - Array of grep patterns to search for
28
- * @param {object} options - Grep options
29
- * @returns {Promise<object>} Object with found boolean, matchedPattern, and allMatches array
80
+ * @param {object} options - Grep options (ignoreCase, wholeWord, regex, maxMatches)
81
+ * @returns {Promise<{found: boolean, matchedPattern: string|null, allMatches: Array<{pattern: string, matches: string[]}>}>}
30
82
  */
31
- function grepContent(content, searchPatterns, options = {}) {
83
+ async function grepContent(content, searchPatterns, options = {}) {
32
84
  const {
33
85
  ignoreCase = true,
34
86
  wholeWord = false,
@@ -36,60 +88,53 @@ function grepContent(content, searchPatterns, options = {}) {
36
88
  maxMatches = GREP_DEFAULTS.DEFAULT_MAX_MATCHES
37
89
  } = options;
38
90
 
39
- if (!content || searchPatterns.length === 0) {
91
+ // Pre-filter empty/whitespace patterns at the top instead of doing
92
+ // `if (!pattern || ...) continue` inside the loop. `typeof === 'string'`
93
+ // guard rejects non-string entries (numbers, booleans, etc.) so we
94
+ // don't trip TypeError on `p.trim()` for misconfigured input.
95
+ const validPatterns = Array.isArray(searchPatterns)
96
+ ? searchPatterns.filter(p => typeof p === 'string' && p.trim().length > 0)
97
+ : [];
98
+
99
+ if (!content || validPatterns.length === 0) {
40
100
  return { found: false, matchedPattern: null, allMatches: [] };
41
101
  }
42
-
43
- try {
44
- const allMatches = [];
45
- let firstMatch = null;
46
-
47
- // Build common args once outside the loop
48
- const baseArgs = ['--text', '--color=never'];
49
- if (ignoreCase) baseArgs.push('-i');
50
- if (wholeWord) baseArgs.push('-w');
51
- if (!regex) baseArgs.push('-F');
52
-
53
- for (const pattern of searchPatterns) {
54
- if (!pattern || pattern.trim().length === 0) continue;
55
-
56
- const grepArgs = [...baseArgs, pattern];
57
-
58
- try {
59
- const result = spawnSync('grep', grepArgs, {
60
- encoding: 'utf8',
61
- input: content,
62
- timeout: GREP_DEFAULTS.GREP_TIMEOUT,
63
- maxBuffer: GREP_DEFAULTS.MAX_BUFFER_SIZE
64
- });
65
-
66
- // grep returns 0 if found, 1 if not found, 2+ for errors
67
- if (result.status === GREP_DEFAULTS.GREP_SUCCESS_STATUS && result.stdout) {
68
- allMatches.push({
69
- pattern: pattern,
70
- matches: result.stdout.split('\n').filter(line => line.trim().length > 0).slice(0, maxMatches)
71
- });
72
-
73
- if (!firstMatch) {
74
- firstMatch = pattern;
75
- }
76
- }
77
-
78
- } catch (grepErr) {
79
- // Continue with next pattern if this one fails
80
- console.warn(formatLogMessage('warn', `[grep] Pattern "${pattern}" failed: ${grepErr.message}`));
81
- }
102
+
103
+ const baseArgs = ['--text', '--color=never'];
104
+ if (ignoreCase) baseArgs.push('-i');
105
+ if (wholeWord) baseArgs.push('-w');
106
+ if (!regex) baseArgs.push('-F');
107
+
108
+ const allMatches = [];
109
+ let firstMatch = null;
110
+
111
+ for (const pattern of validPatterns) {
112
+ const result = await grepOne(content, pattern, baseArgs);
113
+ if (result.error) {
114
+ console.warn(formatLogMessage('warn', `${GREP_TAG} Pattern "${pattern}" failed: ${result.error}`));
115
+ continue;
116
+ }
117
+ // Surface truncation so admins can see when grep output hit the
118
+ // 50MB cap — previously this was silent (the SIGTERM-on-truncation
119
+ // path looks the same as a normal exit to the caller).
120
+ if (result.truncated) {
121
+ console.warn(formatLogMessage('warn', `${GREP_TAG} Pattern "${pattern}" output truncated at ${GREP_DEFAULTS.MAX_GREP_OUTPUT_BYTES} bytes; results may be incomplete`));
122
+ }
123
+ // grep exit codes: 0 = found, 1 = not found, 2+ = error.
124
+ // Also accept truncated output — we collected enough to slice to
125
+ // maxMatches even though more existed beyond the cap.
126
+ if (result.status === GREP_DEFAULTS.GREP_SUCCESS_STATUS && result.stdout) {
127
+ const lines = result.stdout.split('\n').filter(line => line.trim().length > 0).slice(0, maxMatches);
128
+ allMatches.push({ pattern, matches: lines });
129
+ if (!firstMatch) firstMatch = pattern;
82
130
  }
83
-
84
- return {
85
- found: allMatches.length > 0,
86
- matchedPattern: firstMatch,
87
- allMatches: allMatches
88
- };
89
-
90
- } catch (error) {
91
- throw new Error(`Grep search failed: ${error.message}`);
92
131
  }
132
+
133
+ return {
134
+ found: allMatches.length > 0,
135
+ matchedPattern: firstMatch,
136
+ allMatches
137
+ };
93
138
  }
94
139
 
95
140
  /**
@@ -102,74 +147,84 @@ function grepContent(content, searchPatterns, options = {}) {
102
147
  * @returns {Promise<object>} Object with found boolean, matchedPattern, and content
103
148
  */
104
149
  async function downloadAndGrep(url, searchPatterns, userAgent = '', grepOptions = {}, timeout = GREP_DEFAULTS.TIMEOUT_SECONDS) {
105
- try {
106
- const curlArgs = [
107
- '-s', // Silent mode
108
- '-L', // Follow redirects
109
- '--max-time', timeout.toString(),
110
- '--max-redirs', GREP_DEFAULTS.MAX_REDIRECTS.toString(),
111
- '--fail-with-body', // Return body even on HTTP errors
112
- '--compressed', // Accept compressed responses
113
- ];
114
-
115
- if (userAgent) {
116
- curlArgs.push('-H', `User-Agent: ${userAgent}`);
117
- }
150
+ const curlArgs = [
151
+ '-s',
152
+ '-L',
153
+ '--max-time', timeout.toString(),
154
+ '--max-redirs', GREP_DEFAULTS.MAX_REDIRECTS.toString(),
155
+ '--fail-with-body',
156
+ '--compressed'
157
+ ];
158
+ if (userAgent) curlArgs.push('-H', `User-Agent: ${userAgent}`);
159
+ curlArgs.push(
160
+ '-H', 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
161
+ '-H', 'Accept-Language: en-US,en;q=0.5',
162
+ '-H', 'Accept-Encoding: gzip, deflate',
163
+ '-H', 'Connection: keep-alive',
164
+ '-H', 'Upgrade-Insecure-Requests: 1'
165
+ );
166
+ curlArgs.push(url);
118
167
 
119
- // Add common headers to appear more browser-like
120
- curlArgs.push(
121
- '-H', 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
122
- '-H', 'Accept-Language: en-US,en;q=0.5',
123
- '-H', 'Accept-Encoding: gzip, deflate',
124
- '-H', 'Connection: keep-alive',
125
- '-H', 'Upgrade-Insecure-Requests: 1'
126
- );
168
+ const result = await runProcess('curl', curlArgs, {
169
+ timeout: timeout * 1000,
170
+ maxStdout: GREP_DEFAULTS.MAX_SIZE_BYTES
171
+ });
127
172
 
128
- curlArgs.push(url);
173
+ if (result.error) throw new Error(`Download and grep failed for ${url}: ${result.error}`);
174
+ if (result.truncated) throw new Error(`Output exceeded ${GREP_DEFAULTS.MAX_SIZE_BYTES} bytes for ${url}`);
175
+ if (result.signal) throw new Error(`Curl killed by signal ${result.signal} for ${url}`);
176
+ if (result.code !== GREP_DEFAULTS.CURL_SUCCESS_STATUS) {
177
+ throw new Error(`Curl exited with status ${result.code}: ${result.stderr.toString('utf8')}`);
178
+ }
129
179
 
130
- // Download content with curl
131
- const curlResult = spawnSync('curl', curlArgs, {
132
- encoding: 'utf8',
133
- timeout: timeout * GREP_DEFAULTS.SPAWN_TIMEOUT_MULTIPLIER,
134
- maxBuffer: GREP_DEFAULTS.MAX_SIZE_BYTES
135
- });
136
-
137
- if (curlResult.error) {
138
- throw curlResult.error;
139
- }
140
-
141
- if (curlResult.status !== GREP_DEFAULTS.CURL_SUCCESS_STATUS) {
142
- throw new Error(`Curl exited with status ${curlResult.status}: ${curlResult.stderr}`);
143
- }
144
-
145
- const content = curlResult.stdout;
146
-
147
- // Search content with grep
180
+ const content = result.stdout.toString('utf8');
181
+ try {
148
182
  const grepResult = await grepContent(content, searchPatterns, grepOptions);
149
-
150
183
  return {
151
184
  found: grepResult.found,
152
185
  matchedPattern: grepResult.matchedPattern,
153
186
  allMatches: grepResult.allMatches,
154
- content: content,
187
+ content,
155
188
  contentLength: content.length
156
189
  };
157
-
158
- } catch (error) {
159
- throw new Error(`Download and grep failed for ${url}: ${error.message}`);
190
+ } catch (grepErr) {
191
+ throw new Error(`Download and grep failed for ${url}: ${grepErr.message}`);
160
192
  }
161
193
  }
162
194
 
163
195
  /**
164
- * Creates a grep-based URL handler for downloading and searching content
165
- * @param {object} config - Configuration object containing all necessary parameters
166
- * @returns {Function} URL handler function for grep-based content analysis
196
+ * Creates a grep-based URL handler for downloading and searching content.
197
+ *
198
+ * @param {object} config
199
+ * @param {string[]} config.searchStrings - OR-logic patterns (any match)
200
+ * @param {string[]} config.searchStringsAnd - AND-logic patterns (all must match)
201
+ * @param {boolean} config.hasSearchString - True if searchStrings is non-empty
202
+ * @param {boolean} config.hasSearchStringAnd - True if searchStringsAnd is non-empty;
203
+ * when true, AND-logic is applied to the combined grep result
204
+ * @param {RegExp[]} config.regexes - URL regex patterns for the first-pass filter
205
+ * @param {Function} config.addMatchedDomain - Sink for matched domains
206
+ * @param {Function} config.isDomainAlreadyDetected - Skip-if-true predicate
207
+ * @param {Function} [config.onContentFetched] - Optional cache hook
208
+ * @param {string} config.currentUrl - The page URL being scanned
209
+ * @param {boolean} config.perSiteSubDomains - Track at subdomain granularity
210
+ * @param {string[]} config.ignoreDomains - Domain ignore list
211
+ * @param {Function} config.matchesIgnoreDomain - Ignore-list matcher
212
+ * @param {Function} config.getRootDomain - URL → registrable root domain
213
+ * @param {object} config.siteConfig - Per-site config (verbose, firstParty, thirdParty)
214
+ * @param {boolean} config.dumpUrls - Write matched URLs to file
215
+ * @param {string} config.matchedUrlsLogFile - Path for dumpUrls output
216
+ * @param {boolean} config.forceDebug
217
+ * @param {string} config.userAgent - Curl user agent
218
+ * @param {string|null} config.resourceType - Resource type for adblock-rules mode
219
+ * @param {object} [config.grepOptions] - Passed through to grepContent
220
+ * (ignoreCase, wholeWord, regex, maxMatches)
221
+ * @returns {Function} URL handler: async (requestUrl) => void
167
222
  */
168
223
  function createGrepHandler(config) {
169
224
  const {
170
225
  searchStrings,
226
+ searchStringsAnd,
171
227
  regexes,
172
- matchedDomains,
173
228
  addMatchedDomain,
174
229
  isDomainAlreadyDetected,
175
230
  onContentFetched,
@@ -185,122 +240,143 @@ function createGrepHandler(config) {
185
240
  userAgent,
186
241
  resourceType,
187
242
  hasSearchString,
243
+ hasSearchStringAnd,
188
244
  grepOptions = {}
189
245
  } = config;
190
246
 
247
+ // Hoisted: currentUrl doesn't change for this handler's lifetime.
248
+ // Previously parsed on every single request.
249
+ let currentRootDomain = '';
250
+ let currentUrlHostname = '';
251
+ try { currentRootDomain = getRootDomain(currentUrl); } catch (_) {}
252
+ try { currentUrlHostname = new URL(currentUrl).hostname; } catch (_) {}
253
+
191
254
  return async function grepHandler(requestUrl) {
192
- const respDomain = perSiteSubDomains ? (new URL(requestUrl)).hostname : getRootDomain(requestUrl);
193
- const fullSubdomain = (new URL(requestUrl)).hostname; // Always get full subdomain for cache tracking
194
-
195
- // Skip if already detected to avoid duplicates
255
+ // Regex check FIRST cheap filter that skips ~99% of requests.
256
+ // Previously this ran AFTER URL parses and a domain-cache lookup,
257
+ // paying for parses on requests we then immediately drop.
258
+ const matchesRegex = regexes.some(re => re.test(requestUrl));
259
+ if (!matchesRegex) return;
260
+
261
+ // Parse requestUrl ONCE and reuse. Was parsed 4 times previously
262
+ // (two hostname parses + two for currentUrlHostname/requestHostname).
263
+ let requestHostname;
264
+ try { requestHostname = new URL(requestUrl).hostname; } catch (_) { return; }
265
+ const fullSubdomain = requestHostname;
266
+ const respDomain = perSiteSubDomains ? requestHostname : getRootDomain(requestUrl);
267
+
196
268
  if (isDomainAlreadyDetected(fullSubdomain)) {
197
269
  if (forceDebug) {
198
- console.log(formatLogMessage('debug', `[grep] Skipping already detected subdomain: ${fullSubdomain}`));
270
+ console.log(formatLogMessage('debug', `${GREP_TAG} Skipping already detected subdomain: ${fullSubdomain}`));
199
271
  }
200
272
  return;
201
273
  }
202
-
203
- // Only process URLs that match our regex patterns
204
- const matchesRegex = regexes.some(re => re.test(requestUrl));
205
- if (!matchesRegex) return;
206
-
207
- // Check if this is a first-party request (same domain as the URL being scanned)
208
- const currentUrlHostname = new URL(currentUrl).hostname;
209
- const requestHostname = new URL(requestUrl).hostname;
274
+
210
275
  const isFirstParty = currentUrlHostname === requestHostname;
211
-
212
- // Apply first-party/third-party filtering
276
+
213
277
  if (isFirstParty && siteConfig.firstParty === false) {
214
278
  if (forceDebug) {
215
- console.log(formatLogMessage('debug', `[grep] Skipping first-party request (firstParty=false): ${requestUrl}`));
279
+ console.log(formatLogMessage('debug', `${GREP_TAG} Skipping first-party request (firstParty=false): ${requestUrl}`));
216
280
  }
217
281
  return;
218
282
  }
219
-
220
283
  if (!isFirstParty && siteConfig.thirdParty === false) {
221
284
  if (forceDebug) {
222
- console.log(formatLogMessage('debug', `[grep] Skipping third-party request (thirdParty=false): ${requestUrl}`));
285
+ console.log(formatLogMessage('debug', `${GREP_TAG} Skipping third-party request (thirdParty=false): ${requestUrl}`));
223
286
  }
224
287
  return;
225
288
  }
226
-
289
+
227
290
  try {
228
291
  if (forceDebug) {
229
- console.log(formatLogMessage('debug', `[grep] Downloading and searching content from: ${requestUrl}`));
292
+ console.log(formatLogMessage('debug', `${GREP_TAG} Downloading and searching content from: ${requestUrl}`));
230
293
  }
231
-
232
- // If NO searchstring is defined, match immediately (like browser behavior)
233
- if (!hasSearchString) {
234
- if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
235
- return;
236
- }
237
-
294
+
295
+ // No searchstring at all match immediately on regex alone.
296
+ if (!hasSearchString && !hasSearchStringAnd) {
297
+ if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) return;
238
298
  addMatchedDomain(respDomain, resourceType, fullSubdomain);
239
- const simplifiedUrl = getRootDomain(currentUrl);
240
-
299
+
300
+ const partyType = isFirstParty ? 'first-party' : 'third-party';
241
301
  if (siteConfig.verbose === 1) {
242
- const partyType = isFirstParty ? 'first-party' : 'third-party';
243
- console.log(formatLogMessage('match', `[${simplifiedUrl}] ${requestUrl} (${partyType}, grep) matched regex`));
302
+ console.log(formatLogMessage('match', `[${currentRootDomain}] ${requestUrl} (${partyType}, grep) matched regex`));
244
303
  }
245
-
246
- if (dumpUrls) {
304
+ if (dumpUrls && matchedUrlsLogFile) {
247
305
  const timestamp = new Date().toISOString();
248
- const partyType = isFirstParty ? 'first-party' : 'third-party';
249
306
  try {
250
- fs.appendFileSync(matchedUrlsLogFile,
251
- `${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, grep)\n`);
307
+ fs.appendFileSync(matchedUrlsLogFile,
308
+ `${timestamp} [match][${currentRootDomain}] ${requestUrl} (${partyType}, grep)\n`);
252
309
  } catch (logErr) {
253
310
  console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
254
311
  }
255
312
  }
256
313
  return;
257
314
  }
258
-
259
- // If searchstring IS defined, download and grep content
260
- const result = await downloadAndGrep(requestUrl, searchStrings, userAgent, grepOptions, GREP_DEFAULTS.TIMEOUT_SECONDS);
261
315
 
262
- // Cache the fetched content if callback provided
316
+ // Combine OR + AND patterns into one grep pass. The AND-logic
317
+ // check below uses per-pattern attribution from
318
+ // grepContent.allMatches. Previously createGrepHandler only
319
+ // destructured `searchStrings` and ignored `searchStringsAnd`
320
+ // entirely — users configuring AND-only patterns with grep mode
321
+ // got silent zero matches.
322
+ const allPatterns = [
323
+ ...(searchStrings || []),
324
+ ...(searchStringsAnd || [])
325
+ ];
326
+ const result = await downloadAndGrep(requestUrl, allPatterns, userAgent, grepOptions, GREP_DEFAULTS.TIMEOUT_SECONDS);
327
+
263
328
  if (onContentFetched && result.content) {
264
329
  try {
265
330
  onContentFetched(requestUrl, result.content);
266
331
  } catch (cacheErr) {
267
- if (forceDebug) console.log(formatLogMessage('debug', `[grep] Content caching failed: ${cacheErr.message}`));
332
+ if (forceDebug) console.log(formatLogMessage('debug', `${GREP_TAG} Content caching failed: ${cacheErr.message}`));
268
333
  }
269
334
  }
270
-
271
- if (result.found) {
272
- if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
273
- return;
335
+
336
+ // Apply OR vs AND logic. AND requires every searchStringsAnd
337
+ // pattern to appear in grepResult.allMatches; OR just needs
338
+ // anything found.
339
+ let matched = false;
340
+ let matchDescription = null;
341
+
342
+ if (hasSearchStringAnd && searchStringsAnd && searchStringsAnd.length > 0) {
343
+ const foundPatterns = new Set(result.allMatches.map(m => m.pattern));
344
+ if (searchStringsAnd.every(p => foundPatterns.has(p))) {
345
+ matched = true;
346
+ matchDescription = `patterns: ${searchStringsAnd.length}/${searchStringsAnd.length} (AND)`;
274
347
  }
275
-
348
+ } else if (result.found) {
349
+ matched = true;
350
+ matchDescription = `pattern: "${result.matchedPattern}"`;
351
+ }
352
+
353
+ if (matched) {
354
+ if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) return;
276
355
  addMatchedDomain(respDomain, resourceType, fullSubdomain);
277
- const simplifiedUrl = getRootDomain(currentUrl);
278
-
356
+
357
+ const partyType = isFirstParty ? 'first-party' : 'third-party';
358
+ const matchCount = result.allMatches.reduce((sum, m) => sum + m.matches.length, 0);
359
+
279
360
  if (siteConfig.verbose === 1) {
280
- const partyType = isFirstParty ? 'first-party' : 'third-party';
281
- const matchCount = result.allMatches.reduce((sum, match) => sum + match.matches.length, 0);
282
- console.log(formatLogMessage('match', `[${simplifiedUrl}] ${requestUrl} (${partyType}, grep) contains pattern: "${result.matchedPattern}" (${matchCount} matches)`));
361
+ console.log(formatLogMessage('match', `[${currentRootDomain}] ${requestUrl} (${partyType}, grep) contains ${matchDescription} (${matchCount} matches)`));
283
362
  }
284
-
285
- if (dumpUrls) {
363
+ if (dumpUrls && matchedUrlsLogFile) {
286
364
  const timestamp = new Date().toISOString();
287
- const partyType = isFirstParty ? 'first-party' : 'third-party';
288
- const matchCount = result.allMatches.reduce((sum, match) => sum + match.matches.length, 0);
289
365
  try {
290
- fs.appendFileSync(matchedUrlsLogFile,
291
- `${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, grep, pattern: "${result.matchedPattern}", matches: ${matchCount})\n`);
366
+ fs.appendFileSync(matchedUrlsLogFile,
367
+ `${timestamp} [match][${currentRootDomain}] ${requestUrl} (${partyType}, grep, ${matchDescription}, matches: ${matchCount})\n`);
292
368
  } catch (logErr) {
293
369
  console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
294
370
  }
295
371
  }
296
372
  } else if (forceDebug) {
297
373
  const partyType = isFirstParty ? 'first-party' : 'third-party';
298
- console.log(formatLogMessage('debug', `[grep] ${requestUrl} (${partyType}) matched regex but no patterns found`));
374
+ console.log(formatLogMessage('debug', `${GREP_TAG} ${requestUrl} (${partyType}) matched regex but no patterns found`));
299
375
  }
300
-
376
+
301
377
  } catch (err) {
302
378
  if (forceDebug) {
303
- console.log(formatLogMessage('debug', `[grep] Failed to download/grep content for ${requestUrl}: ${err.message}`));
379
+ console.log(formatLogMessage('debug', `${GREP_TAG} Failed to download/grep content for ${requestUrl}: ${err.message}`));
304
380
  }
305
381
  }
306
382
  };
@@ -340,9 +416,10 @@ function validateGrepAvailability() {
340
416
  }
341
417
  }
342
418
 
419
+ // Public surface. downloadAndGrep is module-internal (only called by
420
+ // createGrepHandler) — was exported but no external caller imported it.
343
421
  module.exports = {
344
422
  grepContent,
345
- downloadAndGrep,
346
423
  createGrepHandler,
347
424
  validateGrepAvailability
348
425
  };