muaddib-scanner 2.10.40 → 2.10.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.10.40",
3
+ "version": "2.10.42",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -10,6 +10,46 @@ function generateBase32(length) {
10
10
  return Array.from(bytes).map(b => chars[b % 32]).join('');
11
11
  }
12
12
 
13
// Minimum consecutive chars from an encoded variant to match in a DNS domain.
// 8 chars avoids FP on short legitimate hex subdomains (e.g. commit SHAs, CDN hashes).
const MIN_ENCODED_MATCH = 8;

/**
 * Generate hex/base64/base64url encoded variants of a token value.
 * Attackers encode tokens before DNS exfiltration to evade raw string matching.
 * @param {string} value - Raw token value
 * @returns {Array<{encoding: string, encoded: string}>}
 */
function generateEncodedVariants(value) {
  const buf = Buffer.from(value);
  return [
    { encoding: 'hex', encoded: buf.toString('hex') },
    { encoding: 'base64', encoded: buf.toString('base64') },
    { encoding: 'base64url', encoded: buf.toString('base64url') }
  ];
}

/**
 * Check if a DNS domain contains an encoded token variant.
 * Strips dots to reassemble data chunked across DNS labels (RFC 1035: max 63 chars/label).
 *
 * DNS names are case-insensitive (RFC 4343), and resolvers may even randomize
 * label case ("0x20 encoding"), so the hex variant — itself case-insensitive,
 * and emitted lowercase by Buffer#toString('hex') — is matched against a
 * lowercased domain. base64/base64url stay case-sensitive: folding their case
 * would conflate distinct encodings and inflate false positives.
 *
 * @param {string} domain - Full DNS query domain
 * @param {Array<{encoding: string, encoded: string}>} variants
 * @returns {{encoding: string, match: string}|null}
 */
function findEncodedInDomain(domain, variants) {
  const stripped = domain.replace(/\./g, '');
  const strippedLower = stripped.toLowerCase();
  for (const { encoding, encoded } of variants) {
    if (encoded.length < MIN_ENCODED_MATCH) continue;
    // Hex: case-insensitive match (see note above); others: exact match.
    const haystack = encoding === 'hex' ? strippedLower : stripped;
    for (let i = 0; i <= encoded.length - MIN_ENCODED_MATCH; i++) {
      const chunk = encoded.substring(i, i + MIN_ENCODED_MATCH);
      if (haystack.includes(chunk)) {
        return { encoding, match: chunk };
      }
    }
  }
  return null;
}
52
+
13
53
  /**
14
54
  * Canary token generators.
15
55
  * Each generator produces a format-valid token that matches the real service format.
@@ -173,6 +213,7 @@ function detectCanaryExfiltration(networkLogs, tokens) {
173
213
  for (const domain of (networkLogs.dns_queries || [])) {
174
214
  if (!domain) continue;
175
215
  for (const [tokenName, tokenValue] of tokenEntries) {
216
+ // Raw value match
176
217
  if (domain.includes(tokenValue)) {
177
218
  exfiltrations.push({
178
219
  token: tokenName,
@@ -180,6 +221,18 @@ function detectCanaryExfiltration(networkLogs, tokens) {
180
221
  foundIn: `DNS query: ${domain}`,
181
222
  severity: 'CRITICAL'
182
223
  });
224
+ continue;
225
+ }
226
+ // Encoded variant match (hex, base64, base64url — catches DNS label chunking)
227
+ const variants = generateEncodedVariants(tokenValue);
228
+ const encodedMatch = findEncodedInDomain(domain, variants);
229
+ if (encodedMatch) {
230
+ exfiltrations.push({
231
+ token: tokenName,
232
+ value: tokenValue,
233
+ foundIn: `DNS query (${encodedMatch.encoding}-encoded): ${domain}`,
234
+ severity: 'CRITICAL'
235
+ });
183
236
  }
184
237
  }
185
238
  }
@@ -12,6 +12,8 @@ const { processQueue, SCAN_CONCURRENCY } = require('./queue.js');
12
12
  const { startHealthcheck } = require('./healthcheck.js');
13
13
 
14
14
  const POLL_INTERVAL = 60_000;
15
+ const PROCESS_LOOP_INTERVAL = 2_000; // Queue check interval when empty
16
+ const QUEUE_WARNING_THRESHOLD = 5_000; // Warn if queue depth exceeds this
15
17
 
16
18
  function sleep(ms) {
17
19
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -171,15 +173,21 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
171
173
  console.log('[MONITOR] npm changes stream enabled (replicate.npmjs.com) with RSS fallback');
172
174
  console.log(`[MONITOR] Scan concurrency: ${SCAN_CONCURRENCY} (MUADDIB_SCAN_CONCURRENCY to override)`);
173
175
  console.log(`[MONITOR] Sandbox concurrency: ${SANDBOX_CONCURRENCY_MAX} (MUADDIB_SANDBOX_CONCURRENCY to override)`);
174
- console.log(`[MONITOR] Polling every ${POLL_INTERVAL / 1000}s. Ctrl+C to stop.\n`);
176
+ console.log(`[MONITOR] Polling every ${POLL_INTERVAL / 1000}s (decoupled from processing). Ctrl+C to stop.\n`);
175
177
 
176
178
  let running = true;
179
+ let pollIntervalHandle = null; // Decoupled poll timer — set after initial poll
177
180
 
178
181
  // Graceful shutdown handler (shared by SIGINT and SIGTERM)
179
182
  // Daily report is NEVER sent on shutdown — it only fires at 08:00 Paris time.
180
183
  // Counters are persisted to disk so they survive the restart.
181
184
  async function gracefulShutdown(signal) {
182
185
  console.log(`\n[MONITOR] Received ${signal} — shutting down...`);
186
+ running = false;
187
+ if (pollIntervalHandle) {
188
+ clearInterval(pollIntervalHandle);
189
+ pollIntervalHandle = null;
190
+ }
183
191
  healthcheck.stop();
184
192
  // Flush all pending scope groups before exit
185
193
  for (const [scope, group] of pendingGrouped) {
@@ -191,25 +199,47 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
191
199
  saveState(state, stats);
192
200
  reportStats(stats);
193
201
  console.log('[MONITOR] State saved. Bye!');
194
- running = false;
195
202
  process.exit(0);
196
203
  }
197
204
 
198
205
  process.on('SIGINT', () => gracefulShutdown('SIGINT'));
199
206
  process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
200
207
 
201
- // Initial poll + scan
208
+ // Initial poll + scan (sequential for first run)
202
209
  await poll(state, scanQueue, stats);
203
210
  saveState(state, stats);
204
211
  await processQueue(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
205
212
 
206
- // Interval polling
213
+ // ─── Decoupled polling ───
214
+ // Poll runs on its own interval, independent of processing.
215
+ // This ensures new packages are ingested even while a large batch is being scanned.
216
+ // Without this, a 2h processing batch blocks all polling — packages published and
217
+ // removed during that window are never seen (e.g. axios/plain-crypto-js 2026-03-30).
218
+ let pollInProgress = false;
219
+ pollIntervalHandle = setInterval(async () => {
220
+ if (!running || pollInProgress) return;
221
+ pollInProgress = true;
222
+ try {
223
+ await poll(state, scanQueue, stats);
224
+ saveState(state, stats);
225
+ if (scanQueue.length > QUEUE_WARNING_THRESHOLD) {
226
+ console.log(`[MONITOR] WARNING: scan queue depth ${scanQueue.length} — processing may be lagging behind ingestion`);
227
+ }
228
+ } catch (err) {
229
+ console.error('[MONITOR] Poll error (interval):', err.message);
230
+ } finally {
231
+ pollInProgress = false;
232
+ }
233
+ }, POLL_INTERVAL);
234
+
235
+ // ─── Continuous processing loop ───
236
+ // Consumes scanQueue independently of polling. Workers inside processQueue
237
+ // check scanQueue.length > 0 after each item, so items added by a concurrent
238
+ // poll are picked up immediately by running workers.
207
239
  while (running) {
208
- await sleep(POLL_INTERVAL);
209
- if (!running) break;
210
- await poll(state, scanQueue, stats);
211
- saveState(state, stats);
212
- await processQueue(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
240
+ if (scanQueue.length > 0) {
241
+ await processQueue(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
242
+ }
213
243
 
214
244
  // Hourly stats report + cache purge
215
245
  if (Date.now() - stats.lastReportTime >= 3600_000) {
@@ -221,6 +251,9 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
221
251
  if (isDailyReportDue(stats)) {
222
252
  await sendDailyReport(stats, dailyAlerts, recentlyScanned, downloadsCache);
223
253
  }
254
+
255
+ // Short pause before re-checking queue — yields event loop for poll interval
256
+ await sleep(PROCESS_LOOP_INTERVAL);
224
257
  }
225
258
  }
226
259
 
@@ -231,5 +264,7 @@ module.exports = {
231
264
  reportStats,
232
265
  isDailyReportDue,
233
266
  sleep,
234
- POLL_INTERVAL
267
+ POLL_INTERVAL,
268
+ PROCESS_LOOP_INTERVAL,
269
+ QUEUE_WARNING_THRESHOLD
235
270
  };
@@ -0,0 +1,348 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+
6
+ // ══════════════════════════════════════════════════════════════
7
+ // gVisor strace log parser
8
+ //
9
+ // gVisor's --strace flag logs syscalls at the kernel level in its
10
+ // debug log files. This parser extracts security-relevant data
11
+ // (file access, network connections, process execution) and returns
12
+ // the SAME structure as sandbox-runner.sh's strace/tcpdump parsing,
13
+ // so the downstream scoreFindings() analyzer needs no changes.
14
+ //
15
+ // gVisor strace format:
16
+ // D0331 12:34:56.789012 1 strace.go:587] [ PID] process E syscall(args) = ret (dur)
17
+ //
18
+ // Or bare (without Go log prefix):
19
+ // [ PID] process E syscall(args) = ret (dur)
20
+ // ══════════════════════════════════════════════════════════════
21
+
22
// Paths whose access from a freshly-installed package is a red flag: package
// and cloud credential stores, SSH keys, shell history, and system account
// files. `.env` only matches when followed by a non-letter (or end of string),
// so names like `.environment` stay clean.
const SENSITIVE_PATTERN = /\.npmrc|\.ssh\/|\.aws\/|\.env(?:$|[^a-zA-Z])|\/etc\/passwd|\/etc\/shadow|\.gitconfig|\.bash_history/;

// Interpreters/tools launched by the sandbox itself; execs of these binaries
// are infrastructure noise, not activity attributed to the package under scan.
const SAFE_PROCESSES = new Set(['node', 'npm', 'npx', 'sh', 'git']);
26
+
27
// ── Line-level parsers ──

/**
 * Parse a single gVisor strace line.
 * Accepts both the Go-log-prefixed form
 *   D0331 12:34:56.789012 1 strace.go:587] [ PID] proc E syscall(args) = ret (dur)
 * and the bare form
 *   [ PID] proc E syscall(args) = ret (dur)
 *
 * @param {string} line - Raw log line
 * @returns {object|null} { pid, process, syscall, args, returnValue } or null
 *   when the line is not a strace record
 */
function parseStraceLine(line) {
  // Drop the Go log prefix when present: it ends right before the `[PID]` block.
  const prefixEnd = line.indexOf('] [');
  const body = prefixEnd >= 0 ? line.substring(prefixEnd + 2).trim() : line.trim();

  // [PID] process E/X syscall(args) = return (duration)
  const parts = body.match(
    /^\[\s*(\d+)\]\s+(\S+)\s+[EX]\s+(\w+)\((.+)\)\s*=\s*(.+?)(?:\s+\([\d.]+[µm]?s\))?$/
  );
  if (parts === null) return null;

  const [, pidStr, processName, syscall, args, ret] = parts;
  return {
    pid: parseInt(pidStr, 10),
    process: processName,
    syscall,
    args,
    returnValue: ret.trim()
  };
}
55
+
56
/**
 * Extract file path and open flags from openat/open syscall args.
 * gVisor formats: `AT_FDCWD, "/path", O_RDONLY|O_CLOEXEC, 0o0` (comma-separated)
 * or `AT_FDCWD "/path" O_RDONLY` (space-separated in some gVisor versions).
 *
 * A single pattern covers both: `[\s,]+` accepts a comma, whitespace, or any
 * mix of the two between the quoted path and the flag word. (A previous
 * space-only fallback pattern was unreachable — `\s+` matches a subset of
 * `[\s,]+` with the same leftmost match — so it has been removed.)
 *
 * @param {string} args - Syscall arguments string
 * @returns {object|null} { path, flags } or null when no quoted path followed
 *   by flag constants is found
 */
function extractOpenatPath(args) {
  const match = args.match(/"([^"]+)"[\s,]+([A-Z_|]+)/);
  return match ? { path: match[1], flags: match[2] } : null;
}
74
+
75
/**
 * Extract destination IP and port from connect() syscall args.
 * gVisor renders the sockaddr as: {Family: AF_INET, Addr: 1.2.3.4, Port: 443}
 *
 * @param {string} args - Syscall arguments string
 * @returns {object|null} { ip, port } or null when no AF_INET sockaddr is found
 */
function extractConnectInfo(args) {
  const found = args.match(/\{Family:\s*AF_INET,\s*Addr:\s*([\d.]+),\s*Port:\s*(\d+)\}/);
  if (found === null) return null;
  const [, ip, portStr] = found;
  return { ip, port: parseInt(portStr, 10) };
}
87
+
88
/**
 * Extract the executed binary's path from execve() syscall args.
 * gVisor format: execve("/usr/bin/curl", ["curl", ...], ...)
 *
 * @param {string} args - Syscall arguments string
 * @returns {string|null} Command path or null
 */
function extractExecveCommand(args) {
  // The first argument is the quoted program path; it must open the string.
  const quoted = /^"([^"]+)"/.exec(args);
  return quoted === null ? null : quoted[1];
}
99
+
100
// ── Main parser ──

/**
 * Parse gVisor strace log content and extract security-relevant findings.
 * Returns the SAME data structure as sandbox-runner.sh's strace parsing
 * so scoreFindings() works identically.
 *
 * @param {string} content - Raw gVisor strace log content
 * @returns {object} { sensitive_files: {read, written}, network: {http_connections}, processes: {spawned} }
 */
function parseGvisorStrace(content) {
  const sensitiveReads = new Set();
  const sensitiveWrites = new Set();
  const connections = new Map(); // dedup by ip:port
  const processes = new Map(); // dedup by pid:command

  for (const line of content.split('\n')) {
    const parsed = parseStraceLine(line);
    if (!parsed) continue;

    // Skip failed syscalls (negative return) — EXCEPT connect(): non-blocking
    // sockets (the default for Node/libuv) legitimately return -EINPROGRESS
    // while the TCP handshake completes in the background (POSIX connect()).
    // Dropping those lines would hide most real outbound connections.
    // NOTE(review): assumes gVisor renders the errno symbolically or as
    // errno=115 / "in progress" in the return field — confirm against logs.
    const failed = parsed.returnValue.startsWith('-');
    const inProgressConnect =
      parsed.syscall === 'connect' &&
      /EINPROGRESS|errno=115|in progress/i.test(parsed.returnValue);
    if (failed && !inProgressConnect) continue;

    switch (parsed.syscall) {
      case 'openat':
      case 'open': {
        const info = extractOpenatPath(parsed.args);
        if (!info || !SENSITIVE_PATTERN.test(info.path)) break;

        // Write intent (O_WRONLY/O_RDWR/O_CREAT) outranks read-only.
        if (/O_WRONLY|O_RDWR|O_CREAT/.test(info.flags)) {
          sensitiveWrites.add(info.path);
        } else if (/O_RDONLY/.test(info.flags)) {
          sensitiveReads.add(info.path);
        }
        break;
      }

      case 'connect': {
        const conn = extractConnectInfo(parsed.args);
        if (!conn) break;
        if (conn.ip.startsWith('127.')) break; // skip loopback
        if (conn.port === 65535) break; // skip probe port
        const key = `${conn.ip}:${conn.port}`;
        if (!connections.has(key)) {
          connections.set(key, { host: conn.ip, port: conn.port, protocol: 'TCP' });
        }
        break;
      }

      case 'execve': {
        const cmd = extractExecveCommand(parsed.args);
        if (!cmd) break;
        const basename = path.basename(cmd);
        if (SAFE_PROCESSES.has(basename)) break; // sandbox infrastructure
        const key = `${parsed.pid}:${cmd}`;
        if (!processes.has(key)) {
          processes.set(key, { command: cmd, pid: parsed.pid });
        }
        break;
      }
    }
  }

  return {
    sensitive_files: {
      read: [...sensitiveReads],
      written: [...sensitiveWrites]
    },
    network: {
      http_connections: [...connections.values()]
    },
    processes: {
      spawned: [...processes.values()]
    }
  };
}
178
+
179
// ── Log discovery ──

/**
 * Find gVisor log files for a specific container.
 * Searches the debug-log directory using multiple strategies:
 *   1. %ID% template → directory named after the full container ID
 *   2. Directory named after the truncated (12-char) ID
 *   3. Flat files in logDir whose name contains either ID form
 *
 * @param {string} containerId - Docker container ID (full 64-char or truncated)
 * @param {string} logDir - gVisor debug-log base directory (default: /tmp/runsc)
 * @returns {string[]} Matching log file paths
 */
function findGvisorLogs(containerId, logDir) {
  const baseDir = logDir || '/tmp/runsc';
  if (!fs.existsSync(baseDir)) return [];

  const shortId = containerId.substring(0, 12);

  // Strategies 1 & 2: per-container directory — full ID first, then short.
  for (const candidate of [containerId, shortId]) {
    const dir = path.join(baseDir, candidate);
    if (fs.existsSync(dir) && fs.statSync(dir).isDirectory()) {
      return collectLogFiles(dir);
    }
  }

  // Strategy 3: flat files whose name embeds the container ID.
  const logFiles = [];
  try {
    for (const entry of fs.readdirSync(baseDir)) {
      const idMatch = entry.includes(containerId) || entry.includes(shortId);
      const looksLikeLog = entry.endsWith('.log') || entry.includes('boot');
      if (idMatch && looksLikeLog) {
        logFiles.push(path.join(baseDir, entry));
      }
    }
  } catch { /* directory not readable */ }

  return logFiles;
}

/**
 * List log-looking files (*.log or *boot*) directly inside dir.
 * Best-effort: an unreadable directory yields an empty list.
 * @param {string} dir
 * @returns {string[]}
 */
function collectLogFiles(dir) {
  const files = [];
  try {
    for (const entry of fs.readdirSync(dir)) {
      if (entry.endsWith('.log') || entry.includes('boot')) {
        files.push(path.join(dir, entry));
      }
    }
  } catch { /* directory not readable */ }
  return files;
}
237
+
238
// ── Aggregated parser (main entry point) ──

/**
 * Parse a gVisor log file and return a report-compatible structure.
 * This is the main export matching the spec:
 *   parseGvisorLog(logPath) → same format as parseStraceOutput()
 * A missing/unreadable file yields an empty (but well-formed) result so
 * downstream scoring never has to special-case I/O failures.
 *
 * @param {string} logPath - Path to a gVisor strace log file
 * @returns {object} Report supplement with sensitive_files, network, processes
 */
function parseGvisorLog(logPath) {
  try {
    return parseGvisorStrace(fs.readFileSync(logPath, 'utf8'));
  } catch {
    return {
      sensitive_files: { read: [], written: [] },
      network: { http_connections: [] },
      processes: { spawned: [] }
    };
  }
}
260
+
261
/**
 * Parse all gVisor logs for a container and return an aggregated report
 * supplement. Findings are merged across every log file (boot, gofer, etc.)
 * using the same dedup keys as parseGvisorStrace (ip:port, pid:command).
 *
 * @param {string} containerId - Docker container ID
 * @param {string} logDir - gVisor debug-log base directory
 * @returns {object} Aggregated report supplement
 */
function parseGvisorLogs(containerId, logDir) {
  const logFiles = findGvisorLogs(containerId, logDir);
  if (logFiles.length === 0) {
    return {
      sensitive_files: { read: [], written: [] },
      network: { http_connections: [] },
      processes: { spawned: [] }
    };
  }

  const reads = new Set();
  const writes = new Set();
  const connectionsByKey = new Map();
  const processesByKey = new Map();

  for (const logFile of logFiles) {
    const { sensitive_files, network, processes } = parseGvisorLog(logFile);

    sensitive_files.read.forEach((f) => reads.add(f));
    sensitive_files.written.forEach((f) => writes.add(f));
    for (const conn of network.http_connections) {
      const key = `${conn.host}:${conn.port}`;
      if (!connectionsByKey.has(key)) connectionsByKey.set(key, conn);
    }
    for (const proc of processes.spawned) {
      const key = `${proc.pid}:${proc.command}`;
      if (!processesByKey.has(key)) processesByKey.set(key, proc);
    }
  }

  return {
    sensitive_files: { read: [...reads], written: [...writes] },
    network: { http_connections: [...connectionsByKey.values()] },
    processes: { spawned: [...processesByKey.values()] }
  };
}
305
+
306
/**
 * Clean up gVisor log files for a container after analysis.
 * Prevents disk fill from accumulated logs across sandbox runs.
 * Best-effort: every filesystem error is swallowed.
 *
 * @param {string} containerId - Docker container ID
 * @param {string} logDir - gVisor debug-log base directory
 */
function cleanupGvisorLogs(containerId, logDir) {
  const baseDir = logDir || '/tmp/runsc';
  const shortId = containerId.substring(0, 12);

  try {
    // Prefer removing a per-container directory (full ID first, then short).
    for (const candidate of [containerId, shortId]) {
      const dir = path.join(baseDir, candidate);
      if (fs.existsSync(dir) && fs.statSync(dir).isDirectory()) {
        fs.rmSync(dir, { recursive: true, force: true });
        return;
      }
    }

    // Otherwise delete flat files whose name embeds either ID form.
    for (const entry of fs.readdirSync(baseDir)) {
      if (entry.includes(containerId) || entry.includes(shortId)) {
        fs.unlinkSync(path.join(baseDir, entry));
      }
    }
  } catch { /* cleanup is best-effort */ }
}
336
+
337
// Public API: aggregate entry points first, then the line-level helpers
// exposed solely so unit tests can exercise them directly.
module.exports = {
  parseGvisorLog,
  parseGvisorLogs,
  parseGvisorStrace,
  findGvisorLogs,
  cleanupGvisorLogs,
  // Exported for unit tests
  parseStraceLine,
  extractOpenatPath,
  extractConnectInfo,
  extractExecveCommand
};