npm - muaddib-scanner - Versions diffs - 2.11.82 → 2.11.83 - Mend

muaddib-scanner 2.11.82 → 2.11.83

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/package.json +1 -1
package/{self-scan-v2.11.82.json → self-scan-v2.11.83.json} +1 -1
package/src/monitor/queue.js +19 -5
package/src/monitor/scan-queue.js +47 -3
package/src/monitor/state.js +4 -0
package/src/monitor/webhook.js +20 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.11.82",
+  "version": "2.11.83",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {

package/{self-scan-v2.11.82.json → self-scan-v2.11.83.json} RENAMED Viewed

@@ -1,6 +1,6 @@
 {
   "target": "node_modules",
-  "timestamp": "2026-06-10T12:51:04.328Z",
+  "timestamp": "2026-06-10T14:16:30.189Z",
   "threats": [
     {
       "type": "string_mutation_obfuscation",

package/src/monitor/queue.js CHANGED Viewed

@@ -80,7 +80,7 @@ const {
 // From ./ingestion.js
 const { getNpmLatestTarball, getPyPITarballUrl } = require('./ingestion.js');
-const { enqueueScan } = require('./scan-queue.js');
+const { enqueueScan, dequeueScan } = require('./scan-queue.js');
 // From ./tarball-archive.js
 const { archiveSuspectTarball } = require('./tarball-archive.js');
@@ -259,7 +259,9 @@ function recordTrainingSample(result, params) {
       maxSeverity: result.summary ? result.summary.riskLevel : null,
       types: [...new Set((result.threats || []).map(t => t.type))],
       sandbox: params.sandboxResult ? 'run' : 'none',
-      source: 'scan'
+      source: 'scan',
+      // AUDIT-A1: stamped on `result` in scanPackage (single source of truth)
+      firstPublish: !!(result && result._firstPublish)
     });
   } catch (err) {
     // Non-fatal: ML export must never crash the monitor
@@ -673,6 +675,12 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
     // First-publish detection: used for sandbox priority below
     const isFirstPublish = cacheTrigger && cacheTrigger.reason === 'first_publish';
+    // AUDIT-A1 observability: stamp once so every recordTrainingSample(result, …) call
+    // below carries firstPublish into the scan-ledger (all ~10 call sites share this
+    // `result`). Pairs with the firstPublish flag on the eviction-drop ledger entries so
+    // first-publish coverage (scanned vs dropped) becomes measurable. The "Phase 2a"
+    // comment below promised this; the threading was missing until now.
+    result._firstPublish = isFirstPublish;
     // npm registry metadata was fetched ONCE before the worker spawn (hoisted above
     // to feed scanContext.npmRegistryMeta) and is reused here for: isFirstPublishHigh-
@@ -1171,9 +1179,14 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
           console.log(`[MONITOR] REPUTATION BYPASS: ${name} has high-confidence threat — using raw score`);
         }
-        // Record daily alert with post-reputation score for top suspects ranking
+        // Record daily alert with post-reputation score for top suspects ranking.
+        // AUDIT-C: carry the distinct CRITICAL/HIGH threat types so the daily report
+        // can annotate MCP suspects with their signals (visual triage, no scoring change).
         if (dailyAlerts.length < MAX_DAILY_ALERTS) {
-          dailyAlerts.push({ name, version, ecosystem, findingsCount: result.summary.total, score: adjustedResult.summary.riskScore || 0, tier });
+          const signals = [...new Set((result.threats || [])
+            .filter(t => t.severity === 'CRITICAL' || t.severity === 'HIGH')
+            .map(t => t.type))].slice(0, 6);
+          dailyAlerts.push({ name, version, ecosystem, findingsCount: result.summary.total, score: adjustedResult.summary.riskScore || 0, tier, signals });
         }
         // LLM Detective: AI-powered analysis for T1a/T1b suspects
         // Skip for fast-track (large boring packages — LLM analysis adds 10-30s for no value)
@@ -1354,7 +1367,8 @@ async function _spawnWorker(scanQueue, stats, dailyAlerts, recentlyScanned, down
   _activeWorkers++;
   try {
     while (scanQueue.length > 0 && _activeWorkers <= _targetConcurrency) {
-      const item = scanQueue.shift();
+      // AUDIT A2: FIFO by default; priority dequeue when MUADDIB_PRIORITY_DEQUEUE=1.
+      const item = dequeueScan(scanQueue);
       if (!item) break;
       await processQueueItem(item, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable);
     }

package/src/monitor/scan-queue.js CHANGED Viewed

@@ -68,7 +68,9 @@ function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
       if (evicted && evicted.name) {
         require('./state.js').appendScanLedger({
           name: evicted.name, version: evicted.version, ecosystem: evicted.ecosystem,
-          outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap'
+          outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap',
+          // AUDIT-A1 observability (see evictFromScanQueueBulk)
+          firstPublish: !!evicted.firstPublish, isBurstExtra: !!evicted.isATOBurstExtra
         });
       }
     } catch { /* ledger is best-effort */ }
@@ -136,7 +138,12 @@ function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', le
           appendLedger({
             name: item.name, version: item.version, ecosystem: item.ecosystem,
             outcome: 'dropped',
-            source: _isProtected(item) ? `${source}_protected` : source
+            source: _isProtected(item) ? `${source}_protected` : source,
+            // AUDIT-A1 observability: record whether a DROPPED item was a first-publish
+            // (real coverage loss) vs a burst-extra (version-spam, expected). Lets us
+            // measure if the memory breaker is evicting genuine new packages.
+            firstPublish: !!item.firstPublish,
+            isBurstExtra: !!item.isATOBurstExtra
           });
         } catch { /* ledger is best-effort — must never break the breaker */ }
       }
@@ -149,4 +156,41 @@ function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', le
   return { dropped: toDrop, droppedProtected };
 }
-module.exports = { enqueueScan, evictFromScanQueueBulk, isProtected: _isProtected, MAX_SCAN_QUEUE };
+// ── AUDIT A2: optional priority dequeue (gated OFF by default) ──────────────
+// Default dequeue is strict FIFO (scanQueue.shift()). When enabled, the worker pulls
+// the OLDEST high-value item (first-publish / known-malicious / burst-MAIN) within a
+// bounded head-window before falling back to FIFO — so a genuine new package never
+// ages out behind a deep version-spam backlog. Gated behind an env flag so deploying
+// the code is INERT until ops flips it on (tune on the AUDIT-A1 first-publish-coverage
+// data first — see brief). Burst EXTRAS (isATOBurstExtra) and regular items stay FIFO.
+const PRIORITY_DEQUEUE = (() => { // evaluated once, when this module is loaded
+  const v = process.env.MUADDIB_PRIORITY_DEQUEUE;
+  return v === '1' || v === 'true'; // only the exact strings '1' or 'true' enable it; unset or any other value leaves it off
+})();
+const PRIORITY_DEQUEUE_WINDOW = (() => { // evaluated once, when this module is loaded
+  const v = parseInt(process.env.MUADDIB_PRIORITY_DEQUEUE_WINDOW, 10);
+  return Number.isFinite(v) && v > 0 ? v : 2048; // unset, unparsable, or non-positive values fall back to 2048
+})();
+function _isPriority(item) { // boolean: does this queue item qualify for priority dequeue? null/undefined items never do
+  return !!(item && (item.firstPublish || item.isIOCMatch || (item.isBurst && !item.isATOBurstExtra))); // firstPublish, isIOCMatch, or isBurst without isATOBurstExtra
+}
+/**
+ * Remove and return the next item to scan. Strict FIFO by default (unchanged). With
+ * MUADDIB_PRIORITY_DEQUEUE=1: oldest priority item within a bounded head-window, else
+ * FIFO. Single-threaded → splice/shift are atomic w.r.t. other workers.
+ *
+ * The queue array is mutated in place. `opts.priority`, when defined, overrides the
+ * module-level PRIORITY_DEQUEUE flag. A falsy `opts.window` (including 0) falls back
+ * to PRIORITY_DEQUEUE_WINDOW, and the scan never looks past the current queue length.
+ * @param {Array} scanQueue
+ * @param {{priority?: boolean, window?: number}} [opts] test overrides
+ * @returns {*} the removed item, or undefined when the queue is empty
+ */
+function dequeueScan(scanQueue, opts = {}) {
+  const priority = opts.priority !== undefined ? opts.priority : PRIORITY_DEQUEUE;
+  if (!priority || scanQueue.length === 0) return scanQueue.shift(); // FIFO path; shift() on an empty array yields undefined
+  const win = Math.min(scanQueue.length, opts.window || PRIORITY_DEQUEUE_WINDOW); // number of head entries to inspect
+  for (let i = 0; i < win; i++) { // front-to-back, so the first hit is the oldest priority item in the window
+    if (_isPriority(scanQueue[i])) return i === 0 ? scanQueue.shift() : scanQueue.splice(i, 1)[0];
+  }
+  return scanQueue.shift(); // no priority item inside the window: plain FIFO
+}
+module.exports = { enqueueScan, evictFromScanQueueBulk, dequeueScan, isProtected: _isProtected, MAX_SCAN_QUEUE };

package/src/monitor/state.js CHANGED Viewed

@@ -1010,6 +1010,10 @@ function appendScanLedger(e) {
       types: Array.isArray(e.types) ? e.types.slice(0, 12) : [],
       sandbox: e.sandbox || 'none',
       firstPublish: !!e.firstPublish,
+      // AUDIT-A1: version-spam marker on dropped burst-extras — lets the coverage
+      // rollup separate "first-publish lost" from "spam extra dropped (expected)".
+      // Only written when true to keep the 127MB ledger lean.
+      ...(e.isBurstExtra ? { isBurstExtra: true } : {}),
       source: e.source || 'scan'
     };
     fs.appendFileSync(SCAN_LEDGER_FILE, JSON.stringify(entry) + '\n', 'utf8');

package/src/monitor/webhook.js CHANGED Viewed

@@ -1094,6 +1094,22 @@ function formatLedgerField(rollup) {
   return { name: 'Ledger (24h)', value: lines.join('\n'), inline: false };
 }
+// AUDIT-C: MCP self-identity by package name (matches the F9/F15 MCP_NAME_RE family in
+// feature-extractor.js — kept local to avoid importing the ML module into the embed path).
+const _MCP_TRIAGE_NAME_RE = /(?:^|[/_-])mcp(?:[_-]|$)|mcp[_-](?:server|init|bridge|installer|memory|plugin|core|router|host|client|gateway|relay|stdio|transport|orchestrator)/i;
+/**
+ * Triage tag for a daily-report top-suspect. Returns ' 🔌 [MCP: sig1, sig2]' when the
+ * package self-identifies as an MCP server/installer, else ''. Signals come from the
+ * alert's recorded CRITICAL/HIGH threat types (AUDIT-C). Presentation only.
+ *
+ * The name is read from `a.name`, falling back to `a.package`; a missing alert or
+ * missing name is treated as '' and yields ''. At most the first three entries of
+ * `a.signals` are listed. When the name matches but `a.signals` is absent, not an
+ * array, or empty, the bare tag ' 🔌 [MCP]' is returned instead.
+ * @param {{name?: string, package?: string, signals?: string[]}} a daily-alert entry
+ * @returns {string} the tag (with a leading space) or the empty string
+ */
+function mcpTriageTag(a) {
+  const name = (a && (a.name || a.package)) || '';
+  if (!_MCP_TRIAGE_NAME_RE.test(name)) return ''; // also covers a null/undefined `a`, since name is then ''
+  const sigs = Array.isArray(a.signals) ? a.signals.slice(0, 3) : [];
+  return sigs.length ? ` 🔌 [MCP: ${sigs.join(', ')}]` : ' 🔌 [MCP]';
+}
 function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
   // Use in-memory stats (accumulated since last reset, restored from disk on restart)
   // instead of disk-based daily entries which can undercount due to UTC/Paris date mismatch
@@ -1110,7 +1126,10 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
         const version = a.version || 'N/A';
         const count = a.findingsCount || (a.findings ? a.findings.length : 0);
         const scoreText = a.score != null ? `score ${a.score}, ` : '';
-        return `${i + 1}. **${name}@${version}** — ${scoreText}${count} finding(s)`;
+        // AUDIT-C: annotate MCP suspects (identity + signals) for visual triage — MCP
+        // servers score high but are statically ambiguous vs MCP-malware (see AUDIT 2).
+        // Pure presentation, no scoring change.
+        return `${i + 1}. **${name}@${version}** — ${scoreText}${count} finding(s)${mcpTriageTag(a)}`;
       }).join('\n')
     : 'None';