muaddib-scanner 2.11.17 → 2.11.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/pipeline/executor.js +26 -8
- package/src/response/playbooks.js +10 -0
- package/src/rules/index.js +22 -0
- package/src/scanner/dataflow.js +87 -9
- package/src/scanner/module-graph/build-graph.js +14 -1
- package/src/scanner/module-graph/constants.js +1 -1
- package/src/scanner/monorepo.js +104 -0
- package/src/scoring.js +26 -18
package/package.json
CHANGED
package/src/pipeline/executor.js
CHANGED
|
@@ -9,6 +9,7 @@ const { scanHashes } = require('../scanner/hash.js');
|
|
|
9
9
|
const { scanIocStrings } = require('../scanner/ioc-strings.js');
|
|
10
10
|
const { scanAntiForensic } = require('../scanner/anti-forensic.js');
|
|
11
11
|
const { scanStubPackage } = require('../scanner/stub-package.js');
|
|
12
|
+
const { scanMonorepo } = require('../scanner/monorepo.js');
|
|
12
13
|
const { analyzeDataFlow } = require('../scanner/dataflow.js');
|
|
13
14
|
const { scanTyposquatting, findPyPITyposquatMatch } = require('../scanner/typosquat.js');
|
|
14
15
|
const { scanGitHubActions } = require('../scanner/github-actions.js');
|
|
@@ -127,12 +128,25 @@ async function execute(targetPath, options, pythonDeps, warnings) {
|
|
|
127
128
|
// Bounded: 5s timeout to prevent DoS on large/adversarial packages
|
|
128
129
|
const MODULE_GRAPH_TIMEOUT_MS = 5000;
|
|
129
130
|
let crossFileFlows = [];
|
|
131
|
+
// Threats ABOUT the module graph (audit DF-C1): truncation when the package
|
|
132
|
+
// exceeds MAX_GRAPH_NODES. Separate from crossFileFlows because the latter
|
|
133
|
+
// gets filtered/reshaped (the final threat merge keeps only flows with sourceFile && sinkFile).
|
|
134
|
+
const moduleGraphThreats = [];
|
|
130
135
|
if (!options.noModuleGraph) {
|
|
131
136
|
const moduleGraphWork = async () => {
|
|
132
|
-
const
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
warnings.push(
|
|
137
|
+
const graphMeta = {};
|
|
138
|
+
const graph = await yieldThen(() => buildModuleGraph(targetPath, graphMeta));
|
|
139
|
+
if (graphMeta.truncated) {
|
|
140
|
+
warnings.push(`Module graph skipped: ${graphMeta.fileCount} files exceeds MAX_GRAPH_NODES (${graphMeta.maxNodes})`);
|
|
141
|
+
moduleGraphThreats.push({
|
|
142
|
+
type: 'large_package_graph_truncated',
|
|
143
|
+
severity: 'MEDIUM',
|
|
144
|
+
message: `Cross-file analysis désactivée : ${graphMeta.fileCount} fichiers dépassent la limite (${graphMeta.maxNodes}). Risque de blind spot sur monorepo / large package — auditer les sous-modules manuellement.`,
|
|
145
|
+
file: 'package.json',
|
|
146
|
+
line: 0,
|
|
147
|
+
fileCount: graphMeta.fileCount,
|
|
148
|
+
maxNodes: graphMeta.maxNodes
|
|
149
|
+
});
|
|
136
150
|
}
|
|
137
151
|
const tainted = await yieldThen(() => annotateTaintedExports(graph, targetPath));
|
|
138
152
|
const sinkAnnotations = await yieldThen(() => annotateSinkExports(graph, targetPath));
|
|
@@ -187,7 +201,7 @@ async function execute(targetPath, options, pythonDeps, warnings) {
|
|
|
187
201
|
'scanDependencies', 'scanHashes', 'analyzeDataFlow', 'scanTyposquatting',
|
|
188
202
|
'scanGitHubActions', 'matchPythonIOCs', 'checkPyPITyposquatting',
|
|
189
203
|
'scanEntropy', 'scanAIConfig', 'scanIocStrings', 'scanAntiForensic',
|
|
190
|
-
'scanStubPackage'
|
|
204
|
+
'scanStubPackage', 'scanMonorepo'
|
|
191
205
|
];
|
|
192
206
|
|
|
193
207
|
const settledResults = await Promise.allSettled([
|
|
@@ -206,7 +220,8 @@ async function execute(targetPath, options, pythonDeps, warnings) {
|
|
|
206
220
|
yieldThen(() => scanAIConfig(targetPath)),
|
|
207
221
|
yieldThen(() => scanIocStrings(targetPath)),
|
|
208
222
|
withTimeout(() => scanAntiForensic(targetPath), 'scanAntiForensic'),
|
|
209
|
-
yieldThen(() => scanStubPackage(targetPath))
|
|
223
|
+
yieldThen(() => scanStubPackage(targetPath)),
|
|
224
|
+
yieldThen(() => scanMonorepo(targetPath))
|
|
210
225
|
]);
|
|
211
226
|
|
|
212
227
|
// Extract results: use empty array for rejected scanners, log errors
|
|
@@ -234,7 +249,8 @@ async function execute(targetPath, options, pythonDeps, warnings) {
|
|
|
234
249
|
aiConfigThreats,
|
|
235
250
|
iocStringThreats,
|
|
236
251
|
antiForensicThreats,
|
|
237
|
-
stubPackageThreats
|
|
252
|
+
stubPackageThreats,
|
|
253
|
+
monorepoThreats
|
|
238
254
|
] = scanResult;
|
|
239
255
|
|
|
240
256
|
// Emit warning if file count cap was hit + quick-scan overflow files
|
|
@@ -313,12 +329,14 @@ async function execute(targetPath, options, pythonDeps, warnings) {
|
|
|
313
329
|
...iocStringThreats,
|
|
314
330
|
...antiForensicThreats,
|
|
315
331
|
...stubPackageThreats,
|
|
332
|
+
...monorepoThreats,
|
|
316
333
|
...crossFileFlows.filter(f => f && f.sourceFile && f.sinkFile).map(f => ({
|
|
317
334
|
type: f.type,
|
|
318
335
|
severity: f.severity,
|
|
319
336
|
message: `Cross-file dataflow: ${f.source} in ${f.sourceFile} → ${f.sink} in ${f.sinkFile}`,
|
|
320
337
|
file: f.sinkFile
|
|
321
|
-
}))
|
|
338
|
+
})),
|
|
339
|
+
...moduleGraphThreats
|
|
322
340
|
];
|
|
323
341
|
|
|
324
342
|
// Paranoid mode
|
|
@@ -683,6 +683,16 @@ const PLAYBOOKS = {
|
|
|
683
683
|
'Verifier si des donnees sensibles sont envoyees via ce canal. ' +
|
|
684
684
|
'Les proxies HTTP classiques ne filtrent pas ce trafic.',
|
|
685
685
|
|
|
686
|
+
large_package_graph_truncated:
|
|
687
|
+
'Package volumineux (> MAX_GRAPH_NODES fichiers). Cross-file dataflow non analyse. ' +
|
|
688
|
+
'Auditer les sous-modules manuellement ou scanner par sous-paquet. ' +
|
|
689
|
+
'Sur un monorepo, scanner chaque workspace independamment.',
|
|
690
|
+
|
|
691
|
+
monorepo_detected:
|
|
692
|
+
'Monorepo detecte — scanner chaque workspace individuellement pour un verdict per-package. ' +
|
|
693
|
+
'Le score global reflete un perimetre agrege ; muaddib ne supporte pas encore le scoring per-workspace. ' +
|
|
694
|
+
'Ignorer les FP structurels (.yarn/, packages/*/test/, fixtures/) sera ajoute en v2.12.',
|
|
695
|
+
|
|
686
696
|
bin_field_hijack:
|
|
687
697
|
'CRITIQUE: Le champ "bin" de package.json shadow une commande systeme (node, npm, git, bash, etc.). ' +
|
|
688
698
|
'A l\'installation, npm cree un symlink dans node_modules/.bin/ qui intercepte la commande reelle. ' +
|
package/src/rules/index.js
CHANGED
|
@@ -94,6 +94,17 @@ const RULES = {
|
|
|
94
94
|
],
|
|
95
95
|
mitre: 'T1195.002'
|
|
96
96
|
},
|
|
97
|
+
monorepo_detected: {
|
|
98
|
+
id: 'MUADDIB-PKG-021',
|
|
99
|
+
name: 'Monorepo Detected',
|
|
100
|
+
severity: 'MEDIUM',
|
|
101
|
+
confidence: 'high',
|
|
102
|
+
description: 'Workspace monorepo detecte (yarn/pnpm/lerna/turbo). Le perimetre du scan depasse un seul package — auditer chaque workspace separement pour un scoring per-package.',
|
|
103
|
+
references: [
|
|
104
|
+
'https://docs.npmjs.com/cli/v10/using-npm/workspaces'
|
|
105
|
+
],
|
|
106
|
+
mitre: 'T1195.002'
|
|
107
|
+
},
|
|
97
108
|
|
|
98
109
|
// Obfuscation detections
|
|
99
110
|
obfuscation_detected: {
|
|
@@ -2110,6 +2121,17 @@ const RULES = {
|
|
|
2110
2121
|
],
|
|
2111
2122
|
mitre: 'T1071'
|
|
2112
2123
|
},
|
|
2124
|
+
large_package_graph_truncated: {
|
|
2125
|
+
id: 'MUADDIB-FLOW-006',
|
|
2126
|
+
name: 'Large Package Graph Truncated',
|
|
2127
|
+
severity: 'MEDIUM',
|
|
2128
|
+
confidence: 'medium',
|
|
2129
|
+
description: 'Le graphe de modules depasse la limite (MAX_GRAPH_NODES). Cross-file dataflow non analyse — risque de blind spot sur monorepo ou large package. Auditer les sous-modules manuellement.',
|
|
2130
|
+
references: [
|
|
2131
|
+
'https://attack.mitre.org/techniques/T1195/002/'
|
|
2132
|
+
],
|
|
2133
|
+
mitre: 'T1195.002'
|
|
2134
|
+
},
|
|
2113
2135
|
|
|
2114
2136
|
// Audit v3 Bypass Detections (AST-062 to AST-069)
|
|
2115
2137
|
reflect_apply_require: {
|
package/src/scanner/dataflow.js
CHANGED
|
@@ -40,13 +40,39 @@ const MODULE_SINK_METHODS = {
|
|
|
40
40
|
ws: { send: 'network_send', write: 'network_send' },
|
|
41
41
|
mqtt: { publish: 'network_send', send: 'network_send' },
|
|
42
42
|
'socket.io-client': { emit: 'network_send', send: 'network_send' },
|
|
43
|
-
'socket.io': { emit: 'network_send', send: 'network_send' }
|
|
43
|
+
'socket.io': { emit: 'network_send', send: 'network_send' },
|
|
44
|
+
// audit DF-H1 v2.11.15: 2026 sinks with clean direct call patterns
|
|
45
|
+
undici: { request: 'network_send', fetch: 'network_send', stream: 'network_send' },
|
|
46
|
+
'graphql-request': { request: 'network_send', gql: 'network_send' },
|
|
47
|
+
'@apollo/client': { query: 'network_send', mutate: 'network_send' },
|
|
48
|
+
'@grpc/grpc-js': {
|
|
49
|
+
makeUnaryRequest: 'network_send',
|
|
50
|
+
makeClientStreamRequest: 'network_send',
|
|
51
|
+
makeServerStreamRequest: 'network_send',
|
|
52
|
+
makeBidiStreamRequest: 'network_send'
|
|
53
|
+
}
|
|
44
54
|
};
|
|
45
55
|
|
|
46
|
-
//
|
|
56
|
+
// audit DF-H1 v2.11.15: 2026 exfil-prone modules. When imported, ANY call with a
|
|
57
|
+
// credential/env source in the same file → suspicious_module_sink MEDIUM with
|
|
58
|
+
// module attribution. Catches chained access (bot.telegram.sendMessage), dynamic
|
|
59
|
+
// methods (actor.exfil), and SDK fluent APIs that direct method matching misses.
|
|
60
|
+
const EXFIL_PRONE_MODULES = new Set([
|
|
61
|
+
'telegraf', 'node-telegram-bot-api',
|
|
62
|
+
'discord.js',
|
|
63
|
+
'@dfinity/agent',
|
|
64
|
+
'undici',
|
|
65
|
+
'@grpc/grpc-js',
|
|
66
|
+
'@apollo/client', 'graphql-request'
|
|
67
|
+
]);
|
|
68
|
+
|
|
69
|
+
// All tracked module names (for filtering in buildTaintMap). EXFIL_PRONE_MODULES
|
|
70
|
+
// must be tracked even when not in MODULE_SINK_METHODS so buildTaintMap registers
|
|
71
|
+
// them (e.g. require('telegraf') populates taintMap, enabling the heuristic).
|
|
47
72
|
const TRACKED_MODULES = new Set([
|
|
48
73
|
...Object.keys(MODULE_SOURCE_METHODS),
|
|
49
|
-
...Object.keys(MODULE_SINK_METHODS)
|
|
74
|
+
...Object.keys(MODULE_SINK_METHODS),
|
|
75
|
+
...EXFIL_PRONE_MODULES
|
|
50
76
|
]);
|
|
51
77
|
|
|
52
78
|
// Methods that execute commands — used for exec result capture detection
|
|
@@ -805,7 +831,7 @@ function analyzeFile(content, filePath, basePath) {
|
|
|
805
831
|
const envVar = node.property?.name || '';
|
|
806
832
|
if (isSensitiveEnv(envVar)) {
|
|
807
833
|
sources.push({
|
|
808
|
-
type: 'env_read',
|
|
834
|
+
type: isCredentialEnv(envVar) ? 'credential_env_read' : 'env_read',
|
|
809
835
|
name: envVar,
|
|
810
836
|
line: node.loc?.start?.line,
|
|
811
837
|
taint_tracked: true
|
|
@@ -831,7 +857,7 @@ function analyzeFile(content, filePath, basePath) {
|
|
|
831
857
|
const envVar = node.property?.name || '';
|
|
832
858
|
if (isSensitiveEnv(envVar)) {
|
|
833
859
|
sources.push({
|
|
834
|
-
type: 'env_read',
|
|
860
|
+
type: isCredentialEnv(envVar) ? 'credential_env_read' : 'env_read',
|
|
835
861
|
name: envVar,
|
|
836
862
|
line: node.loc?.start?.line
|
|
837
863
|
});
|
|
@@ -911,6 +937,29 @@ function analyzeFile(content, filePath, basePath) {
|
|
|
911
937
|
});
|
|
912
938
|
}
|
|
913
939
|
|
|
940
|
+
// audit DF-H1 v2.11.15: 2026 exfil-prone module heuristic.
|
|
941
|
+
// If any EXFIL_PRONE_MODULES (telegraf, discord.js, @dfinity/agent, undici, gRPC,
|
|
942
|
+
// GraphQL clients) is imported AND a credential/env_read source is present in
|
|
943
|
+
// the same file, emit suspicious_module_sink with module attribution. Catches
|
|
944
|
+
// chained access (bot.telegram.sendMessage) and dynamic methods (actor.exfil)
|
|
945
|
+
// that direct MODULE_SINK_METHODS matching cannot reach.
|
|
946
|
+
const exfilProneInScope = [];
|
|
947
|
+
for (const taint of taintMap.values()) {
|
|
948
|
+
if (taint && EXFIL_PRONE_MODULES.has(taint.source)) exfilProneInScope.push(taint.source);
|
|
949
|
+
}
|
|
950
|
+
if (exfilProneInScope.length > 0 &&
|
|
951
|
+
sources.some(s => s.type === 'env_read' || s.type === 'credential_env_read' || s.type === 'credential_read')) {
|
|
952
|
+
const moduleList = [...new Set(exfilProneInScope)].join(', ');
|
|
953
|
+
const firstSourceLine = sources.find(s => s.line)?.line || 0;
|
|
954
|
+
threats.push({
|
|
955
|
+
type: 'suspicious_module_sink',
|
|
956
|
+
severity: 'MEDIUM',
|
|
957
|
+
message: `Module exfil-prone 2026 (${moduleList}) avec credential/env source dans le meme fichier — canal d'exfiltration potentiel.`,
|
|
958
|
+
file: path.relative(basePath, filePath),
|
|
959
|
+
line: firstSourceLine
|
|
960
|
+
});
|
|
961
|
+
}
|
|
962
|
+
|
|
914
963
|
// Detect staged payload: network fetch + eval in same file (no credential source needed)
|
|
915
964
|
const hasNetworkSink = sinks.some(s => s.type === 'network_send' || s.type === 'exec_network');
|
|
916
965
|
const hasEvalSink = sinks.some(s => s.type === 'eval_exec');
|
|
@@ -979,12 +1028,16 @@ function analyzeFile(content, filePath, basePath) {
|
|
|
979
1028
|
}
|
|
980
1029
|
|
|
981
1030
|
// Graduation: HIGH → MEDIUM for env/telemetry-only sources (no credential file reads,
|
|
982
|
-
// no fingerprint reads, no command output
|
|
983
|
-
// is the dominant FP pattern (SDK/API usage, binary
|
|
984
|
-
// Real credential exfiltration uses credential_read
|
|
1031
|
+
// no fingerprint reads, no command output, no credential-tier env vars). Distant
|
|
1032
|
+
// env/telemetry → network_send is the dominant FP pattern (SDK/API usage, binary
|
|
1033
|
+
// wrappers, config libraries). Real credential exfiltration uses credential_read,
|
|
1034
|
+
// fingerprint_read, or credential_env_read sources (audit 2026-05 DF-C4: NPM_TOKEN,
|
|
1035
|
+
// GITHUB_TOKEN, AWS_SECRET_* now classified as credential_env_read upstream and
|
|
1036
|
+
// retained at HIGH instead of downgrading to MEDIUM with the rest of env_read).
|
|
985
1037
|
if (severity === 'HIGH') {
|
|
986
1038
|
const hasHighRiskSource = sources.some(s =>
|
|
987
|
-
s.type === 'credential_read' || s.type === 'fingerprint_read' ||
|
|
1039
|
+
s.type === 'credential_read' || s.type === 'fingerprint_read' ||
|
|
1040
|
+
s.type === 'command_output' || s.type === 'credential_env_read'
|
|
988
1041
|
);
|
|
989
1042
|
if (!hasHighRiskSource) {
|
|
990
1043
|
severity = 'MEDIUM';
|
|
@@ -1120,4 +1173,29 @@ function isSensitiveEnv(name) {
|
|
|
1120
1173
|
return sensitive.some(s => upper.includes(s));
|
|
1121
1174
|
}
|
|
1122
1175
|
|
|
1176
|
+
// Audit 2026-05 DF-C4: credential-tier env vars distinguished from generic env_read.
|
|
1177
|
+
// These represent authentication material (NPM_TOKEN, GITHUB_TOKEN, AWS_SECRET_ACCESS_KEY,
|
|
1178
|
+
// STRIPE_API_KEY etc.) — strictly narrower than isSensitiveEnv. Sources of this type
|
|
1179
|
+
// participate in hasHighRiskSource so credential exfil patterns are NOT downgraded by the
|
|
1180
|
+
// HIGH→MEDIUM graduation. System identity vars (HOME, USER) remain plain env_read since
|
|
1181
|
+
// they are fingerprinting signals, not credentials.
|
|
1182
|
+
const KNOWN_CREDENTIAL_ENV_VARS = new Set([
|
|
1183
|
+
'NPM_TOKEN', 'GITHUB_TOKEN', 'GH_TOKEN', 'NODE_AUTH_TOKEN',
|
|
1184
|
+
'CIRCLE_TOKEN', 'GITLAB_TOKEN', 'CARGO_REGISTRY_TOKEN', 'PYPI_TOKEN',
|
|
1185
|
+
'GOOGLE_APPLICATION_CREDENTIALS', 'AZURE_CLIENT_SECRET',
|
|
1186
|
+
'SENTRY_AUTH_TOKEN', 'NPM_AUTH_TOKEN', 'NPM_CONFIG_AUTHTOKEN'
|
|
1187
|
+
]);
|
|
1188
|
+
|
|
1189
|
+
// Suffix matcher for credential-style env var names. The pattern is anchored at
// the end of the string and the keyword must either be the whole name or follow
// an underscore, so "NPM_TOKEN" and "TOKEN" match while "MYTOKEN" does not.
// The pattern is case-sensitive; isCredentialEnv upper-cases the name before testing.
const CREDENTIAL_ENV_SUFFIX_RE = /(?:^|_)(?:TOKEN|SECRET|PASSWORD|PASSPHRASE|CREDENTIAL|CREDENTIALS|API_KEY|ACCESS_KEY|ACCESS_KEY_ID|SECRET_KEY|PRIVATE_KEY|SIGNING_KEY|SESSION_TOKEN|REFRESH_TOKEN|AUTH_TOKEN)$/;
|
|
1190
|
+
|
|
1191
|
+
/**
 * Decide whether an environment variable name denotes authentication material.
 *
 * The name is upper-cased, then rejected when it is a system identity variable
 * or refers to a public key; otherwise it is accepted when it is a known
 * credential variable or ends with a credential-style suffix.
 *
 * @param {string} name - Environment variable name as written in the scanned code.
 * @returns {boolean} true when the name is classified as a credential.
 */
function isCredentialEnv(name) {
  const normalized = name.toUpperCase();

  // Exclusions win over every positive signal: identity vars are fingerprinting
  // data and public keys are not secrets.
  const isExcluded =
    SYSTEM_IDENTITY_ENVS.has(normalized) ||
    normalized.includes('PUBLIC_KEY') ||
    normalized.includes('PUBKEY');
  if (isExcluded) {
    return false;
  }

  return KNOWN_CREDENTIAL_ENV_VARS.has(normalized) || CREDENTIAL_ENV_SUFFIX_RE.test(normalized);
}
|
|
1200
|
+
|
|
1123
1201
|
module.exports = { analyzeDataFlow };
|
|
@@ -8,17 +8,30 @@ const { parseFile, isLocalImport, resolveLocal, toRel } = require('./parse-utils
|
|
|
8
8
|
/**
|
|
9
9
|
* Build a dependency graph of local modules within a package.
|
|
10
10
|
* Only tracks local imports (./ ../) — node_modules are ignored.
|
|
11
|
+
*
|
|
12
|
+
* @param {string} packagePath
|
|
13
|
+
* @param {Object} [meta] - Optional out-param: mutated with { fileCount, truncated, maxNodes }
|
|
14
|
+
* so the caller can emit a `large_package_graph_truncated` threat
|
|
15
|
+
* when the package exceeds MAX_GRAPH_NODES (audit DF-C1).
|
|
11
16
|
*/
|
|
12
|
-
function buildModuleGraph(packagePath) {
|
|
17
|
+
function buildModuleGraph(packagePath, meta = {}) {
|
|
13
18
|
const graph = {};
|
|
19
|
+
// maxFiles: 0 (unlimited) — we need the true count to detect monorepo / large package
|
|
20
|
+
// truncation. MAX_GRAPH_NODES below caps the AST work; MODULE_GRAPH_TIMEOUT_MS in
|
|
21
|
+
// executor.js bounds the wall-time (audit DF-C1).
|
|
14
22
|
const files = findFiles(packagePath, {
|
|
15
23
|
extensions: ['.js', '.mjs', '.cjs'],
|
|
16
24
|
excludedDirs: EXCLUDED_DIRS,
|
|
25
|
+
maxFiles: 0,
|
|
17
26
|
});
|
|
18
27
|
|
|
28
|
+
meta.fileCount = files.length;
|
|
29
|
+
meta.maxNodes = MAX_GRAPH_NODES;
|
|
30
|
+
|
|
19
31
|
// Bounded path: skip module graph for very large packages
|
|
20
32
|
if (files.length > MAX_GRAPH_NODES) {
|
|
21
33
|
debugLog(`[MODULE-GRAPH] Skipping: ${files.length} files exceeds MAX_GRAPH_NODES (${MAX_GRAPH_NODES})`);
|
|
34
|
+
meta.truncated = true;
|
|
22
35
|
return graph;
|
|
23
36
|
}
|
|
24
37
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
const { ACORN_OPTIONS: BASE_ACORN_OPTIONS } = require('../../shared/constants.js');
|
|
4
4
|
|
|
5
5
|
// --- Bounded path limits ---
|
|
6
|
-
const MAX_GRAPH_NODES =
|
|
6
|
+
const MAX_GRAPH_NODES = 5000; // Max files in dependency graph (covers ~99.5% of npm packages — audit DF-C1 v2.11.15)
|
|
7
7
|
const MAX_GRAPH_EDGES = 400; // Max total import edges
|
|
8
8
|
const MAX_FLOWS = 20; // Max cross-file flow findings per package
|
|
9
9
|
const MAX_TAINT_DEPTH = 50; // Max AST recursion depth (DoS guard)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Monorepo Detection Scanner (audit 2026-05 MR-C1)
|
|
5
|
+
*
|
|
6
|
+
* Detects when the scan target is a monorepo root (Yarn/npm workspaces, pnpm,
|
|
7
|
+
* Lerna, Turbo). Emits ONE informational MEDIUM threat `monorepo_detected`
|
|
8
|
+
* so the user knows the score reflects an aggregated perimeter rather than a
|
|
9
|
+
* single package, and that per-workspace scanning is the correct strategy
|
|
10
|
+
* until full workspace-aware scoring lands (backlog v2.13).
|
|
11
|
+
*
|
|
12
|
+
* Detection precedence (manager priority on first match):
|
|
13
|
+
* 1. pnpm-workspace.yaml → manager='pnpm'
|
|
14
|
+
* 2. lerna.json → manager='lerna'
|
|
15
|
+
* 3. turbo.json + pkg.workspaces → manager='turbo'
|
|
16
|
+
*   4. pkg.workspaces (array or {packages: [...]}) → manager='yarn' (also npm 7+)
|
|
17
|
+
*
|
|
18
|
+
* @param {string} targetPath
|
|
19
|
+
* @returns {Array} threats — empty if not a monorepo, one entry otherwise.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
const fs = require('fs');
|
|
23
|
+
const path = require('path');
|
|
24
|
+
|
|
25
|
+
/**
 * Load a file and parse it as JSON without ever throwing.
 *
 * @param {string} filePath - Path of the JSON document to read.
 * @returns {*} The parsed value, or null when the file cannot be read or is not valid JSON.
 */
function readJsonSafe(filePath) {
  let parsed = null;
  try {
    const raw = fs.readFileSync(filePath, 'utf8');
    parsed = JSON.parse(raw);
  } catch {
    // Best-effort loader: an unreadable or malformed manifest is reported as null.
    parsed = null;
  }
  return parsed;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Count the include patterns listed under one top-level key of a YAML file.
 *
 * This is a deliberately small line-based reader (no YAML dependency). It
 * understands the shapes used by pnpm-workspace.yaml:
 *   - block sequences, indented or flush with the key (`- 'packages/*'`);
 *   - single-line flow sequences (`packages: ['a/*', 'b/*']`).
 * Entries belonging to other top-level keys are ignored, and exclusion globs
 * (patterns starting with `!`) are not counted because they remove
 * directories from the workspace rather than adding any.
 *
 * @param {string} filePath - Path of the YAML file to inspect.
 * @param {string} [listKey='packages'] - Top-level key whose entries are counted.
 * @returns {number} Number of non-empty, non-negated entries; 0 when the file
 *   cannot be read or the key is absent.
 */
function countYamlListEntries(filePath, listKey = 'packages') {
  let content;
  try {
    content = fs.readFileSync(filePath, 'utf8');
  } catch {
    // Best-effort: an unreadable manifest simply contributes no entries.
    return 0;
  }

  // A comment starts at '#' when it opens the line or follows whitespace.
  const stripComment = (text) => text.replace(/(^|\s)#.*$/, '').trim();
  const unquote = (text) => text.replace(/^(['"])(.*)\1$/, '$2').trim();
  const isIncludePattern = (value) => value !== '' && !value.startsWith('!');

  let count = 0;
  let insideList = false;

  for (const rawLine of content.split(/\r?\n/)) {
    const text = stripComment(rawLine);
    if (text === '') continue;

    // Sequence entry ("- value"); "---" document markers do not match.
    const entry = /^-(?:\s+(.*))?$/.exec(text);
    if (entry) {
      if (insideList && isIncludePattern(unquote(entry[1] || ''))) count += 1;
      continue;
    }

    // Indented non-entry lines are nested mapping content; they neither open
    // nor close the list being tracked.
    if (/^\s/.test(rawLine)) continue;

    // Top-level "key: value" line: decides whether the following entries count.
    const colon = text.indexOf(':');
    const key = colon === -1 ? '' : unquote(text.slice(0, colon).trim());
    const rest = colon === -1 ? '' : text.slice(colon + 1).trim();
    insideList = key === listKey && rest === '';

    if (key === listKey && rest.startsWith('[') && rest.endsWith(']')) {
      count += rest
        .slice(1, -1)
        .split(',')
        .map((item) => unquote(item.trim()))
        .filter(isIncludePattern).length;
    }
  }

  return count;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Count the workspace include patterns declared in a `workspaces` field.
 *
 * Accepts both supported shapes: a plain array of globs, or an object whose
 * `packages` property is that array. Only non-empty string patterns are
 * counted; exclusion globs (starting with `!`) are skipped because they
 * remove directories from the workspace set instead of declaring one.
 *
 * @param {*} workspaces - Raw value of the `workspaces` field.
 * @returns {number} Number of include patterns, 0 for any unsupported shape.
 */
function workspacesCount(workspaces) {
  let patterns = [];
  if (Array.isArray(workspaces)) {
    patterns = workspaces;
  } else if (workspaces !== null && typeof workspaces === 'object' && Array.isArray(workspaces.packages)) {
    patterns = workspaces.packages;
  }

  return patterns.filter((pattern) => {
    if (typeof pattern !== 'string') return false;
    const glob = pattern.trim();
    return glob !== '' && !glob.startsWith('!');
  }).length;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Detect whether the scan target is a monorepo root and report it.
 *
 * Managers are checked in order, first match wins:
 *   1. pnpm-workspace.yaml present                    -> 'pnpm'
 *   2. lerna.json present                             -> 'lerna'
 *   3. turbo.json present + package.json workspaces   -> 'turbo'
 *   4. package.json workspaces                        -> 'yarn'
 *
 * For Lerna the workspace count comes from `lerna.json#packages`, then
 * `lerna.json#workspaces`, and finally from package.json `workspaces`, so a
 * repository whose Lerna setup delegates to the package manager's workspaces
 * does not report zero workspaces.
 *
 * @param {string} targetPath - Directory being scanned.
 * @returns {Array<Object>} Empty when no monorepo marker is found, otherwise a
 *   single `monorepo_detected` threat carrying `manager` and `workspaceCount`.
 */
function scanMonorepo(targetPath) {
  const threats = [];

  const pnpmWs = path.join(targetPath, 'pnpm-workspace.yaml');
  const lernaJson = path.join(targetPath, 'lerna.json');
  const turboJson = path.join(targetPath, 'turbo.json');
  const pkgJson = path.join(targetPath, 'package.json');

  // Workspace patterns declared in the root package.json (0 when absent/invalid).
  const countPackageJsonWorkspaces = () => {
    const pkg = fs.existsSync(pkgJson) ? readJsonSafe(pkgJson) : null;
    return pkg && pkg.workspaces ? workspacesCount(pkg.workspaces) : 0;
  };

  let manager = null;
  let manifest = 'package.json';
  let workspaceCount = 0;

  if (fs.existsSync(pnpmWs)) {
    manager = 'pnpm';
    manifest = 'pnpm-workspace.yaml';
    workspaceCount = countYamlListEntries(pnpmWs);
  } else if (fs.existsSync(lernaJson)) {
    manager = 'lerna';
    manifest = 'lerna.json';
    const lerna = readJsonSafe(lernaJson);
    workspaceCount = lerna && Array.isArray(lerna.packages) ? lerna.packages.length : 0;
    if (workspaceCount === 0 && lerna && lerna.workspaces) {
      workspaceCount = workspacesCount(lerna.workspaces);
    }
    if (workspaceCount === 0) {
      // Lerna can rely on the package manager's own workspaces definition.
      workspaceCount = countPackageJsonWorkspaces();
    }
  } else {
    const wsCount = countPackageJsonWorkspaces();
    if (wsCount > 0) {
      // turbo.json alone is not a monorepo marker; it only renames the manager
      // when package.json already declares workspaces.
      const hasTurbo = fs.existsSync(turboJson);
      manager = hasTurbo ? 'turbo' : 'yarn';
      manifest = hasTurbo ? 'turbo.json' : 'package.json';
      workspaceCount = wsCount;
    }
  }

  if (!manager) return threats;

  threats.push({
    type: 'monorepo_detected',
    severity: 'MEDIUM',
    message: `Monorepo ${manager} detecte (${workspaceCount} workspace${workspaceCount > 1 ? 's' : ''}). Perimetre elargi — scanner chaque package independamment pour un verdict per-workspace.`,
    file: manifest,
    line: 0,
    manager,
    workspaceCount
  });

  return threats;
}
|
|
103
|
+
|
|
104
|
+
module.exports = { scanMonorepo };
|
package/src/scoring.js
CHANGED
|
@@ -126,7 +126,11 @@ const PACKAGE_LEVEL_TYPES = new Set([
|
|
|
126
126
|
// intel-triage P1.3: stub-package detector closes ltidi gap (memory project_detection_gap_ltidi_chain)
|
|
127
127
|
'stub_package_external_payload', 'stub_package_external_dep',
|
|
128
128
|
// intel-triage P3.1 family compounds
|
|
129
|
-
'axios_family', 'stub_with_string_ioc'
|
|
129
|
+
'axios_family', 'stub_with_string_ioc',
|
|
130
|
+
// audit DF-C1: emitted when MAX_GRAPH_NODES exceeded so cross-file blind spot is visible in scoring
|
|
131
|
+
'large_package_graph_truncated',
|
|
132
|
+
// audit MR-C1: informational signal that the scan target is a monorepo root (per-workspace scoring TBD)
|
|
133
|
+
'monorepo_detected'
|
|
130
134
|
]);
|
|
131
135
|
|
|
132
136
|
// ============================================
|
|
@@ -179,13 +183,11 @@ function isPackageLevelThreat(threat) {
|
|
|
179
183
|
* @param {Array} threats - array of threat objects (after FP reductions)
|
|
180
184
|
* @returns {number} score 0-100
|
|
181
185
|
*/
|
|
182
|
-
// Hybrid v3 Phase 3:
|
|
183
|
-
//
|
|
184
|
-
//
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
return _COMPOUND_REPLACE_ENABLED() && t.replacedByCompound;
|
|
188
|
-
}
|
|
186
|
+
// Hybrid v3 Phase 3: threats tagged with replacedByCompound (their compound has
|
|
187
|
+
// fired and represents their score) contribute 0 to the group score. Avoids the
|
|
188
|
+
// additive double-count of compound + constituents. Audit 2026-05 SC-C1: the
|
|
189
|
+
// previous MUADDIB_COMPOUND_REPLACE env-var gate is removed — the tag set by
|
|
190
|
+
// applyCompoundBoosts is now honored unconditionally.
|
|
189
191
|
|
|
190
192
|
function computeGroupScore(threats) {
|
|
191
193
|
// Score decay default ON since v2.11.9 (FPR plan Chantier 1). Opt-out: MUADDIB_DECAY=0.
|
|
@@ -195,7 +197,7 @@ function computeGroupScore(threats) {
|
|
|
195
197
|
let dataflowMediumPoints = 0;
|
|
196
198
|
|
|
197
199
|
for (const t of threats) {
|
|
198
|
-
if (
|
|
200
|
+
if (t.replacedByCompound) continue;
|
|
199
201
|
const weight = _severityWeights[t.severity] || 0;
|
|
200
202
|
const rule = getRule(t.type);
|
|
201
203
|
const factor = CONFIDENCE_FACTORS[rule.confidence] || 1.0;
|
|
@@ -253,7 +255,7 @@ function computeGroupScoreDecay(threats) {
|
|
|
253
255
|
const typeBuckets = new Map();
|
|
254
256
|
|
|
255
257
|
for (const t of threats) {
|
|
256
|
-
if (
|
|
258
|
+
if (t.replacedByCompound) continue;
|
|
257
259
|
const weight = _severityWeights[t.severity] || 0;
|
|
258
260
|
const rule = getRule(t.type);
|
|
259
261
|
const factor = CONFIDENCE_FACTORS[rule.confidence] || 1.0;
|
|
@@ -299,7 +301,11 @@ const FP_COUNT_THRESHOLDS = {
|
|
|
299
301
|
// Real malware uses 1-2 targeted Function() calls.
|
|
300
302
|
dangerous_call_function: { maxCount: 5, to: 'LOW' },
|
|
301
303
|
require_cache_poison: { maxCount: 3, from: 'CRITICAL', to: 'LOW' },
|
|
302
|
-
|
|
304
|
+
// Audit 2026-05 SC-C2: floorEligible: true opts suspicious_dataflow into the
|
|
305
|
+
// dilution floor without adding a `from` constraint (which would block MEDIUM
|
|
306
|
+
// count-threshold downgrades). Restores 1 instance at original severity so an
|
|
307
|
+
// attacker can't dilute real exfil flows by injecting benign data flows.
|
|
308
|
+
suspicious_dataflow: { maxCount: 3, to: 'LOW', floorEligible: true },
|
|
303
309
|
obfuscation_detected: { maxCount: 3, to: 'LOW' },
|
|
304
310
|
module_compile_dynamic: { maxCount: 3, from: 'HIGH', to: 'LOW' },
|
|
305
311
|
module_compile: { maxCount: 3, from: 'HIGH', to: 'LOW' },
|
|
@@ -318,8 +324,10 @@ const FP_COUNT_THRESHOLDS = {
|
|
|
318
324
|
// P6: HTTP client libraries (undici, aws-sdk, nodemailer, jsdom) parse Authorization/Bearer headers
|
|
319
325
|
// with 3+ credential regexes. Real harvesters use 1-2 targeted regexes.
|
|
320
326
|
// Audit v3: removed `from` constraint — ALL severity levels downgraded when count > 2.
|
|
321
|
-
//
|
|
322
|
-
|
|
327
|
+
// Audit 2026-05 SC-C2: floorEligible: true restores 1 instance at original
|
|
328
|
+
// severity. Without it an attacker injects 3+ benign header regexes (Authorization,
|
|
329
|
+
// Cookie, X-Forwarded-For) and downgrades all real exfil regexes to LOW.
|
|
330
|
+
credential_regex_harvest: { maxCount: 2, to: 'LOW', floorEligible: true },
|
|
323
331
|
// P7→Audit v3: Config frameworks (pm2, nx, dotenv, aws-sdk, oclif) read 5+ env vars — not credential theft.
|
|
324
332
|
// Real stealers access 1-4 targeted env vars. Count >4 = config loader pattern.
|
|
325
333
|
// Lowered from 10→4 for better FP reduction. B5 network_sink_immunity protects genuine exfiltration.
|
|
@@ -1011,16 +1019,16 @@ function applyFPReductions(threats, reachableFiles, packageName, packageDeps, re
|
|
|
1011
1019
|
|
|
1012
1020
|
// Dilution floor: retain at least one instance at original severity per type
|
|
1013
1021
|
// to prevent complete count-threshold dilution by injected benign patterns.
|
|
1014
|
-
//
|
|
1015
|
-
//
|
|
1016
|
-
//
|
|
1017
|
-
//
|
|
1022
|
+
// Applies to types with low maxCount (≤3) AND either a `from` severity
|
|
1023
|
+
// constraint OR an explicit `floorEligible: true` opt-in (audit 2026-05 SC-C2).
|
|
1024
|
+
// High-count types (dynamic_require, env_access) represent legitimate framework
|
|
1025
|
+
// patterns and remain ineligible (no floor → full downgrade allowed).
|
|
1018
1026
|
const restoredTypes = new Set();
|
|
1019
1027
|
for (const t of threats) {
|
|
1020
1028
|
const lastReduction = t.reductions?.find(r => r.rule === 'count_threshold');
|
|
1021
1029
|
if (lastReduction && !restoredTypes.has(t.type)) {
|
|
1022
1030
|
const rule = FP_COUNT_THRESHOLDS[t.type];
|
|
1023
|
-
if (rule && rule.from && rule.maxCount <= 3) {
|
|
1031
|
+
if (rule && (rule.from || rule.floorEligible) && rule.maxCount <= 3) {
|
|
1024
1032
|
t.severity = lastReduction.from;
|
|
1025
1033
|
t.reductions = t.reductions.filter(r => r.rule !== 'count_threshold');
|
|
1026
1034
|
t.reductions.push({ rule: 'count_threshold_floor', note: 'retained one instance at original severity' });
|