muaddib-scanner 2.10.94 → 2.10.96
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -76,6 +76,504 @@ const TOP_THREAT_TYPES = [
|
|
|
76
76
|
|
|
77
77
|
const TOP_THREAT_TYPES_SET = new Set(TOP_THREAT_TYPES);
|
|
78
78
|
|
|
79
|
+
// --- Cluster FP contextual feature helpers (v2.10.96) ---
|
|
80
|
+
//
|
|
81
|
+
// Target: P1 CRITICAL webhook suppression (score >= 75). The four helpers
|
|
82
|
+
// below encode the four FP clusters identified in the v2.10.9x weekly FP
|
|
83
|
+
// review: Cluster A (native binary installers via GitHub releases),
|
|
84
|
+
// Cluster B (minified bundles w/o install scripts), Cluster C (dev tooling
|
|
85
|
+
// writing git hooks from local files), Cluster E (first-party SDKs exfil
|
|
86
|
+
// pattern on their own API).
|
|
87
|
+
//
|
|
88
|
+
// These features intentionally operate on scan-result signals ONLY so they
|
|
89
|
+
// can be recomputed on historical JSONL records without re-scanning.
|
|
90
|
+
|
|
91
|
+
// Threats whose presence implies the package performs a network call.
|
|
92
|
+
const NETWORK_ADJACENT_TYPES = new Set([
|
|
93
|
+
'suspicious_dataflow',
|
|
94
|
+
'network_require',
|
|
95
|
+
'remote_code_load',
|
|
96
|
+
'curl_exec',
|
|
97
|
+
'intent_credential_exfil',
|
|
98
|
+
'intent_command_exfil',
|
|
99
|
+
'dangerous_call_fetch',
|
|
100
|
+
'external_tarball_dep',
|
|
101
|
+
'dependency_url_suspicious'
|
|
102
|
+
]);
|
|
103
|
+
|
|
104
|
+
// Package-scope -> first-party domain mapping for well-known SDK publishers.
// Keys are lowercase npm scope names (without '@'). Used by
// `network_destination_first_party` when the package is scoped.
//
// The table is built on a null-prototype object because it is indexed with
// package / scope names taken from package metadata: on a plain object a
// name such as `constructor` or `__proto__` would resolve to an inherited
// Object.prototype member instead of `undefined`.
const SCOPE_FIRST_PARTY_DOMAINS = Object.assign(Object.create(null), {
  'anthropic-ai': ['anthropic.com'],
  'openai': ['openai.com'],
  'google-cloud': ['googleapis.com', 'google.com'],
  'google-ai': ['googleapis.com', 'google.com'],
  'aws-sdk': ['amazonaws.com', 'aws.amazon.com'],
  'aws-amplify': ['amazonaws.com'],
  'azure': ['azure.com', 'microsoft.com'],
  'microsoft': ['microsoft.com', 'azure.com'],
  'supabase': ['supabase.co', 'supabase.com'],
  'stripe': ['stripe.com'],
  'twilio': ['twilio.com'],
  'sendgrid': ['sendgrid.com', 'sendgrid.net'],
  'datadog': ['datadoghq.com'],
  'sentry': ['sentry.io'],
  'slack': ['slack.com'],
  'octokit': ['github.com', 'githubusercontent.com'],
  'cloudflare': ['cloudflare.com'],
  'auth0': ['auth0.com'],
  'hubspot': ['hubspot.com', 'hubapi.com'],
  'contentful': ['contentful.com'],
  'mongodb': ['mongodb.com', 'mongodb.net'],
  'mailgun': ['mailgun.net', 'mailgun.com'],
  'vercel': ['vercel.com', 'vercel.app'],
  'netlify': ['netlify.com', 'netlify.app'],
  'pinecone-database': ['pinecone.io'],
  'langchain': ['langchain.com']
});
|
|
135
|
+
|
|
136
|
+
// GitHub release hosts (install_url_github_releases).
|
|
137
|
+
const GITHUB_RELEASE_HOSTS = ['github.com', 'objects.githubusercontent.com', 'raw.githubusercontent.com'];
|
|
138
|
+
|
|
139
|
+
// Bundle file-shape patterns. Conservative: only flag paths that clearly
// correspond to build output, so the feature stays specific to Cluster B.
//
// BUNDLE_PATH_RE matches (case-insensitively) a path that contains one of
// the listed directory names as a whole segment followed by a `/` or `\`.
// NOTE(review): the list includes `lib`, which is also a common location
// for hand-written sources — confirm this is acceptable for the proxy.
const BUNDLE_PATH_RE = /(?:^|[\\/])(?:dist|build|lib|out|umd|esm|cjs|bundle|_next[\\/]static|\.next[\\/]static|public[\\/]static|webpack|rollup)[\\/]/i;
// BUNDLE_FILE_RE matches (case-insensitively) typical bundle file names at
// the end of the path (`*.min.js`, `*.bundle.mjs`, `chunk-<hex>.js`, ...).
// NOTE(review): the last alternative, `vendors?~?.*\.js$`, is not anchored
// to the start of a file name and `.*` can span path separators, so any
// `.js` path that merely contains "vendor" matches.
const BUNDLE_FILE_RE = /\.(?:min|bundle|prod|umd|iife|esm|cjs)\.(?:m?js|cjs)$|\.min\.js$|chunk-[0-9a-f]+\.js$|vendors?~?.*\.js$/i;
|
|
143
|
+
|
|
144
|
+
// Threat types that indicate remote content fetch in a file (for
|
|
145
|
+
// `git_hook_source_local` heuristic: absence => local source).
|
|
146
|
+
const REMOTE_FETCH_TYPES = new Set([
|
|
147
|
+
'remote_code_load',
|
|
148
|
+
'network_require',
|
|
149
|
+
'curl_exec',
|
|
150
|
+
'suspicious_dataflow',
|
|
151
|
+
'suspicious_domain',
|
|
152
|
+
'dangerous_call_fetch',
|
|
153
|
+
'external_tarball_dep',
|
|
154
|
+
'dependency_url_suspicious',
|
|
155
|
+
'binary_dropper',
|
|
156
|
+
'download_exec_binary'
|
|
157
|
+
]);
|
|
158
|
+
|
|
159
|
+
// Match URLs inside threat message strings (legacy fallback when threats
// predate v2.10.96 URL enrichment — historical JSONL scan results).
//
// Capture group 1 is the host. Any `userinfo@` prefix of the authority is
// consumed by a non-capturing group first, so that a URL such as
// `https://github.com:x@evil.example/` yields `evil.example` rather than the
// attacker-chosen userinfo `github.com`. A backslash ends the userinfo scan:
// the WHATWG URL parser treats it like `/` for http(s), so `@` after it is
// part of the path, not of the authority.
const MESSAGE_URL_RE = /https?:\/\/(?:[^\s'"`)<>\/\\?#@]*@)*([a-zA-Z0-9._-]+)(?:[:/?#][^\s'"`)<>]*)?/g;
|
|
162
|
+
|
|
163
|
+
/**
 * Extract the lowercase host of an http(s) URL string.
 *
 * The authority may carry a `userinfo@` prefix; it is skipped so that the
 * returned value is the host the request would actually reach. Without this,
 * `https://github.com:x@evil.example/` would be reported as `github.com` and
 * pass host allowlists. Backslash is treated as an authority terminator (as
 * the WHATWG URL parser does for http/https), so it can neither be part of
 * the userinfo nor of the host.
 *
 * @param {*} url - Candidate URL; anything but a string yields null.
 * @returns {string|null} Lowercase host without port, or null when `url` is
 *   not a string or does not start with `http://` / `https://`.
 */
function hostFromUrl(url) {
  if (typeof url !== 'string') return null;
  const m = url.match(/^https?:\/\/(?:[^\/\\?#\s'"`)<>@]*@)*([^\/\\:?#\s'"`)<>@]+)/i);
  return m ? m[1].toLowerCase() : null;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Collect the set of lowercase hosts referenced by a list of threats.
 *
 * Structured `urls` arrays take precedence: as soon as one threat carries a
 * non-empty `urls` array, only structured URLs are used. Otherwise hosts are
 * scraped from the threats' `message` strings with MESSAGE_URL_RE (legacy
 * records that predate URL enrichment).
 *
 * @param {Array<object|null>} threats - Threat records; falsy entries are skipped.
 * @returns {Set<string>} Distinct hosts (possibly empty).
 */
function extractHostsFromThreats(threats) {
  const found = new Set();
  let structuredSeen = false;

  for (const threat of threats) {
    if (!threat || !Array.isArray(threat.urls) || threat.urls.length === 0) continue;
    structuredSeen = true;
    for (const url of threat.urls) {
      const host = hostFromUrl(url);
      if (host) found.add(host);
    }
  }
  // Structured data wins; the message scrape below only serves old records.
  if (structuredSeen) return found;

  for (const threat of threats) {
    const text = threat && threat.message;
    if (!text || typeof text !== 'string') continue;
    // MESSAGE_URL_RE is a shared /g regex: rewind it before each message.
    MESSAGE_URL_RE.lastIndex = 0;
    for (let hit = MESSAGE_URL_RE.exec(text); hit !== null; hit = MESSAGE_URL_RE.exec(text)) {
      if (hit[1]) found.add(hit[1].toLowerCase());
    }
  }
  return found;
}
|
|
196
|
+
|
|
197
|
+
/**
 * Tell whether `host` equals one of the candidate domains or is a
 * dot-separated subdomain of one of them.
 *
 * @param {string} host - Host to test.
 * @param {Iterable<string>} candidates - Allowed registrable domains / hosts.
 * @returns {boolean} True on the first candidate that matches.
 */
function hostMatchesSuffix(host, candidates) {
  for (const domain of candidates) {
    if (host === domain) return true;
    // Require the leading dot so "notgithub.com" does not match "github.com".
    if (host.endsWith(`.${domain}`)) return true;
  }
  return false;
}
|
|
203
|
+
|
|
204
|
+
/**
 * Return the lowercase npm scope of a package name (`@scope/name` -> `scope`).
 *
 * @param {*} name - Package name.
 * @returns {string|null} The scope without '@', or null when `name` is not a
 *   non-empty string or is not of the form `@scope/...`.
 */
function getPackageScope(name) {
  if (typeof name !== 'string' || name === '') return null;
  const parsed = /^@([^/]+)\//.exec(name);
  return parsed === null ? null : parsed[1].toLowerCase();
}
|
|
209
|
+
|
|
210
|
+
/**
 * Return the lowercase host of the first usable homepage URL found on the
 * package metadata.
 *
 * Looked up in order: `meta.homepage`, `meta.registryMeta.homepage`,
 * `meta.npmRegistryMeta.homepage`. Entries that are not http(s) URL strings
 * are skipped.
 *
 * @param {object|null|undefined} meta - Package metadata.
 * @returns {string|null} Homepage host, or null when none can be extracted.
 */
function getHomepageHost(meta) {
  if (!meta) return null;
  const holders = [meta, meta.registryMeta, meta.npmRegistryMeta];
  for (const holder of holders) {
    const homepage = holder && holder.homepage;
    if (!homepage || typeof homepage !== 'string') continue;
    const parsed = /^https?:\/\/([^/:?#]+)/i.exec(homepage);
    if (parsed !== null) return parsed[1].toLowerCase();
  }
  return null;
}
|
|
224
|
+
|
|
225
|
+
/**
 * Feature 1 — TRUE iff the package performs a network call AND every
 * extractable destination is a first-party host of that package.
 * First-party = package-scope SDK publisher or package.homepage host.
 *
 * Targets Cluster E: Claude Code / OpenAI / Anthropic SDK wrappers that
 * read API keys from env and POST them to their legitimate vendor API.
 *
 * Decision steps:
 *   1. At least one threat type must be in NETWORK_ADJACENT_TYPES.
 *   2. First-party domains = SCOPE_FIRST_PARTY_DOMAINS entry for the scope
 *      (scoped packages) or for the bare name (unscoped packages), plus the
 *      homepage host when one is available. None -> FALSE.
 *   3. No destination host extractable -> TRUE only for a scoped package
 *      whose scope is in the table.
 *   4. Otherwise every extracted host must equal, or be a subdomain of, a
 *      first-party domain.
 *
 * NOTE(review): the homepage host is taken from package metadata as-is;
 * confirm callers treat that field as untrusted where it matters.
 *
 * @param {object|null|undefined} result - Scan result; `result.threats` is read.
 * @param {object|null|undefined} meta - Package metadata (`name`, homepage fields).
 * @returns {boolean}
 */
function networkDestinationFirstParty(result, meta) {
  const threats = (result && result.threats) || [];
  // Skip falsy entries, as extractHostsFromThreats does.
  const hasNetwork = threats.some(t => t && NETWORK_ADJACENT_TYPES.has(t.type));
  if (!hasNetwork) return false;

  // Own-property lookup only: the key comes from the package name, and names
  // like `constructor` / `__proto__` would otherwise resolve to inherited
  // Object.prototype members (and make the spread below throw).
  const knownDomains = (key) => (
    key && Object.prototype.hasOwnProperty.call(SCOPE_FIRST_PARTY_DOMAINS, key)
      ? SCOPE_FIRST_PARTY_DOMAINS[key]
      : null
  );

  const firstParty = [];
  const scope = getPackageScope(meta && meta.name);
  const scopeDomains = knownDomains(scope);
  if (scopeDomains) firstParty.push(...scopeDomains);

  // Unscoped packages: accept exact-name match against the scope table for
  // packages whose own identifier IS the publisher (e.g., `stripe`, `twilio`).
  const baseName = (meta && meta.name && String(meta.name).replace(/^@[^/]+\//, '').toLowerCase()) || '';
  const nameDomains = scope ? null : knownDomains(baseName);
  if (nameDomains) firstParty.push(...nameDomains);

  const homepageHost = getHomepageHost(meta);
  if (homepageHost) firstParty.push(homepageHost);
  if (firstParty.length === 0) return false;

  const hosts = extractHostsFromThreats(threats);
  // No destination host was observable (scanner saw the network sink but
  // no URL literal leaked into threat messages). Accept as first-party only
  // when the package identity alone is a strong signal (scoped SDK).
  if (hosts.size === 0) return scopeDomains !== null;

  for (const h of hosts) {
    if (!hostMatchesSuffix(h, firstParty)) return false;
  }
  return true;
}
|
|
264
|
+
|
|
265
|
+
/**
 * Feature 2 — TRUE iff the package behaves as a native-binary installer
 * AND every host extracted from its threats is a GitHub release host.
 *
 * Targets Cluster A: esbuild / swc / prisma style platform binary drops.
 *
 * Conditions, in order:
 *   - at least one `binary_dropper` or `download_exec_binary` threat;
 *   - no `suspicious_domain` threat;
 *   - at least one host could be extracted, and every extracted host equals
 *     or is a subdomain of an entry of GITHUB_RELEASE_HOSTS.
 * Only the host is inspected; the URL path is not checked.
 *
 * @param {object|null|undefined} result - Scan result; `result.threats` is read.
 * @returns {boolean}
 */
function installUrlGithubReleases(result) {
  const threats = (result && result.threats) || [];
  // Falsy entries are skipped, as extractHostsFromThreats does.
  const hasInstaller = threats.some(
    t => t && (t.type === 'binary_dropper' || t.type === 'download_exec_binary')
  );
  if (!hasInstaller) return false;
  // Any known-suspicious destination present => not a github-only installer.
  if (threats.some(t => t && t.type === 'suspicious_domain')) return false;

  const hosts = extractHostsFromThreats(threats);
  if (hosts.size === 0) return false;
  for (const h of hosts) {
    if (!hostMatchesSuffix(h, GITHUB_RELEASE_HOSTS)) return false;
  }
  // The set is non-empty and every member matched, so at least one GitHub
  // release host is present; no second pass is needed.
  return true;
}
|
|
291
|
+
|
|
292
|
+
/**
 * Path-shape proxy for "this file is build output": true when the path
 * matches BUNDLE_PATH_RE (bundle-like directory) or BUNDLE_FILE_RE
 * (bundle-like file name).
 *
 * @param {*} file - File path from a threat record.
 * @returns {boolean} False for anything that is not a non-empty string.
 */
function hasBundlePath(file) {
  if (typeof file !== 'string' || file.length === 0) return false;
  if (BUNDLE_PATH_RE.test(file)) return true;
  return BUNDLE_FILE_RE.test(file);
}
|
|
296
|
+
|
|
297
|
+
/**
 * Tell whether the registry metadata declares a non-blank `preinstall`,
 * `install` or `postinstall` script.
 *
 * Only `meta.registryMeta.scripts` is consulted; when it is missing or not
 * an object the answer is false.
 *
 * @param {object|null|undefined} meta - Package metadata.
 * @returns {boolean}
 */
function hasLifecycleScripts(meta) {
  const declared = (meta && meta.registryMeta && meta.registryMeta.scripts) || null;
  if (declared === null || typeof declared !== 'object') return false;
  return ['preinstall', 'install', 'postinstall'].some((hook) => {
    const command = declared[hook];
    return typeof command === 'string' && command.trim().length > 0;
  });
}
|
|
306
|
+
|
|
307
|
+
// Threshold derived from the v2.10.9x FP review of minified bundles:
|
|
308
|
+
// Cluster B FPs all ship at least one > 100KB file (typical webpack chunk
|
|
309
|
+
// is 200-800KB). 100KB is low enough to catch small bundlers yet high
|
|
310
|
+
// enough to exclude hand-written source.
|
|
311
|
+
const BUNDLE_FILE_MIN_BYTES = 100 * 1024;
|
|
312
|
+
|
|
313
|
+
/**
 * Feature 3 — TRUE iff every file that carries a finding looks like a
 * bundle AND the package declares no install lifecycle script. Targets
 * Cluster B: minified webpack/rollup output triggering eval / obfuscation
 * heuristics without any runtime install vector.
 *
 * "Looks like a bundle":
 *   - when `result.summary.fileSizes` is a non-empty object, the file must
 *     have a numeric size there that is not below BUNDLE_FILE_MIN_BYTES;
 *   - otherwise (historical records without sizes) the path-shape proxy
 *     `hasBundlePath` is used (`dist/`, `.min.js`, etc.).
 * Threats without a `file` are ignored; if no threat names a file the
 * result is FALSE.
 *
 * `registryMeta.scripts` is REQUIRED: callers that do not populate it will
 * always get FALSE — we must not claim a package has no install hook when
 * we never looked.
 *
 * @param {object|null|undefined} result - Scan result (`threats`, `summary.fileSizes`).
 * @param {object|null|undefined} meta - Package metadata (`registryMeta.scripts`).
 * @returns {boolean}
 */
function bundleWithoutInstallScripts(result, meta) {
  const scriptsKnown = Boolean(meta && meta.registryMeta) && meta.registryMeta.scripts !== undefined;
  if (!scriptsKnown) return false;
  if (hasLifecycleScripts(meta)) return false;

  const threats = (result && result.threats) || [];
  const flaggedFiles = new Set(threats.map(t => t.file).filter(Boolean));
  // Covers both "no threats" and "no threat names a file".
  if (flaggedFiles.size === 0) return false;

  const sizes = result && result.summary ? result.summary.fileSizes : undefined;
  const sizesAvailable = Boolean(sizes) && typeof sizes === 'object' && Object.keys(sizes).length > 0;

  const isLargeEnough = (file) => {
    const bytes = sizes[file];
    if (typeof bytes !== 'number') return false;
    // Negated "<" (rather than ">=") keeps the original comparison exactly.
    return !(bytes < BUNDLE_FILE_MIN_BYTES);
  };

  const files = [...flaggedFiles];
  return sizesAvailable
    ? files.every(isLargeEnough)
    : files.every(file => hasBundlePath(file));
}
|
|
362
|
+
|
|
363
|
+
/**
 * Feature 4 — TRUE iff the package fires `git_hooks_injection` AND none of
 * the files that triggered it also carries a threat whose type is in
 * REMOTE_FETCH_TYPES. Proxy for "hook body was read from a local source
 * file", i.e. dev tooling like husky / simple-git-hooks installing its own
 * canned hook.
 *
 * Hook threats without a `file` cannot be correlated and therefore never
 * disqualify the package.
 *
 * @param {object|null|undefined} result - Scan result; `result.threats` is read.
 * @returns {boolean}
 */
function gitHookSourceLocal(result) {
  const threats = (result && result.threats) || [];
  const hooks = threats.filter(t => t.type === 'git_hooks_injection');
  if (hooks.length === 0) return false;

  // Files in which the scanner also saw some form of remote fetch.
  const remoteFiles = new Set(
    threats
      .filter(t => t.file && REMOTE_FETCH_TYPES.has(t.type))
      .map(t => t.file)
  );
  const hookSharesRemoteFile = hooks.some(h => h.file && remoteFiles.has(h.file));
  return !hookSharesRemoteFile;
}
|
|
384
|
+
|
|
385
|
+
// --- v2.10.96 extended FP features (F5-F8, VPS review 2026-04-18) ---
|
|
386
|
+
//
|
|
387
|
+
// Covers an additional 319 FP (15.2%) on top of F1-F4; combined F1-F8
|
|
388
|
+
// cover 2069/2104 reviewed FP = 98.3%.
|
|
389
|
+
|
|
390
|
+
// Obfuscation-shape threats used by Feature 6.
|
|
391
|
+
const OBFUSCATION_TYPES = new Set([
|
|
392
|
+
'obfuscation_detected',
|
|
393
|
+
'js_obfuscation_pattern',
|
|
394
|
+
'high_entropy_string',
|
|
395
|
+
'unicode_invisible_injection'
|
|
396
|
+
]);
|
|
397
|
+
|
|
398
|
+
// Threat types that indicate a runtime vector (install, env, network).
|
|
399
|
+
// Their presence disqualifies Feature 6 (obfuscation-without-vector).
|
|
400
|
+
const VECTOR_TYPES = new Set([
|
|
401
|
+
// install / lifecycle
|
|
402
|
+
'lifecycle_script',
|
|
403
|
+
'lifecycle_shell_pipe',
|
|
404
|
+
// env read (credential source)
|
|
405
|
+
'env_access',
|
|
406
|
+
'env_charcode_reconstruction',
|
|
407
|
+
'credential_regex_harvest',
|
|
408
|
+
// network / exec / dynamic code
|
|
409
|
+
'suspicious_dataflow',
|
|
410
|
+
'network_require',
|
|
411
|
+
'remote_code_load',
|
|
412
|
+
'curl_exec',
|
|
413
|
+
'intent_credential_exfil',
|
|
414
|
+
'intent_command_exfil',
|
|
415
|
+
'dangerous_call_fetch',
|
|
416
|
+
'external_tarball_dep',
|
|
417
|
+
'dependency_url_suspicious',
|
|
418
|
+
'dangerous_exec',
|
|
419
|
+
'dangerous_call_eval',
|
|
420
|
+
'dangerous_call_exec',
|
|
421
|
+
'dangerous_call_function',
|
|
422
|
+
'module_compile',
|
|
423
|
+
'binary_dropper',
|
|
424
|
+
'download_exec_binary',
|
|
425
|
+
'fetch_decrypt_exec',
|
|
426
|
+
'suspicious_domain',
|
|
427
|
+
'reverse_shell'
|
|
428
|
+
]);
|
|
429
|
+
|
|
430
|
+
// Threats that indicate a network egress capability somewhere in the
|
|
431
|
+
// package. Broader than NETWORK_ADJACENT_TYPES: includes domain literals,
|
|
432
|
+
// drop-exec pairs, and suspicious dataflows. Used by Feature 8.
|
|
433
|
+
const EGRESS_TYPES = new Set([
|
|
434
|
+
'suspicious_dataflow',
|
|
435
|
+
'network_require',
|
|
436
|
+
'remote_code_load',
|
|
437
|
+
'curl_exec',
|
|
438
|
+
'intent_credential_exfil',
|
|
439
|
+
'intent_command_exfil',
|
|
440
|
+
'dangerous_call_fetch',
|
|
441
|
+
'external_tarball_dep',
|
|
442
|
+
'dependency_url_suspicious',
|
|
443
|
+
'suspicious_domain',
|
|
444
|
+
'binary_dropper',
|
|
445
|
+
'download_exec_binary',
|
|
446
|
+
'fetch_decrypt_exec',
|
|
447
|
+
'reverse_shell'
|
|
448
|
+
]);
|
|
449
|
+
|
|
450
|
+
// Dep-confusion / defensive-placeholder phrases matched against the
// package description. Case-insensitive; the alternatives are joined with
// `|` into a single unanchored pattern.
// NOTE(review): no `\b` word boundaries are used, so a phrase also matches
// when it is embedded in a longer token (e.g. `dep[- ]?confusion` inside
// "indep-confusion"). Add anchors if strict whole-phrase matching is
// required. The list is deliberately conservative —
// a real README that happens to mention "dependency confusion" once
// still needs to look like a placeholder in every other dimension
// (see `placeholderAntiDepConfusion`).
const PLACEHOLDER_DESCRIPTION_RE = new RegExp([
  'dependency[- ]?confusion',
  'dep[- ]?confusion',
  'namespace[- ]?squatt?ing',
  'name[- ]?squatt?ing',
  'squatting[- ]?prevention',
  'defensive[- ]?(?:registration|publish|package|placeholder)',
  'placeholder[- ]?(?:package|to[- ]?reserve|for[- ]?the[- ]?name)',
  'reserv(?:e|ing|ation)[- ]?(?:this[- ]?)?(?:name|package|namespace)',
  'prevents?[- ]+(?:malicious[- ]+)?dependency[- ]+confusion',
  'blocks?[- ]+(?:malicious[- ]+)?dependency[- ]+confusion',
  'reserved[- ]+by[- ]+.*?(?:to[- ]+prevent|against)'
].join('|'), 'i');
|
|
469
|
+
|
|
470
|
+
// Alias — same semantics as hasLifecycleScripts (used by F3), just named
|
|
471
|
+
// from the perspective of F7/F8 which reason about install vectors.
|
|
472
|
+
const hasInstallScript = hasLifecycleScripts;
|
|
473
|
+
|
|
474
|
+
/**
 * Return the first non-empty description string found on the package
 * metadata, looking at `meta.description`, `meta.registryMeta.description`
 * and `meta.npmRegistryMeta.description` in that order.
 *
 * @param {object|null|undefined} meta - Package metadata.
 * @returns {string} The description, or '' when none is available.
 */
function getDescription(meta) {
  if (!meta) return '';
  const firstText = [meta, meta.registryMeta, meta.npmRegistryMeta]
    .map(holder => holder && holder.description)
    .find(text => typeof text === 'string' && text.length > 0);
  return firstText === undefined ? '' : firstText;
}
|
|
486
|
+
|
|
487
|
+
/**
 * Feature 5 — TRUE iff a `typosquat_detected` or `pypi_typosquat_detected`
 * threat fires on a scoped package (`@scope/name`). Rationale: the typosquat
 * rule computes edit distance on the bare name (`@vendor/client-foo` ->
 * `client-foo`) and will sometimes treat `@scope/adapter-rubrik` as a
 * typosquat of the unscoped `rubrik`. Scoping implies a separate namespace,
 * so the collision is structurally false.
 *
 * A name counts as scoped when it starts with '@' and contains a '/'.
 *
 * Covers 52 FP (2.5%) on the VPS extended corpus.
 *
 * @param {object|null|undefined} result - Scan result; `result.threats` is read.
 * @param {object|null|undefined} meta - Package metadata; `meta.name` is read.
 * @returns {boolean}
 */
function typosquatScopedPackage(result, meta) {
  const threats = (result && result.threats) || [];
  const typosquatKinds = ['typosquat_detected', 'pypi_typosquat_detected'];
  if (!threats.some(t => typosquatKinds.includes(t.type))) return false;

  const pkgName = meta && meta.name ? String(meta.name) : '';
  return pkgName[0] === '@' && pkgName.indexOf('/') !== -1;
}
|
|
506
|
+
|
|
507
|
+
/**
 * Feature 6 — TRUE iff the package shows at least one obfuscation-shape
 * finding (a type in OBFUSCATION_TYPES: obfuscation_detected,
 * js_obfuscation_pattern, high_entropy_string, unicode_invisible_injection)
 * AND carries no install / env / network vector threat (no type in
 * VECTOR_TYPES). Findings whose type is in neither set do not affect the
 * outcome. This is the commercial-obfuscator pattern: webpack output or a
 * hardening vendor (jsjiami, obfuscator.io) trips heuristics but the scan
 * shows no vector through which anything could be exfiltrated.
 *
 * NOTE(review): exclusivity with F8 holds only if the scanner emits a
 * `lifecycle_script`-style threat whenever an install script is declared;
 * F8 itself keys off registry `scripts` metadata — confirm.
 *
 * Covers 33 FP (1.6%).
 *
 * @param {object|null|undefined} result - Scan result; `result.threats` is read.
 * @returns {boolean}
 */
function obfuscationWithoutVector(result) {
  const threats = (result && result.threats) || [];
  const kinds = threats.map(t => t.type);

  // An obfuscation type is never treated as a vector, even if it were ever
  // listed in both sets.
  const hasVector = kinds.some(kind => !OBFUSCATION_TYPES.has(kind) && VECTOR_TYPES.has(kind));
  if (hasVector) return false;
  return kinds.some(kind => OBFUSCATION_TYPES.has(kind));
}
|
|
530
|
+
|
|
531
|
+
/**
 * Feature 7 — TRUE iff the package description explicitly declares a
 * defensive / placeholder / dependency-confusion-prevention purpose
 * (PLACEHOLDER_DESCRIPTION_RE), no install script is found, AND the scan
 * reports no CRITICAL or HIGH finding. These are namespace reservations
 * published by vendors to block attackers from squatting internal package
 * names.
 *
 * Covers 15 FP (0.7%). The extra conditions protect against real packages
 * whose description merely mentions dep-confusion as a discussed topic.
 *
 * NOTE(review): "no install script" is decided by `hasInstallScript`, which
 * answers false when `registryMeta.scripts` is absent — unlike
 * `bundleWithoutInstallScripts`, missing script metadata is not rejected
 * here. Confirm that is intended.
 *
 * @param {object|null|undefined} result - Scan result; `result.threats` is read.
 * @param {object|null|undefined} meta - Package metadata (description, scripts).
 * @returns {boolean}
 */
function placeholderAntiDepConfusion(result, meta) {
  const description = getDescription(meta);
  const declaresPlaceholder = Boolean(description) && PLACEHOLDER_DESCRIPTION_RE.test(description);
  if (!declaresPlaceholder) return false;
  if (hasInstallScript(meta)) return false;

  // Real placeholder packages should not carry any CRITICAL/HIGH static
  // finding — empty by construction.
  const threats = (result && result.threats) || [];
  return threats.every(t => t.severity !== 'CRITICAL' && t.severity !== 'HIGH');
}
|
|
554
|
+
|
|
555
|
+
/**
 * Feature 8 — TRUE iff the package declares at least one install
 * lifecycle script AND the scan shows no network egress capability
 * anywhere (no threat whose type is in EGRESS_TYPES).
 *
 * Install scripts that only do `echo`, `mkdir`, `chmod`, `npm run
 * build`, or call a local node script without network access cannot
 * exfiltrate data — the 219 FP this covers are almost entirely build
 * helpers and version/engine gates.
 *
 * Cannot be TRUE together with `networkDestinationFirstParty` or
 * `installUrlGithubReleases` (each needs a threat type that is also in
 * EGRESS_TYPES), nor with `bundleWithoutInstallScripts` (which requires
 * that no install script is declared).
 *
 * @param {object|null|undefined} result - Scan result; `result.threats` is read.
 * @param {object|null|undefined} meta - Package metadata (install scripts).
 * @returns {boolean}
 */
function installScriptNoNetworkEgress(result, meta) {
  if (!hasInstallScript(meta)) return false;
  const threats = (result && result.threats) || [];
  const hasEgress = threats.some(t => EGRESS_TYPES.has(t.type));
  return !hasEgress;
}
|
|
576
|
+
|
|
79
577
|
/**
|
|
80
578
|
* Extract ML features from a scan result object.
|
|
81
579
|
*
|
|
@@ -190,6 +688,16 @@ function extractFeatures(result, meta) {
|
|
|
190
688
|
? Math.round((features.count_total / features.file_count_with_threats) * 100) / 100
|
|
191
689
|
: 0;
|
|
192
690
|
|
|
691
|
+
// --- Cluster FP contextual features (v2.10.96) ---
|
|
692
|
+
features.network_destination_first_party = networkDestinationFirstParty(result, meta) ? 1 : 0;
|
|
693
|
+
features.install_url_github_releases = installUrlGithubReleases(result) ? 1 : 0;
|
|
694
|
+
features.bundle_without_install_scripts = bundleWithoutInstallScripts(result, meta) ? 1 : 0;
|
|
695
|
+
features.git_hook_source_local = gitHookSourceLocal(result) ? 1 : 0;
|
|
696
|
+
features.typosquat_scoped_package = typosquatScopedPackage(result, meta) ? 1 : 0;
|
|
697
|
+
features.obfuscation_without_vector = obfuscationWithoutVector(result) ? 1 : 0;
|
|
698
|
+
features.placeholder_anti_dep_confusion = placeholderAntiDepConfusion(result, meta) ? 1 : 0;
|
|
699
|
+
features.install_script_no_network_egress = installScriptNoNetworkEgress(result, meta) ? 1 : 0;
|
|
700
|
+
|
|
193
701
|
return features;
|
|
194
702
|
}
|
|
195
703
|
|
|
@@ -258,5 +766,14 @@ module.exports = {
|
|
|
258
766
|
extractFeatures,
|
|
259
767
|
buildTrainingRecord,
|
|
260
768
|
TOP_THREAT_TYPES,
|
|
261
|
-
TOP_THREAT_TYPES_SET
|
|
769
|
+
TOP_THREAT_TYPES_SET,
|
|
770
|
+
// Exported for direct unit testing of the cluster-FP helpers.
|
|
771
|
+
networkDestinationFirstParty,
|
|
772
|
+
installUrlGithubReleases,
|
|
773
|
+
bundleWithoutInstallScripts,
|
|
774
|
+
gitHookSourceLocal,
|
|
775
|
+
typosquatScopedPackage,
|
|
776
|
+
obfuscationWithoutVector,
|
|
777
|
+
placeholderAntiDepConfusion,
|
|
778
|
+
installScriptNoNetworkEgress
|
|
262
779
|
};
|
package/src/monitor/ingestion.js
CHANGED
|
@@ -282,8 +282,10 @@ function extractTarballFromDoc(doc) {
|
|
|
282
282
|
const unpackedSize = (versionData.dist && versionData.dist.unpackedSize) || 0;
|
|
283
283
|
const version = versionData.version || latestTag;
|
|
284
284
|
const scripts = versionData.scripts || {};
|
|
285
|
+
const homepage = (typeof versionData.homepage === 'string') ? versionData.homepage : '';
|
|
286
|
+
const description = (typeof versionData.description === 'string') ? versionData.description : '';
|
|
285
287
|
|
|
286
|
-
return { version, tarball, unpackedSize, scripts };
|
|
288
|
+
return { version, tarball, unpackedSize, scripts, homepage, description };
|
|
287
289
|
} catch {
|
|
288
290
|
return null; // Parse failure -> fallback to lazy resolution
|
|
289
291
|
}
|
|
@@ -312,7 +314,9 @@ async function getNpmLatestTarball(packageName) {
|
|
|
312
314
|
const tarball = (data.dist && data.dist.tarball) || null;
|
|
313
315
|
const unpackedSize = (data.dist && data.dist.unpackedSize) || 0;
|
|
314
316
|
const scripts = (data.scripts) || {};
|
|
315
|
-
|
|
317
|
+
const homepage = (typeof data.homepage === 'string') ? data.homepage : '';
|
|
318
|
+
const description = (typeof data.description === 'string') ? data.description : '';
|
|
319
|
+
return { version, tarball, unpackedSize, scripts, homepage, description };
|
|
316
320
|
}
|
|
317
321
|
|
|
318
322
|
// --- npm polling ---
|
|
@@ -251,6 +251,23 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
251
251
|
criticalCount, highCount, mediumCount, lowCount
|
|
252
252
|
} = calculateRiskScore(deduped, intentResult);
|
|
253
253
|
|
|
254
|
+
// v2.10.96: stat each file that carries a threat and expose sizes on the
|
|
255
|
+
// scan result. Used by ML cluster-FP features (bundle_without_install_scripts)
|
|
256
|
+
// to replace the bundle-path-shape proxy with a real ">100KB" check.
|
|
257
|
+
// Cost: one statSync per unique threatened file (typically <30); same
|
|
258
|
+
// operation already runs elsewhere in the pipeline (executor.js:251).
|
|
259
|
+
const fileSizes = {};
|
|
260
|
+
for (const rel of Object.keys(fileScores)) {
|
|
261
|
+
if (!rel || rel === '(unknown)' || rel.startsWith('[SANDBOX]')) continue;
|
|
262
|
+
try {
|
|
263
|
+
const abs = path.isAbsolute(rel) ? rel : path.join(targetPath, rel);
|
|
264
|
+
const st = fs.statSync(abs);
|
|
265
|
+
if (st.isFile()) fileSizes[rel] = st.size;
|
|
266
|
+
} catch {
|
|
267
|
+
// File removed between scan and stat, or unreadable: skip silently.
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
|
|
254
271
|
// Python scan metadata
|
|
255
272
|
const pythonInfo = pythonDeps.length > 0 ? {
|
|
256
273
|
dependencies: pythonDeps.length,
|
|
@@ -276,6 +293,7 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
276
293
|
packageScore,
|
|
277
294
|
mostSuspiciousFile,
|
|
278
295
|
fileScores,
|
|
296
|
+
fileSizes,
|
|
279
297
|
breakdown
|
|
280
298
|
},
|
|
281
299
|
sandbox: sandboxData,
|
|
@@ -90,12 +90,14 @@ function handlePostWalk(ctx) {
|
|
|
90
90
|
t.file === ctx.relFile && execTypes.includes(t.type)
|
|
91
91
|
);
|
|
92
92
|
if (hasExecInFile) {
|
|
93
|
-
|
|
93
|
+
const t = {
|
|
94
94
|
type: 'binary_dropper',
|
|
95
95
|
severity: 'CRITICAL',
|
|
96
96
|
message: `${ctx.chmodMessage} + exec/spawn in same file — binary dropper pattern.`,
|
|
97
97
|
file: ctx.relFile
|
|
98
|
-
}
|
|
98
|
+
};
|
|
99
|
+
if (ctx.fetchUrls && ctx.fetchUrls.length > 0) t.urls = ctx.fetchUrls.slice();
|
|
100
|
+
ctx.threats.push(t);
|
|
99
101
|
}
|
|
100
102
|
}
|
|
101
103
|
|
|
@@ -112,22 +114,26 @@ function handlePostWalk(ctx) {
|
|
|
112
114
|
// Remote code loading: fetch + eval/Function in same file = multi-stage payload
|
|
113
115
|
// Distinct from fetch_decrypt_exec which also requires crypto. This catches SVG/HTML payload extraction.
|
|
114
116
|
if (ctx.hasRemoteFetch && ctx.hasDynamicExec && !ctx.hasCryptoDecipher) {
|
|
115
|
-
|
|
117
|
+
const t = {
|
|
116
118
|
type: 'remote_code_load',
|
|
117
119
|
severity: 'CRITICAL',
|
|
118
120
|
message: 'Remote code loading: network fetch + dynamic eval/Function in same file — multi-stage payload execution.',
|
|
119
121
|
file: ctx.relFile
|
|
120
|
-
}
|
|
122
|
+
};
|
|
123
|
+
if (ctx.fetchUrls && ctx.fetchUrls.length > 0) t.urls = ctx.fetchUrls.slice();
|
|
124
|
+
ctx.threats.push(t);
|
|
121
125
|
}
|
|
122
126
|
|
|
123
127
|
// Wave 4: Remote fetch + crypto decrypt + dynamic eval = steganographic payload chain
|
|
124
128
|
if (ctx.hasRemoteFetch && ctx.hasCryptoDecipher && ctx.hasDynamicExec) {
|
|
125
|
-
|
|
129
|
+
const t = {
|
|
126
130
|
type: 'fetch_decrypt_exec',
|
|
127
131
|
severity: 'CRITICAL',
|
|
128
132
|
message: 'Steganographic payload chain: remote fetch + crypto decryption + dynamic execution. No legitimate package uses this pattern.',
|
|
129
133
|
file: ctx.relFile
|
|
130
|
-
}
|
|
134
|
+
};
|
|
135
|
+
if (ctx.fetchUrls && ctx.fetchUrls.length > 0) t.urls = ctx.fetchUrls.slice();
|
|
136
|
+
ctx.threats.push(t);
|
|
131
137
|
}
|
|
132
138
|
|
|
133
139
|
// Wave 4: Download-execute-cleanup — https download + chmod executable + execSync + unlink
|
|
@@ -135,14 +141,22 @@ function handlePostWalk(ctx) {
|
|
|
135
141
|
// B4: removed fetchOnlySafeDomains guard — compound requires fetch+chmod+exec, which is never legitimate
|
|
136
142
|
// C10: If file also contains hash/checksum verification, downgrade to HIGH — real droppers
|
|
137
143
|
// don't verify payload integrity; legitimate installers (esbuild, sharp) do.
|
|
144
|
+
// v2.10.95: hasHashVerification is now gated by presence of a comparison operator
|
|
145
|
+
// in the same file (see ast.js:211 — best-effort heuristic). No additional tier
|
|
146
|
+
// added: diagnostic on 545 benign packages showed download_exec_binary fires on
|
|
147
|
+
// only 3 packages (esbuild, yarn, @backstage/create-app) and their final score is
|
|
148
|
+
// dominated by other CRITICAL rules, so a MEDIUM tier here had 0 FPR impact.
|
|
149
|
+
// Full validation in data/fp-v2.10.95-validation.md.
|
|
138
150
|
if (ctx.hasRemoteFetch && ctx.hasChmodExecutable && ctx.hasExecSyncCall) {
|
|
139
|
-
|
|
151
|
+
const t = {
|
|
140
152
|
type: 'download_exec_binary',
|
|
141
153
|
severity: ctx.hasHashVerification ? 'HIGH' : 'CRITICAL',
|
|
142
154
|
message: 'Download-execute pattern: remote fetch + chmod executable + execSync in same file.' +
|
|
143
155
|
(ctx.hasHashVerification ? ' Hash verification detected — likely legitimate binary installer.' : ' Binary dropper camouflaged as native addon build.'),
|
|
144
156
|
file: ctx.relFile
|
|
145
|
-
}
|
|
157
|
+
};
|
|
158
|
+
if (ctx.fetchUrls && ctx.fetchUrls.length > 0) t.urls = ctx.fetchUrls.slice();
|
|
159
|
+
ctx.threats.push(t);
|
|
146
160
|
}
|
|
147
161
|
|
|
148
162
|
// Wave 4: IDE persistence via content co-occurrence — tasks.json + runOn + writeFileSync
|
package/src/scanner/ast.js
CHANGED
|
@@ -205,10 +205,20 @@ function analyzeFile(content, filePath, basePath) {
|
|
|
205
205
|
stringBuildVars: new Set(), // variables assigned from BinaryExpression with '+' (string concat)
|
|
206
206
|
// Audit v3 B2: Entropy split detection — high-entropy string concat + eval/decode
|
|
207
207
|
highEntropyConcatFound: false, // set when a concat chain with >=3 leaves and high combined entropy is found
|
|
208
|
-
// C10: Hash verification — legitimate binary installers verify checksums
|
|
209
|
-
//
|
|
210
|
-
//
|
|
211
|
-
|
|
208
|
+
// C10: Hash verification — legitimate binary installers verify checksums.
|
|
209
|
+
// v2.10.95: file-level heuristic hardened with a comparison check. Requires
|
|
210
|
+
// createHash+digest AND at least one comparison/assert/throw in the same file.
|
|
211
|
+
// THIS IS NOT A PROOF that the hash is actually verified — a malicious author
|
|
212
|
+
// can include a === or assert elsewhere in the file without comparing the
|
|
213
|
+
// digest result. This gate is best-effort and gains value only through the
|
|
214
|
+
// triple-gate in handle-post-walk.js (which also requires fetchOnlySafeDomains).
|
|
215
|
+
// Proper fix would require function-scope AST tracking to confirm the
|
|
216
|
+
// comparison consumes the digest result — deferred until a dedicated
|
|
217
|
+
// taint-tracking PR.
|
|
218
|
+
hasHashVerification:
|
|
219
|
+
/\bcreateHash\s*\(/.test(content) &&
|
|
220
|
+
/\.digest\s*\(/.test(content) &&
|
|
221
|
+
/\b(===|!==|\.equals\s*\(|assert\.(strictEqual|equal|deepEqual|deepStrictEqual)\s*\(|\bthrow\b)/.test(content),
|
|
212
222
|
// GlassWorm: variation selector decoder pattern (.codePointAt + 0xFE00/0xE0100)
|
|
213
223
|
hasCodePointAt: false,
|
|
214
224
|
hasVariationSelectorConst: false,
|
|
@@ -271,6 +281,10 @@ function analyzeFile(content, filePath, basePath) {
|
|
|
271
281
|
})) {
|
|
272
282
|
ctx.fetchOnlySafeDomains = true;
|
|
273
283
|
}
|
|
284
|
+
// v2.10.96: retain the URL set on ctx so post-walk detectors can attach
|
|
285
|
+
// it to download/install-shaped threats. Consumed by ML feature
|
|
286
|
+
// install_url_github_releases to avoid regex-on-message proxying.
|
|
287
|
+
ctx.fetchUrls = urlMatches.slice(0, 32);
|
|
274
288
|
}
|
|
275
289
|
|
|
276
290
|
walk.simple(ast, {
|
|
@@ -142,6 +142,8 @@ async function getPackageMetadata(packageName) {
|
|
|
142
142
|
const weeklyDownloads = downloadsData?.downloads ?? 0;
|
|
143
143
|
const authorPackageCount = authorData?.total ?? 0;
|
|
144
144
|
const versionCount = meta.versions ? Object.keys(meta.versions).length : 0;
|
|
145
|
+
const description = (typeof latestMeta?.description === 'string' ? latestMeta.description
|
|
146
|
+
: (typeof meta.description === 'string' ? meta.description : ''));
|
|
145
147
|
|
|
146
148
|
return {
|
|
147
149
|
created_at: createdAt,
|
|
@@ -151,7 +153,8 @@ async function getPackageMetadata(packageName) {
|
|
|
151
153
|
has_readme: hasReadme,
|
|
152
154
|
has_repository: hasRepository,
|
|
153
155
|
version_count: versionCount,
|
|
154
|
-
readme_size: readmeText.length
|
|
156
|
+
readme_size: readmeText.length,
|
|
157
|
+
description
|
|
155
158
|
};
|
|
156
159
|
}
|
|
157
160
|
|