muaddib-scanner 2.11.8 → 2.11.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +15 -7
- package/package.json +1 -1
- package/src/ioc/scraper.js +43 -4
- package/src/pipeline/processor.js +35 -27
- package/src/scoring.js +2 -1
package/.env.example
CHANGED
|
@@ -21,11 +21,19 @@ OSM_API_TOKEN=
|
|
|
21
21
|
# DISCORD_WEBHOOK_URL=
|
|
22
22
|
|
|
23
23
|
# ----------------------------------------------------------------------------
|
|
24
|
-
#
|
|
24
|
+
# FPR plan gates — DEFAULT ON since v2.11.9 (no need to set these unless opting OUT)
|
|
25
25
|
# ----------------------------------------------------------------------------
|
|
26
|
-
|
|
27
|
-
#
|
|
28
|
-
#
|
|
29
|
-
#
|
|
30
|
-
#
|
|
31
|
-
#
|
|
26
|
+
# Measured impact on the v2.11.4 evaluation corpus (1054 packages):
|
|
27
|
+
# FPR curated 15.6% -> 9.36% (-6.24 pp), FPR random 7.0% -> 2.0% (-5.00 pp).
|
|
28
|
+
# TPR@3 / TPR@20 / ADR strictly unchanged.
|
|
29
|
+
#
|
|
30
|
+
# Opt-OUT individual gates (uncomment + set to 0):
|
|
31
|
+
# MUADDIB_FN_REACHABILITY=0 # function-level reachability gating
|
|
32
|
+
# MUADDIB_DECAY=0 # group score decay on bundled outputs
|
|
33
|
+
# MUADDIB_MATURE_CAP=0 # cap mature, well-trafficked packages at MEDIUM
|
|
34
|
+
# MUADDIB_METADATA_FACTOR=0 # registry signals -> reputation multiplier
|
|
35
|
+
# MUADDIB_DELTA_MODE=0 # delta scoring against prior versions
|
|
36
|
+
#
|
|
37
|
+
# Skip the npm registry fetch ENTIRELY (disables MATURE_CAP + METADATA_FACTOR
|
|
38
|
+
# + DELTA_MODE in one shot, useful for air-gap / offline CI / perf-critical):
|
|
39
|
+
# MUADDIB_NO_REGISTRY_FETCH=1
|
package/package.json
CHANGED
package/src/ioc/scraper.js
CHANGED
|
@@ -12,10 +12,37 @@ const { Spinner } = require('../utils.js');
|
|
|
12
12
|
const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
|
|
13
13
|
|
|
14
14
|
// Version format validation (semver-like + wildcard)
|
|
15
|
-
|
|
15
|
+
// Permissive version validator — accepts:
|
|
16
|
+
// - npm semver (1.2.3, 1.2.3-beta.1+build42, 0.x.y)
|
|
17
|
+
// - PEP 440 (0.1.0b7, 1.2.post1, 1.2.dev0, 0.1.0rc1)
|
|
18
|
+
// - calendar versioning (2024.5.0)
|
|
19
|
+
// - 4-segment versions (99.99.99.1, 0.0.7.5)
|
|
20
|
+
// - wildcard (*)
|
|
21
|
+
// Rejects only structural abuse: path traversal (..), shell metachars,
|
|
22
|
+
// whitespace, slashes, length > 100. The previous regex required first
|
|
23
|
+
// char in [1-9] after a '0' which broke ALL 0.x.y versions (false negative
|
|
24
|
+
// spam in scraper logs ; ~600 valid PyPI/npm versions wrongly skipped per scrape).
|
|
25
|
+
const VERSION_INVALID_CHARS = /[\s\\/'"`;|&$<>(){}\[\]?]/;
|
|
26
|
+
function isValidVersion(version) {
|
|
27
|
+
if (!version || typeof version !== 'string') return false;
|
|
28
|
+
if (version === '*') return true;
|
|
29
|
+
if (version.length > 100) return false;
|
|
30
|
+
if (version.includes('..')) return false;
|
|
31
|
+
if (VERSION_INVALID_CHARS.test(version)) return false;
|
|
32
|
+
// Must start with a digit (or 'v' prefix), and contain only word chars / . / + / -
|
|
33
|
+
if (!/^v?\d/.test(version)) return false;
|
|
34
|
+
return /^[\w.+\-]+$/.test(version);
|
|
35
|
+
}
|
|
36
|
+
// Backwards compat: keep VERSION_RE as a thin { test } wrapper around isValidVersion for any legacy
|
|
37
|
+
// caller that imports it. Prefer isValidVersion() in new code.
|
|
38
|
+
const VERSION_RE = { test: isValidVersion };
|
|
16
39
|
|
|
17
|
-
// Aggregated warning
|
|
40
|
+
// Aggregated warning counters for noisy logs (reset per scraper run).
|
|
41
|
+
// Avoids spamming hundreds of WARN lines for malware feeds with non-standard
|
|
42
|
+
// version strings — a single summary line is logged at the end of runScraper.
|
|
18
43
|
let _noVersionSkipCount = 0;
|
|
44
|
+
let _invalidVersionSkipCount = 0;
|
|
45
|
+
let _invalidVersionSamples = []; // first 3 samples for context
|
|
19
46
|
|
|
20
47
|
/**
|
|
21
48
|
* Validate an IOC package entry before insertion.
|
|
@@ -37,9 +64,13 @@ function validateIOCEntry(pkgName, version, ecosystem) {
|
|
|
37
64
|
return false;
|
|
38
65
|
}
|
|
39
66
|
}
|
|
40
|
-
// Version validation
|
|
67
|
+
// Version validation — silent counter (aggregated log emitted by runScraper).
|
|
68
|
+
// The previous per-line WARN was spamming ~600 lines per scrape on PyPI feeds.
|
|
41
69
|
if (version && !VERSION_RE.test(version)) {
|
|
42
|
-
|
|
70
|
+
_invalidVersionSkipCount++;
|
|
71
|
+
if (_invalidVersionSamples.length < 3) {
|
|
72
|
+
_invalidVersionSamples.push(`${version} for ${pkgName}`);
|
|
73
|
+
}
|
|
43
74
|
return false;
|
|
44
75
|
}
|
|
45
76
|
return true;
|
|
@@ -986,6 +1017,8 @@ async function runScraper() {
|
|
|
986
1017
|
|
|
987
1018
|
// Reset aggregated warning counters
|
|
988
1019
|
_noVersionSkipCount = 0;
|
|
1020
|
+
_invalidVersionSkipCount = 0;
|
|
1021
|
+
_invalidVersionSamples = [];
|
|
989
1022
|
|
|
990
1023
|
// Create data directory if needed
|
|
991
1024
|
const dataDir = path.dirname(IOC_FILE);
|
|
@@ -1054,6 +1087,12 @@ async function runScraper() {
|
|
|
1054
1087
|
if (_noVersionSkipCount > 0) {
|
|
1055
1088
|
console.log('[SCRAPER] WARN: ' + _noVersionSkipCount + ' packages skipped (no version info, wildcard fallback avoided)');
|
|
1056
1089
|
}
|
|
1090
|
+
if (_invalidVersionSkipCount > 0) {
|
|
1091
|
+
const samples = _invalidVersionSamples.length > 0
|
|
1092
|
+
? ' (samples: ' + _invalidVersionSamples.join(', ') + ')'
|
|
1093
|
+
: '';
|
|
1094
|
+
console.log('[SCRAPER] WARN: ' + _invalidVersionSkipCount + ' entries skipped (malformed version)' + samples);
|
|
1095
|
+
}
|
|
1057
1096
|
|
|
1058
1097
|
// Merge all scraped packages
|
|
1059
1098
|
const allPackages = [
|
package/src/pipeline/processor.js
CHANGED
|
@@ -131,11 +131,12 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
131
131
|
debugLog('[REACHABILITY] error:', e?.message);
|
|
132
132
|
// Graceful fallback — treat all files as reachable
|
|
133
133
|
}
|
|
134
|
-
// FPR plan C2 : function-level reachability
|
|
135
|
-
//
|
|
136
|
-
//
|
|
137
|
-
//
|
|
138
|
-
|
|
134
|
+
// FPR plan C2 : function-level reachability. Default ON since v2.11.9 after
|
|
135
|
+
// measuring -2.0 pp FPR (curated 11.4% -> 9.4%) with zero TPR/ADR regression
|
|
136
|
+
// on the full evaluation corpus (1054 packages). Opt-out via
|
|
137
|
+
// MUADDIB_FN_REACHABILITY=0. Activated only when file-level reachability
|
|
138
|
+
// succeeded (otherwise no entry-point context to seed from).
|
|
139
|
+
if (reachableFiles && globalThis.process.env.MUADDIB_FN_REACHABILITY !== '0') {
|
|
139
140
|
try {
|
|
140
141
|
reachableFunctions = computeReachableFunctions(targetPath, reachableFiles);
|
|
141
142
|
} catch (e) {
|
|
@@ -169,21 +170,25 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
169
170
|
}
|
|
170
171
|
} catch { /* graceful fallback */ }
|
|
171
172
|
|
|
172
|
-
// FPR plan Chantier 4 + 5 wiring :
|
|
173
|
-
//
|
|
174
|
-
// and
|
|
175
|
-
//
|
|
176
|
-
//
|
|
177
|
-
//
|
|
178
|
-
//
|
|
179
|
-
//
|
|
173
|
+
// FPR plan Chantier 4 + 5 wiring : fetch npm registry packument and attach
|
|
174
|
+
// it as _pkgMeta.npmRegistryMeta so applyReputationFactor, applyMatureStable-
|
|
175
|
+
// Cap, and applyDeltaMultiplier can fire. getPackageMetadata has an in-
|
|
176
|
+
// process cache, so repeated scans of the same package hit the cache and
|
|
177
|
+
// never re-fetch. Network failure / unknown package -> returns null and all
|
|
178
|
+
// downstream functions degrade gracefully.
|
|
179
|
+
//
|
|
180
|
+
// Default ON since v2.11.9. To skip the fetch entirely (air-gap, offline CI,
|
|
181
|
+
// perf-critical batch), set MUADDIB_NO_REGISTRY_FETCH=1 — this disables the
|
|
182
|
+
// 3 metadata-dependent gates (METADATA_FACTOR, MATURE_CAP, DELTA_MODE) in
|
|
183
|
+
// one shot. Individual gates can still be turned off via their own =0 flag.
|
|
180
184
|
if (
|
|
181
185
|
packageName &&
|
|
182
186
|
_pkgMeta &&
|
|
187
|
+
globalThis.process.env.MUADDIB_NO_REGISTRY_FETCH !== '1' &&
|
|
183
188
|
(
|
|
184
|
-
globalThis.process.env.MUADDIB_METADATA_FACTOR
|
|
185
|
-
globalThis.process.env.MUADDIB_MATURE_CAP
|
|
186
|
-
globalThis.process.env.MUADDIB_DELTA_MODE
|
|
189
|
+
globalThis.process.env.MUADDIB_METADATA_FACTOR !== '0' ||
|
|
190
|
+
globalThis.process.env.MUADDIB_MATURE_CAP !== '0' ||
|
|
191
|
+
globalThis.process.env.MUADDIB_DELTA_MODE !== '0'
|
|
187
192
|
)
|
|
188
193
|
) {
|
|
189
194
|
try {
|
|
@@ -293,13 +298,15 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
293
298
|
applyFPReductions(deduped, reachableFiles, packageName, packageDeps, reachableFunctions);
|
|
294
299
|
|
|
295
300
|
// FPR plan Chantier 3 - delta-aware decay. Threats present in the last 3
|
|
296
|
-
// published versions (and not HC/IOC) decay to LOW.
|
|
297
|
-
//
|
|
301
|
+
// published versions (and not HC/IOC) decay to LOW. Default ON since v2.11.9.
|
|
302
|
+
// Opt-out: MUADDIB_DELTA_MODE=0 (or set MUADDIB_NO_REGISTRY_FETCH=1 to skip
|
|
303
|
+
// the registry fetch upstream). No-op when registry meta is absent (CLI
|
|
304
|
+
// scans on private packages, offline, or unknown package).
|
|
298
305
|
let _deltaResult = null;
|
|
299
306
|
if (
|
|
300
307
|
packageName && packageVersion &&
|
|
301
308
|
_pkgMeta && _pkgMeta.npmRegistryMeta &&
|
|
302
|
-
globalThis.process.env.MUADDIB_DELTA_MODE
|
|
309
|
+
globalThis.process.env.MUADDIB_DELTA_MODE !== '0'
|
|
303
310
|
) {
|
|
304
311
|
try {
|
|
305
312
|
const packument = _pkgMeta.npmRegistryMeta.packument || _pkgMeta.npmRegistryMeta;
|
|
@@ -446,9 +453,9 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
446
453
|
// FPR plan Chantier 5 : mature stable cap — caps mature, well-owned, high-
|
|
447
454
|
// traffic packages at MEDIUM unless an HC type or IOC is present. Sits
|
|
448
455
|
// BETWEEN the contextual caps (which it composes with) and the single-fire
|
|
449
|
-
// floor (which can override on hard signals).
|
|
450
|
-
// MUADDIB_MATURE_CAP=
|
|
451
|
-
if (globalThis.process.env.MUADDIB_MATURE_CAP
|
|
456
|
+
// floor (which can override on hard signals). Default ON since v2.11.9.
|
|
457
|
+
// Opt-out: MUADDIB_MATURE_CAP=0. No-op when registry meta is absent.
|
|
458
|
+
if (globalThis.process.env.MUADDIB_MATURE_CAP !== '0') {
|
|
452
459
|
const matureCap = applyMatureStableCap(result, _pkgMeta && _pkgMeta.npmRegistryMeta);
|
|
453
460
|
if (matureCap && matureCap.applied) {
|
|
454
461
|
debugLog('[MATURE-CAP] ' + (packageName || targetPath) + ': ' +
|
|
@@ -470,12 +477,13 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
470
477
|
// Hybrid v3 Phase 4: metadata-first reputation factor — multiplies the score
|
|
471
478
|
// by a factor in [0.10, 1.5] derived from npm registry signals. Applied LAST
|
|
472
479
|
// so all severity logic completes first; the factor is the final, package-
|
|
473
|
-
// wide context filter.
|
|
474
|
-
// metadata is absent (CLI scans
|
|
480
|
+
// wide context filter. Default ON since v2.11.9. Opt-out:
|
|
481
|
+
// MUADDIB_METADATA_FACTOR=0. No-op when metadata is absent (CLI scans on
|
|
482
|
+
// unknown package, offline, MUADDIB_NO_REGISTRY_FETCH=1).
|
|
475
483
|
// NOTE: this module's exported function is named `process`, which shadows
|
|
476
484
|
// the global `process` inside its body. Use globalThis.process.env to reach
|
|
477
485
|
// the real environment.
|
|
478
|
-
if (globalThis.process.env.MUADDIB_METADATA_FACTOR
|
|
486
|
+
if (globalThis.process.env.MUADDIB_METADATA_FACTOR !== '0') {
|
|
479
487
|
const repAdjust = applyReputationFactor(result, _pkgMeta && _pkgMeta.npmRegistryMeta);
|
|
480
488
|
if (repAdjust) {
|
|
481
489
|
debugLog('[META-FACTOR] ' + (packageName || targetPath) + ': factor=' +
|
|
@@ -501,10 +509,10 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
501
509
|
// FPR plan Chantier 3 : persist this version's signature set so future scans
|
|
502
510
|
// (or future versions) can use it as a baseline for delta decay. Best-effort
|
|
503
511
|
// and idempotent ; cache misses on read are silent so a missed write never
|
|
504
|
-
// blocks scoring.
|
|
512
|
+
// blocks scoring. Write whenever delta-mode is enabled (default ON) AND we
|
|
505
513
|
// have a concrete package@version pair.
|
|
506
514
|
if (
|
|
507
|
-
globalThis.process.env.MUADDIB_DELTA_MODE
|
|
515
|
+
globalThis.process.env.MUADDIB_DELTA_MODE !== '0' &&
|
|
508
516
|
packageName && packageVersion
|
|
509
517
|
) {
|
|
510
518
|
try {
|
package/src/scoring.js
CHANGED
|
@@ -186,7 +186,8 @@ function _isReplacedByCompound(t) {
|
|
|
186
186
|
}
|
|
187
187
|
|
|
188
188
|
function computeGroupScore(threats) {
|
|
189
|
-
|
|
189
|
+
// Score decay default ON since v2.11.9 (FPR plan Chantier 1). Opt-out: MUADDIB_DECAY=0.
|
|
190
|
+
if (process.env.MUADDIB_DECAY !== '0') return computeGroupScoreDecay(threats);
|
|
190
191
|
let score = 0;
|
|
191
192
|
let protoHookMediumPoints = 0;
|
|
192
193
|
let dataflowMediumPoints = 0;
|