muaddib-scanner 2.11.7 → 2.11.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +39 -0
- package/package.json +4 -4
- package/src/ioc/scraper.js +178 -10
- package/src/ioc/updater.js +11 -8
- package/src/pipeline/processor.js +35 -27
- package/src/scoring.js +2 -1
package/.env.example
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# MUAD'DIB environment variables — template
|
|
2
|
+
# Copy to .env (local dev) or /opt/muaddib/.env (VPS) and fill in real values.
|
|
3
|
+
# .env files are gitignored. NEVER commit a real token.
|
|
4
|
+
|
|
5
|
+
# ----------------------------------------------------------------------------
|
|
6
|
+
# Threat-feed API tokens (all OPTIONAL — scrapers degrade gracefully if absent)
|
|
7
|
+
# ----------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
# OpenSourceMalware.com — community-verified threat intel
|
|
10
|
+
# Free tier: 60 req/min, /query-latest gives 100 most recent threats per ecosystem.
|
|
11
|
+
# Sign up + generate at: https://opensourcemalware.com/auth → profile → API Tokens
|
|
12
|
+
# Format: osm_<random-32+chars>
|
|
13
|
+
# Used by: src/ioc/scraper.js → scrapeOSMQueryLatest()
|
|
14
|
+
OSM_API_TOKEN=
|
|
15
|
+
|
|
16
|
+
# ----------------------------------------------------------------------------
|
|
17
|
+
# Webhook destinations (optional — monitor alerts)
|
|
18
|
+
# ----------------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
# Discord webhook for monitor alerts (P1/P2/P3 triage)
|
|
21
|
+
# DISCORD_WEBHOOK_URL=
|
|
22
|
+
|
|
23
|
+
# ----------------------------------------------------------------------------
|
|
24
|
+
# FPR plan gates — DEFAULT ON since v2.11.9 (no need to set these unless opting OUT)
|
|
25
|
+
# ----------------------------------------------------------------------------
|
|
26
|
+
# Measured impact on the v2.11.4 evaluation corpus (1054 packages):
|
|
27
|
+
# FPR curated 15.6% -> 9.36% (-6.24 pp), FPR random 7.0% -> 2.0% (-5.00 pp).
|
|
28
|
+
# TPR@3 / TPR@20 / ADR strictly unchanged.
|
|
29
|
+
#
|
|
30
|
+
# Opt-OUT individual gates (uncomment + set to 0):
|
|
31
|
+
# MUADDIB_FN_REACHABILITY=0 # function-level reachability gating
|
|
32
|
+
# MUADDIB_DECAY=0 # group score decay on bundled outputs
|
|
33
|
+
# MUADDIB_MATURE_CAP=0 # cap mature, well-trafficked packages at MEDIUM
|
|
34
|
+
# MUADDIB_METADATA_FACTOR=0 # registry signals -> reputation multiplier
|
|
35
|
+
# MUADDIB_DELTA_MODE=0 # delta scoring against prior versions
|
|
36
|
+
#
|
|
37
|
+
# Skip the npm registry fetch ENTIRELY (disables MATURE_CAP + METADATA_FACTOR
|
|
38
|
+
# + DELTA_MODE in one shot, useful for air-gap / offline CI / perf-critical):
|
|
39
|
+
# MUADDIB_NO_REGISTRY_FETCH=1
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "muaddib-scanner",
|
|
3
|
-
"version": "2.11.7",
|
|
3
|
+
"version": "2.11.9",
|
|
4
4
|
"description": "Supply-chain threat detection & response for npm & PyPI/Python",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -46,7 +46,7 @@
|
|
|
46
46
|
"node": ">=18.0.0"
|
|
47
47
|
},
|
|
48
48
|
"dependencies": {
|
|
49
|
-
"@inquirer/prompts": "8.4.
|
|
49
|
+
"@inquirer/prompts": "8.4.2",
|
|
50
50
|
"acorn": "8.16.0",
|
|
51
51
|
"acorn-walk": "8.3.5",
|
|
52
52
|
"adm-zip": "0.5.17",
|
|
@@ -57,8 +57,8 @@
|
|
|
57
57
|
},
|
|
58
58
|
"devDependencies": {
|
|
59
59
|
"@eslint/js": "10.0.1",
|
|
60
|
-
"eslint": "10.
|
|
60
|
+
"eslint": "10.3.0",
|
|
61
61
|
"eslint-plugin-security": "^4.0.0",
|
|
62
|
-
"globals": "17.
|
|
62
|
+
"globals": "17.6.0"
|
|
63
63
|
}
|
|
64
64
|
}
|
package/src/ioc/scraper.js
CHANGED
|
@@ -12,10 +12,37 @@ const { Spinner } = require('../utils.js');
|
|
|
12
12
|
const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
|
|
13
13
|
|
|
14
14
|
// Version format validation (semver-like + wildcard)
|
|
15
|
-
|
|
15
|
+
// Permissive version validator — accepts:
|
|
16
|
+
// - npm semver (1.2.3, 1.2.3-beta.1+build42, 0.x.y)
|
|
17
|
+
// - PEP 440 (0.1.0b7, 1.2.post1, 1.2.dev0, 0.1.0rc1)
|
|
18
|
+
// - calendar versioning (2024.5.0)
|
|
19
|
+
// - 4-segment versions (99.99.99.1, 0.0.7.5)
|
|
20
|
+
// - wildcard (*)
|
|
21
|
+
// Rejects only structural abuse: path traversal (..), shell metachars,
|
|
22
|
+
// whitespace, slashes, length > 100. The previous regex required first
|
|
23
|
+
// char in [1-9] after a '0' which broke ALL 0.x.y versions (false negative
|
|
24
|
+
// spam in scraper logs ; ~600 valid PyPI/npm versions wrongly skipped per scrape).
|
|
25
|
+
const VERSION_INVALID_CHARS = /[\s\\/'"`;|&$<>(){}\[\]?]/;
|
|
26
|
+
function isValidVersion(version) {
|
|
27
|
+
if (!version || typeof version !== 'string') return false;
|
|
28
|
+
if (version === '*') return true;
|
|
29
|
+
if (version.length > 100) return false;
|
|
30
|
+
if (version.includes('..')) return false;
|
|
31
|
+
if (VERSION_INVALID_CHARS.test(version)) return false;
|
|
32
|
+
// Must start with a digit (or 'v' prefix), and contain only word chars / . / + / -
|
|
33
|
+
if (!/^v?\d/.test(version)) return false;
|
|
34
|
+
return /^[\w.+\-]+$/.test(version);
|
|
35
|
+
}
|
|
36
|
+
// Backwards compat: keep VERSION_RE as a no-op test wrapper for any legacy
|
|
37
|
+
// caller that imports it. Prefer isValidVersion() in new code.
|
|
38
|
+
const VERSION_RE = { test: isValidVersion };
|
|
16
39
|
|
|
17
|
-
// Aggregated warning
|
|
40
|
+
// Aggregated warning counters for noisy logs (reset per scraper run).
|
|
41
|
+
// Avoids spamming hundreds of WARN lines for malware feeds with non-standard
|
|
42
|
+
// version strings — a single summary line is logged at the end of runScraper.
|
|
18
43
|
let _noVersionSkipCount = 0;
|
|
44
|
+
let _invalidVersionSkipCount = 0;
|
|
45
|
+
let _invalidVersionSamples = []; // first 3 samples for context
|
|
19
46
|
|
|
20
47
|
/**
|
|
21
48
|
* Validate an IOC package entry before insertion.
|
|
@@ -37,9 +64,13 @@ function validateIOCEntry(pkgName, version, ecosystem) {
|
|
|
37
64
|
return false;
|
|
38
65
|
}
|
|
39
66
|
}
|
|
40
|
-
// Version validation
|
|
67
|
+
// Version validation — silent counter (aggregated log emitted by runScraper).
|
|
68
|
+
// The previous per-line WARN was spamming ~600 lines per scrape on PyPI feeds.
|
|
41
69
|
if (version && !VERSION_RE.test(version)) {
|
|
42
|
-
|
|
70
|
+
_invalidVersionSkipCount++;
|
|
71
|
+
if (_invalidVersionSamples.length < 3) {
|
|
72
|
+
_invalidVersionSamples.push(`${version} for ${pkgName}`);
|
|
73
|
+
}
|
|
43
74
|
return false;
|
|
44
75
|
}
|
|
45
76
|
return true;
|
|
@@ -980,12 +1011,14 @@ async function scrapeGitHubAdvisory() {
|
|
|
980
1011
|
// ============================================
|
|
981
1012
|
async function runScraper() {
|
|
982
1013
|
console.log('\n' + '='.repeat(60));
|
|
983
|
-
console.log(' MUAD\'DIB IOC Scraper v4.
|
|
984
|
-
console.log(' OSV + OSSF + GenSecAI + DataDog +
|
|
1014
|
+
console.log(' MUAD\'DIB IOC Scraper v4.1');
|
|
1015
|
+
console.log(' OSV + OSSF + GenSecAI + DataDog + Aikido + OSM');
|
|
985
1016
|
console.log('='.repeat(60) + '\n');
|
|
986
1017
|
|
|
987
1018
|
// Reset aggregated warning counters
|
|
988
1019
|
_noVersionSkipCount = 0;
|
|
1020
|
+
_invalidVersionSkipCount = 0;
|
|
1021
|
+
_invalidVersionSamples = [];
|
|
989
1022
|
|
|
990
1023
|
// Create data directory if needed
|
|
991
1024
|
const dataDir = path.dirname(IOC_FILE);
|
|
@@ -1038,7 +1071,8 @@ async function runScraper() {
|
|
|
1038
1071
|
scrapeOSSFMaliciousPackages(osvResult.knownIds),
|
|
1039
1072
|
scrapeGitHubAdvisory(),
|
|
1040
1073
|
scrapeOSVPyPIDataDump(),
|
|
1041
|
-
scrapeAikidoMalwareFeed()
|
|
1074
|
+
scrapeAikidoMalwareFeed(),
|
|
1075
|
+
scrapeOSMQueryLatest()
|
|
1042
1076
|
]);
|
|
1043
1077
|
|
|
1044
1078
|
const shaiHuludResult = results[0];
|
|
@@ -1047,11 +1081,18 @@ async function runScraper() {
|
|
|
1047
1081
|
const githubPackages = results[3];
|
|
1048
1082
|
const pypiPackages = results[4];
|
|
1049
1083
|
const aikidoResult = results[5];
|
|
1084
|
+
const osmResult = results[6];
|
|
1050
1085
|
|
|
1051
1086
|
// Log aggregated warnings
|
|
1052
1087
|
if (_noVersionSkipCount > 0) {
|
|
1053
1088
|
console.log('[SCRAPER] WARN: ' + _noVersionSkipCount + ' packages skipped (no version info, wildcard fallback avoided)');
|
|
1054
1089
|
}
|
|
1090
|
+
if (_invalidVersionSkipCount > 0) {
|
|
1091
|
+
const samples = _invalidVersionSamples.length > 0
|
|
1092
|
+
? ' (samples: ' + _invalidVersionSamples.join(', ') + ')'
|
|
1093
|
+
: '';
|
|
1094
|
+
console.log('[SCRAPER] WARN: ' + _invalidVersionSkipCount + ' entries skipped (malformed version)' + samples);
|
|
1095
|
+
}
|
|
1055
1096
|
|
|
1056
1097
|
// Merge all scraped packages
|
|
1057
1098
|
const allPackages = [
|
|
@@ -1060,7 +1101,8 @@ async function runScraper() {
|
|
|
1060
1101
|
...datadogResult.packages,
|
|
1061
1102
|
...ossfPackages,
|
|
1062
1103
|
...githubPackages,
|
|
1063
|
-
...aikidoResult.packages
|
|
1104
|
+
...aikidoResult.packages,
|
|
1105
|
+
...osmResult.packages
|
|
1064
1106
|
];
|
|
1065
1107
|
|
|
1066
1108
|
// Merge all hashes
|
|
@@ -1072,7 +1114,7 @@ async function runScraper() {
|
|
|
1072
1114
|
// Smart deduplication: build map of best entry per key
|
|
1073
1115
|
// For duplicates, keep the one with highest confidence, then most recent date
|
|
1074
1116
|
const dedupSpinner = new Spinner();
|
|
1075
|
-
dedupSpinner.start('Deduplicating ' + allPackages.length + ' npm + ' + (pypiPackages.length + (aikidoResult.pypi_packages || []).length) + ' PyPI entries...');
|
|
1117
|
+
dedupSpinner.start('Deduplicating ' + allPackages.length + ' npm + ' + (pypiPackages.length + (aikidoResult.pypi_packages || []).length + (osmResult.pypi_packages || []).length) + ' PyPI entries...');
|
|
1076
1118
|
const dedupMap = new Map();
|
|
1077
1119
|
|
|
1078
1120
|
// Seed with existing IOCs (with sanitization of stale comma-in-version entries)
|
|
@@ -1173,7 +1215,7 @@ async function runScraper() {
|
|
|
1173
1215
|
}
|
|
1174
1216
|
let addedPyPIPackages = 0;
|
|
1175
1217
|
// Merge Aikido PyPI feed into the same loop
|
|
1176
|
-
const allPyPIPackages = pypiPackages.concat(aikidoResult.pypi_packages || []);
|
|
1218
|
+
const allPyPIPackages = pypiPackages.concat(aikidoResult.pypi_packages || [], osmResult.pypi_packages || []);
|
|
1177
1219
|
for (const pkg of allPyPIPackages) {
|
|
1178
1220
|
if (!validateIOCEntry(pkg.name, pkg.version, 'pypi')) {
|
|
1179
1221
|
skippedInvalid++;
|
|
@@ -1411,6 +1453,131 @@ async function scrapeAikidoMalwareFeed() {
|
|
|
1411
1453
|
return { packages: npmPackages, pypi_packages: pypiPackages };
|
|
1412
1454
|
}
|
|
1413
1455
|
|
|
1456
|
+
// ============================================
|
|
1457
|
+
// SOURCE 7: OpenSourceMalware.com (community-verified threat intel)
|
|
1458
|
+
// Free tier: 60 req/min, /query-latest returns 100 most recent verified threats per
|
|
1459
|
+
// ecosystem. Token stored in OSM_API_TOKEN env var (NEVER hardcoded — public repo).
|
|
1460
|
+
// API: https://api.opensourcemalware.com/functions/v1/query-latest?ecosystem={npm|pypi}
|
|
1461
|
+
// Docs: https://docs.opensourcemalware.com/api/query-latest.md
|
|
1462
|
+
// Rate-limit ref: https://docs.opensourcemalware.com/api/rate-limits.md
|
|
1463
|
+
// ============================================
|
|
1464
|
+
async function scrapeOSMQueryLatest() {
|
|
1465
|
+
console.log('[SCRAPER] OpenSourceMalware.com query-latest...');
|
|
1466
|
+
const token = process.env.OSM_API_TOKEN;
|
|
1467
|
+
if (!token) {
|
|
1468
|
+
console.log('[SCRAPER] OSM_API_TOKEN not set — skipping (graceful, no error).');
|
|
1469
|
+
return { packages: [], pypi_packages: [] };
|
|
1470
|
+
}
|
|
1471
|
+
// Defensive token shape check (don't log the value)
|
|
1472
|
+
if (typeof token !== 'string' || !token.startsWith('osm_') || token.length < 16) {
|
|
1473
|
+
console.log('[SCRAPER] OSM_API_TOKEN malformed (expected osm_<chars>) — skipping.');
|
|
1474
|
+
return { packages: [], pypi_packages: [] };
|
|
1475
|
+
}
|
|
1476
|
+
|
|
1477
|
+
const npmPackages = [];
|
|
1478
|
+
const pypiPackages = [];
|
|
1479
|
+
const headers = { Authorization: 'Bearer ' + token };
|
|
1480
|
+
|
|
1481
|
+
// Map OSM severity_level (low/medium/high/critical) to MUAD'DIB severity (lowercase).
|
|
1482
|
+
// OSM doesn't always populate severity; default to 'high' (verified threats are high-confidence by definition).
|
|
1483
|
+
function mapSeverity(s) {
|
|
1484
|
+
if (!s || typeof s !== 'string') return 'high';
|
|
1485
|
+
const v = s.toLowerCase().trim();
|
|
1486
|
+
if (v === 'low' || v === 'medium' || v === 'high' || v === 'critical') return v;
|
|
1487
|
+
return 'high';
|
|
1488
|
+
}
|
|
1489
|
+
|
|
1490
|
+
function buildReferences(threat, ecosystem) {
|
|
1491
|
+
const refs = [];
|
|
1492
|
+
if (threat.osv_advisory_url && typeof threat.osv_advisory_url === 'string') refs.push(threat.osv_advisory_url);
|
|
1493
|
+
if (threat.ghsa_advisory_url && typeof threat.ghsa_advisory_url === 'string') refs.push(threat.ghsa_advisory_url);
|
|
1494
|
+
// Canonical OSM page for this threat. Best-effort URL — if 404 it's harmless metadata.
|
|
1495
|
+
if (threat.package_name) {
|
|
1496
|
+
refs.push('https://opensourcemalware.com/' + ecosystem + '/' + encodeURIComponent(threat.package_name));
|
|
1497
|
+
}
|
|
1498
|
+
return refs;
|
|
1499
|
+
}
|
|
1500
|
+
|
|
1501
|
+
function buildDescription(threat) {
|
|
1502
|
+
const parts = [];
|
|
1503
|
+
if (threat.threat_description) parts.push(String(threat.threat_description));
|
|
1504
|
+
if (Array.isArray(threat.tags) && threat.tags.length > 0) {
|
|
1505
|
+
parts.push('Tags: ' + threat.tags.filter(t => typeof t === 'string').join(', '));
|
|
1506
|
+
}
|
|
1507
|
+
if (threat.researcher) parts.push('Reporter: ' + String(threat.researcher));
|
|
1508
|
+
return parts.length > 0 ? parts.join(' — ') : 'Verified by OpenSourceMalware.com community';
|
|
1509
|
+
}
|
|
1510
|
+
|
|
1511
|
+
async function pull(ecosystem, target) {
|
|
1512
|
+
try {
|
|
1513
|
+
const { status, data } = await fetchJSON(
|
|
1514
|
+
'https://api.opensourcemalware.com/functions/v1/query-latest?ecosystem=' + encodeURIComponent(ecosystem),
|
|
1515
|
+
{ headers }
|
|
1516
|
+
);
|
|
1517
|
+
if (status === 401 || status === 403) {
|
|
1518
|
+
console.log('[SCRAPER] OSM ' + ecosystem + ': HTTP ' + status + ' — token rejected, check OSM_API_TOKEN.');
|
|
1519
|
+
return;
|
|
1520
|
+
}
|
|
1521
|
+
if (status !== 200) {
|
|
1522
|
+
console.log('[SCRAPER] OSM ' + ecosystem + ' feed: HTTP ' + status);
|
|
1523
|
+
return;
|
|
1524
|
+
}
|
|
1525
|
+
if (!data || !Array.isArray(data.threats)) {
|
|
1526
|
+
console.log('[SCRAPER] OSM ' + ecosystem + ' feed: unexpected response shape (no threats[]).');
|
|
1527
|
+
return;
|
|
1528
|
+
}
|
|
1529
|
+
let count = 0;
|
|
1530
|
+
for (const t of data.threats) {
|
|
1531
|
+
if (!t || typeof t.package_name !== 'string' || t.package_name.length === 0) continue;
|
|
1532
|
+
// OSM verifies report_type === 'package' threats. Skip anything else (repository/url/domain).
|
|
1533
|
+
// The query is filtered by ecosystem, but defensive check on registry field.
|
|
1534
|
+
if (t.report_type && t.report_type !== 'package') continue;
|
|
1535
|
+
// Normalize version: OSM uses free-form strings ('all', 'any', 'unknown', null, etc.)
|
|
1536
|
+
// Wildcard placeholders must be MUAD'DIB's canonical '*' so the IOC matcher hits.
|
|
1537
|
+
let ver = '*';
|
|
1538
|
+
if (t.version_info && typeof t.version_info === 'string') {
|
|
1539
|
+
const trimmed = t.version_info.trim();
|
|
1540
|
+
const lc = trimmed.toLowerCase();
|
|
1541
|
+
if (trimmed !== '' && lc !== 'all' && lc !== 'any' && lc !== 'unknown' && lc !== '*' && lc !== 'n/a') {
|
|
1542
|
+
ver = trimmed;
|
|
1543
|
+
}
|
|
1544
|
+
}
|
|
1545
|
+
const severity = mapSeverity(t.severity_level);
|
|
1546
|
+
const addedAt = t.verified_at || t.created_at || t.first_seen || new Date().toISOString();
|
|
1547
|
+
const idPrefix = ecosystem === 'pypi' ? 'OSM-PYPI-' : 'OSM-';
|
|
1548
|
+
target.push({
|
|
1549
|
+
id: idPrefix + t.package_name + '-' + ver,
|
|
1550
|
+
name: t.package_name,
|
|
1551
|
+
version: ver,
|
|
1552
|
+
severity: severity,
|
|
1553
|
+
confidence: 'high',
|
|
1554
|
+
source: 'osm',
|
|
1555
|
+
description: buildDescription(t),
|
|
1556
|
+
references: buildReferences(t, ecosystem),
|
|
1557
|
+
mitre: 'T1195.002',
|
|
1558
|
+
freshness: {
|
|
1559
|
+
added_at: addedAt,
|
|
1560
|
+
source: 'osm',
|
|
1561
|
+
confidence: 'high'
|
|
1562
|
+
}
|
|
1563
|
+
});
|
|
1564
|
+
count++;
|
|
1565
|
+
}
|
|
1566
|
+
console.log('[SCRAPER] ' + count + ' ' + ecosystem + ' verified threats from OSM');
|
|
1567
|
+
} catch (e) {
|
|
1568
|
+
// Defensive: do NOT echo any token-bearing URL or header in the error.
|
|
1569
|
+
console.log('[SCRAPER] OSM ' + ecosystem + ' error: ' + (e && e.message ? e.message : 'unknown'));
|
|
1570
|
+
}
|
|
1571
|
+
}
|
|
1572
|
+
|
|
1573
|
+
// Sequential (not parallel): keeps us well under the 60 req/min rate limit
|
|
1574
|
+
// and gives clearer logs. Two ecosystems = ~200ms total.
|
|
1575
|
+
await pull('npm', npmPackages);
|
|
1576
|
+
await pull('pypi', pypiPackages);
|
|
1577
|
+
|
|
1578
|
+
return { packages: npmPackages, pypi_packages: pypiPackages };
|
|
1579
|
+
}
|
|
1580
|
+
|
|
1414
1581
|
// ============================================
|
|
1415
1582
|
// SOURCE 5: OSV.dev Lightweight API
|
|
1416
1583
|
// Used by `muaddib update` (fast, no zip download)
|
|
@@ -1529,6 +1696,7 @@ function getSourceConfidence(pkg) {
|
|
|
1529
1696
|
module.exports = {
|
|
1530
1697
|
runScraper, scrapeShaiHuludDetector, scrapeDatadogIOCs,
|
|
1531
1698
|
scrapeAikidoMalwareFeed,
|
|
1699
|
+
scrapeOSMQueryLatest,
|
|
1532
1700
|
scrapeOSVLightweightAPI, queryOSVBatch,
|
|
1533
1701
|
getSourceConfidence,
|
|
1534
1702
|
// Pure utility functions (exported for testing)
|
package/src/ioc/updater.js
CHANGED
|
@@ -39,25 +39,28 @@ async function updateIOCs() {
|
|
|
39
39
|
mergeIOCs(baseIOCs, yamlStandard);
|
|
40
40
|
console.log('[2/4] YAML IOCs: ' + yamlStandard.packages.length + ' packages, ' + yamlStandard.hashes.length + ' hashes');
|
|
41
41
|
|
|
42
|
-
// Step 3: Download additional IOCs from GitHub + OSV API (GenSecAI + DataDog + OSV lightweight)
|
|
43
|
-
|
|
44
|
-
|
|
42
|
+
// Step 3: Download additional IOCs from GitHub + OSV API (GenSecAI + DataDog + OSV lightweight + OSM)
|
|
43
|
+
// Light path: JSON/REST only, NO heavy zip dumps. Designed to be safe at 15min cadence.
|
|
44
|
+
// For the deep refresh (OSV zip dumps + OSSF + Aikido + GitHub Advisory), use `muaddib scrape` (~5min).
|
|
45
|
+
const { scrapeShaiHuludDetector, scrapeDatadogIOCs, scrapeOSVLightweightAPI, scrapeOSMQueryLatest } = require('./scraper.js');
|
|
46
|
+
console.log('[3/4] Downloading GitHub + OSV API + OSM IOCs...');
|
|
45
47
|
|
|
46
|
-
const [shaiHulud, datadog, osvApi] = await Promise.all([
|
|
48
|
+
const [shaiHulud, datadog, osvApi, osmResult] = await Promise.all([
|
|
47
49
|
scrapeShaiHuludDetector(),
|
|
48
50
|
scrapeDatadogIOCs(),
|
|
49
|
-
scrapeOSVLightweightAPI()
|
|
51
|
+
scrapeOSVLightweightAPI(),
|
|
52
|
+
scrapeOSMQueryLatest()
|
|
50
53
|
]);
|
|
51
54
|
|
|
52
55
|
const githubIOCs = {
|
|
53
|
-
packages: [].concat(shaiHulud.packages, datadog.packages, osvApi),
|
|
54
|
-
pypi_packages: [],
|
|
56
|
+
packages: [].concat(shaiHulud.packages, datadog.packages, osvApi, osmResult.packages),
|
|
57
|
+
pypi_packages: (osmResult.pypi_packages || []).slice(),
|
|
55
58
|
hashes: [].concat(shaiHulud.hashes || [], datadog.hashes || []),
|
|
56
59
|
markers: [],
|
|
57
60
|
files: []
|
|
58
61
|
};
|
|
59
62
|
mergeIOCs(baseIOCs, githubIOCs);
|
|
60
|
-
console.log(' +' + shaiHulud.packages.length + ' GenSecAI, +' + datadog.packages.length + ' DataDog, +' + osvApi.length + ' OSV API');
|
|
63
|
+
console.log(' +' + shaiHulud.packages.length + ' GenSecAI, +' + datadog.packages.length + ' DataDog, +' + osvApi.length + ' OSV API, +' + osmResult.packages.length + ' OSM npm, +' + (osmResult.pypi_packages || []).length + ' OSM PyPI');
|
|
61
64
|
|
|
62
65
|
// Step 3b: Load existing cache IOCs (from bootstrap download or previous update)
|
|
63
66
|
if (fs.existsSync(CACHE_IOC_FILE)) {
|
package/src/pipeline/processor.js
CHANGED
|
@@ -131,11 +131,12 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
131
131
|
debugLog('[REACHABILITY] error:', e?.message);
|
|
132
132
|
// Graceful fallback — treat all files as reachable
|
|
133
133
|
}
|
|
134
|
-
// FPR plan C2 : function-level reachability
|
|
135
|
-
//
|
|
136
|
-
//
|
|
137
|
-
//
|
|
138
|
-
|
|
134
|
+
// FPR plan C2 : function-level reachability. Default ON since v2.11.9 after
|
|
135
|
+
// measuring -2.0 pp FPR (curated 11.4% -> 9.4%) with zero TPR/ADR regression
|
|
136
|
+
// on the full evaluation corpus (1054 packages). Opt-out via
|
|
137
|
+
// MUADDIB_FN_REACHABILITY=0. Activated only when file-level reachability
|
|
138
|
+
// succeeded (otherwise no entry-point context to seed from).
|
|
139
|
+
if (reachableFiles && globalThis.process.env.MUADDIB_FN_REACHABILITY !== '0') {
|
|
139
140
|
try {
|
|
140
141
|
reachableFunctions = computeReachableFunctions(targetPath, reachableFiles);
|
|
141
142
|
} catch (e) {
|
|
@@ -169,21 +170,25 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
169
170
|
}
|
|
170
171
|
} catch { /* graceful fallback */ }
|
|
171
172
|
|
|
172
|
-
// FPR plan Chantier 4 + 5 wiring :
|
|
173
|
-
//
|
|
174
|
-
// and
|
|
175
|
-
//
|
|
176
|
-
//
|
|
177
|
-
//
|
|
178
|
-
//
|
|
179
|
-
//
|
|
173
|
+
// FPR plan Chantier 4 + 5 wiring : fetch npm registry packument and attach
|
|
174
|
+
// it as _pkgMeta.npmRegistryMeta so applyReputationFactor, applyMatureStable-
|
|
175
|
+
// Cap, and applyDeltaMultiplier can fire. getPackageMetadata has an in-
|
|
176
|
+
// process cache, so repeated scans of the same package hit the cache and
|
|
177
|
+
// never re-fetch. Network failure / unknown package -> returns null and all
|
|
178
|
+
// downstream functions degrade gracefully.
|
|
179
|
+
//
|
|
180
|
+
// Default ON since v2.11.9. To skip the fetch entirely (air-gap, offline CI,
|
|
181
|
+
// perf-critical batch), set MUADDIB_NO_REGISTRY_FETCH=1 — this disables the
|
|
182
|
+
// 3 metadata-dependent gates (METADATA_FACTOR, MATURE_CAP, DELTA_MODE) in
|
|
183
|
+
// one shot. Individual gates can still be turned off via their own =0 flag.
|
|
180
184
|
if (
|
|
181
185
|
packageName &&
|
|
182
186
|
_pkgMeta &&
|
|
187
|
+
globalThis.process.env.MUADDIB_NO_REGISTRY_FETCH !== '1' &&
|
|
183
188
|
(
|
|
184
|
-
globalThis.process.env.MUADDIB_METADATA_FACTOR
|
|
185
|
-
globalThis.process.env.MUADDIB_MATURE_CAP
|
|
186
|
-
globalThis.process.env.MUADDIB_DELTA_MODE
|
|
189
|
+
globalThis.process.env.MUADDIB_METADATA_FACTOR !== '0' ||
|
|
190
|
+
globalThis.process.env.MUADDIB_MATURE_CAP !== '0' ||
|
|
191
|
+
globalThis.process.env.MUADDIB_DELTA_MODE !== '0'
|
|
187
192
|
)
|
|
188
193
|
) {
|
|
189
194
|
try {
|
|
@@ -293,13 +298,15 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
293
298
|
applyFPReductions(deduped, reachableFiles, packageName, packageDeps, reachableFunctions);
|
|
294
299
|
|
|
295
300
|
// FPR plan Chantier 3 - delta-aware decay. Threats present in the last 3
|
|
296
|
-
// published versions (and not HC/IOC) decay to LOW.
|
|
297
|
-
//
|
|
301
|
+
// published versions (and not HC/IOC) decay to LOW. Default ON since v2.11.9.
|
|
302
|
+
// Opt-out: MUADDIB_DELTA_MODE=0 (or set MUADDIB_NO_REGISTRY_FETCH=1 to skip
|
|
303
|
+
// the registry fetch upstream). No-op when registry meta is absent (CLI
|
|
304
|
+
// scans on private packages, offline, or unknown package).
|
|
298
305
|
let _deltaResult = null;
|
|
299
306
|
if (
|
|
300
307
|
packageName && packageVersion &&
|
|
301
308
|
_pkgMeta && _pkgMeta.npmRegistryMeta &&
|
|
302
|
-
globalThis.process.env.MUADDIB_DELTA_MODE
|
|
309
|
+
globalThis.process.env.MUADDIB_DELTA_MODE !== '0'
|
|
303
310
|
) {
|
|
304
311
|
try {
|
|
305
312
|
const packument = _pkgMeta.npmRegistryMeta.packument || _pkgMeta.npmRegistryMeta;
|
|
@@ -446,9 +453,9 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
446
453
|
// FPR plan Chantier 5 : mature stable cap — caps mature, well-owned, high-
|
|
447
454
|
// traffic packages at MEDIUM unless an HC type or IOC is present. Sits
|
|
448
455
|
// BETWEEN the contextual caps (which it composes with) and the single-fire
|
|
449
|
-
// floor (which can override on hard signals).
|
|
450
|
-
// MUADDIB_MATURE_CAP=
|
|
451
|
-
if (globalThis.process.env.MUADDIB_MATURE_CAP
|
|
456
|
+
// floor (which can override on hard signals). Default ON since v2.11.9.
|
|
457
|
+
// Opt-out: MUADDIB_MATURE_CAP=0. No-op when registry meta is absent.
|
|
458
|
+
if (globalThis.process.env.MUADDIB_MATURE_CAP !== '0') {
|
|
452
459
|
const matureCap = applyMatureStableCap(result, _pkgMeta && _pkgMeta.npmRegistryMeta);
|
|
453
460
|
if (matureCap && matureCap.applied) {
|
|
454
461
|
debugLog('[MATURE-CAP] ' + (packageName || targetPath) + ': ' +
|
|
@@ -470,12 +477,13 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
470
477
|
// Hybrid v3 Phase 4: metadata-first reputation factor — multiplies the score
|
|
471
478
|
// by a factor in [0.10, 1.5] derived from npm registry signals. Applied LAST
|
|
472
479
|
// so all severity logic completes first; the factor is the final, package-
|
|
473
|
-
// wide context filter.
|
|
474
|
-
// metadata is absent (CLI scans
|
|
480
|
+
// wide context filter. Default ON since v2.11.9. Opt-out:
|
|
481
|
+
// MUADDIB_METADATA_FACTOR=0. No-op when metadata is absent (CLI scans on
|
|
482
|
+
// unknown package, offline, MUADDIB_NO_REGISTRY_FETCH=1).
|
|
475
483
|
// NOTE: this module's exported function is named `process`, which shadows
|
|
476
484
|
// the global `process` inside its body. Use globalThis.process.env to reach
|
|
477
485
|
// the real environment.
|
|
478
|
-
if (globalThis.process.env.MUADDIB_METADATA_FACTOR
|
|
486
|
+
if (globalThis.process.env.MUADDIB_METADATA_FACTOR !== '0') {
|
|
479
487
|
const repAdjust = applyReputationFactor(result, _pkgMeta && _pkgMeta.npmRegistryMeta);
|
|
480
488
|
if (repAdjust) {
|
|
481
489
|
debugLog('[META-FACTOR] ' + (packageName || targetPath) + ': factor=' +
|
|
@@ -501,10 +509,10 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
501
509
|
// FPR plan Chantier 3 : persist this version's signature set so future scans
|
|
502
510
|
// (or future versions) can use it as a baseline for delta decay. Best-effort
|
|
503
511
|
// and idempotent ; cache misses on read are silent so a missed write never
|
|
504
|
-
// blocks scoring.
|
|
512
|
+
// blocks scoring. Write whenever delta-mode is enabled (default ON) AND we
|
|
505
513
|
// have a concrete package@version pair.
|
|
506
514
|
if (
|
|
507
|
-
globalThis.process.env.MUADDIB_DELTA_MODE
|
|
515
|
+
globalThis.process.env.MUADDIB_DELTA_MODE !== '0' &&
|
|
508
516
|
packageName && packageVersion
|
|
509
517
|
) {
|
|
510
518
|
try {
|
package/src/scoring.js
CHANGED
|
@@ -186,7 +186,8 @@ function _isReplacedByCompound(t) {
|
|
|
186
186
|
}
|
|
187
187
|
|
|
188
188
|
function computeGroupScore(threats) {
|
|
189
|
-
|
|
189
|
+
// Score decay default ON since v2.11.9 (FPR plan Chantier 1). Opt-out: MUADDIB_DECAY=0.
|
|
190
|
+
if (process.env.MUADDIB_DECAY !== '0') return computeGroupScoreDecay(threats);
|
|
190
191
|
let score = 0;
|
|
191
192
|
let protoHookMediumPoints = 0;
|
|
192
193
|
let dataflowMediumPoints = 0;
|