muaddib-scanner 2.11.65 → 2.11.67
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/{self-scan-v2.11.65.json → self-scan-v2.11.67.json} +1 -1
- package/src/ioc/updater.js +42 -9
- package/src/monitor/classify.js +3 -1
- package/src/monitor/queue.js +17 -0
- package/src/monitor/scan-queue.js +12 -1
- package/src/monitor/state.js +113 -0
- package/src/monitor/tarball-archive.js +67 -25
- package/src/response/playbooks.js +4 -0
- package/src/rules/index.js +13 -0
- package/src/scanner/package.js +45 -0
- package/src/scoring.js +3 -1
package/package.json
CHANGED
package/src/ioc/updater.js
CHANGED
|
@@ -123,6 +123,11 @@ async function updateIOCs() {
|
|
|
123
123
|
console.log('[4/4] Saved to cache: ' + CACHE_IOC_FILE);
|
|
124
124
|
console.log('\n[OK] IOCs updated: ' + totalNpm + ' npm + ' + totalPyPI + ' PyPI packages');
|
|
125
125
|
|
|
126
|
+
// Fresh IOC files written — drop the in-process singleton so the next
|
|
127
|
+
// loadCachedIOCs() rebuilds from them (cross-process monitors pick the change
|
|
128
|
+
// up via the mtime/size source signature within SOURCE_CHECK_INTERVAL).
|
|
129
|
+
invalidateCache();
|
|
130
|
+
|
|
126
131
|
return { total: totalNpm, totalPyPI: totalPyPI };
|
|
127
132
|
}
|
|
128
133
|
|
|
@@ -202,16 +207,41 @@ function mergeIOCs(target, source) {
|
|
|
202
207
|
return added;
|
|
203
208
|
}
|
|
204
209
|
|
|
205
|
-
//
|
|
210
|
+
// IOC store cache. The optimized store is large (~240K entries → hundreds of MB),
|
|
211
|
+
// so it MUST be a stable singleton: rebuilding it duplicates that memory, and any
|
|
212
|
+
// in-flight async scan (sandbox/deferred/network) that captured a prior copy pins
|
|
213
|
+
// it — a periodic rebuild therefore accumulates copies. This was the monitor's
|
|
214
|
+
// old_space → OOM leak: a heap snapshot showed 7+ live copies of the 421K-entry
|
|
215
|
+
// Map retained via loadCachedIOCs closures + suspended Generators/Promises.
|
|
216
|
+
// Fix: rebuild ONLY when a source file actually changes (mtime/size signature) or
|
|
217
|
+
// on invalidateCache(); otherwise return the same object. The signature is
|
|
218
|
+
// re-checked at most every SOURCE_CHECK_INTERVAL so the hot path (called per
|
|
219
|
+
// scan/poll) does zero disk I/O.
|
|
220
|
+
const IOCS_DIR = path.join(__dirname, '..', '..', 'iocs');
|
|
221
|
+
const IOC_SOURCE_FILES = [
|
|
222
|
+
CACHE_IOC_FILE, LOCAL_IOC_FILE, LOCAL_COMPACT_FILE,
|
|
223
|
+
path.join(IOCS_DIR, 'packages.yaml'), path.join(IOCS_DIR, 'builtin.yaml'),
|
|
224
|
+
path.join(IOCS_DIR, 'hashes.yaml'), path.join(IOCS_DIR, 'string-iocs.yaml')
|
|
225
|
+
];
|
|
226
|
+
/**
 * Build a change-detection signature for the IOC source files.
 *
 * Each file in IOC_SOURCE_FILES contributes `<mtimeMs>:<size>;`; a file that
 * cannot be stat'ed (missing, unreadable, ...) contributes `0;`, so a file
 * appearing or disappearing also changes the signature.
 *
 * @returns {string} concatenated per-file fragments, in IOC_SOURCE_FILES order
 */
function iocSourcesSignature() {
  const fragments = IOC_SOURCE_FILES.map((file) => {
    try {
      const { mtimeMs, size } = fs.statSync(file);
      return mtimeMs + ':' + size + ';';
    } catch {
      // Stat failed: use a fixed placeholder for this file.
      return '0;';
    }
  });
  return fragments.join('');
}
|
|
231
|
+
|
|
206
232
|
let cachedIOCsResult = null;
|
|
207
|
-
let
|
|
208
|
-
|
|
233
|
+
let cachedIOCsSig = null;
|
|
234
|
+
let lastSourceCheck = 0;
|
|
235
|
+
const SOURCE_CHECK_INTERVAL = 10000; // re-stat source files at most every 10s
|
|
209
236
|
|
|
210
237
|
function loadCachedIOCs() {
|
|
211
|
-
// Return cache if still valid
|
|
212
238
|
const now = Date.now();
|
|
213
|
-
if (cachedIOCsResult
|
|
214
|
-
return
|
|
239
|
+
if (cachedIOCsResult) {
|
|
240
|
+
// Hot path: within the check window, return the singleton with no disk I/O.
|
|
241
|
+
if (now - lastSourceCheck < SOURCE_CHECK_INTERVAL) return cachedIOCsResult;
|
|
242
|
+
lastSourceCheck = now;
|
|
243
|
+
// Throttled freshness check: keep the singleton unless a source file changed.
|
|
244
|
+
if (iocSourcesSignature() === cachedIOCsSig) return cachedIOCsResult;
|
|
215
245
|
}
|
|
216
246
|
|
|
217
247
|
// Priority 1: YAML IOCs
|
|
@@ -279,9 +309,11 @@ function loadCachedIOCs() {
|
|
|
279
309
|
// Create optimized structures for O(1) lookup
|
|
280
310
|
const optimized = createOptimizedIOCs(merged);
|
|
281
311
|
|
|
282
|
-
// Store
|
|
312
|
+
// Store as the shared singleton; record the source signature so we only rebuild
|
|
313
|
+
// when the IOC files actually change (see loadCachedIOCs header).
|
|
283
314
|
cachedIOCsResult = optimized;
|
|
284
|
-
|
|
315
|
+
cachedIOCsSig = iocSourcesSignature();
|
|
316
|
+
lastSourceCheck = now;
|
|
285
317
|
|
|
286
318
|
return optimized;
|
|
287
319
|
}
|
|
@@ -560,7 +592,8 @@ function expandCompactIOCs(compact) {
|
|
|
560
592
|
|
|
561
593
|
/**
 * Drop the in-process IOC singleton and its bookkeeping.
 *
 * Clears the cached store, the source-file signature recorded for it, and the
 * timestamp of the last source check.
 */
function invalidateCache() {
  lastSourceCheck = 0;
  cachedIOCsSig = null;
  cachedIOCsResult = null;
}
|
|
565
598
|
|
|
566
599
|
/**
|
package/src/monitor/classify.js
CHANGED
|
@@ -73,7 +73,9 @@ const HIGH_CONFIDENCE_MALICE_TYPES = new Set([
|
|
|
73
73
|
// cap since the attack uses optionalDependencies + prepare hook (no direct lifecycle).
|
|
74
74
|
'env_charcode_reconstruction', // fromCharCode + process.env[computed] (TeamPCP credential stealer)
|
|
75
75
|
'ide_hook_autoexec', // .claude/settings.json SessionStart hook, .vscode/tasks.json folderOpen (Shai-Hulud)
|
|
76
|
-
'workflow_secrets_dump' // toJSON(secrets) in GitHub Actions workflow (Shai-Hulud)
|
|
76
|
+
'workflow_secrets_dump', // toJSON(secrets) in GitHub Actions workflow (Shai-Hulud)
|
|
77
|
+
// Phantom Gyp 2026-06: binding.gyp command-substitution = install-time RCE, almost never seen in benign packages
|
|
78
|
+
'gyp_command_exec'
|
|
77
79
|
]);
|
|
78
80
|
|
|
79
81
|
// Lifecycle compound types that indicate real malicious intent beyond a simple postinstall
|
package/src/monitor/queue.js
CHANGED
|
@@ -26,6 +26,7 @@ const {
|
|
|
26
26
|
cacheTarball,
|
|
27
27
|
updateScanStats,
|
|
28
28
|
appendDetection,
|
|
29
|
+
appendScanLedger,
|
|
29
30
|
maybePersistDailyStats,
|
|
30
31
|
appendTemporalDetection,
|
|
31
32
|
tarballCacheKey,
|
|
@@ -221,6 +222,20 @@ function recordTrainingSample(result, params) {
|
|
|
221
222
|
sandboxResult: params.sandboxResult || null
|
|
222
223
|
});
|
|
223
224
|
appendTrainingRecord(record);
|
|
225
|
+
// Phase 0a: per-scan coverage ledger — record this terminal outcome (best-effort;
|
|
226
|
+
// appendScanLedger swallows its own write errors and never throws).
|
|
227
|
+
appendScanLedger({
|
|
228
|
+
name: params.name,
|
|
229
|
+
version: params.version,
|
|
230
|
+
ecosystem: params.ecosystem,
|
|
231
|
+
outcome: params.label || 'clean',
|
|
232
|
+
score: (result.summary && typeof result.summary.riskScore === 'number') ? result.summary.riskScore : null,
|
|
233
|
+
tier: params.tier,
|
|
234
|
+
maxSeverity: result.summary ? result.summary.riskLevel : null,
|
|
235
|
+
types: [...new Set((result.threats || []).map(t => t.type))],
|
|
236
|
+
sandbox: params.sandboxResult ? 'run' : 'none',
|
|
237
|
+
source: 'scan'
|
|
238
|
+
});
|
|
224
239
|
} catch (err) {
|
|
225
240
|
// Non-fatal: ML export must never crash the monitor
|
|
226
241
|
console.error(`[ML] Failed to record training sample for ${params.name}: ${err.message}`);
|
|
@@ -521,6 +536,7 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
521
536
|
stats.totalTimeMs += Date.now() - startTime;
|
|
522
537
|
stats.clean++;
|
|
523
538
|
updateScanStats('clean');
|
|
539
|
+
appendScanLedger({ name, version, ecosystem, outcome: 'size_skip', score: 0, source: 'size_skip_quick_clean' });
|
|
524
540
|
return;
|
|
525
541
|
}
|
|
526
542
|
} catch {
|
|
@@ -541,6 +557,7 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
541
557
|
stats.totalTimeMs += Date.now() - startTime;
|
|
542
558
|
stats.clean++;
|
|
543
559
|
updateScanStats('clean');
|
|
560
|
+
appendScanLedger({ name, version, ecosystem, outcome: 'size_skip', score: 0, source: 'size_skip_extract_failed' });
|
|
544
561
|
return;
|
|
545
562
|
}
|
|
546
563
|
}
|
|
@@ -32,9 +32,20 @@ let _lastHardDropLog = 0;
|
|
|
32
32
|
function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
|
|
33
33
|
let dropped = false;
|
|
34
34
|
if (scanQueue.length >= max) {
|
|
35
|
-
scanQueue.shift(); // drop oldest
|
|
35
|
+
const evicted = scanQueue.shift(); // drop oldest
|
|
36
36
|
dropped = true;
|
|
37
37
|
if (stats) stats.queueHardDrops = (stats.queueHardDrops || 0) + 1;
|
|
38
|
+
// Phase 0a: record the dropped item so a coverage loss keeps an identity — answers
|
|
39
|
+
// "which versions were never scanned" (e.g. the Miasma 72s/96-version burst). Lazy
|
|
40
|
+
// require avoids any top-level coupling with state.js; best-effort, never throws.
|
|
41
|
+
try {
|
|
42
|
+
if (evicted && evicted.name) {
|
|
43
|
+
require('./state.js').appendScanLedger({
|
|
44
|
+
name: evicted.name, version: evicted.version, ecosystem: evicted.ecosystem,
|
|
45
|
+
outcome: 'dropped', source: 'queue_cap'
|
|
46
|
+
});
|
|
47
|
+
}
|
|
48
|
+
} catch { /* ledger is best-effort */ }
|
|
38
49
|
const now = Date.now();
|
|
39
50
|
if (now - _lastHardDropLog > HARD_DROP_LOG_INTERVAL_MS) {
|
|
40
51
|
_lastHardDropLog = now;
|
package/src/monitor/state.js
CHANGED
|
@@ -951,6 +951,114 @@ function _compactDetectionsJsonl() {
|
|
|
951
951
|
}
|
|
952
952
|
}
|
|
953
953
|
|
|
954
|
+
// --- Per-scan ledger (Phase 0a: operational coverage observability) ---
|
|
955
|
+
// Append-only record of EVERY package the monitor dequeues + its terminal outcome,
|
|
956
|
+
// so we can distinguish never-scanned vs scanned-clean vs suspect vs dropped and
|
|
957
|
+
// measure TRUE operational coverage (not just rule-TPR on the static corpus).
|
|
958
|
+
// Mirrors the detections JSONL machinery (chunked iterate + periodic compaction).
|
|
959
|
+
// Differences vs detections: (1) NO dedup — every scan event is a distinct record;
|
|
960
|
+
// (2) higher cap + compaction interval since this logs every scan, not just findings.
|
|
961
|
+
const SCAN_LEDGER_FILE = process.env.MUADDIB_SCAN_LEDGER_FILE || path.join(__dirname, '..', '..', 'data', 'scan-ledger.jsonl');
|
|
962
|
+
const MAX_SCAN_LEDGER = (() => {
|
|
963
|
+
const raw = process.env.MUADDIB_SCAN_LEDGER_MAX;
|
|
964
|
+
const n = raw ? parseInt(raw, 10) : NaN;
|
|
965
|
+
return (Number.isFinite(n) && n >= 10 && n <= 5_000_000) ? n : 500_000;
|
|
966
|
+
})();
|
|
967
|
+
const SCAN_LEDGER_COMPACT_INTERVAL = 2000;
|
|
968
|
+
let _scanLedgerAppendedSinceCompact = 0;
|
|
969
|
+
|
|
970
|
+
// Terminal outcomes a dequeued package can reach. Unknown values normalize to 'clean'
|
|
971
|
+
// so a typo at a call site can never crash the pipeline.
|
|
972
|
+
const SCAN_LEDGER_OUTCOMES = new Set([
|
|
973
|
+
'clean', 'clean_low_signal', 'clean_tooling', 'suspect', 'ml_clean', 'llm_benign',
|
|
974
|
+
'sandbox_inconclusive', 'sandbox_unconfirmed', 'confirmed',
|
|
975
|
+
'static_timeout', 'size_skip', 'dropped'
|
|
976
|
+
]);
|
|
977
|
+
|
|
978
|
+
/**
 * Record the terminal outcome of one dequeued package as a single JSONL line in
 * the scan ledger. Best-effort by design: every failure is handled here and
 * nothing is rethrown, so a ledger problem cannot interrupt scanning.
 * Entries are never deduplicated; each call with a name appends one line.
 *
 * Every SCAN_LEDGER_COMPACT_INTERVAL successful appends (counted per process)
 * the ledger is compacted via _compactScanLedgerJsonl().
 *
 * @param {object} e
 * @param {string} e.name            package name; the call is a no-op without it
 * @param {string} [e.version]       stored as null when absent
 * @param {string} [e.ecosystem]     'npm' | 'pypi' | ... (null when absent)
 * @param {string} [e.outcome]       a SCAN_LEDGER_OUTCOMES member; anything else is stored as 'clean'
 * @param {number} [e.score]         riskScore at the terminal decision (null unless a number)
 * @param {string} [e.tier]          suspect tier ('1a'|'1b'|2|3); stored stringified, null when absent
 * @param {string} [e.maxSeverity]   null when absent
 * @param {string[]} [e.types]       threat types; only the first 12 are kept
 * @param {string} [e.sandbox]       'none' | 'run' | 'deferred' | 'skip' (default 'none')
 * @param {boolean} [e.firstPublish] coerced to a boolean
 * @param {string} [e.source]        origin of the record ('scan', 'queue_cap', ...; default 'scan')
 */
function appendScanLedger(e) {
  try {
    if (!e || !e.name) return;

    const ledgerDir = path.dirname(SCAN_LEDGER_FILE);
    if (!fs.existsSync(ledgerDir)) {
      fs.mkdirSync(ledgerDir, { recursive: true });
    }

    const hasTier = e.tier !== undefined && e.tier !== null;
    const knownOutcome = SCAN_LEDGER_OUTCOMES.has(e.outcome);
    const record = {
      ts: new Date().toISOString(),
      name: e.name,
      version: e.version || null,
      ecosystem: e.ecosystem || null,
      outcome: knownOutcome ? e.outcome : 'clean',
      score: (typeof e.score === 'number') ? e.score : null,
      tier: hasTier ? String(e.tier) : null,
      maxSeverity: e.maxSeverity || null,
      types: Array.isArray(e.types) ? e.types.slice(0, 12) : [],
      sandbox: e.sandbox || 'none',
      firstPublish: !!e.firstPublish,
      source: e.source || 'scan'
    };
    fs.appendFileSync(SCAN_LEDGER_FILE, JSON.stringify(record) + '\n', 'utf8');

    // Periodic compaction: only once enough lines were appended by this process.
    _scanLedgerAppendedSinceCompact += 1;
    if (_scanLedgerAppendedSinceCompact < SCAN_LEDGER_COMPACT_INTERVAL) return;
    _scanLedgerAppendedSinceCompact = 0;
    _compactScanLedgerJsonl();
  } catch (err) {
    switch (err.code) {
      // Read-only or permission-restricted environments: stay silent.
      case 'EROFS':
      case 'EACCES':
      case 'EPERM':
        return;
      case 'ENOSPC':
        console.warn('[MONITOR] WARNING: disk full (ENOSPC) — cannot persist scan-ledger.');
        return;
      default:
        console.error(`[MONITOR] Failed to write scan-ledger: ${err.message}`);
    }
  }
}
|
|
1030
|
+
|
|
1031
|
+
/**
 * Compact the scan-ledger JSONL: keep only the most recent MAX_SCAN_LEDGER entries.
 * No-op when the ledger is already at or under the cap.
 *
 * The surviving entries are written to `<ledger>.tmp` in bounded batches and the
 * temp file is then renamed over the ledger. Batching keeps peak memory
 * proportional to the batch size instead of the cap: joining every kept line
 * into one string can exceed the engine's maximum string length when the cap is
 * configured near its upper bound, which would make every compaction fail.
 *
 * Best-effort: never throws. On any failure the temp file is removed and the
 * original ledger is left untouched.
 *
 * NOTE(review): `toDrop` is derived from _countJsonlLines() but applied to the
 * entries yielded by _iterateJsonlSync(); this assumes both helpers agree on
 * what counts as an entry (e.g. for blank or malformed lines) — confirm.
 */
function _compactScanLedgerJsonl() {
  const tmpFile = SCAN_LEDGER_FILE + '.tmp';
  try {
    const total = _countJsonlLines(SCAN_LEDGER_FILE);
    if (total <= MAX_SCAN_LEDGER) return;

    const toDrop = total - MAX_SCAN_LEDGER;
    const FLUSH_EVERY = 5000; // lines buffered between writes to the temp file
    let skipped = 0;
    let keptCount = 0;
    let pending = [];
    let writeError = null;

    const flush = () => {
      if (pending.length === 0) return;
      fs.appendFileSync(tmpFile, pending.join('\n') + '\n', 'utf8');
      pending = [];
    };

    // Start from an empty temp file so a stale one from an earlier crash cannot leak in.
    fs.writeFileSync(tmpFile, '', 'utf8');

    _iterateJsonlSync(SCAN_LEDGER_FILE, (entry) => {
      if (writeError) return;
      if (skipped < toDrop) { skipped++; return; }
      try {
        pending.push(JSON.stringify(entry));
        keptCount++;
        if (pending.length >= FLUSH_EVERY) flush();
      } catch (err) {
        // Remember the failure and rethrow after iteration, in case the iterator
        // swallows callback exceptions: a partial temp file must never be renamed.
        writeError = err;
      }
    });
    if (writeError) throw writeError;
    flush();

    fs.renameSync(tmpFile, SCAN_LEDGER_FILE);
    console.log(`[MONITOR] COMPACT scan-ledger: ${total} -> ${keptCount} entries`);
  } catch (err) {
    try { fs.unlinkSync(tmpFile); } catch { /* no temp file to clean up */ }
    console.error(`[MONITOR] Scan-ledger compaction failed: ${err.message}`);
  }
}
|
|
1054
|
+
|
|
1055
|
+
/**
 * Read the whole scan ledger into memory (tests + Phase 0b rollup).
 *
 * @returns {object[]} the entries yielded by _iterateJsonlSync, in file order;
 *   if iteration throws, whatever was collected up to that point (possibly empty).
 */
function loadScanLedger() {
  const collected = [];
  try {
    _iterateJsonlSync(SCAN_LEDGER_FILE, (record) => {
      collected.push(record);
    });
  } catch {
    /* ignore */
  }
  return collected;
}
|
|
1061
|
+
|
|
954
1062
|
// --- Scan stats (FP rate tracking) ---
|
|
955
1063
|
|
|
956
1064
|
function loadScanStats() {
|
|
@@ -1420,6 +1528,8 @@ module.exports = {
|
|
|
1420
1528
|
MAX_TEMPORAL_DETECTIONS,
|
|
1421
1529
|
MAX_DAILY_ALERTS,
|
|
1422
1530
|
DETECTION_COMPACT_INTERVAL,
|
|
1531
|
+
SCAN_LEDGER_FILE,
|
|
1532
|
+
MAX_SCAN_LEDGER,
|
|
1423
1533
|
|
|
1424
1534
|
// Mutable state getters/setters
|
|
1425
1535
|
getScanMemoryCache,
|
|
@@ -1456,6 +1566,9 @@ module.exports = {
|
|
|
1456
1566
|
appendAlert,
|
|
1457
1567
|
loadDetections,
|
|
1458
1568
|
appendDetection,
|
|
1569
|
+
appendScanLedger,
|
|
1570
|
+
loadScanLedger,
|
|
1571
|
+
_compactScanLedgerJsonl,
|
|
1459
1572
|
getDetectionStats,
|
|
1460
1573
|
runStateMigrations,
|
|
1461
1574
|
// Internal — exported for tests and for the daemon hourly housekeeping.
|
|
@@ -45,6 +45,18 @@ function getMinFreeBytes() {
|
|
|
45
45
|
return gb * 1024 * 1024 * 1024;
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
+
// Tarball download is gated on this score so the heavy .tgz is kept ONLY for
// alert-threshold packages; the cheap JSON metadata is still written for every
// suspect. Aligns with the webhook alert floor (20). Bounded to [0, 100], default 20.
const DEFAULT_TGZ_MIN_SCORE = 20;

/**
 * Resolve the minimum score at which a suspect's tarball is archived.
 *
 * Reads MUADDIB_ARCHIVE_TGZ_MIN_SCORE. The value must be a plain non-negative
 * decimal number (optional leading '+', optional fractional part, which is
 * truncated) within [0, 100]; anything else falls back to the default.
 *
 * The whole string is validated before parsing because parseInt alone stops at
 * the first non-digit: '1e2' would be read as 1 and '50abc' as 50, silently
 * applying a threshold the operator never asked for.
 *
 * @returns {number} integer threshold in [0, 100]
 */
function getArchiveTgzMinScore() {
  const raw = process.env.MUADDIB_ARCHIVE_TGZ_MIN_SCORE;
  if (raw === undefined) return DEFAULT_TGZ_MIN_SCORE;

  const text = String(raw).trim();
  if (!/^\+?\d+(?:\.\d*)?$/.test(text)) return DEFAULT_TGZ_MIN_SCORE;

  const n = Number.parseInt(text, 10);
  if (!Number.isFinite(n) || n < 0 || n > 100) return DEFAULT_TGZ_MIN_SCORE;
  return n;
}
|
|
59
|
+
|
|
48
60
|
function hasEnoughSpace(targetDir) {
|
|
49
61
|
try {
|
|
50
62
|
if (typeof fs.statfsSync !== 'function') return true; // Node <18.15 — fail-open
|
|
@@ -109,14 +121,20 @@ async function archiveSuspectTarball(packageName, version, tarballUrl, scanResul
|
|
|
109
121
|
|
|
110
122
|
// Defense-in-depth: never archive packages that are statically clean.
|
|
111
123
|
// Callers in the pipeline already gate on tier 1a/1b/2 classification, but a
|
|
112
|
-
// numeric score of 0 with no triggered rules is unambiguously CLEAN
|
|
113
|
-
// dominated archive volume in production.
|
|
124
|
+
// numeric score of 0 with no triggered rules is unambiguously CLEAN.
|
|
114
125
|
const score = (scanResult && typeof scanResult.score === 'number') ? scanResult.score : 0;
|
|
115
126
|
const rules = (scanResult && Array.isArray(scanResult.rulesTriggered)) ? scanResult.rulesTriggered : [];
|
|
116
127
|
if (score === 0 && rules.length === 0) {
|
|
117
128
|
return false;
|
|
118
129
|
}
|
|
119
130
|
|
|
131
|
+
// Tarballs dominate archive volume (~439MB/day of .tgz vs ~3.6MB/day of JSON).
|
|
132
|
+
// Keep the cheap JSON metadata for EVERY suspect (audit trail + GT-promotion index),
|
|
133
|
+
// but download/retain the heavy .tgz ONLY for packages at/above the alert threshold
|
|
134
|
+
// (score >= MUADDIB_ARCHIVE_TGZ_MIN_SCORE, default 20 = webhook floor). This shrinks
|
|
135
|
+
// the archive from tens of GB to hundreds of MB without losing the record of what was seen.
|
|
136
|
+
const keepTarball = score >= getArchiveTgzMinScore();
|
|
137
|
+
|
|
120
138
|
const dateStr = getArchiveDateString();
|
|
121
139
|
const dayDir = path.join(ARCHIVE_DIR, dateStr);
|
|
122
140
|
const safeName = sanitizeForFilename(packageName);
|
|
@@ -124,32 +142,55 @@ async function archiveSuspectTarball(packageName, version, tarballUrl, scanResul
|
|
|
124
142
|
const tgzPath = path.join(dayDir, `${basename}.tgz`);
|
|
125
143
|
const jsonPath = path.join(dayDir, `${basename}.json`);
|
|
126
144
|
|
|
127
|
-
//
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
145
|
+
// At/above the alert threshold: archive the full .tgz (existing behavior, unchanged).
|
|
146
|
+
// Below it: keep only the cheap JSON metadata (audit trail + GT-promotion index).
|
|
147
|
+
if (keepTarball) {
|
|
148
|
+
// Dedup: skip if already archived
|
|
149
|
+
if (fs.existsSync(tgzPath)) {
|
|
150
|
+
return false;
|
|
151
|
+
}
|
|
131
152
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
}
|
|
153
|
+
// Disk-space gate: don't let a burst of suspects run the volume to 100% between
|
|
154
|
+
// the periodic cleanups. Guards the heavy .tgz download.
|
|
155
|
+
if (!hasEnoughSpace(ARCHIVE_DIR)) {
|
|
156
|
+
console.warn(`[Archive] Skip ${packageName}@${version}: free space below ${DEFAULT_MIN_FREE_GB}GB threshold`);
|
|
157
|
+
return false;
|
|
158
|
+
}
|
|
139
159
|
|
|
140
|
-
|
|
141
|
-
|
|
160
|
+
// Ensure day directory exists
|
|
161
|
+
fs.mkdirSync(dayDir, { recursive: true });
|
|
142
162
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
163
|
+
// Download with semaphore (shares concurrency with rest of pipeline). Download
|
|
164
|
+
// errors propagate to the fire-and-forget .catch() in the caller (queue.js).
|
|
165
|
+
await acquireRegistrySlot();
|
|
166
|
+
try {
|
|
167
|
+
await downloadToFile(tarballUrl, tgzPath, ARCHIVE_TIMEOUT_MS);
|
|
168
|
+
} finally {
|
|
169
|
+
releaseRegistrySlot();
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
const tarballSha256 = sha256File(tgzPath);
|
|
173
|
+
const metadata = {
|
|
174
|
+
package: packageName,
|
|
175
|
+
version,
|
|
176
|
+
timestamp: new Date().toISOString(),
|
|
177
|
+
score: scanResult.score || 0,
|
|
178
|
+
priority: scanResult.priority || null,
|
|
179
|
+
rules_triggered: scanResult.rulesTriggered || [],
|
|
180
|
+
llm_verdict: scanResult.llmVerdict || null,
|
|
181
|
+
tarball_archived: true,
|
|
182
|
+
tarball_sha256: tarballSha256
|
|
183
|
+
};
|
|
184
|
+
fs.writeFileSync(jsonPath, JSON.stringify(metadata, null, 2));
|
|
185
|
+
return true;
|
|
149
186
|
}
|
|
150
187
|
|
|
151
|
-
//
|
|
152
|
-
|
|
188
|
+
// Below the alert threshold — record cheap JSON metadata only, skip the tarball.
|
|
189
|
+
// Dedup on the JSON record so re-scans of the same package@version don't rewrite it.
|
|
190
|
+
if (fs.existsSync(jsonPath)) {
|
|
191
|
+
return false;
|
|
192
|
+
}
|
|
193
|
+
fs.mkdirSync(dayDir, { recursive: true });
|
|
153
194
|
const metadata = {
|
|
154
195
|
package: packageName,
|
|
155
196
|
version,
|
|
@@ -158,9 +199,9 @@ async function archiveSuspectTarball(packageName, version, tarballUrl, scanResul
|
|
|
158
199
|
priority: scanResult.priority || null,
|
|
159
200
|
rules_triggered: scanResult.rulesTriggered || [],
|
|
160
201
|
llm_verdict: scanResult.llmVerdict || null,
|
|
161
|
-
|
|
202
|
+
tarball_archived: false,
|
|
203
|
+
tarball_sha256: null
|
|
162
204
|
};
|
|
163
|
-
|
|
164
205
|
fs.writeFileSync(jsonPath, JSON.stringify(metadata, null, 2));
|
|
165
206
|
return true;
|
|
166
207
|
}
|
|
@@ -272,5 +313,6 @@ module.exports = {
|
|
|
272
313
|
getArchiveDateString,
|
|
273
314
|
getRetentionDays,
|
|
274
315
|
getMinFreeBytes,
|
|
316
|
+
getArchiveTgzMinScore,
|
|
275
317
|
parseArchiveDayDir
|
|
276
318
|
};
|
|
@@ -1001,6 +1001,10 @@ const PLAYBOOKS = {
|
|
|
1001
1001
|
'HAUTE: binding.gyp avec script lifecycle non-standard. Code natif compile a l\'installation. ' +
|
|
1002
1002
|
'Verifier le contenu de binding.gyp et les sources C/C++. Installer avec --ignore-scripts si suspect.',
|
|
1003
1003
|
|
|
1004
|
+
gyp_command_exec:
|
|
1005
|
+
'CRITIQUE: binding.gyp utilise la command-substitution GYP <!(...) / <!@(...) — execution de code a l\'installation via node-gyp, sans script lifecycle (pattern Phantom Gyp). ' +
|
|
1006
|
+
'Decoder la commande substituee. NE PAS installer : node-gyp l\'execute au build meme avec --ignore-scripts. Verifier la source officielle du package.',
|
|
1007
|
+
|
|
1004
1008
|
string_mutation_obfuscation:
|
|
1005
1009
|
'HAUTE: Chaine de .replace() reconstruisant des noms d\'API dangereuses (leet-speak). ' +
|
|
1006
1010
|
'Technique d\'evasion par substitution de caracteres. Decoder la chaine finale. Supprimer si malveillant.',
|
package/src/rules/index.js
CHANGED
|
@@ -2949,6 +2949,19 @@ const RULES = {
|
|
|
2949
2949
|
],
|
|
2950
2950
|
mitre: 'T1195.002'
|
|
2951
2951
|
},
|
|
2952
|
+
gyp_command_exec: {
|
|
2953
|
+
id: 'MUADDIB-PKG-023',
|
|
2954
|
+
name: 'GYP Command-Substitution Install Execution',
|
|
2955
|
+
severity: 'CRITICAL',
|
|
2956
|
+
confidence: 'high',
|
|
2957
|
+
domain: 'malware',
|
|
2958
|
+
description: 'binding.gyp utilise la command-substitution GYP <!(...) / <!@(...) — execution de code a l\'installation via node-gyp, sans script lifecycle package.json (pattern Phantom Gyp, juin 2026).',
|
|
2959
|
+
references: [
|
|
2960
|
+
'https://gyp.gsrc.io/docs/InputFormatReference.md',
|
|
2961
|
+
'https://attack.mitre.org/techniques/T1195.002/'
|
|
2962
|
+
],
|
|
2963
|
+
mitre: 'T1195.002'
|
|
2964
|
+
},
|
|
2952
2965
|
string_mutation_obfuscation: {
|
|
2953
2966
|
id: 'MUADDIB-AST-074',
|
|
2954
2967
|
name: 'String Mutation Obfuscation',
|
package/src/scanner/package.js
CHANGED
|
@@ -252,6 +252,51 @@ async function scanPackageJson(targetPath) {
|
|
|
252
252
|
// Check if binding.gyp references C/C++ source files
|
|
253
253
|
const hasNativeSources = /\.(c|cc|cpp|cxx|h|hpp)\b/.test(gypContent);
|
|
254
254
|
|
|
255
|
+
// Phantom Gyp (June 2026): GYP command-substitution <!(...) / <!@(...) runs a command at
|
|
256
|
+
// *configure* time via `node-gyp`, which npm auto-runs on install whenever a binding.gyp is
|
|
257
|
+
// present — NO package.json lifecycle script required, so it slips past every lifecycle-gated
|
|
258
|
+
// check below. Distinct from <(...) / <@(...) (plain variable expansion, benign) which MUST
|
|
259
|
+
// NOT fire — the required `!` gates command execution.
|
|
260
|
+
//
|
|
261
|
+
// Legit native addons use <!(...) heavily for build-env queries — `node -p process.versions`,
|
|
262
|
+
// `node ./util/has_lib.js`, `pkg-config ... | sed`, `node -p "require('node-addon-api').include"`
|
|
263
|
+
// — and a build-helper `<!(node x.js)` is statically INDISTINGUISHABLE from a payload
|
|
264
|
+
// `<!(node index.js)`. To honor "FPR must never increase" we flag a command-sub ONLY when it
|
|
265
|
+
// carries a malice-specific marker, never the bare "runs a script" shape:
|
|
266
|
+
// (1) GYP_DANGER — shell-level malice in the command line itself: the Phantom Gyp fake-source
|
|
267
|
+
// trick (`; / && / | echo <name>.c`, returning a fabricated source so node-gyp doesn't
|
|
268
|
+
// error), network fetch (curl/wget), pipe-to-shell (| sh, sh -c), eval, base64, /dev/tcp,
|
|
269
|
+
// char-code obfuscation (fromCharCode/atob);
|
|
270
|
+
// (2) an inline interpreter payload — node|python|ruby|perl running -e/-c/-p/--eval/--print code
|
|
271
|
+
// that reaches the NETWORK (require/import of https|http|net|dgram|dns|tls, optional node:
|
|
272
|
+
// prefix; fetch; urllib/requests/httpx/http.client/urlopen; socket). Network at configure
|
|
273
|
+
// time is never a legit build query. We deliberately do NOT key on child_process/exec/spawn
|
|
274
|
+
// here — legit addons shell out to detect the toolchain (`node -e "...execSync('gcc
|
|
275
|
+
// --version')..."`), which would FP; an exec of curl/wget is still caught by GYP_DANGER.
|
|
276
|
+
// Catches `<!(node --eval require('node:https')...)`, `<!(python3 -c import requests)`.
|
|
277
|
+
// Honest limitation: this is a line-by-line SPEED-BUMP, not coverage. A bare `<!(node payload.js)`
|
|
278
|
+
// and any non-network inline payload are NOT flagged (indistinguishable from canvas/node-sass
|
|
279
|
+
// build helpers without false positives, FPR-first by design). Real closure needs a compound
|
|
280
|
+
// (configure-time sink × the run script's AST/dataflow verdict) — a separate effort.
|
|
281
|
+
const GYP_DANGER = /[;&|]\s*echo\s+[^|;&]*\.(?:c|cc|cpp|cxx|m|mm|cs)\b|\bcurl\b|\bwget\b|\|\s*(?:sh|bash|zsh)\b|\b(?:sh|bash|zsh)\s+-c\b|\beval\b|\bbase64\b|\/dev\/tcp|fromCharCode|\batob\b/i;
|
|
282
|
+
const GYP_INTERP = /\b(?:node|nodejs|python[0-9.]*|ruby|perl)\b[^|;&\n]{0,40}?\s--?(?:eval|print|e|c|p)\b/i;
|
|
283
|
+
const GYP_PAYLOAD_API = /(?:require|import)\s*\(\s*['"](?:node:)?(?:https?|net|dgram|dns|tls)['"]|\bfetch\s*\(|\burllib\b|\brequests\b|\bhttpx\b|http\.client|\burlopen\b|socket\.(?:socket|create_connection)/i;
|
|
284
|
+
let gypCommandExec = false;
|
|
285
|
+
const gypCmdSubRe = /<!@?\(([^\n]{0,400})/g;
|
|
286
|
+
let _gm;
|
|
287
|
+
while ((_gm = gypCmdSubRe.exec(gypContent)) !== null) {
|
|
288
|
+
const body = _gm[1];
|
|
289
|
+
if (GYP_DANGER.test(body) || (GYP_INTERP.test(body) && GYP_PAYLOAD_API.test(body))) { gypCommandExec = true; break; }
|
|
290
|
+
}
|
|
291
|
+
if (gypCommandExec) {
|
|
292
|
+
threats.push({
|
|
293
|
+
type: 'gyp_command_exec',
|
|
294
|
+
severity: 'CRITICAL',
|
|
295
|
+
message: `binding.gyp uses GYP command-substitution (<!(...) / <!@(...)) running a non-build command at install time via node-gyp, no lifecycle script required (Phantom Gyp pattern).`,
|
|
296
|
+
file: 'binding.gyp'
|
|
297
|
+
});
|
|
298
|
+
}
|
|
299
|
+
|
|
255
300
|
if (hasShellActions) {
|
|
256
301
|
threats.push({
|
|
257
302
|
type: 'native_addon_install',
|
package/src/scoring.js
CHANGED
|
@@ -130,7 +130,9 @@ const PACKAGE_LEVEL_TYPES = new Set([
|
|
|
130
130
|
// audit DF-C1: emitted when MAX_GRAPH_NODES exceeded so cross-file blind spot is visible in scoring
|
|
131
131
|
'large_package_graph_truncated',
|
|
132
132
|
// audit MR-C1: informational signal that the scan target is a monorepo root (per-workspace scoring TBD)
|
|
133
|
-
'monorepo_detected'
|
|
133
|
+
'monorepo_detected',
|
|
134
|
+
// Phantom Gyp: binding.gyp command-substitution is a package-level (manifest) finding
|
|
135
|
+
'gyp_command_exec'
|
|
134
136
|
]);
|
|
135
137
|
|
|
136
138
|
// ============================================
|