muaddib-scanner 2.10.35 → 2.10.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/archive-cleanup.sh +7 -0
- package/scripts/audit-archive.sh +45 -0
- package/src/ml/llm-detective.js +30 -0
- package/src/monitor/queue.js +13 -0
- package/src/monitor/tarball-archive.js +120 -0
package/package.json
CHANGED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
#!/bin/bash
# Deletes archive day-directories older than 30 days, then logs the remaining total size.
ARCHIVE_DIR="/opt/muaddib/archive"
# -mindepth/-maxdepth 1 restricts the match to entries directly under the archive root,
# so find never matches a nested "20*" directory and never tries to descend into a
# directory that rm has just removed. "--" keeps rm from treating a path as an option.
find "$ARCHIVE_DIR" -mindepth 1 -maxdepth 1 -type d -name "20*" -mtime +30 -exec rm -rf -- {} + 2>/dev/null
# Log
TOTAL=$(du -sh "$ARCHIVE_DIR" 2>/dev/null | cut -f1)
echo "[Archive Cleanup] $(date -Iseconds) — Total size: $TOTAL"
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/bin/bash
# Usage: ./audit-archive.sh [YYYY-MM-DD] [priority]
# Examples:
#   ./audit-archive.sh                    → summary of all dates
#   ./audit-archive.sh 2026-03-29         → list the packages archived that day
#   ./audit-archive.sh 2026-03-29 P1      → filter by priority

ARCHIVE_DIR="/opt/muaddib/archive"
DATE=$1
PRIORITY=$2

# No date given: print one line per day directory, then the grand total.
if [ -z "$DATE" ]; then
  echo "=== Archive Summary ==="
  for dir in "$ARCHIVE_DIR"/20*; do
    [ -d "$dir" ] || continue
    day=$(basename "$dir")
    # Count tarballs with a glob rather than parsing `ls` output.
    count=0
    for tgz in "$dir"/*.tgz; do
      [ -e "$tgz" ] || continue
      count=$((count + 1))
    done
    size=$(du -sh "$dir" 2>/dev/null | cut -f1)
    echo "$day : $count packages ($size)"
  done
  echo "---"
  echo "Total: $(du -sh "$ARCHIVE_DIR" 2>/dev/null | cut -f1)"
  exit 0
fi

# DATE becomes a path component below; reject anything that is not YYYY-MM-DD so a
# value such as "../.." cannot point the listing outside the archive root.
if ! [[ "$DATE" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
  echo "Invalid date: $DATE (expected YYYY-MM-DD)" >&2
  exit 1
fi

DIR="$ARCHIVE_DIR/$DATE"
if [ ! -d "$DIR" ]; then
  echo "No archive for $DATE"
  exit 1
fi

for json in "$DIR"/*.json; do
  [ -f "$json" ] || continue
  # One jq process per file instead of five. map(tostring) renders JSON null as the
  # text "null" (what separate `jq -r` calls print), which also keeps every
  # tab-separated field non-empty for `read`.
  if ! IFS=$'\t' read -r pkg ver prio score llm < <(
    jq -r '[.package, .version, .priority, .score, (.llm_verdict // "none")] | map(tostring) | @tsv' "$json" 2>/dev/null
  ); then
    echo "Skipping unreadable metadata: $json" >&2
    continue
  fi

  # Optional second argument: keep only entries whose priority matches exactly.
  if [ -n "$PRIORITY" ] && [ "$prio" != "$PRIORITY" ]; then
    continue
  fi

  printf "%-40s %-8s score=%-4s llm=%s\n" "$pkg@$ver" "$prio" "$score" "$llm"
done
|
package/src/ml/llm-detective.js
CHANGED
|
@@ -240,6 +240,20 @@ Step 5 — COHERENCE: Does the complexity match the purpose?
|
|
|
240
240
|
- Obfuscated code in a 10-line utility? → SUSPICIOUS
|
|
241
241
|
- Minified dist/ in a large framework? → NORMAL
|
|
242
242
|
|
|
243
|
+
Step 6 — FALSE POSITIVE CHECKS: Before declaring "malicious", verify the finding isn't one of these known benign patterns:
|
|
244
|
+
|
|
245
|
+
6a. PHANTOM LIFECYCLE SCRIPTS: package.json declares preinstall/postinstall pointing to a script file, BUT the "files" field in package.json EXCLUDES that directory from the published tarball. The script does NOT exist in the package. npm silently ignores missing lifecycle scripts. → BENIGN (packaging oversight, not a threat).
|
|
246
|
+
|
|
247
|
+
6b. ANONYMOUS INSTALL TELEMETRY: postinstall sends a single HTTP POST with ONLY: package version, process.platform, process.arch, process.version (Node), timestamp. NO process.env access, NO os.hostname(), NO os.userInfo(), NO file system reads, NO PII. This is standard practice (Next.js, Turborepo, Astro do the same). → BENIGN.
|
|
248
|
+
|
|
249
|
+
6c. BINARY WRAPPER FROM GITHUB RELEASES: postinstall downloads a platform-specific binary from GitHub Releases (github.com/OWNER/REPO/releases/download/TAG/ASSET), verifies SHA256 checksum, and installs it. This is standard for native tool distribution (esbuild, turbo, biome, swc). → BENIGN if: (a) URL is GitHub Releases, (b) checksum is verified, (c) no fallback to a suspicious third-party domain.
|
|
250
|
+
|
|
251
|
+
6d. APPLICATION CODE WITH HTTP CLIENTS: Source code contains fetch(), HttpClient, http.get() etc. but in application context (Angular services, React components, Express handlers). URLs are configurable templates (e.g. \`\${this.apiEndpoint}/...\`, \`\${process.env.NEXT_PUBLIC_API_URL}/...\`), not hardcoded suspicious domains. No lifecycle scripts performing network calls. → BENIGN — application code, not exfiltration.
|
|
252
|
+
|
|
253
|
+
6e. CLI TOOLS USING EXEC/SPAWN FOR LOCAL OPERATIONS: Code uses child_process.execSync() or spawn() for legitimate local operations: git commands, npm/npx invocations, platform tools (screencapture, xdotool), CLI binary discovery. No encoded payloads, no eval(), no Buffer.from(base64) for code execution. → BENIGN — verify WHAT is being executed, not just the presence of execSync.
|
|
254
|
+
|
|
255
|
+
6f. NPM PLACEHOLDER DEPENDENCIES: Package depends on "http@0.0.1-security" or similar npm placeholder packages. These are official npm placeholder modules (empty packages reserving names to prevent typosquatting). They are NOT suspicious HTTP modules. → BENIGN — ignore these dependencies entirely.
|
|
256
|
+
|
|
243
257
|
## GOLDEN RULE
|
|
244
258
|
|
|
245
259
|
If sensitive data (env vars, credentials, keys) is only READ for self-configuration and never SENT to an external third-party, the package is BENIGN regardless of what the scanner says.
|
|
@@ -264,6 +278,22 @@ EXAMPLE 4 — FALSE POSITIVE:
|
|
|
264
278
|
A web framework reads process.env.DATABASE_URL, process.env.API_KEY for configuration. It uses fetch() to call its own documented API endpoint. It uses dynamic require() to load user-configured plugins. Scanner flags env_access, dynamic_require, network_require — but all these are standard framework patterns. No data leaves the application boundary.
|
|
265
279
|
→ Verdict: BENIGN (confidence 0.95)
|
|
266
280
|
|
|
281
|
+
EXAMPLE 5 — FALSE POSITIVE (phantom lifecycle script):
|
|
282
|
+
Package "instructify@1.0.0" declares "postinstall": "node ./scripts/postinstall.js". But its "files" field is ["dist", ".cursor", "docs/README.md", "README.md", "LICENSE", "CHANGELOG.md", "CONTRIBUTING.md"]. The scripts/ directory does NOT exist in the published tarball because the "files" field excludes it. The postinstall script cannot execute — it is a packaging oversight. The GitHub repository shows the script only prints a welcome message.
|
|
283
|
+
→ Verdict: BENIGN (confidence 0.95)
|
|
284
|
+
|
|
285
|
+
EXAMPLE 6 — FALSE POSITIVE (anonymous telemetry):
|
|
286
|
+
Package "delimit-cli@3.14.46" has a postinstall that prints CLI setup instructions, then sends anonymous telemetry: POST to delimit.ai/api/telemetry with body {event:'install', version, node:process.version, platform:process.platform, arch:process.arch, ts:ISO}. Silent fail on error, 3s timeout. No PII, no process.env access beyond process.version/platform/arch, no os.hostname(), no file reads. This is standard anonymous install telemetry identical to what Next.js, Turborepo, and Astro do.
|
|
287
|
+
→ Verdict: BENIGN (confidence 0.92)
|
|
288
|
+
|
|
289
|
+
EXAMPLE 7 — FALSE POSITIVE (binary wrapper with checksum):
|
|
290
|
+
Package "plugin-kit-ai@1.0.1" has a postinstall that downloads a platform-specific binary from GitHub Releases (github.com/777genius/plugin-kit-ai/releases/download/vX.Y.Z/ASSET), verifies SHA256 checksum from checksums.txt, and extracts the binary to vendor/. No data exfiltration, no env access beyond optional GITHUB_TOKEN for rate limits. This is the standard binary distribution pattern used by esbuild, turbo, and biome.
|
|
291
|
+
→ Verdict: BENIGN (confidence 0.95)
|
|
292
|
+
|
|
293
|
+
EXAMPLE 8 — FALSE POSITIVE (application code with HTTP clients):
|
|
294
|
+
Package "@craft-ng/core@0.1.2" is an Angular state management library. No lifecycle scripts (no postinstall/preinstall). Source contains fetch() and http references but ONLY in JSDoc examples ("const response = await fetch(\`/api/users/\${params}\`)") and Angular service patterns (this.httpClient.get(url)). These are application code patterns, not active network calls during install. No child_process, no eval, no Buffer manipulation.
|
|
295
|
+
→ Verdict: BENIGN (confidence 0.95)
|
|
296
|
+
|
|
267
297
|
## KEY QUESTIONS TO ANSWER
|
|
268
298
|
|
|
269
299
|
1. "Do sensitive data (env vars, credentials) LEAVE the package to a third party?"
|
package/src/monitor/queue.js
CHANGED
|
@@ -99,6 +99,9 @@ const {
|
|
|
99
99
|
// From ./ingestion.js (will be created — currently in monitor.js)
|
|
100
100
|
const { getNpmLatestTarball, getPyPITarballUrl, getWeeklyDownloads } = require('./ingestion.js');
|
|
101
101
|
|
|
102
|
+
// From ./tarball-archive.js
|
|
103
|
+
const { archiveSuspectTarball } = require('./tarball-archive.js');
|
|
104
|
+
|
|
102
105
|
// --- Constants ---
|
|
103
106
|
|
|
104
107
|
const SCAN_CONCURRENCY = Math.max(1, parseInt(process.env.MUADDIB_SCAN_CONCURRENCY, 10) || 5);
|
|
@@ -541,6 +544,16 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
541
544
|
|
|
542
545
|
stats.suspect++;
|
|
543
546
|
|
|
547
|
+
// Fire-and-forget tarball archiving — never blocks the pipeline
|
|
548
|
+
archiveSuspectTarball(name, version, tarballUrl, {
|
|
549
|
+
score: riskScore,
|
|
550
|
+
priority: tierLabel,
|
|
551
|
+
rulesTriggered: (result.threats || []).map(t => t.ruleId || t.type).filter(Boolean),
|
|
552
|
+
llmVerdict: null // LLM runs after this point; updated by webhook if needed
|
|
553
|
+
}).catch(err => {
|
|
554
|
+
console.warn(`[Archive] Failed for ${name}@${version}: ${err.message}`);
|
|
555
|
+
});
|
|
556
|
+
|
|
544
557
|
// Sandbox decision based on tier
|
|
545
558
|
// T1a: mandatory sandbox (HC malice types, TIER1_TYPES non-LOW, lifecycle + intent compound)
|
|
546
559
|
// T1b: conditional sandbox (HIGH/CRITICAL without HC type — bundler FP zone)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Tarball archiving for suspect packages.
|
|
5
|
+
*
|
|
6
|
+
* Downloads and stores tarballs + metadata JSON for packages flagged as suspect,
|
|
7
|
+
* enabling retrospective audit when npm/PyPI unpublish the package.
|
|
8
|
+
*
|
|
9
|
+
* Fire-and-forget: never blocks the scan pipeline.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
const fs = require('fs');
|
|
13
|
+
const path = require('path');
|
|
14
|
+
const crypto = require('crypto');
|
|
15
|
+
const { acquireRegistrySlot, releaseRegistrySlot } = require('../shared/http-limiter.js');
|
|
16
|
+
const { downloadToFile } = require('../shared/download.js');
|
|
17
|
+
|
|
18
|
+
// Archive root — configurable via env for testing
|
|
19
|
+
const ARCHIVE_DIR = process.env.MUADDIB_ARCHIVE_DIR || '/opt/muaddib/archive';
|
|
20
|
+
const ARCHIVE_TIMEOUT_MS = 10_000;
|
|
21
|
+
|
|
22
|
+
/**
 * Build today's archive folder name as YYYY-MM-DD using Europe/Paris wall-clock time.
 * If the Intl-based formatting throws for any reason, the UTC date taken from
 * toISOString() is returned instead.
 *
 * @returns {string} Date string in YYYY-MM-DD form
 */
function getArchiveDateString() {
  try {
    const formatter = new Intl.DateTimeFormat('fr-CA', {
      timeZone: 'Europe/Paris',
      year: 'numeric',
      month: '2-digit',
      day: '2-digit'
    });

    // Keep the first value seen for each part type.
    const valueByType = new Map();
    for (const { type, value } of formatter.formatToParts(new Date())) {
      if (!valueByType.has(type)) valueByType.set(type, value);
    }

    const pieces = ['year', 'month', 'day'].map((type) => {
      // A missing part throws, which lands in the UTC fallback below.
      if (!valueByType.has(type)) throw new TypeError(`missing date part: ${type}`);
      return valueByType.get(type);
    });
    return pieces.join('-');
  } catch {
    return new Date().toISOString().slice(0, 10);
  }
}
|
|
38
|
+
|
|
39
|
+
/**
 * Turn a package name into a string that is safe to embed in a filename.
 *
 * A single leading "@" is dropped, every "/" becomes "__", and any remaining
 * character outside [a-zA-Z0-9._-] becomes "_".
 *
 * @param {string} name - Package name, e.g. "evil-pkg" or "@scope/evil-pkg"
 * @returns {string} Filename-safe form, e.g. "scope__evil-pkg"
 */
function sanitizeForFilename(name) {
  const withoutScopeMarker = name.startsWith('@') ? name.slice(1) : name;
  const slashesFlattened = withoutScopeMarker.split('/').join('__');
  // \w covers letters, digits and "_"; "." and "-" are the other allowed characters.
  return slashesFlattened.replace(/[^\w.-]/g, '_');
}
|
|
46
|
+
|
|
47
|
+
/**
 * Compute the SHA-256 hash of a file.
 *
 * The file is read synchronously in fixed-size chunks, so memory use stays bounded
 * regardless of tarball size (the previous readFileSync approach buffered the whole
 * file at once).
 *
 * @param {string} filePath - Path of the file to hash
 * @returns {string} Lowercase hex digest
 * @throws {Error} If the file cannot be opened or read
 */
function sha256File(filePath) {
  const hash = crypto.createHash('sha256');
  const chunk = Buffer.allocUnsafe(64 * 1024);
  const fd = fs.openSync(filePath, 'r');
  try {
    let bytesRead;
    // position=null reads from the current file offset and advances it.
    while ((bytesRead = fs.readSync(fd, chunk, 0, chunk.length, null)) > 0) {
      hash.update(chunk.subarray(0, bytesRead));
    }
  } finally {
    // Always release the descriptor, even when a read fails.
    fs.closeSync(fd);
  }
  return hash.digest('hex');
}
|
|
56
|
+
|
|
57
|
+
/**
 * Archive a suspect package tarball and its scan metadata.
 *
 * The tarball is first downloaded to a uniquely named ".part" file next to its final
 * location. It is hashed there, the metadata JSON is written, and only then is the
 * tarball renamed to "<name>-<version>.tgz". Because the existence of the final .tgz
 * is what the dedup check looks at, a failed or timed-out download can no longer leave
 * a truncated tarball behind that blocks later archiving attempts.
 *
 * @param {string} packageName - Package name (e.g. "evil-pkg" or "@scope/evil-pkg")
 * @param {string} version - Package version
 * @param {string} tarballUrl - Registry URL to download the tarball from
 * @param {object} [scanResult] - Scan result object from the pipeline
 * @param {number} [scanResult.score] - Risk score (stored as 0 when falsy)
 * @param {string} [scanResult.priority] - Priority tier (e.g. "P1", "P2")
 * @param {Array} [scanResult.rulesTriggered] - Array of triggered rule IDs
 * @param {string} [scanResult.llmVerdict] - LLM detective verdict if available
 * @returns {Promise<boolean>} true if archived; false if an argument is missing or the
 *   tarball is already archived for today
 * @throws Rejects when directory creation, the download, hashing, or a file write fails;
 *   callers that fire-and-forget must attach a .catch().
 */
async function archiveSuspectTarball(packageName, version, tarballUrl, scanResult) {
  if (!tarballUrl || !packageName || !version) return false;

  // Tolerate a missing scanResult: metadata defaults apply instead of a TypeError.
  const scan = scanResult || {};

  const dayDir = path.join(ARCHIVE_DIR, getArchiveDateString());
  // The version string originates from the registry just like the name, so it gets the
  // same filename sanitization (no "/" or other unsafe characters in the path).
  const basename = `${sanitizeForFilename(packageName)}-${sanitizeForFilename(String(version))}`;
  const tgzPath = path.join(dayDir, `${basename}.tgz`);
  const jsonPath = path.join(dayDir, `${basename}.json`);

  // Dedup: skip if already archived
  if (fs.existsSync(tgzPath)) {
    return false;
  }

  // Ensure day directory exists
  fs.mkdirSync(dayDir, { recursive: true });

  // Unique temp name so concurrent attempts for the same package never share a file.
  const partPath = `${tgzPath}.${process.pid}.${crypto.randomBytes(4).toString('hex')}.part`;
  const cleanupOnFailure = [partPath];

  try {
    // Download with semaphore (shares concurrency with rest of pipeline)
    await acquireRegistrySlot();
    try {
      await downloadToFile(tarballUrl, partPath, ARCHIVE_TIMEOUT_MS);
    } finally {
      releaseRegistrySlot();
    }

    // Compute hash and write metadata
    const metadata = {
      package: packageName,
      version,
      timestamp: new Date().toISOString(),
      score: scan.score || 0,
      priority: scan.priority || null,
      rules_triggered: scan.rulesTriggered || [],
      llm_verdict: scan.llmVerdict || null,
      tarball_sha256: sha256File(partPath)
    };
    fs.writeFileSync(jsonPath, JSON.stringify(metadata, null, 2));
    cleanupOnFailure.push(jsonPath);

    // Commit point: the final .tgz only appears once download and metadata are complete.
    fs.renameSync(partPath, tgzPath);
    return true;
  } catch (err) {
    // Best-effort removal of partial output; the original error is what matters.
    for (const leftover of cleanupOnFailure) {
      try {
        fs.unlinkSync(leftover);
      } catch {
        // File may never have been created — nothing to clean up.
      }
    }
    throw err;
  }
}
|
|
112
|
+
|
|
113
|
+
module.exports = {
|
|
114
|
+
archiveSuspectTarball,
|
|
115
|
+
ARCHIVE_DIR,
|
|
116
|
+
// Exported for testing
|
|
117
|
+
sanitizeForFilename,
|
|
118
|
+
sha256File,
|
|
119
|
+
getArchiveDateString
|
|
120
|
+
};
|