muaddib-scanner 2.10.35 → 2.10.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.10.35",
3
+ "version": "2.10.36",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -0,0 +1,7 @@
1
#!/bin/bash
# Delete archive day-directories older than 30 days, then log the remaining size.
ARCHIVE_DIR="/opt/muaddib/archive"

# Only match top-level YYYY-MM-DD day directories. Without -maxdepth 1, find
# also matches nested "20*" paths and keeps descending into directories that
# are being removed, producing spurious errors and redundant rm invocations.
find "$ARCHIVE_DIR" -maxdepth 1 -type d -name "20*" -mtime +30 -exec rm -rf {} + 2>/dev/null

# Log the total remaining archive size.
TOTAL=$(du -sh "$ARCHIVE_DIR" 2>/dev/null | cut -f1)
echo "[Archive Cleanup] $(date -Iseconds) — Total size: $TOTAL"
@@ -0,0 +1,45 @@
1
#!/bin/bash
# Audit archived suspect packages.
#
# Usage: ./audit-archive.sh [YYYY-MM-DD] [priority]
# Examples:
#   ./audit-archive.sh                  → summary of all dates
#   ./audit-archive.sh 2026-03-29      → list packages archived that day
#   ./audit-archive.sh 2026-03-29 P1   → filter by priority

ARCHIVE_DIR="/opt/muaddib/archive"
DATE=$1
PRIORITY=$2

# No date argument: print a per-day summary and the grand total.
if [ -z "$DATE" ]; then
  echo "=== Archive Summary ==="
  for dir in "$ARCHIVE_DIR"/20*; do
    [ -d "$dir" ] || continue
    day=$(basename "$dir")
    # Count tarballs with find instead of parsing `ls` output (ShellCheck SC2012).
    count=$(find "$dir" -maxdepth 1 -type f -name '*.tgz' 2>/dev/null | wc -l)
    size=$(du -sh "$dir" 2>/dev/null | cut -f1)
    echo "$day : $count packages ($size)"
  done
  echo "---"
  echo "Total: $(du -sh "$ARCHIVE_DIR" 2>/dev/null | cut -f1)"
  exit 0
fi

DIR="$ARCHIVE_DIR/$DATE"
if [ ! -d "$DIR" ]; then
  echo "No archive for $DATE"
  exit 1
fi

# The detail view parses metadata with jq — fail early with a clear message
# instead of emitting one cryptic "command not found" per archived package.
command -v jq >/dev/null 2>&1 || { echo "jq is required but not installed" >&2; exit 1; }

# One metadata JSON per archived tarball; print one formatted line each.
for json in "$DIR"/*.json; do
  [ -f "$json" ] || continue
  pkg=$(jq -r '.package' "$json")
  ver=$(jq -r '.version' "$json")
  prio=$(jq -r '.priority' "$json")
  score=$(jq -r '.score' "$json")
  llm=$(jq -r '.llm_verdict // "none"' "$json")

  # Optional priority filter (e.g. P1).
  if [ -n "$PRIORITY" ] && [ "$prio" != "$PRIORITY" ]; then
    continue
  fi

  printf "%-40s %-8s score=%-4s llm=%s\n" "$pkg@$ver" "$prio" "$score" "$llm"
done
@@ -240,6 +240,20 @@ Step 5 — COHERENCE: Does the complexity match the purpose?
240
240
  - Obfuscated code in a 10-line utility? → SUSPICIOUS
241
241
  - Minified dist/ in a large framework? → NORMAL
242
242
 
243
+ Step 6 — FALSE POSITIVE CHECKS: Before declaring "malicious", verify the finding isn't one of these known benign patterns:
244
+
245
+ 6a. PHANTOM LIFECYCLE SCRIPTS: package.json declares preinstall/postinstall pointing to a script file, BUT the "files" field in package.json EXCLUDES that directory from the published tarball. The script does NOT exist in the package. npm silently ignores missing lifecycle scripts. → BENIGN (packaging oversight, not a threat).
246
+
247
+ 6b. ANONYMOUS INSTALL TELEMETRY: postinstall sends a single HTTP POST with ONLY: package version, process.platform, process.arch, process.version (Node), timestamp. NO process.env access, NO os.hostname(), NO os.userInfo(), NO file system reads, NO PII. This is standard practice (Next.js, Turborepo, Astro do the same). → BENIGN.
248
+
249
+ 6c. BINARY WRAPPER FROM GITHUB RELEASES: postinstall downloads a platform-specific binary from GitHub Releases (github.com/OWNER/REPO/releases/download/TAG/ASSET), verifies SHA256 checksum, and installs it. This is standard for native tool distribution (esbuild, turbo, biome, swc). → BENIGN if: (a) URL is GitHub Releases, (b) checksum is verified, (c) no fallback to a suspicious third-party domain.
250
+
251
+ 6d. APPLICATION CODE WITH HTTP CLIENTS: Source code contains fetch(), HttpClient, http.get() etc. but in application context (Angular services, React components, Express handlers). URLs are configurable templates (e.g. \`\${this.apiEndpoint}/...\`, \`\${process.env.NEXT_PUBLIC_API_URL}/...\`), not hardcoded suspicious domains. No lifecycle scripts performing network calls. → BENIGN — application code, not exfiltration.
252
+
253
+ 6e. CLI TOOLS USING EXEC/SPAWN FOR LOCAL OPERATIONS: Code uses child_process.execSync() or spawn() for legitimate local operations: git commands, npm/npx invocations, platform tools (screencapture, xdotool), CLI binary discovery. No encoded payloads, no eval(), no Buffer.from(base64) for code execution. → BENIGN — verify WHAT is being executed, not just the presence of execSync.
254
+
255
+ 6f. NPM PLACEHOLDER DEPENDENCIES: Package depends on "http@0.0.1-security" or similar npm placeholder packages. These are official npm placeholder modules (empty packages reserving names to prevent typosquatting). They are NOT suspicious HTTP modules. → BENIGN — ignore these dependencies entirely.
256
+
243
257
  ## GOLDEN RULE
244
258
 
245
259
  If sensitive data (env vars, credentials, keys) is only READ for self-configuration and never SENT to an external third-party, the package is BENIGN regardless of what the scanner says.
@@ -264,6 +278,22 @@ EXAMPLE 4 — FALSE POSITIVE:
264
278
  A web framework reads process.env.DATABASE_URL, process.env.API_KEY for configuration. It uses fetch() to call its own documented API endpoint. It uses dynamic require() to load user-configured plugins. Scanner flags env_access, dynamic_require, network_require — but all these are standard framework patterns. No data leaves the application boundary.
265
279
  → Verdict: BENIGN (confidence 0.95)
266
280
 
281
+ EXAMPLE 5 — FALSE POSITIVE (phantom lifecycle script):
282
+ Package "instructify@1.0.0" declares "postinstall": "node ./scripts/postinstall.js". But its "files" field is ["dist", ".cursor", "docs/README.md", "README.md", "LICENSE", "CHANGELOG.md", "CONTRIBUTING.md"]. The scripts/ directory does NOT exist in the published tarball because the "files" field excludes it. The postinstall script cannot execute — it is a packaging oversight. The GitHub repository shows the script only prints a welcome message.
283
+ → Verdict: BENIGN (confidence 0.95)
284
+
285
+ EXAMPLE 6 — FALSE POSITIVE (anonymous telemetry):
286
+ Package "delimit-cli@3.14.46" has a postinstall that prints CLI setup instructions, then sends anonymous telemetry: POST to delimit.ai/api/telemetry with body {event:'install', version, node:process.version, platform:process.platform, arch:process.arch, ts:ISO}. Silent fail on error, 3s timeout. No PII, no process.env access beyond process.version/platform/arch, no os.hostname(), no file reads. This is standard anonymous install telemetry identical to what Next.js, Turborepo, and Astro do.
287
+ → Verdict: BENIGN (confidence 0.92)
288
+
289
+ EXAMPLE 7 — FALSE POSITIVE (binary wrapper with checksum):
290
+ Package "plugin-kit-ai@1.0.1" has a postinstall that downloads a platform-specific binary from GitHub Releases (github.com/777genius/plugin-kit-ai/releases/download/vX.Y.Z/ASSET), verifies SHA256 checksum from checksums.txt, and extracts the binary to vendor/. No data exfiltration, no env access beyond optional GITHUB_TOKEN for rate limits. This is the standard binary distribution pattern used by esbuild, turbo, and biome.
291
+ → Verdict: BENIGN (confidence 0.95)
292
+
293
+ EXAMPLE 8 — FALSE POSITIVE (application code with HTTP clients):
294
+ Package "@craft-ng/core@0.1.2" is an Angular state management library. No lifecycle scripts (no postinstall/preinstall). Source contains fetch() and http references but ONLY in JSDoc examples ("const response = await fetch(\`/api/users/\${params}\`)") and Angular service patterns (this.httpClient.get(url)). These are application code patterns, not active network calls during install. No child_process, no eval, no Buffer manipulation.
295
+ → Verdict: BENIGN (confidence 0.95)
296
+
267
297
  ## KEY QUESTIONS TO ANSWER
268
298
 
269
299
  1. "Do sensitive data (env vars, credentials) LEAVE the package to a third party?"
@@ -99,6 +99,9 @@ const {
99
99
  // From ./ingestion.js (will be created — currently in monitor.js)
100
100
  const { getNpmLatestTarball, getPyPITarballUrl, getWeeklyDownloads } = require('./ingestion.js');
101
101
 
102
+ // From ./tarball-archive.js
103
+ const { archiveSuspectTarball } = require('./tarball-archive.js');
104
+
102
105
  // --- Constants ---
103
106
 
104
107
  const SCAN_CONCURRENCY = Math.max(1, parseInt(process.env.MUADDIB_SCAN_CONCURRENCY, 10) || 5);
@@ -541,6 +544,16 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
541
544
 
542
545
  stats.suspect++;
543
546
 
547
+ // Fire-and-forget tarball archiving — never blocks the pipeline
548
+ archiveSuspectTarball(name, version, tarballUrl, {
549
+ score: riskScore,
550
+ priority: tierLabel,
551
+ rulesTriggered: (result.threats || []).map(t => t.ruleId || t.type).filter(Boolean),
552
+ llmVerdict: null // LLM runs after this point; updated by webhook if needed
553
+ }).catch(err => {
554
+ console.warn(`[Archive] Failed for ${name}@${version}: ${err.message}`);
555
+ });
556
+
544
557
  // Sandbox decision based on tier
545
558
  // T1a: mandatory sandbox (HC malice types, TIER1_TYPES non-LOW, lifecycle + intent compound)
546
559
  // T1b: conditional sandbox (HIGH/CRITICAL without HC type — bundler FP zone)
@@ -0,0 +1,120 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Tarball archiving for suspect packages.
5
+ *
6
+ * Downloads and stores tarballs + metadata JSON for packages flagged as suspect,
7
+ * enabling retrospective audit when npm/PyPI unpublish the package.
8
+ *
9
+ * Fire-and-forget: never blocks the scan pipeline.
10
+ */
11
+
12
+ const fs = require('fs');
13
+ const path = require('path');
14
+ const crypto = require('crypto');
15
+ const { acquireRegistrySlot, releaseRegistrySlot } = require('../shared/http-limiter.js');
16
+ const { downloadToFile } = require('../shared/download.js');
17
+
18
+ // Archive root — configurable via env for testing
19
+ const ARCHIVE_DIR = process.env.MUADDIB_ARCHIVE_DIR || '/opt/muaddib/archive';
20
+ const ARCHIVE_TIMEOUT_MS = 10_000;
21
+
22
/**
 * Return today's date as a YYYY-MM-DD string in the Europe/Paris timezone
 * (kept consistent with the monitor's day bucketing).
 * Falls back to the UTC calendar date if Intl is unavailable.
 */
function getArchiveDateString() {
  try {
    const formatter = new Intl.DateTimeFormat('fr-CA', {
      timeZone: 'Europe/Paris',
      year: 'numeric',
      month: '2-digit',
      day: '2-digit'
    });
    const fields = {};
    for (const part of formatter.formatToParts(new Date())) {
      fields[part.type] = part.value;
    }
    return `${fields.year}-${fields.month}-${fields.day}`;
  } catch {
    return new Date().toISOString().slice(0, 10);
  }
}
38
+
39
/**
 * Make a package name safe for use as a filename component.
 * Drops a single leading "@" (scoped packages), turns "/" into "__",
 * and maps every remaining non [a-zA-Z0-9._-] character to "_".
 */
function sanitizeForFilename(name) {
  let safe = name.startsWith('@') ? name.slice(1) : name;
  safe = safe.split('/').join('__');
  return safe.replace(/[^a-zA-Z0-9._-]/g, '_');
}
46
+
47
/**
 * Return the hex-encoded SHA-256 digest of a file's contents.
 * Synchronous: reads the whole file into memory in one shot, which is
 * acceptable for the package tarballs this module archives.
 */
function sha256File(filePath) {
  return crypto
    .createHash('sha256')
    .update(fs.readFileSync(filePath))
    .digest('hex');
}
56
+
57
/**
 * Archive a suspect package tarball and its scan metadata.
 *
 * Downloads the tarball into ARCHIVE_DIR/<YYYY-MM-DD>/ and writes a sibling
 * .json metadata file so the package can be audited even after the registry
 * unpublishes it. Intended to be fire-and-forget from the pipeline's side.
 *
 * Fix: the tarball is now downloaded to a temporary ".part" path and renamed
 * into place only on success. Previously, a failed or interrupted download
 * left a partial .tgz at the final path, so the existsSync() dedup check
 * skipped the package forever and its metadata JSON was never written.
 *
 * @param {string} packageName - Package name (e.g. "evil-pkg" or "@scope/evil-pkg")
 * @param {string} version - Package version
 * @param {string} tarballUrl - Registry URL to download the tarball from
 * @param {object} scanResult - Scan result object from the pipeline
 * @param {number} scanResult.score - Risk score
 * @param {string} scanResult.priority - Priority tier (e.g. "P1", "P2")
 * @param {Array} [scanResult.rulesTriggered] - Array of triggered rule IDs
 * @param {string} [scanResult.llmVerdict] - LLM detective verdict if available
 * @returns {Promise<boolean>} true if archived, false if skipped
 */
async function archiveSuspectTarball(packageName, version, tarballUrl, scanResult) {
  // Nothing to archive without a full identity and a download URL.
  if (!tarballUrl || !packageName || !version) return false;

  const dateStr = getArchiveDateString();
  const dayDir = path.join(ARCHIVE_DIR, dateStr);
  const safeName = sanitizeForFilename(packageName);
  const basename = `${safeName}-${version}`;
  const tgzPath = path.join(dayDir, `${basename}.tgz`);
  const jsonPath = path.join(dayDir, `${basename}.json`);

  // Dedup: skip if already archived. Only fully-downloaded tarballs ever
  // land at tgzPath (see the rename below), so this check is reliable.
  if (fs.existsSync(tgzPath)) {
    return false;
  }

  // Ensure the per-day directory exists.
  fs.mkdirSync(dayDir, { recursive: true });

  // Download under the shared registry semaphore so archiving competes fairly
  // with the rest of the pipeline. Write to a ".part" file first so a failed
  // transfer never poisons the dedup check above.
  const partPath = `${tgzPath}.part`;
  await acquireRegistrySlot();
  try {
    await downloadToFile(tarballUrl, partPath, ARCHIVE_TIMEOUT_MS);
    fs.renameSync(partPath, tgzPath); // atomic within the same filesystem
  } catch (err) {
    // Best-effort cleanup of the partial file; propagate the original error
    // so the caller's .catch() logs the failure.
    try { fs.unlinkSync(partPath); } catch { /* already gone */ }
    throw err;
  } finally {
    releaseRegistrySlot();
  }

  // Hash the stored tarball and persist the scan metadata next to it.
  const tarballSha256 = sha256File(tgzPath);
  const metadata = {
    package: packageName,
    version,
    timestamp: new Date().toISOString(),
    score: scanResult.score || 0,
    priority: scanResult.priority || null,
    rules_triggered: scanResult.rulesTriggered || [],
    llm_verdict: scanResult.llmVerdict || null,
    tarball_sha256: tarballSha256
  };

  fs.writeFileSync(jsonPath, JSON.stringify(metadata, null, 2));
  return true;
}
112
+
113
// Public surface: archiveSuspectTarball is the pipeline entry point;
// ARCHIVE_DIR is exposed so external tooling can locate the archive root.
module.exports = {
  archiveSuspectTarball,
  ARCHIVE_DIR,
  // Exported for testing
  sanitizeForFilename,
  sha256File,
  getArchiveDateString
};