muaddib-scanner 2.10.34 → 2.10.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/archive-cleanup.sh +7 -0
- package/scripts/audit-archive.sh +45 -0
- package/src/integrations/webhook.js +9 -0
- package/src/ml/llm-detective.js +106 -32
- package/src/monitor/queue.js +13 -0
- package/src/monitor/tarball-archive.js +120 -0
- package/src/monitor/webhook.js +1 -0
package/package.json
CHANGED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
#!/bin/bash
# Delete per-day archive directories older than the retention period, then
# log the remaining archive size.
#
# The archive root defaults to /opt/muaddib/archive and can be overridden with
# MUADDIB_ARCHIVE_DIR (the same variable src/monitor/tarball-archive.js reads).
ARCHIVE_DIR="${MUADDIB_ARCHIVE_DIR:-/opt/muaddib/archive}"
RETENTION_DAYS=30

if [ ! -d "$ARCHIVE_DIR" ]; then
  echo "[Archive Cleanup] $(date -Iseconds) — Archive directory not found: $ARCHIVE_DIR" >&2
  exit 0
fi

# Only direct children (the YYYY-MM-DD directories) are candidates. Limiting the
# depth keeps find from descending into directories it is about to remove, so
# stderr no longer needs to be silenced and real failures stay visible.
find "$ARCHIVE_DIR" -mindepth 1 -maxdepth 1 -type d -name "20*" \
  -mtime +"$RETENTION_DAYS" -exec rm -rf -- {} +

# Log
TOTAL=$(du -sh "$ARCHIVE_DIR" 2>/dev/null | cut -f1)
echo "[Archive Cleanup] $(date -Iseconds) — Total size: $TOTAL"
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/bin/bash
# Usage: ./audit-archive.sh [YYYY-MM-DD] [priority]
# Examples:
#   ./audit-archive.sh                  → summary of every archived date
#   ./audit-archive.sh 2026-03-29       → list the packages archived that day
#   ./audit-archive.sh 2026-03-29 P1    → same listing, filtered by priority
#
# The archive root defaults to /opt/muaddib/archive and can be overridden with
# MUADDIB_ARCHIVE_DIR (the same variable src/monitor/tarball-archive.js reads).

ARCHIVE_DIR="${MUADDIB_ARCHIVE_DIR:-/opt/muaddib/archive}"
DATE=$1
PRIORITY=$2

if [ -z "$DATE" ]; then
  echo "=== Archive Summary ==="
  for dir in "$ARCHIVE_DIR"/20*; do
    [ -d "$dir" ] || continue
    day=$(basename "$dir")
    # Count tarballs with a glob rather than by parsing `ls` output.
    count=0
    for tgz in "$dir"/*.tgz; do
      if [ -e "$tgz" ]; then
        count=$((count + 1))
      fi
    done
    size=$(du -sh "$dir" 2>/dev/null | cut -f1)
    echo "$day : $count packages ($size)"
  done
  echo "---"
  echo "Total: $(du -sh "$ARCHIVE_DIR" 2>/dev/null | cut -f1)"
  exit 0
fi

# DATE becomes part of a filesystem path: accept nothing but a plain date so
# values such as "../.." cannot escape the archive root.
if ! [[ "$DATE" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
  echo "Invalid date '$DATE' (expected YYYY-MM-DD)" >&2
  exit 1
fi

DIR="$ARCHIVE_DIR/$DATE"
if [ ! -d "$DIR" ]; then
  echo "No archive for $DATE"
  exit 1
fi

if ! command -v jq >/dev/null 2>&1; then
  echo "jq is required to read archive metadata" >&2
  exit 1
fi

for json in "$DIR"/*.json; do
  [ -f "$json" ] || continue

  # One jq invocation per file. Every field is stringified (a missing field
  # prints as "null", as a plain `jq -r` would) and the fields are joined with
  # the ASCII unit separator, which cannot be confused with field content and,
  # unlike a tab, does not make `read` collapse empty fields.
  if ! fields=$(jq -r '[.package, .version, .priority, .score, (.llm_verdict // "none")] | map(tostring) | join("\u001f")' "$json" 2>/dev/null); then
    echo "Skipping unreadable metadata: $json" >&2
    continue
  fi
  IFS=$'\x1f' read -r pkg ver prio score llm <<< "$fields"

  if [ -n "$PRIORITY" ] && [ "$prio" != "$PRIORITY" ]; then
    continue
  fi

  printf "%-40s %-8s score=%-4s llm=%s\n" "$pkg@$ver" "$prio" "$score" "$llm"
done
|
|
@@ -243,6 +243,15 @@ function formatDiscord(results) {
|
|
|
243
243
|
value: llmValue.slice(0, 1024),
|
|
244
244
|
inline: false
|
|
245
245
|
});
|
|
246
|
+
// Show investigation steps as a separate field if present (structured reasoning)
|
|
247
|
+
if (results.llm.investigation_steps && results.llm.investigation_steps.length > 0) {
|
|
248
|
+
const stepsText = results.llm.investigation_steps.map(s => `- ${s}`).join('\n');
|
|
249
|
+
fields.push({
|
|
250
|
+
name: 'Investigation Steps',
|
|
251
|
+
value: stepsText.slice(0, 1024),
|
|
252
|
+
inline: false
|
|
253
|
+
});
|
|
254
|
+
}
|
|
246
255
|
}
|
|
247
256
|
|
|
248
257
|
const titlePrefix = emoji ? `${emoji} ` : '';
|
package/src/ml/llm-detective.js
CHANGED
|
@@ -212,39 +212,111 @@ function collectSourceContext(extractedDir, scanResult) {
|
|
|
212
212
|
|
|
213
213
|
// ── Prompt construction ──
|
|
214
214
|
|
|
215
|
-
const SYSTEM_PROMPT = `You are a senior supply-chain security analyst. You receive
|
|
216
|
-
|
|
217
|
-
Your job
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
-
|
|
239
|
-
-
|
|
240
|
-
-
|
|
241
|
-
-
|
|
215
|
+
const SYSTEM_PROMPT = `You are a senior supply-chain security analyst performing the SAME investigation a human would do manually. You receive source code of a suspect package and static scanner results.
|
|
216
|
+
|
|
217
|
+
CRITICAL: The scanner findings are SIGNALS, not truth. Your job is to INDEPENDENTLY determine if this package is malicious by reading the code yourself. Many scanner findings are false positives — a CLI tool using child_process is not malware.
|
|
218
|
+
|
|
219
|
+
## YOUR INVESTIGATION METHOD
|
|
220
|
+
|
|
221
|
+
Do exactly what a human analyst would:
|
|
222
|
+
|
|
223
|
+
Step 1 — DECLARED PURPOSE: Read package.json. What does this package claim to do? Is the name/description/repo coherent?
|
|
224
|
+
|
|
225
|
+
Step 2 — CODE REALITY: Read ALL the code. Does it actually do what the description says? A "color picker" with child_process.exec is suspicious. A "CLI wrapper" with child_process.exec is normal.
|
|
226
|
+
|
|
227
|
+
Step 3 — DATA FLOW INTENT: When code accesses process.env or credentials:
|
|
228
|
+
- Is it CONFIGURING itself (reading DATABASE_URL, API_KEY for its own backend)? → BENIGN
|
|
229
|
+
- Is it COLLECTING and SENDING data to a third-party domain? → MALICIOUS
|
|
230
|
+
Follow the data: where does it GO?
|
|
231
|
+
|
|
232
|
+
Step 4 — DESTINATION CHECK: If data is sent somewhere:
|
|
233
|
+
- To the package's own documented API/backend? → BENIGN
|
|
234
|
+
- To a raw IP, ngrok/serveo tunnel, or unrelated domain? → MALICIOUS
|
|
235
|
+
- To nowhere (data is only read, never exfiltrated)? → BENIGN
|
|
236
|
+
|
|
237
|
+
Step 5 — COHERENCE: Does the complexity match the purpose?
|
|
238
|
+
- 3-file package with postinstall downloading binaries? → SUSPICIOUS
|
|
239
|
+
- Build tool with postinstall compiling native addon? → NORMAL
|
|
240
|
+
- Obfuscated code in a 10-line utility? → SUSPICIOUS
|
|
241
|
+
- Minified dist/ in a large framework? → NORMAL
|
|
242
|
+
|
|
243
|
+
Step 6 — FALSE POSITIVE CHECKS: Before declaring "malicious", verify the finding isn't one of these known benign patterns:
|
|
244
|
+
|
|
245
|
+
6a. PHANTOM LIFECYCLE SCRIPTS: package.json declares preinstall/postinstall pointing to a script file, BUT the "files" field in package.json EXCLUDES that directory from the published tarball. The script does NOT exist in the package. npm silently ignores missing lifecycle scripts. → BENIGN (packaging oversight, not a threat).
|
|
246
|
+
|
|
247
|
+
6b. ANONYMOUS INSTALL TELEMETRY: postinstall sends a single HTTP POST with ONLY: package version, process.platform, process.arch, process.version (Node), timestamp. NO process.env access, NO os.hostname(), NO os.userInfo(), NO file system reads, NO PII. This is standard practice (Next.js, Turborepo, Astro do the same). → BENIGN.
|
|
248
|
+
|
|
249
|
+
6c. BINARY WRAPPER FROM GITHUB RELEASES: postinstall downloads a platform-specific binary from GitHub Releases (github.com/OWNER/REPO/releases/download/TAG/ASSET), verifies SHA256 checksum, and installs it. This is standard for native tool distribution (esbuild, turbo, biome, swc). → BENIGN if: (a) URL is GitHub Releases, (b) checksum is verified, (c) no fallback to a suspicious third-party domain.
|
|
250
|
+
|
|
251
|
+
6d. APPLICATION CODE WITH HTTP CLIENTS: Source code contains fetch(), HttpClient, http.get() etc. but in application context (Angular services, React components, Express handlers). URLs are configurable templates (e.g. \`\${this.apiEndpoint}/...\`, \`\${process.env.NEXT_PUBLIC_API_URL}/...\`), not hardcoded suspicious domains. No lifecycle scripts performing network calls. → BENIGN — application code, not exfiltration.
|
|
252
|
+
|
|
253
|
+
6e. CLI TOOLS USING EXEC/SPAWN FOR LOCAL OPERATIONS: Code uses child_process.execSync() or spawn() for legitimate local operations: git commands, npm/npx invocations, platform tools (screencapture, xdotool), CLI binary discovery. No encoded payloads, no eval(), no Buffer.from(base64) for code execution. → BENIGN — verify WHAT is being executed, not just the presence of execSync.
|
|
254
|
+
|
|
255
|
+
6f. NPM PLACEHOLDER DEPENDENCIES: Package depends on "http@0.0.1-security" or similar npm placeholder packages. These are official npm placeholder modules (empty packages reserving names to prevent typosquatting). They are NOT suspicious HTTP modules. → BENIGN — ignore these dependencies entirely.
|
|
256
|
+
|
|
257
|
+
## GOLDEN RULE
|
|
258
|
+
|
|
259
|
+
If sensitive data (env vars, credentials, keys) is only READ for self-configuration and never SENT to an external third-party, the package is BENIGN regardless of what the scanner says.
|
|
260
|
+
|
|
261
|
+
If sensitive data is COLLECTED and EXFILTRATED to a domain unrelated to the package's stated purpose, it is MALICIOUS.
|
|
262
|
+
|
|
263
|
+
## REFERENCE EXAMPLES
|
|
264
|
+
|
|
265
|
+
EXAMPLE 1 — TRUE MALWARE:
|
|
266
|
+
Package "slopex-cli" claims to be a "continuity patcher for OpenAI Codex". Postinstall downloads a binary from a personal GitHub repo and REPLACES the real Codex binary. The binary is not part of the described functionality — it's a trojan replacing a trusted tool.
|
|
267
|
+
→ Verdict: MALICIOUS (backdoor, confidence 0.97)
|
|
268
|
+
|
|
269
|
+
EXAMPLE 2 — FALSE POSITIVE:
|
|
270
|
+
Package "@yeaft/webchat-agent" is a "remote agent for WebChat connecting worker machines". Code uses execSync to locate the Claude CLI binary, process.env to read PATH configuration. Scanner flags "detached_credential_exfil" (CRITICAL) — but the code is just spawning a documented CLI tool and reading PATH. No data is sent to any external domain. The functionality matches the description.
|
|
271
|
+
→ Verdict: BENIGN (confidence 0.92)
|
|
272
|
+
|
|
273
|
+
EXAMPLE 3 — TRUE MALWARE:
|
|
274
|
+
Package "event-stream" (compromised via flatmap-stream dependency). Obfuscated code hidden in a nested dependency decrypts a payload targeting Bitcoin wallet data from Copay. The obfuscation has no legitimate reason — the parent package is a simple stream utility. The decrypted code specifically targets cryptocurrency credentials.
|
|
275
|
+
→ Verdict: MALICIOUS (credential_exfil, confidence 0.98)
|
|
276
|
+
|
|
277
|
+
EXAMPLE 4 — FALSE POSITIVE:
|
|
278
|
+
A web framework reads process.env.DATABASE_URL, process.env.API_KEY for configuration. It uses fetch() to call its own documented API endpoint. It uses dynamic require() to load user-configured plugins. Scanner flags env_access, dynamic_require, network_require — but all these are standard framework patterns. No data leaves the application boundary.
|
|
279
|
+
→ Verdict: BENIGN (confidence 0.95)
|
|
280
|
+
|
|
281
|
+
EXAMPLE 5 — FALSE POSITIVE (phantom lifecycle script):
|
|
282
|
+
Package "instructify@1.0.0" declares "postinstall": "node ./scripts/postinstall.js". But its "files" field is ["dist", ".cursor", "docs/README.md", "README.md", "LICENSE", "CHANGELOG.md", "CONTRIBUTING.md"]. The scripts/ directory does NOT exist in the published tarball because the "files" field excludes it. The postinstall script cannot execute — it is a packaging oversight. The GitHub repository shows the script only prints a welcome message.
|
|
283
|
+
→ Verdict: BENIGN (confidence 0.95)
|
|
284
|
+
|
|
285
|
+
EXAMPLE 6 — FALSE POSITIVE (anonymous telemetry):
|
|
286
|
+
Package "delimit-cli@3.14.46" has a postinstall that prints CLI setup instructions, then sends anonymous telemetry: POST to delimit.ai/api/telemetry with body {event:'install', version, node:process.version, platform:process.platform, arch:process.arch, ts:ISO}. Silent fail on error, 3s timeout. No PII, no process.env access beyond process.version/platform/arch, no os.hostname(), no file reads. This is standard anonymous install telemetry identical to what Next.js, Turborepo, and Astro do.
|
|
287
|
+
→ Verdict: BENIGN (confidence 0.92)
|
|
288
|
+
|
|
289
|
+
EXAMPLE 7 — FALSE POSITIVE (binary wrapper with checksum):
|
|
290
|
+
Package "plugin-kit-ai@1.0.1" has a postinstall that downloads a platform-specific binary from GitHub Releases (github.com/777genius/plugin-kit-ai/releases/download/vX.Y.Z/ASSET), verifies SHA256 checksum from checksums.txt, and extracts the binary to vendor/. No data exfiltration, no env access beyond optional GITHUB_TOKEN for rate limits. This is the standard binary distribution pattern used by esbuild, turbo, and biome.
|
|
291
|
+
→ Verdict: BENIGN (confidence 0.95)
|
|
292
|
+
|
|
293
|
+
EXAMPLE 8 — FALSE POSITIVE (application code with HTTP clients):
|
|
294
|
+
Package "@craft-ng/core@0.1.2" is an Angular state management library. No lifecycle scripts (no postinstall/preinstall). Source contains fetch() and http references but ONLY in JSDoc examples ("const response = await fetch(\`/api/users/\${params}\`)") and Angular service patterns (this.httpClient.get(url)). These are application code patterns, not active network calls during install. No child_process, no eval, no Buffer manipulation.
|
|
295
|
+
→ Verdict: BENIGN (confidence 0.95)
|
|
296
|
+
|
|
297
|
+
## KEY QUESTIONS TO ANSWER
|
|
298
|
+
|
|
299
|
+
1. "Do sensitive data (env vars, credentials) LEAVE the package to a third party?"
|
|
300
|
+
2. "Does the code do something HIDDEN that the description doesn't mention?"
|
|
301
|
+
3. "Is obfuscation justified (build tool output) or suspicious (tiny package, no build step)?"
|
|
302
|
+
4. "Does the postinstall relate to the declared functionality?"
|
|
303
|
+
5. "Could a reasonable developer have written this code for the stated purpose?"
|
|
304
|
+
|
|
305
|
+
## COMMON FALSE POSITIVE PATTERNS (do NOT flag these)
|
|
306
|
+
|
|
307
|
+
- CLI tools/wrappers using exec/spawn to run other CLI tools (their stated purpose)
|
|
308
|
+
- SDK packages reading API keys from env vars (standard configuration)
|
|
309
|
+
- Build tools with postinstall that compile native addons (node-gyp, prebuild)
|
|
310
|
+
- Packages reading process.env for feature flags, logging config, or database URLs
|
|
311
|
+
- Monorepo tooling with dynamic require for loading workspace packages
|
|
312
|
+
- Test frameworks that use eval() or vm.runInContext for sandboxed test execution
|
|
242
313
|
|
|
243
314
|
RESPOND IN STRICT JSON ONLY (nothing else):
|
|
244
315
|
{
|
|
245
316
|
"verdict": "malicious" | "benign" | "uncertain",
|
|
246
317
|
"confidence": 0.0-1.0,
|
|
247
|
-
"
|
|
318
|
+
"investigation_steps": ["Step 1: ...", "Step 2: ...", "Step 3: ..."],
|
|
319
|
+
"reasoning": "Final summary of your analysis",
|
|
248
320
|
"iocs_found": ["domain.com", "1.2.3.4"],
|
|
249
321
|
"attack_type": "credential_exfil" | "reverse_shell" | "crypto_miner" | "backdoor" | "typosquat" | "protestware" | null,
|
|
250
322
|
"recommendation": "block" | "monitor" | "safe"
|
|
@@ -275,15 +347,15 @@ function buildPrompt(name, version, ecosystem, sourceContext, threats, npmRegist
|
|
|
275
347
|
userContent += '\n';
|
|
276
348
|
}
|
|
277
349
|
|
|
278
|
-
// Static scanner findings
|
|
350
|
+
// Static scanner findings — framed as signals to challenge
|
|
279
351
|
if (threats && threats.length > 0) {
|
|
280
|
-
userContent += `## Static Scanner
|
|
352
|
+
userContent += `## Static Scanner Signals (${threats.length} total — these are SIGNALS to investigate, not confirmed threats)\n`;
|
|
281
353
|
for (const t of threats.slice(0, 30)) {
|
|
282
354
|
const loc = t.file ? ` in ${t.file}${t.line ? ':' + t.line : ''}` : '';
|
|
283
355
|
userContent += `- [${t.severity}] ${t.type}${loc}: ${t.message || ''}\n`;
|
|
284
356
|
}
|
|
285
357
|
if (threats.length > 30) {
|
|
286
|
-
userContent += `... and ${threats.length - 30} more
|
|
358
|
+
userContent += `... and ${threats.length - 30} more signals\n`;
|
|
287
359
|
}
|
|
288
360
|
userContent += '\n';
|
|
289
361
|
}
|
|
@@ -315,7 +387,7 @@ async function callAnthropicAPI(system, messages) {
|
|
|
315
387
|
|
|
316
388
|
const body = JSON.stringify({
|
|
317
389
|
model: MODEL_ID,
|
|
318
|
-
max_tokens:
|
|
390
|
+
max_tokens: 2048,
|
|
319
391
|
system,
|
|
320
392
|
messages
|
|
321
393
|
});
|
|
@@ -384,6 +456,7 @@ function parseResponse(text) {
|
|
|
384
456
|
const fallback = {
|
|
385
457
|
verdict: 'uncertain',
|
|
386
458
|
confidence: 0,
|
|
459
|
+
investigation_steps: [],
|
|
387
460
|
reasoning: 'Failed to parse LLM response',
|
|
388
461
|
iocs_found: [],
|
|
389
462
|
attack_type: null,
|
|
@@ -434,6 +507,7 @@ function parseResponse(text) {
|
|
|
434
507
|
return {
|
|
435
508
|
verdict,
|
|
436
509
|
confidence: Math.round(confidence * 1000) / 1000,
|
|
510
|
+
investigation_steps: Array.isArray(parsed.investigation_steps) ? parsed.investigation_steps.filter(x => typeof x === 'string').slice(0, 10) : [],
|
|
437
511
|
reasoning: typeof parsed.reasoning === 'string' ? parsed.reasoning : '',
|
|
438
512
|
iocs_found: Array.isArray(parsed.iocs_found) ? parsed.iocs_found.filter(x => typeof x === 'string').slice(0, 20) : [],
|
|
439
513
|
attack_type: typeof parsed.attack_type === 'string' ? parsed.attack_type : null,
|
package/src/monitor/queue.js
CHANGED
|
@@ -99,6 +99,9 @@ const {
|
|
|
99
99
|
// From ./ingestion.js (will be created — currently in monitor.js)
|
|
100
100
|
const { getNpmLatestTarball, getPyPITarballUrl, getWeeklyDownloads } = require('./ingestion.js');
|
|
101
101
|
|
|
102
|
+
// From ./tarball-archive.js
|
|
103
|
+
const { archiveSuspectTarball } = require('./tarball-archive.js');
|
|
104
|
+
|
|
102
105
|
// --- Constants ---
|
|
103
106
|
|
|
104
107
|
const SCAN_CONCURRENCY = Math.max(1, parseInt(process.env.MUADDIB_SCAN_CONCURRENCY, 10) || 5);
|
|
@@ -541,6 +544,16 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
541
544
|
|
|
542
545
|
stats.suspect++;
|
|
543
546
|
|
|
547
|
+
// Fire-and-forget tarball archiving — never blocks the pipeline
|
|
548
|
+
archiveSuspectTarball(name, version, tarballUrl, {
|
|
549
|
+
score: riskScore,
|
|
550
|
+
priority: tierLabel,
|
|
551
|
+
rulesTriggered: (result.threats || []).map(t => t.ruleId || t.type).filter(Boolean),
|
|
552
|
+
llmVerdict: null // LLM runs after this point; updated by webhook if needed
|
|
553
|
+
}).catch(err => {
|
|
554
|
+
console.warn(`[Archive] Failed for ${name}@${version}: ${err.message}`);
|
|
555
|
+
});
|
|
556
|
+
|
|
544
557
|
// Sandbox decision based on tier
|
|
545
558
|
// T1a: mandatory sandbox (HC malice types, TIER1_TYPES non-LOW, lifecycle + intent compound)
|
|
546
559
|
// T1b: conditional sandbox (HIGH/CRITICAL without HC type — bundler FP zone)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Tarball archiving for suspect packages.
|
|
5
|
+
*
|
|
6
|
+
* Downloads and stores tarballs + metadata JSON for packages flagged as suspect,
|
|
7
|
+
* enabling retrospective audit when npm/PyPI unpublish the package.
|
|
8
|
+
*
|
|
9
|
+
* Fire-and-forget: never blocks the scan pipeline.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
const fs = require('fs');
|
|
13
|
+
const path = require('path');
|
|
14
|
+
const crypto = require('crypto');
|
|
15
|
+
const { acquireRegistrySlot, releaseRegistrySlot } = require('../shared/http-limiter.js');
|
|
16
|
+
const { downloadToFile } = require('../shared/download.js');
|
|
17
|
+
|
|
18
|
+
// Archive root — configurable via env for testing
|
|
19
|
+
const ARCHIVE_DIR = process.env.MUADDIB_ARCHIVE_DIR || '/opt/muaddib/archive';
|
|
20
|
+
const ARCHIVE_TIMEOUT_MS = 10_000;
|
|
21
|
+
|
|
22
|
+
/**
 * Get the archive date in YYYY-MM-DD format (Paris timezone, consistent with monitor).
 * Falls back to the UTC date if Intl formatting is unavailable or fails.
 *
 * @param {Date} [now] - Instant to format; defaults to the current time.
 *   Anything that is not a valid Date is replaced by the current time.
 * @returns {string} Date string such as "2026-03-29".
 */
function getArchiveDateString(now = new Date()) {
  // An invalid Date would make both Intl and toISOString() throw a RangeError,
  // so normalise it before either path runs.
  const instant = now instanceof Date && !Number.isNaN(now.getTime()) ? now : new Date();
  try {
    const parts = new Intl.DateTimeFormat('fr-CA', {
      timeZone: 'Europe/Paris',
      year: 'numeric',
      month: '2-digit',
      day: '2-digit'
    }).formatToParts(instant);
    // A missing part throws here and is handled by the UTC fallback below.
    const valueOf = (type) => parts.find((p) => p.type === type).value;
    return `${valueOf('year')}-${valueOf('month')}-${valueOf('day')}`;
  } catch {
    return instant.toISOString().slice(0, 10);
  }
}
|
|
38
|
+
|
|
39
|
+
/**
 * Sanitize a package name for use in filenames.
 *
 * Drops a leading "@", replaces "/" (scoped packages) with "__" and replaces
 * every character outside [a-zA-Z0-9._-] with "_". If the result is empty or
 * starts with ".", it is prefixed with "_" so the archive never contains a
 * nameless or hidden file (shell globs such as *.tgz skip dot-files).
 *
 * @param {string} name - Package name, e.g. "evil-pkg" or "@scope/evil-pkg".
 *   Non-string values are converted with String().
 * @returns {string} Filename-safe form of the name.
 */
function sanitizeForFilename(name) {
  const cleaned = String(name)
    .replace(/^@/, '')
    .replace(/\//g, '__')
    .replace(/[^a-zA-Z0-9._-]/g, '_');
  if (cleaned === '' || cleaned.startsWith('.')) {
    return `_${cleaned}`;
  }
  return cleaned;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Compute the SHA-256 hash of a file.
 *
 * The file is read synchronously in fixed-size chunks, so memory use stays
 * bounded however large the file is.
 *
 * @param {string} filePath - Path of the file to hash.
 * @returns {string} Lowercase hex digest.
 * @throws {Error} If the file cannot be opened or read.
 */
function sha256File(filePath) {
  const hash = crypto.createHash('sha256');
  const chunk = Buffer.allocUnsafe(1024 * 1024);
  const fd = fs.openSync(filePath, 'r');
  try {
    let bytesRead = fs.readSync(fd, chunk, 0, chunk.length, null);
    while (bytesRead > 0) {
      // update() consumes the bytes before returning, so the buffer can be reused.
      hash.update(chunk.subarray(0, bytesRead));
      bytesRead = fs.readSync(fd, chunk, 0, chunk.length, null);
    }
  } finally {
    fs.closeSync(fd);
  }
  return hash.digest('hex');
}
|
|
56
|
+
|
|
57
|
+
/**
 * Archive a suspect package tarball and its scan metadata.
 *
 * The tarball is downloaded to a temporary ".part" file and only renamed to its
 * final ".tgz" name once the hash and metadata have been written. The ".tgz"
 * file doubles as the dedup marker, so a failed or interrupted download must
 * never leave one behind — otherwise later attempts would be skipped.
 *
 * @param {string} packageName - Package name (e.g. "evil-pkg" or "@scope/evil-pkg")
 * @param {string} version - Package version
 * @param {string} tarballUrl - Registry URL to download the tarball from
 * @param {object} [scanResult] - Scan result object from the pipeline
 * @param {number} [scanResult.score] - Risk score (stored as 0 when absent)
 * @param {string} [scanResult.priority] - Priority tier (e.g. "P1", "P2")
 * @param {Array} [scanResult.rulesTriggered] - Array of triggered rule IDs
 * @param {string} [scanResult.llmVerdict] - LLM detective verdict if available
 * @returns {Promise<boolean>} true if archived, false if skipped (missing
 *   argument or already archived). Rejects if the download or a file operation fails.
 */
async function archiveSuspectTarball(packageName, version, tarballUrl, scanResult = {}) {
  if (!tarballUrl || !packageName || !version) return false;

  // Tolerate an explicit null as well as an omitted argument.
  const scan = scanResult || {};

  const dayDir = path.join(ARCHIVE_DIR, getArchiveDateString());
  // The version ends up in a path too: neutralise separators and anything else
  // outside the characters a semver/PEP 440 version legitimately uses.
  const safeVersion = String(version).replace(/[^a-zA-Z0-9._+-]/g, '_');
  const basename = `${sanitizeForFilename(packageName)}-${safeVersion}`;
  const tgzPath = path.join(dayDir, `${basename}.tgz`);
  const jsonPath = path.join(dayDir, `${basename}.json`);

  // Dedup: skip if already archived
  if (fs.existsSync(tgzPath)) {
    return false;
  }

  // Ensure day directory exists
  fs.mkdirSync(dayDir, { recursive: true });

  // Random suffix so two concurrent attempts for the same package never share
  // a temporary file.
  const partPath = `${tgzPath}.${crypto.randomBytes(4).toString('hex')}.part`;

  try {
    // Download with semaphore (shares concurrency with rest of pipeline)
    await acquireRegistrySlot();
    try {
      await downloadToFile(tarballUrl, partPath, ARCHIVE_TIMEOUT_MS);
    } finally {
      releaseRegistrySlot();
    }

    // Compute hash and write metadata
    const metadata = {
      package: packageName,
      version,
      timestamp: new Date().toISOString(),
      score: scan.score || 0,
      priority: scan.priority || null,
      rules_triggered: scan.rulesTriggered || [],
      llm_verdict: scan.llmVerdict || null,
      tarball_sha256: sha256File(partPath)
    };
    fs.writeFileSync(jsonPath, JSON.stringify(metadata, null, 2));

    // Publish the tarball last: from here on the dedup check sees it.
    fs.renameSync(partPath, tgzPath);
  } catch (err) {
    // Best-effort cleanup of the temporary file; the original error is what matters.
    // NOTE(review): downloadToFile may already remove its own partial output — confirm.
    try {
      fs.unlinkSync(partPath);
    } catch {
      // Nothing to clean up (file never created or already removed).
    }
    throw err;
  }

  return true;
}
|
|
112
|
+
|
|
113
|
+
module.exports = {
|
|
114
|
+
archiveSuspectTarball,
|
|
115
|
+
ARCHIVE_DIR,
|
|
116
|
+
// Exported for testing
|
|
117
|
+
sanitizeForFilename,
|
|
118
|
+
sha256File,
|
|
119
|
+
getArchiveDateString
|
|
120
|
+
};
|
package/src/monitor/webhook.js
CHANGED
|
@@ -379,6 +379,7 @@ function buildAlertData(name, version, ecosystem, result, sandboxResult, llmResu
|
|
|
379
379
|
webhookData.llm = {
|
|
380
380
|
verdict: llmResult.verdict,
|
|
381
381
|
confidence: llmResult.confidence,
|
|
382
|
+
investigation_steps: (llmResult.investigation_steps || []).slice(0, 5),
|
|
382
383
|
reasoning: (llmResult.reasoning || '').slice(0, 200),
|
|
383
384
|
attack_type: llmResult.attack_type || null,
|
|
384
385
|
iocs_found: (llmResult.iocs_found || []).slice(0, 5),
|