@testzugang/pi-plugin-dependency-audit 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1345 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Static-first npm/TypeScript package and dependency triage.
4
+
5
+ This tool intentionally does not run npm, node, package scripts, tests, builds,
6
+ or code from the target. It inspects source trees and npm tarballs (.tgz) for
7
+ malware and quality risk indicators that are common in supply-chain attacks.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import datetime as _dt
13
+ import hashlib
14
+ import json
15
+ import math
16
+ import os
17
+ import re
18
+ import shutil
19
+ import sys
20
+ import tarfile
21
+ import tempfile
22
+ from dataclasses import dataclass, field, asdict
23
+ from pathlib import Path, PurePosixPath
24
+ from typing import Any, Iterable
25
+
26
# Severity names mapped to a numeric rank (higher is more severe). Used to
# validate severities in ScanContext.add and to compare them in max_severity.
SEVERITY_ORDER = {"INFO": 0, "LOW": 1, "MEDIUM": 2, "HIGH": 3, "CRITICAL": 4}
SEVERITIES = tuple(SEVERITY_ORDER.keys())

# npm script names that analyze_package_json always treats as lifecycle hooks.
LIFECYCLE_SCRIPTS = {
    "preinstall", "install", "postinstall",
    "prepublish", "prepublishOnly",
    "preprepare", "prepare", "postprepare",
    "prepack", "postpack", "publish", "postpublish",
    "dependencies",
}

# Subset of lifecycle scripts that analyze_package_json reports with a HIGH
# baseline severity and the "install-phase" tag.
INSTALL_PHASE_SCRIPTS = {
    "preinstall", "install", "postinstall", "prepublish",
    "preprepare", "prepare", "postprepare", "dependencies",
}

# package.json fields that analyze_package_json walks when reviewing
# dependency specs.
DEP_FIELDS = (
    "dependencies",
    "devDependencies",
    "optionalDependencies",
    "peerDependencies",
    "peerDependenciesMeta",
    "bundleDependencies",
    "bundledDependencies",
    "overrides",
    "resolutions",
)

# File-extension groups. NOTE(review): their consumers are outside the visible
# part of this file; presumably they select which files get content scanning.
TEXT_EXTENSIONS = {
    ".js", ".jsx", ".mjs", ".cjs",
    ".ts", ".tsx", ".mts", ".cts",
    ".json", ".jsonc", ".yaml", ".yml", ".toml",
    ".sh", ".bash", ".zsh", ".fish",
    ".ps1", ".cmd", ".bat",
    ".md", ".txt", ".env", ".npmrc", ".yarnrc", ".pnpmrc",
    ".html", ".css",
}

CODE_EXTENSIONS = {
    ".js", ".jsx", ".mjs", ".cjs",
    ".ts", ".tsx", ".mts", ".cts",
    ".sh", ".bash", ".zsh", ".fish", ".ps1", ".cmd", ".bat",
}

DOC_EXTENSIONS = {".md", ".txt"}

BINARY_EXEC_EXTENSIONS = {
    ".exe", ".dll", ".so", ".dylib", ".node", ".wasm",
    ".bin", ".elf", ".msi", ".pkg", ".appimage",
}

ARCHIVE_EXTENSIONS = {".zip", ".tgz", ".tar", ".gz", ".xz", ".7z", ".rar", ".br"}

# Directory names that should_skip_dir always prunes from the walk.
DEFAULT_SKIP_DIRS = {
    ".git", ".hg", ".svn",
    ".cache", ".turbo", ".parcel-cache", ".next", ".nuxt",
    "coverage", ".nyc_output", ".vitest", ".jest",
    ".idea", ".DS_Store",
}

# Pruned by should_skip_dir unless node_modules scanning was requested or the
# target is a tarball.
NODE_MODULES_DIRS = {"node_modules"}

# Indicator-of-compromise substrings, matched case-insensitively by
# script_risk_labels and analyze_package_json. This list is mutable at
# runtime: load_ioc_files appends user-supplied entries to it.
IOC_STRINGS = [
    "filev2.getsession.org/file",
    "getsession.org",
    "169.254.169.254/latest/meta-data/iam/security-credentials",
    "metadata.google.internal",
    "127.0.0.1:8200",
    "oven-sh/bun/releases/download/bun-v1.3.13",
    "github.com/oven-sh/bun/releases/download/bun-v1.3.13",
    "git-tanstack.com",
    "transformers.pyz",
    "tanstack_runner.js",
    "router_init.js",
    "router_runtime.js",
    "createCommitOnBranch",
    ".claude/settings.json",
    ".claude/setup.mjs",
    ".vscode/tasks.json",
    ".vscode/setup.mjs",
    "tanstack/router#79ac49eedf774dd4b0cfa308722bc463cfe5885c",
    "@tanstack/setup",
]

# Token-shaped secrets; mask_secrets redacts every match before evidence is
# stored in a finding.
SECRET_PATTERNS = [
    re.compile(r"github_pat_[A-Za-z0-9_]{20,}_[A-Za-z0-9_]{20,}"),
    re.compile(r"\bgh[pousr]_[A-Za-z0-9_\-.]{20,}"),
    re.compile(r"\bghs_[A-Za-z0-9_\-.]{20,}"),
    re.compile(r"\bnpm_[A-Za-z0-9_\-.]{20,}"),
    re.compile(r"\bAKIA[0-9A-Z]{16}\b"),
    re.compile(r"\bASIA[0-9A-Z]{16}\b"),
    re.compile(r"xox[baprs]-[A-Za-z0-9-]{20,}"),
]

# Names of credential-bearing environment variables; a match makes
# script_risk_labels add the "credential environment variable" label.
TOKEN_NAME_PATTERN = re.compile(
    r"\b(AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|"
    r"GITHUB_TOKEN|GH_TOKEN|NPM_TOKEN|NODE_AUTH_TOKEN|ACTIONS_ID_TOKEN|"
    r"ACTIONS_ID_TOKEN_REQUEST_URL|ACTIONS_ID_TOKEN_REQUEST_TOKEN|"
    r"VAULT_TOKEN|VAULT_AUTH_TOKEN|GOOGLE_APPLICATION_CREDENTIALS|"
    r"AZURE_CLIENT_SECRET|DOCKER_CONFIG)\b"
)

# The tables below are lists of (compiled regex, label) pairs consumed by
# script_risk_labels. The label text is load-bearing: analyze_package_json
# escalates severity by substring-matching labels ("download", "URL",
# "fetch", "execution", "interpreter", "dynamic", "download piped"), so do
# not reword labels without updating those checks.
NETWORK_PATTERNS = [
    (re.compile(r"\b(fetch|XMLHttpRequest)\s*\("), "browser/node fetch"),
    (re.compile(r"\b(require\(['\"]https?['\"]\)|from ['\"]https?['\"]|https?\.(request|get)\s*\()"), "node http/https API"),
    (re.compile(r"\b(axios|got|request|superagent|undici)\b"), "HTTP client library"),
    (re.compile(r"\b(curl|wget|Invoke-WebRequest|Invoke-RestMethod|iwr|irm)\b", re.I), "download command"),
    (re.compile(r"https?://", re.I), "URL literal"),
]

EXEC_PATTERNS = [
    (re.compile(r"\brequire\(['\"]child_process['\"]\)|from ['\"]child_process['\"]"), "child_process import"),
    (re.compile(r"\b(exec|execSync|execFile|execFileSync|spawn|spawnSync|fork)\s*\("), "process execution call"),
    (re.compile(r"\b(child_process\.)?(exec|execSync|execFile|spawn|spawnSync)\s*\("), "child_process execution"),
    (re.compile(r"\beval\s*\(|\bnew\s+Function\s*\(|\bFunction\s*\("), "dynamic JS evaluation"),
    (re.compile(r"\bvm\.(runInNewContext|runInThisContext|runInContext|compileFunction)\s*\("), "Node vm execution"),
    (re.compile(r"\bWebAssembly\.(instantiate|compile)\s*\("), "WebAssembly runtime load"),
    (re.compile(r"\b(node|bun|deno|python|python3|bash|sh|zsh|fish|powershell|pwsh|cmd)\b", re.I), "interpreter invocation"),
]

STEALTH_PATTERNS = [
    (re.compile(r"&&\s*exit\s+1\b"), "forced failure after execution"),
    (re.compile(r"(?:>|1>)\s*/dev/null|2>&1|--silent|--quiet|-sS?\b|\bNO_COLOR\b"), "output suppression"),
    (re.compile(r"\bchmod\s+\+x\b|\bicacls\b|\bSet-ExecutionPolicy\b", re.I), "permission change"),
    (re.compile(r"\|\s*(bash|sh|zsh|powershell|pwsh|cmd)\b", re.I), "download piped to shell"),
]

SECRET_PATH_PATTERNS = [
    (re.compile(r"\.npmrc|\.yarnrc|\.pnpmrc"), "package-manager credentials file"),
    (re.compile(r"\.aws/(credentials|config)|aws/credentials"), "AWS credentials path"),
    (re.compile(r"\.config/gh/hosts\.yml|\.git-credentials|\.netrc"), "GitHub/git credentials path"),
    (re.compile(r"\.ssh/(id_rsa|id_ed25519|config|known_hosts)"), "SSH credential path"),
    (re.compile(r"\.docker/config\.json"), "Docker credential path"),
]

OBFUSCATION_PATTERNS = [
    (re.compile(r"_0x[a-fA-F0-9]{3,}"), "hex-style obfuscated identifiers"),
    (re.compile(r"\b(atob|btoa)\s*\(|Buffer\.from\s*\([^)]{0,120}['\"]base64['\"]"), "base64 decode"),
    (re.compile(r"\b(zlib|gunzipSync|inflateSync|brotliDecompressSync)\b"), "compressed payload decode"),
    (re.compile(r"\b(createDecipheriv|createCipheriv|crypto\.subtle|AES|RC4|xor)\b", re.I), "crypto/decryption layer"),
    (re.compile(r"\bString\.fromCharCode\s*\(|\bunescape\s*\("), "string decoder"),
]

IDE_AGENT_PATTERNS = [
    (re.compile(r"\.claude/(settings\.json|setup\.mjs|router_runtime\.js)"), "Claude Code/agent config path"),
    (re.compile(r"\.vscode/(tasks\.json|settings\.json|setup\.mjs|extensions\.json)"), "VS Code config path"),
    (re.compile(r"\.cursor/|\.devcontainer/"), "AI/IDE/devcontainer config path"),
]

GITHUB_API_PATTERNS = [
    (re.compile(r"createCommitOnBranch|createRef|updateRef|repos/[^\s]+/contents|git/refs", re.I), "GitHub write API"),
    (re.compile(r"graphql\s*\(|api\.github\.com/graphql", re.I), "GitHub GraphQL API"),
    (re.compile(r"octokit|@actions/github", re.I), "GitHub API client"),
]

# Used by classify_dep_spec: a full 40-hex-digit commit id, and an exact
# (non-range) semver version with optional leading "v" and prerelease/build.
FULL_SHA_RE = re.compile(r"^[0-9a-fA-F]{40}$")
SEMVER_EXACT_RE = re.compile(r"^(?:v)?\d+\.\d+\.\d+(?:[-+][0-9A-Za-z.-]+)?$")
183
+
184
+
185
@dataclass
class Finding:
    """One scanner result, as stored by ScanContext.add."""

    severity: str  # ScanContext.add upper-cases this and falls back to "INFO" if unknown
    category: str  # short grouping key, e.g. "manifest", "npmrc", "dependency-spec"
    path: str  # ScanContext.add stores a root-relative path for Path inputs
    line: int | None  # 1-based line number, or None when no line applies
    title: str  # ScanContext.add collapses to one line and caps at 220 chars
    evidence: str  # ScanContext.add masks secrets and caps at 900 chars
    recommendation: str  # ScanContext.add collapses to one line and caps at 500 chars
    confidence: str = "medium"  # callers in this file pass "low", "medium" or "high"
    tags: list[str] = field(default_factory=list)  # free-form labels
196
+
197
+
198
+
199
+
200
def normalize_ioc(value: str) -> list[str]:
    """Turn one line of an IOC file into the strings to match against.

    Blank lines and lines starting with ``#`` yield an empty list. Otherwise
    the trimmed line is returned, followed by its "refanged" form
    (``hxxp(s)://`` -> ``http(s)://``, ``[.]`` / ``(.)`` -> ``.``) when that
    differs from the trimmed line.
    """
    candidate = value.strip()
    if candidate == "" or candidate[0] == "#":
        return []

    # Applied in order, so "hxxps://" is rewritten before "hxxp://".
    substitutions = (
        ("hxxps://", "https://"),
        ("hxxp://", "http://"),
        ("[.]", "."),
        ("(.)", "."),
    )
    refanged = candidate
    for defanged, plain in substitutions:
        refanged = refanged.replace(defanged, plain)

    if refanged == candidate:
        return [candidate]
    return [candidate, refanged]
207
+
208
+
209
def load_ioc_files(paths: Iterable[Path]):
    """Append IOC strings from the given files to the module-level IOC_STRINGS.

    Each file is read as UTF-8 (undecodable bytes replaced) and every line is
    passed through normalize_ioc. Entries already present are skipped, compared
    case-insensitively. Falsy or non-existent paths and files that fail with
    OSError are ignored.
    """
    seen = {entry.lower() for entry in IOC_STRINGS}
    for path in paths:
        if not (path and path.exists()):
            continue
        try:
            lines = path.read_text("utf-8", errors="replace").splitlines()
        except OSError:
            continue
        for line in lines:
            for ioc in normalize_ioc(line):
                folded = ioc.lower()
                if folded in seen:
                    continue
                IOC_STRINGS.append(ioc)
                seen.add(folded)
222
+
223
+
224
@dataclass
class TargetSummary:
    """Per-target scan statistics, created by ScanContext.__init__."""

    target: str  # label of the scanned target as supplied to ScanContext
    root: str  # resolved root directory, as a string
    mode: str  # scan mode; this file compares it to "library", "application" and "repo"
    is_tarball: bool  # True when the target is a tarball
    started_at: str  # UTC timestamp in ISO-8601 form with a trailing "Z"
    file_count: int = 0  # incremented by iter_files for each stat-able file
    package_json_count: int = 0  # incremented by analyze_package_json
    lockfile_count: int = 0  # incremented by analyze_package_lock
    tsconfig_count: int = 0  # NOTE(review): updated outside the visible part of the file
    workflow_count: int = 0  # NOTE(review): updated outside the visible part of the file
    total_bytes: int = 0  # sum of st_size accumulated by iter_files
    sha256: str | None = None  # presumably the tarball digest; set outside the visible code
238
+
239
+
240
@dataclass
class ScanReport:
    """Top-level report combining every scanned target.

    NOTE(review): the code that builds and serialises this report is outside
    the visible part of the file; the field notes below are read from the
    names and types only.
    """

    tool: str  # presumably the tool name/version string
    generated_at: str  # presumably a timestamp string
    summaries: list[TargetSummary]  # one summary per scanned target
    findings: list[Finding]  # findings collected across targets
    counts_by_severity: dict[str, int]  # presumably keyed by the SEVERITY_ORDER names
    decision: str  # overall verdict; valid values not visible here
    strict_exit_code: int  # presumably the process exit code in strict mode
249
+
250
+
251
class ScanContext:
    """Mutable state for scanning one target: settings, summary and findings."""

    def __init__(self, root: Path, target_label: str, mode: str, is_tarball: bool, include_node_modules: bool, max_file_bytes: int, max_findings: int):
        """Store the scan settings and start an empty TargetSummary.

        Args:
            root: Directory to scan; stored resolved.
            target_label: Human-readable name of the target.
            mode: Scan mode string (compared to "library"/"application"/"repo"
                elsewhere in this file).
            is_tarball: Whether the target is a tarball.
            include_node_modules: Whether node_modules directories are walked.
            max_file_bytes: Per-file read limit.
            max_findings: Maximum number of findings recorded before a single
                "Finding limit reached" notice is appended.
        """
        self.root = root.resolve()
        self.target_label = target_label
        self.mode = mode
        self.is_tarball = is_tarball
        self.include_node_modules = include_node_modules
        self.max_file_bytes = max_file_bytes
        self.max_findings = max_findings
        self.findings: list[Finding] = []
        self._dedupe: set[tuple[str, str, str, int | None, str]] = set()
        self.lifecycle_entrypoints: set[str] = set()
        self.package_roots: set[Path] = set()
        # timezone.utc rather than the datetime.UTC alias: the alias only
        # exists on Python 3.11+, while both produce the same "+00:00" offset.
        now_utc = _dt.datetime.now(_dt.timezone.utc).replace(microsecond=0)
        self.summary = TargetSummary(
            target=target_label,
            root=str(self.root),
            mode=mode,
            is_tarball=is_tarball,
            started_at=now_utc.isoformat().replace("+00:00", "Z"),
        )

    def rel(self, path: Path | str) -> str:
        """Return ``path`` relative to the scan root in POSIX form.

        Falls back to ``str(path)`` when the path cannot be resolved or does
        not live under the root.
        """
        p = Path(path)
        try:
            return p.resolve().relative_to(self.root).as_posix()
        except Exception:
            return str(path)

    def add(self, severity: str, category: str, path: Path | str, line: int | None, title: str, evidence: str, recommendation: str, confidence: str = "medium", tags: Iterable[str] = ()):  # noqa: E501
        """Record a finding, with normalisation, secret masking and dedupe.

        Once ``max_findings`` findings exist, one INFO "scan-limit" notice is
        appended and every later call is ignored. Unknown severities become
        "INFO". Findings sharing (severity, category, path, line, title) are
        recorded only once; evidence is not part of that key.
        """
        if len(self.findings) >= self.max_findings:
            # Exactly at the limit: append the notice once. That pushes the
            # length past the limit, so later calls skip this branch.
            if len(self.findings) == self.max_findings:
                self.findings.append(Finding(
                    severity="INFO",
                    category="scan-limit",
                    path=".",
                    line=None,
                    title="Finding limit reached",
                    evidence=f"The scanner stopped adding findings after {self.max_findings} findings.",
                    recommendation="Increase --max-findings for a complete report or triage the highest severity findings first.",
                    confidence="high",
                    tags=["limit"],
                ))
            return
        severity = severity.upper()
        if severity not in SEVERITY_ORDER:
            severity = "INFO"
        # Only Path inputs are made root-relative; strings (including the
        # pseudo paths some analyzers build) are stored verbatim.
        rel_path = self.rel(path) if isinstance(path, Path) else str(path)
        evidence = mask_secrets(one_line(evidence))[:900]
        title = one_line(title)[:220]
        recommendation = one_line(recommendation)[:500]
        key = (severity, category, rel_path, line, title)
        if key in self._dedupe:
            return
        self._dedupe.add(key)
        self.findings.append(Finding(
            severity=severity,
            category=category,
            path=rel_path,
            line=line,
            title=title,
            evidence=evidence,
            recommendation=recommendation,
            confidence=confidence,
            tags=list(tags),
        ))
316
+
317
+
318
def one_line(value: Any) -> str:
    """Render ``value`` as a single line with whitespace runs collapsed.

    The value is converted with ``str()``, CR/LF/TAB become spaces, every run
    of whitespace is squeezed to one space and the result is stripped.
    """
    text = str(value)
    for control in ("\r", "\n", "\t"):
        text = text.replace(control, " ")
    squeezed = re.sub(r"\s+", " ", text)
    return squeezed.strip()
321
+
322
+
323
def mask_secrets(text: str) -> str:
    """Redact credential-looking substrings from ``text``.

    Every SECRET_PATTERNS match longer than 12 characters keeps its first six
    and last four characters around ``...[MASKED]...``; shorter matches become
    ``[MASKED]``. After that, values assigned to a few well-known token
    variables are replaced with ``[MASKED]`` while the variable name is kept.
    """
    def _redact(match: re.Match[str]) -> str:
        secret = match.group(0)
        if len(secret) > 12:
            return secret[:6] + "...[MASKED]..." + secret[-4:]
        return "[MASKED]"

    masked = text
    for pattern in SECRET_PATTERNS:
        masked = pattern.sub(_redact, masked)

    # Mask common assignment values while preserving variable names.
    assignment = r"((?:NPM_TOKEN|GITHUB_TOKEN|GH_TOKEN|AWS_SECRET_ACCESS_KEY|NODE_AUTH_TOKEN)\s*[=:]\s*)['\"]?[^'\"\s]+"
    return re.sub(assignment, r"\1[MASKED]", masked)
335
+
336
+
337
def sha256_file(path: Path, limit: int | None = None) -> str:
    """Return the hex SHA-256 of ``path``, read in chunks of at most 1 MiB.

    When ``limit`` is given, only the first ``limit`` bytes are hashed; a
    limit of zero or less hashes nothing.
    """
    block_size = 1024 * 1024
    digest = hashlib.sha256()
    budget = limit
    with path.open("rb") as stream:
        while budget is None or budget > 0:
            want = block_size if budget is None else min(block_size, budget)
            data = stream.read(want)
            if not data:
                break
            digest.update(data)
            if budget is not None:
                budget -= len(data)
    return digest.hexdigest()
353
+
354
+
355
def safe_read_bytes(path: Path, max_bytes: int) -> tuple[bytes, bool]:
    """Read at most ``max_bytes`` bytes from ``path``.

    Returns ``(data, truncated)``, where ``truncated`` is True when the size
    reported by ``stat`` exceeds ``max_bytes``.
    """
    total_size = path.stat().st_size
    with path.open("rb") as stream:
        head = stream.read(max_bytes)
    truncated = total_size > max_bytes
    return head, truncated
360
+
361
+
362
def decode_text(data: bytes) -> str:
    """Decode ``data`` as UTF-8, replacing undecodable bytes with U+FFFD."""
    decoded = data.decode(encoding="utf-8", errors="replace")
    return decoded
364
+
365
+
366
def is_probably_binary(data: bytes) -> bool:
    """Guess whether ``data`` is binary by inspecting its first 4096 bytes.

    Empty input is text. A NUL byte in the sample means binary. Otherwise the
    sample is binary when more than 35% of its bytes are "non-text": control
    bytes other than TAB/LF/VT/FF/CR, plus DEL. Bytes >= 0x80 count as
    non-text only when the sample is not valid UTF-8. Counting them
    unconditionally misclassified legitimate non-ASCII text (Cyrillic, CJK,
    ...) as binary.
    """
    if not data:
        return False
    sample = data[:4096]
    if b"\x00" in sample:
        return True

    import codecs  # local import: only this helper needs it

    try:
        # final=False tolerates a multi-byte character cut at the sample edge.
        codecs.getincrementaldecoder("utf-8")("strict").decode(sample, final=False)
        valid_utf8 = True
    except UnicodeDecodeError:
        valid_utf8 = False

    if valid_utf8:
        nontext = sum(1 for b in sample if b < 9 or 13 < b < 32 or b == 127)
    else:
        nontext = sum(1 for b in sample if b < 9 or 13 < b < 32 or b > 126)
    return nontext / len(sample) > 0.35
374
+
375
+
376
def line_for_offset(text: str, offset: int) -> int:
    """Return the 1-based line number containing character ``offset``."""
    newlines_before = text[:offset].count("\n")
    return newlines_before + 1
378
+
379
+
380
def first_match_line(text: str, pattern: re.Pattern[str]) -> tuple[int | None, str | None]:
    """Locate the first match of ``pattern`` in ``text``.

    Returns ``(line, snippet)``: the 1-based line of the match start and a
    one-line snippet covering the match plus up to 180 following characters.
    Returns ``(None, None)`` when nothing matches.
    """
    hit = pattern.search(text)
    if hit is None:
        return None, None
    start, stop = hit.span()
    snippet_end = min(len(text), stop + 180)
    return line_for_offset(text, start), one_line(text[start:snippet_end])
387
+
388
+
389
def key_line(text: str, key: str) -> int | None:
    """Return the 1-based line of the first quoted ``key`` followed by a colon.

    This is a textual search, not a JSON parse: the first occurrence anywhere
    in ``text`` wins. Returns None when the key is not found.
    """
    quoted_key = re.compile(r"[\"']" + re.escape(key) + r"[\"']\s*:")
    found = quoted_key.search(text)
    return line_for_offset(text, found.start()) if found else None
395
+
396
+
397
def should_skip_dir(path: Path, include_node_modules: bool, is_tarball: bool) -> bool:
    """Decide whether the directory ``path`` is pruned from the walk.

    Names in DEFAULT_SKIP_DIRS are always skipped. Names in NODE_MODULES_DIRS
    are skipped unless node_modules scanning was requested or the target is a
    tarball.
    """
    dirname = path.name
    skip_node_modules = dirname in NODE_MODULES_DIRS and not (include_node_modules or is_tarball)
    return dirname in DEFAULT_SKIP_DIRS or skip_node_modules
404
+
405
+
406
def iter_files(ctx: ScanContext) -> list[Path]:
    """Walk ``ctx.root`` and return every file that can be stat'ed.

    Directories rejected by should_skip_dir are pruned in place so os.walk
    never descends into them. ``ctx.summary.file_count`` and
    ``ctx.summary.total_bytes`` are updated for each returned file.
    """
    collected: list[Path] = []
    summary = ctx.summary
    for current, subdirs, names in os.walk(ctx.root):
        base = Path(current)
        # Slice assignment mutates the list os.walk iterates, which is what
        # prevents descent into skipped directories.
        subdirs[:] = [
            sub for sub in subdirs
            if not should_skip_dir(base / sub, ctx.include_node_modules, ctx.is_tarball)
        ]
        for filename in names:
            candidate = base / filename
            try:
                size = candidate.stat().st_size
            except OSError:
                continue
            summary.file_count += 1
            summary.total_bytes += size
            collected.append(candidate)
    return collected
421
+
422
+
423
def load_json_file(path: Path) -> tuple[Any | None, str]:
    """Read and parse a JSON file.

    Returns ``(parsed, raw_text)`` on success. On any failure (read or parse)
    returns ``(None, "JSON parse error: ...")``. A file whose content is the
    JSON literal ``null`` also yields ``None`` as the first element, with the
    raw text as the second.
    """
    try:
        raw = path.read_text("utf-8", errors="replace")
        parsed = json.loads(raw)
    except Exception as exc:
        return None, f"JSON parse error: {exc}"
    return parsed, raw
429
+
430
+
431
def classify_dep_spec(spec: str) -> tuple[str, str, str]:
    """Classify a package.json dependency spec.

    Returns ``(kind, severity, reason)``. Checks run in order: npm alias,
    git source (by prefix or well-known host), http(s) URL, file/link,
    workspace, floating tag, exact semver, version range, empty, other.

    The exact-semver check runs before the range check, and ``x`` only counts
    as a range wildcard when it is a standalone version component (``1.x``).
    Previously any spec containing the letter ``x``, such as the exact pin
    ``1.2.3-next.0``, was reported as a range.
    """
    s = str(spec).strip()
    low = s.lower()
    if low.startswith("npm:"):
        return "alias", "MEDIUM", "npm alias can hide the actual package identity"

    git_prefixes = ("git+", "git://", "github:", "gitlab:", "bitbucket:")
    git_hosts = ("github.com", "gitlab.com", "bitbucket.org")
    if low.startswith(git_prefixes) or any(host in low for host in git_hosts):
        if "#" not in s:
            return "git-unpinned", "HIGH", "git dependency has no commit pin"
        frag = s.rsplit("#", 1)[-1]
        if not FULL_SHA_RE.match(frag):
            return "git-not-full-sha", "HIGH", "git dependency is not pinned to a full 40-character commit SHA"
        return "git", "HIGH", "git dependency can execute prepare scripts during install"

    if re.match(r"https?://", low):
        plaintext = low.startswith("http://")
        reason = "remote tarball/URL dependency bypasses normal registry trust controls"
        if plaintext:
            reason += " and uses plaintext HTTP"
        return "url", "HIGH" if plaintext else "MEDIUM", reason

    if low.startswith(("file:", "link:")):
        return "local-file", "MEDIUM", "local file/link dependency depends on local filesystem state"
    if low.startswith("workspace:"):
        return "workspace", "INFO", "workspace dependency; inspect workspace package.json separately"
    if low in {"*", "x", "latest", "next", "canary", "beta", "alpha"}:
        return "floating", "MEDIUM", "floating dependency spec can resolve to newly published versions"

    # Exact versions first: a prerelease/build tag may legitimately contain
    # the letter "x" without being a range.
    if SEMVER_EXACT_RE.match(s):
        return "exact", "INFO", "exact semver dependency"

    has_range_operator = any(ch in s for ch in "^~*><|")
    # "x"/"X" is a wildcard only as a whole component, e.g. "1.x" or "1.2.X".
    has_x_wildcard = re.search(r"(?<![0-9A-Za-z-])[xX](?![0-9A-Za-z-])", s) is not None
    if has_range_operator or has_x_wildcard:
        return "range", "LOW", "version range permits dependency drift unless a lockfile is enforced"

    if s == "":
        return "empty", "MEDIUM", "empty dependency spec"
    return "other", "LOW", "non-standard dependency spec; review manually"
462
+
463
+
464
def flatten_dep_like(obj: Any, prefix: str = "") -> list[tuple[str, str]]:
    """Flatten nested dict/list structures into ``(path, string)`` pairs.

    Dict keys are joined with ``.`` and list positions are written ``[i]``.
    Only string leaves are returned; other scalars are ignored, and any
    non-container ``obj`` yields an empty list.

    Strings inside a list nested under a dict (``{"a": ["x"]}``) are now
    returned as ``("a[0]", "x")``. They used to be dropped, although the same
    list at top level was kept.
    """
    if isinstance(obj, dict):
        children = [(f"{prefix}.{k}" if prefix else str(k), v) for k, v in obj.items()]
    elif isinstance(obj, list):
        children = [(f"{prefix}[{i}]", item) for i, item in enumerate(obj)]
    else:
        return []

    found: list[tuple[str, str]] = []
    for key, child in children:
        if isinstance(child, str):
            found.append((key, child))
        else:
            # Recurse into containers; other scalars return an empty list.
            found.extend(flatten_dep_like(child, key))
    return found
483
+
484
+
485
def extract_script_entrypoints(cmd: str) -> list[str]:
    """Return script-file paths referenced by an npm script command.

    This is a heuristic scan, not a shell parser. It collects tokens ending in
    a known script extension and normalises backslashes to ``/``.

    Two defects of the earlier version are fixed:
    * The extension must end the token, so ``package.json`` is no longer
      reported as ``package.js`` and ``cli.tsx`` is no longer cut to ``cli.ts``.
    * ``scheme://`` URLs are removed before matching. The previous
      ``startswith("http")`` filter never saw the scheme (``:`` is outside the
      token character class), so URL tails were reported while local files
      such as ``httpServer.js`` were dropped.
    """
    # Remote resources are not local entrypoints; blank them out first.
    without_urls = re.sub(r"[A-Za-z][A-Za-z0-9+.-]*://\S+", " ", cmd)
    # Capture common script file references without trying to fully parse a shell command.
    pattern = (
        r"(?:node|bun|deno|tsx?|ts-node|python3?|bash|sh|pwsh|powershell)?\s*"
        r"([A-Za-z0-9_./\\-]+\.(?:mjs|cjs|jsx|js|tsx|ts|sh|ps1|cmd|bat))(?![A-Za-z0-9_])"
    )
    return [m.group(1).replace("\\", "/") for m in re.finditer(pattern, without_urls)]
493
+
494
+
495
def script_risk_labels(cmd: str) -> list[str]:
    """Return the sorted, de-duplicated risk labels that apply to ``cmd``.

    Labels come from every (regex, label) table, from TOKEN_NAME_PATTERN
    ("credential environment variable") and from case-insensitive IOC_STRINGS
    substring hits ("known IOC: <ioc>").
    """
    rule_sets = (
        NETWORK_PATTERNS,
        EXEC_PATTERNS,
        STEALTH_PATTERNS,
        OBFUSCATION_PATTERNS,
        SECRET_PATH_PATTERNS,
        IDE_AGENT_PATTERNS,
        GITHUB_API_PATTERNS,
    )
    hits: set[str] = set()
    for rules in rule_sets:
        hits.update(label for pattern, label in rules if pattern.search(cmd))

    if TOKEN_NAME_PATTERN.search(cmd):
        hits.add("credential environment variable")

    lowered = cmd.lower()
    hits.update(f"known IOC: {ioc}" for ioc in IOC_STRINGS if ioc.lower() in lowered)
    return sorted(hits)
507
+
508
+
509
def analyze_package_json(ctx: ScanContext, path: Path):
    """Inspect one package.json and record findings on ``ctx``.

    Covers, in order: manifest validity, npm scripts and lifecycle hooks,
    dependency specs (including overrides/resolutions and bundled deps),
    package metadata quality, declared entrypoints and bin files, workspaces,
    and the presence of a lockfile next to a root manifest. Also updates
    ``ctx.summary.package_json_count``, ``ctx.package_roots`` and
    ``ctx.lifecycle_entrypoints``.

    Fixes relative to the previous version:
    * ``post<name>`` hooks for a sibling script are now recognised; the old
      ``script_name[3:]`` slice only stripped a three-letter ``pre`` prefix.
    * Line lookup for a dependency first tries the full name, so dotted
      package names such as ``lodash.merge`` resolve to their own line.
    """
    data, text_or_err = load_json_file(path)
    ctx.summary.package_json_count += 1
    ctx.package_roots.add(path.parent.resolve())
    if data is None:
        ctx.add("MEDIUM", "manifest", path, None, "Invalid package.json", text_or_err, "Fix JSON syntax before trusting this package.", "high", ["package-json"])
        return
    text = text_or_err
    if not isinstance(data, dict):
        ctx.add("HIGH", "manifest", path, 1, "package.json is not an object", str(type(data)), "Treat as suspicious and validate the package contents manually.", "high", ["package-json"])
        return

    name = data.get("name", "<unnamed>")
    version = data.get("version", "<no-version>")

    # Scripts and lifecycle hooks.
    scripts = data.get("scripts", {})
    if isinstance(scripts, dict):
        for script_name, cmd in scripts.items():
            if not isinstance(cmd, str):
                continue
            labels = script_risk_labels(cmd)
            line = key_line(text, script_name)
            # npm runs "pre<name>"/"post<name>" around "<name>", so such a
            # script is a hook when its base script is also defined.
            wraps_sibling = any(
                script_name.startswith(prefix) and script_name[len(prefix):] in scripts
                for prefix in ("pre", "post")
            )
            is_lifecycle = script_name in LIFECYCLE_SCRIPTS or wraps_sibling
            is_install_phase = script_name in INSTALL_PHASE_SCRIPTS
            for ref in extract_script_entrypoints(cmd):
                candidate = (path.parent / ref).resolve()
                try:
                    ctx.lifecycle_entrypoints.add(candidate.relative_to(ctx.root).as_posix())
                except Exception:
                    # Outside the scan root: keep the raw reference instead.
                    ctx.lifecycle_entrypoints.add(ref)

            if is_lifecycle:
                sev = "HIGH" if is_install_phase else "MEDIUM"
                if labels:
                    # Severity is derived from label text; see the pattern tables.
                    has_ioc = any("known IOC" in x for x in labels)
                    fetches = any("download" in x or "URL" in x or "fetch" in x for x in labels)
                    executes = any("execution" in x or "interpreter" in x or "dynamic" in x for x in labels)
                    if has_ioc or (fetches and executes):
                        sev = "CRITICAL"
                    else:
                        sev = max_severity(sev, "HIGH")
                ctx.add(
                    sev,
                    "npm-lifecycle-script",
                    path,
                    line,
                    f"npm lifecycle script '{script_name}' in {name}@{version}",
                    f"{script_name}: {cmd}; signals={', '.join(labels) if labels else 'none'}",
                    "Do not install with scripts enabled. Review the referenced files and require a documented, minimal, reproducible reason for this lifecycle hook.",
                    "high" if is_install_phase else "medium",
                    ["npm", "script", script_name, "install-phase" if is_install_phase else "lifecycle"],
                )
            elif labels:
                sev = "MEDIUM"
                piped = any("download piped" in x for x in labels)
                fetches = any("download" in x or "URL" in x for x in labels)
                executes = any("dynamic" in x or "execution" in x for x in labels)
                if piped or (fetches and executes):
                    sev = "HIGH"
                ctx.add(
                    sev,
                    "npm-script",
                    path,
                    line,
                    f"Risky npm script '{script_name}' in {name}@{version}",
                    f"{script_name}: {cmd}; signals={', '.join(labels)}",
                    "Review before running npm scripts. Prefer explicit allowlisted scripts in CI and never run these with developer or publish tokens present.",
                    "medium",
                    ["npm", "script", script_name],
                )
    elif scripts is not None:
        ctx.add("MEDIUM", "manifest", path, key_line(text, "scripts"), "scripts field is not an object", str(scripts), "Normalize package.json and review manually.", "medium", ["package-json", "scripts"])

    # Dependencies and unusual spec types.
    for field_name in DEP_FIELDS:
        value = data.get(field_name)
        if value is None:
            continue
        line = key_line(text, field_name)
        if field_name in {"bundleDependencies", "bundledDependencies"}:
            if value is True:
                ctx.add("HIGH", "dependency", path, line, f"{field_name}=true bundles all dependencies", f"{field_name}: true", "Avoid bundled dependencies unless every bundled artifact is audited; inspect packed tarball contents.", "medium", ["npm", "bundled-deps"])
            elif isinstance(value, list) and value:
                ctx.add("MEDIUM", "dependency", path, line, "Package bundles dependencies", f"{field_name}: {value[:10]}", "Audit bundled packages inside the tarball; bundled code bypasses normal dependency review visibility.", "medium", ["npm", "bundled-deps"])
            continue
        if field_name == "peerDependenciesMeta":
            continue
        if not isinstance(value, dict):
            continue
        for dep_name, spec in flatten_dep_like(value):
            if not isinstance(spec, str):
                continue
            kind, sev, reason = classify_dep_spec(spec)
            tags = ["npm", "dependency", field_name, kind]
            if field_name == "optionalDependencies" and kind in {"git", "git-unpinned", "git-not-full-sha", "url", "local-file", "alias"}:
                sev = max_severity(sev, "HIGH")
                reason += "; optionalDependencies are easy to overlook and install failures may be ignored"
            if field_name in {"overrides", "resolutions"} and kind not in {"exact", "range", "workspace"}:
                sev = max_severity(sev, "HIGH")
                reason += "; override/resolution can redirect a transitive package"
            elif field_name in {"overrides", "resolutions"}:
                sev = max_severity(sev, "MEDIUM")
            if kind in {"exact", "workspace"}:
                continue
            if kind == "range" and ctx.mode == "library":
                sev = "INFO"
            # Full name first (package names may contain dots), then the last
            # path component for nested override keys, then the field's line.
            dep_line = key_line(text, dep_name) or key_line(text, dep_name.split(".")[-1]) or line
            ctx.add(
                sev,
                "dependency-spec",
                path,
                dep_line,
                f"Dependency spec review needed: {field_name}.{dep_name}",
                f"{dep_name}: {spec}; {reason}",
                "Prefer registry packages pinned by lockfile and integrity. Avoid git/URL/file specs unless explicitly allowlisted and pinned to immutable commits/artifacts.",
                "medium",
                tags,
            )
            if any(ioc.lower() in f"{dep_name} {spec}".lower() for ioc in IOC_STRINGS):
                ctx.add(
                    "CRITICAL",
                    "ioc",
                    path,
                    dep_line,
                    "Known supply-chain campaign IOC in dependency spec",
                    f"{field_name}.{dep_name}: {spec}",
                    "Quarantine this dependency/package, rotate any exposed credentials, and verify package versions against trusted upstream advisories.",
                    "high",
                    ["ioc", "npm", "dependency"],
                )

    # npm package metadata quality/security.
    if data.get("private") is not True and ctx.mode in {"application", "repo"} and path.parent == ctx.root:
        # For applications, accidental publish is a real quality/security issue.
        ctx.add("LOW", "quality", path, key_line(text, "private"), "Root project is not marked private", f"name={name}, version={version}", "For non-published applications, set private=true to prevent accidental npm publication.", "medium", ["npm", "quality"])

    if "license" not in data:
        ctx.add("LOW", "quality", path, None, "Missing license field", f"{name}@{version} has no license field", "Add an SPDX license expression or UNLICENSED for private packages.", "medium", ["npm", "quality"])
    if "repository" not in data:
        ctx.add("LOW", "quality", path, None, "Missing repository metadata", f"{name}@{version} has no repository field", "Add repository metadata so consumers can verify provenance and source.", "medium", ["npm", "quality"])
    if "engines" not in data:
        ctx.add("LOW", "quality", path, None, "Missing engines.node constraint", f"{name}@{version} has no engines field", "Declare supported Node.js versions to reduce ambiguous runtime behavior.", "low", ["node", "quality"])

    has_ts_files = any(path.parent.rglob("*.ts")) or any(path.parent.rglob("*.tsx"))
    if has_ts_files and not any(k in data for k in ("types", "typings")) and not (path.parent / "index.d.ts").exists():
        ctx.add("LOW", "quality", path, None, "TypeScript package lacks types metadata", f"{name}@{version} has TS files but no types/typings field", "Publish declaration files and declare the types entrypoint for consumers.", "low", ["typescript", "quality"])

    for entry_field in ("main", "module", "types", "typings"):
        val = data.get(entry_field)
        if isinstance(val, str) and val and not val.startswith(("http://", "https://")):
            if not (path.parent / val).exists():
                ctx.add("LOW", "quality", path, key_line(text, entry_field), f"Declared {entry_field} file is missing", f"{entry_field}: {val}", "Ensure package metadata points to files present in the repo/tarball.", "medium", ["npm", "entrypoint"])

    bin_field = data.get("bin")
    if isinstance(bin_field, str):
        check_bin_entry(ctx, path, name, bin_field, text)
    elif isinstance(bin_field, dict):
        for bin_name, bin_path in bin_field.items():
            if isinstance(bin_path, str):
                check_bin_entry(ctx, path, str(bin_name), bin_path, text)

    # Workspaces: make sure nested packages are scanned.
    workspaces = data.get("workspaces")
    if workspaces:
        ctx.add("INFO", "workspace", path, key_line(text, "workspaces"), "npm workspaces detected", one_line(workspaces), "Scan all workspace package.json files and compare dependency changes per workspace.", "high", ["npm", "workspace"])

    # Lockfile expectations.
    direct_dep_count = 0
    for dep_field in ("dependencies", "devDependencies", "optionalDependencies"):
        if isinstance(data.get(dep_field), dict):
            direct_dep_count += len(data[dep_field])
    if direct_dep_count and path.parent == ctx.root:
        lockfiles = [path.parent / "package-lock.json", path.parent / "npm-shrinkwrap.json", path.parent / "pnpm-lock.yaml", path.parent / "yarn.lock"]
        if not any(p.exists() for p in lockfiles) and ctx.mode in {"application", "repo"}:
            ctx.add("MEDIUM", "lockfile", path, None, "Root project has dependencies but no lockfile", f"{direct_dep_count} direct dependencies", "Use a committed lockfile for applications and CI to avoid surprise transitive updates.", "medium", ["npm", "lockfile"])
679
+
680
+
681
def check_bin_entry(ctx: ScanContext, package_json: Path, bin_name: str, bin_path: str, manifest_text: str):
    """Validate one ``bin`` entry of a package.json.

    Registers the target in ``ctx.lifecycle_entrypoints`` when it lies under
    the scan root, reports a missing target file, and reports JavaScript bin
    files that do not start with a ``#!`` shebang.

    Args:
        ctx: Scan context that receives findings.
        package_json: Path of the manifest declaring the entry.
        bin_name: Command name (or package name for a string ``bin``).
        bin_path: Target path as written in the manifest (untrusted input).
        manifest_text: Raw manifest text, used for line lookup.
    """
    unresolved = package_json.parent / bin_path
    try:
        p = unresolved.resolve()
    except (OSError, RuntimeError, ValueError):
        # A hostile path (NUL byte, symlink loop) must not abort the scan;
        # continue with the unresolved path, which then fails exists().
        p = unresolved
    line = key_line(manifest_text, "bin")
    try:
        ctx.lifecycle_entrypoints.add(p.relative_to(ctx.root).as_posix())
    except ValueError:
        # Target is outside the scan root; nothing to register.
        pass
    if not p.exists():
        ctx.add("MEDIUM", "bin-entrypoint", package_json, line, "bin entrypoint file is missing", f"{bin_name}: {bin_path}", "Verify the package tarball/source tree is complete and not relying on generated files from install scripts.", "medium", ["npm", "bin"])
        return
    try:
        # Only the first bytes are needed; avoid loading the whole file.
        with p.open("rb") as fh:
            first = fh.read(80)
    except OSError:
        return
    if p.suffix in {".js", ".mjs", ".cjs"} and not first.startswith(b"#!"):
        ctx.add("LOW", "quality", p, 1, "CLI bin entrypoint has no shebang", f"bin {bin_name} -> {bin_path}", "Add a Node.js shebang if the file is intended as an executable CLI.", "low", ["npm", "bin", "quality"])
698
+
699
+
700
def max_severity(a: str, b: str) -> str:
    """Return whichever severity ranks higher in SEVERITY_ORDER.

    Unknown names rank as 0; on a tie ``a`` is returned.
    """
    rank_a = SEVERITY_ORDER.get(a, 0)
    rank_b = SEVERITY_ORDER.get(b, 0)
    if rank_b > rank_a:
        return b
    return a
702
+
703
+
704
def analyze_npmrc(ctx: ScanContext, path: Path):
    """Scan an .npmrc file for embedded credentials and risky settings.

    At most ``min(ctx.max_file_bytes, 1 MiB)`` bytes are read. Blank lines and
    ``#``/``;`` comments are skipped. Each remaining line is checked for auth
    tokens, disabled script/TLS/audit protections, plain-HTTP registries,
    ``unsafe-perm`` and permissive dependency-source switches.

    Whitespace around ``=`` is removed before matching, so ``strict-ssl =
    false`` is detected like ``strict-ssl=false``. The stored evidence is the
    original stripped line.
    """
    data, _truncated = safe_read_bytes(path, min(ctx.max_file_bytes, 1024 * 1024))
    text = decode_text(data)
    for i, line in enumerate(text.splitlines(), 1):
        stripped = line.strip()
        if not stripped or stripped.startswith(("#", ";")):
            continue
        low = stripped.lower()
        # Canonical "key=value" form; identical to ``low`` when the line has
        # no spaces around "=".
        key, sep, value = low.partition("=")
        setting = f"{key.strip()}={value.strip()}" if sep else low
        if "_authtoken" in low or re.search(r"\b_auth\s*=", low):
            ctx.add("CRITICAL", "npmrc", path, i, "npm authentication token in .npmrc", stripped, "Remove tokens from repository/package. Rotate the token immediately if it was committed or shipped.", "high", ["secret", "npmrc"])
        if setting.startswith("ignore-scripts=false"):
            ctx.add("HIGH", "npmrc", path, i, "ignore-scripts is explicitly disabled", stripped, "Set ignore-scripts=true by default and run any required scripts only after manual approval.", "high", ["npmrc", "scripts"])
        if setting.startswith("strict-ssl=false"):
            ctx.add("HIGH", "npmrc", path, i, "strict-ssl is disabled", stripped, "Require TLS validation for registry traffic.", "high", ["npmrc", "tls"])
        if setting.startswith("audit=false"):
            ctx.add("MEDIUM", "npmrc", path, i, "npm audit disabled", stripped, "Enable audit in CI unless a documented alternative vulnerability scanner is enforced.", "medium", ["npmrc", "audit"])
        if setting.startswith("registry=http://") or re.match(r"@[^:]+:registry=http://", setting):
            ctx.add("HIGH", "npmrc", path, i, "Plain HTTP npm registry configured", stripped, "Use HTTPS registry URLs only.", "high", ["npmrc", "registry"])
        if setting.startswith("unsafe-perm=true"):
            ctx.add("MEDIUM", "npmrc", path, i, "unsafe-perm enabled", stripped, "Avoid elevated script execution privileges; use least-privilege build users.", "medium", ["npmrc", "scripts"])
        if setting.startswith(("allow-git=all", "allow-remote=all", "allow-file=all", "allow-directory=all")):
            ctx.add("MEDIUM", "npmrc", path, i, "npm allows non-registry dependency sources", stripped, "Prefer allow-git=none, allow-remote=none and allow-file=none unless specifically required.", "medium", ["npmrc", "dependency-source"])
726
+
727
+
728
def analyze_package_lock(ctx: ScanContext, path: Path):
    """Review a JSON npm lockfile (package-lock.json / npm-shrinkwrap.json).

    Records an informational finding with the lockfile version, then walks the
    ``packages`` map looking for install scripts, non-standard sources, missing
    integrity, bundled dependencies and known IOC strings. The legacy
    ``dependencies`` tree, when present, is handed to ``walk_lock_deps``.
    """
    data, text_or_err = load_json_file(path)
    ctx.summary.lockfile_count += 1
    if data is None:
        # On parse failure the second element carries the error text.
        ctx.add("MEDIUM", "lockfile", path, None, "Invalid npm lockfile JSON", text_or_err, "Regenerate lockfile from a trusted environment and review the diff.", "medium", ["npm", "lockfile"])
        return

    text = text_or_err
    is_mapping = isinstance(data, dict)
    lockfile_version = data.get("lockfileVersion") if is_mapping else None
    ctx.add("INFO", "lockfile", path, key_line(text, "lockfileVersion"), "npm lockfile detected", f"lockfileVersion={lockfile_version}", "Use this lockfile for dependency diff review; do not update it implicitly during audit.", "high", ["npm", "lockfile"])

    packages = data.get("packages", {}) if is_mapping else {}
    entries = packages.items() if isinstance(packages, dict) else ()
    for loc, meta in entries:
        if not isinstance(meta, dict):
            continue
        # An empty key is reported as "." (the root-entry check below relies on it).
        loc_str = loc or "."
        pseudo_path = f"{ctx.rel(path)}:{loc_str}"
        resolved = str(meta.get("resolved", ""))
        integrity = meta.get("integrity")
        optional = bool(meta.get("optional"))
        dev = bool(meta.get("dev"))
        in_bundle = bool(meta.get("inBundle") or meta.get("bundled"))

        if meta.get("hasInstallScript"):
            sev = "MEDIUM" if dev else "HIGH"
            if optional:
                sev = max_severity(sev, "HIGH")
            ctx.add(sev, "lockfile-install-script", pseudo_path, None, "Dependency has install/lifecycle script", f"{loc_str}; optional={optional}; dev={dev}; resolved={resolved}", "Inspect this package tarball before installing with scripts enabled. Prefer --ignore-scripts and allowlist required native build packages.", "high", ["npm", "lockfile", "install-script"])

        if resolved:
            analyze_resolved_url(ctx, path, pseudo_path, resolved, integrity, optional, dev)
            is_local = resolved.startswith(("file:", "link:"))
            if not integrity and not is_local and loc_str != ".":
                ctx.add("MEDIUM", "lockfile-integrity", pseudo_path, None, "Lockfile entry lacks integrity", f"{loc_str}; resolved={resolved}", "Require Subresource Integrity for registry tarballs; regenerate lockfile from a trusted registry if missing.", "medium", ["npm", "lockfile", "integrity"])

        if in_bundle:
            ctx.add("MEDIUM", "bundled-dependency", pseudo_path, None, "Bundled dependency in lockfile", f"{loc_str}; resolved={resolved}", "Review bundled code inside the package tarball; bundled code is less visible in normal dependency review.", "medium", ["npm", "bundle"])

        # IOC search covers the location, the resolved URL and the first 500
        # characters of the entry's serialized metadata.
        joined = f"{loc_str} {resolved} {json.dumps(meta, sort_keys=True)[:500]}"
        haystack = joined.lower()
        if any(ioc.lower() in haystack for ioc in IOC_STRINGS):
            ctx.add("CRITICAL", "ioc", pseudo_path, None, "Known supply-chain IOC in lockfile", joined[:500], "Quarantine dependency tree, verify affected package versions, and rotate potentially exposed credentials.", "high", ["ioc", "npm", "lockfile"])

    deps = data.get("dependencies", {}) if is_mapping else {}
    if isinstance(deps, dict):
        walk_lock_deps(ctx, path, deps, prefix="dependencies")
768
+
769
+
770
def walk_lock_deps(ctx: ScanContext, path: Path, deps: dict[str, Any], prefix: str):
    """Recursively review a legacy ``dependencies`` tree from an npm lockfile.

    Each dict-valued entry is checked for a non-standard source, a missing
    integrity value and known IOC strings. Nested ``dependencies`` maps are
    visited depth-first, and ``prefix`` accumulates the dotted location used
    in the reported pseudo-path.
    """
    for name, meta in deps.items():
        if not isinstance(meta, dict):
            continue
        pseudo_path = f"{ctx.rel(path)}:{prefix}.{name}"
        resolved = str(meta.get("resolved", ""))
        integrity = meta.get("integrity")
        optional = bool(meta.get("optional"))
        dev = bool(meta.get("dev"))

        if resolved:
            analyze_resolved_url(ctx, path, pseudo_path, resolved, integrity, optional, dev)
            if not integrity and not resolved.startswith(("file:", "link:")):
                ctx.add("MEDIUM", "lockfile-integrity", pseudo_path, None, "Legacy lockfile entry lacks integrity", f"{name}; resolved={resolved}", "Require integrity-pinned lockfiles for registry tarballs.", "medium", ["npm", "lockfile", "integrity"])

        haystack = f"{name} {resolved}".lower()
        if any(ioc.lower() in haystack for ioc in IOC_STRINGS):
            ctx.add("CRITICAL", "ioc", pseudo_path, None, "Known supply-chain IOC in legacy lock dependency", f"{name}; resolved={resolved}", "Quarantine dependency tree and validate affected versions.", "high", ["ioc", "npm", "lockfile"])

        nested = meta.get("dependencies")
        if isinstance(nested, dict):
            walk_lock_deps(ctx, path, nested, f"{prefix}.{name}.dependencies")
788
+
789
+
790
def analyze_resolved_url(ctx: ScanContext, path: Path, pseudo_path: str, resolved: str, integrity: Any, optional: bool, dev: bool):
    """Classify a lockfile ``resolved`` value and report non-standard sources.

    Classification, first match wins:
      * git / GitHub sources -> HIGH
      * plain ``http://`` -> HIGH
      * ``https://`` whose host is not an npm registry host -> MEDIUM
      * ``file:`` / ``link:`` -> MEDIUM
    An optional dependency with any of the above is raised to at least HIGH.
    Nothing is reported for default-registry URLs or unrecognised schemes.

    The registry check compares the parsed hostname exactly. A substring test
    would accept ``https://evil.example/registry.npmjs.org/x.tgz`` or
    ``https://registry.npmjs.org.evil.example/`` as trusted.

    ``path`` is accepted for signature compatibility and is not used here.
    """
    from urllib.parse import urlsplit  # local import: keeps this block self-contained

    trusted_hosts = {"registry.npmjs.org", "registry.npmjs.com"}
    low = resolved.lower()
    sev = "INFO"
    reason = ""
    if low.startswith("git+") or "github.com" in low or low.startswith("github:"):
        sev = "HIGH"
        reason = "git/GitHub dependency source can run prepare scripts and bypass registry tarball review"
    elif low.startswith("http://"):
        sev = "HIGH"
        reason = "plaintext HTTP tarball source"
    elif low.startswith("https://"):
        try:
            host = urlsplit(low).hostname or ""
        except ValueError:
            # Malformed authority (e.g. broken IPv6 literal): never trusted.
            host = ""
        if host not in trusted_hosts:
            sev = "MEDIUM"
            reason = "non-default remote tarball source"
    elif low.startswith(("file:", "link:")):
        sev = "MEDIUM"
        reason = "local file/link source depends on local filesystem state"

    if sev == "INFO":
        return
    if optional:
        sev = max_severity(sev, "HIGH")
        reason += "; optional dependency source is easy to overlook"
    ctx.add(sev, "lockfile-source", pseudo_path, None, "Non-standard dependency source in lockfile", f"resolved={resolved}; integrity={bool(integrity)}; optional={optional}; dev={dev}; {reason}", "Review and allowlist this source explicitly, or replace it with a registry package pinned by integrity.", "medium", ["npm", "lockfile", "source"])
811
+
812
+
813
def analyze_text_lockfile(ctx: ScanContext, path: Path):
    """Line-scan a text lockfile (pnpm-lock.yaml / yarn.lock) for risky sources.

    Per line, the first matching source rule wins: git reference (HIGH), plain
    HTTP (HIGH), or an HTTPS URL on a host other than the npm/yarn registries
    (MEDIUM). Build-script markers and known IOC strings are checked
    independently of the source rule. A truncated read is reported as INFO.

    The registry check compares the host of every ``https://`` URL on the line
    exactly. A substring test would accept
    ``https://evil.example/registry.npmjs.org/x.tgz`` as trusted.
    """
    trusted_hosts = {"registry.npmjs.org", "registry.yarnpkg.com"}
    git_markers = ("git+", "github:", "gitlab:", "bitbucket:", "github.com")
    # Captures the authority part (optional userinfo@, host, optional :port).
    authority_re = re.compile(r"https://([^/\s\"'#?]+)")

    ctx.summary.lockfile_count += 1
    data, truncated = safe_read_bytes(path, ctx.max_file_bytes)
    text = decode_text(data)
    for i, line in enumerate(text.splitlines(), 1):
        low = line.lower()
        if any(marker in low for marker in git_markers):
            ctx.add("HIGH", "lockfile-source", path, i, "Git dependency in text lockfile", line, "Review git dependencies manually and require immutable full-SHA pins; avoid scripts during install.", "medium", ["npm", "lockfile", "git"])
        elif "http://" in low:
            ctx.add("HIGH", "lockfile-source", path, i, "Plain HTTP source in lockfile", line, "Use HTTPS and integrity-pinned registry artifacts only.", "high", ["npm", "lockfile", "http"])
        elif "https://" in low:
            # Drop userinfo and port so "registry.npmjs.org@evil.example" is
            # judged by its real host.
            hosts = {
                authority.rsplit("@", 1)[-1].split(":", 1)[0]
                for authority in authority_re.findall(low)
            }
            if not hosts or not hosts <= trusted_hosts:
                ctx.add("MEDIUM", "lockfile-source", path, i, "Non-default URL source in lockfile", line, "Review and allowlist non-default registries/tarball sources.", "medium", ["npm", "lockfile", "url"])
        if "requiresbuild: true" in low or "requiresbuild=true" in low:
            ctx.add("MEDIUM", "lockfile-install-script", path, i, "Dependency requires build/install scripts", line, "Identify package, inspect tarball, and install with scripts disabled unless allowlisted.", "medium", ["npm", "pnpm", "install-script"])
        if any(ioc.lower() in low for ioc in IOC_STRINGS):
            ctx.add("CRITICAL", "ioc", path, i, "Known supply-chain IOC in lockfile", line, "Quarantine dependency tree and verify affected versions.", "high", ["ioc", "npm", "lockfile"])
    if truncated:
        ctx.add("INFO", "scan-limit", path, None, "Lockfile scan truncated", f"Scanned first {ctx.max_file_bytes} bytes", "Increase --max-file-bytes if suspicious entries may be later in the file.", "medium", ["limit"])
832
+
833
+
834
def analyze_tsconfig(ctx: ScanContext, path: Path):
    """Report weakened TypeScript compiler settings in a tsconfig file.

    When the file does not parse as JSON (tsconfig is often JSONC), only a
    textual check for ``"strict": false`` is made. Otherwise the
    ``compilerOptions`` mapping is inspected for disabled strictness flags,
    ``allowJs`` and, in package/library mode, missing declaration output.
    """
    data, text_or_err = load_json_file(path)
    ctx.summary.tsconfig_count += 1

    if data is None:
        # JSONC fallback: lightweight text pass over the raw file content.
        text = path.read_text("utf-8", errors="replace")[:ctx.max_file_bytes]
        strict_off = re.compile(r"\"strict\"\s*:\s*false")
        if strict_off.search(text):
            line, ev = first_match_line(text, strict_off)
            ctx.add("LOW", "typescript-quality", path, line, "TypeScript strict mode disabled", ev or "strict=false", "Enable strict mode or document why the package cannot use it.", "medium", ["typescript", "quality"])
        return

    if not isinstance(data, dict):
        return
    opts = data.get("compilerOptions", {})
    if not isinstance(opts, dict):
        return

    strictness_flags = ("strict", "noImplicitAny", "strictNullChecks", "noUncheckedIndexedAccess")
    for opt in strictness_flags:
        # Only an explicit false counts; an absent option is not reported.
        if opts.get(opt) is False:
            ctx.add("LOW", "typescript-quality", path, key_line(text_or_err, opt), f"TypeScript compiler option {opt}=false", f"{opt}=false", "Tighten TypeScript compiler checks for library-quality code.", "medium", ["typescript", "quality"])

    if opts.get("allowJs") is True:
        ctx.add("LOW", "typescript-quality", path, key_line(text_or_err, "allowJs"), "allowJs enabled", "allowJs=true", "Ensure JavaScript sources are covered by linting and malware scan; mixed JS/TS increases review surface.", "low", ["typescript", "quality"])

    publishes_types = opts.get("declaration") is True
    if not publishes_types and ctx.mode in {"package", "library"}:
        ctx.add("LOW", "typescript-quality", path, key_line(text_or_err, "declaration"), "Declaration output not enabled", "compilerOptions.declaration is not true", "Published TypeScript packages should produce .d.ts declaration files or document generated types.", "low", ["typescript", "quality"])
855
+
856
+
857
def _workflow_run_commands(text: str):
    """Yield ``(offset, command)`` for shell commands found in ``run:`` steps.

    Inline values are yielded unchanged with the offset of the ``run:`` key.
    When the value is a YAML block-scalar header (``|`` or ``>``, optionally
    with chomping/indent indicators), the indented lines that follow are
    yielded instead: one command per line, backslash continuations joined,
    each with the offset of its first line.
    """
    header_re = re.compile(r"^[|>][+\-0-9]{0,2}(?:\s+#.*)?$")
    for m in re.finditer(r"run:\s*(.+)", text):
        cmd = m.group(1).strip()
        if not header_re.match(cmd):
            yield m.start(), cmd
            continue

        # Indentation of the line holding the block header; the block body
        # must be indented deeper than this.
        header_line_start = text.rfind("\n", 0, m.start(1)) + 1
        header_prefix = text[header_line_start:m.start(1)]
        key_indent = len(header_prefix) - len(header_prefix.lstrip(" "))

        block_indent = None
        pending, pending_at = "", None
        pos = text.find("\n", m.end(1))
        while pos != -1:
            start = pos + 1
            pos = text.find("\n", start)
            raw = text[start:] if pos == -1 else text[start:pos]
            body = raw.strip()
            if not body:
                continue  # blank lines are part of the scalar
            indent = len(raw) - len(raw.lstrip(" "))
            if block_indent is None:
                if indent <= key_indent:
                    break  # empty block: next line is already a sibling key
                block_indent = indent
            elif indent < block_indent:
                break  # dedent ends the block scalar
            if pending_at is None:
                pending_at = start
            if body.endswith("\\"):
                pending += body[:-1] + " "
                continue
            yield pending_at, pending + body
            pending, pending_at = "", None
        if pending_at is not None:
            # Block ended in the middle of a continued command.
            yield pending_at, pending.strip()


def analyze_workflow(ctx: ScanContext, path: Path):
    """Review a GitHub Actions workflow file for risky triggers and steps.

    Checks, in order: ``pull_request_target`` and ``workflow_run`` triggers,
    broad token permissions, actions not pinned to a full commit SHA, install
    commands that do not mention ``ignore-scripts``, and ``npm publish`` steps
    in workflows that reference registry tokens or secrets. A truncated read
    is reported as INFO.

    Multi-line ``run: |`` / ``run: >`` steps are expanded line by line so
    their commands are checked too; a plain regex over ``run:`` would only see
    the block indicator.
    """
    ctx.summary.workflow_count += 1
    data, truncated = safe_read_bytes(path, ctx.max_file_bytes)
    text = decode_text(data)
    low = text.lower()
    if re.search(r"^\s*pull_request_target\s*:", text, flags=re.M):
        sev = "HIGH" if re.search(r"uses:\s*actions/checkout|run:\s*(npm|pnpm|yarn|bun|node|bash|sh)", text, flags=re.I) else "MEDIUM"
        ctx.add(sev, "github-actions", path, None, "Workflow uses pull_request_target", "pull_request_target with checkout/run risk if untrusted PR code is executed", "Do not checkout or execute untrusted PR code in pull_request_target workflows; use read-only permissions and explicit validation.", "high", ["github-actions", "pr"])
    if re.search(r"^\s*workflow_run\s*:", text, flags=re.M):
        ctx.add("MEDIUM", "github-actions", path, None, "Workflow triggered by workflow_run", "workflow_run can bridge artifacts and trust boundaries", "Verify artifacts are trusted before execution and keep token permissions minimal.", "medium", ["github-actions"])
    if "permissions: write-all" in low:
        ctx.add("HIGH", "github-actions-permissions", path, None, "Workflow grants write-all permissions", "permissions: write-all", "Use permissions: {} by default and grant only minimal scopes per job.", "high", ["github-actions", "permissions"])
    for perm in ("contents: write", "packages: write", "actions: write", "id-token: write", "pull-requests: write"):
        if perm in low:
            ctx.add("MEDIUM", "github-actions-permissions", path, None, f"Workflow grants {perm}", perm, "Verify this permission is required and isolated to trusted branches/environments.", "medium", ["github-actions", "permissions"])

    # Unpinned actions.
    for m in re.finditer(r"uses:\s*([^\s#]+)", text):
        ref = m.group(1).strip().strip("'\"")
        line = line_for_offset(text, m.start())
        if ref.startswith("./") or ref.startswith("docker://"):
            continue  # local actions and container images are not pinned by SHA
        if "@" not in ref:
            ctx.add("MEDIUM", "github-actions-pinning", path, line, "GitHub Action without explicit ref", ref, "Pin third-party actions to full-length commit SHAs and maintain them with Dependabot/Renovate.", "medium", ["github-actions", "pinning"])
            continue
        action_ref = ref.rsplit("@", 1)[-1]
        if not FULL_SHA_RE.match(action_ref):
            ctx.add("MEDIUM", "github-actions-pinning", path, line, "GitHub Action not pinned to full commit SHA", ref, "Pin third-party actions to full-length commit SHAs to make workflow dependencies immutable.", "medium", ["github-actions", "pinning"])

    # Install commands without script suppression, and token-backed publishes.
    uses_registry_token = re.search(r"(NPM_TOKEN|NODE_AUTH_TOKEN|secrets\.)", text)
    for offset, cmd in _workflow_run_commands(text):
        if re.search(r"\b(npm\s+(install|i|ci)|pnpm\s+install|yarn\s+install|bun\s+install)\b", cmd) and "ignore-scripts" not in cmd:
            ctx.add("HIGH", "github-actions-install", path, line_for_offset(text, offset), "CI install command does not disable lifecycle scripts", cmd, "For dependency-review jobs, use --ignore-scripts and omit optional dependencies before any build/test step.", "medium", ["github-actions", "npm", "install-scripts"])
        if re.search(r"\bnpm\s+publish\b", cmd) and uses_registry_token:
            ctx.add("HIGH", "github-actions-publish", path, line_for_offset(text, offset), "npm publish workflow uses registry token", cmd, "Ensure publish only runs from protected tags/branches with trusted source and minimal token scope; consider trusted publishing/OIDC.", "medium", ["github-actions", "npm", "publish"])
    if truncated:
        ctx.add("INFO", "scan-limit", path, None, "Workflow scan truncated", f"Scanned first {ctx.max_file_bytes} bytes", "Increase --max-file-bytes for complete workflow review.", "medium", ["limit"])
893
+
894
+
895
def analyze_json_config(ctx: ScanContext, path: Path):
    """Flag IDE / AI-agent configuration that may run commands on clone/open.

    Only paths containing ``.vscode/``, ``.claude/``, ``.cursor/`` or
    ``.devcontainer/`` are considered. A finding is emitted only for
    tasks/settings files and anything under ``.claude/``. Its severity depends
    on the risk labels that ``script_risk_labels`` derives from the file text.
    """
    rel = ctx.rel(path)
    data, text_or_err = load_json_file(path)
    if data is not None:
        text = text_or_err
    else:
        # Not valid JSON: fall back to the raw text, capped like other scans.
        text = path.read_text("utf-8", errors="replace")[:ctx.max_file_bytes]

    watched_dirs = [".vscode/", ".claude/", ".cursor/", ".devcontainer/"]
    if not any(part in rel for part in watched_dirs):
        return

    labels = script_risk_labels(text)
    is_execution_config = "tasks.json" in rel or "settings.json" in rel or ".claude/" in rel
    if not is_execution_config:
        return

    has_ioc = any("known IOC" in x for x in labels)
    fetches = any("download" in x or "URL" in x for x in labels)
    executes = any("execution" in x or "interpreter" in x for x in labels)
    if has_ioc or (fetches and executes):
        sev = "CRITICAL"
    elif labels:
        sev = "HIGH"
    else:
        sev = "MEDIUM"

    signals = ", ".join(labels) if labels else "manual review required"
    ctx.add(sev, "ide-agent-config", path, None, "IDE/AI-agent execution configuration present", f"signals={signals}", "Do not auto-run IDE tasks or AI-agent hooks from untrusted repos/packages. Review and remove unexpected commands.", "medium", ["ide", "agent", "persistence"])
909
+
910
+
911
def entropy(s: str) -> float:
    """Return the Shannon entropy of ``s`` in bits per character.

    The empty string yields ``0.0``.
    """
    length = len(s)
    if length == 0:
        return 0.0
    # Tally characters in first-occurrence order.
    tally: dict[str, int] = {}
    for ch in s:
        if ch in tally:
            tally[ch] += 1
        else:
            tally[ch] = 1
    acc = 0.0
    for c in tally.values():
        acc += (c / length) * math.log2(c / length)
    return -acc
919
+
920
+
921
def scan_source_file(ctx: ScanContext, path: Path):
    """Scan one file for malware and quality indicators and record findings.

    Flow:
      1. Binary executables and embedded archives are reported by extension
         (with size and SHA-256) and not inspected further.
      2. Files that are neither a known text extension nor one of a few named
         config/build files are skipped.
      3. Text content is checked for known IOC strings and secret literals.
      4. Pattern hits (network, execution, stealth, credential paths,
         obfuscation, IDE/agent paths, GitHub API, token names) are combined
         into behaviour findings. For documentation files these combined
         heuristics apply only when the file is a lifecycle entrypoint.
      5. Structural obfuscation signals (very long lines, huge near-single-line
         files, ``_0x`` identifiers, long high-entropy literals) are reported.
      6. A truncated read is reported as INFO.

    Unreadable files (``OSError`` on stat or read) are skipped silently.
    """
    try:
        st = path.stat()
    except OSError:
        return

    ext = path.suffix.lower()
    rel = ctx.rel(path)
    lifecycle_names = {"setup.mjs", "setup.js", "install.js", "postinstall.js", "preinstall.js", "prepare.js"}
    is_lifecycle_ref = rel in ctx.lifecycle_entrypoints or path.name.lower() in lifecycle_names

    # --- Non-text artifacts: report by extension and stop. ---
    if ext in BINARY_EXEC_EXTENSIONS:
        sev = "HIGH" if ctx.is_tarball or is_lifecycle_ref else "MEDIUM"
        digest = sha256_file(path, limit=min(st.st_size, 50 * 1024 * 1024))
        ctx.add(sev, "binary-artifact", path, None, "Native/binary artifact present", f"{path.name}; size={st.st_size}; sha256={digest}", "Verify binary provenance, rebuildability and platform necessity. Avoid binaries downloaded or executed by install scripts.", "medium", ["binary", "npm-package"])
        return
    # An archive with the same file name as the scan target is not reported.
    if ext in ARCHIVE_EXTENSIONS and path.name != Path(ctx.target_label).name:
        digest = sha256_file(path, limit=min(st.st_size, 50 * 1024 * 1024))
        ctx.add("MEDIUM", "embedded-archive", path, None, "Embedded archive present", f"{path.name}; size={st.st_size}; sha256={digest}", "Extract and scan embedded archives only in a safe offline sandbox; verify why they are shipped.", "medium", ["archive", "payload"])
        return

    named_text_files = {"Makefile", "Dockerfile", ".npmrc", ".yarnrc", ".pnpmrc"}
    if ext not in TEXT_EXTENSIONS and path.name not in named_text_files:
        return

    try:
        data, truncated = safe_read_bytes(path, ctx.max_file_bytes)
    except OSError:
        return
    if is_probably_binary(data):
        if ext in CODE_EXTENSIONS:
            ctx.add("HIGH", "obfuscation", path, None, "Code file appears binary or packed", f"{path.name}; size={st.st_size}", "Treat as suspicious until unpacked or explained by a reproducible build process.", "medium", ["packed", "obfuscation"])
        return

    text = decode_text(data)
    lower_text = text.lower()
    is_doc_file = ext in DOC_EXTENSIONS

    # --- Known IOCs first (case-insensitive, one finding per IOC). ---
    for ioc in IOC_STRINGS:
        at = lower_text.find(ioc.lower())
        if at < 0:
            continue
        ctx.add("CRITICAL", "ioc", path, line_for_offset(lower_text, at), "Known supply-chain campaign IOC found", ioc, "Quarantine the package/repo, verify affected versions from advisories, and rotate potentially exposed credentials.", "high", ["ioc", "npm", "malware"])

    # --- Secret literals: only the first matching pattern is reported. ---
    # NOTE(review): the matched literal is passed as evidence; presumably
    # ctx.add masks secret evidence before output - confirm.
    secret_match = next((hit for hit in (pat.search(text) for pat in SECRET_PATTERNS) if hit), None)
    if secret_match:
        ctx.add("CRITICAL", "secret", path, line_for_offset(text, secret_match.start()), "Possible live secret/token committed or shipped", secret_match.group(0), "Remove the secret and rotate it immediately. Treat package as compromised if token was published.", "high", ["secret"])

    network_hits = collect_hits(text, NETWORK_PATTERNS)
    exec_hits = collect_hits(text, EXEC_PATTERNS)
    stealth_hits = collect_hits(text, STEALTH_PATTERNS)
    secret_path_hits = collect_hits(text, SECRET_PATH_PATTERNS)
    obf_hits = collect_hits(text, OBFUSCATION_PATTERNS)
    ide_hits = collect_hits(text, IDE_AGENT_PATTERNS)
    gh_hits = collect_hits(text, GITHUB_API_PATTERNS)
    token_name_hit = TOKEN_NAME_PATTERN.search(text)

    # Documentation often contains command/API examples that would otherwise
    # trigger payload heuristics. Only apply those heuristics to docs when the
    # file is a lifecycle entrypoint or explicit setup/install script.
    apply_payload_heuristics = (not is_doc_file) or is_lifecycle_ref

    if apply_payload_heuristics:
        # --- Network / execution behaviour. ---
        if network_hits and exec_hits:
            known_lines = [h[0] for h in network_hits + exec_hits if h[0] is not None]
            line = min(known_lines) if known_lines else None
            ctx.add("CRITICAL", "payload-behavior", path, line, "Network plus code/process execution behavior", f"network={labels_only(network_hits)}; execution={labels_only(exec_hits)}", "Do not execute. Manually trace data flow and verify there is no downloader/dropper/exfiltration path.", "medium", ["network", "exec", "malware-pattern"])
        elif exec_hits and is_lifecycle_ref:
            ctx.add("HIGH", "payload-behavior", path, exec_hits[0][0], "Lifecycle-referenced file can execute commands/code", f"execution={labels_only(exec_hits)}", "Review lifecycle entrypoint manually. Avoid install scripts unless required and allowlisted.", "medium", ["exec", "lifecycle"])
        elif network_hits and is_lifecycle_ref:
            ctx.add("HIGH", "payload-behavior", path, network_hits[0][0], "Lifecycle-referenced file performs network access", f"network={labels_only(network_hits)}", "Install scripts should not download code/binaries without transparent integrity checks and provenance.", "medium", ["network", "lifecycle"])

        # --- Credential access, alone or combined with an outbound channel. ---
        if secret_path_hits or token_name_hit:
            labels = labels_only(secret_path_hits)
            if token_name_hit:
                labels.append("credential environment variable")
            # Prefer the token-name line; fall back to the first secret-path
            # hit when there is no token match or its line is falsy.
            cred_line = None
            if token_name_hit:
                cred_line = line_for_offset(text, token_name_hit.start())
            if not cred_line:
                cred_line = secret_path_hits[0][0] if secret_path_hits else None
            if network_hits or exec_hits or gh_hits:
                ctx.add("CRITICAL", "credential-access", path, cred_line, "Credential access combined with network/execution", f"credentials={labels}; network={labels_only(network_hits)}; execution={labels_only(exec_hits)}; github_api={labels_only(gh_hits)}", "Assume credential theft is possible. Do not run; inspect for exfiltration and rotate any credentials exposed to this code.", "medium", ["credential", "exfiltration"])
            else:
                ctx.add("MEDIUM", "credential-access", path, cred_line, "Credential-related names or paths referenced", f"credentials={labels}", "Verify this is legitimate configuration handling and not token harvesting.", "medium", ["credential"])

        # --- IDE / agent configuration paths. ---
        if ide_hits:
            if network_hits or exec_hits or gh_hits:
                ctx.add("CRITICAL", "ide-agent-persistence", path, ide_hits[0][0], "IDE/AI-agent config path combined with execution/network/GitHub write behavior", f"ide={labels_only(ide_hits)}; exec={labels_only(exec_hits)}; network={labels_only(network_hits)}; github_api={labels_only(gh_hits)}", "Treat as potential repo-poisoning/persistence. Remove configs and audit GitHub token exposure.", "medium", ["ide", "agent", "persistence"])
            else:
                ctx.add("HIGH", "ide-agent-config", path, ide_hits[0][0], "IDE/AI-agent configuration path referenced", f"ide={labels_only(ide_hits)}", "Review whether the package/repo writes or ships IDE/agent configs unexpectedly.", "medium", ["ide", "agent"])

        # --- GitHub API used together with token-related names. ---
        if gh_hits and token_name_hit:
            ctx.add("HIGH", "github-api", path, gh_hits[0][0], "GitHub API usage with token-related code", f"github_api={labels_only(gh_hits)}", "Ensure GitHub token use is limited to documented operations and cannot modify repo config or workflows unexpectedly.", "medium", ["github", "token"])

        # --- Stealth only matters alongside lifecycle/network/exec context. ---
        if stealth_hits and (is_lifecycle_ref or network_hits or exec_hits):
            ctx.add("HIGH", "stealth", path, stealth_hits[0][0], "Stealthy script behavior", f"stealth={labels_only(stealth_hits)}", "Review why output is suppressed, permissions changed, or failures forced after execution.", "medium", ["stealth"])

    # --- Obfuscation heuristics. ---
    lines = text.splitlines()
    line_lengths = [len(row) for row in lines]
    max_line_len = max(line_lengths, default=0)
    long_lines = [number for number, size in enumerate(line_lengths, 1) if size > 2000]
    huge_single_line = st.st_size > 500_000 and len(lines) <= 3
    window = text[:ctx.max_file_bytes]
    hex_id_count = len(re.findall(r"_0x[a-fA-F0-9]{3,}", window))
    base64_like = re.findall(r"['\"]([A-Za-z0-9+/]{160,}={0,2})['\"]", window)
    # Only the first ten long literals are entropy-checked.
    high_entropy_strings = [s for s in base64_like[:10] if entropy(s) > 4.5]

    structural_signal = obf_hits or long_lines or huge_single_line or hex_id_count > 20 or high_entropy_strings
    if apply_payload_heuristics and structural_signal:
        active_context = bool(network_hits or exec_hits or is_lifecycle_ref)
        if huge_single_line and active_context:
            sev = "CRITICAL"
        elif active_context or huge_single_line:
            sev = "HIGH"
        else:
            sev = "MEDIUM"
        if obf_hits:
            report_line = obf_hits[0][0]
        elif long_lines:
            report_line = long_lines[0]
        else:
            report_line = 1
        ctx.add(sev, "obfuscation", path, report_line, "Obfuscation or packed payload indicators", f"obf={labels_only(obf_hits)}; max_line_len={max_line_len}; huge_single_line={huge_single_line}; hex_ids={hex_id_count}; high_entropy_strings={len(high_entropy_strings)}; truncated={truncated}", "Demand unobfuscated source, reproducible build provenance, and manual reverse engineering before use.", "medium", ["obfuscation", "packed"])

    if truncated:
        ctx.add("INFO", "scan-limit", path, None, "File scan truncated", f"size={st.st_size}; scanned_first_bytes={ctx.max_file_bytes}", "Increase --max-file-bytes for full-file scanning if this file is relevant.", "medium", ["limit"])
1027
+
1028
+
1029
def collect_hits(text: str, patterns: list[tuple[re.Pattern[str], str]]) -> list[tuple[int | None, str, str]]:
    """Return one hit per pattern that matches ``text``.

    Each hit is ``(line, label, snippet)``: the line of the first match as
    given by ``line_for_offset``, the pattern's label, and the matched text
    passed through ``one_line`` and cut to 120 characters. Hits keep the order
    of ``patterns``; only the first match of each pattern is used.
    """
    return [
        (line_for_offset(text, found.start()), label, one_line(found.group(0))[:120])
        for pat, label in patterns
        if (found := pat.search(text))
    ]
1036
+
1037
+
1038
def labels_only(hits: list[tuple[int | None, str, str]]) -> list[str]:
    """Return the distinct labels (second tuple field) of ``hits``, sorted."""
    unique = {hit[1] for hit in hits}
    return sorted(unique)
1040
+
1041
+
1042
def analyze_package_artifact_hygiene(ctx: ScanContext, files: list[Path]):
    """Report IDE/agent/npm config files shipped in the scanned contents.

    Paths are compared after dropping a leading ``package/`` component. For
    tarball scans, files under ``.claude/`` or ``.vscode/`` are CRITICAL; every
    other match is HIGH. A tarball without any ``package.json`` is also
    reported.
    """
    sensitive_exact = {".vscode/tasks.json", ".vscode/settings.json", ".claude/settings.json", ".cursor/rules", ".npmrc"}
    sensitive_dirs = (".claude/", ".cursor/", ".vscode/")

    # For npm tarballs, unexpected config files are especially risky.
    for p in files:
        rel = ctx.rel(p)
        inside = rel[len("package/"):] if rel.startswith("package/") else rel
        if not (inside in sensitive_exact or inside.startswith(sensitive_dirs)):
            continue
        if ctx.is_tarball and inside.startswith((".claude/", ".vscode/")):
            sev = "CRITICAL"
        else:
            sev = "HIGH"
        ctx.add(sev, "package-artifact", p, None, "Sensitive IDE/agent/npm config present in package/repo contents", inside, "Remove unexpected config/persistence files from npm package or repo contents and audit how they were introduced.", "medium", ["npm-package", "artifact", "ide"])

    if not ctx.is_tarball:
        return
    if not any(p.name == "package.json" for p in files):
        ctx.add("HIGH", "package-artifact", ctx.root, None, "npm tarball has no package.json", "No package.json found after extraction", "Reject this artifact as malformed or suspicious.", "high", ["npm-package"])
1059
+
1060
+
1061
def safe_extract_tgz(tgz_path: Path, destination: Path) -> list[Finding]:
    """Extract regular files and directories from a tarball into ``destination``.

    Returns findings describing anything unsafe or unreadable:
      * the archive cannot be opened or its member index cannot be read,
      * a member has an absolute path or contains ``..``,
      * a member would resolve outside ``destination``,
      * a member is a symlink or hardlink (reported, never created),
      * a member fails to extract.
    Members that are neither files, directories nor links are skipped.

    Extracted files keep the archive's permission bits but are always left
    owner-readable. Otherwise an archive could ship mode-000 files that the
    later scan cannot open and would silently skip.
    """
    findings: list[Finding] = []
    try:
        tf = tarfile.open(tgz_path, "r:*")
    except Exception as exc:
        findings.append(Finding("CRITICAL", "tarball", str(tgz_path), None, "Unable to open tarball", str(exc), "Reject malformed package artifact.", "high", ["tarball"]))
        return findings

    dest_resolved = destination.resolve()
    with tf:
        try:
            # Reading the full index can fail on truncated/corrupt streams even
            # though opening succeeded.
            members = tf.getmembers()
        except Exception as exc:
            findings.append(Finding("CRITICAL", "tarball", str(tgz_path), None, "Unable to read tarball member index", str(exc), "Reject malformed package artifact.", "high", ["tarball"]))
            return findings

        for member in members:
            name = member.name
            try:
                pure = PurePosixPath(name)
                if pure.is_absolute() or ".." in pure.parts:
                    findings.append(Finding("CRITICAL", "tarball", name, None, "Unsafe tarball path traversal entry", name, "Reject artifact and report to registry/upstream.", "high", ["tarball", "path-traversal"]))
                    continue
                # Second line of defence: the resolved target must stay inside
                # the destination directory.
                target = (destination / Path(*pure.parts)).resolve()
                if not str(target).startswith(str(dest_resolved) + os.sep) and target != dest_resolved:
                    findings.append(Finding("CRITICAL", "tarball", name, None, "Unsafe tarball extraction target", str(target), "Reject artifact and report to registry/upstream.", "high", ["tarball", "path-traversal"]))
                    continue
                if member.issym() or member.islnk():
                    findings.append(Finding("MEDIUM", "tarball", name, None, "Symlink/hardlink entry in tarball", f"linkname={member.linkname}", "Review links manually; scanner does not follow package symlinks.", "medium", ["tarball", "link"]))
                    continue
                if member.isdir():
                    target.mkdir(parents=True, exist_ok=True)
                    continue
                if member.isfile():
                    target.parent.mkdir(parents=True, exist_ok=True)
                    src = tf.extractfile(member)
                    if src is None:
                        continue
                    with target.open("wb") as out:
                        shutil.copyfileobj(src, out)
                    try:
                        # Keep archive bits, but never drop owner-read.
                        os.chmod(target, (member.mode & 0o777) | 0o400)
                    except OSError:
                        # Best effort: permission bits are not required for
                        # the static scan itself.
                        pass
            except Exception as exc:
                findings.append(Finding("HIGH", "tarball", name, None, "Error extracting tarball member", str(exc), "Reject or manually inspect artifact extraction behavior.", "medium", ["tarball"]))
    return findings
1101
+
1102
+
1103
def scan_root(ctx: ScanContext) -> ScanContext:
    """Run every analyzer over the files under ``ctx.root`` and return ``ctx``.

    Order: artifact hygiene, then a first pass that dispatches manifests,
    lockfiles and config files to their analyzers, then a content scan of
    every file, then basic README / SECURITY / LICENSE presence checks.

    Workflow and IDE-config dispatch compares paths after dropping a leading
    ``package/`` component (the same normalisation the hygiene check uses), so
    these files are analyzed inside extracted npm tarballs as well.
    """
    files = iter_files(ctx)
    analyze_package_artifact_hygiene(ctx, files)

    lockfile_json = {"package-lock.json", "npm-shrinkwrap.json"}
    lockfile_text = {"pnpm-lock.yaml", "yarn.lock"}
    rc_files = {".npmrc", ".yarnrc", ".pnpmrc"}
    ide_dirs = (".vscode/", ".claude/", ".cursor/", ".devcontainer/")

    # First pass: manifests and lock/config files that establish context.
    for p in files:
        name = p.name
        rel = ctx.rel(p)
        inside = rel[len("package/"):] if rel.startswith("package/") else rel
        suffix = p.suffix.lower()
        if name == "package.json":
            analyze_package_json(ctx, p)
        elif name in lockfile_json:
            analyze_package_lock(ctx, p)
        elif name in lockfile_text:
            analyze_text_lockfile(ctx, p)
        elif name in rc_files:
            analyze_npmrc(ctx, p)
        elif name.startswith("tsconfig.") and name.endswith(".json"):
            # Covers tsconfig.json and variants such as tsconfig.build.json.
            analyze_tsconfig(ctx, p)
        elif inside.startswith(".github/workflows/") and suffix in {".yml", ".yaml"}:
            analyze_workflow(ctx, p)
        elif inside.startswith(ide_dirs) and suffix in {".json", ".jsonc", ".yml", ".yaml"}:
            analyze_json_config(ctx, p)

    # Second pass: source and config content scan.
    for p in files:
        scan_source_file(ctx, p)

    # Basic repo/package quality checks.
    root = ctx.root

    def _present(candidates):
        return any((root / candidate).exists() for candidate in candidates)

    if not _present(("README.md", "readme.md", "README", "package/README.md")):
        ctx.add("LOW", "quality", root, None, "Missing README", "No README found at target root", "Add README with install, build, security and provenance guidance.", "low", ["quality"])
    if not _present(("SECURITY.md", "security.md", ".github/SECURITY.md", "package/SECURITY.md")):
        ctx.add("LOW", "quality", root, None, "Missing SECURITY.md", "No SECURITY.md found", "Add a security policy with vulnerability reporting instructions.", "low", ["quality", "security-policy"])
    if not _present(("LICENSE", "LICENSE.md", "license", "package/LICENSE", "package/LICENSE.md")):
        ctx.add("LOW", "quality", root, None, "Missing license file", "No LICENSE file found", "Include a license file matching package.json license metadata.", "low", ["quality", "license"])
    return ctx
1139
+
1140
+
1141
def scan_target(path: Path, args: argparse.Namespace) -> ScanContext:
    """Scan one target (tarball, ``package.json`` or directory).

    A ``.tgz`` / ``.gz`` / ``.tar`` file is extracted into a fresh temporary
    directory and scanned there. The directory is removed afterwards unless
    ``args.keep_extracted`` is set, including when the scan raises, so failed
    scans do not leave extracted untrusted content behind. For a
    ``package.json`` file the parent directory is scanned; any other path is
    scanned as given.
    """
    label = str(path)
    if path.is_file() and path.suffix.lower() in {".tgz", ".gz", ".tar"}:
        tmp = Path(tempfile.mkdtemp(prefix="npm-ts-audit-"))
        try:
            ctx = ScanContext(tmp, label, args.mode, True, args.include_node_modules, args.max_file_bytes, args.max_findings)
            ctx.summary.sha256 = sha256_file(path)
            for f in safe_extract_tgz(path, tmp):
                ctx.findings.append(f)
            scan_root(ctx)
        finally:
            # Preserve tmp path in JSON for traceability but delete contents
            # after scan.
            if not args.keep_extracted:
                shutil.rmtree(tmp, ignore_errors=True)
        return ctx

    root = path.parent if path.is_file() and path.name == "package.json" else path
    ctx = ScanContext(root, label, args.mode, False, args.include_node_modules, args.max_file_bytes, args.max_findings)
    if path.is_file():
        ctx.summary.sha256 = sha256_file(path)
    scan_root(ctx)
    return ctx
1164
+
1165
+
1166
def build_report(contexts: list[ScanContext]) -> ScanReport:
    """Merge per-target scan contexts into one ``ScanReport``.

    Findings from all contexts are concatenated and sorted by descending
    severity, then category, path, line and title. The overall decision and
    the ``--strict-exit`` code are derived from the highest severity that has
    at least one finding.

    Args:
        contexts: One scan context per scanned target.

    Returns:
        The aggregated report, stamped with the current UTC time
        (second resolution, ``Z`` suffix).
    """
    findings: list[Finding] = []
    summaries: list[TargetSummary] = []
    for ctx in contexts:
        findings.extend(ctx.findings)
        summaries.append(ctx.summary)
    findings.sort(key=lambda f: (-SEVERITY_ORDER.get(f.severity, 0), f.category, f.path, f.line or 0, f.title))

    counts = {sev: 0 for sev in SEVERITIES}
    for f in findings:
        # .get() tolerates a severity label that is not in SEVERITIES.
        counts[f.severity] = counts.get(f.severity, 0) + 1

    # Ordered from most to least severe; the first severity with a non-zero
    # count decides the outcome.
    thresholds = (
        ("CRITICAL", "QUARANTINE", 2),
        ("HIGH", "BLOCK_UNTIL_REVIEW", 2),
        ("MEDIUM", "REVIEW_BEFORE_USE", 1),
    )
    for severity, decision, strict_exit in thresholds:
        if counts.get(severity, 0):
            break
    else:
        decision, strict_exit = "PASS_WITH_CAUTION", 0

    # ``timezone.utc`` is used instead of the ``datetime.UTC`` alias because
    # the alias only exists on Python 3.11+; both name the same tzinfo object.
    now_utc = _dt.datetime.now(_dt.timezone.utc).replace(microsecond=0)
    return ScanReport(
        tool="npm_ts_static_triage.py",
        generated_at=now_utc.isoformat().replace("+00:00", "Z"),
        summaries=summaries,
        findings=findings,
        counts_by_severity=counts,
        decision=decision,
        strict_exit_code=strict_exit,
    )
1197
+
1198
+
1199
def _md_inline_code(value: Any) -> str:
    """Render *value* as a Markdown inline code span that cannot be escaped.

    Line breaks are collapsed to single spaces. If the text contains
    backticks, the delimiter is made one backtick longer than the longest
    backtick run in the text and padded with spaces, so the content cannot
    terminate the span early. Text without backticks or line breaks renders
    exactly as a plain single-backtick span.
    """
    text = " ".join(str(value).splitlines())
    if "`" not in text:
        return f"`{text}`"
    longest_run = max(len(run) for run in re.findall(r"`+", text))
    fence = "`" * (longest_run + 1)
    return f"{fence} {text} {fence}"


def markdown_report(report: ScanReport) -> str:
    """Render *report* as a human-readable Markdown document.

    The document contains the scope of each scanned target, a severity count
    table, one section per finding and decision-specific next steps.

    Finding locations and evidence originate from the scanned content, so
    they are emitted through ``_md_inline_code`` to keep embedded backticks
    or line breaks from breaking the report layout.

    Args:
        report: Aggregated scan report.

    Returns:
        The Markdown text, ending with a newline.
    """
    lines: list[str] = []
    lines.append("# npm/TypeScript Dependency & Package Static Audit")
    lines.append("")
    lines.append(f"Generated: `{report.generated_at}`")
    lines.append(f"Decision: **{report.decision}**")
    lines.append("")
    lines.append("## Scope")
    lines.append("")
    for s in report.summaries:
        lines.append(f"- Target: `{s.target}`")
        lines.append(f" - Mode: `{s.mode}`; tarball: `{s.is_tarball}`; files: `{s.file_count}`; bytes: `{s.total_bytes}`")
        if s.sha256:
            lines.append(f" - SHA-256: `{s.sha256}`")
        lines.append(f" - package.json: `{s.package_json_count}`; lockfiles: `{s.lockfile_count}`; tsconfig: `{s.tsconfig_count}`; workflows: `{s.workflow_count}`")
    lines.append("")
    lines.append("## Severity counts")
    lines.append("")
    lines.append("| Severity | Count |")
    lines.append("|---|---:|")
    for sev in ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"]:
        lines.append(f"| {sev} | {report.counts_by_severity.get(sev, 0)} |")
    lines.append("")
    lines.append("## Findings")
    lines.append("")
    if not report.findings:
        lines.append("No findings. This is not a proof of safety; it only means these static checks did not trigger.")
    else:
        for idx, f in enumerate(report.findings, 1):
            # Append the line number only when the finding has one.
            loc = f"{f.path}:{f.line}" if f.line else f.path
            lines.append(f"### {idx}. [{f.severity}] {f.title}")
            lines.append("")
            lines.append(f"- Category: `{f.category}`")
            lines.append(f"- Location: {_md_inline_code(loc)}")
            lines.append(f"- Confidence: `{f.confidence}`")
            if f.tags:
                lines.append(f"- Tags: `{', '.join(f.tags)}`")
            lines.append(f"- Evidence: {_md_inline_code(f.evidence)}")
            lines.append(f"- Recommendation: {f.recommendation}")
            lines.append("")
    lines.append("## Suggested next steps")
    lines.append("")
    if report.decision == "QUARANTINE":
        lines.append("- Do not install, build, import or run this package/repo. Quarantine the artifact and rotate any credentials that may have been exposed to it.")
    elif report.decision == "BLOCK_UNTIL_REVIEW":
        lines.append("- Block use until each HIGH finding is manually explained, removed, or allowlisted with evidence.")
    elif report.decision == "REVIEW_BEFORE_USE":
        lines.append("- Review MEDIUM findings before use, especially non-standard dependency sources and CI install behavior.")
    else:
        lines.append("- Proceed only with normal supply-chain controls: lockfile review, script suppression, signature/provenance checks and sandboxed install/build.")
    lines.append("- Use `npm ci --ignore-scripts` for review installs and avoid optional dependencies unless explicitly needed.")
    lines.append("- Run vulnerability/signature tooling in a secret-free environment after static review.")
    lines.append("")
    return "\n".join(lines)
1253
+
1254
+
1255
def sarif_report(report: ScanReport) -> dict[str, Any]:
    """Convert *report* into a SARIF 2.1.0 log structure.

    Each finding becomes one SARIF result. Finding categories double as rule
    ids; a rule is registered the first time its category is seen and takes
    its default level from that first finding.

    Args:
        report: Aggregated scan report.

    Returns:
        A JSON-serialisable dict with a single run.
    """
    rules_by_id: dict[str, dict[str, Any]] = {}
    results: list[dict[str, Any]] = []

    for finding in report.findings:
        rule_id = finding.category
        level = sarif_level(finding.severity)

        if rule_id not in rules_by_id:
            rules_by_id[rule_id] = {
                "id": rule_id,
                "name": rule_id,
                "shortDescription": {"text": rule_id},
                "fullDescription": {"text": "Static npm/TypeScript supply-chain audit finding"},
                "defaultConfiguration": {"level": level},
            }

        physical_location = {
            "artifactLocation": {"uri": finding.path},
            # Findings without a line number are reported at line 1.
            "region": {"startLine": finding.line or 1},
        }
        results.append({
            "ruleId": rule_id,
            "level": level,
            "message": {"text": f"[{finding.severity}] {finding.title}: {finding.evidence}"},
            "locations": [{"physicalLocation": physical_location}],
        })

    driver = {
        "name": "npm_ts_static_triage.py",
        "informationUri": "https://docs.npmjs.com/",
        "rules": list(rules_by_id.values()),
    }
    run = {"tool": {"driver": driver}, "results": results}
    return {
        "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
        "version": "2.1.0",
        "runs": [run],
    }
1293
+
1294
+
1295
def sarif_level(sev: str) -> str:
    """Translate a finding severity label into a SARIF level.

    ``CRITICAL`` and ``HIGH`` map to ``"error"``, ``MEDIUM`` maps to
    ``"warning"``, and every other value maps to ``"note"``.
    """
    level_by_severity = {
        "CRITICAL": "error",
        "HIGH": "error",
        "MEDIUM": "warning",
    }
    return level_by_severity.get(sev, "note")
1301
+
1302
+
1303
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the command-line arguments in *argv* (program name excluded).

    Args:
        argv: Argument strings, typically ``sys.argv[1:]``.

    Returns:
        Namespace with ``targets``, ``mode``, ``json_out``, ``markdown_out``,
        ``sarif_out``, ``strict_exit``, ``include_node_modules``,
        ``max_file_bytes``, ``max_findings``, ``ioc_file`` and
        ``keep_extracted``.
    """
    parser = argparse.ArgumentParser(
        description="Static npm/TypeScript dependency and package malware/quality triage. Does not execute target code.",
    )

    # What to scan and how to interpret it.
    parser.add_argument(
        "targets",
        nargs="+",
        help="Repo/package directory, package.json, or npm package tarball (.tgz) to scan",
    )
    parser.add_argument(
        "--mode",
        choices=["package", "library", "application", "repo"],
        default="package",
        help="Review mode; affects quality/lockfile expectations",
    )

    # Report destinations.
    parser.add_argument(
        "--json",
        dest="json_out",
        help="Write JSON report to this path",
    )
    parser.add_argument(
        "--markdown",
        "--out",
        dest="markdown_out",
        help="Write Markdown report to this path",
    )
    parser.add_argument(
        "--sarif",
        dest="sarif_out",
        help="Write SARIF report to this path",
    )

    # Scan behaviour and limits.
    parser.add_argument(
        "--strict-exit",
        action="store_true",
        help="Exit non-zero for MEDIUM/HIGH/CRITICAL findings; HIGH/CRITICAL return 2",
    )
    parser.add_argument(
        "--include-node-modules",
        action="store_true",
        help="Include node_modules in repo scans. Tarball scans include all extracted contents by default.",
    )
    parser.add_argument(
        "--max-file-bytes",
        type=int,
        default=5_000_000,
        help="Max bytes to read per text file",
    )
    parser.add_argument(
        "--max-findings",
        type=int,
        default=1000,
        help="Maximum findings to record",
    )
    parser.add_argument(
        "--ioc-file",
        action="append",
        default=[],
        help="Additional IOC text file; one indicator per line. hxxp and [.] are normalized.",
    )
    parser.add_argument(
        "--keep-extracted",
        action="store_true",
        help="Keep extracted tarball temp directories for manual review",
    )

    return parser.parse_args(argv)
1317
+
1318
+
1319
def main(argv: list[str]) -> int:
    """Run the static triage CLI and return the process exit code.

    All targets are checked for existence before any scan starts, so a
    mistyped target fails fast instead of after earlier targets have been
    scanned (and, with ``--keep-extracted``, after temporary extraction
    directories have already been left on disk).

    The default IOC list is looked up at ``rules/iocs.txt`` next to the
    directory containing this script; files given via ``--ioc-file`` are
    loaded in addition. When no output path is given, the Markdown report is
    printed to stdout.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        ``3`` if any target does not exist; the report's strict exit code
        when ``--strict-exit`` is set; otherwise ``0``.
    """
    args = parse_args(argv)

    paths = [Path(target) for target in args.targets]
    missing = [target for target, path in zip(args.targets, paths) if not path.exists()]
    if missing:
        for target in missing:
            sys.stderr.write(f"Target not found: {target}\n")
        return 3

    default_ioc = Path(__file__).resolve().parents[1] / "rules" / "iocs.txt"
    load_ioc_files([default_ioc] + [Path(x) for x in args.ioc_file])

    contexts: list[ScanContext] = [scan_target(path, args) for path in paths]
    report = build_report(contexts)

    if args.json_out:
        Path(args.json_out).write_text(json.dumps(asdict(report), indent=2, ensure_ascii=False), encoding="utf-8")
    if args.markdown_out:
        Path(args.markdown_out).write_text(markdown_report(report), encoding="utf-8")
    if args.sarif_out:
        Path(args.sarif_out).write_text(json.dumps(sarif_report(report), indent=2), encoding="utf-8")
    if not args.json_out and not args.markdown_out and not args.sarif_out:
        # No file output requested: fall back to Markdown on stdout.
        print(markdown_report(report))

    if args.strict_exit:
        return report.strict_exit_code
    return 0
1342
+
1343
+
1344
if __name__ == "__main__":
    # Script entry point: main()'s integer result becomes the exit status.
    sys.exit(main(sys.argv[1:]))