@testzugang/pi-plugin-dependency-audit 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -0
- package/package.json +23 -0
- package/skills/dependency-audit/README.md +105 -0
- package/skills/dependency-audit/SKILL.md +353 -0
- package/skills/dependency-audit/config.json +3 -0
- package/skills/dependency-audit/examples/github-actions-static-audit.yml +46 -0
- package/skills/dependency-audit/examples/sample-commands.md +110 -0
- package/skills/dependency-audit/rules/iocs.txt +23 -0
- package/skills/dependency-audit/rules/review-policy.md +38 -0
- package/skills/dependency-audit/scripts/npm_ts_static_triage.py +1345 -0
- package/skills/dependency-audit/scripts/pi-check-all-updates.sh +15 -0
- package/skills/dependency-audit/scripts/pi-check-current-global-versions.sh +37 -0
- package/skills/dependency-audit/scripts/pi-check-git-source-updates.sh +57 -0
- package/skills/dependency-audit/scripts/pi-check-latest-npm-versions.sh +25 -0
- package/skills/dependency-audit/scripts/pi-default-git-repos.txt +4 -0
- package/skills/dependency-audit/scripts/pi-default-packages.txt +16 -0
- package/skills/dependency-audit/scripts/pi-interactive-update.py +151 -0
- package/skills/dependency-audit/scripts/run_pi_dependency_audit.py +528 -0
- package/skills/dependency-audit/scripts/summarize_pi_dependency_audit.py +242 -0
- package/skills/dependency-audit/templates/report.md +102 -0
|
@@ -0,0 +1,1345 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Static-first npm/TypeScript package and dependency triage.
|
|
4
|
+
|
|
5
|
+
This tool intentionally does not run npm, node, package scripts, tests, builds,
|
|
6
|
+
or code from the target. It inspects source trees and npm tarballs (.tgz) for
|
|
7
|
+
malware and quality risk indicators that are common in supply-chain attacks.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import datetime as _dt
|
|
13
|
+
import hashlib
|
|
14
|
+
import json
|
|
15
|
+
import math
|
|
16
|
+
import os
|
|
17
|
+
import re
|
|
18
|
+
import shutil
|
|
19
|
+
import sys
|
|
20
|
+
import tarfile
|
|
21
|
+
import tempfile
|
|
22
|
+
from dataclasses import dataclass, field, asdict
|
|
23
|
+
from pathlib import Path, PurePosixPath
|
|
24
|
+
from typing import Any, Iterable
|
|
25
|
+
|
|
26
|
+
# Severity labels mapped to a numeric rank; a larger number is more severe.
# max_severity() and ScanContext.add() both look severities up in this table.
SEVERITY_ORDER = {"INFO": 0, "LOW": 1, "MEDIUM": 2, "HIGH": 3, "CRITICAL": 4}
# The same labels as a tuple, in ascending rank (dict insertion order).
SEVERITIES = tuple(SEVERITY_ORDER.keys())

# package.json script names that analyze_package_json() always treats as
# lifecycle hooks, regardless of which other scripts are declared.
LIFECYCLE_SCRIPTS = {
    "preinstall", "install", "postinstall",
    "prepublish", "prepublishOnly",
    "preprepare", "prepare", "postprepare",
    "prepack", "postpack", "publish", "postpublish",
    "dependencies",
}

# Subset of hooks that analyze_package_json() rates HIGH by default and tags
# "install-phase" (the other lifecycle hooks start at MEDIUM).
INSTALL_PHASE_SCRIPTS = {
    "preinstall", "install", "postinstall", "prepublish",
    "preprepare", "prepare", "postprepare", "dependencies",
}

# package.json fields that analyze_package_json() inspects for dependency specs.
DEP_FIELDS = (
    "dependencies",
    "devDependencies",
    "optionalDependencies",
    "peerDependencies",
    "peerDependenciesMeta",
    "bundleDependencies",
    "bundledDependencies",
    "overrides",
    "resolutions",
)

# File-suffix groups. NOTE(review): their consumers are outside this part of
# the file; presumably they select which files get content-scanned and how.
TEXT_EXTENSIONS = {
    ".js", ".jsx", ".mjs", ".cjs",
    ".ts", ".tsx", ".mts", ".cts",
    ".json", ".jsonc", ".yaml", ".yml", ".toml",
    ".sh", ".bash", ".zsh", ".fish",
    ".ps1", ".cmd", ".bat",
    ".md", ".txt", ".env", ".npmrc", ".yarnrc", ".pnpmrc",
    ".html", ".css",
}

CODE_EXTENSIONS = {
    ".js", ".jsx", ".mjs", ".cjs",
    ".ts", ".tsx", ".mts", ".cts",
    ".sh", ".bash", ".zsh", ".fish", ".ps1", ".cmd", ".bat",
}

DOC_EXTENSIONS = {".md", ".txt"}

BINARY_EXEC_EXTENSIONS = {
    ".exe", ".dll", ".so", ".dylib", ".node", ".wasm",
    ".bin", ".elf", ".msi", ".pkg", ".appimage",
}

ARCHIVE_EXTENSIONS = {".zip", ".tgz", ".tar", ".gz", ".xz", ".7z", ".rar", ".br"}

# Directory names that should_skip_dir() always prunes from the walk.
DEFAULT_SKIP_DIRS = {
    ".git", ".hg", ".svn",
    ".cache", ".turbo", ".parcel-cache", ".next", ".nuxt",
    "coverage", ".nyc_output", ".vitest", ".jest",
    ".idea", ".DS_Store",
}

# Pruned by should_skip_dir() only when node_modules scanning was not requested
# and the target is not a tarball.
NODE_MODULES_DIRS = {"node_modules"}
|
|
87
|
+
|
|
88
|
+
# Built-in indicators of compromise. They are matched as case-insensitive
# substrings (see script_risk_labels() and analyze_package_json()), and
# load_ioc_files() appends further entries to this same list at runtime.
IOC_STRINGS = [
    "filev2.getsession.org/file",
    "getsession.org",
    "169.254.169.254/latest/meta-data/iam/security-credentials",
    "metadata.google.internal",
    "127.0.0.1:8200",
    "oven-sh/bun/releases/download/bun-v1.3.13",
    "github.com/oven-sh/bun/releases/download/bun-v1.3.13",
    "git-tanstack.com",
    "transformers.pyz",
    "tanstack_runner.js",
    "router_init.js",
    "router_runtime.js",
    "createCommitOnBranch",
    ".claude/settings.json",
    ".claude/setup.mjs",
    ".vscode/tasks.json",
    ".vscode/setup.mjs",
    "tanstack/router#79ac49eedf774dd4b0cfa308722bc463cfe5885c",
    "@tanstack/setup",
]
|
|
109
|
+
|
|
110
|
+
# Token-shaped strings that mask_secrets() partially redacts before evidence
# is stored on a finding.
SECRET_PATTERNS = [
    re.compile(r"github_pat_[A-Za-z0-9_]{20,}_[A-Za-z0-9_]{20,}"),
    re.compile(r"\bgh[pousr]_[A-Za-z0-9_\-.]{20,}"),
    re.compile(r"\bghs_[A-Za-z0-9_\-.]{20,}"),
    re.compile(r"\bnpm_[A-Za-z0-9_\-.]{20,}"),
    re.compile(r"\bAKIA[0-9A-Z]{16}\b"),
    re.compile(r"\bASIA[0-9A-Z]{16}\b"),
    re.compile(r"xox[baprs]-[A-Za-z0-9-]{20,}"),
]

# Names of credential-bearing environment variables; a hit in a script command
# adds the "credential environment variable" label in script_risk_labels().
TOKEN_NAME_PATTERN = re.compile(
    r"\b(AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|"
    r"GITHUB_TOKEN|GH_TOKEN|NPM_TOKEN|NODE_AUTH_TOKEN|ACTIONS_ID_TOKEN|"
    r"ACTIONS_ID_TOKEN_REQUEST_URL|ACTIONS_ID_TOKEN_REQUEST_TOKEN|"
    r"VAULT_TOKEN|VAULT_AUTH_TOKEN|GOOGLE_APPLICATION_CREDENTIALS|"
    r"AZURE_CLIENT_SECRET|DOCKER_CONFIG)\b"
)

# The tables below are lists of (compiled pattern, label) pairs consumed by
# script_risk_labels(). The label text is load-bearing: analyze_package_json()
# escalates severity by substring-matching labels ("download", "URL", "fetch",
# "execution", "interpreter", "dynamic", "download piped"), so rewording a
# label changes scan results.
NETWORK_PATTERNS = [
    (re.compile(r"\b(fetch|XMLHttpRequest)\s*\("), "browser/node fetch"),
    (re.compile(r"\b(require\(['\"]https?['\"]\)|from ['\"]https?['\"]|https?\.(request|get)\s*\()"), "node http/https API"),
    (re.compile(r"\b(axios|got|request|superagent|undici)\b"), "HTTP client library"),
    (re.compile(r"\b(curl|wget|Invoke-WebRequest|Invoke-RestMethod|iwr|irm)\b", re.I), "download command"),
    (re.compile(r"https?://", re.I), "URL literal"),
]

EXEC_PATTERNS = [
    (re.compile(r"\brequire\(['\"]child_process['\"]\)|from ['\"]child_process['\"]"), "child_process import"),
    (re.compile(r"\b(exec|execSync|execFile|execFileSync|spawn|spawnSync|fork)\s*\("), "process execution call"),
    (re.compile(r"\b(child_process\.)?(exec|execSync|execFile|spawn|spawnSync)\s*\("), "child_process execution"),
    (re.compile(r"\beval\s*\(|\bnew\s+Function\s*\(|\bFunction\s*\("), "dynamic JS evaluation"),
    (re.compile(r"\bvm\.(runInNewContext|runInThisContext|runInContext|compileFunction)\s*\("), "Node vm execution"),
    (re.compile(r"\bWebAssembly\.(instantiate|compile)\s*\("), "WebAssembly runtime load"),
    (re.compile(r"\b(node|bun|deno|python|python3|bash|sh|zsh|fish|powershell|pwsh|cmd)\b", re.I), "interpreter invocation"),
]

STEALTH_PATTERNS = [
    (re.compile(r"&&\s*exit\s+1\b"), "forced failure after execution"),
    (re.compile(r"(?:>|1>)\s*/dev/null|2>&1|--silent|--quiet|-sS?\b|\bNO_COLOR\b"), "output suppression"),
    (re.compile(r"\bchmod\s+\+x\b|\bicacls\b|\bSet-ExecutionPolicy\b", re.I), "permission change"),
    (re.compile(r"\|\s*(bash|sh|zsh|powershell|pwsh|cmd)\b", re.I), "download piped to shell"),
]

SECRET_PATH_PATTERNS = [
    (re.compile(r"\.npmrc|\.yarnrc|\.pnpmrc"), "package-manager credentials file"),
    (re.compile(r"\.aws/(credentials|config)|aws/credentials"), "AWS credentials path"),
    (re.compile(r"\.config/gh/hosts\.yml|\.git-credentials|\.netrc"), "GitHub/git credentials path"),
    (re.compile(r"\.ssh/(id_rsa|id_ed25519|config|known_hosts)"), "SSH credential path"),
    (re.compile(r"\.docker/config\.json"), "Docker credential path"),
]

OBFUSCATION_PATTERNS = [
    (re.compile(r"_0x[a-fA-F0-9]{3,}"), "hex-style obfuscated identifiers"),
    (re.compile(r"\b(atob|btoa)\s*\(|Buffer\.from\s*\([^)]{0,120}['\"]base64['\"]"), "base64 decode"),
    (re.compile(r"\b(zlib|gunzipSync|inflateSync|brotliDecompressSync)\b"), "compressed payload decode"),
    (re.compile(r"\b(createDecipheriv|createCipheriv|crypto\.subtle|AES|RC4|xor)\b", re.I), "crypto/decryption layer"),
    (re.compile(r"\bString\.fromCharCode\s*\(|\bunescape\s*\("), "string decoder"),
]

IDE_AGENT_PATTERNS = [
    (re.compile(r"\.claude/(settings\.json|setup\.mjs|router_runtime\.js)"), "Claude Code/agent config path"),
    (re.compile(r"\.vscode/(tasks\.json|settings\.json|setup\.mjs|extensions\.json)"), "VS Code config path"),
    (re.compile(r"\.cursor/|\.devcontainer/"), "AI/IDE/devcontainer config path"),
]

GITHUB_API_PATTERNS = [
    (re.compile(r"createCommitOnBranch|createRef|updateRef|repos/[^\s]+/contents|git/refs", re.I), "GitHub write API"),
    (re.compile(r"graphql\s*\(|api\.github\.com/graphql", re.I), "GitHub GraphQL API"),
    (re.compile(r"octokit|@actions/github", re.I), "GitHub API client"),
]

# Used by classify_dep_spec(): a full 40-hex-digit commit id, and an exact
# "major.minor.patch" version with optional "v" prefix and -pre/+build suffix.
FULL_SHA_RE = re.compile(r"^[0-9a-fA-F]{40}$")
SEMVER_EXACT_RE = re.compile(r"^(?:v)?\d+\.\d+\.\d+(?:[-+][0-9A-Za-z.-]+)?$")
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
@dataclass
class Finding:
    """One scanner result; instances are created by ScanContext.add()."""

    severity: str  # a SEVERITY_ORDER key; ScanContext.add() upper-cases it and falls back to "INFO"
    category: str  # short grouping label, e.g. "manifest", "dependency-spec", "ioc"
    path: str  # file the finding refers to; Path inputs are stored relative to the scan root
    line: int | None  # 1-based line number, or None when no line could be determined
    title: str  # ScanContext.add() flattens this to one line and caps it at 220 characters
    evidence: str  # secret-masked, single-line, capped at 900 characters by ScanContext.add()
    recommendation: str  # single-line, capped at 500 characters by ScanContext.add()
    confidence: str = "medium"  # call sites in this file pass "low", "medium" or "high"
    tags: list[str] = field(default_factory=list)  # free-form labels attached by the call site
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def normalize_ioc(value: str) -> list[str]:
    """Turn one line of an IOC file into the indicator strings to match.

    Blank lines and lines starting with ``#`` yield an empty list. Otherwise
    the stripped line is returned, followed by its "refanged" form
    (``hxxp(s)://`` -> ``http(s)://``, ``[.]`` / ``(.)`` -> ``.``) when that
    differs from the stripped line.
    """
    entry = value.strip()
    if entry == "" or entry[0] == "#":
        return []
    refanged = entry
    # Order matters: the scheme rewrites run before the dot rewrites.
    for defanged, plain in (
        ("hxxps://", "https://"),
        ("hxxp://", "http://"),
        ("[.]", "."),
        ("(.)", "."),
    ):
        refanged = refanged.replace(defanged, plain)
    if refanged == entry:
        return [entry]
    return [entry, refanged]
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def load_ioc_files(paths: Iterable[Path]):
    """Extend the module-level IOC_STRINGS list from plain-text IOC files.

    Each line is passed through normalize_ioc(); indicators already present
    (compared case-insensitively) are not added again. Falsy entries, missing
    files and files that cannot be read are skipped silently.
    """
    known = {ioc.lower() for ioc in IOC_STRINGS}
    for ioc_file in paths:
        if not (ioc_file and ioc_file.exists()):
            continue
        try:
            content = ioc_file.read_text("utf-8", errors="replace")
        except OSError:
            continue
        for raw_line in content.splitlines():
            for candidate in normalize_ioc(raw_line):
                folded = candidate.lower()
                if folded in known:
                    continue
                IOC_STRINGS.append(candidate)
                known.add(folded)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
@dataclass
class TargetSummary:
    """Identity and running counters for one scanned target.

    ScanContext.__init__ creates the instance; the counters start at zero and
    are incremented while the target is walked and analyzed.
    """

    target: str  # label supplied by the caller (ScanContext's target_label)
    root: str  # resolved scan root, as a string
    mode: str  # scan mode; this file compares it against "library", "application" and "repo"
    is_tarball: bool
    started_at: str  # UTC timestamp, ISO 8601 with a trailing "Z", second precision
    file_count: int = 0  # incremented by iter_files() for every file it can stat
    package_json_count: int = 0  # incremented by analyze_package_json()
    lockfile_count: int = 0  # NOTE(review): updated outside the visible part of the file
    tsconfig_count: int = 0  # NOTE(review): updated outside the visible part of the file
    workflow_count: int = 0  # NOTE(review): updated outside the visible part of the file
    total_bytes: int = 0  # sum of st_size over the files counted by iter_files()
    sha256: str | None = None  # NOTE(review): presumably the tarball digest; set elsewhere - confirm
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
@dataclass
class ScanReport:
    """Top-level report record combining per-target summaries and findings.

    NOTE(review): the code that builds and serializes this record is outside
    the visible part of the file; the field notes below are inferred from the
    names and types only and should be confirmed there.
    """

    tool: str
    generated_at: str
    summaries: list[TargetSummary]
    findings: list[Finding]
    counts_by_severity: dict[str, int]  # presumably keyed by the SEVERITY_ORDER labels - confirm
    decision: str
    strict_exit_code: int
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
class ScanContext:
    """Mutable state for scanning one target.

    Holds the scan settings, the list of findings collected so far (with
    de-duplication and a hard cap), the set of script/bin entrypoints and
    package roots discovered in manifests, and the TargetSummary counters.
    """

    def __init__(self, root: Path, target_label: str, mode: str, is_tarball: bool, include_node_modules: bool, max_file_bytes: int, max_findings: int):
        """Store the settings and start an empty result set.

        ``root`` is resolved once so that later relative-path computations
        compare like with like.
        """
        self.root = root.resolve()
        self.target_label = target_label
        self.mode = mode
        self.is_tarball = is_tarball
        self.include_node_modules = include_node_modules
        self.max_file_bytes = max_file_bytes
        self.max_findings = max_findings
        self.findings: list[Finding] = []
        # Keys of findings already recorded: (severity, category, path, line, title).
        self._dedupe: set[tuple[str, str, str, int | None, str]] = set()
        self.lifecycle_entrypoints: set[str] = set()
        self.package_roots: set[Path] = set()
        # timezone.utc instead of datetime.UTC: the UTC alias only exists on
        # Python 3.11+, and both produce the same "+00:00" offset.
        started = _dt.datetime.now(_dt.timezone.utc).replace(microsecond=0)
        self.summary = TargetSummary(
            target=target_label,
            root=str(self.root),
            mode=mode,
            is_tarball=is_tarball,
            started_at=started.isoformat().replace("+00:00", "Z"),
        )

    def rel(self, path: Path | str) -> str:
        """Return ``path`` relative to the scan root in POSIX form.

        Falls back to ``str(path)`` unchanged when the path cannot be resolved
        or does not live under the root.
        """
        p = Path(path)
        try:
            return p.resolve().relative_to(self.root).as_posix()
        except (OSError, RuntimeError, ValueError):
            # resolve() may fail with OSError/RuntimeError; relative_to()
            # raises ValueError for paths outside the root.
            return str(path)

    def add(self, severity: str, category: str, path: Path | str, line: int | None, title: str, evidence: str, recommendation: str, confidence: str = "medium", tags: Iterable[str] = ()):  # noqa: E501
        """Record a finding unless the cap is reached or it is a duplicate.

        Once ``max_findings`` findings exist, a single INFO "scan-limit"
        marker is appended and every further call is ignored. Unknown
        severities are downgraded to "INFO". Evidence is secret-masked, and
        title/evidence/recommendation are flattened to one line and truncated.
        Two findings are duplicates when severity, category, path, line and
        title all match; the evidence text is not part of the key.
        """
        if len(self.findings) >= self.max_findings:
            # Exactly at the cap: add the marker once. It pushes the length
            # past the cap, so later calls fall straight through to return.
            if len(self.findings) == self.max_findings:
                self.findings.append(Finding(
                    severity="INFO",
                    category="scan-limit",
                    path=".",
                    line=None,
                    title="Finding limit reached",
                    evidence=f"The scanner stopped adding findings after {self.max_findings} findings.",
                    recommendation="Increase --max-findings for a complete report or triage the highest severity findings first.",
                    confidence="high",
                    tags=["limit"],
                ))
            return
        severity = severity.upper()
        if severity not in SEVERITY_ORDER:
            severity = "INFO"
        # Only Path inputs are relativized; plain strings are kept verbatim.
        rel_path = self.rel(path) if isinstance(path, Path) else str(path)
        evidence = mask_secrets(one_line(evidence))[:900]
        title = one_line(title)[:220]
        recommendation = one_line(recommendation)[:500]
        key = (severity, category, rel_path, line, title)
        if key in self._dedupe:
            return
        self._dedupe.add(key)
        self.findings.append(Finding(
            severity=severity,
            category=category,
            path=rel_path,
            line=line,
            title=title,
            evidence=evidence,
            recommendation=recommendation,
            confidence=confidence,
            tags=list(tags),
        ))
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def one_line(value: Any) -> str:
    """Render ``value`` as a single line with whitespace runs collapsed.

    The value is converted with ``str()``; CR, LF and TAB become spaces, every
    run of whitespace is reduced to one space, and the ends are trimmed.
    """
    line_breaks_to_spaces = {ord("\r"): " ", ord("\n"): " ", ord("\t"): " "}
    flattened = str(value).translate(line_breaks_to_spaces)
    collapsed = re.sub(r"\s+", " ", flattened)
    return collapsed.strip()
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def mask_secrets(text: str) -> str:
    """Redact credential-looking substrings from ``text``.

    Every SECRET_PATTERNS match is shortened to its first six and last four
    characters around a ``...[MASKED]...`` marker (matches of 12 characters or
    fewer are replaced entirely). Afterwards the value part of assignments to
    a few well-known token variables is replaced, keeping the variable name.
    """

    def _redact(match: re.Match[str]) -> str:
        token = match.group(0)
        if len(token) > 12:
            return token[:6] + "...[MASKED]..." + token[-4:]
        return "[MASKED]"

    masked = text
    for secret_re in SECRET_PATTERNS:
        masked = secret_re.sub(_redact, masked)
    # Second pass: NAME=value / NAME: value for common token variables.
    return re.sub(
        r"((?:NPM_TOKEN|GITHUB_TOKEN|GH_TOKEN|AWS_SECRET_ACCESS_KEY|NODE_AUTH_TOKEN)\s*[=:]\s*)['\"]?[^'\"\s]+",
        r"\1[MASKED]",
        masked,
    )
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def sha256_file(path: Path, limit: int | None = None) -> str:
    """Return the hex SHA-256 of a file, read in chunks of at most 1 MiB.

    With ``limit`` set, only the first ``limit`` bytes are hashed; a limit of
    zero or less hashes nothing and yields the digest of empty input.
    """
    digest = hashlib.sha256()
    chunk_size = 1024 * 1024
    budget = limit
    with path.open("rb") as stream:
        while budget is None or budget > 0:
            wanted = chunk_size if budget is None else min(chunk_size, budget)
            piece = stream.read(wanted)
            if not piece:
                break
            if budget is not None:
                budget -= len(piece)
            digest.update(piece)
    return digest.hexdigest()
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def safe_read_bytes(path: Path, max_bytes: int) -> tuple[bytes, bool]:
    """Read at most ``max_bytes`` from the start of ``path``.

    Returns ``(data, truncated)`` where ``truncated`` is True when the size
    reported by ``stat()`` exceeds ``max_bytes``.
    """
    size_on_disk = path.stat().st_size
    with path.open("rb") as handle:
        head = handle.read(max_bytes)
    was_truncated = size_on_disk > max_bytes
    return head, was_truncated
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def decode_text(data: bytes) -> str:
    """Decode bytes as UTF-8, substituting U+FFFD for undecodable sequences."""
    return str(data, "utf-8", "replace")
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def is_probably_binary(data: bytes) -> bool:
    """Guess from the first 4096 bytes whether ``data`` is binary.

    Empty input is not binary. A NUL byte in the sample means binary.
    Otherwise the data counts as binary when more than 35% of the sampled
    bytes fall outside tab/LF/VT/FF/CR and the printable ASCII range
    (byte values 9-13 and 32-126).
    """
    if len(data) == 0:
        return False
    head = data[:4096]
    if b"\x00" in head:
        return True
    suspicious = 0
    for byte in head:
        if byte > 126 or byte < 9 or 13 < byte < 32:
            suspicious += 1
    return suspicious / max(1, len(head)) > 0.35
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def line_for_offset(text: str, offset: int) -> int:
    """Return the 1-based line number containing character ``offset``."""
    newlines_before = text[:offset].count("\n")
    return newlines_before + 1
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def first_match_line(text: str, pattern: re.Pattern[str]) -> tuple[int | None, str | None]:
    """Locate the first match of ``pattern`` in ``text``.

    Returns ``(line, snippet)``: the 1-based line of the match start and a
    one-line excerpt covering the match plus up to 180 following characters.
    Returns ``(None, None)`` when the pattern does not match.
    """
    hit = pattern.search(text)
    if hit is None:
        return None, None
    begin, end = hit.span()
    line_no = line_for_offset(text, begin)
    excerpt = text[begin: min(len(text), end + 180)]
    return line_no, one_line(excerpt)
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def key_line(text: str, key: str) -> int | None:
    """Return the 1-based line of the first quoted ``key`` followed by a colon.

    This is a textual search over the raw manifest, not a JSON-aware lookup:
    the first occurrence wins regardless of nesting. Returns None when the key
    does not appear.
    """
    quoted_key = re.compile(r"[\"']" + re.escape(key) + r"[\"']\s*:")
    hit = quoted_key.search(text)
    return line_for_offset(text, hit.start()) if hit else None
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def should_skip_dir(path: Path, include_node_modules: bool, is_tarball: bool) -> bool:
    """Decide whether the directory ``path`` is pruned from the walk.

    Names in DEFAULT_SKIP_DIRS are always skipped. Names in NODE_MODULES_DIRS
    are skipped only when node_modules scanning was not requested and the
    target is not a tarball.
    """
    dirname = path.name
    node_modules_excluded = not (include_node_modules or is_tarball)
    return dirname in DEFAULT_SKIP_DIRS or (node_modules_excluded and dirname in NODE_MODULES_DIRS)
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def iter_files(ctx: ScanContext) -> list[Path]:
    """Walk ``ctx.root`` and return every file that can be stat'ed.

    Directories rejected by should_skip_dir() are pruned in place so os.walk
    never descends into them. As a side effect, ``ctx.summary.file_count`` and
    ``ctx.summary.total_bytes`` are updated for each returned file.
    """
    collected: list[Path] = []
    for current_dir, subdirs, filenames in os.walk(ctx.root):
        base = Path(current_dir)
        # Slice assignment mutates the list os.walk uses to decide where to recurse.
        subdirs[:] = [
            sub for sub in subdirs
            if not should_skip_dir(base / sub, ctx.include_node_modules, ctx.is_tarball)
        ]
        for filename in filenames:
            candidate = base / filename
            try:
                info = candidate.stat()
            except OSError:
                continue
            ctx.summary.file_count += 1
            ctx.summary.total_bytes += info.st_size
            collected.append(candidate)
    return collected
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def load_json_file(path: Path) -> tuple[Any | None, str]:
    """Read and parse a JSON file.

    Returns ``(parsed, raw_text)`` on success. On any failure (unreadable file
    or invalid JSON) returns ``(None, "JSON parse error: ...")``. A file whose
    content is the JSON literal ``null`` also yields ``None`` as the first
    element, paired with the raw text.
    """
    try:
        raw = path.read_text("utf-8", errors="replace")
        parsed = json.loads(raw)
    except Exception as exc:
        return None, f"JSON parse error: {exc}"
    return parsed, raw
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
# GitHub shorthand ("user/repo" or "user/repo#ref"), which npm resolves as a
# git dependency even though it carries no scheme or host.
_GITHUB_SHORTHAND_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_.-]*/[A-Za-z0-9_.-]+(?:#.+)?$")

# An "x"/"X" is a range wildcard only when it stands alone as a version
# component ("1.x", "1.2.X"), not as a letter inside a tag or identifier.
_X_WILDCARD_RE = re.compile(r"(?<![0-9A-Za-z-])[xX](?![0-9A-Za-z-])")

_RANGE_OPERATOR_CHARS = ("^", "~", "*", ">", "<", "|")


def classify_dep_spec(spec: str) -> tuple[str, str, str]:
    """Classify a package.json dependency spec.

    Returns ``(kind, severity, reason)``. ``kind`` is one of "alias", "git",
    "git-unpinned", "git-not-full-sha", "url", "local-file", "workspace",
    "floating", "exact", "range", "empty" or "other"; ``severity`` is a
    SEVERITY_ORDER label; ``reason`` is a human-readable explanation.

    Exact versions are recognised before the range test, and the letter "x"
    only counts as a wildcard when it is a whole version component, so specs
    such as "1.0.0-next.1" are "exact" rather than "range".
    """
    s = str(spec).strip()
    low = s.lower()
    if low.startswith("npm:"):
        return "alias", "MEDIUM", "npm alias can hide the actual package identity"

    is_git = (
        low.startswith(("git+", "git://", "github:", "gitlab:", "bitbucket:"))
        or "github.com" in low
        or "gitlab.com" in low
        or "bitbucket.org" in low
        or _GITHUB_SHORTHAND_RE.match(s) is not None
    )
    if is_git:
        if "#" not in s:
            return "git-unpinned", "HIGH", "git dependency has no commit pin"
        frag = s.rsplit("#", 1)[-1]
        if not FULL_SHA_RE.match(frag):
            return "git-not-full-sha", "HIGH", "git dependency is not pinned to a full 40-character commit SHA"
        # Even a fully pinned git dependency stays HIGH: see the reason text.
        return "git", "HIGH", "git dependency can execute prepare scripts during install"

    if re.match(r"https?://", low):
        plaintext = low.startswith("http://")
        reason = "remote tarball/URL dependency bypasses normal registry trust controls"
        if plaintext:
            reason += " and uses plaintext HTTP"
        return "url", "HIGH" if plaintext else "MEDIUM", reason
    if low.startswith(("file:", "link:")):
        return "local-file", "MEDIUM", "local file/link dependency depends on local filesystem state"
    if low.startswith("workspace:"):
        return "workspace", "INFO", "workspace dependency; inspect workspace package.json separately"
    if low in {"*", "x", "latest", "next", "canary", "beta", "alpha"}:
        return "floating", "MEDIUM", "floating dependency spec can resolve to newly published versions"
    if SEMVER_EXACT_RE.match(s):
        return "exact", "INFO", "exact semver dependency"
    if any(ch in s for ch in _RANGE_OPERATOR_CHARS) or _X_WILDCARD_RE.search(s):
        return "range", "LOW", "version range permits dependency drift unless a lockfile is enforced"
    if s == "":
        return "empty", "MEDIUM", "empty dependency spec"
    return "other", "LOW", "non-standard dependency spec; review manually"
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def flatten_dep_like(obj: Any, prefix: str = "") -> list[tuple[str, str]]:
    """Flatten a nested dependency-like structure into ``(key_path, spec)`` pairs.

    Dict keys are joined with ``.`` and list positions are written as
    ``[index]``. Only string leaves are returned; other scalar values are
    ignored. String items inside a list are kept no matter how deeply the
    list is nested, so a list under a dict key is handled the same way as a
    top-level list.
    """
    found: list[tuple[str, str]] = []
    if isinstance(obj, dict):
        for k, v in obj.items():
            key = f"{prefix}.{k}" if prefix else str(k)
            if isinstance(v, str):
                found.append((key, v))
            elif isinstance(v, (dict, list)):
                # Delegating lists to the list branch keeps their string items.
                found.extend(flatten_dep_like(v, key))
    elif isinstance(obj, list):
        for i, item in enumerate(obj):
            item_key = f"{prefix}[{i}]"
            if isinstance(item, str):
                found.append((item_key, item))
            else:
                found.extend(flatten_dep_like(item, item_key))
    return found
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def extract_script_entrypoints(cmd: str) -> list[str]:
    """Return script-file paths referenced by an npm script command.

    This is a heuristic scan, not a shell parser: it picks up tokens that end
    in a known script extension and returns them with backslashes turned into
    forward slashes, in order of appearance.

    URLs (``scheme://...``) are blanked out before scanning. The capture class
    cannot contain ``:``, so without this step the path part of a URL such as
    ``//host/x.sh`` would be reported as a local entrypoint. Local files whose
    names merely start with "http" (e.g. ``http-setup.js``) are reported like
    any other file.
    """
    # Replace whole URLs with a space so neighbouring tokens stay separated.
    local_part = re.sub(r"[A-Za-z][A-Za-z0-9+.-]*://\S+", " ", cmd)
    paths: list[str] = []
    # Capture common script file references without trying to fully parse a shell command.
    for m in re.finditer(r"(?:node|bun|deno|tsx?|ts-node|python3?|bash|sh|pwsh|powershell)?\s*([A-Za-z0-9_./\\-]+\.(?:mjs|cjs|js|jsx|ts|tsx|sh|ps1|cmd|bat))", local_part):
        raw = m.group(1).strip("'\"")
        if raw:
            paths.append(raw.replace("\\", "/"))
    return paths
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def script_risk_labels(cmd: str) -> list[str]:
    """Return the sorted, de-duplicated risk labels that apply to ``cmd``.

    A label is produced for every matching rule in the pattern tables, for a
    credential environment-variable name, and for each IOC string contained
    in the command (compared case-insensitively).
    """
    rule_tables = (
        NETWORK_PATTERNS,
        EXEC_PATTERNS,
        STEALTH_PATTERNS,
        OBFUSCATION_PATTERNS,
        SECRET_PATH_PATTERNS,
        IDE_AGENT_PATTERNS,
        GITHUB_API_PATTERNS,
    )
    hits = {
        label
        for table in rule_tables
        for pattern, label in table
        if pattern.search(cmd)
    }
    if TOKEN_NAME_PATTERN.search(cmd):
        hits.add("credential environment variable")
    lowered = cmd.lower()
    hits.update(f"known IOC: {ioc}" for ioc in IOC_STRINGS if ioc.lower() in lowered)
    return sorted(hits)
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
def _is_lifecycle_script(script_name: str, scripts: dict) -> bool:
    """Return True for built-in lifecycle hooks and pre/post hooks of declared scripts."""
    if script_name in LIFECYCLE_SCRIPTS:
        return True
    for prefix in ("pre", "post"):
        # Strip the prefix that actually matched; a fixed three-character
        # slice would turn "posttest" into "ttest" and miss the "test" script.
        if script_name.startswith(prefix) and script_name[len(prefix):] in scripts:
            return True
    return False


def analyze_package_json(ctx: ScanContext, path: Path):
    """Inspect one package.json and record findings on ``ctx``.

    Covers, in order: manifest validity, npm scripts and lifecycle hooks,
    dependency specs (including known-IOC matches), package metadata quality,
    declared entrypoints and bin files, workspaces, and the presence of a
    lockfile next to a root manifest. Also registers the manifest's directory
    in ``ctx.package_roots`` and script/bin targets in
    ``ctx.lifecycle_entrypoints``. Nothing is returned.
    """
    data, text_or_err = load_json_file(path)
    ctx.summary.package_json_count += 1
    ctx.package_roots.add(path.parent.resolve())
    if data is None:
        ctx.add("MEDIUM", "manifest", path, None, "Invalid package.json", text_or_err, "Fix JSON syntax before trusting this package.", "high", ["package-json"])
        return
    text = text_or_err
    if not isinstance(data, dict):
        ctx.add("HIGH", "manifest", path, 1, "package.json is not an object", str(type(data)), "Treat as suspicious and validate the package contents manually.", "high", ["package-json"])
        return

    name = data.get("name", "<unnamed>")
    version = data.get("version", "<no-version>")

    # Scripts and lifecycle hooks.
    scripts = data.get("scripts", {})
    if isinstance(scripts, dict):
        for script_name, cmd in scripts.items():
            if not isinstance(cmd, str):
                continue
            labels = script_risk_labels(cmd)
            line = key_line(text, script_name)
            is_lifecycle = _is_lifecycle_script(script_name, scripts)
            is_install_phase = script_name in INSTALL_PHASE_SCRIPTS
            # Files referenced by any script are remembered as entrypoints.
            for ref in extract_script_entrypoints(cmd):
                candidate = (path.parent / ref).resolve()
                try:
                    ctx.lifecycle_entrypoints.add(candidate.relative_to(ctx.root).as_posix())
                except ValueError:
                    # Outside the scan root: keep the reference as written.
                    ctx.lifecycle_entrypoints.add(ref)

            if is_lifecycle:
                sev = "HIGH" if is_install_phase else "MEDIUM"
                if labels:
                    # Known IOC, or download + execute in one hook, is critical.
                    if any("known IOC" in x for x in labels) or (any("download" in x or "URL" in x or "fetch" in x for x in labels) and any("execution" in x or "interpreter" in x or "dynamic" in x for x in labels)):
                        sev = "CRITICAL"
                    else:
                        sev = max_severity(sev, "HIGH")
                ctx.add(
                    sev,
                    "npm-lifecycle-script",
                    path,
                    line,
                    f"npm lifecycle script '{script_name}' in {name}@{version}",
                    f"{script_name}: {cmd}; signals={', '.join(labels) if labels else 'none'}",
                    "Do not install with scripts enabled. Review the referenced files and require a documented, minimal, reproducible reason for this lifecycle hook.",
                    "high" if is_install_phase else "medium",
                    ["npm", "script", script_name, "install-phase" if is_install_phase else "lifecycle"],
                )
            elif labels:
                sev = "MEDIUM"
                if any("download piped" in x for x in labels) or (any("download" in x or "URL" in x for x in labels) and any("dynamic" in x or "execution" in x for x in labels)):
                    sev = "HIGH"
                ctx.add(
                    sev,
                    "npm-script",
                    path,
                    line,
                    f"Risky npm script '{script_name}' in {name}@{version}",
                    f"{script_name}: {cmd}; signals={', '.join(labels)}",
                    "Review before running npm scripts. Prefer explicit allowlisted scripts in CI and never run these with developer or publish tokens present.",
                    "medium",
                    ["npm", "script", script_name],
                )
    elif scripts is not None:
        ctx.add("MEDIUM", "manifest", path, key_line(text, "scripts"), "scripts field is not an object", str(scripts), "Normalize package.json and review manually.", "medium", ["package-json", "scripts"])

    # Dependencies and unusual spec types.
    for field_name in DEP_FIELDS:
        value = data.get(field_name)
        if value is None:
            continue
        line = key_line(text, field_name)
        if field_name in {"bundleDependencies", "bundledDependencies"}:
            if value is True:
                ctx.add("HIGH", "dependency", path, line, f"{field_name}=true bundles all dependencies", f"{field_name}: true", "Avoid bundled dependencies unless every bundled artifact is audited; inspect packed tarball contents.", "medium", ["npm", "bundled-deps"])
            elif isinstance(value, list) and value:
                ctx.add("MEDIUM", "dependency", path, line, "Package bundles dependencies", f"{field_name}: {value[:10]}", "Audit bundled packages inside the tarball; bundled code bypasses normal dependency review visibility.", "medium", ["npm", "bundled-deps"])
            continue
        if field_name == "peerDependenciesMeta":
            continue
        if not isinstance(value, dict):
            continue
        for dep_name, spec in flatten_dep_like(value):
            if not isinstance(spec, str):
                continue
            kind, sev, reason = classify_dep_spec(spec)
            tags = ["npm", "dependency", field_name, kind]
            if field_name == "optionalDependencies" and kind in {"git", "git-unpinned", "git-not-full-sha", "url", "local-file", "alias"}:
                sev = max_severity(sev, "HIGH")
                reason += "; optionalDependencies are easy to overlook and install failures may be ignored"
            if field_name in {"overrides", "resolutions"} and kind not in {"exact", "range", "workspace"}:
                sev = max_severity(sev, "HIGH")
                reason += "; override/resolution can redirect a transitive package"
            elif field_name in {"overrides", "resolutions"}:
                sev = max_severity(sev, "MEDIUM")
            # Try the full name first so dotted package names ("socket.io")
            # resolve to their own line; the last path segment covers nested
            # override/resolution keys.
            dep_line = key_line(text, dep_name) or key_line(text, dep_name.split(".")[-1]) or line
            if kind not in {"exact", "workspace"}:
                if kind == "range" and ctx.mode == "library":
                    sev = "INFO"
                ctx.add(
                    sev,
                    "dependency-spec",
                    path,
                    dep_line,
                    f"Dependency spec review needed: {field_name}.{dep_name}",
                    f"{dep_name}: {spec}; {reason}",
                    "Prefer registry packages pinned by lockfile and integrity. Avoid git/URL/file specs unless explicitly allowlisted and pinned to immutable commits/artifacts.",
                    "medium",
                    tags,
                )
            # The IOC check runs for every spec kind: an exact version pin
            # must not hide a dependency whose name is a known IOC.
            if any(ioc.lower() in f"{dep_name} {spec}".lower() for ioc in IOC_STRINGS):
                ctx.add(
                    "CRITICAL",
                    "ioc",
                    path,
                    dep_line,
                    "Known supply-chain campaign IOC in dependency spec",
                    f"{field_name}.{dep_name}: {spec}",
                    "Quarantine this dependency/package, rotate any exposed credentials, and verify package versions against trusted upstream advisories.",
                    "high",
                    ["ioc", "npm", "dependency"],
                )

    # npm package metadata quality/security.
    if data.get("private") is not True and ctx.mode in {"application", "repo"} and path.parent == ctx.root:
        # For applications, accidental publish is a real quality/security issue.
        ctx.add("LOW", "quality", path, key_line(text, "private"), "Root project is not marked private", f"name={name}, version={version}", "For non-published applications, set private=true to prevent accidental npm publication.", "medium", ["npm", "quality"])

    if "license" not in data:
        ctx.add("LOW", "quality", path, None, "Missing license field", f"{name}@{version} has no license field", "Add an SPDX license expression or UNLICENSED for private packages.", "medium", ["npm", "quality"])
    if "repository" not in data:
        ctx.add("LOW", "quality", path, None, "Missing repository metadata", f"{name}@{version} has no repository field", "Add repository metadata so consumers can verify provenance and source.", "medium", ["npm", "quality"])
    if "engines" not in data:
        ctx.add("LOW", "quality", path, None, "Missing engines.node constraint", f"{name}@{version} has no engines field", "Declare supported Node.js versions to reduce ambiguous runtime behavior.", "low", ["node", "quality"])

    # any() stops at the first hit, so the recursive globs are not exhausted.
    has_ts_files = any(path.parent.rglob("*.ts")) or any(path.parent.rglob("*.tsx"))
    if has_ts_files and not any(k in data for k in ("types", "typings")) and not (path.parent / "index.d.ts").exists():
        ctx.add("LOW", "quality", path, None, "TypeScript package lacks types metadata", f"{name}@{version} has TS files but no types/typings field", "Publish declaration files and declare the types entrypoint for consumers.", "low", ["typescript", "quality"])

    for entry_field in ("main", "module", "types", "typings"):
        val = data.get(entry_field)
        if isinstance(val, str) and val and not val.startswith(("http://", "https://")):
            if not (path.parent / val).exists():
                ctx.add("LOW", "quality", path, key_line(text, entry_field), f"Declared {entry_field} file is missing", f"{entry_field}: {val}", "Ensure package metadata points to files present in the repo/tarball.", "medium", ["npm", "entrypoint"])

    bin_field = data.get("bin")
    if isinstance(bin_field, str):
        check_bin_entry(ctx, path, name, bin_field, text)
    elif isinstance(bin_field, dict):
        for bin_name, bin_path in bin_field.items():
            if isinstance(bin_path, str):
                check_bin_entry(ctx, path, str(bin_name), bin_path, text)

    # Workspaces: make sure nested packages are scanned.
    workspaces = data.get("workspaces")
    if workspaces:
        ctx.add("INFO", "workspace", path, key_line(text, "workspaces"), "npm workspaces detected", one_line(workspaces), "Scan all workspace package.json files and compare dependency changes per workspace.", "high", ["npm", "workspace"])

    # Lockfile expectations.
    direct_dep_count = 0
    for dep_field in ("dependencies", "devDependencies", "optionalDependencies"):
        if isinstance(data.get(dep_field), dict):
            direct_dep_count += len(data[dep_field])
    if direct_dep_count and path.parent == ctx.root:
        lockfiles = [path.parent / "package-lock.json", path.parent / "npm-shrinkwrap.json", path.parent / "pnpm-lock.yaml", path.parent / "yarn.lock"]
        if not any(p.exists() for p in lockfiles) and ctx.mode in {"application", "repo"}:
            ctx.add("MEDIUM", "lockfile", path, None, "Root project has dependencies but no lockfile", f"{direct_dep_count} direct dependencies", "Use a committed lockfile for applications and CI to avoid surprise transitive updates.", "medium", ["npm", "lockfile"])
|
|
679
|
+
|
|
680
|
+
|
|
681
|
+
def check_bin_entry(ctx: ScanContext, package_json: Path, bin_name: str, bin_path: str, manifest_text: str):
    """Validate a single ``bin`` entry declared in a package.json manifest.

    The entry is resolved relative to the manifest's directory.  When the
    resolved file lies under ``ctx.root`` its root-relative POSIX path is added
    to ``ctx.lifecycle_entrypoints``.  A missing target is reported as a MEDIUM
    ``bin-entrypoint`` finding; an existing ``.js``/``.mjs``/``.cjs`` target that
    does not start with ``#!`` is reported as a LOW ``quality`` finding.

    Args:
        ctx: Scan context that collects findings and lifecycle entrypoints.
        package_json: Path of the manifest declaring the entry.
        bin_name: Command name the entry is exposed as.
        bin_path: Path of the entry as written in the manifest.
        manifest_text: Raw manifest text, used to locate the ``bin`` key line.
    """
    p = (package_json.parent / bin_path).resolve()
    line = key_line(manifest_text, "bin")
    try:
        rel = p.relative_to(ctx.root).as_posix()
    except ValueError:
        # The entry resolves outside the scan root, so there is no
        # root-relative path to register.
        rel = None
    if rel is not None:
        ctx.lifecycle_entrypoints.add(rel)
    if not p.exists():
        ctx.add("MEDIUM", "bin-entrypoint", package_json, line, "bin entrypoint file is missing", f"{bin_name}: {bin_path}", "Verify the package tarball/source tree is complete and not relying on generated files from install scripts.", "medium", ["npm", "bin"])
        return
    # Only JavaScript entrypoints are checked for a shebang; skip the read
    # entirely for every other suffix.
    if p.suffix not in {".js", ".mjs", ".cjs"}:
        return
    try:
        # Read just the two bytes a shebang occupies instead of the whole file.
        with p.open("rb") as fh:
            first = fh.read(2)
    except OSError:
        # Unreadable target (directory, permissions, ...): best-effort check only.
        return
    if not first.startswith(b"#!"):
        ctx.add("LOW", "quality", p, 1, "CLI bin entrypoint has no shebang", f"bin {bin_name} -> {bin_path}", "Add a Node.js shebang if the file is intended as an executable CLI.", "low", ["npm", "bin", "quality"])
|
|
698
|
+
|
|
699
|
+
|
|
700
|
+
def max_severity(a: str, b: str) -> str:
    """Return whichever of the two severity labels ranks higher.

    Ranks come from ``SEVERITY_ORDER``; labels missing from it rank as 0.
    On a tie the first argument is returned.
    """
    rank_a = SEVERITY_ORDER.get(a, 0)
    rank_b = SEVERITY_ORDER.get(b, 0)
    if rank_b > rank_a:
        return b
    return a
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
def analyze_npmrc(ctx: ScanContext, path: Path):
    """Scan an ``.npmrc`` file line by line for risky settings.

    Blank lines and ``#``/``;`` comment lines are skipped.  Each remaining line
    is lower-cased and, for the ``key=value`` checks, normalised so that
    whitespace around ``=`` and quotes around the value do not hide a setting
    (``strict-ssl = false`` and ``registry="http://..."`` are flagged just like
    their compact spellings).

    Findings are recorded on *ctx* for: embedded auth tokens, disabled
    ``ignore-scripts``/``strict-ssl``/``audit``, plain-HTTP registries,
    ``unsafe-perm`` and permissive ``allow-*`` dependency sources.  At most
    ``min(ctx.max_file_bytes, 1 MiB)`` bytes are read; a truncated read is
    reported as an INFO finding.

    Args:
        ctx: Scan context that collects findings.
        path: Path of the ``.npmrc`` file to inspect.
    """
    limit = min(ctx.max_file_bytes, 1024 * 1024)
    data, truncated = safe_read_bytes(path, limit)
    text = decode_text(data)
    for i, line in enumerate(text.splitlines(), 1):
        stripped = line.strip()
        if not stripped or stripped.startswith(("#", ";")):
            continue
        low = stripped.lower()
        # Canonical "key=value" form: ini syntax tolerates spaces around "="
        # and quoted values, which a raw prefix match would miss.
        key, sep, value = low.partition("=")
        setting = f"{key.strip()}={value.strip().strip(chr(34) + chr(39))}" if sep else low
        if "_authtoken" in low or re.search(r"\b_auth\s*=", low):
            ctx.add("CRITICAL", "npmrc", path, i, "npm authentication token in .npmrc", stripped, "Remove tokens from repository/package. Rotate the token immediately if it was committed or shipped.", "high", ["secret", "npmrc"])
        if setting.startswith("ignore-scripts=false"):
            ctx.add("HIGH", "npmrc", path, i, "ignore-scripts is explicitly disabled", stripped, "Set ignore-scripts=true by default and run any required scripts only after manual approval.", "high", ["npmrc", "scripts"])
        if setting.startswith("strict-ssl=false"):
            ctx.add("HIGH", "npmrc", path, i, "strict-ssl is disabled", stripped, "Require TLS validation for registry traffic.", "high", ["npmrc", "tls"])
        if setting.startswith("audit=false"):
            ctx.add("MEDIUM", "npmrc", path, i, "npm audit disabled", stripped, "Enable audit in CI unless a documented alternative vulnerability scanner is enforced.", "medium", ["npmrc", "audit"])
        if setting.startswith("registry=http://") or re.match(r"@[^:]+:registry=http://", setting):
            ctx.add("HIGH", "npmrc", path, i, "Plain HTTP npm registry configured", stripped, "Use HTTPS registry URLs only.", "high", ["npmrc", "registry"])
        if setting.startswith("unsafe-perm=true"):
            ctx.add("MEDIUM", "npmrc", path, i, "unsafe-perm enabled", stripped, "Avoid elevated script execution privileges; use least-privilege build users.", "medium", ["npmrc", "scripts"])
        if setting.startswith(("allow-git=all", "allow-remote=all", "allow-file=all", "allow-directory=all")):
            ctx.add("MEDIUM", "npmrc", path, i, "npm allows non-registry dependency sources", stripped, "Prefer allow-git=none, allow-remote=none and allow-file=none unless specifically required.", "medium", ["npmrc", "dependency-source"])
    if truncated:
        # Make the partial scan visible, as the other text analyzers do.
        ctx.add("INFO", "scan-limit", path, None, ".npmrc scan truncated", f"Scanned first {limit} bytes", "Review the remainder of this unusually large .npmrc manually.", "medium", ["limit"])
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def analyze_package_lock(ctx: ScanContext, path: Path):
    """Inspect an npm JSON lockfile and record findings on *ctx*.

    Increments ``ctx.summary.lockfile_count``.  Unparseable JSON yields a single
    MEDIUM finding.  Otherwise an INFO finding notes the lockfile version, every
    dict-valued entry of the ``packages`` map is checked (install scripts,
    resolved source, missing integrity, bundled code, IOC strings), and the
    legacy ``dependencies`` tree is handed to ``walk_lock_deps``.
    """
    data, text_or_err = load_json_file(path)
    ctx.summary.lockfile_count += 1
    if data is None:
        ctx.add("MEDIUM", "lockfile", path, None, "Invalid npm lockfile JSON", text_or_err, "Regenerate lockfile from a trusted environment and review the diff.", "medium", ["npm", "lockfile"])
        return

    # A non-object top level is treated like an empty lockfile.
    root = data if isinstance(data, dict) else {}
    text = text_or_err
    lockfile_version = root.get("lockfileVersion")
    ctx.add("INFO", "lockfile", path, key_line(text, "lockfileVersion"), "npm lockfile detected", f"lockfileVersion={lockfile_version}", "Use this lockfile for dependency diff review; do not update it implicitly during audit.", "high", ["npm", "lockfile"])

    packages = root.get("packages", {})
    entries = packages.items() if isinstance(packages, dict) else ()
    for loc, meta in entries:
        if not isinstance(meta, dict):
            continue
        # The root project is keyed by the empty string; display it as ".".
        loc_str = loc or "."
        pseudo_path = f"{ctx.rel(path)}:{loc_str}"
        resolved = str(meta.get("resolved", ""))
        integrity = meta.get("integrity")
        optional = bool(meta.get("optional"))
        dev = bool(meta.get("dev"))
        in_bundle = bool(meta.get("inBundle") or meta.get("bundled"))

        if meta.get("hasInstallScript"):
            sev = "MEDIUM" if dev else "HIGH"
            if optional:
                sev = max_severity(sev, "HIGH")
            ctx.add(sev, "lockfile-install-script", pseudo_path, None, "Dependency has install/lifecycle script", f"{loc_str}; optional={optional}; dev={dev}; resolved={resolved}", "Inspect this package tarball before installing with scripts enabled. Prefer --ignore-scripts and allowlist required native build packages.", "high", ["npm", "lockfile", "install-script"])

        if resolved:
            analyze_resolved_url(ctx, path, pseudo_path, resolved, integrity, optional, dev)
            is_local = resolved.startswith(("file:", "link:"))
            if not integrity and not is_local and loc_str != ".":
                ctx.add("MEDIUM", "lockfile-integrity", pseudo_path, None, "Lockfile entry lacks integrity", f"{loc_str}; resolved={resolved}", "Require Subresource Integrity for registry tarballs; regenerate lockfile from a trusted registry if missing.", "medium", ["npm", "lockfile", "integrity"])

        if in_bundle:
            ctx.add("MEDIUM", "bundled-dependency", pseudo_path, None, "Bundled dependency in lockfile", f"{loc_str}; resolved={resolved}", "Review bundled code inside the package tarball; bundled code is less visible in normal dependency review.", "medium", ["npm", "bundle"])

        # IOC strings are matched against the location, the resolved URL and
        # the first 500 characters of the serialised entry.
        joined = f"{loc_str} {resolved} {json.dumps(meta, sort_keys=True)[:500]}"
        haystack = joined.lower()
        if any(ioc.lower() in haystack for ioc in IOC_STRINGS):
            ctx.add("CRITICAL", "ioc", pseudo_path, None, "Known supply-chain IOC in lockfile", joined[:500], "Quarantine dependency tree, verify affected package versions, and rotate potentially exposed credentials.", "high", ["ioc", "npm", "lockfile"])

    deps = root.get("dependencies", {})
    if isinstance(deps, dict):
        walk_lock_deps(ctx, path, deps, prefix="dependencies")
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
def walk_lock_deps(ctx: ScanContext, path: Path, deps: dict[str, Any], prefix: str):
    """Recursively check a legacy ``dependencies`` tree of an npm lockfile.

    For each dict-valued entry the resolved source is analysed, a missing
    integrity value on a non-``file:``/``link:`` source is reported, IOC strings
    are matched against the name and resolved URL, and nested ``dependencies``
    are walked with an extended *prefix*.

    Args:
        ctx: Scan context that collects findings.
        path: Lockfile being analysed.
        deps: Mapping of dependency name to its lockfile metadata.
        prefix: Dotted location of *deps* inside the lockfile, used in the
            pseudo path attached to findings.
    """
    for name, meta in deps.items():
        if not isinstance(meta, dict):
            continue

        pseudo_path = f"{ctx.rel(path)}:{prefix}.{name}"
        resolved = str(meta.get("resolved", ""))
        integrity = meta.get("integrity")
        optional = bool(meta.get("optional"))
        dev = bool(meta.get("dev"))

        if resolved:
            analyze_resolved_url(ctx, path, pseudo_path, resolved, integrity, optional, dev)
            if not integrity and not resolved.startswith(("file:", "link:")):
                ctx.add("MEDIUM", "lockfile-integrity", pseudo_path, None, "Legacy lockfile entry lacks integrity", f"{name}; resolved={resolved}", "Require integrity-pinned lockfiles for registry tarballs.", "medium", ["npm", "lockfile", "integrity"])

        haystack = f"{name} {resolved}".lower()
        if any(ioc.lower() in haystack for ioc in IOC_STRINGS):
            ctx.add("CRITICAL", "ioc", pseudo_path, None, "Known supply-chain IOC in legacy lock dependency", f"{name}; resolved={resolved}", "Quarantine dependency tree and validate affected versions.", "high", ["ioc", "npm", "lockfile"])

        children = meta.get("dependencies")
        if isinstance(children, dict):
            walk_lock_deps(ctx, path, children, f"{prefix}.{name}.dependencies")
|
|
788
|
+
|
|
789
|
+
|
|
790
|
+
def _is_default_npm_registry(resolved: str) -> bool:
    """Return True when the URL's host is exactly a public npm registry host."""
    from urllib.parse import urlsplit

    try:
        host = urlsplit(resolved).hostname
    except ValueError:
        # Malformed URL: never treat it as the trusted registry.
        return False
    return (host or "").lower() in {"registry.npmjs.org", "registry.npmjs.com"}


def analyze_resolved_url(ctx: ScanContext, path: Path, pseudo_path: str, resolved: str, integrity: Any, optional: bool, dev: bool):
    """Classify the ``resolved`` source of a lockfile entry and report non-standard ones.

    Severity by source: git/GitHub -> HIGH, ``http://`` -> HIGH, ``https://`` on
    a host other than the public npm registry -> MEDIUM, ``file:``/``link:`` ->
    MEDIUM.  Anything else produces no finding.  A flagged source that is also
    an optional dependency is raised to at least HIGH.

    The registry check compares the parsed hostname rather than searching the
    URL for a substring, so look-alike hosts
    (``registry.npmjs.org.evil.example``) and URLs that merely mention the
    registry in their path are reported as non-default sources.

    Args:
        ctx: Scan context that collects findings.
        path: Lockfile being analysed (not used by this function).
        pseudo_path: Location label attached to the finding.
        resolved: The entry's ``resolved`` value.
        integrity: The entry's ``integrity`` value; only its truthiness is reported.
        optional: Whether the entry is an optional dependency.
        dev: Whether the entry is a development dependency.
    """
    low = resolved.lower()
    sev = "INFO"
    reason = ""
    if low.startswith("git+") or "github.com" in low or low.startswith("github:"):
        sev = "HIGH"
        reason = "git/GitHub dependency source can run prepare scripts and bypass registry tarball review"
    elif low.startswith("http://"):
        sev = "HIGH"
        reason = "plaintext HTTP tarball source"
    elif low.startswith("https://") and not _is_default_npm_registry(resolved):
        sev = "MEDIUM"
        reason = "non-default remote tarball source"
    elif low.startswith(("file:", "link:")):
        sev = "MEDIUM"
        reason = "local file/link source depends on local filesystem state"
    if sev == "INFO":
        return
    if optional:
        sev = max_severity(sev, "HIGH")
        reason += "; optional dependency source is easy to overlook"
    ctx.add(sev, "lockfile-source", pseudo_path, None, "Non-standard dependency source in lockfile", f"resolved={resolved}; integrity={bool(integrity)}; optional={optional}; dev={dev}; {reason}", "Review and allowlist this source explicitly, or replace it with a registry package pinned by integrity.", "medium", ["npm", "lockfile", "source"])
|
|
811
|
+
|
|
812
|
+
|
|
813
|
+
def analyze_text_lockfile(ctx: ScanContext, path: Path):
    """Scan a text-format lockfile line by line and record findings on *ctx*.

    Increments ``ctx.summary.lockfile_count``.  Each line gets at most one
    source finding (git reference, plain HTTP, or non-default HTTPS URL, in that
    priority) and is additionally checked for ``requiresBuild`` markers and IOC
    strings.  A truncated read is reported as an INFO finding.
    """
    ctx.summary.lockfile_count += 1
    data, truncated = safe_read_bytes(path, ctx.max_file_bytes)
    git_markers = ("git+", "github:", "gitlab:", "bitbucket:", "github.com")
    lowered_iocs = [ioc.lower() for ioc in IOC_STRINGS]

    for lineno, raw in enumerate(decode_text(data).splitlines(), 1):
        low = raw.lower()

        if any(marker in low for marker in git_markers):
            ctx.add("HIGH", "lockfile-source", path, lineno, "Git dependency in text lockfile", raw, "Review git dependencies manually and require immutable full-SHA pins; avoid scripts during install.", "medium", ["npm", "lockfile", "git"])
        elif "http://" in low:
            ctx.add("HIGH", "lockfile-source", path, lineno, "Plain HTTP source in lockfile", raw, "Use HTTPS and integrity-pinned registry artifacts only.", "high", ["npm", "lockfile", "http"])
        elif "https://" in low and not ("registry.npmjs.org" in low or "registry.yarnpkg.com" in low):
            ctx.add("MEDIUM", "lockfile-source", path, lineno, "Non-default URL source in lockfile", raw, "Review and allowlist non-default registries/tarball sources.", "medium", ["npm", "lockfile", "url"])

        if "requiresbuild: true" in low or "requiresbuild=true" in low:
            ctx.add("MEDIUM", "lockfile-install-script", path, lineno, "Dependency requires build/install scripts", raw, "Identify package, inspect tarball, and install with scripts disabled unless allowlisted.", "medium", ["npm", "pnpm", "install-script"])

        if any(ioc in low for ioc in lowered_iocs):
            ctx.add("CRITICAL", "ioc", path, lineno, "Known supply-chain IOC in lockfile", raw, "Quarantine dependency tree and verify affected versions.", "high", ["ioc", "npm", "lockfile"])

    if truncated:
        ctx.add("INFO", "scan-limit", path, None, "Lockfile scan truncated", f"Scanned first {ctx.max_file_bytes} bytes", "Increase --max-file-bytes if suspicious entries may be later in the file.", "medium", ["limit"])
|
|
832
|
+
|
|
833
|
+
|
|
834
|
+
def analyze_tsconfig(ctx: ScanContext, path: Path):
    """Report weakened TypeScript compiler settings found in a tsconfig file.

    Increments ``ctx.summary.tsconfig_count``.  When the file does not parse as
    JSON, only a textual search for ``"strict": false`` is performed.  Otherwise
    ``compilerOptions`` is checked for strictness flags set to ``false``,
    ``allowJs`` set to ``true``, and, in ``package``/``library`` mode, a
    ``declaration`` option that is not ``true``.
    """
    data, text_or_err = load_json_file(path)
    ctx.summary.tsconfig_count += 1

    if data is None:
        # tsconfig often has JSONC; do a lightweight text pass.
        strict_off = re.compile(r"\"strict\"\s*:\s*false")
        text = path.read_text("utf-8", errors="replace")[:ctx.max_file_bytes]
        if strict_off.search(text):
            line, ev = first_match_line(text, strict_off)
            ctx.add("LOW", "typescript-quality", path, line, "TypeScript strict mode disabled", ev or "strict=false", "Enable strict mode or document why the package cannot use it.", "medium", ["typescript", "quality"])
        return

    if not isinstance(data, dict):
        return
    opts = data.get("compilerOptions", {})
    if not isinstance(opts, dict):
        return

    strictness_flags = ("strict", "noImplicitAny", "strictNullChecks", "noUncheckedIndexedAccess")
    for opt in strictness_flags:
        if opts.get(opt) is not False:
            continue
        ctx.add("LOW", "typescript-quality", path, key_line(text_or_err, opt), f"TypeScript compiler option {opt}=false", f"{opt}=false", "Tighten TypeScript compiler checks for library-quality code.", "medium", ["typescript", "quality"])

    if opts.get("allowJs") is True:
        ctx.add("LOW", "typescript-quality", path, key_line(text_or_err, "allowJs"), "allowJs enabled", "allowJs=true", "Ensure JavaScript sources are covered by linting and malware scan; mixed JS/TS increases review surface.", "low", ["typescript", "quality"])

    emits_declarations = opts.get("declaration") is True
    if not emits_declarations and ctx.mode in {"package", "library"}:
        ctx.add("LOW", "typescript-quality", path, key_line(text_or_err, "declaration"), "Declaration output not enabled", "compilerOptions.declaration is not true", "Published TypeScript packages should produce .d.ts declaration files or document generated types.", "low", ["typescript", "quality"])
|
|
855
|
+
|
|
856
|
+
|
|
857
|
+
def _workflow_has_trigger(text: str, event: str) -> bool:
    """Return True when *event* appears as a workflow trigger in any common YAML form."""
    name = re.escape(event)
    forms = (
        # Mapping form:   pull_request_target:
        rf"^\s*{name}\s*:",
        # List item form: - pull_request_target
        rf"^\s*-\s*[\"']?{name}[\"']?\s*(?:#.*)?$",
        # Inline forms:   on: pull_request_target   /   on: [push, pull_request_target]
        rf"^\s*[\"']?on[\"']?[ \t]*:[ \t]*\[?[^\]\n#]*\b{name}\b",
    )
    return any(re.search(form, text, flags=re.M) for form in forms)


def _workflow_run_commands(text: str):
    """Yield ``(offset, command)`` for every command line of every ``run:`` step.

    A single-line ``run: cmd`` yields that command at the offset of ``run:``.
    A block scalar (``run: |`` or ``run: >``) yields each non-blank line of the
    block, at that line's own offset, until a line indented no deeper than the
    ``run`` key ends the block.
    """
    for m in re.finditer(r"run:\s*(.+)", text):
        cmd = m.group(1).strip()
        if not re.fullmatch(r"[|>][+-]?\d?[+-]?(?:\s+#.*)?", cmd):
            yield m.start(), cmd
            continue
        # Column of the "run" key; block content must be indented deeper.
        key_col = m.start() - (text.rfind("\n", 0, m.start()) + 1)
        pos = text.find("\n", m.end())
        while pos != -1:
            start = pos + 1
            pos = text.find("\n", start)
            raw = text[start:] if pos == -1 else text[start:pos]
            body = raw.strip()
            if not body:
                continue
            if len(raw) - len(raw.lstrip()) <= key_col:
                break
            yield start, body


def analyze_workflow(ctx: ScanContext, path: Path):
    """Scan a GitHub Actions workflow file for risky triggers, permissions and steps.

    Increments ``ctx.summary.workflow_count`` and records findings for:
    ``pull_request_target`` / ``workflow_run`` triggers (mapping, list and
    inline ``on:`` forms), ``write-all`` and individual write permissions,
    actions that are not pinned to a full commit SHA, install commands without
    ``ignore-scripts``, and ``npm publish`` in a workflow that references a
    registry token or secrets.  ``run:`` steps are inspected line by line,
    including multi-line block scalars.  A truncated read is reported as INFO.

    Args:
        ctx: Scan context that collects findings.
        path: Workflow file to inspect.
    """
    ctx.summary.workflow_count += 1
    data, truncated = safe_read_bytes(path, ctx.max_file_bytes)
    text = decode_text(data)
    low = text.lower()
    if _workflow_has_trigger(text, "pull_request_target"):
        sev = "HIGH" if re.search(r"uses:\s*actions/checkout|run:\s*(npm|pnpm|yarn|bun|node|bash|sh)", text, flags=re.I) else "MEDIUM"
        ctx.add(sev, "github-actions", path, None, "Workflow uses pull_request_target", "pull_request_target with checkout/run risk if untrusted PR code is executed", "Do not checkout or execute untrusted PR code in pull_request_target workflows; use read-only permissions and explicit validation.", "high", ["github-actions", "pr"])
    if _workflow_has_trigger(text, "workflow_run"):
        ctx.add("MEDIUM", "github-actions", path, None, "Workflow triggered by workflow_run", "workflow_run can bridge artifacts and trust boundaries", "Verify artifacts are trusted before execution and keep token permissions minimal.", "medium", ["github-actions"])
    if "permissions: write-all" in low:
        ctx.add("HIGH", "github-actions-permissions", path, None, "Workflow grants write-all permissions", "permissions: write-all", "Use permissions: {} by default and grant only minimal scopes per job.", "high", ["github-actions", "permissions"])
    for perm in ("contents: write", "packages: write", "actions: write", "id-token: write", "pull-requests: write"):
        if perm in low:
            ctx.add("MEDIUM", "github-actions-permissions", path, None, f"Workflow grants {perm}", perm, "Verify this permission is required and isolated to trusted branches/environments.", "medium", ["github-actions", "permissions"])
    # Unpinned actions.
    for m in re.finditer(r"uses:\s*([^\s#]+)", text):
        ref = m.group(1).strip().strip("'\"")
        line = line_for_offset(text, m.start())
        # Local actions and docker images are not pinned by commit SHA.
        if ref.startswith(("./", "docker://")):
            continue
        if "@" not in ref:
            ctx.add("MEDIUM", "github-actions-pinning", path, line, "GitHub Action without explicit ref", ref, "Pin third-party actions to full-length commit SHAs and maintain them with Dependabot/Renovate.", "medium", ["github-actions", "pinning"])
            continue
        action_ref = ref.rsplit("@", 1)[-1]
        if not FULL_SHA_RE.match(action_ref):
            ctx.add("MEDIUM", "github-actions-pinning", path, line, "GitHub Action not pinned to full commit SHA", ref, "Pin third-party actions to full-length commit SHAs to make workflow dependencies immutable.", "medium", ["github-actions", "pinning"])
    # Install commands without script suppression.
    uses_registry_token = bool(re.search(r"(NPM_TOKEN|NODE_AUTH_TOKEN|secrets\.)", text))
    for offset, cmd in _workflow_run_commands(text):
        if re.search(r"\b(npm\s+(install|i|ci)|pnpm\s+install|yarn\s+install|bun\s+install)\b", cmd) and "ignore-scripts" not in cmd:
            ctx.add("HIGH", "github-actions-install", path, line_for_offset(text, offset), "CI install command does not disable lifecycle scripts", cmd, "For dependency-review jobs, use --ignore-scripts and omit optional dependencies before any build/test step.", "medium", ["github-actions", "npm", "install-scripts"])
        if uses_registry_token and re.search(r"\bnpm\s+publish\b", cmd):
            ctx.add("HIGH", "github-actions-publish", path, line_for_offset(text, offset), "npm publish workflow uses registry token", cmd, "Ensure publish only runs from protected tags/branches with trusted source and minimal token scope; consider trusted publishing/OIDC.", "medium", ["github-actions", "npm", "publish"])
    if truncated:
        ctx.add("INFO", "scan-limit", path, None, "Workflow scan truncated", f"Scanned first {ctx.max_file_bytes} bytes", "Increase --max-file-bytes for complete workflow review.", "medium", ["limit"])
|
|
893
|
+
|
|
894
|
+
|
|
895
|
+
def analyze_json_config(ctx: ScanContext, path: Path):
    """Flag IDE / AI-agent JSON configs that may run commands after clone or open.

    Only files under ``.vscode/``, ``.claude/``, ``.cursor/`` or
    ``.devcontainer/`` are considered, and of those only paths containing
    ``tasks.json``, ``settings.json`` or ``.claude/`` produce a finding.  Both
    path filters run before the file is read, so unrelated JSON files cost no
    I/O and cannot raise from here.

    Severity is MEDIUM without risk signals, HIGH with any signal, and CRITICAL
    when a known IOC is present or a download/URL signal coincides with an
    execution/interpreter signal.

    Args:
        ctx: Scan context that collects findings.
        path: JSON file to inspect.
    """
    # Focus on IDE/agent execution configs that may run commands after clone/open.
    rel = ctx.rel(path)
    if not any(part in rel for part in (".vscode/", ".claude/", ".cursor/", ".devcontainer/")):
        return
    if not ("tasks.json" in rel or "settings.json" in rel or ".claude/" in rel):
        return
    data, text_or_err = load_json_file(path)
    # Fall back to the raw text when the file is not strict JSON.
    text = text_or_err if data is not None else path.read_text("utf-8", errors="replace")[:ctx.max_file_bytes]
    labels = script_risk_labels(text)
    sev = "HIGH" if labels else "MEDIUM"
    has_ioc = any("known IOC" in x for x in labels)
    fetches = any("download" in x or "URL" in x for x in labels)
    executes = any("execution" in x or "interpreter" in x for x in labels)
    if has_ioc or (fetches and executes):
        sev = "CRITICAL"
    ctx.add(sev, "ide-agent-config", path, None, "IDE/AI-agent execution configuration present", f"signals={', '.join(labels) if labels else 'manual review required'}", "Do not auto-run IDE tasks or AI-agent hooks from untrusted repos/packages. Review and remove unexpected commands.", "medium", ["ide", "agent", "persistence"])
|
|
909
|
+
|
|
910
|
+
|
|
911
|
+
def entropy(s: str) -> float:
    """Return the Shannon entropy of *s* in bits per character.

    The empty string has entropy ``0.0``.
    """
    if not s:
        return 0.0
    total = len(s)
    # Tally characters in first-occurrence order.
    tally = dict.fromkeys(s, 0)
    for symbol in s:
        tally[symbol] += 1
    return -sum((seen / total) * math.log2(seen / total) for seen in tally.values())
|
|
919
|
+
|
|
920
|
+
|
|
921
|
+
def scan_source_file(ctx: ScanContext, path: Path):
    """Scan a single file and record every heuristic finding on *ctx*.

    Processing order:

    1. Files that cannot be stat'ed are skipped.
    2. Binary-executable extensions and embedded archives are reported with
       size and SHA-256, then skipped.
    3. Files that are neither a known text extension nor one of a few
       well-known extensionless names are skipped.
    4. Up to ``ctx.max_file_bytes`` bytes are read; content that looks binary
       is reported only for code extensions, then skipped.
    5. The decoded text is checked for IOC strings, secret literals,
       network/exec/credential/IDE/GitHub-API/stealth combinations and
       obfuscation indicators.
    6. A truncated read is reported as an INFO finding.

    Documentation files only receive the payload heuristics when they are a
    lifecycle entrypoint or carry a setup/install script name.
    """
    try:
        st = path.stat()
    except OSError:
        return
    ext = path.suffix.lower()
    rel = ctx.rel(path)
    # Lifecycle-relevant when registered as an entrypoint or named like a
    # conventional setup/install script.
    is_lifecycle_ref = rel in ctx.lifecycle_entrypoints or path.name.lower() in {"setup.mjs", "setup.js", "install.js", "postinstall.js", "preinstall.js", "prepare.js"}

    if ext in BINARY_EXEC_EXTENSIONS:
        sev = "HIGH" if ctx.is_tarball or is_lifecycle_ref else "MEDIUM"
        # Hashing is limited to the first 50 MiB of the file.
        ctx.add(sev, "binary-artifact", path, None, "Native/binary artifact present", f"{path.name}; size={st.st_size}; sha256={sha256_file(path, limit=min(st.st_size, 50 * 1024 * 1024))}", "Verify binary provenance, rebuildability and platform necessity. Avoid binaries downloaded or executed by install scripts.", "medium", ["binary", "npm-package"])
        return
    # An archive whose file name equals the scan target's own name is not reported.
    if ext in ARCHIVE_EXTENSIONS and path.name != Path(ctx.target_label).name:
        ctx.add("MEDIUM", "embedded-archive", path, None, "Embedded archive present", f"{path.name}; size={st.st_size}; sha256={sha256_file(path, limit=min(st.st_size, 50 * 1024 * 1024))}", "Extract and scan embedded archives only in a safe offline sandbox; verify why they are shipped.", "medium", ["archive", "payload"])
        return

    if ext not in TEXT_EXTENSIONS and path.name not in {"Makefile", "Dockerfile", ".npmrc", ".yarnrc", ".pnpmrc"}:
        return

    try:
        data, truncated = safe_read_bytes(path, ctx.max_file_bytes)
    except OSError:
        return
    if is_probably_binary(data):
        # Binary-looking content is only suspicious behind a code extension.
        if ext in CODE_EXTENSIONS:
            ctx.add("HIGH", "obfuscation", path, None, "Code file appears binary or packed", f"{path.name}; size={st.st_size}", "Treat as suspicious until unpacked or explained by a reproducible build process.", "medium", ["packed", "obfuscation"])
        return

    text = decode_text(data)
    lower_text = text.lower()
    is_doc_file = ext in DOC_EXTENSIONS

    # Known IOCs first.
    for ioc in IOC_STRINGS:
        if ioc.lower() in lower_text:
            # Despite its name, `line` holds the character offset of the first match.
            line = lower_text.find(ioc.lower())
            ctx.add("CRITICAL", "ioc", path, line_for_offset(lower_text, line) if line >= 0 else None, "Known supply-chain campaign IOC found", ioc, "Quarantine the package/repo, verify affected versions from advisories, and rotate potentially exposed credentials.", "high", ["ioc", "npm", "malware"])

    # Secret literals should never be in packages; mask output.
    # NOTE(review): the raw match is passed as evidence here; any masking
    # presumably happens inside ctx.add — confirm.
    for pat in SECRET_PATTERNS:
        m = pat.search(text)
        if m:
            ctx.add("CRITICAL", "secret", path, line_for_offset(text, m.start()), "Possible live secret/token committed or shipped", m.group(0), "Remove the secret and rotate it immediately. Treat package as compromised if token was published.", "high", ["secret"])
            # Only the first matching secret pattern is reported per file.
            break

    # Each collect_hits result is a list of (line, label, excerpt) tuples.
    network_hits = collect_hits(text, NETWORK_PATTERNS)
    exec_hits = collect_hits(text, EXEC_PATTERNS)
    stealth_hits = collect_hits(text, STEALTH_PATTERNS)
    secret_path_hits = collect_hits(text, SECRET_PATH_PATTERNS)
    obf_hits = collect_hits(text, OBFUSCATION_PATTERNS)
    ide_hits = collect_hits(text, IDE_AGENT_PATTERNS)
    gh_hits = collect_hits(text, GITHUB_API_PATTERNS)
    token_name_hit = TOKEN_NAME_PATTERN.search(text)

    # Documentation often contains command/API examples that would otherwise trigger
    # payload heuristics. Only apply those heuristics to docs when the file is a
    # lifecycle entrypoint or explicit setup/install script.
    apply_payload_heuristics = (not is_doc_file) or is_lifecycle_ref

    # Payload behaviour: network+exec outranks the lifecycle-only variants, and
    # at most one finding of this category is emitted.
    if apply_payload_heuristics and network_hits and exec_hits:
        # Earliest line among all network/exec hits, or None when no hit has a line.
        line = min([h[0] for h in network_hits + exec_hits if h[0] is not None] or [None])
        ctx.add("CRITICAL", "payload-behavior", path, line, "Network plus code/process execution behavior", f"network={labels_only(network_hits)}; execution={labels_only(exec_hits)}", "Do not execute. Manually trace data flow and verify there is no downloader/dropper/exfiltration path.", "medium", ["network", "exec", "malware-pattern"])
    elif apply_payload_heuristics and exec_hits and is_lifecycle_ref:
        ctx.add("HIGH", "payload-behavior", path, exec_hits[0][0], "Lifecycle-referenced file can execute commands/code", f"execution={labels_only(exec_hits)}", "Review lifecycle entrypoint manually. Avoid install scripts unless required and allowlisted.", "medium", ["exec", "lifecycle"])
    elif apply_payload_heuristics and network_hits and is_lifecycle_ref:
        ctx.add("HIGH", "payload-behavior", path, network_hits[0][0], "Lifecycle-referenced file performs network access", f"network={labels_only(network_hits)}", "Install scripts should not download code/binaries without transparent integrity checks and provenance.", "medium", ["network", "lifecycle"])

    # Credential access: CRITICAL when combined with network/exec/GitHub API
    # use, MEDIUM when only the names or paths are referenced.
    if apply_payload_heuristics and (secret_path_hits or token_name_hit) and (network_hits or exec_hits or gh_hits):
        labels = labels_only(secret_path_hits)
        if token_name_hit:
            labels.append("credential environment variable")
        # Reported line: the token-name match if any, else the first secret-path hit.
        ctx.add("CRITICAL", "credential-access", path, token_name_hit and line_for_offset(text, token_name_hit.start()) or (secret_path_hits[0][0] if secret_path_hits else None), "Credential access combined with network/execution", f"credentials={labels}; network={labels_only(network_hits)}; execution={labels_only(exec_hits)}; github_api={labels_only(gh_hits)}", "Assume credential theft is possible. Do not run; inspect for exfiltration and rotate any credentials exposed to this code.", "medium", ["credential", "exfiltration"])
    elif apply_payload_heuristics and (secret_path_hits or token_name_hit):
        labels = labels_only(secret_path_hits)
        if token_name_hit:
            labels.append("credential environment variable")
        ctx.add("MEDIUM", "credential-access", path, token_name_hit and line_for_offset(text, token_name_hit.start()) or (secret_path_hits[0][0] if secret_path_hits else None), "Credential-related names or paths referenced", f"credentials={labels}", "Verify this is legitimate configuration handling and not token harvesting.", "medium", ["credential"])

    # IDE/agent config references: CRITICAL when combined with other
    # behaviour, HIGH when referenced on their own.
    if apply_payload_heuristics and ide_hits and (network_hits or exec_hits or gh_hits):
        ctx.add("CRITICAL", "ide-agent-persistence", path, ide_hits[0][0], "IDE/AI-agent config path combined with execution/network/GitHub write behavior", f"ide={labels_only(ide_hits)}; exec={labels_only(exec_hits)}; network={labels_only(network_hits)}; github_api={labels_only(gh_hits)}", "Treat as potential repo-poisoning/persistence. Remove configs and audit GitHub token exposure.", "medium", ["ide", "agent", "persistence"])
    elif apply_payload_heuristics and ide_hits:
        ctx.add("HIGH", "ide-agent-config", path, ide_hits[0][0], "IDE/AI-agent configuration path referenced", f"ide={labels_only(ide_hits)}", "Review whether the package/repo writes or ships IDE/agent configs unexpectedly.", "medium", ["ide", "agent"])

    if apply_payload_heuristics and gh_hits and token_name_hit:
        ctx.add("HIGH", "github-api", path, gh_hits[0][0], "GitHub API usage with token-related code", f"github_api={labels_only(gh_hits)}", "Ensure GitHub token use is limited to documented operations and cannot modify repo config or workflows unexpectedly.", "medium", ["github", "token"])

    if apply_payload_heuristics and stealth_hits and (is_lifecycle_ref or network_hits or exec_hits):
        ctx.add("HIGH", "stealth", path, stealth_hits[0][0], "Stealthy script behavior", f"stealth={labels_only(stealth_hits)}", "Review why output is suppressed, permissions changed, or failures forced after execution.", "medium", ["stealth"])

    # Obfuscation heuristics.
    lines = text.splitlines()
    max_line_len = max((len(line) for line in lines), default=0)
    # 1-based numbers of lines longer than 2000 characters.
    long_lines = [i + 1 for i, line in enumerate(lines) if len(line) > 2000]
    # Uses the on-disk size but the line count of the (possibly truncated) text.
    huge_single_line = st.st_size > 500_000 and len(lines) <= 3
    # Count of `_0x...` style identifiers.
    hex_id_count = len(re.findall(r"_0x[a-fA-F0-9]{3,}", text[:ctx.max_file_bytes]))
    # Quoted runs of at least 160 base64-alphabet characters.
    base64_like = re.findall(r"['\"]([A-Za-z0-9+/]{160,}={0,2})['\"]", text[:ctx.max_file_bytes])
    # Only the first 10 candidates are entropy-checked.
    high_entropy_strings = [s for s in base64_like[:10] if entropy(s) > 4.5]
    if apply_payload_heuristics and (obf_hits or long_lines or huge_single_line or hex_id_count > 20 or high_entropy_strings):
        sev = "HIGH" if (network_hits or exec_hits or is_lifecycle_ref or huge_single_line) else "MEDIUM"
        if huge_single_line and (network_hits or exec_hits or is_lifecycle_ref):
            sev = "CRITICAL"
        # Reported line: first obfuscation hit, else first over-long line, else line 1.
        ctx.add(sev, "obfuscation", path, obf_hits[0][0] if obf_hits else (long_lines[0] if long_lines else 1), "Obfuscation or packed payload indicators", f"obf={labels_only(obf_hits)}; max_line_len={max_line_len}; huge_single_line={huge_single_line}; hex_ids={hex_id_count}; high_entropy_strings={len(high_entropy_strings)}; truncated={truncated}", "Demand unobfuscated source, reproducible build provenance, and manual reverse engineering before use.", "medium", ["obfuscation", "packed"])

    if truncated:
        ctx.add("INFO", "scan-limit", path, None, "File scan truncated", f"size={st.st_size}; scanned_first_bytes={ctx.max_file_bytes}", "Increase --max-file-bytes for full-file scanning if this file is relevant.", "medium", ["limit"])
|
|
1027
|
+
|
|
1028
|
+
|
|
1029
|
+
def collect_hits(text: str, patterns: list[tuple[re.Pattern[str], str]]) -> list[tuple[int | None, str, str]]:
    """Return one ``(line, label, excerpt)`` tuple per pattern that matches *text*.

    Only the first match of each pattern is used.  The excerpt is the matched
    text passed through ``one_line`` and cut to 120 characters.  Results keep
    the order of *patterns*.
    """
    return [
        (line_for_offset(text, found.start()), label, one_line(found.group(0))[:120])
        for regex, label in patterns
        if (found := regex.search(text))
    ]
|
|
1036
|
+
|
|
1037
|
+
|
|
1038
|
+
def labels_only(hits: list[tuple[int | None, str, str]]) -> list[str]:
|
|
1039
|
+
return sorted(set(h[1] for h in hits))
|
|
1040
|
+
|
|
1041
|
+
|
|
1042
|
+
def analyze_package_artifact_hygiene(ctx: ScanContext, files: list[Path]):
    """Report IDE/agent/npm config files shipped in the scanned contents.

    Paths are compared after dropping a leading ``package/`` component.  Flagged
    files are HIGH, or CRITICAL when scanning a tarball and the file lives under
    ``.claude/`` or ``.vscode/``.  A tarball that contains no ``package.json``
    at all is reported as HIGH.
    """
    # For npm tarballs, unexpected config files are especially risky.
    exact_names = {".vscode/tasks.json", ".vscode/settings.json", ".claude/settings.json", ".cursor/rules", ".npmrc"}
    config_dirs = (".claude/", ".cursor/", ".vscode/")
    for entry in files:
        inside = ctx.rel(entry).removeprefix("package/")
        if inside not in exact_names and not inside.startswith(config_dirs):
            continue
        if ctx.is_tarball and inside.startswith((".claude/", ".vscode/")):
            sev = "CRITICAL"
        else:
            sev = "HIGH"
        ctx.add(sev, "package-artifact", entry, None, "Sensitive IDE/agent/npm config present in package/repo contents", inside, "Remove unexpected config/persistence files from npm package or repo contents and audit how they were introduced.", "medium", ["npm-package", "artifact", "ide"])

    if ctx.is_tarball and not any(entry.name == "package.json" for entry in files):
        ctx.add("HIGH", "package-artifact", ctx.root, None, "npm tarball has no package.json", "No package.json found after extraction", "Reject this artifact as malformed or suspicious.", "high", ["npm-package"])
|
|
1059
|
+
|
|
1060
|
+
|
|
1061
|
+
def safe_extract_tgz(tgz_path: Path, destination: Path) -> list[Finding]:
    """Extract regular files and directories from a tarball without trusting it.

    Only directory and regular-file members are written below *destination*.
    Absolute paths, ``..`` components and targets that resolve outside
    *destination* are reported and skipped; symlinks and hardlinks are reported
    and never created. Other member types are ignored.

    Args:
        tgz_path: Archive to read (opened with transparent compression detection).
        destination: Directory that receives the extracted tree.

    Returns:
        Findings describing problems met while opening or extracting the
        archive; empty when everything extracted cleanly. Per-member errors are
        converted to findings rather than raised.
    """
    findings: list[Finding] = []
    try:
        tf = tarfile.open(tgz_path, "r:*")
    except Exception as exc:
        findings.append(Finding("CRITICAL", "tarball", str(tgz_path), None, "Unable to open tarball", str(exc), "Reject malformed package artifact.", "high", ["tarball"]))
        return findings
    dest_resolved = destination.resolve()
    with tf:
        # Listing members reads the whole archive, so a truncated or corrupt
        # stream fails here rather than in tarfile.open(); report it instead of
        # letting the scanner crash.
        try:
            members = tf.getmembers()
        except Exception as exc:
            findings.append(Finding("CRITICAL", "tarball", str(tgz_path), None, "Unable to read tarball members", str(exc), "Reject malformed package artifact.", "high", ["tarball"]))
            return findings
        for member in members:
            name = member.name
            try:
                pure = PurePosixPath(name)
                if pure.is_absolute() or ".." in pure.parts:
                    findings.append(Finding("CRITICAL", "tarball", name, None, "Unsafe tarball path traversal entry", name, "Reject artifact and report to registry/upstream.", "high", ["tarball", "path-traversal"]))
                    continue
                target = (destination / Path(*pure.parts)).resolve()
                # Second line of defence: the resolved target must be the
                # destination itself or live underneath it.
                if not target.is_relative_to(dest_resolved):
                    findings.append(Finding("CRITICAL", "tarball", name, None, "Unsafe tarball extraction target", str(target), "Reject artifact and report to registry/upstream.", "high", ["tarball", "path-traversal"]))
                    continue
                if member.issym() or member.islnk():
                    findings.append(Finding("MEDIUM", "tarball", name, None, "Symlink/hardlink entry in tarball", f"linkname={member.linkname}", "Review links manually; scanner does not follow package symlinks.", "medium", ["tarball", "link"]))
                    continue
                if member.isdir():
                    target.mkdir(parents=True, exist_ok=True)
                    continue
                if member.isfile():
                    target.parent.mkdir(parents=True, exist_ok=True)
                    src = tf.extractfile(member)
                    if src is None:
                        continue
                    # Close the member stream as well as the output file.
                    with src, target.open("wb") as out:
                        shutil.copyfileobj(src, out)
                    try:
                        # Keep the archive's permission bits but always grant the
                        # owner read/write, so a member stored as e.g. mode 000
                        # cannot make itself unreadable to the later content scan.
                        os.chmod(target, (member.mode & 0o777) | 0o600)
                    except OSError:
                        # Best effort only: permission bits are not essential to the scan.
                        pass
            except Exception as exc:
                findings.append(Finding("HIGH", "tarball", name, None, "Error extracting tarball member", str(exc), "Reject or manually inspect artifact extraction behavior.", "medium", ["tarball"]))
    return findings
|
|
1101
|
+
|
|
1102
|
+
|
|
1103
|
+
def scan_root(ctx: ScanContext) -> ScanContext:
    """Run every static check over the files of ``ctx`` and return ``ctx``.

    The file list from ``iter_files`` is walked three times: once by the
    artifact-hygiene check, once to dispatch manifests, lockfiles and config
    files to their dedicated analyzers, and once to content-scan every file.
    Finally, LOW findings are added when no README, SECURITY.md or license file
    exists at the root (or under ``package/``).

    NOTE(review): assumes ``iter_files`` returns a re-iterable sequence such as
    a list; a one-shot generator would be exhausted after the first pass.

    Args:
        ctx: Scan context to inspect and to record findings on.

    Returns:
        The same ``ctx`` object.
    """
    files = iter_files(ctx)
    analyze_package_artifact_hygiene(ctx, files)

    # First pass: manifests and lock/config files that establish context.
    for p in files:
        name = p.name
        # Drop a leading "package/" the same way analyze_package_artifact_hygiene
        # does, so the location-based checks below also match files that sit
        # under the "package/" directory of an extracted tarball.
        rel = ctx.rel(p).removeprefix("package/")
        if name == "package.json":
            analyze_package_json(ctx, p)
        elif name in {"package-lock.json", "npm-shrinkwrap.json"}:
            analyze_package_lock(ctx, p)
        elif name in {"pnpm-lock.yaml", "yarn.lock"}:
            analyze_text_lockfile(ctx, p)
        elif name in {".npmrc", ".yarnrc", ".pnpmrc"}:
            analyze_npmrc(ctx, p)
        elif name == "tsconfig.json" or (name.startswith("tsconfig.") and name.endswith(".json")):
            analyze_tsconfig(ctx, p)
        elif rel.startswith(".github/workflows/") and p.suffix.lower() in {".yml", ".yaml"}:
            analyze_workflow(ctx, p)
        elif rel.startswith((".vscode/", ".claude/", ".cursor/", ".devcontainer/")) and p.suffix.lower() in {".json", ".jsonc", ".yml", ".yaml"}:
            analyze_json_config(ctx, p)

    # Second pass: source and config content scan.
    for p in files:
        scan_source_file(ctx, p)

    # Basic repo/package quality checks.
    root = ctx.root

    def present(*candidates: str) -> bool:
        """Return True when any candidate path exists below the scan root."""
        return any((root / candidate).exists() for candidate in candidates)

    if not present("README.md", "readme.md", "README", "package/README.md"):
        ctx.add("LOW", "quality", root, None, "Missing README", "No README found at target root", "Add README with install, build, security and provenance guidance.", "low", ["quality"])
    if not present("SECURITY.md", "security.md", ".github/SECURITY.md", "package/SECURITY.md"):
        ctx.add("LOW", "quality", root, None, "Missing SECURITY.md", "No SECURITY.md found", "Add a security policy with vulnerability reporting instructions.", "low", ["quality", "security-policy"])
    if not present("LICENSE", "LICENSE.md", "license", "package/LICENSE", "package/LICENSE.md"):
        ctx.add("LOW", "quality", root, None, "Missing license file", "No LICENSE file found", "Include a license file matching package.json license metadata.", "low", ["quality", "license"])
    return ctx
|
|
1139
|
+
|
|
1140
|
+
|
|
1141
|
+
def scan_target(path: Path, args: argparse.Namespace) -> ScanContext:
    """Scan one target (tarball, ``package.json`` or directory) and return its context.

    A file ending in ``.tgz``, ``.gz`` or ``.tar`` is hashed, extracted into a
    fresh temporary directory and scanned there. A ``package.json`` file is
    scanned via its parent directory; any other path is used as the scan root
    directly. When the target is a file, its SHA-256 is stored on the summary.

    Args:
        path: Target to scan; the caller is expected to have checked it exists.
        args: Parsed CLI options (``mode``, ``include_node_modules``,
            ``max_file_bytes``, ``max_findings``, ``keep_extracted``).

    Returns:
        The populated scan context for this target.
    """
    label = str(path)
    if path.is_file() and path.suffix.lower() in {".tgz", ".gz", ".tar"}:
        tmp = Path(tempfile.mkdtemp(prefix="npm-ts-audit-"))
        try:
            ctx = ScanContext(tmp, label, args.mode, True, args.include_node_modules, args.max_file_bytes, args.max_findings)
            ctx.summary.sha256 = sha256_file(path)
            for finding in safe_extract_tgz(path, tmp):
                ctx.findings.append(finding)
            scan_root(ctx)
        finally:
            # Preserve tmp path in JSON for traceability but delete contents after scan.
            # Running this in ``finally`` keeps a failing scan from leaking the
            # extracted tree in the temp directory.
            if not args.keep_extracted:
                shutil.rmtree(tmp, ignore_errors=True)
        return ctx

    root = path.parent if path.is_file() and path.name == "package.json" else path
    ctx = ScanContext(root, label, args.mode, False, args.include_node_modules, args.max_file_bytes, args.max_findings)
    if path.is_file():
        ctx.summary.sha256 = sha256_file(path)
    scan_root(ctx)
    return ctx
|
|
1164
|
+
|
|
1165
|
+
|
|
1166
|
+
def build_report(contexts: list[ScanContext]) -> ScanReport:
    """Merge per-target scan contexts into a single report.

    Findings from all contexts are concatenated and ordered by descending
    severity rank (per ``SEVERITY_ORDER``), then category, path, line and
    title. The overall decision and strict exit code come from the most severe
    level that has at least one finding:

    * CRITICAL -> ``QUARANTINE`` (exit 2)
    * HIGH -> ``BLOCK_UNTIL_REVIEW`` (exit 2)
    * MEDIUM -> ``REVIEW_BEFORE_USE`` (exit 1)
    * otherwise -> ``PASS_WITH_CAUTION`` (exit 0)

    Args:
        contexts: Scan contexts, one per scanned target.

    Returns:
        The aggregated report, stamped with the current UTC time (seconds
        precision, ``Z`` suffix).
    """
    merged: list[Finding] = [finding for ctx in contexts for finding in ctx.findings]
    summaries: list[TargetSummary] = [ctx.summary for ctx in contexts]

    def sort_key(finding):
        rank = SEVERITY_ORDER.get(finding.severity, 0)
        return (-rank, finding.category, finding.path, finding.line or 0, finding.title)

    merged.sort(key=sort_key)

    counts = dict.fromkeys(SEVERITIES, 0)
    for finding in merged:
        counts[finding.severity] = counts.get(finding.severity, 0) + 1

    # Most severe level first; the first level with any finding decides.
    verdict_table = (
        ("CRITICAL", "QUARANTINE", 2),
        ("HIGH", "BLOCK_UNTIL_REVIEW", 2),
        ("MEDIUM", "REVIEW_BEFORE_USE", 1),
    )
    decision, strict_exit = "PASS_WITH_CAUTION", 0
    for severity, verdict, exit_code in verdict_table:
        if counts.get(severity, 0):
            decision, strict_exit = verdict, exit_code
            break

    now_utc = _dt.datetime.now(_dt.UTC).replace(microsecond=0)
    stamp = now_utc.isoformat().replace("+00:00", "Z")
    return ScanReport(
        tool="npm_ts_static_triage.py",
        generated_at=stamp,
        summaries=summaries,
        findings=merged,
        counts_by_severity=counts,
        decision=decision,
        strict_exit_code=strict_exit,
    )
|
|
1197
|
+
|
|
1198
|
+
|
|
1199
|
+
def markdown_report(report: ScanReport) -> str:
    """Render *report* as a Markdown document.

    The document has four sections: scope (one bullet per scanned target with
    nested detail bullets), a severity count table, the numbered findings, and
    suggested next steps chosen from ``report.decision``.

    Args:
        report: Aggregated scan report.

    Returns:
        The Markdown text, lines joined with ``\\n`` and ending in a newline.
    """
    lines: list[str] = [
        "# npm/TypeScript Dependency & Package Static Audit",
        "",
        f"Generated: `{report.generated_at}`",
        f"Decision: **{report.decision}**",
        "",
        "## Scope",
        "",
    ]
    for s in report.summaries:
        lines.append(f"- Target: `{s.target}`")
        # Two-space indent so the detail bullets nest under the target bullet;
        # with a smaller indent CommonMark renders them as sibling items.
        lines.append(f"  - Mode: `{s.mode}`; tarball: `{s.is_tarball}`; files: `{s.file_count}`; bytes: `{s.total_bytes}`")
        if s.sha256:
            lines.append(f"  - SHA-256: `{s.sha256}`")
        lines.append(f"  - package.json: `{s.package_json_count}`; lockfiles: `{s.lockfile_count}`; tsconfig: `{s.tsconfig_count}`; workflows: `{s.workflow_count}`")
    lines.append("")
    lines.append("## Severity counts")
    lines.append("")
    lines.append("| Severity | Count |")
    lines.append("|---|---:|")
    for sev in ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"]:
        lines.append(f"| {sev} | {report.counts_by_severity.get(sev, 0)} |")
    lines.append("")
    lines.append("## Findings")
    lines.append("")
    if not report.findings:
        lines.append("No findings. This is not a proof of safety; it only means these static checks did not trigger.")
    else:
        for idx, f in enumerate(report.findings, 1):
            # Append the line number only when the finding carries one.
            loc = f"{f.path}:{f.line}" if f.line else f.path
            lines.append(f"### {idx}. [{f.severity}] {f.title}")
            lines.append("")
            lines.append(f"- Category: `{f.category}`")
            lines.append(f"- Location: `{loc}`")
            lines.append(f"- Confidence: `{f.confidence}`")
            if f.tags:
                lines.append(f"- Tags: `{', '.join(f.tags)}`")
            lines.append(f"- Evidence: `{f.evidence}`")
            lines.append(f"- Recommendation: {f.recommendation}")
            lines.append("")
    lines.append("## Suggested next steps")
    lines.append("")
    if report.decision == "QUARANTINE":
        lines.append("- Do not install, build, import or run this package/repo. Quarantine the artifact and rotate any credentials that may have been exposed to it.")
    elif report.decision == "BLOCK_UNTIL_REVIEW":
        lines.append("- Block use until each HIGH finding is manually explained, removed, or allowlisted with evidence.")
    elif report.decision == "REVIEW_BEFORE_USE":
        lines.append("- Review MEDIUM findings before use, especially non-standard dependency sources and CI install behavior.")
    else:
        lines.append("- Proceed only with normal supply-chain controls: lockfile review, script suppression, signature/provenance checks and sandboxed install/build.")
    lines.append("- Use `npm ci --ignore-scripts` for review installs and avoid optional dependencies unless explicitly needed.")
    lines.append("- Run vulnerability/signature tooling in a secret-free environment after static review.")
    lines.append("")
    return "\n".join(lines)
|
|
1253
|
+
|
|
1254
|
+
|
|
1255
|
+
def sarif_report(report: ScanReport) -> dict[str, Any]:
    """Convert *report* into a SARIF 2.1.0 document (as a plain dict).

    Each finding category becomes one SARIF rule, created the first time the
    category is seen and carrying that finding's level as its default. Every
    finding becomes one result pointing at its path and line (line 1 when the
    finding has no line).

    Args:
        report: Aggregated scan report.

    Returns:
        A JSON-serialisable dict with a single SARIF run.
    """
    rules_by_id: dict[str, dict[str, Any]] = {}
    results: list[dict[str, Any]] = []

    for finding in report.findings:
        rule_id = finding.category
        if rule_id not in rules_by_id:
            rules_by_id[rule_id] = {
                "id": rule_id,
                "name": rule_id,
                "shortDescription": {"text": rule_id},
                "fullDescription": {"text": "Static npm/TypeScript supply-chain audit finding"},
                "defaultConfiguration": {"level": sarif_level(finding.severity)},
            }
        physical_location = {
            "artifactLocation": {"uri": finding.path},
            "region": {"startLine": finding.line or 1},
        }
        results.append({
            "ruleId": rule_id,
            "level": sarif_level(finding.severity),
            "message": {"text": f"[{finding.severity}] {finding.title}: {finding.evidence}"},
            "locations": [{"physicalLocation": physical_location}],
        })

    driver = {
        "name": "npm_ts_static_triage.py",
        "informationUri": "https://docs.npmjs.com/",
        "rules": list(rules_by_id.values()),
    }
    run = {"tool": {"driver": driver}, "results": results}
    return {
        "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
        "version": "2.1.0",
        "runs": [run],
    }
|
|
1293
|
+
|
|
1294
|
+
|
|
1295
|
+
def sarif_level(sev: str) -> str:
    """Map a finding severity to a SARIF result level.

    CRITICAL and HIGH map to ``error``, MEDIUM to ``warning``, and anything
    else to ``note``.
    """
    level_by_severity = {"CRITICAL": "error", "HIGH": "error", "MEDIUM": "warning"}
    return level_by_severity.get(sev, "note")
|
|
1301
|
+
|
|
1302
|
+
|
|
1303
|
+
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the command-line arguments of the static triage tool.

    Args:
        argv: Argument list without the program name.

    Returns:
        Namespace with ``targets``, ``mode``, ``json_out``, ``markdown_out``,
        ``sarif_out``, ``strict_exit``, ``include_node_modules``,
        ``max_file_bytes``, ``max_findings``, ``ioc_file`` and ``keep_extracted``.
    """
    parser = argparse.ArgumentParser(
        description="Static npm/TypeScript dependency and package malware/quality triage. Does not execute target code.",
    )
    parser.add_argument(
        "targets",
        nargs="+",
        help="Repo/package directory, package.json, or npm package tarball (.tgz) to scan",
    )
    parser.add_argument(
        "--mode",
        choices=["package", "library", "application", "repo"],
        default="package",
        help="Review mode; affects quality/lockfile expectations",
    )
    parser.add_argument("--json", dest="json_out", help="Write JSON report to this path")
    parser.add_argument(
        "--markdown",
        "--out",
        dest="markdown_out",
        help="Write Markdown report to this path",
    )
    parser.add_argument("--sarif", dest="sarif_out", help="Write SARIF report to this path")
    parser.add_argument(
        "--strict-exit",
        action="store_true",
        help="Exit non-zero for MEDIUM/HIGH/CRITICAL findings; HIGH/CRITICAL return 2",
    )
    parser.add_argument(
        "--include-node-modules",
        action="store_true",
        help="Include node_modules in repo scans. Tarball scans include all extracted contents by default.",
    )
    parser.add_argument(
        "--max-file-bytes",
        type=int,
        default=5_000_000,
        help="Max bytes to read per text file",
    )
    parser.add_argument(
        "--max-findings",
        type=int,
        default=1000,
        help="Maximum findings to record",
    )
    parser.add_argument(
        "--ioc-file",
        action="append",
        default=[],
        help="Additional IOC text file; one indicator per line. hxxp and [.] are normalized.",
    )
    parser.add_argument(
        "--keep-extracted",
        action="store_true",
        help="Keep extracted tarball temp directories for manual review",
    )
    return parser.parse_args(argv)
|
|
1317
|
+
|
|
1318
|
+
|
|
1319
|
+
def main(argv: list[str]) -> int:
    """Run the CLI: scan every target, write the requested reports, return the exit code.

    IOC files are loaded first: the ``rules/iocs.txt`` file one directory above
    this script's directory, followed by any ``--ioc-file`` paths. Targets are
    scanned in order; the first target that does not exist aborts the run. When
    no output file is requested, the Markdown report is printed to stdout.

    Args:
        argv: Argument list without the program name.

    Returns:
        ``3`` when a target is missing; otherwise the report's strict exit code
        if ``--strict-exit`` was given, else ``0``.
    """
    args = parse_args(argv)

    bundled_iocs = Path(__file__).resolve().parents[1] / "rules" / "iocs.txt"
    extra_iocs = [Path(entry) for entry in args.ioc_file]
    load_ioc_files([bundled_iocs, *extra_iocs])

    contexts: list[ScanContext] = []
    for raw_target in args.targets:
        target_path = Path(raw_target)
        if not target_path.exists():
            sys.stderr.write(f"Target not found: {raw_target}\n")
            return 3
        contexts.append(scan_target(target_path, args))

    report = build_report(contexts)

    if args.json_out:
        json_text = json.dumps(asdict(report), indent=2, ensure_ascii=False)
        Path(args.json_out).write_text(json_text, encoding="utf-8")
    if args.markdown_out:
        Path(args.markdown_out).write_text(markdown_report(report), encoding="utf-8")
    if args.sarif_out:
        sarif_text = json.dumps(sarif_report(report), indent=2)
        Path(args.sarif_out).write_text(sarif_text, encoding="utf-8")

    # With no output file requested, fall back to Markdown on stdout.
    if not (args.json_out or args.markdown_out or args.sarif_out):
        print(markdown_report(report))

    return report.strict_exit_code if args.strict_exit else 0
|
|
1342
|
+
|
|
1343
|
+
|
|
1344
|
+
if __name__ == "__main__":
    # Script entry point: run the CLI and use main()'s return value as the exit status.
    sys.exit(main(sys.argv[1:]))
|