pkgwhy 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. pkgwhy/__init__.py +3 -0
  2. pkgwhy/__main__.py +6 -0
  3. pkgwhy/agent/__init__.py +2 -0
  4. pkgwhy/agent/judge.py +93 -0
  5. pkgwhy/cli.py +676 -0
  6. pkgwhy/core/__init__.py +2 -0
  7. pkgwhy/core/constants.py +13 -0
  8. pkgwhy/core/models.py +608 -0
  9. pkgwhy/dependencies/__init__.py +2 -0
  10. pkgwhy/dependencies/graph.py +68 -0
  11. pkgwhy/dependencies/reason.py +79 -0
  12. pkgwhy/dynamic/__init__.py +2 -0
  13. pkgwhy/dynamic/analysis.py +156 -0
  14. pkgwhy/explanations/__init__.py +2 -0
  15. pkgwhy/explanations/explain.py +47 -0
  16. pkgwhy/explanations/local_db.py +52 -0
  17. pkgwhy/imports/__init__.py +2 -0
  18. pkgwhy/imports/scanner.py +43 -0
  19. pkgwhy/inspection/__init__.py +2 -0
  20. pkgwhy/inspection/files.py +540 -0
  21. pkgwhy/inspection/python_static.py +323 -0
  22. pkgwhy/inspection/size.py +58 -0
  23. pkgwhy/inspection/text_patterns.py +135 -0
  24. pkgwhy/manifests/__init__.py +2 -0
  25. pkgwhy/manifests/lockfiles.py +51 -0
  26. pkgwhy/manifests/pyproject.py +37 -0
  27. pkgwhy/manifests/requirements.py +27 -0
  28. pkgwhy/metadata/__init__.py +2 -0
  29. pkgwhy/metadata/installed.py +83 -0
  30. pkgwhy/metadata/pypi.py +199 -0
  31. pkgwhy/policy/__init__.py +1 -0
  32. pkgwhy/policy/agent_policy.py +114 -0
  33. pkgwhy/policy/audit_log.py +60 -0
  34. pkgwhy/policy/tool_execution.py +76 -0
  35. pkgwhy/provenance/__init__.py +2 -0
  36. pkgwhy/provenance/installed.py +45 -0
  37. pkgwhy/registry/__init__.py +2 -0
  38. pkgwhy/registry/local.py +178 -0
  39. pkgwhy/registry/manifest.py +78 -0
  40. pkgwhy/registry/publish.py +142 -0
  41. pkgwhy/registry/run.py +148 -0
  42. pkgwhy/registry/tools.py +121 -0
  43. pkgwhy/reports/__init__.py +2 -0
  44. pkgwhy/reports/audit.py +81 -0
  45. pkgwhy/risk/__init__.py +5 -0
  46. pkgwhy/risk/rules.py +372 -0
  47. pkgwhy/risk/scoring.py +231 -0
  48. pkgwhy/typosquat/__init__.py +2 -0
  49. pkgwhy/typosquat/detector.py +182 -0
  50. pkgwhy/typosquat/popular_packages.py +34 -0
  51. pkgwhy/vulnerabilities/__init__.py +2 -0
  52. pkgwhy/vulnerabilities/matching.py +122 -0
  53. pkgwhy/vulnerabilities/osv.py +330 -0
  54. pkgwhy-1.0.0.dist-info/METADATA +688 -0
  55. pkgwhy-1.0.0.dist-info/RECORD +58 -0
  56. pkgwhy-1.0.0.dist-info/WHEEL +4 -0
  57. pkgwhy-1.0.0.dist-info/entry_points.txt +2 -0
  58. pkgwhy-1.0.0.dist-info/licenses/LICENSE +22 -0
@@ -0,0 +1,540 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import tomllib
5
+ from collections.abc import Callable
6
+ from importlib.metadata import Distribution
7
+ from pathlib import Path
8
+
9
+ from pkgwhy.core.models import FileStaticAnalysis, ReadabilityStatus, RiskRuleEvidence, RuleSeverity, SourceAvailability
10
+ from pkgwhy.inspection.size import JAVASCRIPT_SUFFIXES, NATIVE_SUFFIXES
11
+ from pkgwhy.inspection.text_patterns import analyze_text_patterns, is_text_pattern_candidate
12
+ from pkgwhy.risk.rules import make_rule_evidence
13
+
14
# File-name classification used while walking a distribution's files.
SHELL_SUFFIXES = {".sh", ".bash", ".zsh", ".fish", ".ksh"}
INSTALL_TIME_SCRIPT_NAMES = {"setup.py"}
BUILD_METADATA_NAMES = {"pyproject.toml", "setup.cfg"}

# Size cutoff (bytes) above which text/JavaScript files are skipped by the readers below.
MAX_TEXT_SCAN_BYTES = 500_000

# Thresholds for the JavaScript minification / density heuristics.
LONG_JS_LINE_LENGTH = 500
MINIFIED_JS_LINE_LENGTH = 1_000
LOW_WHITESPACE_RATIO = 0.08
HIGH_PUNCTUATION_RATIO = 0.32

# Marker phrases embedded in warnings; infer_readability() searches for them again.
JS_LIKELY_OBFUSCATED_WARNING = "likely obfuscated javascript"
JS_POSSIBLY_OBFUSCATED_WARNING = "possibly obfuscated javascript"
JS_APPEARS_MINIFIED_WARNING = "appears minified"
JS_MAY_BE_MINIFIED_WARNING = "may be minified"

# Each table maps a compiled pattern to the human-readable label reported on a match.
JS_DYNAMIC_PATTERNS = {
    re.compile(expression): label
    for expression, label in (
        (r"\beval\s*\(", "JavaScript eval call"),
        (r"\bFunction\s*\(", "JavaScript Function constructor"),
    )
}
JS_ENCODED_PATTERNS = {
    re.compile(expression): label
    for expression, label in (
        (r"\batob\s*\(", "JavaScript base64 decode call"),
        (r"\bbtoa\s*\(", "JavaScript base64 encode call"),
    )
}
JS_OBFUSCATION_PATTERNS = {
    re.compile(expression): label
    for expression, label in (
        (r"_0x[a-fA-F0-9]{3,}", "hex-like JavaScript identifier"),
        (r"\\x[0-9a-fA-F]{2}", "hex-escaped JavaScript string content"),
        (r"while\s*\(\s*!!\[\]\s*\)", "control-flow flattening pattern"),
        (r"debugger\s*;", "JavaScript anti-debugging statement"),
    )
}

# Stand-alone JavaScript patterns.
JS_LARGE_ENCODED_PATTERN = re.compile(r"['\"][A-Za-z0-9+/]{80,}={0,2}['\"]")
JS_SOURCE_MAP_PATTERN = re.compile(r"sourceMappingURL\s*=", re.IGNORECASE)

# Word-level references looked for in setup.py source.
SETUP_SUBPROCESS_PATTERN = re.compile(r"\b(subprocess|os\.system|os\.popen|Popen|check_call|check_output)\b")
SETUP_NETWORK_PATTERN = re.compile(r"\b(requests|httpx|urllib|socket|urlopen)\b")
SETUP_DYNAMIC_PATTERN = re.compile(r"\b(eval|exec|compile|__import__|importlib\.import_module)\b")
46
+
47
+
48
def distribution_file_paths(dist: Distribution | None, limit: int = 200) -> list[Path]:
    """Return on-disk paths of the regular files recorded for a distribution.

    Args:
        dist: Installed distribution to inspect, or ``None``.
        limit: Maximum number of paths to return. A value of zero or less
            yields an empty list.

    Returns:
        Up to ``limit`` paths, in the order the distribution lists its files.
        Entries that cannot be located, are not regular files, or raise
        ``OSError`` while being checked are skipped. An empty list is
        returned when ``dist`` is ``None`` or it records no file list.
    """
    if dist is None or limit <= 0:
        return []

    # Read the attribute once; it is evaluated on every access and the
    # original code touched it twice.
    recorded_files = dist.files
    if recorded_files is None:
        return []

    paths: list[Path] = []
    for package_file in recorded_files:
        try:
            path = Path(dist.locate_file(package_file))
        except (OSError, ValueError):
            continue
        try:
            if not path.is_file():
                continue
        except OSError:
            continue
        paths.append(path)
        if len(paths) >= limit:
            break
    return paths
65
+
66
+
67
def infer_source_availability(paths: list[Path]) -> SourceAvailability:
    """Classify source availability from the list of installed file paths.

    Returns ``INSTALLED_METADATA_ONLY`` for an empty list,
    ``INSTALLED_SOURCE_PRESENT`` when any path has the exact suffix ``.py``,
    and ``SOURCE_AVAILABILITY_UNKNOWN`` otherwise.
    """
    if paths:
        for candidate in paths:
            if candidate.suffix == ".py":
                return SourceAvailability.INSTALLED_SOURCE_PRESENT
        return SourceAvailability.SOURCE_AVAILABILITY_UNKNOWN
    return SourceAvailability.INSTALLED_METADATA_ONLY
73
+
74
+
75
def infer_readability(paths: list[Path], file_analysis: FileStaticAnalysis | None = None) -> ReadabilityStatus:
    """Derive a readability status from file suffixes and analysis warnings.

    Any ``.py`` path makes the result ``READABLE``. Otherwise the warnings of
    ``file_analysis`` (when given and truthy) are searched, case-insensitively,
    for the obfuscation and minification marker phrases, in that priority
    order. With no match the result is ``NOT_ENOUGH_SOURCE_AVAILABLE``.
    """
    for candidate in paths:
        if candidate.suffix == ".py":
            return ReadabilityStatus.READABLE

    if not file_analysis:
        return ReadabilityStatus.NOT_ENOUGH_SOURCE_AVAILABLE

    def any_warning_mentions(markers) -> bool:
        # Lazily lower-cases warnings so evaluation stops at the first hit.
        return any(
            marker in warning.lower()
            for warning in file_analysis.warnings
            for marker in markers
        )

    if any_warning_mentions((JS_LIKELY_OBFUSCATED_WARNING,)):
        return ReadabilityStatus.LIKELY_OBFUSCATED
    if any_warning_mentions((JS_POSSIBLY_OBFUSCATED_WARNING,)):
        return ReadabilityStatus.POSSIBLY_OBFUSCATED
    if any_warning_mentions({JS_APPEARS_MINIFIED_WARNING, JS_MAY_BE_MINIFIED_WARNING}):
        return ReadabilityStatus.MINIFIED
    return ReadabilityStatus.NOT_ENOUGH_SOURCE_AVAILABLE
89
+
90
+
91
def detect_file_capabilities(paths: list[Path], entry_points: list[str]) -> list[str]:
    """Return only the detected capabilities from ``analyze_file_signals``."""
    analysis = analyze_file_signals(paths, entry_points)
    return analysis.detected_capabilities
93
+
94
+
95
def analyze_file_signals(paths: list[Path], entry_points: list[str]) -> FileStaticAnalysis:
    """Collect static signals from a set of package files into one result.

    Every path is checked, in order, for text patterns, native/WASM suffixes,
    JavaScript suffixes, shell scripts, and build files (``setup.py``,
    ``pyproject.toml``, ``setup.cfg``). Files are only read, never executed.

    Args:
        paths: Files to inspect.
        entry_points: Entry points from package metadata; a non-empty list
            adds an entrypoint capability and evidence line.

    Returns:
        A ``FileStaticAnalysis`` with sorted capabilities, per-category
        counters, and warnings / evidence / references truncated to 100
        entries each. Rule evidence goes through ``_prioritize_rule_evidence``.
    """
    found_capabilities: set[str] = set()
    all_warnings: list[str] = []
    all_evidence: list[str] = []
    all_rules: list[RiskRuleEvidence] = []
    urls: list[str] = []
    domains: list[str] = []
    credentials: list[str] = []
    js_scanned = 0
    shell_count = 0
    native_count = 0
    wasm_count = 0
    setup_count = 0

    def absorb(result: FileStaticAnalysis, *, with_capabilities: bool = True) -> None:
        # Fold one per-file result into the running totals.
        if with_capabilities:
            found_capabilities.update(result.detected_capabilities)
        all_warnings.extend(result.warnings)
        all_evidence.extend(result.evidence)
        all_rules.extend(result.rule_evidence)

    if entry_points:
        found_capabilities.add("CLI or plugin entrypoints declared in package metadata")
        all_evidence.append("Package metadata declares CLI or plugin entrypoints.")

    for path in paths:
        suffix = path.suffix.lower()
        name = path.name

        if is_text_pattern_candidate(path):
            text_result = analyze_text_patterns(path)
            absorb(text_result)
            urls.extend(text_result.url_references)
            domains.extend(text_result.domain_references)
            credentials.extend(text_result.credential_references)

        if suffix in NATIVE_SUFFIXES and suffix == ".wasm":
            wasm_count += 1
            found_capabilities.add("WASM binary code present")
            all_evidence.append(f"WASM file present: {name}")
            all_rules.append(
                make_rule_evidence(
                    "PKGWHY-BIN-002",
                    message="WebAssembly binary file is present.",
                    evidence=[f"WASM file present: {name}."],
                    file_path=name,
                    symbol=suffix,
                )
            )
        elif suffix in NATIVE_SUFFIXES:
            native_count += 1
            found_capabilities.add("Native compiled code present")
            all_evidence.append(f"Native or executable file present: {name}")
            if suffix == ".exe":
                binary_rule_id = "PKGWHY-BIN-003"
            else:
                binary_rule_id = "PKGWHY-BIN-001"
            all_rules.append(
                make_rule_evidence(
                    binary_rule_id,
                    message=f"Native or executable file present: {name}.",
                    evidence=[f"File extension {suffix} detected for {name}."],
                    file_path=name,
                    symbol=suffix,
                )
            )

        if suffix in JAVASCRIPT_SUFFIXES:
            found_capabilities.add("Browser or JavaScript code present")
            js_result = _analyze_javascript_file(path)
            js_scanned += js_result.javascript_files_scanned
            absorb(js_result)

        if _is_shell_script(path):
            shell_count += 1
            found_capabilities.add("Shell script files present")
            all_evidence.append(f"Shell script file present: {name}")

        if name in INSTALL_TIME_SCRIPT_NAMES:
            setup_result = _analyze_setup_py(path)
            setup_count += 1
            absorb(setup_result)
        elif name in BUILD_METADATA_NAMES:
            # Build metadata contributes warnings/evidence/rules but no capabilities.
            absorb(_analyze_build_metadata(path), with_capabilities=False)

    return FileStaticAnalysis(
        detected_capabilities=sorted(found_capabilities),
        warnings=all_warnings[:100],
        evidence=all_evidence[:100],
        rule_evidence=_prioritize_rule_evidence(all_rules),
        url_references=_unique(urls)[:100],
        domain_references=_unique(domains)[:100],
        credential_references=_unique(credentials)[:100],
        javascript_files_scanned=js_scanned,
        shell_scripts_detected=shell_count,
        native_binaries_detected=native_count,
        wasm_files_detected=wasm_count,
        setup_files_detected=setup_count,
    )
192
+
193
+
194
def _analyze_setup_py(path: Path) -> FileStaticAnalysis:
    """Report static signals for a ``setup.py`` file without running it.

    The presence of the file always yields one capability, warning, evidence
    line and ``PKGWHY-BUILD-001`` rule. When the source can be read through
    ``_read_small_text``, the first line matching each of the subprocess,
    network and dynamic-execution patterns adds a further capability, warning,
    evidence line and rule (``PKGWHY-BUILD-002`` to ``-004``).
    """
    script_name = path.name
    found_capabilities = {"Install-time setup files present"}
    notes = [
        "setup.py is executable Python used by some build/install flows. pkgwhy reports static signals only and does not run it."
    ]
    findings = [f"Install-time setup script present: {script_name}"]
    rules = [
        make_rule_evidence(
            "PKGWHY-BUILD-001",
            message="Executable setup.py file is present.",
            evidence=[f"{script_name} is present."],
            file_path=script_name,
            symbol="setup.py",
        )
    ]

    signal_checks = (
        ("PKGWHY-BUILD-002", "Subprocess or shell execution signals", SETUP_SUBPROCESS_PATTERN, "subprocess or shell reference"),
        ("PKGWHY-BUILD-003", "Network access signals", SETUP_NETWORK_PATTERN, "network access reference"),
        ("PKGWHY-BUILD-004", "Dynamic code execution signals", SETUP_DYNAMIC_PATTERN, "dynamic execution reference"),
    )

    source = _read_small_text(path)
    if source is not None:
        for rule_id, capability, pattern, detail in signal_checks:
            hit_line = _first_matching_line(source, pattern)
            if hit_line is not None:
                found_capabilities.add(capability)
                notes.append(f"setup.py contains {detail}: {script_name}:{hit_line}")
                findings.append(f"setup.py static signal in {script_name}:{hit_line}: {detail}.")
                rules.append(
                    make_rule_evidence(
                        rule_id,
                        message=f"setup.py contains {detail}.",
                        evidence=[f"{script_name}:{hit_line} contains {detail}."],
                        file_path=script_name,
                        line_number=hit_line,
                        symbol=detail,
                    )
                )

    return FileStaticAnalysis(
        detected_capabilities=sorted(found_capabilities),
        warnings=notes,
        evidence=findings,
        rule_evidence=rules,
    )
247
+
248
+
249
def _analyze_build_metadata(path: Path) -> FileStaticAnalysis:
    """Summarise a build metadata file (``setup.cfg`` or ``pyproject.toml``).

    ``setup.cfg`` is only reported as present (``PKGWHY-BUILD-006``); its
    content is not read. Any other file is treated as ``pyproject.toml``: it
    is parsed with ``tomllib`` and, when a non-blank ``build-backend`` string
    exists in ``[build-system]``, reported through ``PKGWHY-BUILD-005``.
    Unreadable, oversized or unparsable files yield a warning instead.
    """
    file_name = path.name
    if file_name == "setup.cfg":
        cfg_rule = make_rule_evidence(
            "PKGWHY-BUILD-006",
            message="setup.cfg metadata file is present.",
            evidence=["setup.cfg is present."],
            file_path=file_name,
            symbol="setup.cfg",
        )
        return FileStaticAnalysis(
            evidence=["setup.cfg metadata file present."],
            rule_evidence=[cfg_rule],
        )

    source = _read_small_text(path)
    if source is None:
        return FileStaticAnalysis(warnings=["Skipped large or unreadable pyproject.toml during static scan."])

    try:
        data = tomllib.loads(source)
    except tomllib.TOMLDecodeError as exc:
        return FileStaticAnalysis(warnings=[f"Could not statically parse pyproject.toml: {type(exc).__name__}"])

    build_system = data.get("build-system")
    if not isinstance(build_system, dict):
        return FileStaticAnalysis(evidence=["pyproject.toml present without build-system table."])

    backend = build_system.get("build-backend")
    has_backend = isinstance(backend, str) and bool(backend.strip())
    if not has_backend:
        return FileStaticAnalysis(evidence=["pyproject.toml build-system table present without build-backend."])

    backend_line = _first_matching_line(source, re.compile(r"build-backend\s*="))
    # The same list object is handed to both the result and the rule, as before.
    evidence = [f"pyproject.toml declares build backend: {backend}"]
    backend_rule = make_rule_evidence(
        "PKGWHY-BUILD-005",
        message=f"Build backend declared: {backend}.",
        evidence=evidence,
        file_path=file_name,
        line_number=backend_line,
        symbol=backend,
    )
    return FileStaticAnalysis(evidence=evidence, rule_evidence=[backend_rule])
296
+
297
+
298
def _analyze_javascript_file(path: Path) -> FileStaticAnalysis:
    """Statically scan one JavaScript file for readability and risk signals.

    Files larger than ``MAX_TEXT_SCAN_BYTES`` or not readable as UTF-8 produce
    a single warning and are not counted as scanned. Otherwise the source is
    checked for minification (file name / line length / whitespace ratio),
    character density, dynamic-execution and encoding calls, large
    encoded-looking string literals, source-map references, and obfuscation
    patterns (two matches: "possibly", three or more: "likely").

    Returns:
        A ``FileStaticAnalysis`` with ``javascript_files_scanned=1`` when the
        file was read, carrying the capabilities, warnings, evidence and rule
        evidence gathered above.
    """
    file_name = path.name
    try:
        oversized = path.stat().st_size > MAX_TEXT_SCAN_BYTES
        source = "" if oversized else path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        return FileStaticAnalysis(
            warnings=[f"Could not statically read JavaScript file {file_name}: {exc.__class__.__name__}"],
        )
    if oversized:
        return FileStaticAnalysis(
            warnings=[f"Skipped large JavaScript file during static scan: {file_name}"],
        )

    capabilities: set[str] = set()
    warnings: list[str] = []
    evidence: list[str] = [f"Statically scanned JavaScript file: {file_name}"]
    rules = []

    def emit(rule_id: str, **fields) -> None:
        # Forward exactly the keyword arguments given, plus the file name.
        rules.append(make_rule_evidence(rule_id, file_path=file_name, **fields))

    def is_punctuation(char: str) -> bool:
        return not (char.isalnum() or char.isspace())

    lines = source.splitlines()
    if not lines:
        lines = [source]
    longest_line = max(map(len, lines), default=0)
    whitespace_ratio = _character_ratio(source, str.isspace)
    punctuation_ratio = _character_ratio(source, is_punctuation)
    sparse_whitespace = whitespace_ratio < LOW_WHITESPACE_RATIO

    # --- Minification ------------------------------------------------------
    if file_name.endswith(".min.js") or longest_line >= MINIFIED_JS_LINE_LENGTH:
        warnings.append(f"JavaScript file {JS_APPEARS_MINIFIED_WARNING}: {file_name}")
        evidence.append(f"JavaScript minification signal in {file_name}: long line or .min.js filename.")
        emit(
            "PKGWHY-JS-001",
            message="JavaScript file appears minified.",
            evidence=[f"{file_name} has .min.js filename or a line at least {MINIFIED_JS_LINE_LENGTH} characters long."],
            line_number=_first_long_line(lines, MINIFIED_JS_LINE_LENGTH),
            symbol="minified-javascript",
        )
    elif longest_line >= LONG_JS_LINE_LENGTH and sparse_whitespace:
        warnings.append(f"JavaScript file {JS_MAY_BE_MINIFIED_WARNING}: {file_name}")
        evidence.append(f"JavaScript readability signal in {file_name}: long line with low whitespace ratio.")
        emit(
            "PKGWHY-JS-001",
            message="JavaScript file may be minified.",
            evidence=[f"{file_name} has a long line with low whitespace ratio."],
            line_number=_first_long_line(lines, LONG_JS_LINE_LENGTH),
            symbol="minified-javascript",
        )

    # --- Character density ---------------------------------------------------
    if sparse_whitespace and punctuation_ratio > HIGH_PUNCTUATION_RATIO:
        warnings.append(f"JavaScript file has low whitespace and high punctuation ratios: {file_name}")
        evidence.append(f"JavaScript density signal in {file_name}: low whitespace and high punctuation.")
        emit(
            "PKGWHY-JS-001",
            message="JavaScript file has low whitespace and high punctuation ratios.",
            evidence=[f"{file_name} has low whitespace and high punctuation ratios."],
            symbol="javascript-density",
        )

    # --- Dynamic execution and encoding calls --------------------------------
    pattern_tables = (
        (JS_DYNAMIC_PATTERNS, "PKGWHY-JS-002", "JavaScript dynamic code execution signals", "dynamic execution"),
        (JS_ENCODED_PATTERNS, "PKGWHY-JS-003", "Encoded payload handling signals", "encoded payload"),
    )
    for table, rule_id, capability, kind in pattern_tables:
        for pattern, detail in table.items():
            if not pattern.search(source):
                continue
            # The whole-source search can match across a line break, in which
            # case no single line matches and the evidence falls back to line 1.
            hit_line = _first_matching_line(source, pattern)
            capabilities.add(capability)
            evidence.append(f"JavaScript {kind} signal in {file_name}: {detail}.")
            emit(
                rule_id,
                message=f"JavaScript {kind} signal: {detail}.",
                evidence=[f"{file_name}:{hit_line or 1} references {detail}."],
                line_number=hit_line,
                symbol=detail,
            )

    # --- Large encoded-looking string literal --------------------------------
    large_encoded_line = _first_matching_line(source, JS_LARGE_ENCODED_PATTERN)
    if large_encoded_line is not None:
        capabilities.add("Encoded payload handling signals")
        evidence.append(f"JavaScript large encoded-string signal in {file_name}:{large_encoded_line}.")
        emit(
            "PKGWHY-JS-003",
            message="JavaScript large encoded-string signal detected.",
            evidence=[f"{file_name}:{large_encoded_line} contains a large encoded-looking string; value omitted."],
            line_number=large_encoded_line,
            symbol="large encoded-looking string",
        )

    # --- Source-map reference -------------------------------------------------
    source_map_line = _first_matching_line(source, JS_SOURCE_MAP_PATTERN)
    if source_map_line is not None:
        evidence.append(f"JavaScript source-map reference in {file_name}:{source_map_line}.")
        emit(
            "PKGWHY-JS-005",
            message="JavaScript source-map reference detected.",
            evidence=[f"{file_name}:{source_map_line} references sourceMappingURL."],
            line_number=source_map_line,
            symbol="sourceMappingURL",
        )

    # --- Obfuscation ------------------------------------------------------------
    obfuscation_signals = [
        detail for pattern, detail in JS_OBFUSCATION_PATTERNS.items() if pattern.search(source)
    ]
    if len(obfuscation_signals) >= 2:
        likely = len(obfuscation_signals) >= 3
        joined_signals = ", ".join(sorted(obfuscation_signals))
        if likely:
            marker = JS_LIKELY_OBFUSCATED_WARNING
            message = "JavaScript file has likely obfuscated signals."
            extra = {"severity": RuleSeverity.HIGH}
        else:
            marker = JS_POSSIBLY_OBFUSCATED_WARNING
            message = "JavaScript file has possible obfuscation signals."
            extra = {}
        warnings.append(f"JavaScript file has {marker} signals: {file_name}")
        capabilities.add("JavaScript obfuscation signals")
        evidence.append(f"JavaScript obfuscation signals in {file_name}: {joined_signals}.")
        emit(
            "PKGWHY-JS-004",
            message=message,
            evidence=[f"{file_name} contains signals: {joined_signals}."],
            line_number=_first_obfuscation_line(source),
            symbol="javascript-obfuscation",
            **extra,
        )

    return FileStaticAnalysis(
        detected_capabilities=sorted(capabilities),
        warnings=warnings,
        evidence=evidence,
        rule_evidence=rules,
        javascript_files_scanned=1,
    )
459
+
460
+
461
def _read_small_text(path: Path) -> str | None:
    """Return the UTF-8 text of ``path``, or ``None`` when it should be skipped.

    ``None`` is returned when the file is larger than ``MAX_TEXT_SCAN_BYTES``,
    cannot be stat'ed or read, or is not valid UTF-8.
    """
    try:
        within_budget = path.stat().st_size <= MAX_TEXT_SCAN_BYTES
        text = path.read_text(encoding="utf-8") if within_budget else None
    except (OSError, UnicodeDecodeError):
        return None
    return text
468
+
469
+
470
+ def _first_matching_line(source: str, pattern: re.Pattern[str]) -> int | None:
471
+ for index, line in enumerate(source.splitlines(), start=1):
472
+ if pattern.search(line):
473
+ return index
474
+ return None
475
+
476
+
477
+ def _first_long_line(lines: list[str], minimum_length: int) -> int | None:
478
+ for index, line in enumerate(lines, start=1):
479
+ if len(line) >= minimum_length:
480
+ return index
481
+ return None
482
+
483
+
484
def _first_obfuscation_line(source: str) -> int | None:
    """Return the earliest line number matched by any obfuscation pattern.

    Each pattern in ``JS_OBFUSCATION_PATTERNS`` is located with
    ``_first_matching_line``; the smallest line number found is returned, or
    ``None`` when no pattern matches any line.
    """
    first_hits = (_first_matching_line(source, pattern) for pattern in JS_OBFUSCATION_PATTERNS)
    return min((hit for hit in first_hits if hit is not None), default=None)
491
+
492
+
493
def _is_shell_script(path: Path) -> bool:
    """Return whether ``path`` looks like a shell script.

    A file qualifies when its lower-cased suffix is in ``SHELL_SUFFIXES``, or
    when its first line is a ``#!`` line whose interpreter is a shell.

    The interpreter is taken from the basename of the first word after ``#!``
    (or, for ``env``, the first following word that is neither an option nor
    a ``VAR=value`` assignment). It counts as a shell when that name, with
    any trailing version digits removed, ends in ``sh`` (``sh``, ``bash``,
    ``zsh``, ``fish``, ``ksh``, ``dash``, ...). Matching on the interpreter
    name rather than on the whole line avoids false positives from paths that
    merely contain "sh", such as ``#!/usr/share/tool/bin/python``.

    Returns ``False`` when the file cannot be opened.
    """
    if path.suffix.lower() in SHELL_SUFFIXES:
        return True

    try:
        with path.open("rb") as handle:
            # Only the start of the first line is needed to inspect a shebang.
            first_line = handle.readline(128)
    except OSError:
        return False

    if not first_line.startswith(b"#!"):
        return False
    words = first_line[2:].split()
    if not words:
        return False

    interpreter = words[0].rsplit(b"/", 1)[-1]
    if interpreter == b"env":
        # `env` launches the real interpreter named by a later word.
        interpreter = next(
            (
                word.rsplit(b"/", 1)[-1]
                for word in words[1:]
                if not word.startswith(b"-") and b"=" not in word
            ),
            b"",
        )

    # Drop a trailing version such as "bash5" before comparing.
    interpreter = interpreter.lower().rstrip(b"0123456789.")
    return interpreter.endswith(b"sh")
503
+
504
+
505
+ def _character_ratio(source: str, predicate: Callable[[str], bool]) -> float:
506
+ if not source:
507
+ return 0.0
508
+ return sum(1 for char in source if predicate(char)) / len(source)
509
+
510
+
511
+ def _unique(values: list[str]) -> list[str]:
512
+ unique_values: list[str] = []
513
+ for value in values:
514
+ if value not in unique_values:
515
+ unique_values.append(value)
516
+ return unique_values
517
+
518
+
519
def _prioritize_rule_evidence(rules: list[RiskRuleEvidence], limit: int = 100) -> list[RiskRuleEvidence]:
    """Deduplicate rule evidence and return the most severe entries first.

    Rules sharing the same (rule id, file path, line number, symbol, message)
    are collapsed to their first occurrence. The survivors are ordered by
    severity (CRITICAL first, unknown severities last), then rule id, file
    path, line number and symbol, and truncated to ``limit`` entries.
    """
    severity_rank = {
        RuleSeverity.CRITICAL: 0,
        RuleSeverity.HIGH: 1,
        RuleSeverity.MEDIUM: 2,
        RuleSeverity.LOW: 3,
        RuleSeverity.INFO: 4,
    }

    first_seen: dict[tuple[str, str | None, int | None, str | None, str], RiskRuleEvidence] = {}
    for rule in rules:
        identity = (rule.rule_id, rule.file_path, rule.line_number, rule.symbol, rule.message)
        if identity not in first_seen:
            first_seen[identity] = rule

    def sort_key(rule: RiskRuleEvidence):
        # Missing optional fields sort as empty string / zero.
        return (
            severity_rank.get(rule.severity, 5),
            rule.rule_id,
            rule.file_path or "",
            rule.line_number or 0,
            rule.symbol or "",
        )

    ranked = list(first_seen.values())
    ranked.sort(key=sort_key)
    return ranked[:limit]