pkgwhy 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. pkgwhy/__init__.py +3 -0
  2. pkgwhy/__main__.py +6 -0
  3. pkgwhy/agent/__init__.py +2 -0
  4. pkgwhy/agent/judge.py +93 -0
  5. pkgwhy/cli.py +676 -0
  6. pkgwhy/core/__init__.py +2 -0
  7. pkgwhy/core/constants.py +13 -0
  8. pkgwhy/core/models.py +608 -0
  9. pkgwhy/dependencies/__init__.py +2 -0
  10. pkgwhy/dependencies/graph.py +68 -0
  11. pkgwhy/dependencies/reason.py +79 -0
  12. pkgwhy/dynamic/__init__.py +2 -0
  13. pkgwhy/dynamic/analysis.py +156 -0
  14. pkgwhy/explanations/__init__.py +2 -0
  15. pkgwhy/explanations/explain.py +47 -0
  16. pkgwhy/explanations/local_db.py +52 -0
  17. pkgwhy/imports/__init__.py +2 -0
  18. pkgwhy/imports/scanner.py +43 -0
  19. pkgwhy/inspection/__init__.py +2 -0
  20. pkgwhy/inspection/files.py +540 -0
  21. pkgwhy/inspection/python_static.py +323 -0
  22. pkgwhy/inspection/size.py +58 -0
  23. pkgwhy/inspection/text_patterns.py +135 -0
  24. pkgwhy/manifests/__init__.py +2 -0
  25. pkgwhy/manifests/lockfiles.py +51 -0
  26. pkgwhy/manifests/pyproject.py +37 -0
  27. pkgwhy/manifests/requirements.py +27 -0
  28. pkgwhy/metadata/__init__.py +2 -0
  29. pkgwhy/metadata/installed.py +83 -0
  30. pkgwhy/metadata/pypi.py +199 -0
  31. pkgwhy/policy/__init__.py +1 -0
  32. pkgwhy/policy/agent_policy.py +114 -0
  33. pkgwhy/policy/audit_log.py +60 -0
  34. pkgwhy/policy/tool_execution.py +76 -0
  35. pkgwhy/provenance/__init__.py +2 -0
  36. pkgwhy/provenance/installed.py +45 -0
  37. pkgwhy/registry/__init__.py +2 -0
  38. pkgwhy/registry/local.py +178 -0
  39. pkgwhy/registry/manifest.py +78 -0
  40. pkgwhy/registry/publish.py +142 -0
  41. pkgwhy/registry/run.py +148 -0
  42. pkgwhy/registry/tools.py +121 -0
  43. pkgwhy/reports/__init__.py +2 -0
  44. pkgwhy/reports/audit.py +81 -0
  45. pkgwhy/risk/__init__.py +5 -0
  46. pkgwhy/risk/rules.py +372 -0
  47. pkgwhy/risk/scoring.py +231 -0
  48. pkgwhy/typosquat/__init__.py +2 -0
  49. pkgwhy/typosquat/detector.py +182 -0
  50. pkgwhy/typosquat/popular_packages.py +34 -0
  51. pkgwhy/vulnerabilities/__init__.py +2 -0
  52. pkgwhy/vulnerabilities/matching.py +122 -0
  53. pkgwhy/vulnerabilities/osv.py +330 -0
  54. pkgwhy-1.0.0.dist-info/METADATA +688 -0
  55. pkgwhy-1.0.0.dist-info/RECORD +58 -0
  56. pkgwhy-1.0.0.dist-info/WHEEL +4 -0
  57. pkgwhy-1.0.0.dist-info/entry_points.txt +2 -0
  58. pkgwhy-1.0.0.dist-info/licenses/LICENSE +22 -0
@@ -0,0 +1,81 @@
1
+ from __future__ import annotations
2
+
3
+ import html
4
+ from typing import Any, TypedDict
5
+
6
+ from pkgwhy.core.models import PackageJudgement
7
+
8
# Schema identifier written into the "schema_version" field of every report
# produced by build_audit_report().
AUDIT_SCHEMA_VERSION = "pkgwhy.audit.v2"
9
+
10
+
11
class AuditReport(TypedDict):
    """Shape of the dictionary returned by ``build_audit_report``."""

    # Schema identifier; build_audit_report() fills it with AUDIT_SCHEMA_VERSION.
    schema_version: str
    # Number of judgements included in the report.
    package_count: int
    # Total number of known-vulnerability matches across all judgements.
    vulnerability_match_count: int
    # Sorted, de-duplicated non-empty ``source`` values of the matched vulnerabilities.
    vulnerability_sources: list[str]
    # Sorted, de-duplicated non-empty provenance ``metadata_source`` values.
    provenance_sources: list[str]
    # Report-level warnings supplied by the caller (empty list when none were given).
    warnings: list[str]
    # One ``model_dump(mode="json")`` dictionary per judgement.
    packages: list[dict[str, Any]]
19
+
20
+
21
def build_audit_report(judgements: list[PackageJudgement], warnings: list[str] | None = None) -> AuditReport:
    """Assemble the machine-readable audit report for a set of judgements.

    Args:
        judgements: Package judgements to summarise; each one is serialised
            with ``model_dump(mode="json")``.
        warnings: Optional report-level warnings. ``None`` or an empty list
            yields an empty ``warnings`` entry.

    Returns:
        A dictionary tagged with ``AUDIT_SCHEMA_VERSION`` holding the package
        count, the total number of vulnerability matches, the sorted distinct
        vulnerability and provenance sources, the warnings, and the
        serialised judgements.
    """
    seen_vulnerability_sources: set[str] = set()
    seen_provenance_sources: set[str] = set()
    match_total = 0

    # Single pass over the judgements gathers every aggregate at once.
    for item in judgements:
        match_total += len(item.known_vulnerabilities)
        for match in item.known_vulnerabilities:
            if match.source:
                seen_vulnerability_sources.add(match.source)
        origin = item.provenance
        if origin is not None and origin.metadata_source:
            seen_provenance_sources.add(origin.metadata_source)

    serialised_packages = [item.model_dump(mode="json") for item in judgements]
    return {
        "schema_version": AUDIT_SCHEMA_VERSION,
        "package_count": len(judgements),
        "vulnerability_match_count": match_total,
        "vulnerability_sources": sorted(seen_vulnerability_sources),
        "provenance_sources": sorted(seen_provenance_sources),
        "warnings": warnings or [],
        "packages": serialised_packages,
    }
44
+
45
+
46
def render_audit_markdown(judgements: list[PackageJudgement], warnings: list[str] | None = None) -> str:
    """Render the judgements as a Markdown audit report.

    The document contains a title, a runtime-capability notice, a table with
    one row per judgement and, when ``warnings`` is non-empty, a trailing
    "Warnings" bullet list. Text cells and warnings are passed through the
    module's Markdown escaping helpers.

    Args:
        judgements: Package judgements, one table row each.
        warnings: Optional report-level warnings to list after the table.

    Returns:
        The Markdown text, terminated by a single newline.
    """
    document = [
        "# pkgwhy Audit Report",
        "",
        "Runtime capability exposure:",
        "",
        "> Python packages run with the same permissions as the Python process. Static signals are not proof of runtime behavior or intent.",
        "",
        "| Package | Version | Risk | Decision | Vulnerabilities | Warnings |",
        "| --- | --- | --- | --- | --- | --- |",
    ]

    for item in judgements:
        cells = [
            _escape_markdown_table_cell(item.package),
            _escape_markdown_table_cell(item.version or "unknown"),
            _escape_markdown_table_cell(item.risk_level.value),
            _escape_markdown_table_cell(item.decision.value),
            # The two counts are plain integers and need no escaping.
            str(len(item.known_vulnerabilities)),
            str(len(item.warnings)),
        ]
        document.append("| " + " | ".join(cells) + " |")

    if warnings:
        document += ["", "## Warnings", ""]
        document += [f"- {_escape_markdown_list_item(entry)}" for entry in warnings]

    return "\n".join(document) + "\n"
73
+
74
+
75
+ def _escape_markdown_table_cell(value: str) -> str:
76
+ escaped = html.escape(value, quote=False)
77
+ return escaped.replace("\\", r"\\").replace("\r", " ").replace("\n", " ").replace("|", r"\|")
78
+
79
+
80
+ def _escape_markdown_list_item(value: str) -> str:
81
+ return html.escape(value, quote=False).replace("\\", r"\\").replace("\r", " ").replace("\n", " ")
@@ -0,0 +1,5 @@
1
+ """Conservative risk scoring."""
2
+
3
+ from pkgwhy.risk.scoring import judge_inspection
4
+
5
+ __all__ = ["judge_inspection"]
pkgwhy/risk/rules.py ADDED
@@ -0,0 +1,372 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from pkgwhy.core.models import Confidence, RiskRuleEvidence, RuleCategory, RuleSeverity
6
+
7
# Schema identifier reported in the "schema_version" field of
# rule_catalog_snapshot().
RULE_CATALOG_SCHEMA_VERSION = "pkgwhy.static_rule_catalog.v1"
8
+
9
+
10
@dataclass(frozen=True)
class RuleDefinition:
    """Stable metadata for one pre-alpha risk rule.

    Instances are immutable (``frozen=True``) and are stored in the ``RULES``
    catalog; ``make_rule_evidence`` uses them to fill in the fields of a
    ``RiskRuleEvidence`` that the caller did not override.
    """

    # Catalog identifier such as "PKGWHY-VULN-001"; also the key under which
    # the definition is stored in RULES.
    rule_id: str
    # Short snake_case name of the rule.
    name: str
    # Category used by rules_by_category() to group definitions.
    category: RuleCategory
    # Default severity; make_rule_evidence() lets callers override it.
    severity: RuleSeverity
    # Default confidence; make_rule_evidence() lets callers override it.
    confidence: Confidence
    # Message used when make_rule_evidence() is not given one.
    default_message: str
    # Optional reviewer hint describing legitimate reasons the rule may fire.
    false_positive_note: str | None = None
21
+
22
+
23
# Rule definitions in public catalog order. RULES (below) is keyed by each
# definition's own rule_id, so an identifier is written only once per rule.
_RULE_DEFINITIONS: tuple[RuleDefinition, ...] = (
    # --- Vulnerability and package-level risk rules ---
    RuleDefinition(
        rule_id="PKGWHY-VULN-001",
        name="known_vulnerability_match",
        category=RuleCategory.VULNERABILITY,
        severity=RuleSeverity.HIGH,
        confidence=Confidence.MEDIUM,
        default_message="Known vulnerability advisory matched this package version.",
        false_positive_note="Advisory databases can be incomplete or imprecise; verify the source advisory and version range.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-RISK-001",
        name="possible_typosquat_similarity",
        category=RuleCategory.IDENTITY,
        severity=RuleSeverity.MEDIUM,
        confidence=Confidence.MEDIUM,
        default_message="Package name is similar to a popular package name.",
        false_positive_note="Legitimate ecosystem packages can share prefixes or naming families.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-RISK-002",
        name="source_availability_unknown",
        category=RuleCategory.SOURCE,
        severity=RuleSeverity.MEDIUM,
        confidence=Confidence.MEDIUM,
        default_message="Source availability is unknown from installed files.",
        false_positive_note="Some legitimate packages ship metadata-only wheels or generated artifacts.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-RISK-003",
        name="missing_license_metadata",
        category=RuleCategory.METADATA,
        severity=RuleSeverity.MEDIUM,
        confidence=Confidence.MEDIUM,
        default_message="Installed metadata does not include a clear license value.",
        false_positive_note="License metadata can be omitted even when licensing is documented elsewhere.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-RISK-004",
        name="native_compiled_code_present",
        category=RuleCategory.BINARY,
        severity=RuleSeverity.MEDIUM,
        confidence=Confidence.MEDIUM,
        default_message="Native compiled files are present.",
        false_positive_note="Native extensions are common in legitimate numerical, cryptographic, and performance packages.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-RISK-005",
        name="static_capability_signal",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.MEDIUM,
        confidence=Confidence.MEDIUM,
        default_message="Static capability signal detected.",
        false_positive_note="Static references are not proof of runtime behavior, intent, or unsafe use.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-RISK-006",
        name="no_installed_files_found",
        category=RuleCategory.METADATA,
        severity=RuleSeverity.HIGH,
        confidence=Confidence.LOW,
        default_message="No installed package files were found through distribution metadata.",
        false_positive_note="Editable installs or unusual packaging layouts can hide files from distribution metadata.",
    ),
    # --- Python static-analysis rules ---
    RuleDefinition(
        rule_id="PKGWHY-PY-001",
        name="python_dynamic_code_execution",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.HIGH,
        confidence=Confidence.HIGH,
        default_message="Python dynamic code execution construct referenced.",
        false_positive_note="Dynamic execution can be legitimate in frameworks, plugins, and compatibility layers.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-PY-002",
        name="python_dynamic_import",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.MEDIUM,
        confidence=Confidence.HIGH,
        default_message="Python dynamic import construct referenced.",
        false_positive_note="Dynamic imports are common in plugin systems and optional dependency loading.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-PY-003",
        name="python_deserialisation_risk",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.HIGH,
        confidence=Confidence.HIGH,
        default_message="Python deserialisation-risk construct referenced.",
        false_positive_note="Deserialisation APIs can be safe when inputs are trusted and validated.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-PY-004",
        name="python_encoded_payload_handling",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.MEDIUM,
        confidence=Confidence.HIGH,
        default_message="Python encoded-payload handling construct referenced.",
        false_positive_note="Encoding and compression APIs are common in legitimate data processing code.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-PY-005",
        name="python_subprocess_or_shell_execution",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.HIGH,
        confidence=Confidence.HIGH,
        default_message="Python subprocess or shell execution construct referenced.",
        false_positive_note="Subprocess APIs can be legitimate for CLI wrappers, build tools, and integrations.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-PY-006",
        name="python_environment_or_secret_access",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.MEDIUM,
        confidence=Confidence.MEDIUM,
        default_message="Python environment or secret-like access pattern referenced.",
        false_positive_note="Environment variable and credential-like names can appear in legitimate configuration code.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-PY-007",
        name="python_package_manager_manipulation",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.HIGH,
        confidence=Confidence.MEDIUM,
        default_message="Python package-manager manipulation pattern referenced.",
        false_positive_note="Installer wrappers and build tooling may legitimately invoke package managers.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-PY-008",
        name="python_unsafe_yaml_load",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.HIGH,
        confidence=Confidence.HIGH,
        default_message="Python unsafe YAML load construct referenced.",
        false_positive_note="YAML loading can be safe when using safe loaders or trusted inputs; review call arguments.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-PY-009",
        name="python_obfuscation_bootstrap_signal",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.HIGH,
        confidence=Confidence.MEDIUM,
        default_message="Python obfuscation-bootstrap pattern referenced.",
        false_positive_note="Obfuscation-bootstrap names can appear in comments, fixtures, or vendored compatibility code.",
    ),
    # --- Build and install-time rules ---
    RuleDefinition(
        rule_id="PKGWHY-BUILD-001",
        name="executable_setup_py_present",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.MEDIUM,
        confidence=Confidence.HIGH,
        default_message="Executable setup.py file is present.",
        false_positive_note="Many legacy packages include setup.py; presence alone is not proof of unsafe install behavior.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-BUILD-002",
        name="setup_time_subprocess_signal",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.HIGH,
        confidence=Confidence.MEDIUM,
        default_message="setup.py contains subprocess or shell execution references.",
        false_positive_note="Build scripts can legitimately invoke compilers or local build tools.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-BUILD-003",
        name="setup_time_network_signal",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.HIGH,
        confidence=Confidence.MEDIUM,
        default_message="setup.py contains network access references.",
        false_positive_note="Build scripts can reference network libraries without using them during installation.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-BUILD-004",
        name="setup_time_dynamic_execution_signal",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.HIGH,
        confidence=Confidence.MEDIUM,
        default_message="setup.py contains dynamic execution references.",
        false_positive_note="Dynamic execution references can appear in compatibility logic; inspect context manually.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-BUILD-005",
        name="build_backend_declared",
        category=RuleCategory.METADATA,
        severity=RuleSeverity.INFO,
        confidence=Confidence.HIGH,
        default_message="Build backend metadata is declared.",
        false_positive_note="Declarative build metadata is informational unless paired with other suspicious signals.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-BUILD-006",
        name="setup_cfg_present",
        category=RuleCategory.METADATA,
        severity=RuleSeverity.INFO,
        confidence=Confidence.HIGH,
        default_message="setup.cfg metadata file is present.",
        false_positive_note="setup.cfg is usually declarative metadata and is not executable Python code.",
    ),
    # --- Network and credential text rules ---
    RuleDefinition(
        rule_id="PKGWHY-NET-001",
        name="source_url_or_domain_reference",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.LOW,
        confidence=Confidence.MEDIUM,
        default_message="Source text references a URL or domain.",
        false_positive_note="URL references can be documentation, examples, tests, or metadata and are not proof of network behavior.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-CRED-001",
        name="credential_like_assignment",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.MEDIUM,
        confidence=Confidence.MEDIUM,
        default_message="Source text contains a credential-like assignment pattern.",
        false_positive_note="Credential-like names can appear in placeholders, tests, documentation, and configuration examples.",
    ),
    # --- JavaScript rules ---
    RuleDefinition(
        rule_id="PKGWHY-JS-001",
        name="javascript_minification_signal",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.LOW,
        confidence=Confidence.MEDIUM,
        default_message="JavaScript minification or density signal detected.",
        false_positive_note="Minified JavaScript is common in legitimate distributions and is not automatically suspicious.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-JS-002",
        name="javascript_dynamic_execution_signal",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.HIGH,
        confidence=Confidence.HIGH,
        default_message="JavaScript dynamic execution construct referenced.",
        false_positive_note="Dynamic JavaScript execution can be legitimate in loaders and compatibility code; inspect context manually.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-JS-003",
        name="javascript_encoded_payload_signal",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.MEDIUM,
        confidence=Confidence.MEDIUM,
        default_message="JavaScript encoded-payload handling signal detected.",
        false_positive_note="Encoding APIs and encoded strings can be legitimate in bundled assets and data fixtures.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-JS-004",
        name="javascript_obfuscation_signal",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.MEDIUM,
        confidence=Confidence.MEDIUM,
        default_message="JavaScript obfuscation-like pattern detected.",
        false_positive_note="Obfuscation-like patterns can appear in generated or bundled code; they require context-sensitive review.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-JS-005",
        name="javascript_source_map_reference",
        category=RuleCategory.STATIC_ANALYSIS,
        severity=RuleSeverity.INFO,
        confidence=Confidence.HIGH,
        default_message="JavaScript source-map reference detected.",
        false_positive_note="Source maps are often helpful debug metadata and are not a security issue by themselves.",
    ),
    # --- Binary artifact rules ---
    RuleDefinition(
        rule_id="PKGWHY-BIN-001",
        name="native_extension_present",
        category=RuleCategory.BINARY,
        severity=RuleSeverity.MEDIUM,
        confidence=Confidence.HIGH,
        default_message="Native compiled extension or library file is present.",
        false_positive_note="Native extensions are common in legitimate numerical, cryptographic, and performance packages.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-BIN-002",
        name="wasm_binary_present",
        category=RuleCategory.BINARY,
        severity=RuleSeverity.MEDIUM,
        confidence=Confidence.HIGH,
        default_message="WebAssembly binary file is present.",
        false_positive_note="WASM can be legitimate for portable performance-sensitive code and is not automatically malicious.",
    ),
    RuleDefinition(
        rule_id="PKGWHY-BIN-003",
        name="native_executable_present",
        category=RuleCategory.BINARY,
        severity=RuleSeverity.HIGH,
        confidence=Confidence.HIGH,
        default_message="Native executable file is present.",
        false_positive_note="Executable files can be legitimate helper tools, but should be reviewed before agent or production use.",
    ),
)

# Public catalog: rule ID -> definition, in the order listed above.
RULES: dict[str, RuleDefinition] = {definition.rule_id: definition for definition in _RULE_DEFINITIONS}
313
+
314
+
315
def rule_ids() -> tuple[str, ...]:
    """List every catalog rule ID, ordered as the entries appear in ``RULES``."""
    return tuple(RULES.keys())
318
+
319
+
320
def rules_by_category() -> dict[RuleCategory, tuple[RuleDefinition, ...]]:
    """Bucket the catalog's definitions by their category.

    Categories appear in the order they are first encountered in ``RULES``,
    and the definitions inside each bucket keep their catalog order.

    Returns:
        A new dictionary mapping each category to a tuple of its definitions.
    """
    buckets: dict[RuleCategory, list[RuleDefinition]] = {}
    for definition in RULES.values():
        if definition.category not in buckets:
            buckets[definition.category] = []
        buckets[definition.category].append(definition)
    # Freeze each bucket so callers receive immutable groupings.
    return {category: tuple(members) for category, members in buckets.items()}
326
+
327
+
328
def rule_catalog_snapshot() -> dict[str, object]:
    """Describe the whole rule catalog as plain, JSON-friendly data.

    Returns:
        A dictionary with the catalog schema version, the number of rules,
        and one entry per rule (in catalog order) in which the category,
        severity and confidence enums are replaced by their ``.value``.
    """

    def as_entry(definition: RuleDefinition) -> dict[str, object]:
        # Key order is part of the snapshot layout; keep it as listed.
        return {
            "rule_id": definition.rule_id,
            "name": definition.name,
            "default_message": definition.default_message,
            "category": definition.category.value,
            "severity": definition.severity.value,
            "confidence": definition.confidence.value,
            "false_positive_note": definition.false_positive_note,
        }

    entries = [as_entry(definition) for definition in RULES.values()]
    return {
        "schema_version": RULE_CATALOG_SCHEMA_VERSION,
        "rule_count": len(RULES),
        "rules": entries,
    }
346
+
347
+
348
def make_rule_evidence(
    rule_id: str,
    *,
    message: str | None = None,
    evidence: list[str] | None = None,
    severity: RuleSeverity | None = None,
    confidence: Confidence | None = None,
    file_path: str | None = None,
    line_number: int | None = None,
    symbol: str | None = None,
) -> RiskRuleEvidence:
    """Create the evidence record for one triggered catalog rule.

    The rule's ID, name, category and false-positive note always come from
    the catalog. ``message``, ``severity`` and ``confidence`` fall back to the
    catalog defaults, and ``evidence`` to an empty list, whenever the supplied
    value is falsy (``None``, an empty string, an empty list).

    Args:
        rule_id: Key of the rule in ``RULES``.
        message: Optional replacement for the rule's default message.
        evidence: Optional supporting evidence strings.
        severity: Optional severity override.
        confidence: Optional confidence override.
        file_path: Optional file the signal was found in.
        line_number: Optional line number of the signal.
        symbol: Optional symbol associated with the signal.

    Returns:
        The populated ``RiskRuleEvidence``.

    Raises:
        KeyError: If ``rule_id`` is not present in ``RULES``.
    """
    catalog_entry = RULES[rule_id]

    effective_severity = severity if severity else catalog_entry.severity
    effective_confidence = confidence if confidence else catalog_entry.confidence
    effective_message = message if message else catalog_entry.default_message
    effective_evidence = evidence if evidence else []

    return RiskRuleEvidence(
        rule_id=catalog_entry.rule_id,
        name=catalog_entry.name,
        category=catalog_entry.category,
        severity=effective_severity,
        confidence=effective_confidence,
        message=effective_message,
        evidence=effective_evidence,
        file_path=file_path,
        line_number=line_number,
        symbol=symbol,
        false_positive_note=catalog_entry.false_positive_note,
    )
pkgwhy/risk/scoring.py ADDED
@@ -0,0 +1,231 @@
1
+ from __future__ import annotations
2
+
3
+ from pkgwhy.core.models import (
4
+ AgentDecision,
5
+ Confidence,
6
+ PackageInspection,
7
+ PackageJudgement,
8
+ PackageProvenance,
9
+ RiskRuleEvidence,
10
+ RiskLevel,
11
+ RuleSeverity,
12
+ SourceAvailability,
13
+ VulnerabilityMatch,
14
+ )
15
+ from pkgwhy.provenance.installed import assess_installed_provenance
16
+ from pkgwhy.risk.rules import make_rule_evidence
17
+ from pkgwhy.typosquat.detector import detect_typosquat
18
+
19
+
20
def judge_inspection(
    inspection: PackageInspection,
    known_vulnerabilities: list[VulnerabilityMatch] | None = None,
    provenance: PackageProvenance | None = None,
) -> PackageJudgement:
    """Score one inspected package and build its judgement.

    The risk starts at ``RiskLevel.LOW`` and is only raised (through
    ``_raise_risk``) as signals accumulate: rule evidence already attached to
    the inspection, supplied vulnerability matches, a typosquat candidate,
    unknown source availability, missing license metadata, native compiled
    code, and warning-grade capability signals. When the inspection saw no
    installed files, the confidence drops to ``LOW`` and the risk becomes
    ``UNKNOWN`` unless it is already ``HIGH`` or ``CRITICAL``.

    Args:
        inspection: Static inspection result for the package.
        known_vulnerabilities: Optional vulnerability matches for this
            package version; treated as empty when omitted.
        provenance: Optional provenance; when falsy it is derived from the
            installed metadata via ``assess_installed_provenance``.

    Returns:
        The ``PackageJudgement`` with the decision and recommendation for the
        final risk level, de-duplicated sorted warnings, collected evidence,
        and the triggered rule evidence in the order it was recorded.
    """
    package_metadata = inspection.metadata
    notes = list(inspection.warnings)
    supporting_evidence = list(inspection.evidence)
    matched_rules: list[RiskRuleEvidence] = []
    known_vulnerabilities = known_vulnerabilities or []
    level = RiskLevel.LOW
    certainty = Confidence.MEDIUM
    provenance = provenance or assess_installed_provenance(package_metadata)

    def flag(rule_id: str, text: str, support: list[str]) -> None:
        # Record a warning together with its catalog rule evidence.
        notes.append(text)
        matched_rules.append(make_rule_evidence(rule_id, message=text, evidence=support))

    # 1. Rule evidence produced by the static inspection itself.
    for inspected_rule in inspection.rule_evidence:
        matched_rules.append(inspected_rule)
        level = _raise_risk(level, _risk_for_rule(inspected_rule))

    # 2. Known vulnerability matches supplied by the caller.
    for match in known_vulnerabilities:
        fixed_note = ""
        if match.fixed_versions:
            fixed_note = f" Fixed versions: {', '.join(match.fixed_versions)}."
        text = (
            f"Known vulnerability match: {match.vulnerability_id} from {match.source}. "
            "This result depends on the supplied vulnerability source and may be incomplete."
            f"{fixed_note}"
        )
        notes.append(text)
        supporting_evidence.extend(match.evidence)
        vulnerability_rule = make_rule_evidence(
            "PKGWHY-VULN-001",
            message=text,
            evidence=match.evidence,
            severity=_severity_for_vulnerability(match),
            confidence=match.confidence,
        )
        matched_rules.append(vulnerability_rule)
        level = _raise_risk(level, _risk_for_vulnerability(match))

    # 3. Name similarity to a popular package.
    lookalike = detect_typosquat(package_metadata.identity.name)
    if lookalike is not None:
        text = (
            "Possible typosquatting risk: "
            f"'{package_metadata.identity.name}' is similar to popular package '{lookalike.possible_target}'. "
            "This is a signal, not proof of unsafe behavior."
        )
        supporting_evidence.extend(lookalike.evidence)
        flag("PKGWHY-RISK-001", text, lookalike.evidence)
        level = _raise_risk(level, RiskLevel.MEDIUM)

    # 4. Source could not be located from the installed files.
    unreadable_source_states = {
        SourceAvailability.SOURCE_AVAILABILITY_UNKNOWN,
        SourceAvailability.INSTALLED_METADATA_ONLY,
    }
    if inspection.source_availability in unreadable_source_states:
        flag(
            "PKGWHY-RISK-002",
            "Source availability is unknown from installed files.",
            ["Installed file metadata did not provide readable source paths."],
        )
        level = _raise_risk(level, RiskLevel.MEDIUM)

    # 5. Missing license metadata.
    if not package_metadata.license:
        flag(
            "PKGWHY-RISK-003",
            "Installed metadata does not include a clear license value.",
            ["License metadata field was empty."],
        )
        level = _raise_risk(level, RiskLevel.MEDIUM)

    # 6. Native compiled code limits what static review can see.
    if "Native compiled code present" in inspection.detected_capabilities:
        flag(
            "PKGWHY-RISK-004",
            "Native compiled files are present. This can be legitimate, but static review is more limited.",
            ["Installed file scan detected native binary file extensions."],
        )
        level = _raise_risk(level, RiskLevel.MEDIUM)

    # 7. One warning per warning-grade capability signal.
    for signal in _warning_capability_signals(inspection.detected_capabilities):
        flag(
            "PKGWHY-RISK-005",
            f"Static capability signal detected: {signal}. This is not proof of unsafe behavior.",
            [f"Detected capability signal: {signal}."],
        )
        level = _raise_risk(level, RiskLevel.MEDIUM)

    # 8. Nothing to inspect: the result cannot be trusted as LOW or MEDIUM.
    if inspection.size.file_count == 0:
        flag(
            "PKGWHY-RISK-006",
            "No installed package files were found through distribution metadata.",
            ["Distribution metadata did not expose files for static inspection."],
        )
        if level not in {RiskLevel.HIGH, RiskLevel.CRITICAL}:
            level = RiskLevel.UNKNOWN
        certainty = Confidence.LOW

    return PackageJudgement(
        package=package_metadata.identity.name,
        version=package_metadata.identity.version,
        decision=_decision_for_risk(level),
        risk_level=level,
        confidence=certainty,
        summary=package_metadata.summary or "No installed summary is available for this package.",
        source_availability=inspection.source_availability,
        installed_size_bytes=inspection.size.total_bytes,
        detected_capabilities=inspection.detected_capabilities,
        warnings=sorted(set(notes)),
        recommendation=_recommendation_for_risk(level),
        evidence=supporting_evidence,
        risk_rules=matched_rules,
        known_vulnerabilities=known_vulnerabilities,
        provenance=provenance,
    )
152
+
153
+
154
+ def _warning_capability_signals(capabilities: list[str]) -> list[str]:
155
+ warning_signals = {
156
+ "Subprocess or shell execution signals",
157
+ "Dynamic code execution signals",
158
+ "JavaScript dynamic code execution signals",
159
+ "JavaScript obfuscation signals",
160
+ "Encoded payload handling signals",
161
+ "Deserialisation risk signals",
162
+ "Credential or token access patterns",
163
+ "Package manager manipulation signals",
164
+ "Shell script files present",
165
+ "Install-time setup files present",
166
+ "WASM binary code present",
167
+ }
168
+ return sorted(set(capabilities) & warning_signals)
169
+
170
+
171
def _decision_for_risk(risk: RiskLevel) -> AgentDecision:
    """Map a risk level to the agent decision.

    LOW allows, MEDIUM allows with caution, CRITICAL blocks; HIGH, UNKNOWN
    and any other value ask for manual review.
    """
    decisions = {
        RiskLevel.LOW: AgentDecision.ALLOW,
        RiskLevel.MEDIUM: AgentDecision.ALLOW_WITH_CAUTION,
        RiskLevel.HIGH: AgentDecision.REVIEW_MANUALLY,
        RiskLevel.CRITICAL: AgentDecision.BLOCK,
    }
    # UNKNOWN and any unlisted level share the manual-review fallback.
    return decisions.get(risk, AgentDecision.REVIEW_MANUALLY)
183
+
184
+
185
def _recommendation_for_risk(risk: RiskLevel) -> str:
    """Return the human-readable recommendation for a risk level.

    UNKNOWN and any value not listed below receive the "risk is unknown"
    manual-review text.
    """
    recommendations = {
        RiskLevel.LOW: "Allow for normal use in trusted projects, while keeping normal dependency review practices.",
        RiskLevel.MEDIUM: "Allow with caution. Review the listed signals before agent or production use.",
        RiskLevel.HIGH: "Manual review recommended before installation, import, or execution.",
        RiskLevel.CRITICAL: "Block for AI-agent usage until a human reviews the evidence.",
    }
    return recommendations.get(risk, "Risk is unknown. Manual review recommended.")
197
+
198
+
199
def _raise_risk(current: RiskLevel, candidate: RiskLevel) -> RiskLevel:
    """Return whichever of the two levels ranks higher.

    Ranking is LOW < MEDIUM < HIGH < CRITICAL, with UNKNOWN ranked the same
    as MEDIUM. On a tie the ``current`` level is kept.

    Raises:
        KeyError: If either level is missing from the ranking table.
    """
    rank = {
        RiskLevel.LOW: 0,
        RiskLevel.MEDIUM: 1,
        RiskLevel.HIGH: 2,
        RiskLevel.CRITICAL: 3,
        RiskLevel.UNKNOWN: 1,
    }
    if rank[candidate] > rank[current]:
        return candidate
    return current
208
+
209
+
210
def _risk_for_vulnerability(vulnerability: VulnerabilityMatch) -> RiskLevel:
    """Translate a vulnerability match into a risk level.

    The match's ``severity`` entries are joined with spaces and lower-cased
    (presumably a list of severity labels; confirm against the model). The
    result is CRITICAL when that text contains "critical" and HIGH for
    everything else.
    """
    severity_text = " ".join(vulnerability.severity).lower()
    return RiskLevel.CRITICAL if "critical" in severity_text else RiskLevel.HIGH
215
+
216
+
217
def _risk_for_rule(rule: RiskRuleEvidence) -> RiskLevel:
    """Translate a rule's severity into the matching risk level.

    CRITICAL, HIGH and MEDIUM severities map to the risk level of the same
    name; every other severity maps to LOW.
    """
    severity_to_risk = {
        RuleSeverity.CRITICAL: RiskLevel.CRITICAL,
        RuleSeverity.HIGH: RiskLevel.HIGH,
        RuleSeverity.MEDIUM: RiskLevel.MEDIUM,
    }
    return severity_to_risk.get(rule.severity, RiskLevel.LOW)
225
+
226
+
227
def _severity_for_vulnerability(vulnerability: VulnerabilityMatch) -> RuleSeverity:
    """Choose the rule severity recorded for a vulnerability match.

    The match's ``severity`` entries are joined with spaces and lower-cased
    (presumably a list of severity labels; confirm against the model). The
    result is CRITICAL when that text contains "critical" and HIGH for
    everything else.
    """
    severity_text = " ".join(vulnerability.severity).lower()
    return RuleSeverity.CRITICAL if "critical" in severity_text else RuleSeverity.HIGH