pkgwhy 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pkgwhy/__init__.py +3 -0
- pkgwhy/__main__.py +6 -0
- pkgwhy/agent/__init__.py +2 -0
- pkgwhy/agent/judge.py +93 -0
- pkgwhy/cli.py +676 -0
- pkgwhy/core/__init__.py +2 -0
- pkgwhy/core/constants.py +13 -0
- pkgwhy/core/models.py +608 -0
- pkgwhy/dependencies/__init__.py +2 -0
- pkgwhy/dependencies/graph.py +68 -0
- pkgwhy/dependencies/reason.py +79 -0
- pkgwhy/dynamic/__init__.py +2 -0
- pkgwhy/dynamic/analysis.py +156 -0
- pkgwhy/explanations/__init__.py +2 -0
- pkgwhy/explanations/explain.py +47 -0
- pkgwhy/explanations/local_db.py +52 -0
- pkgwhy/imports/__init__.py +2 -0
- pkgwhy/imports/scanner.py +43 -0
- pkgwhy/inspection/__init__.py +2 -0
- pkgwhy/inspection/files.py +540 -0
- pkgwhy/inspection/python_static.py +323 -0
- pkgwhy/inspection/size.py +58 -0
- pkgwhy/inspection/text_patterns.py +135 -0
- pkgwhy/manifests/__init__.py +2 -0
- pkgwhy/manifests/lockfiles.py +51 -0
- pkgwhy/manifests/pyproject.py +37 -0
- pkgwhy/manifests/requirements.py +27 -0
- pkgwhy/metadata/__init__.py +2 -0
- pkgwhy/metadata/installed.py +83 -0
- pkgwhy/metadata/pypi.py +199 -0
- pkgwhy/policy/__init__.py +1 -0
- pkgwhy/policy/agent_policy.py +114 -0
- pkgwhy/policy/audit_log.py +60 -0
- pkgwhy/policy/tool_execution.py +76 -0
- pkgwhy/provenance/__init__.py +2 -0
- pkgwhy/provenance/installed.py +45 -0
- pkgwhy/registry/__init__.py +2 -0
- pkgwhy/registry/local.py +178 -0
- pkgwhy/registry/manifest.py +78 -0
- pkgwhy/registry/publish.py +142 -0
- pkgwhy/registry/run.py +148 -0
- pkgwhy/registry/tools.py +121 -0
- pkgwhy/reports/__init__.py +2 -0
- pkgwhy/reports/audit.py +81 -0
- pkgwhy/risk/__init__.py +5 -0
- pkgwhy/risk/rules.py +372 -0
- pkgwhy/risk/scoring.py +231 -0
- pkgwhy/typosquat/__init__.py +2 -0
- pkgwhy/typosquat/detector.py +182 -0
- pkgwhy/typosquat/popular_packages.py +34 -0
- pkgwhy/vulnerabilities/__init__.py +2 -0
- pkgwhy/vulnerabilities/matching.py +122 -0
- pkgwhy/vulnerabilities/osv.py +330 -0
- pkgwhy-1.0.0.dist-info/METADATA +688 -0
- pkgwhy-1.0.0.dist-info/RECORD +58 -0
- pkgwhy-1.0.0.dist-info/WHEEL +4 -0
- pkgwhy-1.0.0.dist-info/entry_points.txt +2 -0
- pkgwhy-1.0.0.dist-info/licenses/LICENSE +22 -0
pkgwhy/core/constants.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Version identifiers for the serialized result formats. The models in
# pkgwhy.core.models use these as the defaults of their version fields.
PACKAGE_JUDGEMENT_SCHEMA_VERSION = "pkgwhy.package_judgement.v1"
DYNAMIC_ANALYSIS_SCHEMA_VERSION = "pkgwhy.dynamic_analysis.v1"
AGENT_POLICY_SCHEMA_VERSION = "pkgwhy.agent_policy.v1"
AGENT_PACKAGE_PRECHECK_SCHEMA_VERSION = "pkgwhy.agent_package_precheck.v1"
RISK_MODEL_VERSION = "pkgwhy.risk_model.v1"

# Disclaimer text; PackageJudgement uses it as the default of its
# capability_exposure_note field. The sentences are joined with single spaces.
CAPABILITY_EXPOSURE_NOTE = " ".join(
    (
        "Python packages run with the same permissions as the Python process.",
        "This analysis detects capabilities used or referenced by package code and metadata;",
        "static signals are not proof of runtime behavior or intent.",
    )
)

PROJECT_NAME = "pkgwhy"
|
pkgwhy/core/models.py
ADDED
|
@@ -0,0 +1,608 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from enum import StrEnum
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Literal
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, Field, field_validator
|
|
9
|
+
|
|
10
|
+
from pkgwhy.core.constants import (
|
|
11
|
+
AGENT_PACKAGE_PRECHECK_SCHEMA_VERSION,
|
|
12
|
+
AGENT_POLICY_SCHEMA_VERSION,
|
|
13
|
+
CAPABILITY_EXPOSURE_NOTE,
|
|
14
|
+
DYNAMIC_ANALYSIS_SCHEMA_VERSION,
|
|
15
|
+
PACKAGE_JUDGEMENT_SCHEMA_VERSION,
|
|
16
|
+
RISK_MODEL_VERSION,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# A Literal type cannot reference a variable, so this repeats the value of
# RISK_MODEL_VERSION verbatim; the two must be kept in sync by hand.
RiskModelVersion = Literal["pkgwhy.risk_model.v1"]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class RiskLevel(StrEnum):
|
|
23
|
+
LOW = "low"
|
|
24
|
+
MEDIUM = "medium"
|
|
25
|
+
HIGH = "high"
|
|
26
|
+
CRITICAL = "critical"
|
|
27
|
+
UNKNOWN = "unknown"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class AgentDecision(StrEnum):
|
|
31
|
+
ALLOW = "allow"
|
|
32
|
+
ALLOW_WITH_CAUTION = "allow_with_caution"
|
|
33
|
+
REVIEW_MANUALLY = "review_manually"
|
|
34
|
+
SANDBOX_ONLY = "sandbox_only"
|
|
35
|
+
BLOCK = "block"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class AgentPolicyConfig(BaseModel):
    """Policy-as-code defaults for non-interactive agent package decisions."""

    # Defaults to the agent-policy schema identifier from pkgwhy.core.constants.
    schema_version: str = AGENT_POLICY_SCHEMA_VERSION

    # Permissive switches: all default to False, so each must be opted into.
    allow_public_pypi: bool = False
    allow_unpinned_dependencies: bool = False
    allow_unsigned_tools: bool = False

    # Verification requirements: judgement and hash checks default to required,
    # signature verification defaults to not required.
    require_pkgwhy_judgement: bool = True
    require_hash_verification: bool = True
    require_signature_verification: bool = False

    # Fallback decision for non-interactive runs.
    non_interactive_default_decision: AgentDecision = AgentDecision.BLOCK

    # Per-situation decisions: unknown and high-risk packages default to manual
    # review, critical-risk packages default to BLOCK.
    unknown_package_decision: AgentDecision = AgentDecision.REVIEW_MANUALLY
    high_risk_package_decision: AgentDecision = AgentDecision.REVIEW_MANUALLY
    critical_risk_package_decision: AgentDecision = AgentDecision.BLOCK

    # Non-interactive counterparts of the three decisions above: all BLOCK.
    non_interactive_unknown_package_decision: AgentDecision = AgentDecision.BLOCK
    non_interactive_high_risk_package_decision: AgentDecision = AgentDecision.BLOCK
    non_interactive_critical_risk_package_decision: AgentDecision = AgentDecision.BLOCK

    tool_execution_requires_local_registry: bool = True
    dynamic_analysis_default_decision: AgentDecision = AgentDecision.BLOCK
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class ToolArtifactType(StrEnum):
|
|
60
|
+
SCRIPT = "script"
|
|
61
|
+
FOLDER = "folder"
|
|
62
|
+
PACKAGE = "package"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class HashStatus(StrEnum):
|
|
66
|
+
VERIFIED = "verified"
|
|
67
|
+
MISMATCH = "mismatch"
|
|
68
|
+
MISSING = "missing"
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class ToolRunStatus(StrEnum):
|
|
72
|
+
COMPLETED = "completed"
|
|
73
|
+
FAILED = "failed"
|
|
74
|
+
BLOCKED = "blocked"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class DynamicAnalysisStatus(StrEnum):
|
|
78
|
+
BLOCKED = "blocked"
|
|
79
|
+
BACKEND_UNAVAILABLE = "backend_unavailable"
|
|
80
|
+
COMPLETED = "completed"
|
|
81
|
+
FAILED = "failed"
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class DynamicNetworkMode(StrEnum):
|
|
85
|
+
OFF = "off"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class DynamicFilesystemMode(StrEnum):
|
|
89
|
+
SCRATCH = "scratch"
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class Confidence(StrEnum):
|
|
93
|
+
LOW = "low"
|
|
94
|
+
MEDIUM = "medium"
|
|
95
|
+
HIGH = "high"
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class RuleSeverity(StrEnum):
|
|
99
|
+
INFO = "info"
|
|
100
|
+
LOW = "low"
|
|
101
|
+
MEDIUM = "medium"
|
|
102
|
+
HIGH = "high"
|
|
103
|
+
CRITICAL = "critical"
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class RuleCategory(StrEnum):
|
|
107
|
+
VULNERABILITY = "vulnerability"
|
|
108
|
+
IDENTITY = "identity"
|
|
109
|
+
SOURCE = "source"
|
|
110
|
+
METADATA = "metadata"
|
|
111
|
+
STATIC_ANALYSIS = "static_analysis"
|
|
112
|
+
BINARY = "binary"
|
|
113
|
+
POLICY = "policy"
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class DependencyStatus(StrEnum):
|
|
117
|
+
DIRECT = "direct"
|
|
118
|
+
TRANSITIVE = "transitive"
|
|
119
|
+
IMPORTED_BY_PROJECT = "imported_by_project"
|
|
120
|
+
NOT_INSTALLED = "not_installed"
|
|
121
|
+
UNKNOWN = "unknown"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class SourceAvailability(StrEnum):
|
|
125
|
+
INSTALLED_SOURCE_PRESENT = "installed_source_present"
|
|
126
|
+
INSTALLED_METADATA_ONLY = "installed_metadata_only"
|
|
127
|
+
SOURCE_AVAILABILITY_UNKNOWN = "source_availability_unknown"
|
|
128
|
+
NOT_INSTALLED = "not_installed"
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class ReadabilityStatus(StrEnum):
|
|
132
|
+
READABLE = "readable"
|
|
133
|
+
MOSTLY_READABLE = "mostly_readable"
|
|
134
|
+
PARTIALLY_READABLE = "partially_readable"
|
|
135
|
+
MINIFIED = "minified"
|
|
136
|
+
POSSIBLY_OBFUSCATED = "possibly_obfuscated"
|
|
137
|
+
LIKELY_OBFUSCATED = "likely_obfuscated"
|
|
138
|
+
NOT_ENOUGH_SOURCE_AVAILABLE = "not_enough_source_available"
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class PackageIdentity(BaseModel):
    """Installed package identity fields using both display and normalized names."""

    # Both name forms are required; the model does not derive one from the other.
    name: str
    normalized_name: str
    # Optional: None when no version is known.
    version: str | None = None
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class ProjectUrls(BaseModel):
    """Project URLs extracted from installed distribution metadata."""

    # Three named URL slots, each None when absent.
    homepage: str | None = None
    repository: str | None = None
    documentation: str | None = None
    # String-to-string mapping kept alongside the named slots; empty by default.
    # NOTE(review): presumably label -> URL as found in the metadata - confirm
    # against the code that populates it.
    raw: dict[str, str] = Field(default_factory=dict)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class PackageMetadata(BaseModel):
    """Installed distribution metadata gathered without importing package code."""

    # The only required field; everything below has a default.
    identity: PackageIdentity
    summary: str | None = None
    author: str | None = None
    maintainer: str | None = None
    license: str | None = None
    # Requirement strings; pkgwhy.dependencies.graph parses these with
    # packaging's Requirement to build the dependency graph.
    requires: list[str] = Field(default_factory=list)
    project_urls: ProjectUrls = Field(default_factory=ProjectUrls)
    entry_points: list[str] = Field(default_factory=list)
    # Defaults to True; set to False by callers when metadata is unavailable.
    metadata_available: bool = True
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class VulnerabilityRange(BaseModel):
    """Conservative affected-version range parsed from advisory data."""

    # Every bound is optional and stored as an unparsed string; this model does
    # no version parsing or ordering checks itself.
    introduced: str | None = None
    fixed: str | None = None
    last_affected: str | None = None
    limit: str | None = None
    range_type: str | None = None
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class VulnerabilityRecord(BaseModel):
    """Source-attributed vulnerability advisory record."""

    # Required: advisory id, affected package name, and the advisory source.
    id: str
    aliases: list[str] = Field(default_factory=list)
    package_name: str
    ecosystem: str | None = None
    summary: str | None = None
    details: str | None = None
    # Severity entries are kept as plain strings, not as an enum.
    severity: list[str] = Field(default_factory=list)
    # Affected versions can be given as ranges, as explicit versions, or both.
    affected_ranges: list[VulnerabilityRange] = Field(default_factory=list)
    affected_versions: list[str] = Field(default_factory=list)
    fixed_versions: list[str] = Field(default_factory=list)
    references: list[str] = Field(default_factory=list)
    source: str
    source_url: str | None = None
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
class VulnerabilityMatch(BaseModel):
    """A conservative package-version match against a vulnerability record."""

    # Required: which advisory matched which package and version.
    vulnerability_id: str
    package: str
    version: str
    # Same field names as on VulnerabilityRecord.
    aliases: list[str] = Field(default_factory=list)
    summary: str | None = None
    severity: list[str] = Field(default_factory=list)
    fixed_versions: list[str] = Field(default_factory=list)
    references: list[str] = Field(default_factory=list)
    source: str
    source_url: str | None = None
    # Matches default to MEDIUM confidence unless the caller says otherwise.
    confidence: Confidence = Confidence.MEDIUM
    evidence: list[str] = Field(default_factory=list)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
class PackageProvenance(BaseModel):
    """Metadata-derived source-trust signals without claiming unavailable attestations."""

    package: str
    version: str | None = None
    repository_url: str | None = None
    documentation_url: str | None = None
    homepage_url: str | None = None
    project_urls: dict[str, str] = Field(default_factory=dict)
    # Status fields are free-form strings. All default to "unknown" except
    # attestation_status, which defaults to "not_implemented".
    metadata_source: str = "unknown"
    source_distribution_status: str = "unknown"
    trusted_publishing_status: str = "unknown"
    attestation_status: str = "not_implemented"
    release_activity_status: str = "unknown"
    # Provenance starts at LOW confidence by default.
    confidence: Confidence = Confidence.LOW
    warnings: list[str] = Field(default_factory=list)
    evidence: list[str] = Field(default_factory=list)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class RiskRuleEvidence(BaseModel):
    """One versioned risk-rule contribution to a package judgement."""

    # Required description of the rule and its finding.
    rule_id: str
    name: str
    category: RuleCategory
    severity: RuleSeverity
    confidence: Confidence
    message: str
    evidence: list[str] = Field(default_factory=list)
    # Optional location of the finding.
    file_path: str | None = None
    # When given, must be >= 1 (ge=1), so line numbers are 1-based.
    line_number: int | None = Field(default=None, ge=1)
    symbol: str | None = None
    false_positive_note: str | None = None
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
class LargestFile(BaseModel):
    """A large installed file reported as inspection evidence."""

    path: str
    # A size can never be negative. ge=0 matches the non-negative constraint the
    # other models in this module put on their counters.
    size_bytes: int = Field(ge=0)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
class PackageSize(BaseModel):
    """Installed package size totals grouped by coarse file category."""

    # Byte totals and the file count cannot be negative. ge=0 rejects a bad
    # value at construction time and matches the counters on FileStaticAnalysis.
    total_bytes: int = Field(default=0, ge=0)
    python_bytes: int = Field(default=0, ge=0)
    native_binary_bytes: int = Field(default=0, ge=0)
    javascript_bytes: int = Field(default=0, ge=0)
    other_bytes: int = Field(default=0, ge=0)
    file_count: int = Field(default=0, ge=0)
    largest_files: list[LargestFile] = Field(default_factory=list)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
class PythonStaticAnalysis(BaseModel):
    """AST-derived Python capability signals and parse warnings."""

    detected_capabilities: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)
    evidence: list[str] = Field(default_factory=list)
    rule_evidence: list[RiskRuleEvidence] = Field(default_factory=list)
    # Non-negative, like the *_scanned / *_detected counters on
    # FileStaticAnalysis, which already use ge=0.
    files_scanned: int = Field(default=0, ge=0)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
class FileStaticAnalysis(BaseModel):
    """Static file-type and text-pattern signals gathered without execution."""

    # The first four fields have the same names and types as on
    # PythonStaticAnalysis.
    detected_capabilities: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)
    evidence: list[str] = Field(default_factory=list)
    rule_evidence: list[RiskRuleEvidence] = Field(default_factory=list)
    # Reference lists, stored as plain strings.
    url_references: list[str] = Field(default_factory=list)
    domain_references: list[str] = Field(default_factory=list)
    credential_references: list[str] = Field(default_factory=list)
    # Per-file-type counters; each defaults to 0 and must be >= 0.
    javascript_files_scanned: int = Field(default=0, ge=0)
    shell_scripts_detected: int = Field(default=0, ge=0)
    native_binaries_detected: int = Field(default=0, ge=0)
    wasm_files_detected: int = Field(default=0, ge=0)
    setup_files_detected: int = Field(default=0, ge=0)
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
class PackageInspection(BaseModel):
    """Static inspection result combining metadata, files, warnings, and evidence."""

    # Required: metadata, the two status enums, and the size summary.
    metadata: PackageMetadata
    source_availability: SourceAvailability
    readability: ReadabilityStatus
    size: PackageSize
    package_paths: list[Path] = Field(default_factory=list)
    detected_capabilities: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)
    evidence: list[str] = Field(default_factory=list)
    rule_evidence: list[RiskRuleEvidence] = Field(default_factory=list)
    # Defaults to an empty FileStaticAnalysis (all lists empty, counters 0).
    file_analysis: FileStaticAnalysis = Field(default_factory=FileStaticAnalysis)
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
class PackageExplanation(BaseModel):
    """Human-readable package explanation assembled from local and installed sources."""

    # Required: package name and summary text.
    package: str
    version: str | None = None
    summary: str
    common_use_cases: list[str] = Field(default_factory=list)
    common_imports: list[str] = Field(default_factory=list)
    minimal_usage_example: str | None = None
    common_alternatives: list[str] = Field(default_factory=list)
    why_it_might_be_installed: list[str] = Field(default_factory=list)
    # Conservative defaults: UNKNOWN status and LOW confidence until a caller
    # supplies something better.
    dependency_status: DependencyStatus = DependencyStatus.UNKNOWN
    confidence: Confidence = Confidence.LOW
    sources_used: list[str] = Field(default_factory=list)
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
class PackageJudgement(BaseModel):
    """Agent-readable conservative judgement for an inspected package."""

    # Version markers. risk_model_version is typed as a Literal, so any value
    # other than "pkgwhy.risk_model.v1" fails validation; schema_version is a
    # plain str and is not constrained.
    schema_version: str = PACKAGE_JUDGEMENT_SCHEMA_VERSION
    risk_model_version: RiskModelVersion = RISK_MODEL_VERSION
    package: str
    version: str | None = None
    # Required verdict fields: none of these has a default.
    decision: AgentDecision
    risk_level: RiskLevel
    confidence: Confidence
    summary: str
    source_availability: SourceAvailability
    installed_size_bytes: int
    detected_capabilities: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)
    recommendation: str
    # Supporting material behind the verdict.
    evidence: list[str] = Field(default_factory=list)
    risk_rules: list[RiskRuleEvidence] = Field(default_factory=list)
    known_vulnerabilities: list[VulnerabilityMatch] = Field(default_factory=list)
    provenance: PackageProvenance | None = None
    # Defaults to the fixed disclaimer text from pkgwhy.core.constants.
    capability_exposure_note: str = CAPABILITY_EXPOSURE_NOTE
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
class AgentPackagePrecheckResult(BaseModel):
    """Schema-versioned agent policy decision for one package judgement."""

    # Two version markers: one for this result, one for the policy schema.
    schema_version: str = AGENT_PACKAGE_PRECHECK_SCHEMA_VERSION
    policy_schema_version: str = AGENT_POLICY_SCHEMA_VERSION
    package: str
    version: str | None = None
    # Literal type: "package" is the only value that validates.
    target_type: Literal["package"] = "package"
    # Defaults to True, i.e. the result is assumed to be non-interactive.
    non_interactive: bool = True
    decision: AgentDecision
    risk_level: RiskLevel
    confidence: Confidence
    policy_decision_source: str
    reasons: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)
    recommendation: str
    # The full judgement is embedded, not referenced.
    package_judgement: PackageJudgement
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
class DynamicProcessEvent(BaseModel):
    """Observed process event from a dynamic backend."""

    # Command as a list of strings; empty by default.
    command: list[str] = Field(default_factory=list)
    # None when no exit code was recorded.
    exit_code: int | None = None
    # Milliseconds according to the field name; when given it must be >= 0.
    duration_ms: int | None = Field(default=None, ge=0)
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
class DynamicFilesystemEvent(BaseModel):
    """Observed filesystem event from a dynamic backend."""

    # Both fields are required free-form strings; the model does not restrict
    # which action values are allowed.
    path: str
    action: str
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
class DynamicNetworkEvent(BaseModel):
    """Observed network event from a dynamic backend."""

    # destination and action are required free-form strings.
    destination: str
    action: str
    # Optional; None when the protocol is not known.
    protocol: str | None = None
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
class DynamicAnalysisResult(BaseModel):
    """Schema-versioned dynamic analysis result without fabricated events."""

    schema_version: str = DYNAMIC_ANALYSIS_SCHEMA_VERSION
    target: str
    mode: str = "inspect"
    sandbox_backend: str
    # Each mode enum currently has a single member, so these defaults
    # (network off, scratch filesystem) are also the only valid values.
    network_mode: DynamicNetworkMode = DynamicNetworkMode.OFF
    filesystem_mode: DynamicFilesystemMode = DynamicFilesystemMode.SCRATCH
    status: DynamicAnalysisStatus
    warnings: list[str] = Field(default_factory=list)
    # Event lists default to empty.
    process_events: list[DynamicProcessEvent] = Field(default_factory=list)
    filesystem_events: list[DynamicFilesystemEvent] = Field(default_factory=list)
    network_events: list[DynamicNetworkEvent] = Field(default_factory=list)
    decision: AgentDecision
    limitations: list[str] = Field(default_factory=list)
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
class TyposquatCandidate(BaseModel):
    """Conservative typosquatting similarity signal for a package name."""

    package: str
    normalized_package: str
    possible_target: str
    matched_reference: str
    # Must lie in the closed interval [0, 1]; enforced by the validator below.
    similarity: float
    recommendation: str
    signals: list[str] = Field(default_factory=list)
    # Defaults to False: a candidate is not flagged unless the caller says so.
    is_possible_typosquat: bool = False
    evidence: list[str] = Field(default_factory=list)

    @field_validator("similarity")
    @classmethod
    def validate_similarity(cls, value: float) -> float:
        """Return ``value`` unchanged when it is within [0, 1].

        Raises:
            ValueError: if ``value`` is outside the interval. NaN is also
                rejected, because the chained comparison is false for it.
        """
        if 0 <= value <= 1:
            return value
        raise ValueError("similarity must be between 0 and 1")
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
class DependencyReason(BaseModel):
    """Project-local evidence explaining why a package may be present."""

    # Required: both name forms and the resolved status.
    package: str
    normalized_package: str
    status: DependencyStatus
    # Lists of strings, empty by default.
    # NOTE(review): presumably manifest / lockfile paths or labels - confirm
    # against pkgwhy.dependencies.reason.
    declared_in: list[str] = Field(default_factory=list)
    lockfiles: list[str] = Field(default_factory=list)
    # Both flags default to False.
    imported_by_project: bool = False
    installed: bool = False
    transitive_via: list[str] = Field(default_factory=list)
    evidence: list[str] = Field(default_factory=list)
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
class RegistryConfig(BaseModel):
    """Local registry configuration stored on the user's machine."""

    # Hard-coded here; not taken from pkgwhy.core.constants.
    schema_version: str = "pkgwhy.registry_config.v1"
    # None when no registry has been selected.
    current_registry: str | None = None
    # String-to-string mapping, empty by default.
    # NOTE(review): presumably registry name -> location - confirm against
    # pkgwhy.registry.local.
    registries: dict[str, str] = Field(default_factory=dict)
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
class RegistryEntry(BaseModel):
    """One configured registry location."""

    name: str
    path: Path
    # Both flags default to False and are supplied by the caller; this model
    # does not touch the filesystem itself.
    is_current: bool = False
    index_exists: bool = False
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
class RegistryToolEntry(BaseModel):
    """One published local tool bundle in a registry index."""

    # Every field is required.
    name: str
    owner: str
    version: str
    artifact_type: ToolArtifactType
    entrypoint: str
    # Paths, the digest, and the timestamp are stored as plain strings; the
    # model does not validate their format.
    bundle_path: str
    sha256: str
    manifest_path: str
    published_at: str
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
class RegistryIndex(BaseModel):
    """Local registry index placeholder for published private tools."""

    # Hard-coded here; not taken from pkgwhy.core.constants.
    schema_version: str = "pkgwhy.registry_index.v1"
    # Published tool entries; empty by default.
    tools: list[RegistryToolEntry] = Field(default_factory=list)
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
class ToolSecurityPolicy(BaseModel):
    """Declared security policy for a private tool manifest."""

    # Restrictive defaults: approval required, unsigned and unpinned disallowed.
    requires_human_approval: bool = True
    allow_unsigned: bool = False
    allow_unpinned_dependencies: bool = False
    # Only "not_implemented" is accepted; see the validator below.
    signing_status: str = "not_implemented"

    @field_validator("signing_status")
    @classmethod
    def validate_signing_status(cls, value: str) -> str:
        """Accept only the placeholder value ``"not_implemented"``.

        Raises:
            ValueError: for any other value, so a manifest cannot claim a
                signing state.
        """
        if value == "not_implemented":
            return value
        raise ValueError("signing_status must be 'not_implemented' until signing is implemented")
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
class ToolAgentPolicy(BaseModel):
    """Declared agent policy for a private tool manifest."""

    # Both decisions default to manual review.
    default_decision: AgentDecision = AgentDecision.REVIEW_MANUALLY
    non_interactive_decision: AgentDecision = AgentDecision.REVIEW_MANUALLY
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
class ToolManifest(BaseModel):
    """Validated pkgwhy private tool manifest."""

    schema_version: str = "pkgwhy.tool_manifest.v1"
    # name and owner must pass validate_identifier.
    name: str
    owner: str
    # The text fields below are stripped and must be non-empty.
    version: str
    description: str
    artifact_type: ToolArtifactType
    entrypoint: str
    python_requires: str = ">=3.11"
    # Items in both lists are stripped and must be non-empty.
    dependencies: list[str] = Field(default_factory=list)
    declared_permissions: list[str] = Field(default_factory=list)
    # Nested policies fall back to their own defaults.
    security: ToolSecurityPolicy = Field(default_factory=ToolSecurityPolicy)
    agent: ToolAgentPolicy = Field(default_factory=ToolAgentPolicy)

    @field_validator("name", "owner")
    @classmethod
    def validate_identifier(cls, value: str) -> str:
        """Require alphanumeric segments joined by single ``.``, ``_`` or ``-``.

        The value is returned unchanged; it is not stripped or normalized.

        Raises:
            ValueError: if the whole string does not match the pattern.
        """
        matched = re.fullmatch(r"[A-Za-z0-9]+([._-][A-Za-z0-9]+)*", value)
        if matched is None:
            raise ValueError(
                "must start and end with a letter or number, with only single dots, underscores, or hyphens between segments"
            )
        return value

    @field_validator("version", "description", "entrypoint", "python_requires")
    @classmethod
    def validate_non_empty_text(cls, value: str) -> str:
        """Strip surrounding whitespace and reject text that is then empty.

        Raises:
            ValueError: if nothing remains after stripping.
        """
        text = value.strip()
        if text:
            return text
        raise ValueError("must not be empty")

    @field_validator("dependencies", "declared_permissions")
    @classmethod
    def validate_non_empty_list_items(cls, values: list[str]) -> list[str]:
        """Strip every item and reject the list if any item is then empty.

        Raises:
            ValueError: if at least one item is empty or whitespace-only.
        """
        cleaned = [item.strip() for item in values]
        if not all(cleaned):
            raise ValueError("list values must not be empty")
        return cleaned
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
class PublishResult(BaseModel):
    """Result of a local registry publish operation."""

    # Every field is required.
    manifest: ToolManifest
    registry_name: str
    # Paths are typed as Path here, whereas RegistryToolEntry stores its
    # bundle_path / manifest_path as str.
    registry_path: Path
    bundle_path: Path
    manifest_path: Path
    sha256: str
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
class ToolJudgement(BaseModel):
    """Agent-readable conservative judgement for a private registry tool."""

    # Hard-coded here; not taken from pkgwhy.core.constants.
    schema_version: str = "pkgwhy.tool_judgement.v1"
    # tool is carried in addition to owner / name / version.
    # NOTE(review): presumably the reference the tool was requested by -
    # confirm against pkgwhy.agent.judge.
    tool: str
    owner: str
    name: str
    version: str
    # Required verdict fields.
    decision: AgentDecision
    risk_level: RiskLevel
    confidence: Confidence
    reason: str
    requires_human_approval: bool
    # The full manifest is embedded in the judgement.
    manifest: ToolManifest
    declared_permissions: list[str] = Field(default_factory=list)
    detected_capabilities: list[str] = Field(default_factory=list)
    hash_status: HashStatus
    # Defaults to "not_implemented", the only value ToolSecurityPolicy accepts
    # for signing_status. This field itself has no validator.
    signature_status: str = "not_implemented"
    warnings: list[str] = Field(default_factory=list)
    recommendation: str
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
class ToolRunResult(BaseModel):
    """Execution metadata for a local private tool run."""

    # Hard-coded here; not taken from pkgwhy.core.constants.
    schema_version: str = "pkgwhy.tool_run.v1"
    # Tool identity and the registry it belongs to.
    tool: str
    owner: str
    name: str
    version: str
    registry_name: str
    registry_path: Path
    # Command as a list of strings; required, unlike on DynamicProcessEvent.
    command: list[str]
    entrypoint: str
    # Timestamps are stored as plain strings; the model does not parse them.
    started_at: str
    finished_at: str
    # Required int here; there is no None option for runs that never started.
    exit_code: int
    status: ToolRunStatus
    # Captured output is embedded as text alongside the log file path.
    stdout: str
    stderr: str
    log_path: Path
    warning: str
    # Policy outcome recorded with the run.
    policy_decision: AgentDecision
    policy_reasons: list[str] = Field(default_factory=list)
    policy_warnings: list[str] = Field(default_factory=list)
|
pkgwhy/dependencies/graph.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import deque
|
|
4
|
+
|
|
5
|
+
from packaging.requirements import InvalidRequirement, Requirement
|
|
6
|
+
|
|
7
|
+
from pkgwhy.metadata.installed import list_installed_packages, normalize_package_name
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def installed_dependency_graph() -> dict[str, set[str]]:
    """Build a dependency mapping for every installed package.

    Returns:
        A dict keyed by each package's normalized name. Each value is the set
        of normalized names parsed from that package's ``requires`` entries.
        If two installed packages share a normalized name, the later one wins.
    """
    return {
        installed.identity.normalized_name: _dependency_names(installed.requires)
        for installed in list_installed_packages()
    }
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def transitive_dependencies_for(direct_dependencies: set[str], graph: dict[str, set[str]] | None = None) -> set[str]:
    """Collect everything reachable from the direct dependencies, excluding them.

    Args:
        direct_dependencies: Package names declared directly; they are
            normalized before lookup.
        graph: Mapping of normalized name to the normalized names it requires.
            When ``None``, the graph is built from the installed packages.

    Returns:
        The normalized names reachable through one or more edges from any
        direct dependency. A name that is itself a direct dependency is never
        included, even when another package also requires it.
    """
    edges = installed_dependency_graph() if graph is None else graph
    roots = {normalize_package_name(name) for name in direct_dependencies}

    # Seed the walk with the immediate requirements of every root.
    pending: deque[str] = deque()
    for root in roots:
        pending.extend(edges.get(root, set()))

    reached: set[str] = set()
    while pending:
        candidate = pending.popleft()
        # Roots were expanded while seeding and reached names when first seen,
        # so skipping both also keeps cycles from looping forever.
        if candidate not in roots and candidate not in reached:
            reached.add(candidate)
            pending.extend(edges.get(candidate, set()))

    return reached
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def transitive_parents_for(
    package: str,
    direct_dependencies: set[str],
    graph: dict[str, set[str]] | None = None,
) -> set[str]:
    """Find packages reachable from the direct dependencies that require ``package``.

    Args:
        package: Name of the package whose parents are wanted; normalized
            before comparison.
        direct_dependencies: Starting points of the walk; normalized before
            lookup.
        graph: Mapping of normalized name to the normalized names it requires.
            When ``None``, the graph is built from the installed packages.

    Returns:
        Normalized names of every visited node whose requirement set contains
        ``package``. Direct dependencies are visited too, so they can be in
        the result.
    """
    edges = installed_dependency_graph() if graph is None else graph
    target = normalize_package_name(package)
    roots = {normalize_package_name(name) for name in direct_dependencies}

    found: set[str] = set()
    visited: set[str] = set()
    pending: deque[str] = deque(roots)

    while pending:
        node = pending.popleft()
        if node not in visited:
            visited.add(node)
            required = edges.get(node, set())
            if target in required:
                found.add(node)
            # Only enqueue names not yet visited; the check above still guards
            # against a name being queued more than once.
            pending.extend(required - visited)

    return found
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _dependency_names(requirements: list[str]) -> set[str]:
    """Extract normalized project names from requirement strings.

    Only the name of each requirement is used: version specifiers, extras, and
    environment markers are parsed but not evaluated, so requirements behind a
    marker are included as well. Strings that cannot be parsed as a
    requirement are skipped silently.
    """
    collected: set[str] = set()
    for spec in requirements:
        try:
            normalized = normalize_package_name(Requirement(spec).name)
        except InvalidRequirement:
            continue
        collected.add(normalized)
    return collected
|