pkgwhy 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. pkgwhy/__init__.py +3 -0
  2. pkgwhy/__main__.py +6 -0
  3. pkgwhy/agent/__init__.py +2 -0
  4. pkgwhy/agent/judge.py +93 -0
  5. pkgwhy/cli.py +676 -0
  6. pkgwhy/core/__init__.py +2 -0
  7. pkgwhy/core/constants.py +13 -0
  8. pkgwhy/core/models.py +608 -0
  9. pkgwhy/dependencies/__init__.py +2 -0
  10. pkgwhy/dependencies/graph.py +68 -0
  11. pkgwhy/dependencies/reason.py +79 -0
  12. pkgwhy/dynamic/__init__.py +2 -0
  13. pkgwhy/dynamic/analysis.py +156 -0
  14. pkgwhy/explanations/__init__.py +2 -0
  15. pkgwhy/explanations/explain.py +47 -0
  16. pkgwhy/explanations/local_db.py +52 -0
  17. pkgwhy/imports/__init__.py +2 -0
  18. pkgwhy/imports/scanner.py +43 -0
  19. pkgwhy/inspection/__init__.py +2 -0
  20. pkgwhy/inspection/files.py +540 -0
  21. pkgwhy/inspection/python_static.py +323 -0
  22. pkgwhy/inspection/size.py +58 -0
  23. pkgwhy/inspection/text_patterns.py +135 -0
  24. pkgwhy/manifests/__init__.py +2 -0
  25. pkgwhy/manifests/lockfiles.py +51 -0
  26. pkgwhy/manifests/pyproject.py +37 -0
  27. pkgwhy/manifests/requirements.py +27 -0
  28. pkgwhy/metadata/__init__.py +2 -0
  29. pkgwhy/metadata/installed.py +83 -0
  30. pkgwhy/metadata/pypi.py +199 -0
  31. pkgwhy/policy/__init__.py +1 -0
  32. pkgwhy/policy/agent_policy.py +114 -0
  33. pkgwhy/policy/audit_log.py +60 -0
  34. pkgwhy/policy/tool_execution.py +76 -0
  35. pkgwhy/provenance/__init__.py +2 -0
  36. pkgwhy/provenance/installed.py +45 -0
  37. pkgwhy/registry/__init__.py +2 -0
  38. pkgwhy/registry/local.py +178 -0
  39. pkgwhy/registry/manifest.py +78 -0
  40. pkgwhy/registry/publish.py +142 -0
  41. pkgwhy/registry/run.py +148 -0
  42. pkgwhy/registry/tools.py +121 -0
  43. pkgwhy/reports/__init__.py +2 -0
  44. pkgwhy/reports/audit.py +81 -0
  45. pkgwhy/risk/__init__.py +5 -0
  46. pkgwhy/risk/rules.py +372 -0
  47. pkgwhy/risk/scoring.py +231 -0
  48. pkgwhy/typosquat/__init__.py +2 -0
  49. pkgwhy/typosquat/detector.py +182 -0
  50. pkgwhy/typosquat/popular_packages.py +34 -0
  51. pkgwhy/vulnerabilities/__init__.py +2 -0
  52. pkgwhy/vulnerabilities/matching.py +122 -0
  53. pkgwhy/vulnerabilities/osv.py +330 -0
  54. pkgwhy-1.0.0.dist-info/METADATA +688 -0
  55. pkgwhy-1.0.0.dist-info/RECORD +58 -0
  56. pkgwhy-1.0.0.dist-info/WHEEL +4 -0
  57. pkgwhy-1.0.0.dist-info/entry_points.txt +2 -0
  58. pkgwhy-1.0.0.dist-info/licenses/LICENSE +22 -0
@@ -0,0 +1,79 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from pkgwhy.core.models import DependencyReason, DependencyStatus, PackageMetadata
6
+ from pkgwhy.dependencies.graph import transitive_dependencies_for, transitive_parents_for
7
+ from pkgwhy.imports.scanner import scan_project_imports
8
+ from pkgwhy.manifests.lockfiles import read_lockfile_dependencies
9
+ from pkgwhy.manifests.pyproject import read_pyproject_dependencies
10
+ from pkgwhy.manifests.requirements import read_requirements_dependencies
11
+ from pkgwhy.metadata.installed import get_installed_package, normalize_package_name
12
+
13
+
14
def explain_dependency_reason(
    package: str,
    project_root: Path,
    project_imports: set[str] | None = None,
    metadata: PackageMetadata | None = None,
) -> DependencyReason:
    """Collect the signals that explain why *package* is present in a project.

    Args:
        package: Package name as requested by the caller.
        project_root: Directory holding ``pyproject.toml``, ``requirements.txt``
            and any lockfiles.
        project_imports: Pre-computed top-level import names; the project is
            scanned when this is ``None``.
        metadata: Pre-fetched installed metadata; the active environment is
            queried when this is ``None``.

    Returns:
        A ``DependencyReason`` carrying the derived status and the evidence
        lines, in the order the checks ran.
    """
    normalized = normalize_package_name(package)
    notes: list[str] = []

    # --- Manifests -------------------------------------------------------
    from_pyproject = read_pyproject_dependencies(project_root / "pyproject.toml")
    from_requirements = read_requirements_dependencies(project_root / "requirements.txt")
    declared = from_pyproject | from_requirements

    manifest_checks = (
        ("pyproject.toml", from_pyproject, "Package is declared in pyproject.toml."),
        ("requirements.txt", from_requirements, "Package is declared in requirements.txt."),
    )
    manifests_naming_it: list[str] = []
    for manifest_name, manifest_names, message in manifest_checks:
        if normalized in manifest_names:
            manifests_naming_it.append(manifest_name)
            notes.append(message)

    # --- Lockfiles -------------------------------------------------------
    locked = read_lockfile_dependencies(project_root)
    matching_lockfiles = sorted(
        lock_name for lock_name, lock_names in locked.items() if normalized in lock_names
    )
    notes.extend(f"Package appears in {lock_name}." for lock_name in matching_lockfiles)

    # --- Transitive reachability -----------------------------------------
    reachable = transitive_dependencies_for(declared)
    parents = sorted(transitive_parents_for(normalized, declared))
    is_transitive = normalized in reachable
    if is_transitive:
        notes.append("Package is reachable from installed dependency metadata for declared project dependencies.")
    if parents:
        notes.append(f"Immediate dependency parent signal: {', '.join(parents)}.")

    # --- Installed state -------------------------------------------------
    if metadata is None:
        found_metadata = get_installed_package(package)
    else:
        found_metadata = metadata
    is_installed = found_metadata is not None
    notes.append(
        "Package is installed in the active Python environment."
        if is_installed
        else "Package is not installed in the active Python environment."
    )

    # --- Project imports -------------------------------------------------
    if project_imports is None:
        seen_imports = scan_project_imports(project_root)
    else:
        seen_imports = project_imports
    # Best-effort only: some packages use non-obvious import names such as PIL, bs4, or yaml.
    is_imported = normalized.replace("-", "_") in seen_imports
    if is_imported:
        notes.append("Package import name appears in local project source.")

    # First matching rule wins; the order encodes the precedence of signals.
    precedence = (
        (bool(manifests_naming_it), DependencyStatus.DIRECT),
        (is_transitive, DependencyStatus.TRANSITIVE),
        (is_imported, DependencyStatus.IMPORTED_BY_PROJECT),
        (not is_installed, DependencyStatus.NOT_INSTALLED),
    )
    resolved_status = next(
        (candidate for applies, candidate in precedence if applies),
        DependencyStatus.UNKNOWN,
    )

    return DependencyReason(
        package=package,
        normalized_package=normalized,
        status=resolved_status,
        declared_in=manifests_naming_it,
        lockfiles=matching_lockfiles,
        imported_by_project=is_imported,
        installed=is_installed,
        transitive_via=parents,
        evidence=notes,
    )
@@ -0,0 +1,2 @@
1
+ """Experimental dynamic analysis boundaries."""
2
+
@@ -0,0 +1,156 @@
1
+ from __future__ import annotations
2
+
3
+ import subprocess
4
+ import shutil
5
+ import sys
6
+ import time
7
+ from pathlib import Path
8
+
9
+ from pkgwhy.core.models import (
10
+ AgentDecision,
11
+ DynamicAnalysisResult,
12
+ DynamicAnalysisStatus,
13
+ DynamicFilesystemEvent,
14
+ DynamicNetworkMode,
15
+ DynamicProcessEvent,
16
+ )
17
+
18
# User-facing notices attached to dynamic-analysis results. They are kept as
# module constants so every result builder emits exactly the same wording.
EXPERIMENTAL_DYNAMIC_WARNING = (
    "Experimental dynamic analysis is not a production sandbox."
)
STATIC_DEFAULT_WARNING = (
    "Static package inspection remains the default pkgwhy review path."
)
HOST_EXECUTION_REFUSAL = (
    "Refusing to run dynamic analysis for unknown package code on the host."
)
DYNAMIC_SCOPE_WARNING = (
    "Dynamic analysis is experimental and not part of the stable "
    "security decision surface in this build."
)
22
+
23
+
24
def build_unavailable_dynamic_result(
    target: str,
    *,
    container: bool,
    network: str,
) -> DynamicAnalysisResult:
    """Describe why dynamic analysis did not run; the target is never executed.

    Args:
        target: Name of the thing the caller asked to analyse.
        container: Whether the caller selected the container backend.
        network: Requested network mode; anything other than the ``OFF`` value
            is rejected.

    Returns:
        A result with empty event lists, a ``BLOCK`` decision, and warnings and
        limitations explaining the refusal.
    """
    notices = [
        EXPERIMENTAL_DYNAMIC_WARNING,
        STATIC_DEFAULT_WARNING,
        HOST_EXECUTION_REFUSAL,
        DYNAMIC_SCOPE_WARNING,
    ]
    caveats = [
        DYNAMIC_SCOPE_WARNING,
        "No dynamic sandbox backend is implemented in this build.",
        "No process, filesystem, or network events were collected.",
        "Empty event lists are not proof that no behavior would occur in a real run.",
    ]

    network_requested = network != DynamicNetworkMode.OFF.value
    if network_requested:
        # A network request is refused outright, whichever backend was chosen.
        outcome = DynamicAnalysisStatus.BLOCKED
        notices.append("Only network mode 'off' is accepted in this pre-alpha skeleton.")
        caveats.append("Network-enabled dynamic analysis is not supported.")
    elif not container:
        outcome = DynamicAnalysisStatus.BLOCKED
        notices.append("No sandbox backend selected. Host execution is not allowed.")
    else:
        outcome = DynamicAnalysisStatus.BACKEND_UNAVAILABLE
        # Docker is only looked up on PATH here; it is never invoked.
        if shutil.which("docker") is None:
            notices.append("Docker container backend is unavailable: docker executable was not found.")
        else:
            notices.append("Docker executable was detected, but container execution is not implemented in this build.")
            caveats.append("Docker is detected by executable lookup only; pkgwhy does not invoke Docker in this build.")

    if container:
        backend_label = "container"
    else:
        backend_label = "none"

    return DynamicAnalysisResult(
        target=target,
        sandbox_backend=backend_label,
        network_mode=DynamicNetworkMode.OFF,
        status=outcome,
        warnings=notices,
        process_events=[],
        filesystem_events=[],
        network_events=[],
        decision=AgentDecision.BLOCK,
        limitations=caveats,
    )
72
+
73
+
74
def run_controlled_fixture(
    fixture_path: Path,
    *,
    fixture_root: Path,
    scratch_dir: Path,
    timeout_seconds: float = 5.0,
) -> DynamicAnalysisResult:
    """Run a known local fixture for tests without enabling arbitrary package execution.

    The fixture is executed with the current interpreter, with ``scratch_dir``
    as its working directory and a deliberately minimal environment. Files in
    ``scratch_dir`` are snapshotted before and after the run to report created
    and modified paths.

    Args:
        fixture_path: Python file to execute; must resolve to a location
            strictly below ``fixture_root``.
        fixture_root: Directory that bounds which fixtures may be run.
        scratch_dir: Working directory for the child process; created if
            missing.
        timeout_seconds: Wall-clock limit passed to ``subprocess.run``.

    Returns:
        A result whose status is ``COMPLETED`` (decision ``ALLOW_WITH_CAUTION``)
        when the fixture exits with code 0, otherwise ``FAILED`` (decision
        ``BLOCK``).

    Raises:
        ValueError: If the fixture is outside ``fixture_root`` or is not an
            existing ``.py`` file.
        subprocess.TimeoutExpired: If the fixture exceeds ``timeout_seconds``.
    """
    # Function-scope import keeps this block self-contained.
    import os

    resolved_fixture = fixture_path.resolve()
    resolved_fixture_root = fixture_root.resolve()
    # Compare resolved paths so symlinks and ".." cannot escape the root.
    if resolved_fixture_root not in resolved_fixture.parents:
        raise ValueError("controlled fixture must live under fixture_root")
    if resolved_fixture.suffix != ".py" or not resolved_fixture.is_file():
        raise ValueError("controlled fixture must be a Python file")

    # The child gets a near-empty environment on purpose. Windows is the one
    # exception: the subprocess documentation requires a valid SystemRoot in a
    # custom env, so forward just that variable there.
    child_env = {"PYTHONIOENCODING": "utf-8"}
    if sys.platform == "win32":
        system_root = os.environ.get("SYSTEMROOT")
        if system_root:
            child_env["SYSTEMROOT"] = system_root

    scratch_dir.mkdir(parents=True, exist_ok=True)
    before = _snapshot_scratch(scratch_dir)
    started_at = time.monotonic()
    completed = subprocess.run(
        [sys.executable, str(resolved_fixture)],
        cwd=scratch_dir,
        env=child_env,
        capture_output=True,
        text=True,
        timeout=timeout_seconds,
        check=False,
    )
    duration_ms = int((time.monotonic() - started_at) * 1000)
    after = _snapshot_scratch(scratch_dir)

    succeeded = completed.returncode == 0
    status = DynamicAnalysisStatus.COMPLETED if succeeded else DynamicAnalysisStatus.FAILED
    decision = AgentDecision.ALLOW_WITH_CAUTION if succeeded else AgentDecision.BLOCK
    warnings = [
        "Controlled fixture execution only; this is not a sandbox for unknown package code.",
        "No network monitor is implemented for controlled fixture execution.",
    ]
    limitations = [
        "Execution was limited to a caller-provided local fixture path.",
        "Only scratch filesystem changes are compared.",
        "No process-tree or network telemetry is collected.",
    ]
    if completed.stderr:
        warnings.append("Controlled fixture wrote to stderr; stderr content is intentionally not included in the result.")

    return DynamicAnalysisResult(
        # Only the file name is reported, not the resolved absolute path.
        target=resolved_fixture.name,
        sandbox_backend="controlled_fixture",
        status=status,
        warnings=warnings,
        process_events=[
            DynamicProcessEvent(
                command=[sys.executable, resolved_fixture.name],
                exit_code=completed.returncode,
                duration_ms=duration_ms,
            )
        ],
        filesystem_events=_filesystem_events(before, after),
        network_events=[],
        decision=decision,
        limitations=limitations,
    )
134
+
135
+
136
+ def _snapshot_scratch(root: Path) -> dict[str, int]:
137
+ snapshot: dict[str, int] = {}
138
+ for path in root.rglob("*"):
139
+ if not path.is_file():
140
+ continue
141
+ try:
142
+ relative_path = path.relative_to(root).as_posix()
143
+ snapshot[relative_path] = path.stat().st_mtime_ns
144
+ except OSError:
145
+ continue
146
+ return snapshot
147
+
148
+
149
+ def _filesystem_events(before: dict[str, int], after: dict[str, int]) -> list[DynamicFilesystemEvent]:
150
+ events: list[DynamicFilesystemEvent] = []
151
+ for path, mtime_ns in sorted(after.items()):
152
+ if path not in before:
153
+ events.append(DynamicFilesystemEvent(path=path, action="created"))
154
+ elif before[path] != mtime_ns:
155
+ events.append(DynamicFilesystemEvent(path=path, action="modified"))
156
+ return events
@@ -0,0 +1,2 @@
1
+ """Offline package explanations."""
2
+
@@ -0,0 +1,47 @@
1
+ from __future__ import annotations
2
+
3
+ from packaging.utils import canonicalize_name
4
+
5
+ from pkgwhy.core.models import Confidence, PackageExplanation, PackageMetadata
6
+ from pkgwhy.explanations.local_db import LOCAL_EXPLANATIONS
7
+
8
+
9
def explain_package(metadata: PackageMetadata | None, requested_name: str, dependency_status: str) -> PackageExplanation:
    """Produce an offline explanation for a package.

    A built-in explanation, when one exists for the canonical name, is copied
    and stamped with the installed version and the dependency status.
    Otherwise the explanation is derived from installed metadata, or reports
    that no metadata was found.

    Args:
        metadata: Installed metadata for the package, or ``None`` if absent.
        requested_name: Name as supplied by the caller; canonicalized here.
        dependency_status: Status string copied into the explanation.

    Returns:
        A ``PackageExplanation`` for the canonical package name.
    """
    canonical = canonicalize_name(requested_name)
    curated = LOCAL_EXPLANATIONS.get(canonical)

    if curated is not None:
        # Deep copy so the shared built-in entry is never mutated.
        tailored = curated.model_copy(deep=True)
        if metadata:
            tailored.version = metadata.identity.version
        else:
            tailored.version = None
        tailored.dependency_status = dependency_status
        return tailored

    if metadata is not None:
        if metadata.summary:
            certainty = Confidence.MEDIUM
        else:
            certainty = Confidence.LOW
        return PackageExplanation(
            package=canonical,
            version=metadata.identity.version,
            summary=metadata.summary or "Installed package metadata does not include a summary.",
            why_it_might_be_installed=_why_from_metadata(metadata),
            dependency_status=dependency_status,
            confidence=certainty,
            sources_used=["installed distribution metadata"],
        )

    return PackageExplanation(
        package=canonical,
        summary="No installed metadata was found for this package in the active Python environment.",
        dependency_status=dependency_status,
        confidence=Confidence.LOW,
        sources_used=["active environment metadata lookup"],
    )
37
+
38
+
39
+ def _why_from_metadata(metadata: PackageMetadata) -> list[str]:
40
+ reasons: list[str] = []
41
+ if metadata.entry_points:
42
+ reasons.append("It declares CLI or plugin entry points.")
43
+ if metadata.requires:
44
+ reasons.append("It declares dependencies on other packages.")
45
+ if metadata.summary:
46
+ reasons.append("Its installed metadata summary describes its intended purpose.")
47
+ return reasons or ["Installed metadata is available, but why it is installed is not yet known."]
@@ -0,0 +1,52 @@
1
+ from __future__ import annotations
2
+
3
+ from pkgwhy.core.models import Confidence, PackageExplanation
4
+
5
+
6
# Built-in explanations, keyed by package name. Each entry is keyed from its
# own ``package`` field so the key and the record cannot drift apart.
LOCAL_EXPLANATIONS: dict[str, PackageExplanation] = {
    entry.package: entry
    for entry in (
        PackageExplanation(
            package="packaging",
            summary="Core utilities for parsing and comparing Python package versions, specifiers, markers, and requirements.",
            common_use_cases=[
                "Validate dependency specifiers",
                "Compare package versions",
                "Parse requirement strings",
            ],
            common_imports=[
                "packaging.version",
                "packaging.requirements",
                "packaging.specifiers",
            ],
            minimal_usage_example="from packaging.version import Version\nVersion('2.0') > Version('1.9')",
            common_alternatives=[],
            why_it_might_be_installed=["Used by packaging tools and dependency-aware applications."],
            confidence=Confidence.HIGH,
            sources_used=["built-in pkgwhy explanation database"],
        ),
        PackageExplanation(
            package="pydantic",
            summary="Data validation and structured model library for Python type hints.",
            common_use_cases=[
                "Validate structured data",
                "Define JSON-friendly models",
                "Parse API payloads and configuration",
            ],
            common_imports=[
                "pydantic.BaseModel",
                "pydantic.Field",
            ],
            minimal_usage_example="from pydantic import BaseModel\nclass Item(BaseModel):\n name: str",
            common_alternatives=["attrs", "dataclasses", "marshmallow"],
            why_it_might_be_installed=["Used by applications that need typed validation or stable JSON output."],
            confidence=Confidence.HIGH,
            sources_used=["built-in pkgwhy explanation database"],
        ),
        PackageExplanation(
            package="rich",
            summary="Terminal formatting library for tables, colors, tracebacks, progress bars, and structured console output.",
            common_use_cases=[
                "Render CLI tables",
                "Improve terminal output",
                "Display progress and formatted logs",
            ],
            common_imports=[
                "rich.console",
                "rich.table",
            ],
            minimal_usage_example="from rich.console import Console\nConsole().print('[bold]Hello[/bold]')",
            common_alternatives=["click styling", "blessed"],
            why_it_might_be_installed=["Used by command-line tools for readable terminal output."],
            confidence=Confidence.HIGH,
            sources_used=["built-in pkgwhy explanation database"],
        ),
        PackageExplanation(
            package="typer",
            summary="CLI framework built on Click that uses Python type hints to define command-line interfaces.",
            common_use_cases=[
                "Build Python CLIs",
                "Create typed command arguments",
                "Generate command help",
            ],
            common_imports=["typer"],
            minimal_usage_example="import typer\napp = typer.Typer()\n@app.command()\ndef main(name: str):\n typer.echo(name)",
            common_alternatives=["click", "argparse"],
            why_it_might_be_installed=["Used by Python projects that expose command-line commands."],
            confidence=Confidence.HIGH,
            sources_used=["built-in pkgwhy explanation database"],
        ),
    )
}
52
+
@@ -0,0 +1,2 @@
1
+ """Project import scanning."""
2
+
@@ -0,0 +1,43 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ from pathlib import Path
5
+
6
+ SKIP_DIRS = {
7
+ ".git",
8
+ ".hg",
9
+ ".mypy_cache",
10
+ ".pytest_cache",
11
+ ".ruff_cache",
12
+ ".tox",
13
+ ".venv",
14
+ "__pycache__",
15
+ "build",
16
+ "dist",
17
+ "site-packages",
18
+ }
19
+
20
+
21
+ def scan_project_imports(root: Path) -> set[str]:
22
+ imports: set[str] = set()
23
+ for path in root.rglob("*.py"):
24
+ if any(part in SKIP_DIRS for part in path.parts):
25
+ continue
26
+ try:
27
+ tree = ast.parse(path.read_text(encoding="utf-8"), filename=str(path))
28
+ except (OSError, SyntaxError, UnicodeDecodeError):
29
+ continue
30
+ imports.update(_imports_from_tree(tree))
31
+ return imports
32
+
33
+
34
+ def _imports_from_tree(tree: ast.AST) -> set[str]:
35
+ imports: set[str] = set()
36
+ for node in ast.walk(tree):
37
+ if isinstance(node, ast.Import):
38
+ for alias in node.names:
39
+ imports.add(alias.name.split(".", 1)[0])
40
+ elif isinstance(node, ast.ImportFrom) and node.module and node.level == 0:
41
+ imports.add(node.module.split(".", 1)[0])
42
+ return imports
43
+
@@ -0,0 +1,2 @@
1
+ """Static package inspection helpers."""
2
+