pkgwhy 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. pkgwhy/__init__.py +3 -0
  2. pkgwhy/__main__.py +6 -0
  3. pkgwhy/agent/__init__.py +2 -0
  4. pkgwhy/agent/judge.py +93 -0
  5. pkgwhy/cli.py +676 -0
  6. pkgwhy/core/__init__.py +2 -0
  7. pkgwhy/core/constants.py +13 -0
  8. pkgwhy/core/models.py +608 -0
  9. pkgwhy/dependencies/__init__.py +2 -0
  10. pkgwhy/dependencies/graph.py +68 -0
  11. pkgwhy/dependencies/reason.py +79 -0
  12. pkgwhy/dynamic/__init__.py +2 -0
  13. pkgwhy/dynamic/analysis.py +156 -0
  14. pkgwhy/explanations/__init__.py +2 -0
  15. pkgwhy/explanations/explain.py +47 -0
  16. pkgwhy/explanations/local_db.py +52 -0
  17. pkgwhy/imports/__init__.py +2 -0
  18. pkgwhy/imports/scanner.py +43 -0
  19. pkgwhy/inspection/__init__.py +2 -0
  20. pkgwhy/inspection/files.py +540 -0
  21. pkgwhy/inspection/python_static.py +323 -0
  22. pkgwhy/inspection/size.py +58 -0
  23. pkgwhy/inspection/text_patterns.py +135 -0
  24. pkgwhy/manifests/__init__.py +2 -0
  25. pkgwhy/manifests/lockfiles.py +51 -0
  26. pkgwhy/manifests/pyproject.py +37 -0
  27. pkgwhy/manifests/requirements.py +27 -0
  28. pkgwhy/metadata/__init__.py +2 -0
  29. pkgwhy/metadata/installed.py +83 -0
  30. pkgwhy/metadata/pypi.py +199 -0
  31. pkgwhy/policy/__init__.py +1 -0
  32. pkgwhy/policy/agent_policy.py +114 -0
  33. pkgwhy/policy/audit_log.py +60 -0
  34. pkgwhy/policy/tool_execution.py +76 -0
  35. pkgwhy/provenance/__init__.py +2 -0
  36. pkgwhy/provenance/installed.py +45 -0
  37. pkgwhy/registry/__init__.py +2 -0
  38. pkgwhy/registry/local.py +178 -0
  39. pkgwhy/registry/manifest.py +78 -0
  40. pkgwhy/registry/publish.py +142 -0
  41. pkgwhy/registry/run.py +148 -0
  42. pkgwhy/registry/tools.py +121 -0
  43. pkgwhy/reports/__init__.py +2 -0
  44. pkgwhy/reports/audit.py +81 -0
  45. pkgwhy/risk/__init__.py +5 -0
  46. pkgwhy/risk/rules.py +372 -0
  47. pkgwhy/risk/scoring.py +231 -0
  48. pkgwhy/typosquat/__init__.py +2 -0
  49. pkgwhy/typosquat/detector.py +182 -0
  50. pkgwhy/typosquat/popular_packages.py +34 -0
  51. pkgwhy/vulnerabilities/__init__.py +2 -0
  52. pkgwhy/vulnerabilities/matching.py +122 -0
  53. pkgwhy/vulnerabilities/osv.py +330 -0
  54. pkgwhy-1.0.0.dist-info/METADATA +688 -0
  55. pkgwhy-1.0.0.dist-info/RECORD +58 -0
  56. pkgwhy-1.0.0.dist-info/WHEEL +4 -0
  57. pkgwhy-1.0.0.dist-info/entry_points.txt +2 -0
  58. pkgwhy-1.0.0.dist-info/licenses/LICENSE +22 -0
@@ -0,0 +1,178 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import sys
6
+ import tempfile
7
+ from pathlib import Path
8
+
9
+ from pydantic import ValidationError
10
+
11
+ from pkgwhy.core.models import RegistryConfig, RegistryEntry, RegistryIndex
12
+
13
# Environment variable that, when set, overrides the configuration directory.
CONFIG_ENV_VAR = "PKGWHY_CONFIG_HOME"
# File inside the configuration directory that stores the registry configuration.
CONFIG_FILENAME = "registries.json"
# File inside a registry directory that stores that registry's index.
REGISTRY_INDEX_FILENAME = "pkgwhy-registry.json"
# Registry name used by init_local_registry when the caller does not pass one.
DEFAULT_REGISTRY_NAME = "local"
17
+
18
+
19
def config_dir() -> Path:
    """Return the directory that holds pkgwhy's configuration.

    Resolution order: the ``PKGWHY_CONFIG_HOME`` override, then the platform
    location (``%APPDATA%`` on Windows when set, ``~/Library/Application
    Support`` on macOS), then ``$XDG_CONFIG_HOME``, and finally ``~/.config``.
    """
    app_dir = "pkgwhy"
    if custom_home := os.environ.get(CONFIG_ENV_VAR):
        return Path(custom_home).expanduser()

    platform_name = sys.platform
    if platform_name == "darwin":
        return Path.home() / "Library" / "Application Support" / app_dir
    # Windows without APPDATA falls through to the XDG / ~/.config lookup.
    if platform_name == "win32" and (roaming := os.environ.get("APPDATA")):
        return Path(roaming) / app_dir

    xdg_home = os.environ.get("XDG_CONFIG_HOME")
    base = Path(xdg_home) if xdg_home else Path.home() / ".config"
    return base / app_dir
33
+
34
+
35
def config_path() -> Path:
    """Return the full path of the registry configuration file."""
    directory = config_dir()
    return directory / CONFIG_FILENAME
37
+
38
+
39
def registry_index_path(path: Path) -> Path:
    """Return the location of the index file inside the registry directory *path*."""
    return path.joinpath(REGISTRY_INDEX_FILENAME)
41
+
42
+
43
def load_registry_index(path: Path, *, strict: bool = False) -> RegistryIndex:
    """Load the registry index stored in the registry directory *path*.

    Args:
        path: Registry directory that holds the index file.
        strict: When true, a missing or unreadable index raises
            ``ValueError``; otherwise an empty ``RegistryIndex`` is returned.

    Returns:
        The validated index, or an empty index in non-strict mode when the
        file is missing or cannot be read, decoded, parsed, or validated.

    Raises:
        ValueError: In strict mode, when the index is missing or unreadable.
    """
    target = registry_index_path(path)
    if not target.exists():
        if strict:
            raise ValueError(f"Registry index not found: {target}")
        return RegistryIndex()
    try:
        data = json.loads(target.read_text(encoding="utf-8"))
        return RegistryIndex.model_validate(data)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError, ValidationError) as exc:
        # UnicodeDecodeError (file is not valid UTF-8) is neither an OSError
        # nor a JSONDecodeError, so it has to be listed explicitly; otherwise
        # non-strict callers crash instead of getting an empty index.
        if strict:
            raise ValueError(f"Could not read registry index: {target}") from exc
        return RegistryIndex()
56
+
57
+
58
def save_registry_index(path: Path, index: RegistryIndex) -> None:
    """Serialize *index* as sorted, indented JSON into the registry directory *path*.

    The parent directory is created when missing and the file is written
    through ``_atomic_write_text``.
    """
    destination = registry_index_path(path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    payload = index.model_dump(mode="json")
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    _atomic_write_text(destination, serialized + "\n")
63
+
64
+
65
def load_registry_config(path: Path | None = None) -> RegistryConfig:
    """Load the registry configuration, falling back to an empty one.

    Args:
        path: Configuration file to read; defaults to ``config_path()``.

    Returns:
        The validated configuration, or an empty ``RegistryConfig`` when the
        file is missing or cannot be read, decoded, parsed, or validated.
    """
    target = path or config_path()
    if not target.exists():
        return RegistryConfig()
    try:
        data = json.loads(target.read_text(encoding="utf-8"))
        return RegistryConfig.model_validate(data)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError, ValidationError):
        # UnicodeDecodeError (file is not valid UTF-8) is listed explicitly so
        # a corrupt file takes the same fallback as every other read failure.
        return RegistryConfig()
74
+
75
+
76
def save_registry_config(config: RegistryConfig, path: Path | None = None) -> None:
    """Write *config* as sorted, indented JSON to *path* (default: ``config_path()``).

    The parent directory is created when missing and the file is written
    through ``_atomic_write_text``.
    """
    destination = path or config_path()
    destination.parent.mkdir(parents=True, exist_ok=True)
    payload = config.model_dump(mode="json")
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    _atomic_write_text(destination, serialized + "\n")
81
+
82
+
83
+ def _atomic_write_text(target: Path, content: str) -> None:
84
+ temp_path: Path | None = None
85
+ try:
86
+ with tempfile.NamedTemporaryFile("w", encoding="utf-8", dir=target.parent, delete=False) as handle:
87
+ handle.write(content)
88
+ temp_path = Path(handle.name)
89
+ temp_path.replace(target)
90
+ except Exception:
91
+ if temp_path is not None:
92
+ temp_path.unlink(missing_ok=True)
93
+ raise
94
+
95
+
96
def init_local_registry(path: Path, name: str = DEFAULT_REGISTRY_NAME) -> RegistryEntry:
    """Create a registry directory at *path*, register it, and make it current.

    An empty index is written only when the directory has no index yet.

    Raises:
        ValueError: If *name* is already configured for a different path.
    """
    root = path.expanduser().resolve()
    root.mkdir(parents=True, exist_ok=True)
    if not registry_index_path(root).exists():
        save_registry_index(root, RegistryIndex())

    settings = load_registry_config()
    root_text = str(root)
    if name in settings.registries:
        if settings.registries[name] != root_text:
            raise ValueError(f"A registry with this name already exists at a different path: {name}")
    settings.registries[name] = root_text
    settings.current_registry = name
    save_registry_config(settings)
    return RegistryEntry(name=name, path=root, is_current=True, index_exists=True)
110
+
111
+
112
def add_registry(name: str, path: Path) -> RegistryEntry:
    """Register an existing directory as a registry called *name*.

    The new registry becomes the current one only when no current registry
    is configured yet.

    Raises:
        ValueError: If *path* is not a directory or *name* is already taken.
    """
    root = path.expanduser().resolve()
    if not root.is_dir():
        raise ValueError(f"Registry path does not exist or is not a directory: {root}")

    has_index = registry_index_path(root).exists()
    settings = load_registry_config()
    if name in settings.registries:
        raise ValueError(f"A registry with this name already exists: {name}")

    settings.registries[name] = str(root)
    if settings.current_registry is None:
        settings.current_registry = name
    save_registry_config(settings)

    selected = settings.current_registry == name
    return RegistryEntry(name=name, path=root, is_current=selected, index_exists=has_index)
131
+
132
+
133
def use_registry(name: str) -> RegistryEntry:
    """Make the configured registry *name* the current one and describe it.

    Raises:
        ValueError: If no registry called *name* is configured.
    """
    settings = load_registry_config()
    stored_location = settings.registries.get(name)
    if stored_location is None:
        raise ValueError(f"Registry is not configured: {name}")

    settings.current_registry = name
    save_registry_config(settings)

    root = Path(stored_location)
    has_index = registry_index_path(root).exists()
    return RegistryEntry(name=name, path=root, is_current=True, index_exists=has_index)
148
+
149
+
150
def current_registry() -> RegistryEntry:
    """Return the entry for the registry currently selected in the configuration.

    Raises:
        ValueError: If no current registry is set, or the selected name has no
            configured path.
    """
    settings = load_registry_config()
    selected = settings.current_registry
    if selected is None:
        raise ValueError("No current registry is configured. Run 'pkgwhy registry init <path>' first.")
    if selected not in settings.registries:
        raise ValueError(f"Current registry is not configured: {selected}")

    root = Path(settings.registries[selected])
    has_index = registry_index_path(root).exists()
    return RegistryEntry(name=selected, path=root, is_current=True, index_exists=has_index)
163
+
164
+
165
def list_registries() -> list[RegistryEntry]:
    """Return every configured registry, sorted by name, flagging the current one."""
    settings = load_registry_config()
    selected = settings.current_registry

    def describe(label: str, location: str) -> RegistryEntry:
        # Build the entry for one configured (name, path) pair.
        root = Path(location)
        return RegistryEntry(
            name=label,
            path=root,
            is_current=selected == label,
            index_exists=registry_index_path(root).exists(),
        )

    return [describe(label, location) for label, location in sorted(settings.registries.items())]
@@ -0,0 +1,78 @@
1
+ from __future__ import annotations
2
+
3
+ import tomllib
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from pkgwhy.core.models import ToolAgentPolicy, ToolManifest, ToolSecurityPolicy
8
+
9
# File name looked up inside a tool folder when read_tool_manifest is given a directory.
MANIFEST_FILENAME = "pkgwhy.toml"
10
+
11
+
12
+ def read_tool_manifest(path: Path) -> ToolManifest:
13
+ manifest_path = path / MANIFEST_FILENAME if path.is_dir() else path
14
+ try:
15
+ data = tomllib.loads(manifest_path.read_text(encoding="utf-8"))
16
+ except FileNotFoundError as exc:
17
+ raise ValueError(f"Tool manifest not found: {manifest_path}") from exc
18
+ except tomllib.TOMLDecodeError as exc:
19
+ raise ValueError(f"Tool manifest is not valid TOML: {manifest_path}") from exc
20
+ except OSError as exc:
21
+ raise ValueError(f"Could not read tool manifest: {manifest_path}") from exc
22
+
23
+ return parse_tool_manifest_data(data)
24
+
25
+
26
def parse_tool_manifest_data(data: dict[str, Any]) -> ToolManifest:
    """Build a ``ToolManifest`` from parsed manifest data.

    The ``[tool]`` table is mandatory; ``[security]`` and ``[agent]`` default
    to empty tables. ``python_requires`` defaults to ``">=3.11"``.

    Raises:
        ValueError: If a table or field is missing or has the wrong type.
    """
    tool_table = _required_table(data, "tool")
    security_table = _optional_table(data, "security")
    agent_table = _optional_table(data, "agent")

    # Validated in this order so the first missing field is the one reported.
    mandatory_fields = ("name", "owner", "version", "description", "artifact_type", "entrypoint")
    mandatory = {field: _required_text(tool_table, field) for field in mandatory_fields}

    python_requires = _optional_text(tool_table, "python_requires", ">=3.11")
    dependencies = _optional_text_list(tool_table, "dependencies")
    declared_permissions = _optional_text_list(tool_table, "declared_permissions")
    security_policy = ToolSecurityPolicy.model_validate(security_table)
    agent_policy = ToolAgentPolicy.model_validate(agent_table)

    return ToolManifest(
        **mandatory,
        python_requires=python_requires,
        dependencies=dependencies,
        declared_permissions=declared_permissions,
        security=security_policy,
        agent=agent_policy,
    )
44
+
45
+
46
+ def _required_table(data: dict[str, Any], key: str) -> dict[str, Any]:
47
+ table = data.get(key)
48
+ if not isinstance(table, dict):
49
+ raise ValueError(f"Tool manifest must include a [{key}] table")
50
+ return table
51
+
52
+
53
+ def _optional_table(data: dict[str, Any], key: str) -> dict[str, Any]:
54
+ table = data.get(key, {})
55
+ if not isinstance(table, dict):
56
+ raise ValueError(f"Tool manifest [{key}] value must be a table")
57
+ return table
58
+
59
+
60
+ def _required_text(data: dict[str, Any], key: str) -> str:
61
+ value = data.get(key)
62
+ if not isinstance(value, str) or not value.strip():
63
+ raise ValueError(f"Tool manifest field is required and must be text: {key}")
64
+ return value
65
+
66
+
67
+ def _optional_text(data: dict[str, Any], key: str, default: str) -> str:
68
+ value = data.get(key, default)
69
+ if not isinstance(value, str) or not value.strip():
70
+ raise ValueError(f"Tool manifest field must be text when present: {key}")
71
+ return value
72
+
73
+
74
+ def _optional_text_list(data: dict[str, Any], key: str) -> list[str]:
75
+ values = data.get(key, [])
76
+ if not isinstance(values, list) or not all(isinstance(value, str) and value.strip() for value in values):
77
+ raise ValueError(f"Tool manifest field must be a list of non-empty strings when present: {key}")
78
+ return values
@@ -0,0 +1,142 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import tarfile
6
+ from datetime import UTC, datetime
7
+ from pathlib import Path
8
+
9
+ from pkgwhy.core.models import PublishResult, RegistryToolEntry, ToolArtifactType, ToolManifest
10
+ from pkgwhy.registry.local import current_registry, load_registry_index, save_registry_index
11
+ from pkgwhy.registry.manifest import read_tool_manifest
12
+
13
# Path components that _should_skip treats as excluded when building a tool bundle.
EXCLUDED_DIRS = {".git", ".hg", ".svn", ".venv", "venv", "__pycache__"}
14
+
15
+
16
def publish_local_tool(path: Path) -> PublishResult:
    """Publish a Python script or tool folder into the current registry.

    The source is bundled into a ``.tar.gz`` archive, the manifest is stored
    as JSON next to it, and the registry index gains an entry with the
    bundle's SHA-256 digest.

    Args:
        path: A ``.py`` file or a folder containing ``pkgwhy.toml``.

    Returns:
        A ``PublishResult`` describing the stored bundle and manifest.

    Raises:
        ValueError: If *path* is a symlink, does not exist, is not a
            publishable source, or the version is already published.
    """
    requested_source = path.expanduser()
    if requested_source.is_symlink():
        raise ValueError(f"Publish path must not be a symlink: {requested_source}")
    source = requested_source.resolve()
    if not source.exists():
        raise ValueError(f"Publish path does not exist: {source}")

    registry = current_registry()
    manifest = _manifest_for_source(source)
    _ensure_version_is_new(registry.path, manifest)
    bundle_path = _bundle_path(registry.path, manifest)
    manifest_path = _manifest_path(registry.path, manifest)
    bundle_path.parent.mkdir(parents=True, exist_ok=True)
    manifest_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        _write_bundle(source, bundle_path)
        sha256 = _sha256_file(bundle_path)
        manifest_path.write_text(
            json.dumps(manifest.model_dump(mode="json"), indent=2, sort_keys=True) + "\n",
            encoding="utf-8",
        )
        _update_index(registry.path, manifest, bundle_path, manifest_path, sha256)
    except Exception:
        # This version is not in the index (checked above), so whatever now
        # sits at these paths is an unreferenced, possibly partial artifact.
        for leftover in (bundle_path, manifest_path):
            try:
                leftover.unlink(missing_ok=True)
            except OSError:
                pass  # best-effort cleanup; the original error is re-raised below
        raise

    return PublishResult(
        manifest=manifest,
        registry_name=registry.name,
        registry_path=registry.path,
        bundle_path=bundle_path,
        manifest_path=manifest_path,
        sha256=sha256,
    )
48
+
49
+
50
def _manifest_for_source(source: Path) -> ToolManifest:
    """Return the manifest for *source*.

    A folder is described by its ``pkgwhy.toml``; a single ``.py`` file gets a
    generated manifest owned by ``local`` at version ``0.1.0``.

    Raises:
        ValueError: If *source* is neither a ``.py`` file nor a folder.
    """
    if source.is_dir():
        return read_tool_manifest(source)
    if not (source.is_file() and source.suffix == ".py"):
        raise ValueError("Publish path must be a Python script or a folder with pkgwhy.toml")
    return ToolManifest(
        name=source.stem,
        owner="local",
        version="0.1.0",
        description="Local Python script published with pkgwhy.",
        artifact_type=ToolArtifactType.SCRIPT,
        entrypoint=source.name,
        declared_permissions=[],
    )
64
+
65
+
66
def _bundle_path(registry_path: Path, manifest: ToolManifest) -> Path:
    """Return ``bundles/<owner>/<name>/<version>/<name>-<version>.tar.gz`` under the registry."""
    archive_name = f"{manifest.name}-{manifest.version}.tar.gz"
    version_dir = registry_path.joinpath("bundles", manifest.owner, manifest.name, manifest.version)
    return version_dir / archive_name
75
+
76
+
77
def _manifest_path(registry_path: Path, manifest: ToolManifest) -> Path:
    """Return ``manifests/<owner>/<name>/<version>/manifest.json`` under the registry."""
    version_dir = registry_path.joinpath("manifests", manifest.owner, manifest.name, manifest.version)
    return version_dir / "manifest.json"
79
+
80
+
81
+ def _write_bundle(source: Path, bundle_path: Path) -> None:
82
+ with tarfile.open(bundle_path, "w:gz", dereference=False) as archive:
83
+ if source.is_file():
84
+ archive.add(source, arcname=source.name)
85
+ return
86
+ for child in sorted(source.rglob("*")):
87
+ if _should_skip(child):
88
+ continue
89
+ if child.is_symlink():
90
+ raise ValueError(
91
+ f"Symlinks are not supported in tool bundles: {child.relative_to(source)}"
92
+ )
93
+ archive.add(child, arcname=child.relative_to(source))
94
+
95
+
96
def _should_skip(path: Path) -> bool:
    """Return True when any component of *path* is listed in ``EXCLUDED_DIRS``."""
    return not EXCLUDED_DIRS.isdisjoint(path.parts)
98
+
99
+
100
+ def _sha256_file(path: Path) -> str:
101
+ digest = hashlib.sha256()
102
+ with path.open("rb") as handle:
103
+ for chunk in iter(lambda: handle.read(1024 * 1024), b""):
104
+ digest.update(chunk)
105
+ return digest.hexdigest()
106
+
107
+
108
def _update_index(
    registry_path: Path,
    manifest: ToolManifest,
    bundle_path: Path,
    manifest_path: Path,
    sha256: str,
) -> None:
    """Append an entry for the published tool to the registry index and save it.

    Bundle and manifest locations are stored relative to *registry_path*; the
    publication time is the current UTC time in ISO format.
    """
    index = load_registry_index(registry_path, strict=True)
    timestamp = datetime.now(tz=UTC).isoformat()
    relative_bundle = str(bundle_path.relative_to(registry_path))
    relative_manifest = str(manifest_path.relative_to(registry_path))
    index.tools.append(
        RegistryToolEntry(
            name=manifest.name,
            owner=manifest.owner,
            version=manifest.version,
            artifact_type=manifest.artifact_type,
            entrypoint=manifest.entrypoint,
            bundle_path=relative_bundle,
            sha256=sha256,
            manifest_path=relative_manifest,
            published_at=timestamp,
        )
    )
    save_registry_index(registry_path, index)
130
+
131
+
132
def _ensure_version_is_new(registry_path: Path, manifest: ToolManifest) -> None:
    """Raise ``ValueError`` when the index already lists this owner/name/version."""
    wanted = (manifest.owner, manifest.name, manifest.version)
    index = load_registry_index(registry_path, strict=True)
    already_published = any(
        (published.owner, published.name, published.version) == wanted for published in index.tools
    )
    if already_published:
        raise ValueError(
            f"Tool version is already published: {manifest.owner}/{manifest.name} {manifest.version}"
        )
pkgwhy/registry/run.py ADDED
@@ -0,0 +1,148 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import shutil
5
+ import subprocess
6
+ import sys
7
+ import tarfile
8
+ import venv
9
+ from datetime import UTC, datetime
10
+ from pathlib import Path
11
+
12
+ from pkgwhy.core.models import (
13
+ ToolArtifactType,
14
+ ToolRunResult,
15
+ ToolRunStatus,
16
+ )
17
+ from pkgwhy.policy.tool_execution import evaluate_tool_execution_policy
18
+ from pkgwhy.registry.local import current_registry
19
+ from pkgwhy.registry.tools import judge_tool, resolve_tool_entry
20
+
21
# Warning text attached to every ToolRunResult built by run_local_tool.
RUNNER_ISOLATION_WARNING = (
    "This run uses a Python virtual environment for dependency isolation. "
    "It does not fully sandbox operating-system permissions."
)
# Maximum number of seconds run_local_tool lets the tool subprocess run.
DEFAULT_RUN_TIMEOUT_SECONDS = 300
26
+
27
+
28
def run_local_tool(reference: str, *, non_interactive: bool = False) -> ToolRunResult:
    """Run a tool published in the current registry and record the outcome.

    The tool is judged and checked against the execution policy, its bundle
    is unpacked into a fresh workspace, and its entrypoint is executed with
    the interpreter of a per-version virtual environment. The result is
    written to a JSON execution log and returned.

    Args:
        reference: Tool reference, either ``name`` or ``owner/name``.
        non_interactive: Forwarded to ``evaluate_tool_execution_policy``.

    Returns:
        A ``ToolRunResult`` holding the exit code, captured output, log path,
        and the policy decision.

    Raises:
        ValueError: If the policy does not allow execution, the artifact type
            is not a script or folder, the manifest declares dependencies,
            the entrypoint is absolute / contains ``..`` / is not a ``.py``
            file, or the entrypoint is not found inside the unpacked bundle.
    """
    registry = current_registry()
    entry = resolve_tool_entry(reference, registry)
    judgement = judge_tool(reference)
    policy_result = evaluate_tool_execution_policy(judgement, non_interactive=non_interactive)
    if not policy_result.allowed:
        raise ValueError(f"Tool policy blocks execution: {' '.join(policy_result.reasons)}")
    manifest = judgement.manifest
    if manifest.artifact_type not in {ToolArtifactType.SCRIPT, ToolArtifactType.FOLDER}:
        raise ValueError(f"Unsupported tool artifact type for runner MVP: {manifest.artifact_type.value}")
    if manifest.dependencies:
        raise ValueError("Dependency installation is not implemented for pkgwhy run MVP.")
    entrypoint = Path(manifest.entrypoint)
    # Only relative ``.py`` entrypoints without parent references are accepted.
    if entrypoint.is_absolute() or ".." in entrypoint.parts or entrypoint.suffix != ".py":
        raise ValueError(f"Unsupported entrypoint for runner MVP: {manifest.entrypoint}")

    # Workspace, virtual environment, and logs each live in a per-version
    # directory under the registry.
    tool_root = registry.path / "run-workspaces" / entry.owner / entry.name / entry.version
    venv_path = registry.path / "venvs" / entry.owner / entry.name / entry.version
    log_dir = registry.path / "execution-logs" / entry.owner / entry.name / entry.version
    bundle_path = registry.path / entry.bundle_path

    _prepare_workspace(bundle_path, tool_root)
    entrypoint_path = (tool_root / entrypoint).resolve()
    # After resolving, the entrypoint must still be a file below the workspace.
    if not entrypoint_path.is_file() or tool_root.resolve() not in entrypoint_path.parents:
        raise ValueError(f"Entrypoint not found in tool bundle: {manifest.entrypoint}")
    python_path = _ensure_venv_python(venv_path)
    log_dir.mkdir(parents=True, exist_ok=True)

    started_at_dt = datetime.now(tz=UTC)
    command = [str(python_path), str(entrypoint_path)]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=False,
            cwd=tool_root,
            timeout=DEFAULT_RUN_TIMEOUT_SECONDS,
        )
        exit_code = completed.returncode
        stdout = completed.stdout
        stderr = completed.stderr
    except subprocess.TimeoutExpired as exc:
        # A timeout is reported as exit code 124, with a note appended to
        # whatever stderr was captured before the timeout.
        exit_code = 124
        stdout = _output_text(exc.stdout)
        stderr = _output_text(exc.stderr)
        timeout_message = f"Tool execution timed out after {DEFAULT_RUN_TIMEOUT_SECONDS} seconds."
        stderr = f"{stderr.rstrip()}\n{timeout_message}\n" if stderr else f"{timeout_message}\n"
    finished_at_dt = datetime.now(tz=UTC)
    status = ToolRunStatus.COMPLETED if exit_code == 0 else ToolRunStatus.FAILED
    result = ToolRunResult(
        tool=f"{entry.owner}/{entry.name}",
        owner=entry.owner,
        name=entry.name,
        version=entry.version,
        registry_name=registry.name,
        registry_path=registry.path,
        command=command,
        entrypoint=manifest.entrypoint,
        started_at=started_at_dt.isoformat(),
        finished_at=finished_at_dt.isoformat(),
        exit_code=exit_code,
        status=status,
        stdout=stdout,
        stderr=stderr,
        # The log file is named after the start time, down to microseconds.
        log_path=log_dir / f"{started_at_dt.strftime('%Y%m%dT%H%M%S%fZ')}.json",
        warning=RUNNER_ISOLATION_WARNING,
        policy_decision=policy_result.decision,
        policy_reasons=policy_result.reasons,
        policy_warnings=policy_result.warnings,
    )
    _write_execution_log(result)
    return result
101
+
102
+
103
def _prepare_workspace(bundle_path: Path, tool_root: Path) -> None:
    """Recreate *tool_root* and unpack the bundle archive into it.

    Any existing workspace is deleted first. Archive members are checked with
    ``_validate_archive_members`` before extraction, and the ``data``
    extraction filter is applied whenever the running interpreter's
    ``tarfile`` module provides it.

    Args:
        bundle_path: Gzip-compressed tar archive to unpack.
        tool_root: Directory that receives the unpacked files.

    Raises:
        ValueError: If the archive contains an unsafe member.
    """
    if tool_root.exists():
        shutil.rmtree(tool_root)
    tool_root.mkdir(parents=True, exist_ok=True)
    with tarfile.open(bundle_path, "r:gz") as archive:
        members = archive.getmembers()
        _validate_archive_members(members)
        # Detect filter support up front instead of catching TypeError around
        # the extraction: a TypeError raised while extracting must not trigger
        # a second, unfiltered extraction.
        if hasattr(tarfile, "data_filter"):
            archive.extractall(tool_root, members=members, filter="data")
        else:
            archive.extractall(tool_root, members=members)
114
+
115
+
116
+ def _validate_archive_members(members: list[tarfile.TarInfo]) -> None:
117
+ for member in members:
118
+ path = Path(member.name)
119
+ if path.is_absolute() or ".." in path.parts:
120
+ raise ValueError(f"Unsafe path in tool bundle: {member.name}")
121
+ if member.issym() or member.islnk():
122
+ raise ValueError(f"Links are not supported in runner bundles: {member.name}")
123
+
124
+
125
def _ensure_venv_python(venv_path: Path) -> Path:
    """Return the interpreter inside *venv_path*, creating the environment if needed.

    The environment is created without pip and without clearing existing
    content.

    Raises:
        ValueError: If the interpreter is still missing after creation.
    """
    relative_python = "Scripts/python.exe" if _is_windows_venv() else "bin/python"
    interpreter = venv_path / relative_python
    if interpreter.exists():
        return interpreter

    venv.EnvBuilder(with_pip=False, clear=False).create(venv_path)
    if not interpreter.exists():
        raise ValueError(f"Could not create runner virtual environment at {venv_path}")
    return interpreter
132
+
133
+
134
+ def _is_windows_venv() -> bool:
135
+ return sys.platform == "win32"
136
+
137
+
138
+ def _output_text(value: str | bytes | None) -> str:
139
+ if value is None:
140
+ return ""
141
+ if isinstance(value, bytes):
142
+ return value.decode(errors="replace")
143
+ return value
144
+
145
+
146
def _write_execution_log(result: ToolRunResult) -> None:
    """Write *result* as sorted, indented JSON to ``result.log_path``, creating its directory."""
    destination = result.log_path
    destination.parent.mkdir(parents=True, exist_ok=True)
    document = json.dumps(result.model_dump(mode="json"), indent=2, sort_keys=True)
    destination.write_text(document + "\n", encoding="utf-8")
@@ -0,0 +1,121 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ from pathlib import Path
6
+
7
+ from pydantic import ValidationError
8
+
9
+ from pkgwhy.core.models import (
10
+ AgentDecision,
11
+ Confidence,
12
+ HashStatus,
13
+ RegistryEntry,
14
+ RegistryToolEntry,
15
+ RiskLevel,
16
+ ToolJudgement,
17
+ ToolManifest,
18
+ )
19
+ from pkgwhy.registry.local import current_registry, load_registry_index
20
+
21
+
22
def judge_tool(reference: str) -> ToolJudgement:
    """Judge a tool from the current registry by checking its bundle hash.

    A verified hash yields medium risk and either manual review (when the
    manifest requires human approval) or allow-with-caution. A missing bundle
    yields unknown risk with manual review. Any other hash status yields
    high risk and a block decision.
    """
    registry = current_registry()
    entry = resolve_tool_entry(reference, registry)
    manifest = _load_manifest(registry.path, entry)
    hash_status = _verify_hash(registry.path, entry)

    notes: list[str] = ["Signature verification is not implemented yet."]
    capabilities: list[str] = []
    verified = hash_status == HashStatus.VERIFIED

    if verified:
        if manifest.security.requires_human_approval:
            decision = AgentDecision.REVIEW_MANUALLY
        else:
            decision = AgentDecision.ALLOW_WITH_CAUTION
        risk, reason, recommendation = (
            RiskLevel.MEDIUM,
            "Tool bundle hash matches the local registry index.",
            "Review declared permissions and manifest metadata before running this private tool.",
        )
    elif hash_status == HashStatus.MISSING:
        risk, decision, reason, recommendation = (
            RiskLevel.UNKNOWN,
            AgentDecision.REVIEW_MANUALLY,
            "Tool bundle is missing from the local registry.",
            "Restore or republish the bundle before running this tool.",
        )
        notes.append("Bundle file is missing.")
    else:
        risk, decision, reason, recommendation = (
            RiskLevel.HIGH,
            AgentDecision.BLOCK,
            "Tool bundle hash does not match the local registry index.",
            "Block use until a human verifies or republishes the tool.",
        )
        notes.append("Bundle hash mismatch.")

    # No capabilities are detected here, so this warning is always added.
    if not capabilities:
        notes.append("Static capability detection for tool bundles is not implemented yet.")

    confidence = Confidence.MEDIUM if verified else Confidence.LOW
    return ToolJudgement(
        tool=f"{entry.owner}/{entry.name}",
        owner=entry.owner,
        name=entry.name,
        version=entry.version,
        decision=decision,
        risk_level=risk,
        confidence=confidence,
        reason=reason,
        requires_human_approval=manifest.security.requires_human_approval,
        manifest=manifest,
        declared_permissions=manifest.declared_permissions,
        detected_capabilities=capabilities,
        hash_status=hash_status,
        warnings=notes,
        recommendation=recommendation,
    )
68
+
69
+
70
def resolve_tool_entry(reference: str, registry: RegistryEntry | None = None) -> RegistryToolEntry:
    """Find the most recently published index entry matching *reference*.

    Args:
        reference: ``name`` or ``owner/name``.
        registry: Registry to search; defaults to the current registry.

    Raises:
        ValueError: If nothing matches, or a bare name matches tools from
            more than one owner.
    """
    active_registry = registry or current_registry()
    index = load_registry_index(active_registry.path, strict=True)
    owner, name = _parse_reference(reference)

    candidates = []
    for published in index.tools:
        if published.name != name:
            continue
        if owner is not None and published.owner != owner:
            continue
        candidates.append(published)

    if not candidates:
        raise ValueError(f"Tool is not published in the current registry: {reference}")
    if owner is None:
        distinct_owners = {candidate.owner for candidate in candidates}
        if len(distinct_owners) > 1:
            raise ValueError(f"Tool reference is ambiguous; include owner: {reference}")
    # max() keeps the first of equally recent entries, like a stable reverse sort.
    return max(candidates, key=lambda candidate: candidate.published_at)
84
+
85
+
86
+ def _parse_reference(reference: str) -> tuple[str | None, str]:
87
+ parts = reference.split("/", maxsplit=1)
88
+ if len(parts) == 2:
89
+ return parts[0], parts[1]
90
+ return None, reference
91
+
92
+
93
def _load_manifest(registry_path: Path, entry: RegistryToolEntry) -> ToolManifest:
    """Load the stored JSON manifest that the index entry points to.

    Args:
        registry_path: Root directory of the registry.
        entry: Index entry whose ``manifest_path`` is resolved inside the
            registry.

    Raises:
        ValueError: If the manifest path escapes the registry root, or the
            file cannot be read, decoded, parsed, or validated.
    """
    manifest_path = _validate_registry_path(registry_path, entry.manifest_path, entry)
    try:
        data = json.loads(manifest_path.read_text(encoding="utf-8"))
        return ToolManifest.model_validate(data)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError, ValidationError) as exc:
        # UnicodeDecodeError (file is not valid UTF-8) is listed explicitly so
        # it is reported with the same message as every other read failure.
        raise ValueError(f"Could not read stored tool manifest for {entry.owner}/{entry.name}") from exc
100
+
101
+
102
def _verify_hash(registry_path: Path, entry: RegistryToolEntry) -> HashStatus:
    """Compare the bundle's SHA-256 digest with the one recorded in the index entry.

    Returns ``MISSING`` when the bundle file does not exist, ``MISMATCH`` when
    the digests differ, and ``VERIFIED`` otherwise.
    """
    bundle_path = _validate_registry_path(registry_path, entry.bundle_path, entry)
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    try:
        with bundle_path.open("rb") as stream:
            while block := stream.read(chunk_size):
                hasher.update(block)
    except FileNotFoundError:
        return HashStatus.MISSING
    if hasher.hexdigest() == entry.sha256:
        return HashStatus.VERIFIED
    return HashStatus.MISMATCH
114
+
115
+
116
+ def _validate_registry_path(registry_path: Path, entry_path: str, entry: RegistryToolEntry) -> Path:
117
+ registry_root = registry_path.resolve()
118
+ candidate = (registry_root / entry_path).resolve()
119
+ if not candidate.is_relative_to(registry_root):
120
+ raise ValueError(f"Registry entry path escapes registry root for {entry.owner}/{entry.name}: {entry_path}")
121
+ return candidate
@@ -0,0 +1,2 @@
1
+ """Human and machine-readable package inspection reports."""
2
+