pkgwhy 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pkgwhy/__init__.py +3 -0
- pkgwhy/__main__.py +6 -0
- pkgwhy/agent/__init__.py +2 -0
- pkgwhy/agent/judge.py +93 -0
- pkgwhy/cli.py +676 -0
- pkgwhy/core/__init__.py +2 -0
- pkgwhy/core/constants.py +13 -0
- pkgwhy/core/models.py +608 -0
- pkgwhy/dependencies/__init__.py +2 -0
- pkgwhy/dependencies/graph.py +68 -0
- pkgwhy/dependencies/reason.py +79 -0
- pkgwhy/dynamic/__init__.py +2 -0
- pkgwhy/dynamic/analysis.py +156 -0
- pkgwhy/explanations/__init__.py +2 -0
- pkgwhy/explanations/explain.py +47 -0
- pkgwhy/explanations/local_db.py +52 -0
- pkgwhy/imports/__init__.py +2 -0
- pkgwhy/imports/scanner.py +43 -0
- pkgwhy/inspection/__init__.py +2 -0
- pkgwhy/inspection/files.py +540 -0
- pkgwhy/inspection/python_static.py +323 -0
- pkgwhy/inspection/size.py +58 -0
- pkgwhy/inspection/text_patterns.py +135 -0
- pkgwhy/manifests/__init__.py +2 -0
- pkgwhy/manifests/lockfiles.py +51 -0
- pkgwhy/manifests/pyproject.py +37 -0
- pkgwhy/manifests/requirements.py +27 -0
- pkgwhy/metadata/__init__.py +2 -0
- pkgwhy/metadata/installed.py +83 -0
- pkgwhy/metadata/pypi.py +199 -0
- pkgwhy/policy/__init__.py +1 -0
- pkgwhy/policy/agent_policy.py +114 -0
- pkgwhy/policy/audit_log.py +60 -0
- pkgwhy/policy/tool_execution.py +76 -0
- pkgwhy/provenance/__init__.py +2 -0
- pkgwhy/provenance/installed.py +45 -0
- pkgwhy/registry/__init__.py +2 -0
- pkgwhy/registry/local.py +178 -0
- pkgwhy/registry/manifest.py +78 -0
- pkgwhy/registry/publish.py +142 -0
- pkgwhy/registry/run.py +148 -0
- pkgwhy/registry/tools.py +121 -0
- pkgwhy/reports/__init__.py +2 -0
- pkgwhy/reports/audit.py +81 -0
- pkgwhy/risk/__init__.py +5 -0
- pkgwhy/risk/rules.py +372 -0
- pkgwhy/risk/scoring.py +231 -0
- pkgwhy/typosquat/__init__.py +2 -0
- pkgwhy/typosquat/detector.py +182 -0
- pkgwhy/typosquat/popular_packages.py +34 -0
- pkgwhy/vulnerabilities/__init__.py +2 -0
- pkgwhy/vulnerabilities/matching.py +122 -0
- pkgwhy/vulnerabilities/osv.py +330 -0
- pkgwhy-1.0.0.dist-info/METADATA +688 -0
- pkgwhy-1.0.0.dist-info/RECORD +58 -0
- pkgwhy-1.0.0.dist-info/WHEEL +4 -0
- pkgwhy-1.0.0.dist-info/entry_points.txt +2 -0
- pkgwhy-1.0.0.dist-info/licenses/LICENSE +22 -0
pkgwhy/registry/local.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import tempfile
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from pydantic import ValidationError
|
|
10
|
+
|
|
11
|
+
from pkgwhy.core.models import RegistryConfig, RegistryEntry, RegistryIndex
|
|
12
|
+
|
|
13
|
+
CONFIG_ENV_VAR = "PKGWHY_CONFIG_HOME"
|
|
14
|
+
CONFIG_FILENAME = "registries.json"
|
|
15
|
+
REGISTRY_INDEX_FILENAME = "pkgwhy-registry.json"
|
|
16
|
+
DEFAULT_REGISTRY_NAME = "local"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def config_dir() -> Path:
    """Return the directory that holds pkgwhy's registry configuration.

    Resolution order:

    1. The ``PKGWHY_CONFIG_HOME``-style override named by ``CONFIG_ENV_VAR``
       (with ``~`` expanded), when set and non-empty.
    2. On macOS, ``~/Library/Application Support/pkgwhy``.
    3. On Windows, ``%APPDATA%/pkgwhy`` when ``APPDATA`` is set.
    4. ``$XDG_CONFIG_HOME/pkgwhy`` when that variable is set.
    5. ``~/.config/pkgwhy`` as the final fallback.
    """
    env = os.environ
    explicit_home = env.get(CONFIG_ENV_VAR)
    if explicit_home:
        return Path(explicit_home).expanduser()

    platform = sys.platform
    if platform == "darwin":
        return Path.home().joinpath("Library", "Application Support", "pkgwhy")

    # Windows prefers APPDATA but falls through to the XDG lookup without it.
    base = env.get("APPDATA") if platform == "win32" else None
    if not base:
        base = env.get("XDG_CONFIG_HOME")
    if base:
        return Path(base) / "pkgwhy"
    return Path.home().joinpath(".config", "pkgwhy")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def config_path() -> Path:
    """Return the full path of the registry configuration file."""
    return config_dir().joinpath(CONFIG_FILENAME)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def registry_index_path(path: Path) -> Path:
    """Return where the registry index file lives inside registry root *path*."""
    return path.joinpath(REGISTRY_INDEX_FILENAME)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def load_registry_index(path: Path, *, strict: bool = False) -> RegistryIndex:
    """Load the registry index stored under registry root *path*.

    With ``strict=False`` a missing, unreadable, or invalid index yields an
    empty ``RegistryIndex``. With ``strict=True`` those situations raise
    ``ValueError`` instead.
    """
    index_file = registry_index_path(path)
    if index_file.exists():
        try:
            raw = index_file.read_text(encoding="utf-8")
            return RegistryIndex.model_validate(json.loads(raw))
        except (OSError, json.JSONDecodeError, ValidationError) as exc:
            if not strict:
                return RegistryIndex()
            raise ValueError(f"Could not read registry index: {index_file}") from exc

    if strict:
        raise ValueError(f"Registry index not found: {index_file}")
    return RegistryIndex()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def save_registry_index(path: Path, index: RegistryIndex) -> None:
    """Serialize *index* as sorted, indented JSON into registry root *path*.

    The parent directory is created when needed and the file is written via
    ``_atomic_write_text``.
    """
    index_file = registry_index_path(path)
    index_file.parent.mkdir(parents=True, exist_ok=True)
    payload = index.model_dump(mode="json")
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    _atomic_write_text(index_file, serialized + "\n")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def load_registry_config(path: Path | None = None) -> RegistryConfig:
    """Load the registry configuration from *path* (default: ``config_path()``).

    This is best-effort: a missing, unreadable, or invalid file yields an
    empty ``RegistryConfig`` rather than an error.
    """
    config_file = path if path else config_path()
    if not config_file.exists():
        return RegistryConfig()
    try:
        raw = config_file.read_text(encoding="utf-8")
        return RegistryConfig.model_validate(json.loads(raw))
    except (OSError, json.JSONDecodeError, ValidationError):
        # Deliberate fallback: treat a broken config as "nothing configured".
        return RegistryConfig()
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def save_registry_config(config: RegistryConfig, path: Path | None = None) -> None:
    """Write *config* as sorted, indented JSON to *path* (default: ``config_path()``).

    The parent directory is created when needed and the file is written via
    ``_atomic_write_text``.
    """
    config_file = path if path else config_path()
    config_file.parent.mkdir(parents=True, exist_ok=True)
    payload = config.model_dump(mode="json")
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    _atomic_write_text(config_file, serialized + "\n")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _atomic_write_text(target: Path, content: str) -> None:
|
|
84
|
+
temp_path: Path | None = None
|
|
85
|
+
try:
|
|
86
|
+
with tempfile.NamedTemporaryFile("w", encoding="utf-8", dir=target.parent, delete=False) as handle:
|
|
87
|
+
handle.write(content)
|
|
88
|
+
temp_path = Path(handle.name)
|
|
89
|
+
temp_path.replace(target)
|
|
90
|
+
except Exception:
|
|
91
|
+
if temp_path is not None:
|
|
92
|
+
temp_path.unlink(missing_ok=True)
|
|
93
|
+
raise
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def init_local_registry(path: Path, name: str = DEFAULT_REGISTRY_NAME) -> RegistryEntry:
    """Create (if needed) a local registry at *path* and make it current.

    The directory is created, an empty index is written when none exists, and
    the registry is recorded under *name* in the saved configuration as the
    current registry.

    Raises:
        ValueError: if *name* is already configured for a different path.
    """
    root = path.expanduser().resolve()
    root.mkdir(parents=True, exist_ok=True)
    if not registry_index_path(root).exists():
        save_registry_index(root, RegistryIndex())

    root_text = str(root)
    config = load_registry_config()
    registered = config.registries
    if name in registered:
        if registered[name] != root_text:
            raise ValueError(f"A registry with this name already exists at a different path: {name}")
    registered[name] = root_text
    config.current_registry = name
    save_registry_config(config)
    return RegistryEntry(name=name, path=root, is_current=True, index_exists=True)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def add_registry(name: str, path: Path) -> RegistryEntry:
    """Register an existing directory as a registry called *name*.

    The new registry becomes current only when no current registry is
    configured yet.

    Raises:
        ValueError: if *path* is not an existing directory or *name* is
            already configured.
    """
    root = path.expanduser().resolve()
    if not root.is_dir():
        raise ValueError(f"Registry path does not exist or is not a directory: {root}")

    has_index = registry_index_path(root).exists()
    config = load_registry_config()
    if name in config.registries:
        raise ValueError(f"A registry with this name already exists: {name}")

    config.registries[name] = str(root)
    if config.current_registry is None:
        config.current_registry = name
    save_registry_config(config)

    now_current = config.current_registry == name
    return RegistryEntry(name=name, path=root, is_current=now_current, index_exists=has_index)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def use_registry(name: str) -> RegistryEntry:
    """Mark the configured registry *name* as current and persist the choice.

    Raises:
        ValueError: if no registry called *name* is configured.
    """
    config = load_registry_config()
    location = config.registries.get(name)
    if location is None:
        raise ValueError(f"Registry is not configured: {name}")

    config.current_registry = name
    save_registry_config(config)

    root = Path(location)
    has_index = registry_index_path(root).exists()
    return RegistryEntry(name=name, path=root, is_current=True, index_exists=has_index)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def current_registry() -> RegistryEntry:
    """Return the entry for the registry currently selected in the config.

    Raises:
        ValueError: if no current registry is set, or the current name does
            not appear among the configured registries.
    """
    config = load_registry_config()
    active = config.current_registry
    if active is None:
        raise ValueError("No current registry is configured. Run 'pkgwhy registry init <path>' first.")
    if active not in config.registries:
        raise ValueError(f"Current registry is not configured: {active}")

    root = Path(config.registries[active])
    has_index = registry_index_path(root).exists()
    return RegistryEntry(name=active, path=root, is_current=True, index_exists=has_index)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def list_registries() -> list[RegistryEntry]:
    """Return one entry per configured registry, sorted by (name, path).

    Each entry records whether it is the current registry and whether its
    index file exists on disk right now.
    """
    config = load_registry_config()
    active = config.current_registry
    return [
        RegistryEntry(
            name=registry_name,
            path=Path(location),
            is_current=active == registry_name,
            index_exists=registry_index_path(Path(location)).exists(),
        )
        for registry_name, location in sorted(config.registries.items())
    ]
|
pkgwhy/registry/manifest.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import tomllib
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from pkgwhy.core.models import ToolAgentPolicy, ToolManifest, ToolSecurityPolicy
|
|
8
|
+
|
|
9
|
+
MANIFEST_FILENAME = "pkgwhy.toml"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def read_tool_manifest(path: Path) -> ToolManifest:
|
|
13
|
+
manifest_path = path / MANIFEST_FILENAME if path.is_dir() else path
|
|
14
|
+
try:
|
|
15
|
+
data = tomllib.loads(manifest_path.read_text(encoding="utf-8"))
|
|
16
|
+
except FileNotFoundError as exc:
|
|
17
|
+
raise ValueError(f"Tool manifest not found: {manifest_path}") from exc
|
|
18
|
+
except tomllib.TOMLDecodeError as exc:
|
|
19
|
+
raise ValueError(f"Tool manifest is not valid TOML: {manifest_path}") from exc
|
|
20
|
+
except OSError as exc:
|
|
21
|
+
raise ValueError(f"Could not read tool manifest: {manifest_path}") from exc
|
|
22
|
+
|
|
23
|
+
return parse_tool_manifest_data(data)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def parse_tool_manifest_data(data: dict[str, Any]) -> ToolManifest:
    """Build a ``ToolManifest`` from already-decoded manifest data.

    A ``tool`` table is required; ``security`` and ``agent`` tables are
    optional and default to empty tables. ``python_requires`` defaults to
    ``">=3.11"``.

    Raises:
        ValueError: from the field helpers when a table or field has the
            wrong shape.
    """
    tool_table = _required_table(data, "tool")
    security_table = _optional_table(data, "security")
    agent_table = _optional_table(data, "agent")

    # The dict literal is evaluated top to bottom, so fields are validated in
    # the same order they are listed here.
    fields = {
        "name": _required_text(tool_table, "name"),
        "owner": _required_text(tool_table, "owner"),
        "version": _required_text(tool_table, "version"),
        "description": _required_text(tool_table, "description"),
        "artifact_type": _required_text(tool_table, "artifact_type"),
        "entrypoint": _required_text(tool_table, "entrypoint"),
        "python_requires": _optional_text(tool_table, "python_requires", ">=3.11"),
        "dependencies": _optional_text_list(tool_table, "dependencies"),
        "declared_permissions": _optional_text_list(tool_table, "declared_permissions"),
        "security": ToolSecurityPolicy.model_validate(security_table),
        "agent": ToolAgentPolicy.model_validate(agent_table),
    }
    return ToolManifest(**fields)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _required_table(data: dict[str, Any], key: str) -> dict[str, Any]:
|
|
47
|
+
table = data.get(key)
|
|
48
|
+
if not isinstance(table, dict):
|
|
49
|
+
raise ValueError(f"Tool manifest must include a [{key}] table")
|
|
50
|
+
return table
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _optional_table(data: dict[str, Any], key: str) -> dict[str, Any]:
|
|
54
|
+
table = data.get(key, {})
|
|
55
|
+
if not isinstance(table, dict):
|
|
56
|
+
raise ValueError(f"Tool manifest [{key}] value must be a table")
|
|
57
|
+
return table
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _required_text(data: dict[str, Any], key: str) -> str:
|
|
61
|
+
value = data.get(key)
|
|
62
|
+
if not isinstance(value, str) or not value.strip():
|
|
63
|
+
raise ValueError(f"Tool manifest field is required and must be text: {key}")
|
|
64
|
+
return value
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _optional_text(data: dict[str, Any], key: str, default: str) -> str:
|
|
68
|
+
value = data.get(key, default)
|
|
69
|
+
if not isinstance(value, str) or not value.strip():
|
|
70
|
+
raise ValueError(f"Tool manifest field must be text when present: {key}")
|
|
71
|
+
return value
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _optional_text_list(data: dict[str, Any], key: str) -> list[str]:
|
|
75
|
+
values = data.get(key, [])
|
|
76
|
+
if not isinstance(values, list) or not all(isinstance(value, str) and value.strip() for value in values):
|
|
77
|
+
raise ValueError(f"Tool manifest field must be a list of non-empty strings when present: {key}")
|
|
78
|
+
return values
|
pkgwhy/registry/publish.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import tarfile
|
|
6
|
+
from datetime import UTC, datetime
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from pkgwhy.core.models import PublishResult, RegistryToolEntry, ToolArtifactType, ToolManifest
|
|
10
|
+
from pkgwhy.registry.local import current_registry, load_registry_index, save_registry_index
|
|
11
|
+
from pkgwhy.registry.manifest import read_tool_manifest
|
|
12
|
+
|
|
13
|
+
EXCLUDED_DIRS = {".git", ".hg", ".svn", ".venv", "venv", "__pycache__"}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def publish_local_tool(path: Path) -> PublishResult:
    """Publish a local script or tool folder into the current registry.

    The source is bundled into a ``.tar.gz`` under the registry, its SHA-256
    is computed, the manifest is stored as JSON next to it, and the registry
    index gains a new entry.

    Raises:
        ValueError: if *path* is a symlink, does not exist, is not a
            publishable source, or the same owner/name/version is already
            in the index. Errors from the helpers propagate unchanged.
    """
    requested_source = path.expanduser()
    if requested_source.is_symlink():
        raise ValueError(f"Publish path must not be a symlink: {requested_source}")
    source = requested_source.resolve()
    if not source.exists():
        raise ValueError(f"Publish path does not exist: {source}")

    registry = current_registry()
    manifest = _manifest_for_source(source)
    _ensure_version_is_new(registry.path, manifest)
    bundle_path = _bundle_path(registry.path, manifest)
    manifest_path = _manifest_path(registry.path, manifest)
    bundle_path.parent.mkdir(parents=True, exist_ok=True)
    manifest_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        _write_bundle(source, bundle_path)
        sha256 = _sha256_file(bundle_path)
        manifest_path.write_text(
            json.dumps(manifest.model_dump(mode="json"), indent=2, sort_keys=True) + "\n",
            encoding="utf-8",
        )
        _update_index(registry.path, manifest, bundle_path, manifest_path, sha256)
    except Exception:
        # The version was just confirmed absent from the index, so these
        # files are not referenced by the index; remove them so a failed
        # publish does not leave a partial bundle or orphan manifest behind.
        bundle_path.unlink(missing_ok=True)
        manifest_path.unlink(missing_ok=True)
        raise

    return PublishResult(
        manifest=manifest,
        registry_name=registry.name,
        registry_path=registry.path,
        bundle_path=bundle_path,
        manifest_path=manifest_path,
        sha256=sha256,
    )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _manifest_for_source(source: Path) -> ToolManifest:
    """Return the manifest describing *source*.

    A directory must supply its own manifest (read via
    ``read_tool_manifest``). A single ``.py`` file gets a synthesized manifest
    owned by ``"local"`` at version ``"0.1.0"`` with no declared permissions.

    Raises:
        ValueError: if *source* is neither a ``.py`` file nor a directory.
    """
    if source.is_dir():
        return read_tool_manifest(source)

    is_python_script = source.is_file() and source.suffix == ".py"
    if not is_python_script:
        raise ValueError("Publish path must be a Python script or a folder with pkgwhy.toml")

    return ToolManifest(
        name=source.stem,
        owner="local",
        version="0.1.0",
        description="Local Python script published with pkgwhy.",
        artifact_type=ToolArtifactType.SCRIPT,
        entrypoint=source.name,
        declared_permissions=[],
    )
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _bundle_path(registry_path: Path, manifest: ToolManifest) -> Path:
|
|
67
|
+
return (
|
|
68
|
+
registry_path
|
|
69
|
+
/ "bundles"
|
|
70
|
+
/ manifest.owner
|
|
71
|
+
/ manifest.name
|
|
72
|
+
/ manifest.version
|
|
73
|
+
/ f"{manifest.name}-{manifest.version}.tar.gz"
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _manifest_path(registry_path: Path, manifest: ToolManifest) -> Path:
|
|
78
|
+
return registry_path / "manifests" / manifest.owner / manifest.name / manifest.version / "manifest.json"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _write_bundle(source: Path, bundle_path: Path) -> None:
    """Write *source* into a gzip-compressed tar archive at *bundle_path*.

    A single file is stored under its own file name. For a directory, every
    entry beneath it is stored under its path relative to *source*, except
    entries inside a directory named in ``EXCLUDED_DIRS``.

    Raises:
        ValueError: if a non-excluded entry under *source* is a symlink.
    """
    with tarfile.open(bundle_path, "w:gz", dereference=False) as archive:
        if source.is_file():
            archive.add(source, arcname=source.name)
            return
        for child in sorted(source.rglob("*")):
            relative = child.relative_to(source)
            # Judge exclusions on the path INSIDE the tool only; otherwise a
            # tool that merely lives under e.g. ~/venv/ would bundle nothing.
            if _should_skip(relative):
                continue
            if child.is_symlink():
                raise ValueError(
                    f"Symlinks are not supported in tool bundles: {relative}"
                )
            # rglob already yields every descendant, so add each entry on its
            # own. The default recursive add would store nested files twice
            # and pull in excluded directories nested under an included one.
            archive.add(child, arcname=relative.as_posix(), recursive=False)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _should_skip(path: Path) -> bool:
    """Return True when any component of *path* is named in ``EXCLUDED_DIRS``."""
    return not EXCLUDED_DIRS.isdisjoint(path.parts)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _sha256_file(path: Path) -> str:
|
|
101
|
+
digest = hashlib.sha256()
|
|
102
|
+
with path.open("rb") as handle:
|
|
103
|
+
for chunk in iter(lambda: handle.read(1024 * 1024), b""):
|
|
104
|
+
digest.update(chunk)
|
|
105
|
+
return digest.hexdigest()
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _update_index(
    registry_path: Path,
    manifest: ToolManifest,
    bundle_path: Path,
    manifest_path: Path,
    sha256: str,
) -> None:
    """Append an entry for the newly published tool to the registry index.

    Bundle and manifest locations are stored relative to *registry_path* and
    the entry is stamped with the current UTC time in ISO format. The index is
    loaded in strict mode, so a missing or unreadable index raises
    ``ValueError``.
    """
    index = load_registry_index(registry_path, strict=True)
    relative_bundle = bundle_path.relative_to(registry_path)
    relative_manifest = manifest_path.relative_to(registry_path)
    timestamp = datetime.now(tz=UTC).isoformat()
    index.tools.append(
        RegistryToolEntry(
            name=manifest.name,
            owner=manifest.owner,
            version=manifest.version,
            artifact_type=manifest.artifact_type,
            entrypoint=manifest.entrypoint,
            bundle_path=str(relative_bundle),
            sha256=sha256,
            manifest_path=str(relative_manifest),
            published_at=timestamp,
        )
    )
    save_registry_index(registry_path, index)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _ensure_version_is_new(registry_path: Path, manifest: ToolManifest) -> None:
    """Refuse to publish an owner/name/version that the index already lists.

    The index is loaded in strict mode.

    Raises:
        ValueError: if the index is missing or unreadable, or an entry with
            the same owner, name, and version already exists.
    """
    index = load_registry_index(registry_path, strict=True)
    wanted = (manifest.owner, manifest.name, manifest.version)
    already_published = any(
        (tool.owner, tool.name, tool.version) == wanted for tool in index.tools
    )
    if already_published:
        raise ValueError(
            f"Tool version is already published: {manifest.owner}/{manifest.name} {manifest.version}"
        )
|
pkgwhy/registry/run.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import shutil
|
|
5
|
+
import subprocess
|
|
6
|
+
import sys
|
|
7
|
+
import tarfile
|
|
8
|
+
import venv
|
|
9
|
+
from datetime import UTC, datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from pkgwhy.core.models import (
|
|
13
|
+
ToolArtifactType,
|
|
14
|
+
ToolRunResult,
|
|
15
|
+
ToolRunStatus,
|
|
16
|
+
)
|
|
17
|
+
from pkgwhy.policy.tool_execution import evaluate_tool_execution_policy
|
|
18
|
+
from pkgwhy.registry.local import current_registry
|
|
19
|
+
from pkgwhy.registry.tools import judge_tool, resolve_tool_entry
|
|
20
|
+
|
|
21
|
+
RUNNER_ISOLATION_WARNING = (
|
|
22
|
+
"This run uses a Python virtual environment for dependency isolation. "
|
|
23
|
+
"It does not fully sandbox operating-system permissions."
|
|
24
|
+
)
|
|
25
|
+
DEFAULT_RUN_TIMEOUT_SECONDS = 300
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def run_local_tool(reference: str, *, non_interactive: bool = False) -> ToolRunResult:
    """Run a published tool from the current registry and log the outcome.

    Steps: resolve and judge the tool, apply the execution policy, validate
    the manifest (artifact type, no dependencies, relative ``.py``
    entrypoint), unpack the bundle into a fresh workspace, then run the
    entrypoint with a virtual-environment interpreter under a timeout of
    ``DEFAULT_RUN_TIMEOUT_SECONDS``. A timeout is reported as exit code 124.
    The result is written to a JSON execution log and returned.

    Raises:
        ValueError: if policy blocks the run, the manifest is unsupported, or
            the entrypoint is missing from the unpacked bundle. Errors from
            the helpers propagate unchanged.
    """
    registry = current_registry()
    entry = resolve_tool_entry(reference, registry)
    judgement = judge_tool(reference)
    policy_result = evaluate_tool_execution_policy(judgement, non_interactive=non_interactive)
    if not policy_result.allowed:
        raise ValueError(f"Tool policy blocks execution: {' '.join(policy_result.reasons)}")

    manifest = judgement.manifest
    runnable_types = {ToolArtifactType.SCRIPT, ToolArtifactType.FOLDER}
    if manifest.artifact_type not in runnable_types:
        raise ValueError(f"Unsupported tool artifact type for runner MVP: {manifest.artifact_type.value}")
    if manifest.dependencies:
        raise ValueError("Dependency installation is not implemented for pkgwhy run MVP.")

    entrypoint = Path(manifest.entrypoint)
    entrypoint_supported = (
        not entrypoint.is_absolute()
        and ".." not in entrypoint.parts
        and entrypoint.suffix == ".py"
    )
    if not entrypoint_supported:
        raise ValueError(f"Unsupported entrypoint for runner MVP: {manifest.entrypoint}")

    # Workspace, venv and logs each get their own per-version directory.
    tool_root = registry.path.joinpath("run-workspaces", entry.owner, entry.name, entry.version)
    venv_path = registry.path.joinpath("venvs", entry.owner, entry.name, entry.version)
    log_dir = registry.path.joinpath("execution-logs", entry.owner, entry.name, entry.version)
    bundle_path = registry.path / entry.bundle_path

    _prepare_workspace(bundle_path, tool_root)
    entrypoint_path = (tool_root / entrypoint).resolve()
    # The resolved entrypoint must be a real file located inside the workspace.
    if not (entrypoint_path.is_file() and tool_root.resolve() in entrypoint_path.parents):
        raise ValueError(f"Entrypoint not found in tool bundle: {manifest.entrypoint}")
    python_path = _ensure_venv_python(venv_path)
    log_dir.mkdir(parents=True, exist_ok=True)

    started_at_dt = datetime.now(tz=UTC)
    command = [str(python_path), str(entrypoint_path)]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=False,
            cwd=tool_root,
            timeout=DEFAULT_RUN_TIMEOUT_SECONDS,
        )
    except subprocess.TimeoutExpired as exc:
        exit_code = 124
        stdout = _output_text(exc.stdout)
        stderr = _output_text(exc.stderr)
        timeout_message = f"Tool execution timed out after {DEFAULT_RUN_TIMEOUT_SECONDS} seconds."
        if stderr:
            stderr = f"{stderr.rstrip()}\n{timeout_message}\n"
        else:
            stderr = f"{timeout_message}\n"
    else:
        exit_code = completed.returncode
        stdout = completed.stdout
        stderr = completed.stderr
    finished_at_dt = datetime.now(tz=UTC)

    if exit_code == 0:
        status = ToolRunStatus.COMPLETED
    else:
        status = ToolRunStatus.FAILED
    log_name = f"{started_at_dt.strftime('%Y%m%dT%H%M%S%fZ')}.json"
    result = ToolRunResult(
        tool=f"{entry.owner}/{entry.name}",
        owner=entry.owner,
        name=entry.name,
        version=entry.version,
        registry_name=registry.name,
        registry_path=registry.path,
        command=command,
        entrypoint=manifest.entrypoint,
        started_at=started_at_dt.isoformat(),
        finished_at=finished_at_dt.isoformat(),
        exit_code=exit_code,
        status=status,
        stdout=stdout,
        stderr=stderr,
        log_path=log_dir / log_name,
        warning=RUNNER_ISOLATION_WARNING,
        policy_decision=policy_result.decision,
        policy_reasons=policy_result.reasons,
        policy_warnings=policy_result.warnings,
    )
    _write_execution_log(result)
    return result
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _prepare_workspace(bundle_path: Path, tool_root: Path) -> None:
    """Recreate *tool_root* and unpack the bundle archive into it.

    Any existing workspace is deleted first so every run starts from the
    bundle contents only. Archive members are checked with
    ``_validate_archive_members`` before extraction, and the ``"data"``
    extraction filter is applied when the running Python provides it.
    """
    if tool_root.exists():
        shutil.rmtree(tool_root)
    tool_root.mkdir(parents=True, exist_ok=True)
    with tarfile.open(bundle_path, "r:gz") as archive:
        members = archive.getmembers()
        _validate_archive_members(members)
        # Probe for filter support explicitly rather than catching TypeError:
        # a TypeError raised during a filtered extraction must not trigger a
        # second, unfiltered extraction.
        if hasattr(tarfile, "data_filter"):
            archive.extractall(tool_root, members=members, filter="data")
        else:
            archive.extractall(tool_root, members=members)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _validate_archive_members(members: list[tarfile.TarInfo]) -> None:
|
|
117
|
+
for member in members:
|
|
118
|
+
path = Path(member.name)
|
|
119
|
+
if path.is_absolute() or ".." in path.parts:
|
|
120
|
+
raise ValueError(f"Unsafe path in tool bundle: {member.name}")
|
|
121
|
+
if member.issym() or member.islnk():
|
|
122
|
+
raise ValueError(f"Links are not supported in runner bundles: {member.name}")
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _ensure_venv_python(venv_path: Path) -> Path:
    """Return the interpreter inside *venv_path*, creating the venv if needed.

    The environment is created without pip and without clearing an existing
    directory.

    Raises:
        ValueError: if the interpreter still does not exist after creation.
    """
    relative_python = "Scripts/python.exe" if _is_windows_venv() else "bin/python"
    interpreter = venv_path / relative_python
    if interpreter.exists():
        return interpreter

    venv.EnvBuilder(with_pip=False, clear=False).create(venv_path)
    if interpreter.exists():
        return interpreter
    raise ValueError(f"Could not create runner virtual environment at {venv_path}")
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _is_windows_venv() -> bool:
|
|
135
|
+
return sys.platform == "win32"
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _output_text(value: str | bytes | None) -> str:
|
|
139
|
+
if value is None:
|
|
140
|
+
return ""
|
|
141
|
+
if isinstance(value, bytes):
|
|
142
|
+
return value.decode(errors="replace")
|
|
143
|
+
return value
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _write_execution_log(result: ToolRunResult) -> None:
    """Write *result* as sorted, indented JSON to ``result.log_path``.

    The log directory is created when it does not exist yet.
    """
    log_file = result.log_path
    log_file.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(result.model_dump(mode="json"), indent=2, sort_keys=True)
    log_file.write_text(serialized + "\n", encoding="utf-8")
|
pkgwhy/registry/tools.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from pydantic import ValidationError
|
|
8
|
+
|
|
9
|
+
from pkgwhy.core.models import (
|
|
10
|
+
AgentDecision,
|
|
11
|
+
Confidence,
|
|
12
|
+
HashStatus,
|
|
13
|
+
RegistryEntry,
|
|
14
|
+
RegistryToolEntry,
|
|
15
|
+
RiskLevel,
|
|
16
|
+
ToolJudgement,
|
|
17
|
+
ToolManifest,
|
|
18
|
+
)
|
|
19
|
+
from pkgwhy.registry.local import current_registry, load_registry_index
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def judge_tool(reference: str) -> ToolJudgement:
    """Assess a published tool from the current registry before it is run.

    The verdict is driven by the bundle hash check:

    * verified -> medium risk; manual review if the manifest requires human
      approval, otherwise allow with caution;
    * bundle missing -> unknown risk, manual review;
    * anything else (hash mismatch) -> high risk, block.

    Warnings always note that signature verification and static capability
    detection are not implemented.
    """
    registry = current_registry()
    entry = resolve_tool_entry(reference, registry)
    manifest = _load_manifest(registry.path, entry)
    hash_status = _verify_hash(registry.path, entry)

    needs_approval = manifest.security.requires_human_approval
    hash_verified = hash_status == HashStatus.VERIFIED
    notes: list[str] = ["Signature verification is not implemented yet."]
    capabilities: list[str] = []

    if hash_verified:
        risk = RiskLevel.MEDIUM
        if needs_approval:
            decision = AgentDecision.REVIEW_MANUALLY
        else:
            decision = AgentDecision.ALLOW_WITH_CAUTION
        reason = "Tool bundle hash matches the local registry index."
        recommendation = "Review declared permissions and manifest metadata before running this private tool."
    elif hash_status == HashStatus.MISSING:
        risk = RiskLevel.UNKNOWN
        decision = AgentDecision.REVIEW_MANUALLY
        reason = "Tool bundle is missing from the local registry."
        recommendation = "Restore or republish the bundle before running this tool."
        notes.append("Bundle file is missing.")
    else:
        risk = RiskLevel.HIGH
        decision = AgentDecision.BLOCK
        reason = "Tool bundle hash does not match the local registry index."
        recommendation = "Block use until a human verifies or republishes the tool."
        notes.append("Bundle hash mismatch.")

    # Nothing populates the capability list in this function, so this note is
    # currently always added.
    if not capabilities:
        notes.append("Static capability detection for tool bundles is not implemented yet.")

    return ToolJudgement(
        tool=f"{entry.owner}/{entry.name}",
        owner=entry.owner,
        name=entry.name,
        version=entry.version,
        decision=decision,
        risk_level=risk,
        confidence=Confidence.MEDIUM if hash_verified else Confidence.LOW,
        reason=reason,
        requires_human_approval=needs_approval,
        manifest=manifest,
        declared_permissions=manifest.declared_permissions,
        detected_capabilities=capabilities,
        hash_status=hash_status,
        warnings=notes,
        recommendation=recommendation,
    )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def resolve_tool_entry(reference: str, registry: RegistryEntry | None = None) -> RegistryToolEntry:
    """Find the most recently published index entry matching *reference*.

    *reference* is either ``name`` or ``owner/name``. The index of *registry*
    (default: the current registry) is loaded in strict mode.

    Raises:
        ValueError: if nothing matches, or a bare name matches tools from
            more than one owner.
    """
    active_registry = registry or current_registry()
    index = load_registry_index(active_registry.path, strict=True)
    owner, name = _parse_reference(reference)

    candidates = []
    for tool in index.tools:
        if tool.name != name:
            continue
        if owner is not None and tool.owner != owner:
            continue
        candidates.append(tool)

    if not candidates:
        raise ValueError(f"Tool is not published in the current registry: {reference}")
    if owner is None:
        distinct_owners = {tool.owner for tool in candidates}
        if len(distinct_owners) > 1:
            raise ValueError(f"Tool reference is ambiguous; include owner: {reference}")
    # max() keeps the first of several equally-new entries, matching a stable
    # descending sort followed by taking the head.
    return max(candidates, key=lambda tool: tool.published_at)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _parse_reference(reference: str) -> tuple[str | None, str]:
|
|
87
|
+
parts = reference.split("/", maxsplit=1)
|
|
88
|
+
if len(parts) == 2:
|
|
89
|
+
return parts[0], parts[1]
|
|
90
|
+
return None, reference
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _load_manifest(registry_path: Path, entry: RegistryToolEntry) -> ToolManifest:
    """Load the stored manifest JSON that *entry* points to.

    The manifest location is first confined to the registry root via
    ``_validate_registry_path``.

    Raises:
        ValueError: if the path escapes the registry, or the file cannot be
            read, decoded as JSON, or validated as a ``ToolManifest``.
    """
    stored_manifest = _validate_registry_path(registry_path, entry.manifest_path, entry)
    try:
        raw = stored_manifest.read_text(encoding="utf-8")
        return ToolManifest.model_validate(json.loads(raw))
    except (OSError, json.JSONDecodeError, ValidationError) as exc:
        raise ValueError(f"Could not read stored tool manifest for {entry.owner}/{entry.name}") from exc
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _verify_hash(registry_path: Path, entry: RegistryToolEntry) -> HashStatus:
    """Compare the bundle's SHA-256 on disk with the digest in the index.

    Returns ``MISSING`` when the bundle file does not exist, ``MISMATCH`` when
    the digests differ, and ``VERIFIED`` when they are equal. The bundle
    location is confined to the registry root first.
    """
    bundle_file = _validate_registry_path(registry_path, entry.bundle_path, entry)
    hasher = hashlib.sha256()
    try:
        with bundle_file.open("rb") as stream:
            # Read in 1 MiB chunks to avoid loading the whole bundle at once.
            while block := stream.read(1024 * 1024):
                hasher.update(block)
    except FileNotFoundError:
        return HashStatus.MISSING
    if hasher.hexdigest() == entry.sha256:
        return HashStatus.VERIFIED
    return HashStatus.MISMATCH
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _validate_registry_path(registry_path: Path, entry_path: str, entry: RegistryToolEntry) -> Path:
|
|
117
|
+
registry_root = registry_path.resolve()
|
|
118
|
+
candidate = (registry_root / entry_path).resolve()
|
|
119
|
+
if not candidate.is_relative_to(registry_root):
|
|
120
|
+
raise ValueError(f"Registry entry path escapes registry root for {entry.owner}/{entry.name}: {entry_path}")
|
|
121
|
+
return candidate
|