skip-trace 0.1.0-py3-none-any.whl → 0.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skip_trace/__about__.py +13 -3
- skip_trace/__init__.py +0 -2
- skip_trace/analysis/content_scanner.py +189 -0
- skip_trace/analysis/evidence.py +1 -1
- skip_trace/analysis/scoring.py +46 -1
- skip_trace/analysis/source_scanner.py +1 -1
- skip_trace/cli.py +1 -1
- skip_trace/collectors/__init__.py +2 -2
- skip_trace/collectors/github_files.py +359 -0
- skip_trace/collectors/package_files.py +232 -41
- skip_trace/collectors/pypi.py +1 -1
- skip_trace/collectors/pypi_attestations.py +160 -0
- skip_trace/collectors/sigstore.py +160 -0
- skip_trace/collectors/urls.py +96 -0
- skip_trace/m.py +287 -0
- skip_trace/main.py +103 -85
- skip_trace/reporting/md_reporter.py +68 -4
- skip_trace/schemas.py +21 -0
- skip_trace/utils/http_client.py +18 -0
- {skip_trace-0.1.0.dist-info → skip_trace-0.1.1.dist-info}/METADATA +7 -3
- skip_trace-0.1.1.dist-info/RECORD +39 -0
- skip_trace-0.1.0.dist-info/RECORD +0 -33
- {skip_trace-0.1.0.dist-info → skip_trace-0.1.1.dist-info}/WHEEL +0 -0
- {skip_trace-0.1.0.dist-info → skip_trace-0.1.1.dist-info}/entry_points.txt +0 -0
- {skip_trace-0.1.0.dist-info → skip_trace-0.1.1.dist-info}/licenses/LICENSE +0 -0

skip_trace/collectors/package_files.py
CHANGED

```diff
@@ -1,23 +1,149 @@
 # skip_trace/collectors/package_files.py
 from __future__ import annotations
 
+import datetime
+import glob
 import logging
 import os
 import shutil
 import tarfile
 import zipfile
+from email.parser import Parser
 from typing import Any, Dict, List, Optional
 
 from ..analysis import source_scanner
+from ..analysis.evidence import generate_evidence_id
 from ..exceptions import CollectorError, NetworkError
-from ..schemas import EvidenceRecord
+from ..schemas import EvidenceKind, EvidenceRecord, EvidenceSource
 from ..utils import http_client
 from ..utils.safe_targz import safe_extract_auto
+from ..utils.validation import is_valid_email
+from . import sigstore
 
 logger = logging.getLogger(__name__)
 PACKAGE_DOWNLOAD_DIR = ".packages"
 
 
+def _create_evidence_from_contact(
+    contact_str: str,
+    role_kind: EvidenceKind,
+    locator: str,
+    confidence: float,
+    notes_prefix: str,
+) -> List[EvidenceRecord]:
+    """Helper to create PERSON and EMAIL evidence from a 'Name <email>' string."""
+    from ..analysis.evidence import _parse_contact_string
+
+    evidence_list = []
+    now = datetime.datetime.now(datetime.timezone.utc)
+    parsed = _parse_contact_string(contact_str)
+    name = parsed.get("name")
+    email = parsed.get("email")
+    source = EvidenceSource.WHEEL
+
+    if name:
+        value = {"name": name}
+        record = EvidenceRecord(
+            id=generate_evidence_id(
+                source, EvidenceKind.PERSON, locator, str(value), name
+            ),
+            source=source,
+            locator=locator,
+            kind=EvidenceKind.PERSON,
+            value=value,
+            observed_at=now,
+            confidence=confidence,
+            notes=f"{notes_prefix} name '{name}' from {role_kind.value} field in package metadata.",
+        )
+        evidence_list.append(record)
+
+    if email:
+        value = {"email": email}
+        slug = name or email.split("@")[0]
+        record = EvidenceRecord(
+            id=generate_evidence_id(
+                source, EvidenceKind.EMAIL, locator, str(value), slug
+            ),
+            source=source,
+            locator=locator,
+            kind=EvidenceKind.EMAIL,
+            value=value,
+            observed_at=now,
+            confidence=confidence + 0.1,  # Email is a stronger signal
+            notes=f"{notes_prefix} email for '{slug}' from {role_kind.value} field in package metadata.",
+        )
+        evidence_list.append(record)
+
+    return evidence_list
+
+
+def _parse_metadata_file(content: str, locator: str) -> List[EvidenceRecord]:
+    """Parses a PKG-INFO or METADATA file for evidence."""
+    evidence_list: List[EvidenceRecord] = []
+    now = datetime.datetime.now(datetime.timezone.utc)
+    headers = Parser().parsestr(content)
+
+    # Author/Maintainer information
+    if author_email := headers.get("Author-email"):
+        evidence_list.extend(
+            _create_evidence_from_contact(
+                author_email, EvidenceKind.AUTHOR_TAG, locator, 0.35, "Found"
+            )
+        )
+    if author := headers.get("Author"):
+        evidence_list.extend(
+            _create_evidence_from_contact(
+                author, EvidenceKind.AUTHOR_TAG, locator, 0.30, "Found"
+            )
+        )
+
+    if maintainer_email := headers.get("Maintainer-email"):
+        evidence_list.extend(
+            _create_evidence_from_contact(
+                maintainer_email, EvidenceKind.MAINTAINER, locator, 0.35, "Found"
+            )
+        )
+    if maintainer := headers.get("Maintainer"):
+        evidence_list.extend(
+            _create_evidence_from_contact(
+                maintainer, EvidenceKind.MAINTAINER, locator, 0.30, "Found"
+            )
+        )
+
+    # Project URLs
+    urls = headers.get_all("Project-URL", [])
+    if home_page := headers.get("Home-page"):
+        urls.append(f"Homepage, {home_page}")
+
+    for url_entry in urls:
+        try:
+            label, url = [part.strip() for part in url_entry.split(",", 1)]
+            if not is_valid_email(label):  # Filter out email-like labels
+                value = {"label": label, "url": url}
+                record = EvidenceRecord(
+                    id=generate_evidence_id(
+                        EvidenceSource.WHEEL,
+                        EvidenceKind.PROJECT_URL,
+                        locator,
+                        str(value),
+                        label,
+                        hint="metadata-file",
+                    ),
+                    source=EvidenceSource.WHEEL,
+                    locator=locator,
+                    kind=EvidenceKind.PROJECT_URL,
+                    value=value,
+                    observed_at=now,
+                    confidence=0.30,
+                    notes=f"Found project URL '{label}' in package metadata file.",
+                )
+                evidence_list.append(record)
+        except ValueError:
+            logger.debug(f"Could not parse Project-URL from metadata file: {url_entry}")
+
+    return evidence_list
+
+
 def _ensure_download_dir():
     """Ensures the package download directory and .gitignore exist."""
    os.makedirs(PACKAGE_DOWNLOAD_DIR, exist_ok=True)
```
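
Note on the hunk above: the new `_parse_metadata_file` leans entirely on the stdlib `email.parser`, which works because core-metadata files (PKG-INFO/METADATA) use RFC 822 header syntax. A minimal sketch of that parsing step, with an invented METADATA excerpt:

```python
from email.parser import Parser

# Invented METADATA excerpt for illustration; real content comes from the
# extracted wheel or sdist.
sample = """\
Metadata-Version: 2.1
Name: example-pkg
Author-email: Jane Doe <jane@example.com>
Maintainer: Ops Team
Project-URL: Homepage, https://example.com
Project-URL: Source, https://github.com/example/example-pkg
"""

headers = Parser().parsestr(sample)
print(headers.get("Author-email"))         # Jane Doe <jane@example.com>
print(headers.get_all("Project-URL", []))  # both entries, as 'Label, URL' strings
```
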
```diff
@@ -37,6 +163,8 @@ def _find_download_url(metadata: Dict[str, Any]) -> Optional[str]:
     wheel_url = None
     sdist_url = None
     for url_info in urls:
+        if url_info.get("yanked"):
+            continue
         packagetype = url_info.get("packagetype")
         if packagetype == "bdist_wheel":
             wheel_url = url_info.get("url")
```
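
For reference, each entry in the `urls` array of the PyPI JSON API carries both a `packagetype` and a `yanked` flag, which is what the new guard inspects. A hand-written example entry:

```python
# Hand-written illustration of one element of metadata["urls"].
url_info = {
    "packagetype": "bdist_wheel",
    "url": "https://files.pythonhosted.org/packages/.../skip_trace-0.1.1-py3-none-any.whl",
    "yanked": False,  # entries with a truthy value are now skipped
}
```
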
```diff
@@ -47,6 +175,33 @@ def _find_download_url(metadata: Dict[str, Any]) -> Optional[str]:
     return wheel_url or sdist_url or (urls[0].get("url") if urls else None)
 
 
+def _download_file(url: str, download_dir: str) -> str | None:
+    """Downloads a file to a directory if it doesn't exist, returns the path."""
+    filename = os.path.basename(url)
+    download_path = os.path.join(download_dir, filename)
+
+    if not os.path.exists(download_path):
+        logger.info(f"Downloading {filename} from {url}")
+        try:
+            with http_client.get_client().stream("GET", url) as response:
+                response.raise_for_status()
+                with open(download_path, "wb") as f:
+                    for chunk in response.iter_bytes():
+                        f.write(chunk)
+        except (
+            NetworkError,
+            http_client.httpx.RequestError,
+            http_client.httpx.HTTPStatusError,
+        ) as e:
+            # A 404 is expected for bundles, so we don't raise a CollectorError
+            if response and response.status_code == 404:
+                logger.info(f"No file found at {url} (404 Not Found)")
+                return None
+            raise CollectorError(f"Failed to download file {filename}: {e}") from e
+
+    return download_path
+
+
 def collect_from_package_files(metadata: Dict[str, Any]) -> List[EvidenceRecord]:
     """
     Downloads, extracts, and scans a package's files for evidence.
```
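
One caveat in `_download_file` as written: if `stream()` raises before the `with` body runs (for example on a connection error), `response` is never bound, so the `if response and response.status_code == 404` check itself raises `NameError`. A defensive variant, offered purely as a sketch using plain `httpx`, reads the status from the exception instead:

```python
import httpx


def download_file_sketch(client: httpx.Client, url: str, dest: str) -> str | None:
    """Hypothetical variant of _download_file that never references `response`
    outside the `with` block, where it may not have been assigned."""
    try:
        with client.stream("GET", url) as response:
            response.raise_for_status()
            with open(dest, "wb") as f:
                for chunk in response.iter_bytes():
                    f.write(chunk)
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:  # expected for missing .sigstore bundles
            return None
        raise
    return dest
```
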
```diff
@@ -70,62 +225,55 @@ def collect_from_package_files(metadata: Dict[str, Any]) -> List[EvidenceRecord]
         return []
 
     _ensure_download_dir()
-    filename = os.path.basename(download_url)
-    download_path = os.path.join(PACKAGE_DOWNLOAD_DIR, filename)
 
-    # Download the
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Download the main package artifact
+    artifact_path = _download_file(download_url, PACKAGE_DOWNLOAD_DIR)
+    if not artifact_path:
+        return []  # Can't proceed without the artifact
+
+    # Attempt to download the corresponding Sigstore bundle
+    bundle_url = f"{download_url}.sigstore"
+    bundle_path = _download_file(bundle_url, PACKAGE_DOWNLOAD_DIR)
+
+    # Initialize evidence list
+    evidence: list[EvidenceRecord] = []
+
+    # Verify with Sigstore if the bundle was found
+    if bundle_path:
+        sigstore_evidence = sigstore.verify_and_collect(
+            artifact_path, bundle_path, package_name, package_version
+        )
+        evidence.extend(sigstore_evidence)
 
     # Determine the persistent extraction directory path from the filename
-
-
-
-
-        break
+    filename = os.path.basename(artifact_path)
+    base_filename, _ = os.path.splitext(filename)
+    if filename.endswith(".tar.gz"):
+        base_filename, _ = os.path.splitext(base_filename)
     extract_dir = os.path.join(PACKAGE_DOWNLOAD_DIR, base_filename)
 
     # Extract the archive ONLY if the destination directory doesn't already exist
     if not os.path.exists(extract_dir):
-        logger.info(f"Extracting {
+        logger.info(f"Extracting {artifact_path} to {extract_dir}")
         os.makedirs(extract_dir, exist_ok=True)
         try:
-            if
-            with zipfile.ZipFile(
-                    zf.extractall(extract_dir)  # nosec
-            elif
+            if artifact_path.endswith((".whl", ".zip")):
+                with zipfile.ZipFile(artifact_path, "r") as zf:
+                    zf.extractall(extract_dir)  # nosec
+            elif artifact_path.endswith(
                 (".tar.gz", ".tgz", ".tar.bz2", ".tar.xz", ".tar")
             ):
-                safe_extract_auto(
-            # elif download_path.endswith((".tar.gz", ".tgz")):
-            #     with tarfile.open(download_path, "r:gz") as tf:  # nosec # noqa
-            #         tf.extractall(extract_dir)  # nosec # noqa
-            # elif download_path.endswith(".tar.bz2"):
-            #     with tarfile.open(download_path, "r:bz2") as tf:  # nosec # noqa
-            #         tf.extractall(extract_dir)  # nosec # noqa
+                safe_extract_auto(artifact_path, extract_dir)
             else:
                 logger.warning(
                     f"Unsupported archive format for {filename}. Skipping file scan."
                 )
                 shutil.rmtree(extract_dir)  # Clean up the empty dir
-                return
+                return evidence  # Return any Sigstore evidence found
         except (zipfile.BadZipFile, tarfile.TarError, PermissionError) as e:
-            logger.error(f"Failed to extract archive {
-            # Clean up potentially corrupted extraction on error
+            logger.error(f"Failed to extract archive {artifact_path}: {e}")
             shutil.rmtree(extract_dir, ignore_errors=True)
-            return
+            return evidence  # Return any Sigstore evidence found
     else:
         logger.info(f"Using cached package files from {extract_dir}")
 
```
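
Tarballs go through `safe_extract_auto`, but wheels and zips are still unpacked with a bare `extractall` (hence the `# nosec`). For comparison, a minimal zip-slip guard, offered only as a sketch, resolves each member path before extracting:

```python
import os
import zipfile


def safe_unzip_sketch(archive: str, dest: str) -> None:
    """Illustrative guard: refuse zip members whose resolved path escapes dest."""
    dest_root = os.path.realpath(dest)
    with zipfile.ZipFile(archive, "r") as zf:
        for member in zf.namelist():
            target = os.path.realpath(os.path.join(dest_root, member))
            if os.path.commonpath([dest_root, target]) != dest_root:
                raise ValueError(f"Unsafe path in archive: {member}")
        zf.extractall(dest_root)
```
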
```diff
@@ -143,8 +291,51 @@ def collect_from_package_files(metadata: Dict[str, Any]) -> List[EvidenceRecord]
         logger.error(
             f"Extraction directory {extract_dir} not found after apparent success. Check permissions."
         )
-        return
+        return evidence
 
     locator_prefix = f"{package_name}-{package_version}"
-
+    source_scan_evidence = source_scanner.scan_directory(
+        scan_target_dir, locator_prefix
+    )
+    evidence.extend(source_scan_evidence)
+
+    # --- Scan for PKG-INFO/METADATA file ---
+    metadata_file_path = None
+    # Use a recursive glob to find the relevant .dist-info or .egg-info directory
+    # This is more robust for sdists that may have a nested src/ directory.
+    dist_info_pattern = os.path.join(scan_target_dir, "**", "*.dist-info")
+    egg_info_pattern = os.path.join(scan_target_dir, "**", "*.egg-info")
+
+    info_dirs = glob.glob(dist_info_pattern, recursive=True) + glob.glob(
+        egg_info_pattern, recursive=True
+    )
+
+    if info_dirs:
+        info_dir_path = info_dirs[0]  # Assume there's only one
+        potential_files = [
+            os.path.join(info_dir_path, "METADATA"),
+            os.path.join(info_dir_path, "PKG-INFO"),
+        ]
+        for f_path in potential_files:
+            if os.path.exists(f_path):
+                metadata_file_path = f_path
+                break
+
+    if metadata_file_path:
+        rel_path = os.path.relpath(metadata_file_path, PACKAGE_DOWNLOAD_DIR)
+        logger.info(f"Found package metadata file: {rel_path}")
+        try:
+            with open(metadata_file_path, "r", encoding="utf-8", errors="ignore") as f:
+                content = f.read()
+            # Create a locator relative to the package root
+            relative_locator_path = os.path.relpath(metadata_file_path, scan_target_dir)
+            locator = f"{locator_prefix}/{relative_locator_path}"
+            metadata_evidence = _parse_metadata_file(content, locator)
+            evidence.extend(metadata_evidence)
+            logger.info(
+                f"Extracted {len(metadata_evidence)} evidence records from package metadata file."
+            )
+        except IOError as e:
+            logger.warning(f"Could not read metadata file {metadata_file_path}: {e}")
+
     return evidence
```
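
A small detail worth noting in the metadata scan: the `**` in these patterns only matches nested directories because `recursive=True` is passed; without it, `glob` treats `**` like a plain `*`. For example:

```python
import glob
import os

scan_dir = ".packages/example_pkg-1.0"  # hypothetical extraction directory
pattern = os.path.join(scan_dir, "**", "*.dist-info")

# recursive=True is what lets ** descend into src/ layouts and similar nesting.
print(glob.glob(pattern, recursive=True))
```
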
skip_trace/collectors/pypi.py
CHANGED

```diff
@@ -99,7 +99,7 @@ def cross_reference_by_user(package_name: str) -> List[EvidenceRecord]:
     new_evidence: List[EvidenceRecord] = []
     profile_url = _scrape_user_profile_url(package_name)
 
-    # ---
+    # --- Always create evidence for the PyPI user if found ---
     if profile_url:
         try:
             username = profile_url.strip("/").rsplit("/", maxsplit=1)[-1]
```
skip_trace/collectors/pypi_attestations.py
ADDED

```diff
@@ -0,0 +1,160 @@
+# skip_trace/collectors/pypi_attestations.py
+
+# I have no evidence that this works.
+# It appears to crash, possibly because I'm on windows.
+
+from __future__ import annotations
+
+import datetime
+import logging
+import os
+import shutil
+import subprocess  # nosec
+import tempfile
+from typing import Any, Dict, List
+
+from ..analysis.evidence import generate_evidence_id
+from ..schemas import EvidenceKind, EvidenceRecord, EvidenceSource
+from ..utils import http_client
+
+logger = logging.getLogger(__name__)
+
+
+def collect(metadata: Dict[str, Any]) -> List[EvidenceRecord]:
+    """
+    Finds and verifies PyPI attestations by calling the `pypi-attestations` CLI.
+
+    This collector manually queries the PyPI Integrity API to get the attestation,
+    saves it to a temporary file, and then passes that file to the `inspect`
+    command of the CLI tool for verification and parsing.
+
+    Args:
+        metadata: The PyPI JSON metadata for the package.
+
+    Returns:
+        A list of EvidenceRecord objects from verified attestations.
+    """
+    if not shutil.which("pypi-attestations"):
+        logger.debug(
+            "`pypi-attestations` CLI not found in PATH. Skipping attestation check."
+        )
+        return []
+
+    evidence = []
+    urls_data = metadata.get("urls", [])
+    if not urls_data:
+        return []
+
+    project_name = metadata.get("info", {}).get("name")
+    project_version = metadata.get("info", {}).get("version")
+
+    # Find the first downloadable artifact (wheel or sdist) and check it.
+    for url_info in urls_data:
+        artifact_url = url_info.get("url")
+        if not artifact_url or url_info.get("yanked", False):
+            continue
+
+        artifact_filename = os.path.basename(artifact_url)
+
+        # 1. Construct the PyPI Integrity API URL for this specific file.
+        integrity_api_url = f"https://pypi.org/integrity/{project_name}/{project_version}/{artifact_filename}/provenance"
+        logger.info(f"Querying PyPI Integrity API: {integrity_api_url}")
+
+        # 2. Make the API call to get the attestation.
+        response = http_client.make_request_safe(integrity_api_url)
+        if response is None or response.status_code != 200:
+            logger.info(
+                f"No attestation found for {artifact_filename} via Integrity API (Status: {response.status_code if response else 'N/A'})."
+            )
+            continue
+
+        try:
+            response.json()
+        except Exception:
+            logger.warning(
+                f"Failed to parse JSON from Integrity API for {artifact_filename}"
+            )
+            continue
+
+        # 3. Save the attestation to a temporary file for the CLI to use.
+        with tempfile.NamedTemporaryFile(
+            mode="w", delete=False, suffix=".attestation.json", encoding="utf-8"
+        ) as tmp_file:
+            tmp_file.write(response.text)
+            temp_attestation_path = tmp_file.name
+
+        try:
+            # 4. Call `pypi-attestations inspect` on the temporary attestation file.
+            # The tool verifies the attestation as part of the inspect command.
+            command = ["pypi-attestations", "inspect", temp_attestation_path]
+            logger.info(f"Running command: {' '.join(command)}")
+            result = subprocess.run(  # nosec
+                command,
+                capture_output=True,
+                text=True,
+                check=True,  # Raises CalledProcessError on non-zero exit codes
+            )
+
+            logger.info(f"Successfully verified attestation for {artifact_filename}")
+
+            # Parse the human-readable output to find key details.
+            repo_slug = None
+            workflow = None
+            lines = result.stdout.splitlines()
+            print(lines)
+            for line in lines:
+                if "Repository:" in line:
+                    repo_slug = line.split(":", 1)[1].strip()
+                elif "Workflow:" in line:
+                    workflow = line.split(":", 1)[1].strip()
+
+            if not repo_slug:
+                logger.warning(
+                    "Verified attestation but could not parse repository slug from CLI output."
+                )
+                continue
+
+            # Create the evidence record.
+            org_name = repo_slug.split("/")[0]
+            now = datetime.datetime.now(datetime.timezone.utc)
+            value = {
+                "publisher_kind": "github",
+                "repository": repo_slug,
+                "workflow": workflow,
+            }
+
+            record = EvidenceRecord(
+                id=generate_evidence_id(
+                    EvidenceSource.PYPI_ATTESTATION,
+                    EvidenceKind.PYPI_PUBLISHER_ATTESTATION,
+                    integrity_api_url,
+                    str(value),
+                    org_name,
+                ),
+                source=EvidenceSource.PYPI_ATTESTATION,
+                locator=integrity_api_url,
+                kind=EvidenceKind.PYPI_PUBLISHER_ATTESTATION,
+                value=value,
+                observed_at=now,
+                confidence=1.0,
+                notes=(
+                    "Verified PyPI attestation proves publication from GitHub "
+                    f"repo '{repo_slug}' via workflow '{workflow or 'unknown'}'."
+                ),
+            )
+            evidence.append(record)
+            break
+
+        except subprocess.CalledProcessError as e:
+            logger.warning(
+                f"CLI verification failed for attestation of {artifact_filename}:\n{e.stderr}"
+            )
+        except Exception as e:
+            logger.error(
+                f"An unexpected error occurred during CLI attestation processing: {e}"
+            )
+        finally:
+            # Clean up the temporary file.
+            os.remove(temp_attestation_path)
+
+    return evidence
```
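
The Integrity API endpoint this collector builds can be exercised on its own; a quick sketch with `httpx` (package name, version, and filename below are placeholders, not values from this diff):

```python
import httpx

# Placeholder coordinates; substitute a real file from metadata["urls"].
name, version = "sampleproject", "4.0.0"
filename = f"{name}-{version}-py3-none-any.whl"
url = f"https://pypi.org/integrity/{name}/{version}/{filename}/provenance"

resp = httpx.get(url)
if resp.status_code == 200:
    provenance = resp.json()  # provenance/attestation document for this exact file
else:
    print(f"No attestation published ({resp.status_code})")
```
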
skip_trace/collectors/sigstore.py
ADDED

```diff
@@ -0,0 +1,160 @@
+# skip_trace/collectors/sigstore.py
+from __future__ import annotations
+
+import logging
+from typing import List
+from urllib.parse import urlparse
+
+from sigstore.models import Bundle
+
+from ..schemas import EvidenceRecord
+
+logger = logging.getLogger(__name__)
+
+
+def _parse_san_from_cert(bundle: Bundle) -> str | None:
+    """Extracts the Subject Alternative Name from the signing certificate."""
+    try:
+        # The SAN extension is a list of GeneralName objects.
+        # We look for rfc822Name (email) or uniformResourceIdentifier.
+        sans = bundle.signing_certificate.subject  # no alt name?
+        for san in sans:
+            # The value attribute of the GeneralName object holds the identity.
+            return san.value
+    except Exception:
+        return None
+    return None
+
+
+def _parse_repo_from_github_uri(uri: str | None) -> str | None:
+    """Parses a GitHub workflow URI to get the 'owner/repo' string."""
+    if not uri or not uri.startswith("https://github.com/"):
+        try:
+            parsed = urlparse(uri)
+            path_parts = parsed.path.strip("/").split("/")  # type: ignore
+            if len(path_parts) >= 2:  # type: ignore
+                return f"{path_parts[0]}/{path_parts[1]}"  # type: ignore
+        except Exception:  # nosec
+            pass
+    return None
+
+
+def verify_and_collect(
+    artifact_path: str, bundle_path: str, package_name: str, package_version: str
+) -> List[EvidenceRecord]:
+    """
+    Verifies a package artifact against a Sigstore bundle and collects evidence.
+
+    Args:
+        artifact_path: Path to the downloaded package file (.whl, .tar.gz).
+        bundle_path: Path to the downloaded .sigstore bundle file.
+        package_name: The name of the package.
+        package_version: The version of the package.
+
+    Returns:
+        A list of EvidenceRecord objects from the verification.
+    """
+    return []
+    # if not SIGSTORE_AVAILABLE:
+    #     logger.warning("sigstore library not installed, skipping verification.")
+    #     return []
+    #
+    # evidence: list[EvidenceRecord] = []
+    # locator = f"pkg:pypi/{package_name}@{package_version}"
+    # now = datetime.datetime.now(datetime.timezone.utc)
+    #
+    # logger.info(f"Performing Sigstore verification for {artifact_path}")
+    #
+    # try:
+    #     # 1. Load the bundle from the file
+    #     with open(bundle_path, "r", encoding="utf-8") as f:
+    #         bundle = Bundle.from_json(f.read())
+    #
+    #     # 2. Create a verifier. We use the production instance by default.
+    #     verifier = Verifier.production()
+    #
+    #     # 3. Define a verification policy.
+    #     # WARNING: UnsafeNoOp is for demonstration and testing only.
+    #     # It performs no identity verification. Replace with a real policy
+    #     # like `policy.Identity(identity=..., issuer=...)` for security.
+    #     verification_policy = policy.UnsafeNoOp()
+    #
+    #     # 4. Verify the artifact. This will raise VerificationError on failure.
+    #     with open(artifact_path, "rb") as artifact:
+    #         verifier.verify_artifact(artifact, bundle, verification_policy)  # type: ignore
+    #
+    #     logger.info(f"Sigstore verification successful for {package_name}")
+    #
+    #     # 5. If verification succeeds, extract evidence from the trusted bundle.
+    #
+    #     # Evidence for the signer's identity from the certificate's SAN
+    #     if identity := _parse_san_from_cert(bundle):
+    #         value = {
+    #             "identity": identity,
+    #             "issuer": bundle.signing_certificate.issuer.rfc4514_string(),
+    #         }
+    #         record = EvidenceRecord(
+    #             id=generate_evidence_id(
+    #                 EvidenceSource.SIGSTORE,
+    #                 EvidenceKind.SIGSTORE_SIGNER_IDENTITY,
+    #                 locator,
+    #                 str(value),
+    #                 identity,
+    #             ),
+    #             source=EvidenceSource.SIGSTORE,
+    #             locator=locator,
+    #             kind=EvidenceKind.SIGSTORE_SIGNER_IDENTITY,
+    #             value=value,
+    #             observed_at=now,
+    #             confidence=0.95,
+    #             notes=(
+    #                 f"Package cryptographically signed by identity '{identity}' "
+    #                 f"(issuer: {value['issuer']})."
+    #             ),
+    #         )
+    #         evidence.append(record)
+    #
+    #     # Evidence from GitHub workflow info in the certificate extensions
+    #     # OID 1.3.6.1.4.1.57264.1.5 = GitHub Workflow Repository
+    #     oid_repo = "1.3.6.1.4.1.57264.1.5"  # what is this?
+    #     try:
+    #         repo_ext = bundle.signing_certificate.extensions.get_extension_for_oid(  # type: ignore
+    #             oid_repo  # type: ignore
+    #         )
+    #         repo_uri = repo_ext.value.oid.dotted_string  # .decode("utf-8")  # what is this?
+    #
+    #         repo_slug = _parse_repo_from_github_uri(repo_uri) or repo_uri
+    #
+    #         value = {"repo_uri": repo_uri}
+    #         record = EvidenceRecord(
+    #             id=generate_evidence_id(
+    #                 EvidenceSource.SIGSTORE,
+    #                 EvidenceKind.SIGSTORE_BUILD_PROVENANCE,
+    #                 locator,
+    #                 str(value),
+    #                 repo_slug,
+    #             ),
+    #             source=EvidenceSource.SIGSTORE,
+    #             locator=locator,
+    #             kind=EvidenceKind.SIGSTORE_BUILD_PROVENANCE,
+    #             value=value,
+    #             observed_at=now,
+    #             confidence=0.90,
+    #             notes=f"Sigstore certificate attests build from repo '{repo_slug}'.",
+    #         )
+    #         evidence.append(record)
+    #     except Exception:
+    #         # This is not an error; the extension is optional.
+    #         pass
+    #
+    # except (VerificationError, FileNotFoundError, ValueError) as e:
+    #     logger.info(f"Sigstore verification failed for {package_name}: {e}")
+    #     return []
+    # except Exception as e:
+    #     logger.warning(
+    #         f"An unexpected error occurred during Sigstore verification: {e}"
+    #     )
+    #     return []
+    #
+    # logger.info(f"Found {len(evidence)} evidence records from Sigstore.")
+    # return evidence
```
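
The commented-out body above sketches the intended flow with `Verifier.production()` and a verification policy. With the `sigstore` Python package, an identity-pinned verification (rather than `policy.UnsafeNoOp()`) would look roughly like the following; the identity and issuer values are illustrative, not taken from this package:

```python
from sigstore.models import Bundle
from sigstore.verify import Verifier, policy


def verify_sketch(artifact_path: str, bundle_path: str) -> None:
    """Sketch of identity-pinned Sigstore verification; raises on failure."""
    with open(bundle_path, "r", encoding="utf-8") as f:
        bundle = Bundle.from_json(f.read())

    verifier = Verifier.production()
    # Pin the expected CI identity instead of using a no-op policy.
    # Both values below are examples only.
    ident = policy.Identity(
        identity="https://github.com/example/skip-trace/.github/workflows/release.yml@refs/tags/v0.1.1",
        issuer="https://token.actions.githubusercontent.com",
    )
    with open(artifact_path, "rb") as f:
        verifier.verify_artifact(f.read(), bundle, ident)
```
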