skip-trace 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,149 @@
  # skip_trace/collectors/package_files.py
  from __future__ import annotations

+ import datetime
+ import glob
  import logging
  import os
  import shutil
  import tarfile
  import zipfile
+ from email.parser import Parser
  from typing import Any, Dict, List, Optional

  from ..analysis import source_scanner
+ from ..analysis.evidence import generate_evidence_id
  from ..exceptions import CollectorError, NetworkError
- from ..schemas import EvidenceRecord
+ from ..schemas import EvidenceKind, EvidenceRecord, EvidenceSource
  from ..utils import http_client
  from ..utils.safe_targz import safe_extract_auto
+ from ..utils.validation import is_valid_email
+ from . import sigstore

  logger = logging.getLogger(__name__)
  PACKAGE_DOWNLOAD_DIR = ".packages"


+ def _create_evidence_from_contact(
+     contact_str: str,
+     role_kind: EvidenceKind,
+     locator: str,
+     confidence: float,
+     notes_prefix: str,
+ ) -> List[EvidenceRecord]:
+     """Helper to create PERSON and EMAIL evidence from a 'Name <email>' string."""
+     from ..analysis.evidence import _parse_contact_string
+
+     evidence_list = []
+     now = datetime.datetime.now(datetime.timezone.utc)
+     parsed = _parse_contact_string(contact_str)
+     name = parsed.get("name")
+     email = parsed.get("email")
+     source = EvidenceSource.WHEEL
+
+     if name:
+         value = {"name": name}
+         record = EvidenceRecord(
+             id=generate_evidence_id(
+                 source, EvidenceKind.PERSON, locator, str(value), name
+             ),
+             source=source,
+             locator=locator,
+             kind=EvidenceKind.PERSON,
+             value=value,
+             observed_at=now,
+             confidence=confidence,
+             notes=f"{notes_prefix} name '{name}' from {role_kind.value} field in package metadata.",
+         )
+         evidence_list.append(record)
+
+     if email:
+         value = {"email": email}
+         slug = name or email.split("@")[0]
+         record = EvidenceRecord(
+             id=generate_evidence_id(
+                 source, EvidenceKind.EMAIL, locator, str(value), slug
+             ),
+             source=source,
+             locator=locator,
+             kind=EvidenceKind.EMAIL,
+             value=value,
+             observed_at=now,
+             confidence=confidence + 0.1,  # Email is a stronger signal
+             notes=f"{notes_prefix} email for '{slug}' from {role_kind.value} field in package metadata.",
+         )
+         evidence_list.append(record)
+
+     return evidence_list
+
+
+ def _parse_metadata_file(content: str, locator: str) -> List[EvidenceRecord]:
+     """Parses a PKG-INFO or METADATA file for evidence."""
+     evidence_list: List[EvidenceRecord] = []
+     now = datetime.datetime.now(datetime.timezone.utc)
+     headers = Parser().parsestr(content)
+
+     # Author/Maintainer information
+     if author_email := headers.get("Author-email"):
+         evidence_list.extend(
+             _create_evidence_from_contact(
+                 author_email, EvidenceKind.AUTHOR_TAG, locator, 0.35, "Found"
+             )
+         )
+     if author := headers.get("Author"):
+         evidence_list.extend(
+             _create_evidence_from_contact(
+                 author, EvidenceKind.AUTHOR_TAG, locator, 0.30, "Found"
+             )
+         )
+
+     if maintainer_email := headers.get("Maintainer-email"):
+         evidence_list.extend(
+             _create_evidence_from_contact(
+                 maintainer_email, EvidenceKind.MAINTAINER, locator, 0.35, "Found"
+             )
+         )
+     if maintainer := headers.get("Maintainer"):
+         evidence_list.extend(
+             _create_evidence_from_contact(
+                 maintainer, EvidenceKind.MAINTAINER, locator, 0.30, "Found"
+             )
+         )
+
+     # Project URLs
+     urls = headers.get_all("Project-URL", [])
+     if home_page := headers.get("Home-page"):
+         urls.append(f"Homepage, {home_page}")
+
+     for url_entry in urls:
+         try:
+             label, url = [part.strip() for part in url_entry.split(",", 1)]
+             if not is_valid_email(label):  # Filter out email-like labels
+                 value = {"label": label, "url": url}
+                 record = EvidenceRecord(
+                     id=generate_evidence_id(
+                         EvidenceSource.WHEEL,
+                         EvidenceKind.PROJECT_URL,
+                         locator,
+                         str(value),
+                         label,
+                         hint="metadata-file",
+                     ),
+                     source=EvidenceSource.WHEEL,
+                     locator=locator,
+                     kind=EvidenceKind.PROJECT_URL,
+                     value=value,
+                     observed_at=now,
+                     confidence=0.30,
+                     notes=f"Found project URL '{label}' in package metadata file.",
+                 )
+                 evidence_list.append(record)
+         except ValueError:
+             logger.debug(f"Could not parse Project-URL from metadata file: {url_entry}")
+
+     return evidence_list
+
+
  def _ensure_download_dir():
      """Ensures the package download directory and .gitignore exist."""
      os.makedirs(PACKAGE_DOWNLOAD_DIR, exist_ok=True)
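For context on what `_parse_metadata_file` consumes: core-metadata files (`PKG-INFO`, `METADATA`) are RFC 822-style header blocks, which is why the stdlib `email.parser` applies. Below is a minimal, self-contained sketch of the same parsing flow, with `email.utils.parseaddr` standing in for the package's private `_parse_contact_string` helper; the sample header values are invented.

```python
from email.parser import Parser
from email.utils import parseaddr

# Invented sample of a core-metadata header block.
PKG_INFO = """\
Metadata-Version: 2.1
Name: example-pkg
Author-email: Jane Doe <jane@example.org>
Project-URL: Homepage, https://example.org
"""

headers = Parser().parsestr(PKG_INFO)

# "Name <email>" splits cleanly with the stdlib address parser.
name, email_addr = parseaddr(headers.get("Author-email", ""))
print(name, email_addr)  # Jane Doe jane@example.org

# Project-URL entries are "Label, URL" pairs, split on the first comma
# exactly as _parse_metadata_file does above.
for entry in headers.get_all("Project-URL", []):
    label, url = [part.strip() for part in entry.split(",", 1)]
    print(label, url)  # Homepage https://example.org
```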
@@ -37,6 +163,8 @@ def _find_download_url(metadata: Dict[str, Any]) -> Optional[str]:
      wheel_url = None
      sdist_url = None
      for url_info in urls:
+         if url_info.get("yanked"):
+             continue
          packagetype = url_info.get("packagetype")
          if packagetype == "bdist_wheel":
              wheel_url = url_info.get("url")
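The new `yanked` guard skips releases that have been withdrawn on PyPI. A small illustration of the check against the shape of the JSON API's `urls` entries (the entries below are made up; the field names match pypi.org's JSON API):

```python
# Invented entries mirroring the PyPI JSON API's "urls" field.
urls = [
    {"packagetype": "bdist_wheel", "url": "https://files.example/pkg-1.0-py3-none-any.whl", "yanked": True},
    {"packagetype": "sdist", "url": "https://files.example/pkg-1.0.tar.gz", "yanked": False},
]

# Same guard as in _find_download_url: yanked releases are never candidates.
usable = [u for u in urls if not u.get("yanked")]
assert [u["packagetype"] for u in usable] == ["sdist"]
```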
@@ -47,6 +175,33 @@ def _find_download_url(metadata: Dict[str, Any]) -> Optional[str]:
      return wheel_url or sdist_url or (urls[0].get("url") if urls else None)


+ def _download_file(url: str, download_dir: str) -> str | None:
+     """Downloads a file into a directory if not already cached and returns the path."""
+     filename = os.path.basename(url)
+     download_path = os.path.join(download_dir, filename)
+
+     if not os.path.exists(download_path):
+         logger.info(f"Downloading {filename} from {url}")
+         try:
+             with http_client.get_client().stream("GET", url) as response:
+                 response.raise_for_status()
+                 with open(download_path, "wb") as f:
+                     for chunk in response.iter_bytes():
+                         f.write(chunk)
+         except (
+             NetworkError,
+             http_client.httpx.RequestError,
+             http_client.httpx.HTTPStatusError,
+         ) as e:
+             # A 404 is expected for optional files like Sigstore bundles,
+             # so it is logged rather than raised as a CollectorError.
+             # (The exception carries the response; the local `response`
+             # name may never have been bound if the connection failed.)
+             if (
+                 isinstance(e, http_client.httpx.HTTPStatusError)
+                 and e.response.status_code == 404
+             ):
+                 logger.info(f"No file found at {url} (404 Not Found)")
+                 return None
+             raise CollectorError(f"Failed to download file {filename}: {e}") from e
+
+     return download_path
+
+
  def collect_from_package_files(metadata: Dict[str, Any]) -> List[EvidenceRecord]:
      """
      Downloads, extracts, and scans a package's files for evidence.
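The refactored helper treats a 404 as a normal outcome because the `.sigstore` bundle probed later is optional. Here is a standalone sketch of the same stream-and-tolerate-404 pattern using `httpx` directly, without the package's `http_client` wrapper (`fetch` is an invented name):

```python
import httpx

def fetch(url: str, dest: str) -> str | None:
    """Stream a URL to disk; return None on 404, re-raise other HTTP errors."""
    try:
        with httpx.Client() as client, client.stream("GET", url) as response:
            response.raise_for_status()
            with open(dest, "wb") as f:
                for chunk in response.iter_bytes():
                    f.write(chunk)
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:  # missing optional file: not an error
            return None
        raise
    return dest
```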
@@ -70,62 +225,55 @@ def collect_from_package_files(metadata: Dict[str, Any]) -> List[EvidenceRecord]
          return []

      _ensure_download_dir()
-     filename = os.path.basename(download_url)
-     download_path = os.path.join(PACKAGE_DOWNLOAD_DIR, filename)

-     # Download the file if it doesn't already exist
-     if not os.path.exists(download_path):
-         logger.info(f"Downloading {filename} from {download_url}")
-         try:
-             with http_client.get_client().stream("GET", download_url) as response:
-                 response.raise_for_status()
-                 with open(download_path, "wb") as f:
-                     for chunk in response.iter_bytes():
-                         f.write(chunk)
-         except (
-             NetworkError,
-             http_client.httpx.RequestError,
-             http_client.httpx.HTTPStatusError,
-         ) as e:
-             raise CollectorError(f"Failed to download package {filename}: {e}") from e
+     # Download the main package artifact
+     artifact_path = _download_file(download_url, PACKAGE_DOWNLOAD_DIR)
+     if not artifact_path:
+         return []  # Can't proceed without the artifact
+
+     # Attempt to download the corresponding Sigstore bundle
+     bundle_url = f"{download_url}.sigstore"
+     bundle_path = _download_file(bundle_url, PACKAGE_DOWNLOAD_DIR)
+
+     # Initialize evidence list
+     evidence: list[EvidenceRecord] = []
+
+     # Verify with Sigstore if the bundle was found
+     if bundle_path:
+         sigstore_evidence = sigstore.verify_and_collect(
+             artifact_path, bundle_path, package_name, package_version
+         )
+         evidence.extend(sigstore_evidence)

      # Determine the persistent extraction directory path from the filename
-     base_filename = filename
-     for ext in [".whl", ".zip", ".tar.gz", ".tgz", ".tar.bz2"]:
-         if base_filename.endswith(ext):
-             base_filename = base_filename[: -len(ext)]
-             break
+     filename = os.path.basename(artifact_path)
+     base_filename, _ = os.path.splitext(filename)
+     if filename.endswith(".tar.gz"):
+         base_filename, _ = os.path.splitext(base_filename)
      extract_dir = os.path.join(PACKAGE_DOWNLOAD_DIR, base_filename)

      # Extract the archive ONLY if the destination directory doesn't already exist
      if not os.path.exists(extract_dir):
-         logger.info(f"Extracting {download_path} to {extract_dir}")
+         logger.info(f"Extracting {artifact_path} to {extract_dir}")
          os.makedirs(extract_dir, exist_ok=True)
          try:
-             if download_path.endswith((".whl", ".zip")):
-                 with zipfile.ZipFile(download_path, "r") as zf:  # nosec # noqa
-                     zf.extractall(extract_dir)  # nosec # noqa
-             elif download_path.endswith(
+             if artifact_path.endswith((".whl", ".zip")):
+                 with zipfile.ZipFile(artifact_path, "r") as zf:
+                     zf.extractall(extract_dir)  # nosec
+             elif artifact_path.endswith(
                  (".tar.gz", ".tgz", ".tar.bz2", ".tar.xz", ".tar")
              ):
-                 safe_extract_auto(download_path, extract_dir)
-             # elif download_path.endswith((".tar.gz", ".tgz")):
-             #     with tarfile.open(download_path, "r:gz") as tf:  # nosec # noqa
-             #         tf.extractall(extract_dir)  # nosec # noqa
-             # elif download_path.endswith(".tar.bz2"):
-             #     with tarfile.open(download_path, "r:bz2") as tf:  # nosec # noqa
-             #         tf.extractall(extract_dir)  # nosec # noqa
+                 safe_extract_auto(artifact_path, extract_dir)
              else:
                  logger.warning(
                      f"Unsupported archive format for {filename}. Skipping file scan."
                  )
                  shutil.rmtree(extract_dir)  # Clean up the empty dir
-                 return []
+                 return evidence  # Return any Sigstore evidence found
          except (zipfile.BadZipFile, tarfile.TarError, PermissionError) as e:
-             logger.error(f"Failed to extract archive {download_path}: {e}")
-             # Clean up potentially corrupted extraction on error
+             logger.error(f"Failed to extract archive {artifact_path}: {e}")
              shutil.rmtree(extract_dir, ignore_errors=True)
-             return []
+             return evidence  # Return any Sigstore evidence found
      else:
          logger.info(f"Using cached package files from {extract_dir}")

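One subtlety in the new basename logic: `os.path.splitext` strips only the final suffix, so `.tar.gz` sdists need a second pass. Note this is also a behavior change from the removed loop: `.tar.bz2` files now keep their `.tar` stem in the extraction-directory name. A quick demonstration:

```python
import os.path

for fname in ("pkg-1.0-py3-none-any.whl", "pkg-1.0.tar.gz", "pkg-1.0.tar.bz2"):
    base, _ = os.path.splitext(fname)      # strips only the last suffix
    if fname.endswith(".tar.gz"):
        base, _ = os.path.splitext(base)   # second pass: drop ".tar" too
    print(base)
# pkg-1.0-py3-none-any
# pkg-1.0
# pkg-1.0.tar   <- .tar.bz2 keeps its ".tar" stem under the new logic
```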
@@ -143,8 +291,51 @@ def collect_from_package_files(metadata: Dict[str, Any]) -> List[EvidenceRecord]
          logger.error(
              f"Extraction directory {extract_dir} not found after apparent success. Check permissions."
          )
-         return []
+         return evidence

      locator_prefix = f"{package_name}-{package_version}"
-     evidence = source_scanner.scan_directory(scan_target_dir, locator_prefix)
+     source_scan_evidence = source_scanner.scan_directory(
+         scan_target_dir, locator_prefix
+     )
+     evidence.extend(source_scan_evidence)
+
+     # --- Scan for PKG-INFO/METADATA file ---
+     metadata_file_path = None
+     # Use a recursive glob to find the relevant .dist-info or .egg-info directory.
+     # This is more robust for sdists that may have a nested src/ directory.
+     dist_info_pattern = os.path.join(scan_target_dir, "**", "*.dist-info")
+     egg_info_pattern = os.path.join(scan_target_dir, "**", "*.egg-info")
+
+     info_dirs = glob.glob(dist_info_pattern, recursive=True) + glob.glob(
+         egg_info_pattern, recursive=True
+     )
+
+     if info_dirs:
+         info_dir_path = info_dirs[0]  # Assume there's only one
+         potential_files = [
+             os.path.join(info_dir_path, "METADATA"),
+             os.path.join(info_dir_path, "PKG-INFO"),
+         ]
+         for f_path in potential_files:
+             if os.path.exists(f_path):
+                 metadata_file_path = f_path
+                 break
+
+     if metadata_file_path:
+         rel_path = os.path.relpath(metadata_file_path, PACKAGE_DOWNLOAD_DIR)
+         logger.info(f"Found package metadata file: {rel_path}")
+         try:
+             with open(metadata_file_path, "r", encoding="utf-8", errors="ignore") as f:
+                 content = f.read()
+             # Create a locator relative to the package root
+             relative_locator_path = os.path.relpath(metadata_file_path, scan_target_dir)
+             locator = f"{locator_prefix}/{relative_locator_path}"
+             metadata_evidence = _parse_metadata_file(content, locator)
+             evidence.extend(metadata_evidence)
+             logger.info(
+                 f"Extracted {len(metadata_evidence)} evidence records from package metadata file."
+             )
+         except IOError as e:
+             logger.warning(f"Could not read metadata file {metadata_file_path}: {e}")

      return evidence
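The recursive glob matters because sdists often nest their metadata (for example under a `src/` layout). A condensed sketch of the lookup, assuming the same on-disk layout the collector expects (`find_metadata_file` is an invented name):

```python
import glob
import os

def find_metadata_file(root: str) -> str | None:
    """Locate METADATA (wheels) or PKG-INFO (sdists) anywhere under root."""
    # "**" with recursive=True matches nested paths such as src/pkg.egg-info.
    info_dirs = glob.glob(os.path.join(root, "**", "*.dist-info"), recursive=True)
    info_dirs += glob.glob(os.path.join(root, "**", "*.egg-info"), recursive=True)
    for info_dir in info_dirs:
        for candidate in ("METADATA", "PKG-INFO"):
            path = os.path.join(info_dir, candidate)
            if os.path.exists(path):
                return path
    return None
```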
@@ -99,7 +99,7 @@ def cross_reference_by_user(package_name: str) -> List[EvidenceRecord]:
      new_evidence: List[EvidenceRecord] = []
      profile_url = _scrape_user_profile_url(package_name)

-     # --- NEW: Always create evidence for the PyPI user if found ---
+     # --- Always create evidence for the PyPI user if found ---
      if profile_url:
          try:
              username = profile_url.strip("/").rsplit("/", maxsplit=1)[-1]
@@ -0,0 +1,160 @@
+ # skip_trace/collectors/pypi_attestations.py
+
+ # I have no evidence that this works.
+ # It appears to crash, possibly because I'm on Windows.
+
+ from __future__ import annotations
+
+ import datetime
+ import logging
+ import os
+ import shutil
+ import subprocess  # nosec
+ import tempfile
+ from typing import Any, Dict, List
+
+ from ..analysis.evidence import generate_evidence_id
+ from ..schemas import EvidenceKind, EvidenceRecord, EvidenceSource
+ from ..utils import http_client
+
+ logger = logging.getLogger(__name__)
+
+
+ def collect(metadata: Dict[str, Any]) -> List[EvidenceRecord]:
+     """
+     Finds and verifies PyPI attestations by calling the `pypi-attestations` CLI.
+
+     This collector manually queries the PyPI Integrity API to get the attestation,
+     saves it to a temporary file, and then passes that file to the `inspect`
+     command of the CLI tool for verification and parsing.
+
+     Args:
+         metadata: The PyPI JSON metadata for the package.
+
+     Returns:
+         A list of EvidenceRecord objects from verified attestations.
+     """
+     if not shutil.which("pypi-attestations"):
+         logger.debug(
+             "`pypi-attestations` CLI not found in PATH. Skipping attestation check."
+         )
+         return []
+
+     evidence = []
+     urls_data = metadata.get("urls", [])
+     if not urls_data:
+         return []
+
+     project_name = metadata.get("info", {}).get("name")
+     project_version = metadata.get("info", {}).get("version")
+
+     # Find the first downloadable artifact (wheel or sdist) and check it.
+     for url_info in urls_data:
+         artifact_url = url_info.get("url")
+         if not artifact_url or url_info.get("yanked", False):
+             continue
+
+         artifact_filename = os.path.basename(artifact_url)
+
+         # 1. Construct the PyPI Integrity API URL for this specific file.
+         integrity_api_url = f"https://pypi.org/integrity/{project_name}/{project_version}/{artifact_filename}/provenance"
+         logger.info(f"Querying PyPI Integrity API: {integrity_api_url}")
+
+         # 2. Make the API call to get the attestation.
+         response = http_client.make_request_safe(integrity_api_url)
+         if response is None or response.status_code != 200:
+             logger.info(
+                 f"No attestation found for {artifact_filename} via Integrity API (Status: {response.status_code if response else 'N/A'})."
+             )
+             continue
+
+         try:
+             response.json()
+         except Exception:
+             logger.warning(
+                 f"Failed to parse JSON from Integrity API for {artifact_filename}"
+             )
+             continue
+
+         # 3. Save the attestation to a temporary file for the CLI to use.
+         with tempfile.NamedTemporaryFile(
+             mode="w", delete=False, suffix=".attestation.json", encoding="utf-8"
+         ) as tmp_file:
+             tmp_file.write(response.text)
+             temp_attestation_path = tmp_file.name
+
+         try:
+             # 4. Call `pypi-attestations inspect` on the temporary attestation file.
+             # The tool verifies the attestation as part of the inspect command.
+             command = ["pypi-attestations", "inspect", temp_attestation_path]
+             logger.info(f"Running command: {' '.join(command)}")
+             result = subprocess.run(  # nosec
+                 command,
+                 capture_output=True,
+                 text=True,
+                 check=True,  # Raises CalledProcessError on non-zero exit codes
+             )
+
+             logger.info(f"Successfully verified attestation for {artifact_filename}")
+
+             # Parse the human-readable output to find key details.
+             repo_slug = None
+             workflow = None
+             lines = result.stdout.splitlines()
+             for line in lines:
+                 if "Repository:" in line:
+                     repo_slug = line.split(":", 1)[1].strip()
+                 elif "Workflow:" in line:
+                     workflow = line.split(":", 1)[1].strip()
+
+             if not repo_slug:
+                 logger.warning(
+                     "Verified attestation but could not parse repository slug from CLI output."
+                 )
+                 continue
+
+             # Create the evidence record.
+             org_name = repo_slug.split("/")[0]
+             now = datetime.datetime.now(datetime.timezone.utc)
+             value = {
+                 "publisher_kind": "github",
+                 "repository": repo_slug,
+                 "workflow": workflow,
+             }
+
+             record = EvidenceRecord(
+                 id=generate_evidence_id(
+                     EvidenceSource.PYPI_ATTESTATION,
+                     EvidenceKind.PYPI_PUBLISHER_ATTESTATION,
+                     integrity_api_url,
+                     str(value),
+                     org_name,
+                 ),
+                 source=EvidenceSource.PYPI_ATTESTATION,
+                 locator=integrity_api_url,
+                 kind=EvidenceKind.PYPI_PUBLISHER_ATTESTATION,
+                 value=value,
+                 observed_at=now,
+                 confidence=1.0,
+                 notes=(
+                     "Verified PyPI attestation proves publication from GitHub "
+                     f"repo '{repo_slug}' via workflow '{workflow or 'unknown'}'."
+                 ),
+             )
+             evidence.append(record)
+             break
+
+         except subprocess.CalledProcessError as e:
+             logger.warning(
+                 f"CLI verification failed for attestation of {artifact_filename}:\n{e.stderr}"
+             )
+         except Exception as e:
+             logger.error(
+                 f"An unexpected error occurred during CLI attestation processing: {e}"
+             )
+         finally:
+             # Clean up the temporary file.
+             os.remove(temp_attestation_path)
+
+     return evidence
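For reference, the Integrity API interaction above reduces to a single HTTP probe. A hypothetical minimal version using plain `httpx` (the endpoint pattern is the one constructed in the collector; `has_provenance` is an invented name):

```python
import httpx

def has_provenance(project: str, version: str, filename: str) -> bool:
    """Return True if PyPI serves a provenance attestation for this exact file."""
    url = f"https://pypi.org/integrity/{project}/{version}/{filename}/provenance"
    return httpx.get(url).status_code == 200
```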
@@ -0,0 +1,160 @@
+ # skip_trace/collectors/sigstore.py
+ from __future__ import annotations
+
+ import logging
+ from typing import List
+ from urllib.parse import urlparse
+
+ from cryptography import x509
+ from sigstore.models import Bundle
+
+ from ..schemas import EvidenceRecord
+
+ logger = logging.getLogger(__name__)
+
+
+ def _parse_san_from_cert(bundle: Bundle) -> str | None:
+     """Extracts the Subject Alternative Name from the signing certificate."""
+     try:
+         # The SAN extension is a list of GeneralName objects.
+         # We look for rfc822Name (email) or uniformResourceIdentifier.
+         san_ext = bundle.signing_certificate.extensions.get_extension_for_class(
+             x509.SubjectAlternativeName
+         )
+         for san in san_ext.value:
+             # The value attribute of the GeneralName object holds the identity.
+             return san.value
+     except Exception:
+         return None
+     return None
+
+
+ def _parse_repo_from_github_uri(uri: str | None) -> str | None:
+     """Parses a GitHub workflow URI to get the 'owner/repo' string."""
+     if uri and uri.startswith("https://github.com/"):
+         try:
+             parsed = urlparse(uri)
+             path_parts = parsed.path.strip("/").split("/")
+             if len(path_parts) >= 2:
+                 return f"{path_parts[0]}/{path_parts[1]}"
+         except Exception:  # nosec
+             pass
+     return None
+
+
+ def verify_and_collect(
+     artifact_path: str, bundle_path: str, package_name: str, package_version: str
+ ) -> List[EvidenceRecord]:
+     """
+     Verifies a package artifact against a Sigstore bundle and collects evidence.
+
+     Args:
+         artifact_path: Path to the downloaded package file (.whl, .tar.gz).
+         bundle_path: Path to the downloaded .sigstore bundle file.
+         package_name: The name of the package.
+         package_version: The version of the package.
+
+     Returns:
+         A list of EvidenceRecord objects from the verification.
+     """
+     # Verification is currently disabled; the draft implementation below is
+     # kept for reference.
+     return []
+     # if not SIGSTORE_AVAILABLE:
+     #     logger.warning("sigstore library not installed, skipping verification.")
+     #     return []
+     #
+     # evidence: list[EvidenceRecord] = []
+     # locator = f"pkg:pypi/{package_name}@{package_version}"
+     # now = datetime.datetime.now(datetime.timezone.utc)
+     #
+     # logger.info(f"Performing Sigstore verification for {artifact_path}")
+     #
+     # try:
+     #     # 1. Load the bundle from the file
+     #     with open(bundle_path, "r", encoding="utf-8") as f:
+     #         bundle = Bundle.from_json(f.read())
+     #
+     #     # 2. Create a verifier. We use the production instance by default.
+     #     verifier = Verifier.production()
+     #
+     #     # 3. Define a verification policy.
+     #     # WARNING: UnsafeNoOp is for demonstration and testing only.
+     #     # It performs no identity verification. Replace with a real policy
+     #     # like `policy.Identity(identity=..., issuer=...)` for security.
+     #     verification_policy = policy.UnsafeNoOp()
+     #
+     #     # 4. Verify the artifact. This will raise VerificationError on failure.
+     #     with open(artifact_path, "rb") as artifact:
+     #         verifier.verify_artifact(artifact, bundle, verification_policy)  # type: ignore
+     #
+     #     logger.info(f"Sigstore verification successful for {package_name}")
+     #
+     #     # 5. If verification succeeds, extract evidence from the trusted bundle.
+     #
+     #     # Evidence for the signer's identity from the certificate's SAN
+     #     if identity := _parse_san_from_cert(bundle):
+     #         value = {
+     #             "identity": identity,
+     #             "issuer": bundle.signing_certificate.issuer.rfc4514_string(),
+     #         }
+     #         record = EvidenceRecord(
+     #             id=generate_evidence_id(
+     #                 EvidenceSource.SIGSTORE,
+     #                 EvidenceKind.SIGSTORE_SIGNER_IDENTITY,
+     #                 locator,
+     #                 str(value),
+     #                 identity,
+     #             ),
+     #             source=EvidenceSource.SIGSTORE,
+     #             locator=locator,
+     #             kind=EvidenceKind.SIGSTORE_SIGNER_IDENTITY,
+     #             value=value,
+     #             observed_at=now,
+     #             confidence=0.95,
+     #             notes=(
+     #                 f"Package cryptographically signed by identity '{identity}' "
+     #                 f"(issuer: {value['issuer']})."
+     #             ),
+     #         )
+     #         evidence.append(record)
+     #
+     #     # Evidence from GitHub workflow info in the certificate extensions
+     #     # OID 1.3.6.1.4.1.57264.1.5 = GitHub Workflow Repository
+     #     oid_repo = "1.3.6.1.4.1.57264.1.5"  # what is this?
+     #     try:
+     #         repo_ext = bundle.signing_certificate.extensions.get_extension_for_oid(  # type: ignore
+     #             oid_repo  # type: ignore
+     #         )
+     #         repo_uri = repo_ext.value.oid.dotted_string  # .decode("utf-8") # what is this?
+     #
+     #         repo_slug = _parse_repo_from_github_uri(repo_uri) or repo_uri
+     #
+     #         value = {"repo_uri": repo_uri}
+     #         record = EvidenceRecord(
+     #             id=generate_evidence_id(
+     #                 EvidenceSource.SIGSTORE,
+     #                 EvidenceKind.SIGSTORE_BUILD_PROVENANCE,
+     #                 locator,
+     #                 str(value),
+     #                 repo_slug,
+     #             ),
+     #             source=EvidenceSource.SIGSTORE,
+     #             locator=locator,
+     #             kind=EvidenceKind.SIGSTORE_BUILD_PROVENANCE,
+     #             value=value,
+     #             observed_at=now,
+     #             confidence=0.90,
+     #             notes=f"Sigstore certificate attests build from repo '{repo_slug}'.",
+     #         )
+     #         evidence.append(record)
+     #     except Exception:
+     #         # This is not an error; the extension is optional.
+     #         pass
+     #
+     # except (VerificationError, FileNotFoundError, ValueError) as e:
+     #     logger.info(f"Sigstore verification failed for {package_name}: {e}")
+     #     return []
+     # except Exception as e:
+     #     logger.warning(
+     #         f"An unexpected error occurred during Sigstore verification: {e}"
+     #     )
+     #     return []
+     #
+     # logger.info(f"Found {len(evidence)} evidence records from Sigstore.")
+     # return evidence
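The disabled draft above relies on `policy.UnsafeNoOp()`, which its own comment flags as performing no identity verification. For comparison, a hedged sketch of what an enabled path might look like with `sigstore-python`'s documented verification API, pinning an expected publisher identity (the workflow URL and issuer below are placeholders, not values taken from this package; exact signatures may vary across sigstore-python versions):

```python
from sigstore.models import Bundle
from sigstore.verify import Verifier, policy

def verify(artifact_path: str, bundle_path: str) -> None:
    """Verify an artifact against its bundle; raises on any failure."""
    with open(bundle_path, "r", encoding="utf-8") as f:
        bundle = Bundle.from_json(f.read())

    verifier = Verifier.production()

    # Pin the exact CI identity expected to have signed the release.
    # Placeholder values: substitute the real workflow URL and OIDC issuer.
    identity = policy.Identity(
        identity="https://github.com/OWNER/REPO/.github/workflows/release.yml@refs/heads/main",
        issuer="https://token.actions.githubusercontent.com",
    )

    with open(artifact_path, "rb") as artifact:
        verifier.verify_artifact(artifact.read(), bundle, identity)
```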