code-provenance 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,122 @@
1
+ Metadata-Version: 2.4
2
+ Name: code-provenance
3
+ Version: 0.1.0
4
+ Summary: Resolve Docker images to their source code commits on GitHub
5
+ Author: SCRT Labs
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/scrtlabs/code-provenance
8
+ Project-URL: Repository, https://github.com/scrtlabs/code-provenance
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Topic :: Software Development :: Build Tools
16
+ Classifier: Topic :: System :: Software Distribution
17
+ Requires-Python: >=3.10
18
+ Description-Content-Type: text/markdown
19
+ Requires-Dist: pyyaml>=6.0
20
+ Requires-Dist: requests>=2.31
21
+ Requires-Dist: rich>=13.0
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=7.0; extra == "dev"
24
+
25
+ # code-provenance
26
+
27
+ Resolve Docker images in a docker-compose file to their exact source code commits on GitHub.
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install code-provenance
33
+ ```
34
+
35
+ Requires Python 3.10+.
36
+
37
+ ## CLI Usage
38
+
39
+ ```bash
40
+ code-provenance [compose-file] [--json] [--verbose]
41
+ ```
42
+
43
+ - `compose-file` -- path to a docker-compose file (default: `docker-compose.yml`)
44
+ - `--json` -- output results as JSON
45
+ - `--verbose`, `-v` -- show resolution steps for each image
46
+
47
+ ### Example
48
+
49
+ ```bash
50
+ code-provenance docker-compose.yml
51
+ ```
52
+
53
+ ```
54
+ ┌─────────┬────────────────┬────────────────────────────┬──────────────┬──────────┬────────────┐
55
+ │ SERVICE │ IMAGE │ REPO │ COMMIT │ STATUS │ CONFIDENCE │
56
+ ├─────────┼────────────────┼────────────────────────────┼──────────────┼──────────┼────────────┤
57
+ │ web │ traefik:v3.6.0 │ github.com/traefik/traefik │ 06db5168c0d9 │ resolved │ exact │
58
+ └─────────┴────────────────┴────────────────────────────┴──────────────┴──────────┴────────────┘
59
+ ```
60
+
61
+ ## Library Usage
62
+
63
+ ```python
64
+ from code_provenance.compose_parser import parse_compose, parse_image_ref
65
+ from code_provenance.resolver import resolve_image
66
+
67
+ yaml_content = open("docker-compose.yml").read()
68
+ for service, image in parse_compose(yaml_content):
69
+ ref = parse_image_ref(image)
70
+ result = resolve_image(service, ref)
71
+ print(f"{result.service}: {result.commit} ({result.confidence})")
72
+ ```
73
+
74
+ ## API Reference
75
+
76
+ ### Functions
77
+
78
+ - `parse_compose(yaml_content: str) -> list[tuple[str, str]]` -- parse a docker-compose YAML string and return `(service_name, image_string)` pairs
79
+ - `parse_image_ref(image: str) -> ImageRef` -- parse a Docker image string into its components
80
+ - `resolve_image(service: str, ref: ImageRef) -> ImageResult` -- resolve an image reference to its source code commit
81
+
82
+ ### ImageRef
83
+
84
+ | Field | Type | Description |
85
+ |-------|------|-------------|
86
+ | `registry` | `str` | e.g. `"ghcr.io"`, `"docker.io"` |
87
+ | `namespace` | `str` | e.g. `"myorg"`, `"library"` |
88
+ | `name` | `str` | e.g. `"traefik"`, `"postgres"` |
89
+ | `tag` | `str` | e.g. `"v3.6.0"`, `"latest"`; holds the digest (e.g. `"sha256:..."`) for `image@digest` references |
90
+ | `raw` | `str` | original image string from docker-compose |
91
+
92
+ ### ImageResult
93
+
94
+ | Field | Type | Description |
95
+ |-------|------|-------------|
96
+ | `service` | `str` | service name from docker-compose |
97
+ | `image` | `str` | original image string |
98
+ | `registry` | `str` | image registry |
99
+ | `repo` | `str \| None` | GitHub repository URL |
100
+ | `tag` | `str` | image tag |
101
+ | `commit` | `str \| None` | resolved commit SHA |
102
+ | `commit_url` | `str \| None` | URL to the commit on GitHub |
103
+ | `status` | `str` | `"resolved"`, `"repo_not_found"`, `"repo_found_tag_not_matched"`, or `"no_tag"` |
104
+ | `resolution_method` | `str \| None` | how the commit was resolved (e.g. `"oci_labels"`, `"tag_match"`) |
105
+ | `confidence` | `str \| None` | `"exact"` or `"approximate"` |
106
+ | `steps` | `list[str]` | resolution steps taken (useful with `--verbose`) |
107
+
108
+ ## Authentication
109
+
110
+ Set `GITHUB_TOKEN` for full functionality (digest resolution, `:latest` on GHCR, higher rate limits):
111
+
112
+ ```bash
113
+ export GITHUB_TOKEN=ghp_your_token_here
114
+ ```
115
+
116
+ Create a classic token at https://github.com/settings/tokens with the `read:packages` scope. If you use the `gh` CLI, run `gh auth refresh -h github.com -s read:packages` first.
117
+
118
+ The `run.sh` wrapper auto-detects the token from the `gh` CLI if it is available.
119
+
120
+ ## License
121
+
122
+ MIT
@@ -0,0 +1,98 @@
1
+ # code-provenance
2
+
3
+ Resolve Docker images in a docker-compose file to their exact source code commits on GitHub.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install code-provenance
9
+ ```
10
+
11
+ Requires Python 3.10+.
12
+
13
+ ## CLI Usage
14
+
15
+ ```bash
16
+ code-provenance [compose-file] [--json] [--verbose]
17
+ ```
18
+
19
+ - `compose-file` -- path to a docker-compose file (default: `docker-compose.yml`)
20
+ - `--json` -- output results as JSON
21
+ - `--verbose`, `-v` -- show resolution steps for each image
22
+
23
+ ### Example
24
+
25
+ ```bash
26
+ code-provenance docker-compose.yml
27
+ ```
28
+
29
+ ```
30
+ ┌─────────┬────────────────┬────────────────────────────┬──────────────┬──────────┬────────────┐
31
+ │ SERVICE │ IMAGE │ REPO │ COMMIT │ STATUS │ CONFIDENCE │
32
+ ├─────────┼────────────────┼────────────────────────────┼──────────────┼──────────┼────────────┤
33
+ │ web │ traefik:v3.6.0 │ github.com/traefik/traefik │ 06db5168c0d9 │ resolved │ exact │
34
+ └─────────┴────────────────┴────────────────────────────┴──────────────┴──────────┴────────────┘
35
+ ```
36
+
37
+ ## Library Usage
38
+
39
+ ```python
40
+ from code_provenance.compose_parser import parse_compose, parse_image_ref
41
+ from code_provenance.resolver import resolve_image
42
+
43
+ yaml_content = open("docker-compose.yml").read()
44
+ for service, image in parse_compose(yaml_content):
45
+ ref = parse_image_ref(image)
46
+ result = resolve_image(service, ref)
47
+ print(f"{result.service}: {result.commit} ({result.confidence})")
48
+ ```
49
+
50
+ ## API Reference
51
+
52
+ ### Functions
53
+
54
+ - `parse_compose(yaml_content: str) -> list[tuple[str, str]]` -- parse a docker-compose YAML string and return `(service_name, image_string)` pairs
55
+ - `parse_image_ref(image: str) -> ImageRef` -- parse a Docker image string into its components
56
+ - `resolve_image(service: str, ref: ImageRef) -> ImageResult` -- resolve an image reference to its source code commit
57
+
58
+ ### ImageRef
59
+
60
+ | Field | Type | Description |
61
+ |-------|------|-------------|
62
+ | `registry` | `str` | e.g. `"ghcr.io"`, `"docker.io"` |
63
+ | `namespace` | `str` | e.g. `"myorg"`, `"library"` |
64
+ | `name` | `str` | e.g. `"traefik"`, `"postgres"` |
65
+ | `tag` | `str` | e.g. `"v3.6.0"`, `"latest"`; holds the digest (e.g. `"sha256:..."`) for `image@digest` references |
66
+ | `raw` | `str` | original image string from docker-compose |
67
+
68
+ ### ImageResult
69
+
70
+ | Field | Type | Description |
71
+ |-------|------|-------------|
72
+ | `service` | `str` | service name from docker-compose |
73
+ | `image` | `str` | original image string |
74
+ | `registry` | `str` | image registry |
75
+ | `repo` | `str \| None` | GitHub repository URL |
76
+ | `tag` | `str` | image tag |
77
+ | `commit` | `str \| None` | resolved commit SHA |
78
+ | `commit_url` | `str \| None` | URL to the commit on GitHub |
79
+ | `status` | `str` | `"resolved"`, `"repo_not_found"`, `"repo_found_tag_not_matched"`, or `"no_tag"` |
80
+ | `resolution_method` | `str \| None` | how the commit was resolved (e.g. `"oci_labels"`, `"tag_match"`) |
81
+ | `confidence` | `str \| None` | `"exact"` or `"approximate"` |
82
+ | `steps` | `list[str]` | resolution steps taken (useful with `--verbose`) |
83
+
84
+ ## Authentication
85
+
86
+ Set `GITHUB_TOKEN` for full functionality (digest resolution, `:latest` on GHCR, higher rate limits):
87
+
88
+ ```bash
89
+ export GITHUB_TOKEN=ghp_your_token_here
90
+ ```
91
+
92
+ Create a classic token at https://github.com/settings/tokens with the `read:packages` scope. If you use the `gh` CLI, run `gh auth refresh -h github.com -s read:packages` first.
93
+
94
+ The `run.sh` wrapper auto-detects the token from the `gh` CLI if it is available.
95
+
96
+ ## License
97
+
98
+ MIT
@@ -0,0 +1 @@
1
+ from code_provenance.models import ImageRef, ImageResult
@@ -0,0 +1,69 @@
1
+ import argparse
2
+ import sys
3
+ from pathlib import Path
4
+ from code_provenance.compose_parser import parse_compose, parse_image_ref
5
+ from code_provenance.resolver import resolve_image
6
+ from code_provenance.output import format_json, format_table
7
+
8
+
9
+ def main(argv: list[str] | None = None) -> int:
10
+ parser = argparse.ArgumentParser(
11
+ prog="code-provenance",
12
+ description="Resolve Docker images to their source code commits on GitHub.",
13
+ )
14
+ parser.add_argument(
15
+ "compose_file",
16
+ nargs="?",
17
+ default="docker-compose.yml",
18
+ help="Path to docker-compose file (default: docker-compose.yml)",
19
+ )
20
+ parser.add_argument(
21
+ "--json",
22
+ action="store_true",
23
+ dest="json_output",
24
+ help="Output results as JSON",
25
+ )
26
+ parser.add_argument(
27
+ "--verbose", "-v",
28
+ action="store_true",
29
+ help="Show resolution steps",
30
+ )
31
+
32
+ args = parser.parse_args(argv)
33
+
34
+ compose_path = Path(args.compose_file)
35
+ if not compose_path.exists():
36
+ print(f"Error: {compose_path} not found", file=sys.stderr)
37
+ return 1
38
+
39
+ yaml_content = compose_path.read_text()
40
+ services = parse_compose(yaml_content)
41
+
42
+ if not services:
43
+ print("No services with images found.", file=sys.stderr)
44
+ return 0
45
+
46
+ results = []
47
+ for service_name, image_string in services:
48
+ ref = parse_image_ref(image_string)
49
+ result = resolve_image(service_name, ref)
50
+ results.append(result)
51
+
52
+ if args.verbose:
53
+ for result in results:
54
+ print(f"\nResolving {result.image} ...", file=sys.stderr)
55
+ for step in result.steps:
56
+ print(f" {step}", file=sys.stderr)
57
+ print(f" → {result.status}" + (f" ({result.resolution_method}, {result.confidence})" if result.status == "resolved" else ""), file=sys.stderr)
58
+ print(file=sys.stderr)
59
+
60
+ if args.json_output:
61
+ print(format_json(results))
62
+ else:
63
+ print(format_table(results))
64
+
65
+ return 0
66
+
67
+
68
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,58 @@
1
+ import yaml
2
+ from code_provenance.models import ImageRef
3
+
4
+
5
def parse_image_ref(image_string: str) -> ImageRef:
    """Parse a Docker image string into an ImageRef.

    Handles ``name``, ``name:tag``, ``name@digest`` and ``name:tag@digest``,
    each optionally prefixed with a registry host and a namespace path.

    Args:
        image_string: Image reference as written in the compose file.

    Returns:
        An ImageRef whose ``tag`` is the digest when one is present,
        otherwise the explicit tag, otherwise ``"latest"``. ``raw`` keeps
        the unmodified input string.
    """
    raw = image_string

    # Split off a digest (image@sha256:...) first so its colon is never
    # mistaken for a tag separator.
    digest = None
    if "@" in image_string:
        image_string, digest = image_string.split("@", 1)

    # Only a colon in the last path component separates a tag; an earlier
    # colon belongs to a registry host:port. This runs for digest references
    # too, so "nginx:1.25@sha256:..." does not leave ":1.25" in the name.
    if ":" in image_string.rsplit("/", 1)[-1]:
        image_string, tag = image_string.rsplit(":", 1)
    else:
        tag = "latest"

    # A digest pins the image exactly, so it takes precedence over the tag.
    if digest is not None:
        tag = digest

    # Determine registry: the first component is a registry host only if it
    # looks like one (contains a dot or a port) and more components follow.
    parts = image_string.split("/")
    if len(parts) >= 2 and ("." in parts[0] or ":" in parts[0]):
        registry = parts[0]
        remaining = parts[1:]
    else:
        registry = "docker.io"
        remaining = parts

    # Determine namespace and name: a bare name is placed in "library";
    # any path components after the namespace are kept in the name.
    if len(remaining) == 1:
        namespace = "library"
        name = remaining[0]
    else:
        namespace = remaining[0]
        name = "/".join(remaining[1:])

    return ImageRef(
        registry=registry,
        namespace=namespace,
        name=name,
        tag=tag,
        raw=raw,
    )
48
+
49
+
50
def parse_compose(yaml_content: str) -> list[tuple[str, str]]:
    """Parse docker-compose YAML and return list of (service_name, image_string).

    Args:
        yaml_content: Text of a docker-compose file.

    Returns:
        One ``(service_name, image_string)`` pair for each service that
        declares a string ``image``, in document order. Returns an empty
        list when the document is empty, is not a mapping, or has no usable
        ``services`` mapping.
    """
    data = yaml.safe_load(yaml_content)
    # safe_load returns None for an empty document and may return a list or
    # scalar for other YAML; none of those can contain services.
    if not isinstance(data, dict):
        return []

    services = data.get("services")
    if not isinstance(services, dict):
        return []

    # Services without an image (e.g. build-only) and non-string image
    # values are skipped so callers always receive parseable strings.
    return [
        (service_name, service_config["image"])
        for service_name, service_config in services.items()
        if isinstance(service_config, dict)
        and isinstance(service_config.get("image"), str)
    ]
@@ -0,0 +1,250 @@
1
+ import os
2
+ import re
3
+ import requests
4
+
5
+
6
def github_headers() -> dict[str, str]:
    """Return the HTTP headers used for GitHub API requests.

    The ``Accept`` header is always present. A bearer ``Authorization``
    header is added only when the ``GITHUB_TOKEN`` environment variable is
    set to a non-empty value.
    """
    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        return {"Accept": "application/vnd.github+json"}
    return {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {token}",
    }
13
+
14
+
15
def _normalize_tag(tag: str) -> str:
    """Return *tag* without one leading ``v`` so ``v1.2`` and ``1.2`` compare equal.

    Exactly one ``v`` is removed. ``str.lstrip("v")`` would strip every
    leading ``v``, making distinct tags such as ``vv1.0`` and ``1.0``
    compare equal.
    """
    return tag.removeprefix("v")
18
+
19
+
20
def _is_prefix_match(image_tag: str, git_tag: str) -> bool:
    """Tell whether *git_tag* extends *image_tag* by further dotted components.

    Both tags are normalized first. The image tag followed by a dot must be
    a prefix of the git tag, so ``v2.10`` matches ``v2.10.7`` but neither
    ``v2.1`` nor ``v2.100``.
    """
    required_prefix = f"{_normalize_tag(image_tag)}."
    candidate = _normalize_tag(git_tag)
    return candidate.startswith(required_prefix)
28
+
29
+
30
def _parse_version_tuple(tag: str) -> tuple[int, ...] | None:
    """Convert a version-like tag into a tuple of ints for ordering.

    The tag is normalized, anything from the first ``-`` or ``+`` onward
    (e.g. ``-rc1``, ``-beta2``) is dropped, and the rest is split on dots.
    Returns ``None`` if any component is not an integer.
    """
    # re.split always yields at least one element; keep only the first.
    core, *_ = re.split(r"[-+]", _normalize_tag(tag))
    try:
        return tuple(map(int, core.split(".")))
    except ValueError:
        return None
40
+
41
+
42
def resolve_tag_to_commit(owner: str, repo: str, tag: str) -> tuple[str, bool] | None:
    """Resolve an image tag to a commit SHA by matching against git tags.

    Pages through the repository's tags on the GitHub API. An exact match
    (with or without a ``v`` prefix) is returned as soon as it is seen.
    Otherwise the highest-versioned tag that extends *tag* is used
    (e.g. ``v2.10`` -> highest ``v2.10.x``).

    Args:
        owner: GitHub owner (user or organization).
        repo: GitHub repository name.
        tag: Image tag to look up.

    Returns:
        ``(commit_sha, is_exact_match)``, or ``None`` when nothing matches,
        the API answers with a non-200 status, or the request fails.
    """
    headers = github_headers()
    url = f"https://api.github.com/repos/{owner}/{repo}/tags"
    # Sent on the first request only: the "next" links returned by the API
    # already carry the paging parameters.
    params: dict[str, int] | None = {"per_page": 100}

    prefix_candidates: list[tuple[tuple[int, ...], str]] = []

    while url:
        try:
            resp = requests.get(url, headers=headers, params=params, timeout=10)
            if resp.status_code != 200:
                return None
            page = resp.json()
        except requests.RequestException:
            # Treat network and decoding failures like a non-200 response,
            # as the other GitHub helpers in this module do.
            return None

        for git_tag in page:
            name = git_tag["name"]
            # Exact match (with/without v prefix)
            if name == tag or name == f"v{tag}" or _normalize_tag(name) == _normalize_tag(tag):
                return git_tag["commit"]["sha"], True

            # Collect prefix match candidates that parse as numeric versions
            if _is_prefix_match(tag, name):
                version = _parse_version_tuple(name)
                if version is not None:
                    prefix_candidates.append((version, git_tag["commit"]["sha"]))

        url = resp.links.get("next", {}).get("url")
        params = None

    # Return the highest version among prefix matches
    if prefix_candidates:
        return max(prefix_candidates)[1], False

    return None
78
+
79
+
80
def get_latest_release_commit(owner: str, repo: str) -> tuple[str, str] | None:
    """Look up the commit behind the repository's latest GitHub release.

    Fetches ``releases/latest`` for its ``tag_name`` and resolves that tag
    through ``resolve_tag_to_commit``.

    Returns:
        ``(commit_sha, tag_name)``, or ``None`` when the request fails, the
        status is not 200, the release has no tag name, or the tag cannot
        be resolved.
    """
    headers = github_headers()
    release_url = f"https://api.github.com/repos/{owner}/{repo}/releases/latest"
    try:
        resp = requests.get(release_url, headers=headers, timeout=10)
        if resp.status_code != 200:
            return None
        tag_name = resp.json().get("tag_name")
    except requests.RequestException:
        return None

    if not tag_name:
        return None

    # Resolve the release tag to a commit
    resolved = resolve_tag_to_commit(owner, repo, tag_name)
    if not resolved:
        return None
    commit_sha, _ = resolved
    return commit_sha, tag_name
106
+
107
+
108
def get_latest_commit(owner: str, repo: str) -> str | None:
    """Return the SHA of the first commit listed by the repository's commits endpoint.

    Requests a single commit (``per_page=1``). Returns ``None`` on a
    non-200 status, an empty list, a request failure, or an unexpected
    payload.
    """
    headers = github_headers()
    commits_url = f"https://api.github.com/repos/{owner}/{repo}/commits"
    try:
        resp = requests.get(
            commits_url,
            headers=headers,
            params={"per_page": 1},
            timeout=10,
        )
        if resp.status_code == 200:
            commits = resp.json()
            if commits:
                return commits[0]["sha"]
    except (requests.RequestException, KeyError, IndexError):
        return None
    return None
126
+
127
+
128
def check_github_repo_exists(owner: str, repo: str) -> bool:
    """Report whether the GitHub API answers 200 for ``owner/repo``.

    Any other status, or a failed request, counts as "does not exist".
    """
    headers = github_headers()
    repo_url = f"https://api.github.com/repos/{owner}/{repo}"
    try:
        status = requests.get(repo_url, headers=headers, timeout=10).status_code
    except requests.RequestException:
        return False
    return status == 200
140
+
141
+
142
def _find_ghcr_package_version(
    owner: str, package_name: str, *, match_digest: str | None = None, match_tag: str | None = None,
) -> dict | None:
    """Find a GHCR package version by digest or tag via the GitHub Packages API.

    Requires GITHUB_TOKEN with read:packages scope; without a token the
    function returns ``None`` immediately. The package is looked up first
    under ``orgs/<owner>`` and then under ``users/<owner>``.

    Args:
        owner: Package owner (organization or user).
        package_name: Container package name; it is URL-encoded before
            being placed in the request path.
        match_digest: Version name (the digest) to match.
        match_tag: Container tag the version must carry.

    Returns:
        ``{"repo": "owner/repo", "commit": sha_or_None, "tags": [...]}`` for
        the first matching version. ``commit`` is ``None`` when none of the
        version's tags resolve to a git tag. Returns ``None`` when nothing
        matches, the package is not linked to a repository, or the API
        answers 403.
    """
    from urllib.parse import quote

    headers = github_headers()
    if "Authorization" not in headers:
        return None

    # Package names may contain "/" (nested image paths). Left unencoded it
    # would be read as extra URL path segments.
    encoded_name = quote(package_name, safe="")

    for entity_type in ("orgs", "users"):
        pkg_base = f"https://api.github.com/{entity_type}/{owner}/packages/container/{encoded_name}"

        # Get package metadata for source repo
        try:
            pkg_resp = requests.get(pkg_base, headers=headers, timeout=10)
            if pkg_resp.status_code == 403:
                # Trying the other entity type with the same token is
                # skipped on 403.
                return None
            if pkg_resp.status_code != 200:
                continue
            pkg_data = pkg_resp.json()
        except requests.RequestException:
            continue

        # "repository" can be absent or null, so guard against None.
        repo_info = pkg_data.get("repository") or {}
        full_name = repo_info.get("full_name")
        if not full_name:
            continue

        # Search versions
        url = f"{pkg_base}/versions"
        try:
            while url:
                resp = requests.get(url, headers=headers, params={"per_page": 50}, timeout=10)
                if resp.status_code != 200:
                    break

                for version in resp.json():
                    name = version.get("name", "")
                    # Guard each level against null values in the payload.
                    metadata = (version.get("metadata") or {}).get("container") or {}
                    tags = metadata.get("tags") or []

                    # Match by digest (version name is the digest). A digest
                    # mismatch rejects the version only when no tag filter
                    # was supplied as well.
                    if match_digest and name != match_digest:
                        if match_tag is None:
                            continue
                    # Match by tag
                    if match_tag and match_tag not in tags:
                        continue

                    # Found matching version — resolve tags to a commit
                    repo_owner, repo_name = full_name.split("/", 1)
                    # "latest" is excluded from the tags tried against git.
                    resolvable_tags = [t for t in tags if t != "latest"]
                    for tag in resolvable_tags:
                        tag_result = resolve_tag_to_commit(repo_owner, repo_name, tag)
                        if tag_result:
                            commit_sha, _ = tag_result
                            return {"repo": full_name, "commit": commit_sha, "tags": tags}

                    return {"repo": full_name, "commit": None, "tags": tags}

                url = resp.links.get("next", {}).get("url")
        except requests.RequestException:
            continue

    return None
210
+
211
+
212
def resolve_ghcr_digest_via_packages(owner: str, package_name: str, digest: str) -> dict | None:
    """Look up a GHCR package version by digest.

    Delegates to ``_find_ghcr_package_version`` with ``match_digest`` set
    and returns its result unchanged.
    """
    lookup = _find_ghcr_package_version(owner, package_name, match_digest=digest)
    return lookup
215
+
216
+
217
def resolve_ghcr_latest_via_packages(owner: str, package_name: str) -> dict | None:
    """Look up the GHCR package version currently tagged ``latest``.

    Delegates to ``_find_ghcr_package_version`` with ``match_tag="latest"``
    and returns its result unchanged.
    """
    lookup = _find_ghcr_package_version(owner, package_name, match_tag="latest")
    return lookup
220
+
221
+
222
def infer_repo_from_dockerhub(namespace: str, name: str) -> tuple[str, str] | None:
    """Try to find the GitHub repo for a Docker Hub image.

    First probes GitHub for the obvious candidate, then falls back to the
    first ``github.com/<owner>/<repo>`` link in the image's Docker Hub
    description.

    Args:
        namespace: Docker Hub namespace (``"library"`` for official images).
        name: Image name.

    Returns:
        ``(owner, repo)``, or ``None`` when no repository can be inferred.
    """
    # Official images (library/X) are tried as X/X (traefik -> traefik/traefik);
    # namespaced images are tried as namespace/name.
    candidate_owner = name if namespace == "library" else namespace
    if check_github_repo_exists(candidate_owner, name):
        return candidate_owner, name

    # Fall back to scraping Docker Hub description for GitHub links
    url = f"https://hub.docker.com/v2/repositories/{namespace}/{name}"
    try:
        resp = requests.get(url, timeout=10)
        if resp.status_code != 200:
            return None
        data = resp.json()
    except requests.RequestException:
        return None

    if not isinstance(data, dict):
        return None

    text = (data.get("full_description") or "") + " " + (data.get("description") or "")
    match = re.search(r"https?://github\.com/([\w.-]+)/([\w.-]+)", text)
    if not match:
        return None

    owner = match.group(1)
    # The character class also matches a sentence-ending period and a
    # ".git" clone suffix; neither belongs to the repository name.
    repo = match.group(2).rstrip(".").removesuffix(".git")
    if not repo:
        return None
    return owner, repo
@@ -0,0 +1,32 @@
1
+ from dataclasses import dataclass, field
2
+
3
+
4
@dataclass
class ImageRef:
    """Components of a Docker image reference as parsed from a compose file."""

    # Registry host, e.g. "ghcr.io" or "docker.io".
    registry: str
    # Namespace under the registry, e.g. "myorg" or "library".
    namespace: str
    # Image name, e.g. "excalidraw" or "postgres".
    name: str
    # Tag, e.g. "v3.4.12" or "latest".
    tag: str
    # The image string exactly as written in docker-compose.
    raw: str

    @property
    def full_name(self) -> str:
        """Return ``registry/namespace/name``, without the tag."""
        return "/".join((self.registry, self.namespace, self.name))
17
+
18
+
19
+ @dataclass
20
+ class ImageResult:
21
+ """Resolution result for a single image."""
22
+ service: str
23
+ image: str # original image string
24
+ registry: str
25
+ repo: str | None = None
26
+ tag: str = ""
27
+ commit: str | None = None
28
+ commit_url: str | None = None
29
+ status: str = "repo_not_found"
30
+ resolution_method: str | None = None
31
+ confidence: str | None = None # "exact", "approximate", or None if unresolved
32
+ steps: list[str] = field(default_factory=list)
@@ -0,0 +1,33 @@
1
+ import json
2
+ from dataclasses import asdict
3
+ from io import StringIO
4
+ from rich.console import Console
5
+ from rich.table import Table
6
+ from code_provenance.models import ImageResult
7
+
8
+
9
+ def format_json(results: list[ImageResult]) -> str:
10
+ """Format results as a JSON array."""
11
+ return json.dumps([asdict(r) for r in results], indent=2)
12
+
13
+
14
def format_table(results: list[ImageResult]) -> str:
    """Render the results as a rich table and return it as text.

    Commits are shortened to 12 characters, the ``https://`` scheme is
    removed from repository URLs, and missing values are shown as ``-``.
    The table is printed to an in-memory console 160 columns wide with
    terminal handling disabled.
    """
    table = Table(show_header=True, header_style="bold")
    for heading in ("SERVICE", "IMAGE", "REPO", "COMMIT", "STATUS", "CONFIDENCE"):
        table.add_column(heading)

    for item in results:
        table.add_row(
            item.service,
            item.image,
            item.repo.replace("https://", "") if item.repo else "-",
            item.commit[:12] if item.commit else "-",
            item.status,
            item.confidence or "-",
        )

    buf = StringIO()
    Console(file=buf, force_terminal=False, width=160).print(table)
    return buf.getvalue()