code-provenance 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_provenance-0.1.0/PKG-INFO +122 -0
- code_provenance-0.1.0/README.md +98 -0
- code_provenance-0.1.0/code_provenance/__init__.py +1 -0
- code_provenance-0.1.0/code_provenance/cli.py +69 -0
- code_provenance-0.1.0/code_provenance/compose_parser.py +58 -0
- code_provenance-0.1.0/code_provenance/github.py +250 -0
- code_provenance-0.1.0/code_provenance/models.py +32 -0
- code_provenance-0.1.0/code_provenance/output.py +33 -0
- code_provenance-0.1.0/code_provenance/registry.py +121 -0
- code_provenance-0.1.0/code_provenance/resolver.py +160 -0
- code_provenance-0.1.0/code_provenance.egg-info/PKG-INFO +122 -0
- code_provenance-0.1.0/code_provenance.egg-info/SOURCES.txt +24 -0
- code_provenance-0.1.0/code_provenance.egg-info/dependency_links.txt +1 -0
- code_provenance-0.1.0/code_provenance.egg-info/entry_points.txt +2 -0
- code_provenance-0.1.0/code_provenance.egg-info/requires.txt +6 -0
- code_provenance-0.1.0/code_provenance.egg-info/top_level.txt +1 -0
- code_provenance-0.1.0/pyproject.toml +43 -0
- code_provenance-0.1.0/setup.cfg +4 -0
- code_provenance-0.1.0/tests/test_cli.py +52 -0
- code_provenance-0.1.0/tests/test_compose_parser.py +72 -0
- code_provenance-0.1.0/tests/test_github.py +171 -0
- code_provenance-0.1.0/tests/test_integration.py +30 -0
- code_provenance-0.1.0/tests/test_models.py +19 -0
- code_provenance-0.1.0/tests/test_output.py +49 -0
- code_provenance-0.1.0/tests/test_registry.py +77 -0
- code_provenance-0.1.0/tests/test_resolver.py +197 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: code-provenance
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Resolve Docker images to their source code commits on GitHub
|
|
5
|
+
Author: SCRT Labs
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/scrtlabs/code-provenance
|
|
8
|
+
Project-URL: Repository, https://github.com/scrtlabs/code-provenance
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Topic :: Software Development :: Build Tools
|
|
16
|
+
Classifier: Topic :: System :: Software Distribution
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
Requires-Dist: pyyaml>=6.0
|
|
20
|
+
Requires-Dist: requests>=2.31
|
|
21
|
+
Requires-Dist: rich>=13.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
24
|
+
|
|
25
|
+
# code-provenance
|
|
26
|
+
|
|
27
|
+
Resolve Docker images in a docker-compose file to their exact source code commits on GitHub.
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install code-provenance
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Requires Python 3.10+.
|
|
36
|
+
|
|
37
|
+
## CLI Usage
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
code-provenance [compose-file] [--json] [--verbose]
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
- `compose-file` -- path to a docker-compose file (default: `docker-compose.yml`)
|
|
44
|
+
- `--json` -- output results as JSON
|
|
45
|
+
- `--verbose`, `-v` -- show resolution steps for each image
|
|
46
|
+
|
|
47
|
+
### Example
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
code-provenance docker-compose.yml
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
```
|
|
54
|
+
┌─────────┬────────────────┬────────────────────────────┬──────────────┬──────────┬────────────┐
|
|
55
|
+
│ SERVICE │ IMAGE │ REPO │ COMMIT │ STATUS │ CONFIDENCE │
|
|
56
|
+
├─────────┼────────────────┼────────────────────────────┼──────────────┼──────────┼────────────┤
|
|
57
|
+
│ web │ traefik:v3.6.0 │ github.com/traefik/traefik │ 06db5168c0d9 │ resolved │ exact │
|
|
58
|
+
└─────────┴────────────────┴────────────────────────────┴──────────────┴──────────┴────────────┘
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Library Usage
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from code_provenance.compose_parser import parse_compose, parse_image_ref
|
|
65
|
+
from code_provenance.resolver import resolve_image
|
|
66
|
+
|
|
67
|
+
yaml_content = open("docker-compose.yml").read()
|
|
68
|
+
for service, image in parse_compose(yaml_content):
|
|
69
|
+
ref = parse_image_ref(image)
|
|
70
|
+
result = resolve_image(service, ref)
|
|
71
|
+
print(f"{result.service}: {result.commit} ({result.confidence})")
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## API Reference
|
|
75
|
+
|
|
76
|
+
### Functions
|
|
77
|
+
|
|
78
|
+
- `parse_compose(yaml_content: str) -> list[tuple[str, str]]` -- parse a docker-compose YAML string and return `(service_name, image_string)` pairs
|
|
79
|
+
- `parse_image_ref(image: str) -> ImageRef` -- parse a Docker image string into its components
|
|
80
|
+
- `resolve_image(service: str, ref: ImageRef) -> ImageResult` -- resolve an image reference to its source code commit
|
|
81
|
+
|
|
82
|
+
### ImageRef
|
|
83
|
+
|
|
84
|
+
| Field | Type | Description |
|
|
85
|
+
|-------|------|-------------|
|
|
86
|
+
| `registry` | `str` | e.g. `"ghcr.io"`, `"docker.io"` |
|
|
87
|
+
| `namespace` | `str` | e.g. `"myorg"`, `"library"` |
|
|
88
|
+
| `name` | `str` | e.g. `"traefik"`, `"postgres"` |
|
|
89
|
+
| `tag` | `str` | e.g. `"v3.6.0"`, `"latest"` |
|
|
90
|
+
| `raw` | `str` | original image string from docker-compose |
|
|
91
|
+
|
|
92
|
+
### ImageResult
|
|
93
|
+
|
|
94
|
+
| Field | Type | Description |
|
|
95
|
+
|-------|------|-------------|
|
|
96
|
+
| `service` | `str` | service name from docker-compose |
|
|
97
|
+
| `image` | `str` | original image string |
|
|
98
|
+
| `registry` | `str` | image registry |
|
|
99
|
+
| `repo` | `str \| None` | GitHub repository URL |
|
|
100
|
+
| `tag` | `str` | image tag |
|
|
101
|
+
| `commit` | `str \| None` | resolved commit SHA |
|
|
102
|
+
| `commit_url` | `str \| None` | URL to the commit on GitHub |
|
|
103
|
+
| `status` | `str` | `"resolved"`, `"repo_not_found"`, `"repo_found_tag_not_matched"`, or `"no_tag"` |
|
|
104
|
+
| `resolution_method` | `str \| None` | how the commit was resolved (e.g. `"oci_labels"`, `"tag_match"`) |
|
|
105
|
+
| `confidence` | `str \| None` | `"exact"` or `"approximate"` |
|
|
106
|
+
| `steps` | `list[str]` | resolution steps taken (useful with `--verbose`) |
|
|
107
|
+
|
|
108
|
+
## Authentication
|
|
109
|
+
|
|
110
|
+
Set `GITHUB_TOKEN` for full functionality (digest resolution, `:latest` on GHCR, higher rate limits):
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
export GITHUB_TOKEN=ghp_your_token_here
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Create a classic token at https://github.com/settings/tokens with `read:packages` scope. If using the `gh` CLI, run `gh auth refresh -h github.com -s read:packages` first.
|
|
117
|
+
|
|
118
|
+
The `run.sh` wrapper auto-detects the token from the `gh` CLI if it is available.
|
|
119
|
+
|
|
120
|
+
## License
|
|
121
|
+
|
|
122
|
+
MIT
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# code-provenance
|
|
2
|
+
|
|
3
|
+
Resolve Docker images in a docker-compose file to their exact source code commits on GitHub.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install code-provenance
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Requires Python 3.10+.
|
|
12
|
+
|
|
13
|
+
## CLI Usage
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
code-provenance [compose-file] [--json] [--verbose]
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
- `compose-file` -- path to a docker-compose file (default: `docker-compose.yml`)
|
|
20
|
+
- `--json` -- output results as JSON
|
|
21
|
+
- `--verbose`, `-v` -- show resolution steps for each image
|
|
22
|
+
|
|
23
|
+
### Example
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
code-provenance docker-compose.yml
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
```
|
|
30
|
+
┌─────────┬────────────────┬────────────────────────────┬──────────────┬──────────┬────────────┐
|
|
31
|
+
│ SERVICE │ IMAGE │ REPO │ COMMIT │ STATUS │ CONFIDENCE │
|
|
32
|
+
├─────────┼────────────────┼────────────────────────────┼──────────────┼──────────┼────────────┤
|
|
33
|
+
│ web │ traefik:v3.6.0 │ github.com/traefik/traefik │ 06db5168c0d9 │ resolved │ exact │
|
|
34
|
+
└─────────┴────────────────┴────────────────────────────┴──────────────┴──────────┴────────────┘
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Library Usage
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from code_provenance.compose_parser import parse_compose, parse_image_ref
|
|
41
|
+
from code_provenance.resolver import resolve_image
|
|
42
|
+
|
|
43
|
+
yaml_content = open("docker-compose.yml").read()
|
|
44
|
+
for service, image in parse_compose(yaml_content):
|
|
45
|
+
ref = parse_image_ref(image)
|
|
46
|
+
result = resolve_image(service, ref)
|
|
47
|
+
print(f"{result.service}: {result.commit} ({result.confidence})")
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## API Reference
|
|
51
|
+
|
|
52
|
+
### Functions
|
|
53
|
+
|
|
54
|
+
- `parse_compose(yaml_content: str) -> list[tuple[str, str]]` -- parse a docker-compose YAML string and return `(service_name, image_string)` pairs
|
|
55
|
+
- `parse_image_ref(image: str) -> ImageRef` -- parse a Docker image string into its components
|
|
56
|
+
- `resolve_image(service: str, ref: ImageRef) -> ImageResult` -- resolve an image reference to its source code commit
|
|
57
|
+
|
|
58
|
+
### ImageRef
|
|
59
|
+
|
|
60
|
+
| Field | Type | Description |
|
|
61
|
+
|-------|------|-------------|
|
|
62
|
+
| `registry` | `str` | e.g. `"ghcr.io"`, `"docker.io"` |
|
|
63
|
+
| `namespace` | `str` | e.g. `"myorg"`, `"library"` |
|
|
64
|
+
| `name` | `str` | e.g. `"traefik"`, `"postgres"` |
|
|
65
|
+
| `tag` | `str` | e.g. `"v3.6.0"`, `"latest"` |
|
|
66
|
+
| `raw` | `str` | original image string from docker-compose |
|
|
67
|
+
|
|
68
|
+
### ImageResult
|
|
69
|
+
|
|
70
|
+
| Field | Type | Description |
|
|
71
|
+
|-------|------|-------------|
|
|
72
|
+
| `service` | `str` | service name from docker-compose |
|
|
73
|
+
| `image` | `str` | original image string |
|
|
74
|
+
| `registry` | `str` | image registry |
|
|
75
|
+
| `repo` | `str \| None` | GitHub repository URL |
|
|
76
|
+
| `tag` | `str` | image tag |
|
|
77
|
+
| `commit` | `str \| None` | resolved commit SHA |
|
|
78
|
+
| `commit_url` | `str \| None` | URL to the commit on GitHub |
|
|
79
|
+
| `status` | `str` | `"resolved"`, `"repo_not_found"`, `"repo_found_tag_not_matched"`, or `"no_tag"` |
|
|
80
|
+
| `resolution_method` | `str \| None` | how the commit was resolved (e.g. `"oci_labels"`, `"tag_match"`) |
|
|
81
|
+
| `confidence` | `str \| None` | `"exact"` or `"approximate"` |
|
|
82
|
+
| `steps` | `list[str]` | resolution steps taken (useful with `--verbose`) |
|
|
83
|
+
|
|
84
|
+
## Authentication
|
|
85
|
+
|
|
86
|
+
Set `GITHUB_TOKEN` for full functionality (digest resolution, `:latest` on GHCR, higher rate limits):
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
export GITHUB_TOKEN=ghp_your_token_here
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Create a classic token at https://github.com/settings/tokens with `read:packages` scope. If using the `gh` CLI, run `gh auth refresh -h github.com -s read:packages` first.
|
|
93
|
+
|
|
94
|
+
The `run.sh` wrapper auto-detects the token from the `gh` CLI if it is available.
|
|
95
|
+
|
|
96
|
+
## License
|
|
97
|
+
|
|
98
|
+
MIT
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from code_provenance.models import ImageRef, ImageResult
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from code_provenance.compose_parser import parse_compose, parse_image_ref
|
|
5
|
+
from code_provenance.resolver import resolve_image
|
|
6
|
+
from code_provenance.output import format_json, format_table
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def main(argv: list[str] | None = None) -> int:
|
|
10
|
+
parser = argparse.ArgumentParser(
|
|
11
|
+
prog="code-provenance",
|
|
12
|
+
description="Resolve Docker images to their source code commits on GitHub.",
|
|
13
|
+
)
|
|
14
|
+
parser.add_argument(
|
|
15
|
+
"compose_file",
|
|
16
|
+
nargs="?",
|
|
17
|
+
default="docker-compose.yml",
|
|
18
|
+
help="Path to docker-compose file (default: docker-compose.yml)",
|
|
19
|
+
)
|
|
20
|
+
parser.add_argument(
|
|
21
|
+
"--json",
|
|
22
|
+
action="store_true",
|
|
23
|
+
dest="json_output",
|
|
24
|
+
help="Output results as JSON",
|
|
25
|
+
)
|
|
26
|
+
parser.add_argument(
|
|
27
|
+
"--verbose", "-v",
|
|
28
|
+
action="store_true",
|
|
29
|
+
help="Show resolution steps",
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
args = parser.parse_args(argv)
|
|
33
|
+
|
|
34
|
+
compose_path = Path(args.compose_file)
|
|
35
|
+
if not compose_path.exists():
|
|
36
|
+
print(f"Error: {compose_path} not found", file=sys.stderr)
|
|
37
|
+
return 1
|
|
38
|
+
|
|
39
|
+
yaml_content = compose_path.read_text()
|
|
40
|
+
services = parse_compose(yaml_content)
|
|
41
|
+
|
|
42
|
+
if not services:
|
|
43
|
+
print("No services with images found.", file=sys.stderr)
|
|
44
|
+
return 0
|
|
45
|
+
|
|
46
|
+
results = []
|
|
47
|
+
for service_name, image_string in services:
|
|
48
|
+
ref = parse_image_ref(image_string)
|
|
49
|
+
result = resolve_image(service_name, ref)
|
|
50
|
+
results.append(result)
|
|
51
|
+
|
|
52
|
+
if args.verbose:
|
|
53
|
+
for result in results:
|
|
54
|
+
print(f"\nResolving {result.image} ...", file=sys.stderr)
|
|
55
|
+
for step in result.steps:
|
|
56
|
+
print(f" {step}", file=sys.stderr)
|
|
57
|
+
print(f" → {result.status}" + (f" ({result.resolution_method}, {result.confidence})" if result.status == "resolved" else ""), file=sys.stderr)
|
|
58
|
+
print(file=sys.stderr)
|
|
59
|
+
|
|
60
|
+
if args.json_output:
|
|
61
|
+
print(format_json(results))
|
|
62
|
+
else:
|
|
63
|
+
print(format_table(results))
|
|
64
|
+
|
|
65
|
+
return 0
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
if __name__ == "__main__":
|
|
69
|
+
sys.exit(main())
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import yaml
|
|
2
|
+
from code_provenance.models import ImageRef
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def parse_image_ref(image_string: str) -> ImageRef:
    """Parse a Docker image string into an ImageRef.

    Handles ``name``, ``name:tag``, ``name@digest`` and ``name:tag@digest``
    with an optional registry host and nested repository paths.

    Args:
        image_string: Image reference as written in the compose file.

    Returns:
        An ImageRef whose ``tag`` is the digest when one is present,
        otherwise the explicit tag, otherwise ``"latest"``. ``raw`` keeps the
        unmodified input.
    """
    raw = image_string

    # Split off the digest first (image@sha256:...).
    name_part, has_digest, digest = image_string.partition("@")

    # Only a colon in the last path component separates a tag; an earlier
    # colon belongs to a registry port such as "host:5000/app".
    has_tag = ":" in name_part.rsplit("/", 1)[-1]
    explicit_tag = ""
    if has_tag:
        # Also strips the tag from "name:tag@digest" so it does not end up
        # inside the image name.
        name_part, _, explicit_tag = name_part.rpartition(":")

    if has_digest:
        tag = digest
    elif has_tag:
        tag = explicit_tag
    else:
        tag = "latest"

    # Determine registry: the first component is a host when it contains a
    # dot or a port, or is the literal "localhost".
    parts = name_part.split("/")
    first = parts[0]
    if len(parts) >= 2 and ("." in first or ":" in first or first == "localhost"):
        registry = first
        remaining = parts[1:]
    else:
        registry = "docker.io"
        remaining = parts

    # Determine namespace and name; single-component names are official
    # Docker Hub images under "library".
    if len(remaining) == 1:
        namespace = "library"
        name = remaining[0]
    else:
        namespace = remaining[0]
        name = "/".join(remaining[1:])

    return ImageRef(
        registry=registry,
        namespace=namespace,
        name=name,
        tag=tag,
        raw=raw,
    )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def parse_compose(yaml_content: str) -> list[tuple[str, str]]:
    """Parse docker-compose YAML and return list of (service_name, image_string).

    Services without an ``image`` key (for example build-only services) are
    skipped. An empty document, a document whose top level is not a mapping,
    or a ``services`` value that is not a mapping yields an empty list.

    Args:
        yaml_content: Text of the compose file.

    Returns:
        ``(service_name, image_string)`` pairs in document order.
    """
    data = yaml.safe_load(yaml_content)
    # safe_load returns None for an empty document and may return a list or
    # scalar; neither has .get(), so bail out instead of crashing.
    if not isinstance(data, dict):
        return []

    services = data.get("services")
    if not isinstance(services, dict):
        return []

    return [
        (service_name, service_config["image"])
        for service_name, service_config in services.items()
        if isinstance(service_config, dict) and "image" in service_config
    ]
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import requests
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def github_headers() -> dict[str, str]:
    """Return the HTTP headers used for GitHub API requests.

    The ``Accept`` header is always present; a ``Bearer`` authorization
    header is added when the ``GITHUB_TOKEN`` environment variable is set to
    a non-empty value.
    """
    token = os.environ.get("GITHUB_TOKEN")
    base = {"Accept": "application/vnd.github+json"}
    if not token:
        return base
    return {**base, "Authorization": f"Bearer {token}"}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _normalize_tag(tag: str) -> str:
    """Strip a single leading 'v' for comparison.

    Only one prefix character is removed, so ``"v1.2.3"`` becomes ``"1.2.3"``
    while ``"vv1.0"`` becomes ``"v1.0"``. ``str.lstrip("v")`` would remove
    every leading "v" and make distinct tags compare equal.
    """
    return tag.removeprefix("v")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _is_prefix_match(image_tag: str, git_tag: str) -> bool:
    """Tell whether git_tag refines image_tag by at least one more component.

    Both tags are normalized first. The image tag must be followed by a dot
    in the git tag, so 'v2.10' matches 'v2.10.7' but neither 'v2.1' nor
    'v2.100'.
    """
    required_prefix = _normalize_tag(image_tag) + "."
    return _normalize_tag(git_tag).startswith(required_prefix)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _parse_version_tuple(tag: str) -> tuple[int, ...] | None:
    """Convert a version-like tag into a tuple of ints, or None.

    The tag is normalized, everything from the first '-' or '+' onward
    (suffixes such as -rc1 or -beta2) is dropped, and the rest is split on
    dots. None is returned as soon as a component is not an integer.
    """
    core = re.split(r"[-+]", _normalize_tag(tag), maxsplit=1)[0]
    numbers = []
    for piece in core.split("."):
        try:
            numbers.append(int(piece))
        except ValueError:
            return None
    return tuple(numbers)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def resolve_tag_to_commit(owner: str, repo: str, tag: str) -> tuple[str, bool] | None:
    """Resolve an image tag to a commit SHA by matching against git tags.

    Walks the paginated GitHub tags listing. Tries exact match first (with or
    without a 'v' prefix), then prefix match (e.g., v2.10 -> highest v2.10.x).

    Args:
        owner: GitHub repository owner.
        repo: GitHub repository name.
        tag: Image tag to look up.

    Returns:
        (commit_sha, is_exact_match), or None when nothing matches, the API
        answers with a non-200 status, or the request fails.
    """
    headers = github_headers()
    url = f"https://api.github.com/repos/{owner}/{repo}/tags"
    # Only the first request needs explicit paging parameters; the "next"
    # links already carry them in their query string.
    params: dict[str, int] | None = {"per_page": 100}

    prefix_candidates: list[tuple[tuple[int, ...], str]] = []

    while url:
        # Network and decoding failures mean "unresolved", matching the other
        # GitHub helpers in this module, instead of propagating to the caller.
        try:
            resp = requests.get(url, headers=headers, params=params, timeout=10)
            if resp.status_code != 200:
                return None
            git_tags = resp.json()
        except (requests.RequestException, ValueError):
            return None

        for git_tag in git_tags:
            name = git_tag["name"]
            # Exact match (with/without v prefix)
            if name == tag or name == f"v{tag}" or _normalize_tag(name) == _normalize_tag(tag):
                return git_tag["commit"]["sha"], True

            # Collect prefix match candidates
            if _is_prefix_match(tag, name):
                version = _parse_version_tuple(name)
                if version is not None:
                    prefix_candidates.append((version, git_tag["commit"]["sha"]))

        url = resp.links.get("next", {}).get("url")
        params = None

    # Return the highest version among prefix matches
    if prefix_candidates:
        return max(prefix_candidates)[1], False

    return None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_latest_release_commit(owner: str, repo: str) -> tuple[str, str] | None:
    """Look up the repository's latest GitHub release and its commit.

    The release's tag name is fetched from the releases API and then resolved
    through resolve_tag_to_commit.

    Returns (commit_sha, tag_name), or None when the release lookup fails,
    has no tag name, or the tag cannot be resolved.
    """
    release_url = f"https://api.github.com/repos/{owner}/{repo}/releases/latest"
    request_headers = github_headers()
    try:
        response = requests.get(release_url, headers=request_headers, timeout=10)
        if response.status_code != 200:
            return None
        tag_name = response.json().get("tag_name")
    except requests.RequestException:
        return None

    if not tag_name:
        return None

    # Map the release tag onto a commit SHA.
    resolved = resolve_tag_to_commit(owner, repo, tag_name)
    if not resolved:
        return None
    sha, _exact = resolved
    return sha, tag_name
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def get_latest_commit(owner: str, repo: str) -> str | None:
    """Return the SHA of the first commit the commits API lists for the repo.

    A single commit is requested with no ref given. None is returned on a
    non-200 response, an empty listing, a request failure, or a payload
    missing the expected keys.
    """
    endpoint = f"https://api.github.com/repos/{owner}/{repo}/commits"
    request_headers = github_headers()
    try:
        response = requests.get(
            endpoint,
            headers=request_headers,
            params={"per_page": 1},
            timeout=10,
        )
        if response.status_code == 200:
            history = response.json()
            if history:
                return history[0]["sha"]
    except (requests.RequestException, KeyError, IndexError):
        # Best-effort lookup: any of these simply means "unknown".
        return None
    return None
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def check_github_repo_exists(owner: str, repo: str) -> bool:
    """Report whether the GitHub API answers 200 for owner/repo.

    Any other status code, and any request failure, counts as "does not
    exist".
    """
    repo_url = f"https://api.github.com/repos/{owner}/{repo}"
    request_headers = github_headers()
    try:
        response = requests.get(repo_url, headers=request_headers, timeout=10)
    except requests.RequestException:
        return False
    return response.status_code == 200
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _find_ghcr_package_version(
    owner: str, package_name: str, *, match_digest: str | None = None, match_tag: str | None = None,
) -> dict | None:
    """Locate a GHCR package version by digest and/or tag via the Packages API.

    The owner is tried first as an organization and then as a user. The
    package metadata supplies the linked source repository; the version
    listing is then scanned page by page for the first entry that satisfies
    the filters, and that entry's tags (except "latest") are resolved to a
    commit with resolve_tag_to_commit.

    Needs GITHUB_TOKEN (read:packages scope); without an Authorization header
    the function returns None immediately. A 403 on the package metadata also
    ends the search.

    Returns {"repo": "owner/repo", "commit": sha-or-None, "tags": [...]} for
    the first matching version, or None when nothing matches.
    """
    auth_headers = github_headers()
    if "Authorization" not in auth_headers:
        return None

    for entity_type in ("orgs", "users"):
        pkg_base = f"https://api.github.com/{entity_type}/{owner}/packages/container/{package_name}"

        # Package metadata tells us which repository the image is built from.
        try:
            pkg_resp = requests.get(pkg_base, headers=auth_headers, timeout=10)
            if pkg_resp.status_code == 403:
                return None
            if pkg_resp.status_code != 200:
                continue
            pkg_data = pkg_resp.json()
        except requests.RequestException:
            continue

        full_name = pkg_data.get("repository", {}).get("full_name")
        if not full_name:
            continue

        # Walk the paginated version listing.
        page_url = f"{pkg_base}/versions"
        try:
            while page_url:
                page = requests.get(page_url, headers=auth_headers, params={"per_page": 50}, timeout=10)
                if page.status_code != 200:
                    break

                for version in page.json():
                    version_name = version.get("name", "")
                    tags = version.get("metadata", {}).get("container", {}).get("tags", [])

                    # The version name is the digest. A digest mismatch only
                    # rules the version out when no tag filter was given.
                    digest_differs = bool(match_digest) and version_name != match_digest
                    if digest_differs and match_tag is None:
                        continue
                    # A tag filter, when given, must be satisfied.
                    if match_tag and match_tag not in tags:
                        continue

                    # Matching version found: try each real tag until one
                    # resolves to a commit.
                    repo_owner, repo_name = full_name.split("/", 1)
                    for candidate in tags:
                        if candidate == "latest":
                            continue
                        resolved = resolve_tag_to_commit(repo_owner, repo_name, candidate)
                        if resolved:
                            sha, _exact = resolved
                            return {"repo": full_name, "commit": sha, "tags": tags}

                    return {"repo": full_name, "commit": None, "tags": tags}

                page_url = page.links.get("next", {}).get("url")
        except requests.RequestException:
            continue

    return None
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def resolve_ghcr_digest_via_packages(owner: str, package_name: str, digest: str) -> dict | None:
    """Look up a GHCR package version by digest.

    Delegates to _find_ghcr_package_version with only the digest filter set
    and returns its result unchanged.
    """
    version_info = _find_ghcr_package_version(owner, package_name, match_digest=digest)
    return version_info
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def resolve_ghcr_latest_via_packages(owner: str, package_name: str) -> dict | None:
    """Look up the GHCR package version currently tagged "latest".

    Delegates to _find_ghcr_package_version with only the tag filter set and
    returns its result unchanged.
    """
    version_info = _find_ghcr_package_version(owner, package_name, match_tag="latest")
    return version_info
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def infer_repo_from_dockerhub(namespace: str, name: str) -> tuple[str, str] | None:
    """Try to find the GitHub repo for a Docker Hub image.

    First guesses a repository from the image coordinates and checks that it
    exists on GitHub; failing that, searches the Docker Hub description for
    the first github.com link.

    Args:
        namespace: Docker Hub namespace ("library" for official images).
        name: Docker Hub repository name.

    Returns:
        (owner, repo) on GitHub, or None when nothing could be inferred.
    """
    # Official images (library/X) are guessed as X/X, e.g. traefik ->
    # traefik/traefik; namespaced images as namespace/name.
    owner_guess = name if namespace == "library" else namespace
    if check_github_repo_exists(owner_guess, name):
        return owner_guess, name

    # Fall back to scraping Docker Hub description for GitHub links
    url = f"https://hub.docker.com/v2/repositories/{namespace}/{name}"
    try:
        resp = requests.get(url, timeout=10)
        if resp.status_code != 200:
            return None

        data = resp.json()
        text = (data.get("full_description") or "") + " " + (data.get("description") or "")
    except requests.RequestException:
        return None

    match = re.search(r"https?://github\.com/([\w.-]+)/([\w.-]+)", text)
    if not match:
        return None

    owner, repo = match.groups()
    # The character class also swallows a sentence-ending period and the
    # ".git" suffix of clone URLs; neither is part of the repository name.
    repo = repo.rstrip(".").removesuffix(".git")
    if not repo:
        return None
    return owner, repo
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
@dataclass
class ImageRef:
    """Components of a Docker image reference after parsing."""

    # Registry host, e.g. "ghcr.io" or "docker.io".
    registry: str
    # Owner part of the repository path, e.g. "myorg" or "library".
    namespace: str
    # Image name, e.g. "excalidraw" or "postgres".
    name: str
    # Tag such as "v3.4.12" or "latest".
    tag: str
    # The image string exactly as it appeared in docker-compose.
    raw: str

    @property
    def full_name(self) -> str:
        """Return registry, namespace and name joined by '/', without the tag."""
        return "{}/{}/{}".format(self.registry, self.namespace, self.name)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
|
|
20
|
+
class ImageResult:
|
|
21
|
+
"""Resolution result for a single image."""
|
|
22
|
+
service: str
|
|
23
|
+
image: str # original image string
|
|
24
|
+
registry: str
|
|
25
|
+
repo: str | None = None
|
|
26
|
+
tag: str = ""
|
|
27
|
+
commit: str | None = None
|
|
28
|
+
commit_url: str | None = None
|
|
29
|
+
status: str = "repo_not_found"
|
|
30
|
+
resolution_method: str | None = None
|
|
31
|
+
confidence: str | None = None # "exact", "approximate", or None if unresolved
|
|
32
|
+
steps: list[str] = field(default_factory=list)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from dataclasses import asdict
|
|
3
|
+
from io import StringIO
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
from rich.table import Table
|
|
6
|
+
from code_provenance.models import ImageResult
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def format_json(results: list[ImageResult]) -> str:
    """Serialize the results as a JSON array indented by two spaces.

    Each result is converted to a plain dict with dataclasses.asdict.
    """
    payload = list(map(asdict, results))
    return json.dumps(payload, indent=2)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def format_table(results: list[ImageResult]) -> str:
    """Render the results as a rich table and return the rendered text.

    Commits are shortened to 12 characters, "https://" is removed from the
    repo, and missing repo, commit or confidence values are shown as "-".
    The table is printed to an in-memory, non-terminal console 160 columns
    wide.
    """
    table = Table(show_header=True, header_style="bold")
    for heading in ("SERVICE", "IMAGE", "REPO", "COMMIT", "STATUS", "CONFIDENCE"):
        table.add_column(heading)

    for entry in results:
        table.add_row(
            entry.service,
            entry.image,
            entry.repo.replace("https://", "") if entry.repo else "-",
            entry.commit[:12] if entry.commit else "-",
            entry.status,
            entry.confidence or "-",
        )

    sink = StringIO()
    Console(file=sink, force_terminal=False, width=160).print(table)
    return sink.getvalue()
|