data-annotations 2.2.0__tar.gz → 2.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. {data_annotations-2.2.0 → data_annotations-2.3.0}/PKG-INFO +33 -3
  2. {data_annotations-2.2.0 → data_annotations-2.3.0}/README.md +32 -2
  3. {data_annotations-2.2.0 → data_annotations-2.3.0}/pyproject.toml +1 -1
  4. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/annotations/models.py +2 -2
  5. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/annotations/writers.py +7 -6
  6. data_annotations-2.3.0/src/data_annotations/cli_app/provenance_commands.py +321 -0
  7. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/provenance/__init__.py +14 -1
  8. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/provenance/models.py +10 -0
  9. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/provenance/recovery.py +326 -4
  10. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/provenance/writers.py +119 -10
  11. data_annotations-2.2.0/src/data_annotations/cli_app/provenance_commands.py +0 -107
  12. {data_annotations-2.2.0 → data_annotations-2.3.0}/LICENSE +0 -0
  13. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/__init__.py +0 -0
  14. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/_decorators.py +0 -0
  15. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/annotations/__init__.py +0 -0
  16. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/annotations/decorators.py +0 -0
  17. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/cli.py +0 -0
  18. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/cli_app/__init__.py +0 -0
  19. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/cli_app/annotate.py +0 -0
  20. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/cli_app/common.py +0 -0
  21. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/cli_app/prompts.py +0 -0
  22. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/description/__init__.py +0 -0
  23. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/description/decorators.py +0 -0
  24. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/description/models.py +0 -0
  25. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/description/writers.py +0 -0
  26. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/provenance/decorators.py +0 -0
  27. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/provenance/git.py +0 -0
  28. {data_annotations-2.2.0 → data_annotations-2.3.0}/src/data_annotations/provenance/runtime.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-annotations
3
- Version: 2.2.0
3
+ Version: 2.3.0
4
4
  Summary: Annotate generated data artifacts
5
5
  Keywords: annotations,data,metadata,provenance,reproducibility
6
6
  Author: Rodrigo C. G. Pena
@@ -102,6 +102,9 @@ Every annotation document includes provenance with:
102
102
  - Git commit, branch, dirty state, canonical repository remote, exact tags, and
103
103
  `git describe` output when available
104
104
  - The current `SLURM_JOB_ID` when available
105
+ - Structured snapshots for recorded local inputs, including file checksums,
106
+ directory content digests, and upstream annotation sidecar references when
107
+ present
105
108
 
106
109
  You can also attach your own parameters, input file paths, and function names.
107
110
  Local filesystem paths in provenance are stored as absolute paths. URI-style inputs
@@ -380,6 +383,7 @@ File annotations store:
380
383
  - `subject.kind`
381
384
  - `subject.sha256`
382
385
  - `provenance.*`
386
+ - `provenance.input_artifacts[]`
383
387
  - `description.title`
384
388
  - `description.summary`
385
389
  - `description.fields`
@@ -396,6 +400,7 @@ Directory annotations store:
396
400
  - `subject.child_bundles[]`
397
401
  - `subject.content_digest`
398
402
  - `provenance.*`
403
+ - `provenance.input_artifacts[]`
399
404
  - `description.title`
400
405
  - `description.summary`
401
406
  - `description.artifact_groups[]`
@@ -529,13 +534,16 @@ per call.
529
534
  ## Recovery Helpers
530
535
 
531
536
  Use `artifact_matches_manifest(...)` to verify whether a detached artifact still
532
- matches an annotation document, and `checkout_manifest_source(...)` to recover the
533
- recorded code state from Git metadata.
537
+ matches an annotation document. Use `analyze_provenance_chain(...)` when you also
538
+ want to verify recorded inputs and recursively follow upstream annotation
539
+ sidecars. Use `checkout_manifest_source(...)` to recover the recorded code state
540
+ from Git metadata.
534
541
 
535
542
  ```python
536
543
  from pathlib import Path
537
544
 
538
545
  from data_annotations.provenance import (
546
+ analyze_provenance_chain,
539
547
  artifact_matches_manifest,
540
548
  checkout_manifest_source,
541
549
  )
@@ -544,6 +552,8 @@ annotation_path = Path("outputs/participants.csv.annotation.json")
544
552
  artifact_path = Path("downloads/participants.csv")
545
553
 
546
554
  if artifact_matches_manifest(artifact_path, annotation_path):
555
+ chain = analyze_provenance_chain(artifact_path)
556
+ print(chain.status)
547
557
  recovered = checkout_manifest_source(annotation_path)
548
558
  print(recovered.checkout_path)
549
559
  print(recovered.script_path)
@@ -602,12 +612,24 @@ For provenance inspection and source recovery:
602
612
 
603
613
  ```bash
604
614
  data-annotations provenance match path/to/artifact
615
+ data-annotations provenance chain path/to/artifact
616
+ data-annotations provenance chain path/to/artifact --full-paths
605
617
  data-annotations provenance checkout path/to/artifact
606
618
  ```
607
619
 
608
620
  Command `match` auto-discovers `*.annotation.json` for files and `data-annotations.json` for
609
621
  directories, prints a verification summary, and suggests the exact `checkout`
610
622
  command to run next when Git recovery metadata is available.
623
+ Command `chain` uses the same sidecar discovery, then verifies the artifact,
624
+ recorded input snapshots, and any upstream annotation sidecars reachable from
625
+ those inputs. Its default output shows a compact relative-path tree and lists
626
+ stale, missing, or unverifiable nodes first; use `--full-paths` when you need
627
+ absolute paths.
628
+
629
+ If `data-annotations provenance --help` does not list `chain`, your shell is
630
+ resolving an older installed command. From a source checkout, use
631
+ `uv run data-annotations provenance chain ...`, or reinstall the CLI from the
632
+ updated source before using the bare `data-annotations` command.
611
633
 
612
634
  ### Run With `uvx`
613
635
 
@@ -632,6 +654,7 @@ the project environment. You can then run:
632
654
  uv run data-annotations annotate file path/to/participants.csv
633
655
  uv run data-annotations annotate directory path/to/run-001
634
656
  uv run data-annotations provenance match path/to/participants.csv
657
+ uv run data-annotations provenance chain path/to/participants.csv
635
658
  uv run data-annotations provenance checkout path/to/participants.csv
636
659
  ```
637
660
 
@@ -684,9 +707,12 @@ uv run data-annotations provenance checkout path/to/participants.csv
684
707
 
685
708
  - `ProducedFile`
686
709
  - `ChildBundle`
710
+ - `InputArtifact`
687
711
  - `BaseProvenance`
688
712
  - `FileManifest`
689
713
  - `DirectoryManifest`
714
+ - `ProvenanceChainNode`
715
+ - `ProvenanceChainReport`
690
716
  - `RecoveredSource`
691
717
 
692
718
  ### Provenance Functions
@@ -696,6 +722,8 @@ uv run data-annotations provenance checkout path/to/participants.csv
696
722
  - `write_file_manifest(...)`
697
723
  - `write_directory_manifest(...)`
698
724
  - `directory_content_digest(...)`
725
+ - `analyze_provenance_chain(...)`
726
+ - `provenance_chain_is_fresh(...)`
699
727
  - `artifact_matches_manifest(...)`
700
728
  - `checkout_manifest_source(...)`
701
729
 
@@ -717,6 +745,8 @@ uv run python examples/write_file_manifest.py
717
745
  uv run python examples/write_directory_manifest.py
718
746
  uv run python examples/write_file_description.py
719
747
  uv run python examples/write_directory_description.py
748
+ uv run python examples/provenance_chain.py
749
+ uv run python examples/provenance_chain_cli.py
720
750
  uv run python examples/recover_provenance.py
721
751
  uv run python examples/recover_provenance_cli.py
722
752
  ```
@@ -73,6 +73,9 @@ Every annotation document includes provenance with:
73
73
  - Git commit, branch, dirty state, canonical repository remote, exact tags, and
74
74
  `git describe` output when available
75
75
  - The current `SLURM_JOB_ID` when available
76
+ - Structured snapshots for recorded local inputs, including file checksums,
77
+ directory content digests, and upstream annotation sidecar references when
78
+ present
76
79
 
77
80
  You can also attach your own parameters, input file paths, and function names.
78
81
  Local filesystem paths in provenance are stored as absolute paths. URI-style inputs
@@ -351,6 +354,7 @@ File annotations store:
351
354
  - `subject.kind`
352
355
  - `subject.sha256`
353
356
  - `provenance.*`
357
+ - `provenance.input_artifacts[]`
354
358
  - `description.title`
355
359
  - `description.summary`
356
360
  - `description.fields`
@@ -367,6 +371,7 @@ Directory annotations store:
367
371
  - `subject.child_bundles[]`
368
372
  - `subject.content_digest`
369
373
  - `provenance.*`
374
+ - `provenance.input_artifacts[]`
370
375
  - `description.title`
371
376
  - `description.summary`
372
377
  - `description.artifact_groups[]`
@@ -500,13 +505,16 @@ per call.
500
505
  ## Recovery Helpers
501
506
 
502
507
  Use `artifact_matches_manifest(...)` to verify whether a detached artifact still
503
- matches an annotation document, and `checkout_manifest_source(...)` to recover the
504
- recorded code state from Git metadata.
508
+ matches an annotation document. Use `analyze_provenance_chain(...)` when you also
509
+ want to verify recorded inputs and recursively follow upstream annotation
510
+ sidecars. Use `checkout_manifest_source(...)` to recover the recorded code state
511
+ from Git metadata.
505
512
 
506
513
  ```python
507
514
  from pathlib import Path
508
515
 
509
516
  from data_annotations.provenance import (
517
+ analyze_provenance_chain,
510
518
  artifact_matches_manifest,
511
519
  checkout_manifest_source,
512
520
  )
@@ -515,6 +523,8 @@ annotation_path = Path("outputs/participants.csv.annotation.json")
515
523
  artifact_path = Path("downloads/participants.csv")
516
524
 
517
525
  if artifact_matches_manifest(artifact_path, annotation_path):
526
+ chain = analyze_provenance_chain(artifact_path)
527
+ print(chain.status)
518
528
  recovered = checkout_manifest_source(annotation_path)
519
529
  print(recovered.checkout_path)
520
530
  print(recovered.script_path)
@@ -573,12 +583,24 @@ For provenance inspection and source recovery:
573
583
 
574
584
  ```bash
575
585
  data-annotations provenance match path/to/artifact
586
+ data-annotations provenance chain path/to/artifact
587
+ data-annotations provenance chain path/to/artifact --full-paths
576
588
  data-annotations provenance checkout path/to/artifact
577
589
  ```
578
590
 
579
591
  Command `match` auto-discovers `*.annotation.json` for files and `data-annotations.json` for
580
592
  directories, prints a verification summary, and suggests the exact `checkout`
581
593
  command to run next when Git recovery metadata is available.
594
+ Command `chain` uses the same sidecar discovery, then verifies the artifact,
595
+ recorded input snapshots, and any upstream annotation sidecars reachable from
596
+ those inputs. Its default output shows a compact relative-path tree and lists
597
+ stale, missing, or unverifiable nodes first; use `--full-paths` when you need
598
+ absolute paths.
599
+
600
+ If `data-annotations provenance --help` does not list `chain`, your shell is
601
+ resolving an older installed command. From a source checkout, use
602
+ `uv run data-annotations provenance chain ...`, or reinstall the CLI from the
603
+ updated source before using the bare `data-annotations` command.
582
604
 
583
605
  ### Run With `uvx`
584
606
 
@@ -603,6 +625,7 @@ the project environment. You can then run:
603
625
  uv run data-annotations annotate file path/to/participants.csv
604
626
  uv run data-annotations annotate directory path/to/run-001
605
627
  uv run data-annotations provenance match path/to/participants.csv
628
+ uv run data-annotations provenance chain path/to/participants.csv
606
629
  uv run data-annotations provenance checkout path/to/participants.csv
607
630
  ```
608
631
 
@@ -655,9 +678,12 @@ uv run data-annotations provenance checkout path/to/participants.csv
655
678
 
656
679
  - `ProducedFile`
657
680
  - `ChildBundle`
681
+ - `InputArtifact`
658
682
  - `BaseProvenance`
659
683
  - `FileManifest`
660
684
  - `DirectoryManifest`
685
+ - `ProvenanceChainNode`
686
+ - `ProvenanceChainReport`
661
687
  - `RecoveredSource`
662
688
 
663
689
  ### Provenance Functions
@@ -667,6 +693,8 @@ uv run data-annotations provenance checkout path/to/participants.csv
667
693
  - `write_file_manifest(...)`
668
694
  - `write_directory_manifest(...)`
669
695
  - `directory_content_digest(...)`
696
+ - `analyze_provenance_chain(...)`
697
+ - `provenance_chain_is_fresh(...)`
670
698
  - `artifact_matches_manifest(...)`
671
699
  - `checkout_manifest_source(...)`
672
700
 
@@ -688,6 +716,8 @@ uv run python examples/write_file_manifest.py
688
716
  uv run python examples/write_directory_manifest.py
689
717
  uv run python examples/write_file_description.py
690
718
  uv run python examples/write_directory_description.py
719
+ uv run python examples/provenance_chain.py
720
+ uv run python examples/provenance_chain_cli.py
691
721
  uv run python examples/recover_provenance.py
692
722
  uv run python examples/recover_provenance_cli.py
693
723
  ```
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "data-annotations"
3
- version = "2.2.0"
3
+ version = "2.3.0"
4
4
  description = "Annotate generated data artifacts"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -22,14 +22,14 @@ class DirectoryArtifactSubject(BaseModel):
22
22
 
23
23
 
24
24
  class FileAnnotationDocument(BaseModel):
25
- annotation_version: Literal["4"] = "4"
25
+ annotation_version: Literal["5"] = "5"
26
26
  subject: FileArtifactSubject
27
27
  provenance: BaseProvenance
28
28
  description: FileDescription
29
29
 
30
30
 
31
31
  class DirectoryAnnotationDocument(BaseModel):
32
- annotation_version: Literal["4"] = "4"
32
+ annotation_version: Literal["5"] = "5"
33
33
  subject: DirectoryArtifactSubject
34
34
  provenance: BaseProvenance
35
35
  description: DirectoryDescription
@@ -1,3 +1,4 @@
1
+ from collections.abc import Sequence
1
2
  from pathlib import Path
2
3
  from typing import Any, Callable
3
4
 
@@ -154,7 +155,7 @@ def _build_file_annotation_document(
154
155
  generation_context: dict[str, Any] | None = None,
155
156
  artifact_kind: ArtifactKind = "other",
156
157
  params: dict[str, Any] | None = None,
157
- inputs: list[str] | None = None,
158
+ inputs: Sequence[str | Path] | None = None,
158
159
  function: Callable[..., Any] | None = None,
159
160
  capture_mode: str = "runtime",
160
161
  provenance_overrides: dict[str, Any] | None = None,
@@ -205,7 +206,7 @@ def _build_directory_annotation_document(
205
206
  acquisition_context: dict[str, Any] | None = None,
206
207
  generation_context: dict[str, Any] | None = None,
207
208
  params: dict[str, Any] | None = None,
208
- inputs: list[str] | None = None,
209
+ inputs: Sequence[str | Path] | None = None,
209
210
  function: Callable[..., Any] | None = None,
210
211
  capture_mode: str = "runtime",
211
212
  provenance_overrides: dict[str, Any] | None = None,
@@ -285,7 +286,7 @@ def write_file_annotation(
285
286
  generation_context: dict[str, Any] | None = None,
286
287
  artifact_kind: ArtifactKind = "other",
287
288
  params: dict[str, Any] | None = None,
288
- inputs: list[str] | None = None,
289
+ inputs: Sequence[str | Path] | None = None,
289
290
  function: Callable[..., Any] | None = None,
290
291
  capture_mode: str = "runtime",
291
292
  provenance_overrides: dict[str, Any] | None = None,
@@ -324,7 +325,7 @@ def write_directory_annotation(
324
325
  acquisition_context: dict[str, Any] | None = None,
325
326
  generation_context: dict[str, Any] | None = None,
326
327
  params: dict[str, Any] | None = None,
327
- inputs: list[str] | None = None,
328
+ inputs: Sequence[str | Path] | None = None,
328
329
  function: Callable[..., Any] | None = None,
329
330
  capture_mode: str = "runtime",
330
331
  provenance_overrides: dict[str, Any] | None = None,
@@ -363,7 +364,7 @@ def annotate_file(
363
364
  generation_context: dict[str, Any] | None = None,
364
365
  artifact_kind: ArtifactKind = "other",
365
366
  params: dict[str, Any] | None = None,
366
- inputs: list[str] | None = None,
367
+ inputs: Sequence[str | Path] | None = None,
367
368
  function: Callable[..., Any] | None = None,
368
369
  write_readme: bool = True,
369
370
  write_schema: bool | None = None,
@@ -418,7 +419,7 @@ def annotate_directory(
418
419
  acquisition_context: dict[str, Any] | None = None,
419
420
  generation_context: dict[str, Any] | None = None,
420
421
  params: dict[str, Any] | None = None,
421
- inputs: list[str] | None = None,
422
+ inputs: Sequence[str | Path] | None = None,
422
423
  function: Callable[..., Any] | None = None,
423
424
  write_readme: bool = True,
424
425
  write_schema: bool | None = None,
@@ -0,0 +1,321 @@
1
+ import subprocess
2
+ from dataclasses import dataclass
3
+ from os.path import commonpath
4
+ from pathlib import Path
5
+ from urllib.parse import urlparse
6
+
7
+ import typer
8
+
9
+ from data_annotations.provenance import checkout_manifest_source
10
+ from data_annotations.provenance import recovery as provenance_recovery
11
+
12
+ from .common import (
13
+ _checkout_hint,
14
+ _echo_entries,
15
+ _error,
16
+ _match_target_path,
17
+ _missing_checkout_fields,
18
+ _resolve_manifest_path,
19
+ _resolved_path,
20
+ )
21
+
22
# Typer sub-application that groups the provenance inspection commands
# registered below; invoking it without arguments prints the help text.
provenance_app = typer.Typer(
    help="Inspect provenance recorded in annotation documents.",
    no_args_is_help=True,
)
26
+
27
+
28
# CLI command: compare an artifact with its annotation document, print the
# per-category verification summary, and suggest the follow-up checkout.
# Exits with status 1 unless the result is "match" or "partial_match".
# NOTE: documented with comments rather than a docstring because Typer would
# surface a docstring as the command's help text.
@provenance_app.command("match")
def match_command(
    target: Path = typer.Argument(
        ..., help="Artifact, directory, or annotation document path."
    ),
    manifest: Path | None = typer.Option(
        None,
        "--manifest",
        help="Explicit annotation document path to use instead of auto-discovery.",
    ),
) -> None:
    manifest_path = _resolve_manifest_path(target, manifest)
    candidate_path = _match_target_path(target, manifest)
    document = provenance_recovery._load_manifest(manifest_path)
    outcome = provenance_recovery._analyze_artifact_match(candidate_path, document)

    typer.echo(f"Target: {candidate_path}")
    typer.echo(f"Manifest: {manifest_path}")
    result_label = outcome.status.replace("_", " ").upper()
    typer.echo(f"Result: {result_label}")

    # One summary section per entry category, in a fixed order.
    sections = (
        ("Verified entries", outcome.verified_entries),
        ("Missing tracked entries", outcome.missing_tracked_entries),
        ("Mismatched tracked entries", outcome.mismatched_tracked_entries),
        ("Extra entries", outcome.extra_entries),
        ("Unverifiable tracked entries", outcome.unverifiable_tracked_entries),
    )
    for heading, entries in sections:
        _echo_entries(heading, entries)

    # Anything other than a full or partial match is reported as a failure.
    if outcome.status not in {"match", "partial_match"}:
        raise typer.Exit(code=1)

    absent_fields = _missing_checkout_fields(document)
    if absent_fields:
        typer.echo(
            "Checkout unavailable: manifest is missing " + ", ".join(absent_fields)
        )
        return

    typer.echo("Next step:")
    resolved_target = str(_resolved_path(target))
    # Only forward --manifest in the hint when the user supplied one.
    resolved_manifest = None if manifest is None else str(_resolved_path(manifest))
    typer.echo(" " + _checkout_hint(resolved_target, resolved_manifest))
73
+
74
+
75
# Immutable, display-oriented copy of a provenance chain node. Instances are
# built by _display_chain_node and consumed by the echo helpers in this module.
@dataclass(frozen=True)
class _DisplayChainNode:
    # Path (or URI-style value) exactly as recorded on the source chain node.
    path: str
    # Chain status of the node; compared against "fresh" and upper-cased when
    # printed.
    status: provenance_recovery.ChainStatus
    # Detail messages; the problems listing scans these for a problem term.
    details: tuple[str, ...]
    # Display nodes for this node's inputs, in their original order.
    inputs: tuple["_DisplayChainNode", ...]
81
+
82
+
83
+ def _is_uri_like(value: str) -> bool:
84
+ parsed = urlparse(value)
85
+ return bool(parsed.scheme and (parsed.netloc or "://" in value))
86
+
87
+
88
def _node_paths_equal(left: str, right: str) -> bool:
    """Return whether two chain node paths refer to the same location.

    If either value is URI-like the two are compared as plain strings;
    otherwise both are expanded (``~``) and resolved before comparison.
    """
    if not (_is_uri_like(left) or _is_uri_like(right)):
        first, second = (
            Path(candidate).expanduser().resolve() for candidate in (left, right)
        )
        return first == second
    return left == right
92
+
93
+
94
def _should_merge_upstream_node(
    node: provenance_recovery.ProvenanceChainNode,
    child: provenance_recovery.ProvenanceChainNode,
) -> bool:
    """Return whether ``child`` has the same path as ``node``.

    The display builder uses this to decide whether a sole input should be
    folded into its parent instead of being shown as a separate tree level.
    """
    parent_path, child_path = node.path, child.path
    return _node_paths_equal(parent_path, child_path)
99
+
100
+
101
def _display_chain_node(
    node: provenance_recovery.ProvenanceChainNode,
) -> _DisplayChainNode:
    """Convert a provenance chain node into its display form, recursively.

    When the node has exactly one input and that input has the same path, the
    input is folded into the node: its details are appended to the node's
    details and its own inputs become the node's inputs. The node's own path
    and status are always kept.
    """
    merged_details = node.details
    children = node.inputs

    if len(children) == 1:
        sole_input = children[0]
        if _should_merge_upstream_node(node, sole_input):
            merged_details = (*merged_details, *sole_input.details)
            children = sole_input.inputs

    display_children = tuple(map(_display_chain_node, children))
    return _DisplayChainNode(
        path=node.path,
        status=node.status,
        details=merged_details,
        inputs=display_children,
    )
117
+
118
+
119
def _filesystem_display_base(root: _DisplayChainNode) -> Path | None:
    """Return the common directory used to shorten paths in the chain output.

    Every non-URI node in the tree contributes one directory: the resolved
    path itself when it is an existing directory, otherwise its parent.
    Returns ``None`` when the tree has no filesystem nodes or when
    ``commonpath`` rejects the collected directories with ``ValueError``.
    """
    anchors: list[str] = []

    # Explicit stack; children are pushed reversed so nodes are visited in the
    # same pre-order as a recursive walk.
    pending = [root]
    while pending:
        current = pending.pop()
        if not _is_uri_like(current.path):
            resolved = Path(current.path).expanduser().resolve()
            anchor = resolved if resolved.is_dir() else resolved.parent
            anchors.append(str(anchor))
        pending.extend(reversed(current.inputs))

    if not anchors:
        return None
    try:
        shared = commonpath(anchors)
    except ValueError:
        return None
    return Path(shared)
136
+
137
+
138
+ def _format_chain_path(
139
+ value: str,
140
+ *,
141
+ display_base: Path | None,
142
+ full_paths: bool,
143
+ ) -> str:
144
+ if full_paths or _is_uri_like(value):
145
+ return value
146
+ path = Path(value).expanduser().resolve()
147
+ if display_base is None:
148
+ return path.name
149
+ try:
150
+ return path.relative_to(display_base).as_posix()
151
+ except ValueError:
152
+ return str(path)
153
+
154
+
155
+ def _problem_detail(details: tuple[str, ...]) -> str | None:
156
+ problem_terms = (
157
+ "changed",
158
+ "missing",
159
+ "no recorded",
160
+ "not locally verifiable",
161
+ "could not",
162
+ "unverifiable",
163
+ "stale:",
164
+ )
165
+ for detail in details:
166
+ lowered = detail.lower()
167
+ if any(term in lowered for term in problem_terms):
168
+ return detail
169
+ return None
170
+
171
+
172
def _echo_chain_problems(
    root: _DisplayChainNode,
    *,
    display_base: Path | None,
    full_paths: bool,
) -> None:
    """Print a "Problems:" section for non-fresh nodes in the chain.

    A node is listed only when its status is not ``"fresh"`` and one of its
    details contains a problem term. Each entry shows the status and path, the
    matching detail, and the consuming parent node when there is one. Nothing
    is printed when no node qualifies.
    """

    def shown(raw_path: str) -> str:
        return _format_chain_path(
            raw_path,
            display_base=display_base,
            full_paths=full_paths,
        )

    findings: list[tuple[_DisplayChainNode, _DisplayChainNode | None, str]] = []

    # Pre-order walk with an explicit stack; children are pushed reversed so
    # the listing order equals that of a recursive traversal.
    pending: list[tuple[_DisplayChainNode, _DisplayChainNode | None]] = [(root, None)]
    while pending:
        current, owner = pending.pop()
        if current.status != "fresh":
            reason = _problem_detail(current.details)
            if reason is not None:
                findings.append((current, owner, reason))
        pending.extend((child, current) for child in reversed(current.inputs))

    if not findings:
        return

    typer.echo("")
    typer.echo("Problems:")
    # Blank column that lines continuation rows up under the status column.
    blank_column = f"{'':<12}"
    for current, owner, reason in findings:
        typer.echo(" " + f"{current.status.upper():<12}" + shown(current.path))
        typer.echo(f" {blank_column}{reason}")
        if owner is not None:
            typer.echo(" " + f"{blank_column}used by " + shown(owner.path))
218
+
219
+
220
def _echo_chain_tree(
    node: _DisplayChainNode,
    *,
    display_base: Path | None,
    full_paths: bool,
    depth: int = 0,
) -> None:
    """Print the chain as an indented tree, one ``[STATUS] path`` line per node.

    ``depth`` is the indentation level of ``node``; each input is printed one
    level deeper than the node that consumes it.
    """
    # Explicit stack instead of recursion; children are pushed reversed so the
    # output order is the same depth-first, first-input-first order.
    pending = [(node, depth)]
    while pending:
        current, level = pending.pop()
        indent = " " * level
        label = _format_chain_path(
            current.path,
            display_base=display_base,
            full_paths=full_paths,
        )
        typer.echo(f"{indent}[{current.status.upper()}] {label}")
        pending.extend((child, level + 1) for child in reversed(current.inputs))
241
+
242
+
243
def _echo_chain_report(
    report: provenance_recovery.ProvenanceChainReport,
    *,
    full_paths: bool,
) -> None:
    """Print a provenance chain report.

    Output order: the overall result line, the problems section (only when
    there are problems), a blank line, then the "Chain:" tree.
    """
    tree = _display_chain_node(report.root)
    # Both sections share the same path-formatting options.
    path_options = {
        "display_base": _filesystem_display_base(tree),
        "full_paths": full_paths,
    }

    typer.echo(f"Result: {report.status.upper()}")
    _echo_chain_problems(tree, **path_options)
    typer.echo("")
    typer.echo("Chain:")
    _echo_chain_tree(tree, **path_options)
255
+
256
+
257
# CLI command: analyze the provenance chain of an artifact and print the
# report. Exits with status 1 unless the overall chain status is "fresh".
# NOTE: documented with comments rather than a docstring because Typer would
# surface a docstring as the command's help text.
@provenance_app.command("chain")
def chain_command(
    target: Path = typer.Argument(
        ..., help="Artifact, directory, or annotation document path."
    ),
    manifest: Path | None = typer.Option(
        None,
        "--manifest",
        help="Explicit annotation document path to use instead of auto-discovery.",
    ),
    full_paths: bool = typer.Option(
        False,
        "--full-paths",
        help="Show full paths instead of compact paths relative to the chain root.",
    ),
) -> None:
    resolved_manifest = _resolve_manifest_path(target, manifest)
    artifact = _match_target_path(target, manifest)
    chain_report = provenance_recovery.analyze_provenance_chain(
        artifact,
        resolved_manifest,
    )

    _echo_chain_report(chain_report, full_paths=full_paths)

    if chain_report.status == "fresh":
        return
    raise typer.Exit(code=1)
284
+
285
+
286
# CLI command: check out the source state recorded in an annotation document
# and print where the checkout (and, if resolved, the generating script) is.
# ValueError and git subprocess failures are reported through _error.
# NOTE(review): the code after the try block relies on _error not returning
# (otherwise `recovered` would be unbound) - confirm against cli_app.common.
# NOTE: documented with comments rather than a docstring because Typer would
# surface a docstring as the command's help text.
@provenance_app.command("checkout")
def checkout_command(
    target: Path = typer.Argument(
        ..., help="Artifact, directory, or annotation document path."
    ),
    manifest: Path | None = typer.Option(
        None,
        "--manifest",
        help="Explicit annotation document path to use instead of auto-discovery.",
    ),
    dest: Path | None = typer.Option(
        None,
        "--dest",
        help="Optional checkout destination. Defaults to a stable user cache.",
    ),
) -> None:
    manifest_path = _resolve_manifest_path(target, manifest)

    try:
        recovered = checkout_manifest_source(manifest_path, destination_dir=dest)
    except ValueError as exc:
        _error(str(exc), code=1)
    except subprocess.CalledProcessError:
        _error("failed to clone or checkout the recorded repository state", code=1)

    typer.echo(f"Manifest: {manifest_path}")
    typer.echo(f"Checkout path: {recovered.checkout_path}")

    script = recovered.script_path
    if script is None:
        typer.echo(
            "Recovered repository checkout, but the generating script could not be resolved."
        )
    else:
        typer.echo(f"Recovered script: {script}")
@@ -4,12 +4,20 @@ from .models import (
4
4
  ChildBundle,
5
5
  DirectoryManifest,
6
6
  FileManifest,
7
+ InputArtifact,
7
8
  ProducedFile,
8
9
  RecoveredSource,
9
10
  )
10
11
  from .decorators import record_directory_manifest, record_file_manifest
11
12
  from .git import capture_git_info
12
- from .recovery import artifact_matches_manifest, checkout_manifest_source
13
+ from .recovery import (
14
+ ProvenanceChainNode,
15
+ ProvenanceChainReport,
16
+ analyze_provenance_chain,
17
+ artifact_matches_manifest,
18
+ checkout_manifest_source,
19
+ provenance_chain_is_fresh,
20
+ )
13
21
  from .runtime import capture_runtime_info
14
22
  from .writers import (
15
23
  callable_name,
@@ -25,13 +33,18 @@ __all__ = [
25
33
  "ChildBundle",
26
34
  "DirectoryManifest",
27
35
  "FileManifest",
36
+ "InputArtifact",
28
37
  "ProducedFile",
38
+ "ProvenanceChainNode",
39
+ "ProvenanceChainReport",
29
40
  "RecoveredSource",
41
+ "analyze_provenance_chain",
30
42
  "artifact_matches_manifest",
31
43
  "callable_name",
32
44
  "capture_git_info",
33
45
  "capture_runtime_info",
34
46
  "checkout_manifest_source",
47
+ "provenance_chain_is_fresh",
35
48
  "directory_content_digest",
36
49
  "record_directory_manifest",
37
50
  "record_file_manifest",