data-annotations 2.4.0__tar.gz → 2.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {data_annotations-2.4.0 → data_annotations-2.5.0}/PKG-INFO +68 -41
  2. {data_annotations-2.4.0 → data_annotations-2.5.0}/README.md +66 -39
  3. {data_annotations-2.4.0 → data_annotations-2.5.0}/pyproject.toml +2 -2
  4. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/annotations/models.py +2 -2
  5. data_annotations-2.5.0/src/data_annotations/cli_app/annotate/__init__.py +492 -0
  6. data_annotations-2.4.0/src/data_annotations/cli_app/annotate.py → data_annotations-2.5.0/src/data_annotations/cli_app/annotate/helpers.py +105 -439
  7. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/cli_app/answers.py +3 -1
  8. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/cli_app/common.py +50 -4
  9. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/cli_app/provenance_commands.py +28 -16
  10. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/cli_app/publish.py +7 -1
  11. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/provenance/__init__.py +6 -0
  12. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/provenance/models.py +19 -2
  13. data_annotations-2.5.0/src/data_annotations/provenance/recovery/__init__.py +102 -0
  14. data_annotations-2.5.0/src/data_annotations/provenance/recovery/chain.py +312 -0
  15. data_annotations-2.5.0/src/data_annotations/provenance/recovery/manifest.py +179 -0
  16. data_annotations-2.5.0/src/data_annotations/provenance/recovery/matching.py +263 -0
  17. data_annotations-2.5.0/src/data_annotations/provenance/recovery/sources.py +507 -0
  18. data_annotations-2.5.0/src/data_annotations/provenance/recovery/types.py +32 -0
  19. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/provenance/writers.py +23 -0
  20. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/publish.py +14 -0
  21. data_annotations-2.4.0/src/data_annotations/provenance/recovery.py +0 -926
  22. {data_annotations-2.4.0 → data_annotations-2.5.0}/LICENSE +0 -0
  23. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/__init__.py +0 -0
  24. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/_decorators.py +0 -0
  25. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/annotations/__init__.py +0 -0
  26. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/annotations/decorators.py +0 -0
  27. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/annotations/writers.py +0 -0
  28. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/cli.py +0 -0
  29. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/cli_app/__init__.py +0 -0
  30. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/cli_app/prompts.py +0 -0
  31. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/description/__init__.py +0 -0
  32. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/description/decorators.py +0 -0
  33. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/description/models.py +0 -0
  34. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/description/writers.py +0 -0
  35. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/provenance/decorators.py +0 -0
  36. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/provenance/git.py +0 -0
  37. {data_annotations-2.4.0 → data_annotations-2.5.0}/src/data_annotations/provenance/runtime.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-annotations
3
- Version: 2.4.0
4
- Summary: Annotate generated data artifacts
3
+ Version: 2.5.0
4
+ Summary: Annotate data artifacts with provenance and descriptions
5
5
  Keywords: annotations,data,metadata,provenance,reproducibility
6
6
  Author: Rodrigo C. G. Pena
7
7
  Author-email: Rodrigo C. G. Pena <rodrigo.cerqueiragonzalezpena@unibas.ch>
@@ -102,6 +102,8 @@ Every annotation document includes provenance with:
102
102
  - The script path relative to the Git repo root when it can be determined
103
103
  - Git commit, branch, dirty state, canonical repository remote, exact tags, and
104
104
  `git describe` output when available
105
+ - A source-code reference for recovery, derived from Git metadata when possible
106
+ or supplied explicitly for archives, individual files, and DOI/URI records
105
107
  - The current `SLURM_JOB_ID` when available
106
108
  - Structured snapshots for recorded local inputs, including file checksums,
107
109
  directory content digests, and upstream annotation sidecar references when
@@ -110,8 +112,8 @@ Every annotation document includes provenance with:
110
112
  You can also attach your own parameters, input file paths, and function names.
111
113
  Local filesystem paths in provenance are stored as absolute paths. URI-style inputs
112
114
  such as `s3://...` or `https://...` are preserved as provided.
113
- Git tags and `git_describe` are human-friendly hints only; `git_sha` remains the
114
- source of truth for reproducibility, matching, and source checkout.
115
+ Git tags and `git_describe` are human-friendly hints only. For Git sources,
116
+ `git_sha` and `source_code.revision` identify the recoverable code state.
115
117
 
116
118
  ## Quick Start
117
119
 
@@ -537,8 +539,9 @@ per call.
537
539
  Use `artifact_matches_manifest(...)` to verify whether a detached artifact still
538
540
  matches an annotation document. Use `analyze_provenance_chain(...)` when you also
539
541
  want to verify recorded inputs and recursively follow upstream annotation
540
- sidecars. Use `checkout_manifest_source(...)` to recover the recorded code state
541
- from Git metadata.
542
+ sidecars. Use `recover_manifest_source(...)` to recover the recorded source code
543
+ from Git metadata, a recorded source archive, or a recorded source file.
544
+ `checkout_manifest_source(...)` remains available as a compatibility alias.
542
545
 
543
546
  ```python
544
547
  from pathlib import Path
@@ -546,7 +549,7 @@ from pathlib import Path
546
549
  from data_annotations.provenance import (
547
550
  analyze_provenance_chain,
548
551
  artifact_matches_manifest,
549
- checkout_manifest_source,
552
+ recover_manifest_source,
550
553
  )
551
554
 
552
555
  annotation_path = Path("outputs/participants.csv.annotation.json")
@@ -555,7 +558,7 @@ artifact_path = Path("downloads/participants.csv")
555
558
  if artifact_matches_manifest(artifact_path, annotation_path):
556
559
  chain = analyze_provenance_chain(artifact_path)
557
560
  print(chain.status)
558
- recovered = checkout_manifest_source(annotation_path)
561
+ recovered = recover_manifest_source(annotation_path)
559
562
  print(recovered.checkout_path)
560
563
  print(recovered.script_path)
561
564
  ```
@@ -572,9 +575,9 @@ still attach provenance and description after the fact.
572
575
 
573
576
  Post-hoc descriptions can still be very useful, but the quality of post-hoc
574
577
  provenance depends on how exact the supplied answers are. In particular, fields
575
- such as the generating script, command, function, Git commit, repository path,
576
- Git tags, `git describe` output, inputs, and parameters are only as reliable as
577
- the information entered during annotation.
578
+ such as the generating script, command, function, source-code URI, Git commit,
579
+ repository path, Git tags, `git describe` output, inputs, and parameters are
580
+ only as reliable as the information entered during annotation.
578
581
 
579
582
  ## CLI Workflow
580
583
 
@@ -627,25 +630,31 @@ summary: Participant-level cohort assignments.
627
630
  kind: dataset
628
631
 
629
632
  inputs:
630
- - ${DATA_ROOT}/raw/participants.csv
633
+ - ${DATA_ROOT}/raw/participants.csv
631
634
 
632
635
  params:
633
- split: validation
636
+ split: validation
634
637
 
635
638
  provenance:
636
- command: bash scripts/build_participants.sh
637
- script: scripts/build_participants.sh
638
- git_sha: deadbeef
639
+ command: bash scripts/build_participants.sh
640
+ script: scripts/build_participants.sh
641
+ git_sha: deadbeef
642
+ source_code:
643
+ kind: archive
644
+ uri: https://doi.org/10.5281/zenodo.12345
645
+ download_uri: https://zenodo.org/records/12345/files/source.zip
646
+ path: scripts/build_participants.sh
647
+ sha256: 0000000000000000000000000000000000000000000000000000000000000000
639
648
 
640
649
  fields:
641
- - name: participant_id
642
- summary: Stable participant identifier.
643
- data_type: string
644
- required: true
645
- nullable: false
650
+ - name: participant_id
651
+ summary: Stable participant identifier.
652
+ data_type: string
653
+ required: true
654
+ nullable: false
646
655
 
647
656
  primary_key:
648
- - participant_id
657
+ - participant_id
649
658
  ```
650
659
 
651
660
  Directory answers use an explicit inventory. Paths in `artifacts`,
@@ -658,26 +667,26 @@ title: Processing outputs
658
667
  summary: Files produced by the shell processing workflow.
659
668
 
660
669
  provenance:
661
- command: bash process_from_instrument.sh
662
- script: process_from_instrument.sh
670
+ command: bash process_from_instrument.sh
671
+ script: process_from_instrument.sh
663
672
 
664
673
  artifacts:
665
- - path: processed.csv
666
- kind: dataset
667
- title: Processed instrument output
668
- summary: Normalized output from the processing script.
674
+ - path: processed.csv
675
+ kind: dataset
676
+ title: Processed instrument output
677
+ summary: Normalized output from the processing script.
669
678
 
670
679
  artifact_groups:
671
- - title: Diagnostic plots
672
- kind: plot
673
- selector: plots/*.png
674
- paths:
675
- - plots/qc-1.png
676
- - plots/qc-2.png
680
+ - title: Diagnostic plots
681
+ kind: plot
682
+ selector: plots/*.png
683
+ paths:
684
+ - plots/qc-1.png
685
+ - plots/qc-2.png
677
686
 
678
687
  child_bundles:
679
- - path: model
680
- annotation_path: model/data-annotations.json
688
+ - path: model
689
+ annotation_path: model/data-annotations.json
681
690
  ```
682
691
 
683
692
  Answers files may also use schema-style aliases such as `subject.path`,
@@ -685,14 +694,24 @@ Answers files may also use schema-style aliases such as `subject.path`,
685
694
  `description.artifacts`, `description.artifact_groups`, `provenance.inputs`,
686
695
  and `provenance.params`.
687
696
 
697
+ For source-code recovery, `provenance.source_code.kind` may be `git`, `archive`,
698
+ `file`, or `uri`. Git sources use `uri` plus `revision`; archive and file
699
+ sources use `uri` or `download_uri` plus an optional `sha256`; `path` points to
700
+ the generating script inside the recovered source. DOI or landing-page-only
701
+ references can be recorded with `kind: uri`, but they are not directly
702
+ recoverable unless a direct archive or file `download_uri` is also recorded.
703
+
688
704
  When group selectors are provided, the CLI expands them to concrete member paths
689
705
  at annotation time. Grouped files are tracked in `subject.produced_files[]` but
690
706
  are skipped by the per-file prompt flow, so you do not have to answer the same
691
707
  questions for every matching file.
692
708
 
693
- For post-hoc provenance, use repeatable `--git-tag` and optional
694
- `--git-describe` when you know the original code state. These values are stored
695
- as human-readable hints; `--git-sha` remains the field used for recovery.
709
+ For post-hoc provenance, use `--source-kind`, `--source-uri`,
710
+ `--source-download-uri`, `--source-path`, `--source-revision`, and
711
+ `--source-sha256` when the generating code is recoverable from a Git remote,
712
+ source archive, source file, or reference URI. Use repeatable `--git-tag` and
713
+ optional `--git-describe` when you know the original Git state; these values are
714
+ stored as human-readable hints.
696
715
 
697
716
  For provenance inspection and source recovery:
698
717
 
@@ -703,8 +722,12 @@ data-annotations provenance chain path/to/artifact --full-paths
703
722
  data-annotations provenance checkout path/to/artifact
704
723
  ```
705
724
 
706
- Command `checkout` downloads the recorded Git remote and checks out the recorded
707
- commit. It prompts before downloading source code and defaults to No; use
725
+ Command `checkout` recovers the recorded source code. For Git sources, it clones
726
+ the recorded remote and checks out the recorded revision. For archive and file
727
+ sources, it downloads or copies the recorded object, verifies `sha256` when
728
+ present, and resolves the generating script path when recorded. Reference-only
729
+ URI sources are preserved in the annotation but are not directly recoverable.
730
+ The command prompts before downloading source code and defaults to No; use
708
731
  `--force` when running trusted provenance checkout non-interactively.
709
732
 
710
733
  Command `match` auto-discovers `*.annotation.json` for files and `data-annotations.json` for
@@ -820,6 +843,8 @@ uv run data-annotations publish path/to/run-001 path/to/publish-bundle
820
843
  - `ProducedFile`
821
844
  - `ChildBundle`
822
845
  - `InputArtifact`
846
+ - `SourceCodeKind`
847
+ - `SourceCodeReference`
823
848
  - `BaseProvenance`
824
849
  - `FileManifest`
825
850
  - `DirectoryManifest`
@@ -837,6 +862,7 @@ uv run data-annotations publish path/to/run-001 path/to/publish-bundle
837
862
  - `analyze_provenance_chain(...)`
838
863
  - `provenance_chain_is_fresh(...)`
839
864
  - `artifact_matches_manifest(...)`
865
+ - `recover_manifest_source(...)`
840
866
  - `checkout_manifest_source(...)`
841
867
 
842
868
  ### Publish Functions
@@ -869,6 +895,7 @@ uv run python examples/provenance_chain.py
869
895
  uv run python examples/provenance_chain_cli.py
870
896
  uv run python examples/recover_provenance.py
871
897
  uv run python examples/recover_provenance_cli.py
898
+ uv run python examples/recover_archive_source.py
872
899
  uv run python examples/publish_cli.py
873
900
  ```
874
901
 
@@ -72,6 +72,8 @@ Every annotation document includes provenance with:
72
72
  - The script path relative to the Git repo root when it can be determined
73
73
  - Git commit, branch, dirty state, canonical repository remote, exact tags, and
74
74
  `git describe` output when available
75
+ - A source-code reference for recovery, derived from Git metadata when possible
76
+ or supplied explicitly for archives, individual files, and DOI/URI records
75
77
  - The current `SLURM_JOB_ID` when available
76
78
  - Structured snapshots for recorded local inputs, including file checksums,
77
79
  directory content digests, and upstream annotation sidecar references when
@@ -80,8 +82,8 @@ Every annotation document includes provenance with:
80
82
  You can also attach your own parameters, input file paths, and function names.
81
83
  Local filesystem paths in provenance are stored as absolute paths. URI-style inputs
82
84
  such as `s3://...` or `https://...` are preserved as provided.
83
- Git tags and `git_describe` are human-friendly hints only; `git_sha` remains the
84
- source of truth for reproducibility, matching, and source checkout.
85
+ Git tags and `git_describe` are human-friendly hints only. For Git sources,
86
+ `git_sha` and `source_code.revision` identify the recoverable code state.
85
87
 
86
88
  ## Quick Start
87
89
 
@@ -507,8 +509,9 @@ per call.
507
509
  Use `artifact_matches_manifest(...)` to verify whether a detached artifact still
508
510
  matches an annotation document. Use `analyze_provenance_chain(...)` when you also
509
511
  want to verify recorded inputs and recursively follow upstream annotation
510
- sidecars. Use `checkout_manifest_source(...)` to recover the recorded code state
511
- from Git metadata.
512
+ sidecars. Use `recover_manifest_source(...)` to recover the recorded source code
513
+ from Git metadata, a recorded source archive, or a recorded source file.
514
+ `checkout_manifest_source(...)` remains available as a compatibility alias.
512
515
 
513
516
  ```python
514
517
  from pathlib import Path
@@ -516,7 +519,7 @@ from pathlib import Path
516
519
  from data_annotations.provenance import (
517
520
  analyze_provenance_chain,
518
521
  artifact_matches_manifest,
519
- checkout_manifest_source,
522
+ recover_manifest_source,
520
523
  )
521
524
 
522
525
  annotation_path = Path("outputs/participants.csv.annotation.json")
@@ -525,7 +528,7 @@ artifact_path = Path("downloads/participants.csv")
525
528
  if artifact_matches_manifest(artifact_path, annotation_path):
526
529
  chain = analyze_provenance_chain(artifact_path)
527
530
  print(chain.status)
528
- recovered = checkout_manifest_source(annotation_path)
531
+ recovered = recover_manifest_source(annotation_path)
529
532
  print(recovered.checkout_path)
530
533
  print(recovered.script_path)
531
534
  ```
@@ -542,9 +545,9 @@ still attach provenance and description after the fact.
542
545
 
543
546
  Post-hoc descriptions can still be very useful, but the quality of post-hoc
544
547
  provenance depends on how exact the supplied answers are. In particular, fields
545
- such as the generating script, command, function, Git commit, repository path,
546
- Git tags, `git describe` output, inputs, and parameters are only as reliable as
547
- the information entered during annotation.
548
+ such as the generating script, command, function, source-code URI, Git commit,
549
+ repository path, Git tags, `git describe` output, inputs, and parameters are
550
+ only as reliable as the information entered during annotation.
548
551
 
549
552
  ## CLI Workflow
550
553
 
@@ -597,25 +600,31 @@ summary: Participant-level cohort assignments.
597
600
  kind: dataset
598
601
 
599
602
  inputs:
600
- - ${DATA_ROOT}/raw/participants.csv
603
+ - ${DATA_ROOT}/raw/participants.csv
601
604
 
602
605
  params:
603
- split: validation
606
+ split: validation
604
607
 
605
608
  provenance:
606
- command: bash scripts/build_participants.sh
607
- script: scripts/build_participants.sh
608
- git_sha: deadbeef
609
+ command: bash scripts/build_participants.sh
610
+ script: scripts/build_participants.sh
611
+ git_sha: deadbeef
612
+ source_code:
613
+ kind: archive
614
+ uri: https://doi.org/10.5281/zenodo.12345
615
+ download_uri: https://zenodo.org/records/12345/files/source.zip
616
+ path: scripts/build_participants.sh
617
+ sha256: 0000000000000000000000000000000000000000000000000000000000000000
609
618
 
610
619
  fields:
611
- - name: participant_id
612
- summary: Stable participant identifier.
613
- data_type: string
614
- required: true
615
- nullable: false
620
+ - name: participant_id
621
+ summary: Stable participant identifier.
622
+ data_type: string
623
+ required: true
624
+ nullable: false
616
625
 
617
626
  primary_key:
618
- - participant_id
627
+ - participant_id
619
628
  ```
620
629
 
621
630
  Directory answers use an explicit inventory. Paths in `artifacts`,
@@ -628,26 +637,26 @@ title: Processing outputs
628
637
  summary: Files produced by the shell processing workflow.
629
638
 
630
639
  provenance:
631
- command: bash process_from_instrument.sh
632
- script: process_from_instrument.sh
640
+ command: bash process_from_instrument.sh
641
+ script: process_from_instrument.sh
633
642
 
634
643
  artifacts:
635
- - path: processed.csv
636
- kind: dataset
637
- title: Processed instrument output
638
- summary: Normalized output from the processing script.
644
+ - path: processed.csv
645
+ kind: dataset
646
+ title: Processed instrument output
647
+ summary: Normalized output from the processing script.
639
648
 
640
649
  artifact_groups:
641
- - title: Diagnostic plots
642
- kind: plot
643
- selector: plots/*.png
644
- paths:
645
- - plots/qc-1.png
646
- - plots/qc-2.png
650
+ - title: Diagnostic plots
651
+ kind: plot
652
+ selector: plots/*.png
653
+ paths:
654
+ - plots/qc-1.png
655
+ - plots/qc-2.png
647
656
 
648
657
  child_bundles:
649
- - path: model
650
- annotation_path: model/data-annotations.json
658
+ - path: model
659
+ annotation_path: model/data-annotations.json
651
660
  ```
652
661
 
653
662
  Answers files may also use schema-style aliases such as `subject.path`,
@@ -655,14 +664,24 @@ Answers files may also use schema-style aliases such as `subject.path`,
655
664
  `description.artifacts`, `description.artifact_groups`, `provenance.inputs`,
656
665
  and `provenance.params`.
657
666
 
667
+ For source-code recovery, `provenance.source_code.kind` may be `git`, `archive`,
668
+ `file`, or `uri`. Git sources use `uri` plus `revision`; archive and file
669
+ sources use `uri` or `download_uri` plus an optional `sha256`; `path` points to
670
+ the generating script inside the recovered source. DOI or landing-page-only
671
+ references can be recorded with `kind: uri`, but they are not directly
672
+ recoverable unless a direct archive or file `download_uri` is also recorded.
673
+
658
674
  When group selectors are provided, the CLI expands them to concrete member paths
659
675
  at annotation time. Grouped files are tracked in `subject.produced_files[]` but
660
676
  are skipped by the per-file prompt flow, so you do not have to answer the same
661
677
  questions for every matching file.
662
678
 
663
- For post-hoc provenance, use repeatable `--git-tag` and optional
664
- `--git-describe` when you know the original code state. These values are stored
665
- as human-readable hints; `--git-sha` remains the field used for recovery.
679
+ For post-hoc provenance, use `--source-kind`, `--source-uri`,
680
+ `--source-download-uri`, `--source-path`, `--source-revision`, and
681
+ `--source-sha256` when the generating code is recoverable from a Git remote,
682
+ source archive, source file, or reference URI. Use repeatable `--git-tag` and
683
+ optional `--git-describe` when you know the original Git state; these values are
684
+ stored as human-readable hints.
666
685
 
667
686
  For provenance inspection and source recovery:
668
687
 
@@ -673,8 +692,12 @@ data-annotations provenance chain path/to/artifact --full-paths
673
692
  data-annotations provenance checkout path/to/artifact
674
693
  ```
675
694
 
676
- Command `checkout` downloads the recorded Git remote and checks out the recorded
677
- commit. It prompts before downloading source code and defaults to No; use
695
+ Command `checkout` recovers the recorded source code. For Git sources, it clones
696
+ the recorded remote and checks out the recorded revision. For archive and file
697
+ sources, it downloads or copies the recorded object, verifies `sha256` when
698
+ present, and resolves the generating script path when recorded. Reference-only
699
+ URI sources are preserved in the annotation but are not directly recoverable.
700
+ The command prompts before downloading source code and defaults to No; use
678
701
  `--force` when running trusted provenance checkout non-interactively.
679
702
 
680
703
  Command `match` auto-discovers `*.annotation.json` for files and `data-annotations.json` for
@@ -790,6 +813,8 @@ uv run data-annotations publish path/to/run-001 path/to/publish-bundle
790
813
  - `ProducedFile`
791
814
  - `ChildBundle`
792
815
  - `InputArtifact`
816
+ - `SourceCodeKind`
817
+ - `SourceCodeReference`
793
818
  - `BaseProvenance`
794
819
  - `FileManifest`
795
820
  - `DirectoryManifest`
@@ -807,6 +832,7 @@ uv run data-annotations publish path/to/run-001 path/to/publish-bundle
807
832
  - `analyze_provenance_chain(...)`
808
833
  - `provenance_chain_is_fresh(...)`
809
834
  - `artifact_matches_manifest(...)`
835
+ - `recover_manifest_source(...)`
810
836
  - `checkout_manifest_source(...)`
811
837
 
812
838
  ### Publish Functions
@@ -839,6 +865,7 @@ uv run python examples/provenance_chain.py
839
865
  uv run python examples/provenance_chain_cli.py
840
866
  uv run python examples/recover_provenance.py
841
867
  uv run python examples/recover_provenance_cli.py
868
+ uv run python examples/recover_archive_source.py
842
869
  uv run python examples/publish_cli.py
843
870
  ```
844
871
 
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "data-annotations"
3
- version = "2.4.0"
4
- description = "Annotate generated data artifacts"
3
+ version = "2.5.0"
4
+ description = "Annotate data artifacts with provenance and descriptions"
5
5
  readme = "README.md"
6
6
  authors = [
7
7
  { name = "Rodrigo C. G. Pena", email = "rodrigo.cerqueiragonzalezpena@unibas.ch" },
@@ -22,14 +22,14 @@ class DirectoryArtifactSubject(BaseModel):
22
22
 
23
23
 
24
24
  class FileAnnotationDocument(BaseModel):
25
- annotation_version: Literal["5"] = "5"
25
+ annotation_version: Literal["6"] = "6"
26
26
  subject: FileArtifactSubject
27
27
  provenance: BaseProvenance
28
28
  description: FileDescription
29
29
 
30
30
 
31
31
  class DirectoryAnnotationDocument(BaseModel):
32
- annotation_version: Literal["5"] = "5"
32
+ annotation_version: Literal["6"] = "6"
33
33
  subject: DirectoryArtifactSubject
34
34
  provenance: BaseProvenance
35
35
  description: DirectoryDescription