data-annotations 2.8.0__tar.gz → 2.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. data_annotations-2.9.0/PKG-INFO +167 -0
  2. data_annotations-2.9.0/README.md +137 -0
  3. {data_annotations-2.8.0 → data_annotations-2.9.0}/pyproject.toml +3 -1
  4. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/annotations/answers.py +242 -67
  5. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/annotations/decorators.py +54 -12
  6. data_annotations-2.9.0/src/data_annotations/annotations/models.py +98 -0
  7. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/annotations/writers.py +119 -0
  8. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/cli_app/annotate/__init__.py +69 -13
  9. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/cli_app/annotate/helpers.py +48 -12
  10. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/cli_app/common.py +7 -0
  11. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/description/decorators.py +38 -12
  12. data_annotations-2.9.0/src/data_annotations/description/models.py +261 -0
  13. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/description/writers.py +73 -0
  14. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/provenance/decorators.py +34 -10
  15. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/provenance/git.py +14 -3
  16. data_annotations-2.9.0/src/data_annotations/provenance/models.py +244 -0
  17. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/provenance/recovery/chain.py +24 -0
  18. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/provenance/recovery/matching.py +15 -0
  19. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/provenance/recovery/sources.py +27 -0
  20. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/provenance/recovery/types.py +6 -0
  21. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/provenance/runtime.py +19 -2
  22. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/provenance/writers.py +66 -0
  23. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/publish.py +82 -0
  24. data_annotations-2.8.0/PKG-INFO +0 -1059
  25. data_annotations-2.8.0/README.md +0 -1029
  26. data_annotations-2.8.0/src/data_annotations/annotations/models.py +0 -47
  27. data_annotations-2.8.0/src/data_annotations/description/models.py +0 -104
  28. data_annotations-2.8.0/src/data_annotations/provenance/models.py +0 -88
  29. {data_annotations-2.8.0 → data_annotations-2.9.0}/LICENSE +0 -0
  30. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/__init__.py +0 -0
  31. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/_decorators.py +0 -0
  32. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/annotations/__init__.py +0 -0
  33. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/cli.py +0 -0
  34. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/cli_app/__init__.py +0 -0
  35. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/cli_app/answers.py +0 -0
  36. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/cli_app/prompts.py +0 -0
  37. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/cli_app/provenance_commands.py +0 -0
  38. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/cli_app/publish.py +0 -0
  39. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/description/__init__.py +0 -0
  40. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/provenance/__init__.py +0 -0
  41. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/provenance/recovery/__init__.py +0 -0
  42. {data_annotations-2.8.0 → data_annotations-2.9.0}/src/data_annotations/provenance/recovery/manifest.py +0 -0
@@ -0,0 +1,167 @@
1
+ Metadata-Version: 2.4
2
+ Name: data-annotations
3
+ Version: 2.9.0
4
+ Summary: Annotate data artifacts with provenance and descriptions
5
+ Keywords: annotations,data,metadata,provenance,reproducibility
6
+ Author: Rodrigo C. G. Pena
7
+ Author-email: Rodrigo C. G. Pena <rodrigo.cerqueiragonzalezpena@unibas.ch>
8
+ License-Expression: BSD-3-Clause
9
+ License-File: LICENSE
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Programming Language :: Python :: 3.14
18
+ Classifier: Topic :: Scientific/Engineering
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Requires-Dist: pydantic>=2.13.1
21
+ Requires-Dist: pyyaml>=6.0.2
22
+ Requires-Dist: questionary>=2.1.1 ; extra == 'cli'
23
+ Requires-Dist: typer>=0.16.0 ; extra == 'cli'
24
+ Requires-Python: >=3.12
25
+ Project-URL: Source, https://gitlab.com/ceda-unibas/tools/data-annotations
26
+ Project-URL: Changelog, https://gitlab.com/ceda-unibas/tools/data-annotations/-/blob/main/CHANGELOG.md
27
+ Project-URL: Issues, https://gitlab.com/ceda-unibas/tools/data-annotations/-/issues
28
+ Provides-Extra: cli
29
+ Description-Content-Type: text/markdown
30
+
31
+ # data-annotations
32
+
33
+ [![PyPI](https://img.shields.io/pypi/v/data-annotations?label=pypi)](https://pypi.org/project/data-annotations/)
34
+ [![Documentation](https://img.shields.io/badge/docs-latest-blue)](https://ceda-unibas.gitlab.io/tools/data-annotations/)
35
+ [![License](https://img.shields.io/pypi/l/data-annotations?label=license)](https://gitlab.com/ceda-unibas/tools/data-annotations/-/blob/main/LICENSE)
36
+ [![CI](https://gitlab.com/ceda-unibas/tools/data-annotations/badges/main/pipeline.svg)](https://gitlab.com/ceda-unibas/tools/data-annotations/-/pipelines)
37
+
38
+ `data-annotations` is a Python package for attaching provenance and structured
39
+ descriptions to the files and directories your workflows produce.
40
+
41
+ It writes plain JSON annotation sidecars that are easy to inspect, archive, and
42
+ publish with research outputs:
43
+
44
+ - files use `artifact.ext.annotation.json`
45
+ - directories use `data-annotations.json` at their root
46
+
47
+ Optional Markdown README sidecars can be generated for human-readable summaries.
48
+
49
+ ## Documentation
50
+
51
+ The [full documentation](https://ceda-unibas.gitlab.io/tools/data-annotations/) is organized as a [Diátaxis](https://diataxis.fr/) site.
52
+
53
+ Other links:
54
+
55
+ - [Source code](https://gitlab.com/ceda-unibas/tools/data-annotations)
56
+ - [Changelog](https://gitlab.com/ceda-unibas/tools/data-annotations/-/blob/main/CHANGELOG.md)
57
+ - [Work items](https://gitlab.com/ceda-unibas/tools/data-annotations/-/work_items)
58
+
59
+ ## Installation
60
+
61
+ Install the core library from PyPI:
62
+
63
+ ```bash
64
+ pip install data-annotations
65
+ ```
66
+
67
+ Or add it to a project with `uv`:
68
+
69
+ ```bash
70
+ uv add data-annotations
71
+ ```
72
+
73
+ Install CLI support, using either `pip` or `uv`, when you want the `data-annotations` command:
74
+
75
+ ```bash
76
+ pip install "data-annotations[cli]"
77
+ uv add "data-annotations[cli]"
78
+ ```
79
+
80
+ ## Quick start
81
+
82
+ Decorate a function that writes an artifact. When the function runs,
83
+ `data-annotations` records provenance and writes the JSON sidecar.
84
+
85
+ ```python
86
+ from pathlib import Path
87
+
88
+ from data_annotations.annotations import record_file_annotation
89
+ from data_annotations.description import FieldDefinition
90
+
91
+
92
+ @record_file_annotation(
93
+     title="Participant Cohort",
94
+     summary="Participant-level cohort assignments.",
95
+     fields=[
96
+         FieldDefinition(
97
+             name="participant_id",
98
+             data_type="string",
99
+             summary="Stable participant identifier.",
100
+             required=True,
101
+             nullable=False,
102
+         ),
103
+     ],
104
+     primary_key=["participant_id"],
105
+     artifact_kind="dataset",
106
+     write_readme=True,
107
+ )
108
+ def write_participants(artifact_path: Path, input_path: Path) -> Path:
109
+     participant_ids = [
110
+         line.strip()
111
+         for line in input_path.read_text(encoding="utf-8").splitlines()[1:]
112
+         if line.strip()
113
+     ]
114
+     artifact_path.parent.mkdir(parents=True, exist_ok=True)
115
+     artifact_path.write_text(
116
+         "participant_id\n" + "\n".join(participant_ids) + "\n",
117
+         encoding="utf-8",
118
+     )
119
+     return artifact_path
120
+
121
+
122
+ artifact_path = Path("outputs") / "participants.csv"
123
+ write_participants(
124
+     artifact_path=artifact_path,
125
+     input_path=Path("data/raw/participants.csv"),
126
+ )
127
+ ```
128
+
129
+ This writes:
130
+
131
+ ```text
132
+ outputs/participants.csv
133
+ outputs/participants.csv.annotation.json
134
+ outputs/participants.csv.README.md
135
+ ```
136
+
137
+ ## CLI
138
+
139
+ The CLI supports retrospective annotation, provenance inspection, source
140
+ recovery, and sanitized publish bundles.
141
+
142
+ ```bash
143
+ data-annotations annotate file path/to/participants.csv --write-readme
144
+ data-annotations annotate directory path/to/run-001 --recursive
145
+ data-annotations provenance match path/to/participants.csv
146
+ data-annotations provenance chain path/to/participants.csv
147
+ data-annotations provenance checkout path/to/participants.csv
148
+ data-annotations publish path/to/run-001 path/to/publish-bundle
149
+ ```
150
+
151
+ ## Development
152
+
153
+ From a source checkout (assuming you have [Task installed](https://taskfile.dev/docs/installation)):
154
+
155
+ ```bash
156
+ task install
157
+ task lint
158
+ task type-check
159
+ task test
160
+ ```
161
+
162
+ Build or preview the documentation site:
163
+
164
+ ```bash
165
+ task docs-build
166
+ task docs-serve
167
+ ```
@@ -0,0 +1,137 @@
1
+ # data-annotations
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/data-annotations?label=pypi)](https://pypi.org/project/data-annotations/)
4
+ [![Documentation](https://img.shields.io/badge/docs-latest-blue)](https://ceda-unibas.gitlab.io/tools/data-annotations/)
5
+ [![License](https://img.shields.io/pypi/l/data-annotations?label=license)](https://gitlab.com/ceda-unibas/tools/data-annotations/-/blob/main/LICENSE)
6
+ [![CI](https://gitlab.com/ceda-unibas/tools/data-annotations/badges/main/pipeline.svg)](https://gitlab.com/ceda-unibas/tools/data-annotations/-/pipelines)
7
+
8
+ `data-annotations` is a Python package for attaching provenance and structured
9
+ descriptions to the files and directories your workflows produce.
10
+
11
+ It writes plain JSON annotation sidecars that are easy to inspect, archive, and
12
+ publish with research outputs:
13
+
14
+ - files use `artifact.ext.annotation.json`
15
+ - directories use `data-annotations.json` at their root
16
+
17
+ Optional Markdown README sidecars can be generated for human-readable summaries.
18
+
19
+ ## Documentation
20
+
21
+ The [full documentation](https://ceda-unibas.gitlab.io/tools/data-annotations/) is organized as a [Diátaxis](https://diataxis.fr/) site.
22
+
23
+ Other links:
24
+
25
+ - [Source code](https://gitlab.com/ceda-unibas/tools/data-annotations)
26
+ - [Changelog](https://gitlab.com/ceda-unibas/tools/data-annotations/-/blob/main/CHANGELOG.md)
27
+ - [Work items](https://gitlab.com/ceda-unibas/tools/data-annotations/-/work_items)
28
+
29
+ ## Installation
30
+
31
+ Install the core library from PyPI:
32
+
33
+ ```bash
34
+ pip install data-annotations
35
+ ```
36
+
37
+ Or add it to a project with `uv`:
38
+
39
+ ```bash
40
+ uv add data-annotations
41
+ ```
42
+
43
+ Install CLI support, using either `pip` or `uv`, when you want the `data-annotations` command:
44
+
45
+ ```bash
46
+ pip install "data-annotations[cli]"
47
+ uv add "data-annotations[cli]"
48
+ ```
49
+
50
+ ## Quick start
51
+
52
+ Decorate a function that writes an artifact. When the function runs,
53
+ `data-annotations` records provenance and writes the JSON sidecar.
54
+
55
+ ```python
56
+ from pathlib import Path
57
+
58
+ from data_annotations.annotations import record_file_annotation
59
+ from data_annotations.description import FieldDefinition
60
+
61
+
62
+ @record_file_annotation(
63
+     title="Participant Cohort",
64
+     summary="Participant-level cohort assignments.",
65
+     fields=[
66
+         FieldDefinition(
67
+             name="participant_id",
68
+             data_type="string",
69
+             summary="Stable participant identifier.",
70
+             required=True,
71
+             nullable=False,
72
+         ),
73
+     ],
74
+     primary_key=["participant_id"],
75
+     artifact_kind="dataset",
76
+     write_readme=True,
77
+ )
78
+ def write_participants(artifact_path: Path, input_path: Path) -> Path:
79
+     participant_ids = [
80
+         line.strip()
81
+         for line in input_path.read_text(encoding="utf-8").splitlines()[1:]
82
+         if line.strip()
83
+     ]
84
+     artifact_path.parent.mkdir(parents=True, exist_ok=True)
85
+     artifact_path.write_text(
86
+         "participant_id\n" + "\n".join(participant_ids) + "\n",
87
+         encoding="utf-8",
88
+     )
89
+     return artifact_path
90
+
91
+
92
+ artifact_path = Path("outputs") / "participants.csv"
93
+ write_participants(
94
+     artifact_path=artifact_path,
95
+     input_path=Path("data/raw/participants.csv"),
96
+ )
97
+ ```
98
+
99
+ This writes:
100
+
101
+ ```text
102
+ outputs/participants.csv
103
+ outputs/participants.csv.annotation.json
104
+ outputs/participants.csv.README.md
105
+ ```
106
+
107
+ ## CLI
108
+
109
+ The CLI supports retrospective annotation, provenance inspection, source
110
+ recovery, and sanitized publish bundles.
111
+
112
+ ```bash
113
+ data-annotations annotate file path/to/participants.csv --write-readme
114
+ data-annotations annotate directory path/to/run-001 --recursive
115
+ data-annotations provenance match path/to/participants.csv
116
+ data-annotations provenance chain path/to/participants.csv
117
+ data-annotations provenance checkout path/to/participants.csv
118
+ data-annotations publish path/to/run-001 path/to/publish-bundle
119
+ ```
120
+
121
+ ## Development
122
+
123
+ From a source checkout (assuming you have [Task installed](https://taskfile.dev/docs/installation)):
124
+
125
+ ```bash
126
+ task install
127
+ task lint
128
+ task type-check
129
+ task test
130
+ ```
131
+
132
+ Build or preview the documentation site:
133
+
134
+ ```bash
135
+ task docs-build
136
+ task docs-serve
137
+ ```
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "data-annotations"
3
- version = "2.8.0"
3
+ version = "2.9.0"
4
4
  description = "Annotate data artifacts with provenance and descriptions"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -42,8 +42,10 @@ build-backend = "uv_build"
42
42
  [dependency-groups]
43
43
  dev = [
44
44
  "ipykernel>=7.2.0",
45
+ "mkdocstrings-python>=2.0.4",
45
46
  "prek>=0.3.9",
46
47
  "pytest>=9.0.3",
47
48
  "ruff>=0.15.10",
48
49
  "ty>=0.0.31",
50
+ "zensical>=0.0.45",
49
51
  ]