data-annotations 2.8.0__tar.gz → 2.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_annotations-2.8.1/PKG-INFO +161 -0
- data_annotations-2.8.1/README.md +131 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/pyproject.toml +3 -1
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/annotations/answers.py +213 -46
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/annotations/decorators.py +54 -12
- data_annotations-2.8.1/src/data_annotations/annotations/models.py +98 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/annotations/writers.py +119 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/description/decorators.py +38 -12
- data_annotations-2.8.1/src/data_annotations/description/models.py +261 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/description/writers.py +73 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/decorators.py +34 -10
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/git.py +14 -3
- data_annotations-2.8.1/src/data_annotations/provenance/models.py +244 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/recovery/chain.py +24 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/recovery/matching.py +15 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/recovery/sources.py +27 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/recovery/types.py +6 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/runtime.py +19 -2
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/writers.py +66 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/publish.py +82 -0
- data_annotations-2.8.0/PKG-INFO +0 -1059
- data_annotations-2.8.0/README.md +0 -1029
- data_annotations-2.8.0/src/data_annotations/annotations/models.py +0 -47
- data_annotations-2.8.0/src/data_annotations/description/models.py +0 -104
- data_annotations-2.8.0/src/data_annotations/provenance/models.py +0 -88
- {data_annotations-2.8.0 → data_annotations-2.8.1}/LICENSE +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/__init__.py +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/_decorators.py +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/annotations/__init__.py +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli.py +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/__init__.py +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/annotate/__init__.py +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/annotate/helpers.py +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/answers.py +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/common.py +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/prompts.py +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/provenance_commands.py +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/publish.py +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/description/__init__.py +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/__init__.py +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/recovery/__init__.py +0 -0
- {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/recovery/manifest.py +0 -0
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: data-annotations
|
|
3
|
+
Version: 2.8.1
|
|
4
|
+
Summary: Annotate data artifacts with provenance and descriptions
|
|
5
|
+
Keywords: annotations,data,metadata,provenance,reproducibility
|
|
6
|
+
Author: Rodrigo C. G. Pena
|
|
7
|
+
Author-email: Rodrigo C. G. Pena <rodrigo.cerqueiragonzalezpena@unibas.ch>
|
|
8
|
+
License-Expression: BSD-3-Clause
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Requires-Dist: pydantic>=2.13.1
|
|
21
|
+
Requires-Dist: pyyaml>=6.0.2
|
|
22
|
+
Requires-Dist: questionary>=2.1.1 ; extra == 'cli'
|
|
23
|
+
Requires-Dist: typer>=0.16.0 ; extra == 'cli'
|
|
24
|
+
Requires-Python: >=3.12
|
|
25
|
+
Project-URL: Source, https://gitlab.com/ceda-unibas/tools/data-annotations
|
|
26
|
+
Project-URL: Changelog, https://gitlab.com/ceda-unibas/tools/data-annotations/-/blob/main/CHANGELOG.md
|
|
27
|
+
Project-URL: Issues, https://gitlab.com/ceda-unibas/tools/data-annotations/-/issues
|
|
28
|
+
Provides-Extra: cli
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# data-annotations
|
|
32
|
+
|
|
33
|
+
`data-annotations` is a Python package for attaching provenance and structured
|
|
34
|
+
descriptions to the files and directories your workflows produce.
|
|
35
|
+
|
|
36
|
+
It writes plain JSON annotation sidecars that are easy to inspect, archive, and
|
|
37
|
+
publish with research outputs:
|
|
38
|
+
|
|
39
|
+
- files use `artifact.ext.annotation.json`
|
|
40
|
+
- directories use `data-annotations.json` at their root
|
|
41
|
+
|
|
42
|
+
Optional Markdown README sidecars can be generated for human-readable summaries.
|
|
43
|
+
|
|
44
|
+
## Documentation
|
|
45
|
+
|
|
46
|
+
The full documentation is organized as a [Diátaxis](https://diataxis.fr/) site:
|
|
47
|
+
https://ceda-unibas.gitlab.io/tools/data-annotations/
|
|
48
|
+
|
|
49
|
+
- Source: https://gitlab.com/ceda-unibas/tools/data-annotations
|
|
50
|
+
- Changelog: https://gitlab.com/ceda-unibas/tools/data-annotations/-/blob/main/CHANGELOG.md
|
|
51
|
+
- Work items: https://gitlab.com/ceda-unibas/tools/data-annotations/-/work_items
|
|
52
|
+
|
|
53
|
+
## Installation
|
|
54
|
+
|
|
55
|
+
Install the core library from PyPI:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install data-annotations
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Or add it to a project with `uv`:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
uv add data-annotations
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Install CLI support when you want the `data-annotations` command:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install "data-annotations[cli]"
|
|
71
|
+
uv add "data-annotations[cli]"
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Quick start
|
|
75
|
+
|
|
76
|
+
Decorate a function that writes an artifact. When the function runs,
|
|
77
|
+
`data-annotations` records provenance and writes the JSON sidecar.
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from pathlib import Path
|
|
81
|
+
|
|
82
|
+
from data_annotations.annotations import record_file_annotation
|
|
83
|
+
from data_annotations.description import FieldDefinition
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@record_file_annotation(
|
|
87
|
+
title="Participant Cohort",
|
|
88
|
+
summary="Participant-level cohort assignments.",
|
|
89
|
+
fields=[
|
|
90
|
+
FieldDefinition(
|
|
91
|
+
name="participant_id",
|
|
92
|
+
data_type="string",
|
|
93
|
+
summary="Stable participant identifier.",
|
|
94
|
+
required=True,
|
|
95
|
+
nullable=False,
|
|
96
|
+
),
|
|
97
|
+
],
|
|
98
|
+
primary_key=["participant_id"],
|
|
99
|
+
artifact_kind="dataset",
|
|
100
|
+
write_readme=True,
|
|
101
|
+
)
|
|
102
|
+
def write_participants(artifact_path: Path, input_path: Path) -> Path:
|
|
103
|
+
participant_ids = [
|
|
104
|
+
line.strip()
|
|
105
|
+
for line in input_path.read_text(encoding="utf-8").splitlines()[1:]
|
|
106
|
+
if line.strip()
|
|
107
|
+
]
|
|
108
|
+
artifact_path.parent.mkdir(parents=True, exist_ok=True)
|
|
109
|
+
artifact_path.write_text(
|
|
110
|
+
"participant_id\n" + "\n".join(participant_ids) + "\n",
|
|
111
|
+
encoding="utf-8",
|
|
112
|
+
)
|
|
113
|
+
return artifact_path
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
artifact_path = Path("outputs") / "participants.csv"
|
|
117
|
+
write_participants(
|
|
118
|
+
artifact_path=artifact_path,
|
|
119
|
+
input_path=Path("data/raw/participants.csv"),
|
|
120
|
+
)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
This writes:
|
|
124
|
+
|
|
125
|
+
```text
|
|
126
|
+
outputs/participants.csv
|
|
127
|
+
outputs/participants.csv.annotation.json
|
|
128
|
+
outputs/participants.csv.README.md
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## CLI
|
|
132
|
+
|
|
133
|
+
The CLI supports retrospective annotation, provenance inspection, source
|
|
134
|
+
recovery, and sanitized publish bundles.
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
data-annotations annotate file path/to/participants.csv --write-readme
|
|
138
|
+
data-annotations annotate directory path/to/run-001 --recursive
|
|
139
|
+
data-annotations provenance match path/to/participants.csv
|
|
140
|
+
data-annotations provenance chain path/to/participants.csv
|
|
141
|
+
data-annotations provenance checkout path/to/participants.csv
|
|
142
|
+
data-annotations publish path/to/run-001 path/to/publish-bundle
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## Development
|
|
146
|
+
|
|
147
|
+
From a source checkout (assuming you have [Task installed](https://taskfile.dev/docs/installation)):
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
task install
|
|
151
|
+
task lint
|
|
152
|
+
task type-check
|
|
153
|
+
task test
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Build or preview the documentation site:
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
task docs-build
|
|
160
|
+
task docs-serve
|
|
161
|
+
```
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# data-annotations
|
|
2
|
+
|
|
3
|
+
`data-annotations` is a Python package for attaching provenance and structured
|
|
4
|
+
descriptions to the files and directories your workflows produce.
|
|
5
|
+
|
|
6
|
+
It writes plain JSON annotation sidecars that are easy to inspect, archive, and
|
|
7
|
+
publish with research outputs:
|
|
8
|
+
|
|
9
|
+
- files use `artifact.ext.annotation.json`
|
|
10
|
+
- directories use `data-annotations.json` at their root
|
|
11
|
+
|
|
12
|
+
Optional Markdown README sidecars can be generated for human-readable summaries.
|
|
13
|
+
|
|
14
|
+
## Documentation
|
|
15
|
+
|
|
16
|
+
The full documentation is organized as a [Diátaxis](https://diataxis.fr/) site:
|
|
17
|
+
https://ceda-unibas.gitlab.io/tools/data-annotations/
|
|
18
|
+
|
|
19
|
+
- Source: https://gitlab.com/ceda-unibas/tools/data-annotations
|
|
20
|
+
- Changelog: https://gitlab.com/ceda-unibas/tools/data-annotations/-/blob/main/CHANGELOG.md
|
|
21
|
+
- Work items: https://gitlab.com/ceda-unibas/tools/data-annotations/-/work_items
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
Install the core library from PyPI:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install data-annotations
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Or add it to a project with `uv`:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
uv add data-annotations
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Install CLI support when you want the `data-annotations` command:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install "data-annotations[cli]"
|
|
41
|
+
uv add "data-annotations[cli]"
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Quick start
|
|
45
|
+
|
|
46
|
+
Decorate a function that writes an artifact. When the function runs,
|
|
47
|
+
`data-annotations` records provenance and writes the JSON sidecar.
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from pathlib import Path
|
|
51
|
+
|
|
52
|
+
from data_annotations.annotations import record_file_annotation
|
|
53
|
+
from data_annotations.description import FieldDefinition
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@record_file_annotation(
|
|
57
|
+
title="Participant Cohort",
|
|
58
|
+
summary="Participant-level cohort assignments.",
|
|
59
|
+
fields=[
|
|
60
|
+
FieldDefinition(
|
|
61
|
+
name="participant_id",
|
|
62
|
+
data_type="string",
|
|
63
|
+
summary="Stable participant identifier.",
|
|
64
|
+
required=True,
|
|
65
|
+
nullable=False,
|
|
66
|
+
),
|
|
67
|
+
],
|
|
68
|
+
primary_key=["participant_id"],
|
|
69
|
+
artifact_kind="dataset",
|
|
70
|
+
write_readme=True,
|
|
71
|
+
)
|
|
72
|
+
def write_participants(artifact_path: Path, input_path: Path) -> Path:
|
|
73
|
+
participant_ids = [
|
|
74
|
+
line.strip()
|
|
75
|
+
for line in input_path.read_text(encoding="utf-8").splitlines()[1:]
|
|
76
|
+
if line.strip()
|
|
77
|
+
]
|
|
78
|
+
artifact_path.parent.mkdir(parents=True, exist_ok=True)
|
|
79
|
+
artifact_path.write_text(
|
|
80
|
+
"participant_id\n" + "\n".join(participant_ids) + "\n",
|
|
81
|
+
encoding="utf-8",
|
|
82
|
+
)
|
|
83
|
+
return artifact_path
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
artifact_path = Path("outputs") / "participants.csv"
|
|
87
|
+
write_participants(
|
|
88
|
+
artifact_path=artifact_path,
|
|
89
|
+
input_path=Path("data/raw/participants.csv"),
|
|
90
|
+
)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
This writes:
|
|
94
|
+
|
|
95
|
+
```text
|
|
96
|
+
outputs/participants.csv
|
|
97
|
+
outputs/participants.csv.annotation.json
|
|
98
|
+
outputs/participants.csv.README.md
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## CLI
|
|
102
|
+
|
|
103
|
+
The CLI supports retrospective annotation, provenance inspection, source
|
|
104
|
+
recovery, and sanitized publish bundles.
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
data-annotations annotate file path/to/participants.csv --write-readme
|
|
108
|
+
data-annotations annotate directory path/to/run-001 --recursive
|
|
109
|
+
data-annotations provenance match path/to/participants.csv
|
|
110
|
+
data-annotations provenance chain path/to/participants.csv
|
|
111
|
+
data-annotations provenance checkout path/to/participants.csv
|
|
112
|
+
data-annotations publish path/to/run-001 path/to/publish-bundle
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Development
|
|
116
|
+
|
|
117
|
+
From a source checkout (assuming you have [Task installed](https://taskfile.dev/docs/installation)):
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
task install
|
|
121
|
+
task lint
|
|
122
|
+
task type-check
|
|
123
|
+
task test
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Build or preview the documentation site:
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
task docs-build
|
|
130
|
+
task docs-serve
|
|
131
|
+
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "data-annotations"
|
|
3
|
-
version = "2.8.0"
|
|
3
|
+
version = "2.8.1"
|
|
4
4
|
description = "Annotate data artifacts with provenance and descriptions"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -42,8 +42,10 @@ build-backend = "uv_build"
|
|
|
42
42
|
[dependency-groups]
|
|
43
43
|
dev = [
|
|
44
44
|
"ipykernel>=7.2.0",
|
|
45
|
+
"mkdocstrings-python>=2.0.4",
|
|
45
46
|
"prek>=0.3.9",
|
|
46
47
|
"pytest>=9.0.3",
|
|
47
48
|
"ruff>=0.15.10",
|
|
48
49
|
"ty>=0.0.31",
|
|
50
|
+
"zensical>=0.0.45",
|
|
49
51
|
]
|
{data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/annotations/answers.py
RENAMED
|
@@ -125,21 +125,62 @@ _EXPLICIT_PROVENANCE_OVERRIDE_FIELDS = {
|
|
|
125
125
|
|
|
126
126
|
|
|
127
127
|
class ProvenanceAnswers(BaseModel):
|
|
128
|
+
"""Optional provenance overrides supplied through an answers payload."""
|
|
129
|
+
|
|
128
130
|
model_config = ConfigDict(extra="forbid")
|
|
129
131
|
|
|
130
|
-
command: str | list[str] | None = None
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
132
|
+
command: str | list[str] | None = Field(
|
|
133
|
+
default=None,
|
|
134
|
+
description="Command line to record instead of the captured runtime command.",
|
|
135
|
+
)
|
|
136
|
+
script: str | None = Field(
|
|
137
|
+
default=None,
|
|
138
|
+
description="Script path to record instead of the captured runtime script.",
|
|
139
|
+
)
|
|
140
|
+
script_repo_path: str | None = Field(
|
|
141
|
+
default=None,
|
|
142
|
+
description="Script path relative to the source repository root.",
|
|
143
|
+
)
|
|
144
|
+
function: str | None = Field(
|
|
145
|
+
default=None,
|
|
146
|
+
description="Qualified callable name to record in provenance.",
|
|
147
|
+
)
|
|
148
|
+
git_sha: str | None = Field(
|
|
149
|
+
default=None,
|
|
150
|
+
description="Git commit SHA to record in provenance.",
|
|
151
|
+
)
|
|
152
|
+
git_branch: str | None = Field(
|
|
153
|
+
default=None,
|
|
154
|
+
description="Git branch name to record in provenance.",
|
|
155
|
+
)
|
|
156
|
+
git_dirty: bool | None = Field(
|
|
157
|
+
default=None,
|
|
158
|
+
description="Whether the recorded Git worktree was dirty.",
|
|
159
|
+
)
|
|
160
|
+
git_remote_name: str | None = Field(
|
|
161
|
+
default=None,
|
|
162
|
+
description="Git remote name to record in provenance.",
|
|
163
|
+
)
|
|
164
|
+
git_remote_url: str | None = Field(
|
|
165
|
+
default=None,
|
|
166
|
+
description="Git remote URL to record in provenance.",
|
|
167
|
+
)
|
|
168
|
+
git_tags: list[str] | None = Field(
|
|
169
|
+
default=None,
|
|
170
|
+
description="Git tags to record for the captured revision.",
|
|
171
|
+
)
|
|
172
|
+
git_describe: str | None = Field(
|
|
173
|
+
default=None,
|
|
174
|
+
description="git describe output to record for the captured revision.",
|
|
175
|
+
)
|
|
176
|
+
source_code: SourceCodeReference | None = Field(
|
|
177
|
+
default=None,
|
|
178
|
+
description="Explicit source-code reference to record for recovery.",
|
|
179
|
+
)
|
|
180
|
+
infer_from_runtime: list[str] = Field(
|
|
181
|
+
default_factory=list,
|
|
182
|
+
description="Runtime fields that should remain inferred instead of overridden.",
|
|
183
|
+
)
|
|
143
184
|
|
|
144
185
|
@field_validator("infer_from_runtime", mode="before")
|
|
145
186
|
@classmethod
|
|
@@ -185,6 +226,8 @@ class ProvenanceAnswers(BaseModel):
|
|
|
185
226
|
return self
|
|
186
227
|
|
|
187
228
|
def command_tokens(self) -> list[str] | None:
|
|
229
|
+
"""Return `command` as shell-like tokens."""
|
|
230
|
+
|
|
188
231
|
if self.command is None:
|
|
189
232
|
return None
|
|
190
233
|
if isinstance(self.command, list):
|
|
@@ -195,6 +238,8 @@ class ProvenanceAnswers(BaseModel):
|
|
|
195
238
|
raise AnswersError(f"invalid provenance.command: {exc}") from exc
|
|
196
239
|
|
|
197
240
|
def runtime_inference_fields(self) -> set[str]:
|
|
241
|
+
"""Expand runtime inference groups into concrete provenance fields."""
|
|
242
|
+
|
|
198
243
|
fields: set[str] = set()
|
|
199
244
|
for field in self.infer_from_runtime:
|
|
200
245
|
fields.update(_RUNTIME_INFERENCE_GROUPS.get(field, {field}))
|
|
@@ -202,47 +247,124 @@ class ProvenanceAnswers(BaseModel):
|
|
|
202
247
|
|
|
203
248
|
|
|
204
249
|
class BaseAnswers(BaseModel):
|
|
250
|
+
"""Common answers payload fields shared by file and directory modes."""
|
|
251
|
+
|
|
205
252
|
model_config = ConfigDict(extra="forbid")
|
|
206
253
|
|
|
207
|
-
target: str | None = None
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
254
|
+
target: str | None = Field(
|
|
255
|
+
default=None,
|
|
256
|
+
description="Artifact path supplied by the answers payload.",
|
|
257
|
+
)
|
|
258
|
+
title: str | None = Field(
|
|
259
|
+
default=None,
|
|
260
|
+
description="Display title for generated descriptions or README files.",
|
|
261
|
+
)
|
|
262
|
+
summary: str | None = Field(
|
|
263
|
+
default=None,
|
|
264
|
+
description="Short artifact summary for generated descriptions.",
|
|
265
|
+
)
|
|
266
|
+
inputs: list[str] = Field(
|
|
267
|
+
default_factory=list,
|
|
268
|
+
description="Input paths or URIs to record in provenance.",
|
|
269
|
+
)
|
|
270
|
+
params: dict[str, Any] = Field(
|
|
271
|
+
default_factory=dict,
|
|
272
|
+
description="Parameter values to record in provenance.",
|
|
273
|
+
)
|
|
274
|
+
provenance: ProvenanceAnswers = Field(
|
|
275
|
+
default_factory=ProvenanceAnswers,
|
|
276
|
+
description="Provenance overrides and runtime inference controls.",
|
|
277
|
+
)
|
|
213
278
|
|
|
214
279
|
|
|
215
280
|
class FileAnswers(BaseAnswers):
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
281
|
+
"""Validated answers payload for annotating one existing file."""
|
|
282
|
+
|
|
283
|
+
kind: ArtifactKind = Field(
|
|
284
|
+
default="other",
|
|
285
|
+
description="High-level artifact category for the file.",
|
|
286
|
+
)
|
|
287
|
+
sha256: str | None = Field(
|
|
288
|
+
default=None,
|
|
289
|
+
description="Precomputed SHA-256 digest for the file.",
|
|
290
|
+
)
|
|
291
|
+
fields: list[FieldDefinition] = Field(
|
|
292
|
+
default_factory=list,
|
|
293
|
+
description="Field-level descriptions for the file.",
|
|
294
|
+
)
|
|
295
|
+
primary_key: list[str] = Field(
|
|
296
|
+
default_factory=list,
|
|
297
|
+
description="Field names that uniquely identify records in the file.",
|
|
298
|
+
)
|
|
299
|
+
missing_value_codes: dict[str, str] = Field(
|
|
300
|
+
default_factory=dict,
|
|
301
|
+
description="Mapping of missing-value markers to their meanings.",
|
|
302
|
+
)
|
|
221
303
|
|
|
222
304
|
|
|
223
305
|
class DirectoryArtifactAnswers(BaseModel):
|
|
306
|
+
"""Answers entry describing one artifact inside a directory."""
|
|
307
|
+
|
|
224
308
|
model_config = ConfigDict(extra="forbid")
|
|
225
309
|
|
|
226
|
-
path: str
|
|
227
|
-
kind: ArtifactKind = "other"
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
310
|
+
path: str = Field(description="Artifact path relative to the target directory.")
|
|
311
|
+
kind: ArtifactKind = Field(
|
|
312
|
+
default="other",
|
|
313
|
+
description="High-level artifact category.",
|
|
314
|
+
)
|
|
315
|
+
title: str | None = Field(
|
|
316
|
+
default=None,
|
|
317
|
+
description="Display title for the artifact.",
|
|
318
|
+
)
|
|
319
|
+
summary: str | None = Field(
|
|
320
|
+
default=None,
|
|
321
|
+
description="Short description of the artifact.",
|
|
322
|
+
)
|
|
323
|
+
fields: list[FieldDefinition] = Field(
|
|
324
|
+
default_factory=list,
|
|
325
|
+
description="Field-level descriptions for the artifact.",
|
|
326
|
+
)
|
|
327
|
+
primary_key: list[str] = Field(
|
|
328
|
+
default_factory=list,
|
|
329
|
+
description="Field names that uniquely identify records in the artifact.",
|
|
330
|
+
)
|
|
331
|
+
missing_value_codes: dict[str, str] = Field(
|
|
332
|
+
default_factory=dict,
|
|
333
|
+
description="Mapping of missing-value markers to their meanings.",
|
|
334
|
+
)
|
|
233
335
|
|
|
234
336
|
|
|
235
337
|
class DirectoryArtifactGroupAnswers(BaseModel):
|
|
338
|
+
"""Answers entry describing a group of artifacts inside a directory."""
|
|
339
|
+
|
|
236
340
|
model_config = ConfigDict(extra="forbid")
|
|
237
341
|
|
|
238
|
-
title: str
|
|
239
|
-
summary: str | None = None
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
342
|
+
title: str = Field(description="Display title for the artifact group.")
|
|
343
|
+
summary: str | None = Field(
|
|
344
|
+
default=None,
|
|
345
|
+
description="Short description shared by group members.",
|
|
346
|
+
)
|
|
347
|
+
kind: ArtifactKind = Field(
|
|
348
|
+
default="other",
|
|
349
|
+
description="High-level category shared by group members.",
|
|
350
|
+
)
|
|
351
|
+
paths: list[str] = Field(description="Artifact paths included in the group.")
|
|
352
|
+
selector: str | None = Field(
|
|
353
|
+
default=None,
|
|
354
|
+
description="Pattern or rule used to select members of the group.",
|
|
355
|
+
)
|
|
356
|
+
fields: list[FieldDefinition] = Field(
|
|
357
|
+
default_factory=list,
|
|
358
|
+
description="Field-level descriptions shared by group members.",
|
|
359
|
+
)
|
|
360
|
+
primary_key: list[str] = Field(
|
|
361
|
+
default_factory=list,
|
|
362
|
+
description="Field names that uniquely identify records in each member.",
|
|
363
|
+
)
|
|
364
|
+
missing_value_codes: dict[str, str] = Field(
|
|
365
|
+
default_factory=dict,
|
|
366
|
+
description="Mapping of missing-value markers to their meanings.",
|
|
367
|
+
)
|
|
246
368
|
|
|
247
369
|
@field_validator("paths")
|
|
248
370
|
@classmethod
|
|
@@ -255,18 +377,39 @@ class DirectoryArtifactGroupAnswers(BaseModel):
|
|
|
255
377
|
|
|
256
378
|
|
|
257
379
|
class ChildBundleAnswers(BaseModel):
|
|
380
|
+
"""Answers entry for a nested annotated directory bundle."""
|
|
381
|
+
|
|
258
382
|
model_config = ConfigDict(extra="forbid")
|
|
259
383
|
|
|
260
|
-
path: str
|
|
261
|
-
annotation_path: str
|
|
262
|
-
|
|
384
|
+
path: str = Field(description="Path to the child bundle directory.")
|
|
385
|
+
annotation_path: str = Field(
|
|
386
|
+
description="Path to the child bundle annotation document.",
|
|
387
|
+
)
|
|
388
|
+
content_digest: str | None = Field(
|
|
389
|
+
default=None,
|
|
390
|
+
description="Expected content digest for the child bundle.",
|
|
391
|
+
)
|
|
263
392
|
|
|
264
393
|
|
|
265
394
|
class DirectoryAnswers(BaseAnswers):
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
395
|
+
"""Validated answers payload for annotating an existing directory."""
|
|
396
|
+
|
|
397
|
+
artifacts: list[DirectoryArtifactAnswers] = Field(
|
|
398
|
+
default_factory=list,
|
|
399
|
+
description="Individual artifacts to document inside the directory.",
|
|
400
|
+
)
|
|
401
|
+
artifact_groups: list[DirectoryArtifactGroupAnswers] = Field(
|
|
402
|
+
default_factory=list,
|
|
403
|
+
description="Groups of artifacts to document together.",
|
|
404
|
+
)
|
|
405
|
+
child_bundles: list[ChildBundleAnswers] = Field(
|
|
406
|
+
default_factory=list,
|
|
407
|
+
description="Nested annotated bundles to include in the directory subject.",
|
|
408
|
+
)
|
|
409
|
+
checksums: dict[str, str] = Field(
|
|
410
|
+
default_factory=dict,
|
|
411
|
+
description="Precomputed checksums keyed by artifact path.",
|
|
412
|
+
)
|
|
270
413
|
|
|
271
414
|
|
|
272
415
|
FileAnswersInput: TypeAlias = str | Path | Mapping[str, Any] | FileAnswers
|
|
@@ -274,10 +417,34 @@ DirectoryAnswersInput: TypeAlias = str | Path | Mapping[str, Any] | DirectoryAns
|
|
|
274
417
|
|
|
275
418
|
|
|
276
419
|
def load_file_answers(source: FileAnswersInput) -> FileAnswers:
|
|
420
|
+
"""Load and validate answers for file annotation.
|
|
421
|
+
|
|
422
|
+
Args:
|
|
423
|
+
source: YAML path, mapping, or existing `FileAnswers` instance.
|
|
424
|
+
|
|
425
|
+
Returns:
|
|
426
|
+
Validated file answers.
|
|
427
|
+
|
|
428
|
+
Raises:
|
|
429
|
+
AnswersError: If the source cannot be loaded or validated.
|
|
430
|
+
"""
|
|
431
|
+
|
|
277
432
|
return _validate_answers(source, mode="file")
|
|
278
433
|
|
|
279
434
|
|
|
280
435
|
def load_directory_answers(source: DirectoryAnswersInput) -> DirectoryAnswers:
|
|
436
|
+
"""Load and validate answers for directory annotation.
|
|
437
|
+
|
|
438
|
+
Args:
|
|
439
|
+
source: YAML path, mapping, or existing `DirectoryAnswers` instance.
|
|
440
|
+
|
|
441
|
+
Returns:
|
|
442
|
+
Validated directory answers.
|
|
443
|
+
|
|
444
|
+
Raises:
|
|
445
|
+
AnswersError: If the source cannot be loaded or validated.
|
|
446
|
+
"""
|
|
447
|
+
|
|
281
448
|
return _validate_answers(source, mode="directory")
|
|
282
449
|
|
|
283
450
|
|