data-annotations 2.8.0__tar.gz → 2.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. data_annotations-2.8.1/PKG-INFO +161 -0
  2. data_annotations-2.8.1/README.md +131 -0
  3. {data_annotations-2.8.0 → data_annotations-2.8.1}/pyproject.toml +3 -1
  4. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/annotations/answers.py +213 -46
  5. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/annotations/decorators.py +54 -12
  6. data_annotations-2.8.1/src/data_annotations/annotations/models.py +98 -0
  7. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/annotations/writers.py +119 -0
  8. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/description/decorators.py +38 -12
  9. data_annotations-2.8.1/src/data_annotations/description/models.py +261 -0
  10. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/description/writers.py +73 -0
  11. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/decorators.py +34 -10
  12. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/git.py +14 -3
  13. data_annotations-2.8.1/src/data_annotations/provenance/models.py +244 -0
  14. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/recovery/chain.py +24 -0
  15. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/recovery/matching.py +15 -0
  16. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/recovery/sources.py +27 -0
  17. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/recovery/types.py +6 -0
  18. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/runtime.py +19 -2
  19. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/writers.py +66 -0
  20. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/publish.py +82 -0
  21. data_annotations-2.8.0/PKG-INFO +0 -1059
  22. data_annotations-2.8.0/README.md +0 -1029
  23. data_annotations-2.8.0/src/data_annotations/annotations/models.py +0 -47
  24. data_annotations-2.8.0/src/data_annotations/description/models.py +0 -104
  25. data_annotations-2.8.0/src/data_annotations/provenance/models.py +0 -88
  26. {data_annotations-2.8.0 → data_annotations-2.8.1}/LICENSE +0 -0
  27. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/__init__.py +0 -0
  28. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/_decorators.py +0 -0
  29. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/annotations/__init__.py +0 -0
  30. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli.py +0 -0
  31. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/__init__.py +0 -0
  32. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/annotate/__init__.py +0 -0
  33. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/annotate/helpers.py +0 -0
  34. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/answers.py +0 -0
  35. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/common.py +0 -0
  36. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/prompts.py +0 -0
  37. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/provenance_commands.py +0 -0
  38. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/cli_app/publish.py +0 -0
  39. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/description/__init__.py +0 -0
  40. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/__init__.py +0 -0
  41. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/recovery/__init__.py +0 -0
  42. {data_annotations-2.8.0 → data_annotations-2.8.1}/src/data_annotations/provenance/recovery/manifest.py +0 -0
@@ -0,0 +1,161 @@
1
+ Metadata-Version: 2.4
2
+ Name: data-annotations
3
+ Version: 2.8.1
4
+ Summary: Annotate data artifacts with provenance and descriptions
5
+ Keywords: annotations,data,metadata,provenance,reproducibility
6
+ Author: Rodrigo C. G. Pena
7
+ Author-email: Rodrigo C. G. Pena <rodrigo.cerqueiragonzalezpena@unibas.ch>
8
+ License-Expression: BSD-3-Clause
9
+ License-File: LICENSE
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Programming Language :: Python :: 3.14
18
+ Classifier: Topic :: Scientific/Engineering
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Requires-Dist: pydantic>=2.13.1
21
+ Requires-Dist: pyyaml>=6.0.2
22
+ Requires-Dist: questionary>=2.1.1 ; extra == 'cli'
23
+ Requires-Dist: typer>=0.16.0 ; extra == 'cli'
24
+ Requires-Python: >=3.12
25
+ Project-URL: Source, https://gitlab.com/ceda-unibas/tools/data-annotations
26
+ Project-URL: Changelog, https://gitlab.com/ceda-unibas/tools/data-annotations/-/blob/main/CHANGELOG.md
27
+ Project-URL: Issues, https://gitlab.com/ceda-unibas/tools/data-annotations/-/issues
28
+ Provides-Extra: cli
29
+ Description-Content-Type: text/markdown
30
+
31
+ # data-annotations
32
+
33
+ `data-annotations` is a Python package for attaching provenance and structured
34
+ descriptions to the files and directories your workflows produce.
35
+
36
+ It writes plain JSON annotation sidecars that are easy to inspect, archive, and
37
+ publish with research outputs:
38
+
39
+ - files use `artifact.ext.annotation.json`
40
+ - directories use `data-annotations.json` at their root
41
+
42
+ Optional Markdown README sidecars can be generated for human-readable summaries.
43
+
44
+ ## Documentation
45
+
46
+ The full documentation is organized as a [Diátaxis](https://diataxis.fr/) site:
47
+ https://ceda-unibas.gitlab.io/tools/data-annotations/
48
+
49
+ - Source: https://gitlab.com/ceda-unibas/tools/data-annotations
50
+ - Changelog: https://gitlab.com/ceda-unibas/tools/data-annotations/-/blob/main/CHANGELOG.md
51
+ - Work items: https://gitlab.com/ceda-unibas/tools/data-annotations/-/work_items
52
+
53
+ ## Installation
54
+
55
+ Install the core library from PyPI:
56
+
57
+ ```bash
58
+ pip install data-annotations
59
+ ```
60
+
61
+ Or add it to a project with `uv`:
62
+
63
+ ```bash
64
+ uv add data-annotations
65
+ ```
66
+
67
+ Install CLI support when you want the `data-annotations` command:
68
+
69
+ ```bash
70
+ pip install "data-annotations[cli]"
71
+ uv add "data-annotations[cli]"
72
+ ```
73
+
74
+ ## Quick start
75
+
76
+ Decorate a function that writes an artifact. When the function runs,
77
+ `data-annotations` records provenance and writes the JSON sidecar.
78
+
79
+ ```python
80
+ from pathlib import Path
81
+
82
+ from data_annotations.annotations import record_file_annotation
83
+ from data_annotations.description import FieldDefinition
84
+
85
+
86
+ @record_file_annotation(
87
+ title="Participant Cohort",
88
+ summary="Participant-level cohort assignments.",
89
+ fields=[
90
+ FieldDefinition(
91
+ name="participant_id",
92
+ data_type="string",
93
+ summary="Stable participant identifier.",
94
+ required=True,
95
+ nullable=False,
96
+ ),
97
+ ],
98
+ primary_key=["participant_id"],
99
+ artifact_kind="dataset",
100
+ write_readme=True,
101
+ )
102
+ def write_participants(artifact_path: Path, input_path: Path) -> Path:
103
+ participant_ids = [
104
+ line.strip()
105
+ for line in input_path.read_text(encoding="utf-8").splitlines()[1:]
106
+ if line.strip()
107
+ ]
108
+ artifact_path.parent.mkdir(parents=True, exist_ok=True)
109
+ artifact_path.write_text(
110
+ "participant_id\n" + "\n".join(participant_ids) + "\n",
111
+ encoding="utf-8",
112
+ )
113
+ return artifact_path
114
+
115
+
116
+ artifact_path = Path("outputs") / "participants.csv"
117
+ write_participants(
118
+ artifact_path=artifact_path,
119
+ input_path=Path("data/raw/participants.csv"),
120
+ )
121
+ ```
122
+
123
+ This writes:
124
+
125
+ ```text
126
+ outputs/participants.csv
127
+ outputs/participants.csv.annotation.json
128
+ outputs/participants.csv.README.md
129
+ ```
130
+
131
+ ## CLI
132
+
133
+ The CLI supports retrospective annotation, provenance inspection, source
134
+ recovery, and sanitized publish bundles.
135
+
136
+ ```bash
137
+ data-annotations annotate file path/to/participants.csv --write-readme
138
+ data-annotations annotate directory path/to/run-001 --recursive
139
+ data-annotations provenance match path/to/participants.csv
140
+ data-annotations provenance chain path/to/participants.csv
141
+ data-annotations provenance checkout path/to/participants.csv
142
+ data-annotations publish path/to/run-001 path/to/publish-bundle
143
+ ```
144
+
145
+ ## Development
146
+
147
+ From a source checkout (assuming you have [Task installed](https://taskfile.dev/docs/installation)):
148
+
149
+ ```bash
150
+ task install
151
+ task lint
152
+ task type-check
153
+ task test
154
+ ```
155
+
156
+ Build or preview the documentation site:
157
+
158
+ ```bash
159
+ task docs-build
160
+ task docs-serve
161
+ ```
@@ -0,0 +1,131 @@
1
+ # data-annotations
2
+
3
+ `data-annotations` is a Python package for attaching provenance and structured
4
+ descriptions to the files and directories your workflows produce.
5
+
6
+ It writes plain JSON annotation sidecars that are easy to inspect, archive, and
7
+ publish with research outputs:
8
+
9
+ - files use `artifact.ext.annotation.json`
10
+ - directories use `data-annotations.json` at their root
11
+
12
+ Optional Markdown README sidecars can be generated for human-readable summaries.
13
+
14
+ ## Documentation
15
+
16
+ The full documentation is organized as a [Diátaxis](https://diataxis.fr/) site:
17
+ https://ceda-unibas.gitlab.io/tools/data-annotations/
18
+
19
+ - Source: https://gitlab.com/ceda-unibas/tools/data-annotations
20
+ - Changelog: https://gitlab.com/ceda-unibas/tools/data-annotations/-/blob/main/CHANGELOG.md
21
+ - Work items: https://gitlab.com/ceda-unibas/tools/data-annotations/-/work_items
22
+
23
+ ## Installation
24
+
25
+ Install the core library from PyPI:
26
+
27
+ ```bash
28
+ pip install data-annotations
29
+ ```
30
+
31
+ Or add it to a project with `uv`:
32
+
33
+ ```bash
34
+ uv add data-annotations
35
+ ```
36
+
37
+ Install CLI support when you want the `data-annotations` command:
38
+
39
+ ```bash
40
+ pip install "data-annotations[cli]"
41
+ uv add "data-annotations[cli]"
42
+ ```
43
+
44
+ ## Quick start
45
+
46
+ Decorate a function that writes an artifact. When the function runs,
47
+ `data-annotations` records provenance and writes the JSON sidecar.
48
+
49
+ ```python
50
+ from pathlib import Path
51
+
52
+ from data_annotations.annotations import record_file_annotation
53
+ from data_annotations.description import FieldDefinition
54
+
55
+
56
+ @record_file_annotation(
57
+ title="Participant Cohort",
58
+ summary="Participant-level cohort assignments.",
59
+ fields=[
60
+ FieldDefinition(
61
+ name="participant_id",
62
+ data_type="string",
63
+ summary="Stable participant identifier.",
64
+ required=True,
65
+ nullable=False,
66
+ ),
67
+ ],
68
+ primary_key=["participant_id"],
69
+ artifact_kind="dataset",
70
+ write_readme=True,
71
+ )
72
+ def write_participants(artifact_path: Path, input_path: Path) -> Path:
73
+ participant_ids = [
74
+ line.strip()
75
+ for line in input_path.read_text(encoding="utf-8").splitlines()[1:]
76
+ if line.strip()
77
+ ]
78
+ artifact_path.parent.mkdir(parents=True, exist_ok=True)
79
+ artifact_path.write_text(
80
+ "participant_id\n" + "\n".join(participant_ids) + "\n",
81
+ encoding="utf-8",
82
+ )
83
+ return artifact_path
84
+
85
+
86
+ artifact_path = Path("outputs") / "participants.csv"
87
+ write_participants(
88
+ artifact_path=artifact_path,
89
+ input_path=Path("data/raw/participants.csv"),
90
+ )
91
+ ```
92
+
93
+ This writes:
94
+
95
+ ```text
96
+ outputs/participants.csv
97
+ outputs/participants.csv.annotation.json
98
+ outputs/participants.csv.README.md
99
+ ```
100
+
101
+ ## CLI
102
+
103
+ The CLI supports retrospective annotation, provenance inspection, source
104
+ recovery, and sanitized publish bundles.
105
+
106
+ ```bash
107
+ data-annotations annotate file path/to/participants.csv --write-readme
108
+ data-annotations annotate directory path/to/run-001 --recursive
109
+ data-annotations provenance match path/to/participants.csv
110
+ data-annotations provenance chain path/to/participants.csv
111
+ data-annotations provenance checkout path/to/participants.csv
112
+ data-annotations publish path/to/run-001 path/to/publish-bundle
113
+ ```
114
+
115
+ ## Development
116
+
117
+ From a source checkout (assuming you have [Task installed](https://taskfile.dev/docs/installation)):
118
+
119
+ ```bash
120
+ task install
121
+ task lint
122
+ task type-check
123
+ task test
124
+ ```
125
+
126
+ Build or preview the documentation site:
127
+
128
+ ```bash
129
+ task docs-build
130
+ task docs-serve
131
+ ```
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "data-annotations"
3
- version = "2.8.0"
3
+ version = "2.8.1"
4
4
  description = "Annotate data artifacts with provenance and descriptions"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -42,8 +42,10 @@ build-backend = "uv_build"
42
42
  [dependency-groups]
43
43
  dev = [
44
44
  "ipykernel>=7.2.0",
45
+ "mkdocstrings-python>=2.0.4",
45
46
  "prek>=0.3.9",
46
47
  "pytest>=9.0.3",
47
48
  "ruff>=0.15.10",
48
49
  "ty>=0.0.31",
50
+ "zensical>=0.0.45",
49
51
  ]
@@ -125,21 +125,62 @@ _EXPLICIT_PROVENANCE_OVERRIDE_FIELDS = {
125
125
 
126
126
 
127
127
  class ProvenanceAnswers(BaseModel):
128
+ """Optional provenance overrides supplied through an answers payload."""
129
+
128
130
  model_config = ConfigDict(extra="forbid")
129
131
 
130
- command: str | list[str] | None = None
131
- script: str | None = None
132
- script_repo_path: str | None = None
133
- function: str | None = None
134
- git_sha: str | None = None
135
- git_branch: str | None = None
136
- git_dirty: bool | None = None
137
- git_remote_name: str | None = None
138
- git_remote_url: str | None = None
139
- git_tags: list[str] | None = None
140
- git_describe: str | None = None
141
- source_code: SourceCodeReference | None = None
142
- infer_from_runtime: list[str] = Field(default_factory=list)
132
+ command: str | list[str] | None = Field(
133
+ default=None,
134
+ description="Command line to record instead of the captured runtime command.",
135
+ )
136
+ script: str | None = Field(
137
+ default=None,
138
+ description="Script path to record instead of the captured runtime script.",
139
+ )
140
+ script_repo_path: str | None = Field(
141
+ default=None,
142
+ description="Script path relative to the source repository root.",
143
+ )
144
+ function: str | None = Field(
145
+ default=None,
146
+ description="Qualified callable name to record in provenance.",
147
+ )
148
+ git_sha: str | None = Field(
149
+ default=None,
150
+ description="Git commit SHA to record in provenance.",
151
+ )
152
+ git_branch: str | None = Field(
153
+ default=None,
154
+ description="Git branch name to record in provenance.",
155
+ )
156
+ git_dirty: bool | None = Field(
157
+ default=None,
158
+ description="Whether the recorded Git worktree was dirty.",
159
+ )
160
+ git_remote_name: str | None = Field(
161
+ default=None,
162
+ description="Git remote name to record in provenance.",
163
+ )
164
+ git_remote_url: str | None = Field(
165
+ default=None,
166
+ description="Git remote URL to record in provenance.",
167
+ )
168
+ git_tags: list[str] | None = Field(
169
+ default=None,
170
+ description="Git tags to record for the captured revision.",
171
+ )
172
+ git_describe: str | None = Field(
173
+ default=None,
174
+ description="git describe output to record for the captured revision.",
175
+ )
176
+ source_code: SourceCodeReference | None = Field(
177
+ default=None,
178
+ description="Explicit source-code reference to record for recovery.",
179
+ )
180
+ infer_from_runtime: list[str] = Field(
181
+ default_factory=list,
182
+ description="Runtime fields that should remain inferred instead of overridden.",
183
+ )
143
184
 
144
185
  @field_validator("infer_from_runtime", mode="before")
145
186
  @classmethod
@@ -185,6 +226,8 @@ class ProvenanceAnswers(BaseModel):
185
226
  return self
186
227
 
187
228
  def command_tokens(self) -> list[str] | None:
229
+ """Return `command` as shell-like tokens."""
230
+
188
231
  if self.command is None:
189
232
  return None
190
233
  if isinstance(self.command, list):
@@ -195,6 +238,8 @@ class ProvenanceAnswers(BaseModel):
195
238
  raise AnswersError(f"invalid provenance.command: {exc}") from exc
196
239
 
197
240
  def runtime_inference_fields(self) -> set[str]:
241
+ """Expand runtime inference groups into concrete provenance fields."""
242
+
198
243
  fields: set[str] = set()
199
244
  for field in self.infer_from_runtime:
200
245
  fields.update(_RUNTIME_INFERENCE_GROUPS.get(field, {field}))
@@ -202,47 +247,124 @@ class ProvenanceAnswers(BaseModel):
202
247
 
203
248
 
204
249
  class BaseAnswers(BaseModel):
250
+ """Common answers payload fields shared by file and directory modes."""
251
+
205
252
  model_config = ConfigDict(extra="forbid")
206
253
 
207
- target: str | None = None
208
- title: str | None = None
209
- summary: str | None = None
210
- inputs: list[str] = Field(default_factory=list)
211
- params: dict[str, Any] = Field(default_factory=dict)
212
- provenance: ProvenanceAnswers = Field(default_factory=ProvenanceAnswers)
254
+ target: str | None = Field(
255
+ default=None,
256
+ description="Artifact path supplied by the answers payload.",
257
+ )
258
+ title: str | None = Field(
259
+ default=None,
260
+ description="Display title for generated descriptions or README files.",
261
+ )
262
+ summary: str | None = Field(
263
+ default=None,
264
+ description="Short artifact summary for generated descriptions.",
265
+ )
266
+ inputs: list[str] = Field(
267
+ default_factory=list,
268
+ description="Input paths or URIs to record in provenance.",
269
+ )
270
+ params: dict[str, Any] = Field(
271
+ default_factory=dict,
272
+ description="Parameter values to record in provenance.",
273
+ )
274
+ provenance: ProvenanceAnswers = Field(
275
+ default_factory=ProvenanceAnswers,
276
+ description="Provenance overrides and runtime inference controls.",
277
+ )
213
278
 
214
279
 
215
280
  class FileAnswers(BaseAnswers):
216
- kind: ArtifactKind = "other"
217
- sha256: str | None = None
218
- fields: list[FieldDefinition] = Field(default_factory=list)
219
- primary_key: list[str] = Field(default_factory=list)
220
- missing_value_codes: dict[str, str] = Field(default_factory=dict)
281
+ """Validated answers payload for annotating one existing file."""
282
+
283
+ kind: ArtifactKind = Field(
284
+ default="other",
285
+ description="High-level artifact category for the file.",
286
+ )
287
+ sha256: str | None = Field(
288
+ default=None,
289
+ description="Precomputed SHA-256 digest for the file.",
290
+ )
291
+ fields: list[FieldDefinition] = Field(
292
+ default_factory=list,
293
+ description="Field-level descriptions for the file.",
294
+ )
295
+ primary_key: list[str] = Field(
296
+ default_factory=list,
297
+ description="Field names that uniquely identify records in the file.",
298
+ )
299
+ missing_value_codes: dict[str, str] = Field(
300
+ default_factory=dict,
301
+ description="Mapping of missing-value markers to their meanings.",
302
+ )
221
303
 
222
304
 
223
305
  class DirectoryArtifactAnswers(BaseModel):
306
+ """Answers entry describing one artifact inside a directory."""
307
+
224
308
  model_config = ConfigDict(extra="forbid")
225
309
 
226
- path: str
227
- kind: ArtifactKind = "other"
228
- title: str | None = None
229
- summary: str | None = None
230
- fields: list[FieldDefinition] = Field(default_factory=list)
231
- primary_key: list[str] = Field(default_factory=list)
232
- missing_value_codes: dict[str, str] = Field(default_factory=dict)
310
+ path: str = Field(description="Artifact path relative to the target directory.")
311
+ kind: ArtifactKind = Field(
312
+ default="other",
313
+ description="High-level artifact category.",
314
+ )
315
+ title: str | None = Field(
316
+ default=None,
317
+ description="Display title for the artifact.",
318
+ )
319
+ summary: str | None = Field(
320
+ default=None,
321
+ description="Short description of the artifact.",
322
+ )
323
+ fields: list[FieldDefinition] = Field(
324
+ default_factory=list,
325
+ description="Field-level descriptions for the artifact.",
326
+ )
327
+ primary_key: list[str] = Field(
328
+ default_factory=list,
329
+ description="Field names that uniquely identify records in the artifact.",
330
+ )
331
+ missing_value_codes: dict[str, str] = Field(
332
+ default_factory=dict,
333
+ description="Mapping of missing-value markers to their meanings.",
334
+ )
233
335
 
234
336
 
235
337
  class DirectoryArtifactGroupAnswers(BaseModel):
338
+ """Answers entry describing a group of artifacts inside a directory."""
339
+
236
340
  model_config = ConfigDict(extra="forbid")
237
341
 
238
- title: str
239
- summary: str | None = None
240
- kind: ArtifactKind = "other"
241
- paths: list[str]
242
- selector: str | None = None
243
- fields: list[FieldDefinition] = Field(default_factory=list)
244
- primary_key: list[str] = Field(default_factory=list)
245
- missing_value_codes: dict[str, str] = Field(default_factory=dict)
342
+ title: str = Field(description="Display title for the artifact group.")
343
+ summary: str | None = Field(
344
+ default=None,
345
+ description="Short description shared by group members.",
346
+ )
347
+ kind: ArtifactKind = Field(
348
+ default="other",
349
+ description="High-level category shared by group members.",
350
+ )
351
+ paths: list[str] = Field(description="Artifact paths included in the group.")
352
+ selector: str | None = Field(
353
+ default=None,
354
+ description="Pattern or rule used to select members of the group.",
355
+ )
356
+ fields: list[FieldDefinition] = Field(
357
+ default_factory=list,
358
+ description="Field-level descriptions shared by group members.",
359
+ )
360
+ primary_key: list[str] = Field(
361
+ default_factory=list,
362
+ description="Field names that uniquely identify records in each member.",
363
+ )
364
+ missing_value_codes: dict[str, str] = Field(
365
+ default_factory=dict,
366
+ description="Mapping of missing-value markers to their meanings.",
367
+ )
246
368
 
247
369
  @field_validator("paths")
248
370
  @classmethod
@@ -255,18 +377,39 @@ class DirectoryArtifactGroupAnswers(BaseModel):
255
377
 
256
378
 
257
379
  class ChildBundleAnswers(BaseModel):
380
+ """Answers entry for a nested annotated directory bundle."""
381
+
258
382
  model_config = ConfigDict(extra="forbid")
259
383
 
260
- path: str
261
- annotation_path: str
262
- content_digest: str | None = None
384
+ path: str = Field(description="Path to the child bundle directory.")
385
+ annotation_path: str = Field(
386
+ description="Path to the child bundle annotation document.",
387
+ )
388
+ content_digest: str | None = Field(
389
+ default=None,
390
+ description="Expected content digest for the child bundle.",
391
+ )
263
392
 
264
393
 
265
394
  class DirectoryAnswers(BaseAnswers):
266
- artifacts: list[DirectoryArtifactAnswers] = Field(default_factory=list)
267
- artifact_groups: list[DirectoryArtifactGroupAnswers] = Field(default_factory=list)
268
- child_bundles: list[ChildBundleAnswers] = Field(default_factory=list)
269
- checksums: dict[str, str] = Field(default_factory=dict)
395
+ """Validated answers payload for annotating an existing directory."""
396
+
397
+ artifacts: list[DirectoryArtifactAnswers] = Field(
398
+ default_factory=list,
399
+ description="Individual artifacts to document inside the directory.",
400
+ )
401
+ artifact_groups: list[DirectoryArtifactGroupAnswers] = Field(
402
+ default_factory=list,
403
+ description="Groups of artifacts to document together.",
404
+ )
405
+ child_bundles: list[ChildBundleAnswers] = Field(
406
+ default_factory=list,
407
+ description="Nested annotated bundles to include in the directory subject.",
408
+ )
409
+ checksums: dict[str, str] = Field(
410
+ default_factory=dict,
411
+ description="Precomputed checksums keyed by artifact path.",
412
+ )
270
413
 
271
414
 
272
415
  FileAnswersInput: TypeAlias = str | Path | Mapping[str, Any] | FileAnswers
@@ -274,10 +417,34 @@ DirectoryAnswersInput: TypeAlias = str | Path | Mapping[str, Any] | DirectoryAns
274
417
 
275
418
 
276
419
  def load_file_answers(source: FileAnswersInput) -> FileAnswers:
420
+ """Load and validate answers for file annotation.
421
+
422
+ Args:
423
+ source: YAML path, mapping, or existing `FileAnswers` instance.
424
+
425
+ Returns:
426
+ Validated file answers.
427
+
428
+ Raises:
429
+ AnswersError: If the source cannot be loaded or validated.
430
+ """
431
+
277
432
  return _validate_answers(source, mode="file")
278
433
 
279
434
 
280
435
  def load_directory_answers(source: DirectoryAnswersInput) -> DirectoryAnswers:
436
+ """Load and validate answers for directory annotation.
437
+
438
+ Args:
439
+ source: YAML path, mapping, or existing `DirectoryAnswers` instance.
440
+
441
+ Returns:
442
+ Validated directory answers.
443
+
444
+ Raises:
445
+ AnswersError: If the source cannot be loaded or validated.
446
+ """
447
+
281
448
  return _validate_answers(source, mode="directory")
282
449
 
283
450