data-annotations 2.7.0__tar.gz → 2.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {data_annotations-2.7.0 → data_annotations-2.8.0}/PKG-INFO +22 -10
  2. {data_annotations-2.7.0 → data_annotations-2.8.0}/README.md +21 -9
  3. {data_annotations-2.7.0 → data_annotations-2.8.0}/pyproject.toml +1 -1
  4. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/annotations/decorators.py +2 -2
  5. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/annotations/writers.py +2 -2
  6. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/cli_app/annotate/__init__.py +20 -4
  7. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/cli_app/annotate/helpers.py +25 -19
  8. {data_annotations-2.7.0 → data_annotations-2.8.0}/LICENSE +0 -0
  9. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/__init__.py +0 -0
  10. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/_decorators.py +0 -0
  11. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/annotations/__init__.py +0 -0
  12. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/annotations/answers.py +0 -0
  13. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/annotations/models.py +0 -0
  14. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/cli.py +0 -0
  15. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/cli_app/__init__.py +0 -0
  16. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/cli_app/answers.py +0 -0
  17. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/cli_app/common.py +0 -0
  18. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/cli_app/prompts.py +0 -0
  19. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/cli_app/provenance_commands.py +0 -0
  20. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/cli_app/publish.py +0 -0
  21. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/description/__init__.py +0 -0
  22. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/description/decorators.py +0 -0
  23. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/description/models.py +0 -0
  24. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/description/writers.py +0 -0
  25. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/provenance/__init__.py +0 -0
  26. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/provenance/decorators.py +0 -0
  27. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/provenance/git.py +0 -0
  28. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/provenance/models.py +0 -0
  29. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/provenance/recovery/__init__.py +0 -0
  30. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/provenance/recovery/chain.py +0 -0
  31. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/provenance/recovery/manifest.py +0 -0
  32. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/provenance/recovery/matching.py +0 -0
  33. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/provenance/recovery/sources.py +0 -0
  34. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/provenance/recovery/types.py +0 -0
  35. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/provenance/runtime.py +0 -0
  36. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/provenance/writers.py +0 -0
  37. {data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/publish.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-annotations
3
- Version: 2.7.0
3
+ Version: 2.8.0
4
4
  Summary: Annotate data artifacts with provenance and descriptions
5
5
  Keywords: annotations,data,metadata,provenance,reproducibility
6
6
  Author: Rodrigo C. G. Pena
@@ -126,13 +126,13 @@ Git tags and `git_describe` are human-friendly hints only. For Git sources,
126
126
  The recommended way to annotate your data artifacts is to decorate pipeline
127
127
  functions that consume some inputs and parameters, then write those artifacts.
128
128
  This keeps the artifact-writing logic explicit while letting `data-annotations` capture
129
- provenance and emit sidecars automatically.
129
+ provenance and emit the annotation JSON sidecar automatically.
130
130
 
131
131
  For example, here is a complete file-level annotation workflow using the
132
132
  `record_file_annotation(...)` decorator. Once `write_participants` is called, it
133
- automatically generates sidecars `participants.csv.annotation.json` and `participants.csv.README.md`.
134
- The JSON sidecar will contain provenance and description metadata, and the Markdown sidecar
135
- will have a human-friendly rendering of the description provided in the decorator.
133
+ automatically generates `participants.csv.annotation.json`. Set
134
+ `write_readme=True` when you also want `participants.csv.README.md`, a
135
+ human-friendly Markdown rendering of the description provided in the decorator.
136
136
 
137
137
  ```python
138
138
  from pathlib import Path
@@ -165,6 +165,7 @@ from data_annotations.description import AllowedValue, FieldDefinition
165
165
  artifact_kind="dataset",
166
166
  acquisition_context={"source": "Study A registry export"},
167
167
  generation_context={"pipeline": "baseline-v1"},
168
+ write_readme=True,
168
169
  )
169
170
  def write_participants(
170
171
  artifact_path: Path,
@@ -285,6 +286,7 @@ from data_annotations.provenance import ProducedFile
285
286
  summary="Directory-level documentation for the validation run outputs.",
286
287
  acquisition_context={"source": "Study A registry export"},
287
288
  generation_context={"pipeline": "baseline-v1"},
289
+ write_readme=True,
288
290
  )
289
291
  def build_outputs(
290
292
  output_dir: Path,
@@ -397,7 +399,11 @@ annotate_file(
397
399
  answers={"title": "Run Summary", "summary": "Validation run summary."},
398
400
  )
399
401
 
400
- @record_file_annotation(answers="participants.yaml")
402
+ # Add write_readme=True when you also want Markdown README sidecars.
403
+ annotate_file(answers="participants.yaml", write_readme=True)
404
+ annotate_directory(answers="run-001.yaml", write_readme=True)
405
+
406
+ @record_file_annotation(answers="participants.yaml", write_readme=True)
401
407
  def write_participants(artifact_path, input_path):
402
408
  ...
403
409
  ```
@@ -707,9 +713,14 @@ data-annotations annotate directory path/to/run-001 \
707
713
  --group-kind plot
708
714
  ```
709
715
 
710
- These commands prompt for missing details, write `*.annotation.json` or `data-annotations.json`,
711
- and optionally derive README sidecars. Post-hoc records are marked with
712
- `capture_mode="post_hoc"`.
716
+ These commands prompt for missing details and write `*.annotation.json` or
717
+ `data-annotations.json`. Post-hoc records are marked with
718
+ `capture_mode="post_hoc"`. README sidecars are opt-in:
719
+
720
+ ```bash
721
+ data-annotations annotate file path/to/participants.csv --write-readme
722
+ data-annotations annotate directory path/to/run-001 --write-readme
723
+ ```
713
724
 
714
725
  For shell workflows, you can move the prompt answers into a YAML file and run
715
726
  the command non-interactively:
@@ -1044,4 +1055,5 @@ uv run python examples/publish_cli.py
1044
1055
  ```
1045
1056
 
1046
1057
  Each example writes its outputs to a fresh temporary directory and prints the
1047
- location so you can inspect the generated annotation documents and README sidecars.
1058
+ location so you can inspect the generated annotation documents and any requested
1059
+ README sidecars.
@@ -96,13 +96,13 @@ Git tags and `git_describe` are human-friendly hints only. For Git sources,
96
96
  The recommended way to annotate your data artifacts is to decorate pipeline
97
97
  functions that consume some inputs and parameters, then write those artifacts.
98
98
  This keeps the artifact-writing logic explicit while letting `data-annotations` capture
99
- provenance and emit sidecars automatically.
99
+ provenance and emit the annotation JSON sidecar automatically.
100
100
 
101
101
  For example, here is a complete file-level annotation workflow using the
102
102
  `record_file_annotation(...)` decorator. Once `write_participants` is called, it
103
- automatically generates sidecars `participants.csv.annotation.json` and `participants.csv.README.md`.
104
- The JSON sidecar will contain provenance and description metadata, and the Markdown sidecar
105
- will have a human-friendly rendering of the description provided in the decorator.
103
+ automatically generates `participants.csv.annotation.json`. Set
104
+ `write_readme=True` when you also want `participants.csv.README.md`, a
105
+ human-friendly Markdown rendering of the description provided in the decorator.
106
106
 
107
107
  ```python
108
108
  from pathlib import Path
@@ -135,6 +135,7 @@ from data_annotations.description import AllowedValue, FieldDefinition
135
135
  artifact_kind="dataset",
136
136
  acquisition_context={"source": "Study A registry export"},
137
137
  generation_context={"pipeline": "baseline-v1"},
138
+ write_readme=True,
138
139
  )
139
140
  def write_participants(
140
141
  artifact_path: Path,
@@ -255,6 +256,7 @@ from data_annotations.provenance import ProducedFile
255
256
  summary="Directory-level documentation for the validation run outputs.",
256
257
  acquisition_context={"source": "Study A registry export"},
257
258
  generation_context={"pipeline": "baseline-v1"},
259
+ write_readme=True,
258
260
  )
259
261
  def build_outputs(
260
262
  output_dir: Path,
@@ -367,7 +369,11 @@ annotate_file(
367
369
  answers={"title": "Run Summary", "summary": "Validation run summary."},
368
370
  )
369
371
 
370
- @record_file_annotation(answers="participants.yaml")
372
+ # Add write_readme=True when you also want Markdown README sidecars.
373
+ annotate_file(answers="participants.yaml", write_readme=True)
374
+ annotate_directory(answers="run-001.yaml", write_readme=True)
375
+
376
+ @record_file_annotation(answers="participants.yaml", write_readme=True)
371
377
  def write_participants(artifact_path, input_path):
372
378
  ...
373
379
  ```
@@ -677,9 +683,14 @@ data-annotations annotate directory path/to/run-001 \
677
683
  --group-kind plot
678
684
  ```
679
685
 
680
- These commands prompt for missing details, write `*.annotation.json` or `data-annotations.json`,
681
- and optionally derive README sidecars. Post-hoc records are marked with
682
- `capture_mode="post_hoc"`.
686
+ These commands prompt for missing details and write `*.annotation.json` or
687
+ `data-annotations.json`. Post-hoc records are marked with
688
+ `capture_mode="post_hoc"`. README sidecars are opt-in:
689
+
690
+ ```bash
691
+ data-annotations annotate file path/to/participants.csv --write-readme
692
+ data-annotations annotate directory path/to/run-001 --write-readme
693
+ ```
683
694
 
684
695
  For shell workflows, you can move the prompt answers into a YAML file and run
685
696
  the command non-interactively:
@@ -1014,4 +1025,5 @@ uv run python examples/publish_cli.py
1014
1025
  ```
1015
1026
 
1016
1027
  Each example writes its outputs to a fresh temporary directory and prints the
1017
- location so you can inspect the generated annotation documents and README sidecars.
1028
+ location so you can inspect the generated annotation documents and any requested
1029
+ README sidecars.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "data-annotations"
3
- version = "2.7.0"
3
+ version = "2.8.0"
4
4
  description = "Annotate data artifacts with provenance and descriptions"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -103,7 +103,7 @@ def record_file_annotation(
103
103
  generation_context: dict[str, Any] | None = None,
104
104
  artifact_kind: ArtifactKind | None = None,
105
105
  artifact_sha256: str | None = None,
106
- write_readme: bool = True,
106
+ write_readme: bool = False,
107
107
  annotation_suffix: str = ".annotation.json",
108
108
  readme_suffix: str = ".README.md",
109
109
  checksum_policy: ChecksumPolicy = "auto",
@@ -172,7 +172,7 @@ def record_directory_annotation(
172
172
  summary: str | None = None,
173
173
  acquisition_context: dict[str, Any] | None = None,
174
174
  generation_context: dict[str, Any] | None = None,
175
- write_readme: bool = True,
175
+ write_readme: bool = False,
176
176
  annotation_filename: str = "data-annotations.json",
177
177
  readme_filename: str = "README.md",
178
178
  checksum_policy: ChecksumPolicy = "auto",
@@ -528,7 +528,7 @@ def annotate_file(
528
528
  params: dict[str, Any] | None = None,
529
529
  inputs: Sequence[str | Path] | None = None,
530
530
  function: Callable[..., Any] | None = None,
531
- write_readme: bool = True,
531
+ write_readme: bool = False,
532
532
  annotation_suffix: str = ".annotation.json",
533
533
  readme_suffix: str = ".README.md",
534
534
  checksum_policy: ChecksumPolicy = "auto",
@@ -645,7 +645,7 @@ def annotate_directory(
645
645
  params: dict[str, Any] | None = None,
646
646
  inputs: Sequence[str | Path] | None = None,
647
647
  function: Callable[..., Any] | None = None,
648
- write_readme: bool = True,
648
+ write_readme: bool = False,
649
649
  annotation_filename: str = "data-annotations.json",
650
650
  readme_filename: str = "README.md",
651
651
  checksum_policy: ChecksumPolicy = "auto",
@@ -129,6 +129,11 @@ def annotate_file_command(
129
129
  max_checksum_bytes: MaxChecksumBytesOption = 10 * 1024**3,
130
130
  sha256: Sha256Option = None,
131
131
  checksum_values: ChecksumValuesOption = None,
132
+ write_readme: bool = typer.Option(
133
+ False,
134
+ "--write-readme",
135
+ help="Write the optional Markdown README sidecar.",
136
+ ),
132
137
  force: ForceOption = False,
133
138
  ) -> None:
134
139
  file_answers = _load_file_answers(answers_path)
@@ -152,8 +157,9 @@ def annotate_file_command(
152
157
  except answer_files.AnswersError as exc:
153
158
  _error(str(exc))
154
159
 
160
+ annotation_paths = _annotation_paths_for_file(artifact_path)
155
161
  _ensure_annotation_outputs_available(
156
- _annotation_paths_for_file(artifact_path),
162
+ annotation_paths if write_readme else annotation_paths[:1],
157
163
  force=force,
158
164
  )
159
165
 
@@ -242,10 +248,12 @@ def annotate_file_command(
242
248
  checksum_policy=selected_checksum_policy,
243
249
  max_checksum_bytes=max_checksum_bytes,
244
250
  checksum_overrides=checksum_overrides,
251
+ write_readme=write_readme,
245
252
  )
246
253
  typer.echo("")
247
254
  typer.echo(f"Annotation: {annotation_path}")
248
- typer.echo(f"README: {readme_path}")
255
+ if readme_path is not None:
256
+ typer.echo(f"README: {readme_path}")
249
257
 
250
258
 
251
259
  @annotate_app.command("directory")
@@ -321,6 +329,11 @@ def annotate_directory_command(
321
329
  checksum_policy: ChecksumPolicyOption = "auto",
322
330
  max_checksum_bytes: MaxChecksumBytesOption = 10 * 1024**3,
323
331
  checksum_values: ChecksumValuesOption = None,
332
+ write_readme: bool = typer.Option(
333
+ False,
334
+ "--write-readme",
335
+ help="Write the optional Markdown README sidecar.",
336
+ ),
324
337
  force: ForceOption = False,
325
338
  ) -> None:
326
339
  directory_answers = _load_directory_answers(answers_path)
@@ -380,8 +393,9 @@ def annotate_directory_command(
380
393
  code=1,
381
394
  )
382
395
 
396
+ annotation_paths = _annotation_paths_for_directory(output_dir)
383
397
  _ensure_annotation_outputs_available(
384
- _annotation_paths_for_directory(output_dir),
398
+ annotation_paths if write_readme else annotation_paths[:1],
385
399
  force=force,
386
400
  )
387
401
 
@@ -518,7 +532,9 @@ def annotate_directory_command(
518
532
  checksum_policy=selected_checksum_policy,
519
533
  max_checksum_bytes=max_checksum_bytes,
520
534
  checksum_overrides=checksum_overrides,
535
+ write_readme=write_readme,
521
536
  )
522
537
 
523
538
  typer.echo(f"Annotation: {annotation_path}")
524
- typer.echo(f"README: {readme_path}")
539
+ if readme_path is not None:
540
+ typer.echo(f"README: {readme_path}")
@@ -50,8 +50,9 @@ def _write_post_hoc_file_bundle(
50
50
  checksum_policy: ChecksumPolicy = "auto",
51
51
  max_checksum_bytes: int = provenance_writers.DEFAULT_MAX_CHECKSUM_BYTES,
52
52
  checksum_overrides: Mapping[str | Path, str] | None = None,
53
- ) -> tuple[Path, Path]:
54
- annotation_path, readme_path = _annotation_paths_for_file(artifact_path)
53
+ write_readme: bool = False,
54
+ ) -> tuple[Path, Path | None]:
55
+ annotation_path, default_readme_path = _annotation_paths_for_file(artifact_path)
55
56
  write_file_annotation(
56
57
  artifact_path,
57
58
  title=title,
@@ -70,13 +71,15 @@ def _write_post_hoc_file_bundle(
70
71
  max_checksum_bytes=max_checksum_bytes,
71
72
  checksum_overrides=checksum_overrides,
72
73
  )
73
- document = provenance_recovery._load_annotation_document(annotation_path)
74
- write_file_readme(
75
- readme_path,
76
- artifact_path=document.subject.path,
77
- artifact_kind=document.subject.kind,
78
- description=document.description,
79
- )
74
+ readme_path: Path | None = None
75
+ if write_readme:
76
+ document = provenance_recovery._load_annotation_document(annotation_path)
77
+ readme_path = write_file_readme(
78
+ default_readme_path,
79
+ artifact_path=document.subject.path,
80
+ artifact_kind=document.subject.kind,
81
+ description=document.description,
82
+ )
80
83
  return annotation_path, readme_path
81
84
 
82
85
 
@@ -94,8 +97,9 @@ def _write_post_hoc_directory_bundle(
94
97
  checksum_policy: ChecksumPolicy = "auto",
95
98
  max_checksum_bytes: int = provenance_writers.DEFAULT_MAX_CHECKSUM_BYTES,
96
99
  checksum_overrides: Mapping[str | Path, str] | None = None,
97
- ) -> tuple[Path, Path]:
98
- annotation_path, readme_path = _annotation_paths_for_directory(output_dir)
100
+ write_readme: bool = False,
101
+ ) -> tuple[Path, Path | None]:
102
+ annotation_path, default_readme_path = _annotation_paths_for_directory(output_dir)
99
103
  write_directory_annotation(
100
104
  output_dir,
101
105
  artifacts=artifacts,
@@ -112,14 +116,16 @@ def _write_post_hoc_directory_bundle(
112
116
  max_checksum_bytes=max_checksum_bytes,
113
117
  checksum_overrides=checksum_overrides,
114
118
  )
115
- document = provenance_recovery._load_annotation_document(annotation_path)
116
- write_directory_readme(
117
- readme_path,
118
- output_dir=document.subject.path,
119
- produced_files=document.subject.produced_files,
120
- child_bundles=document.subject.child_bundles,
121
- description=document.description,
122
- )
119
+ readme_path: Path | None = None
120
+ if write_readme:
121
+ document = provenance_recovery._load_annotation_document(annotation_path)
122
+ readme_path = write_directory_readme(
123
+ default_readme_path,
124
+ output_dir=document.subject.path,
125
+ produced_files=document.subject.produced_files,
126
+ child_bundles=document.subject.child_bundles,
127
+ description=document.description,
128
+ )
123
129
  return annotation_path, readme_path
124
130
 
125
131