PyPI - data-annotations - Versions diffs - 2.7.0__tar.gz → 2.8.0__tar.gz - Mend

data-annotations 2.7.0.tar.gz → 2.8.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

{data_annotations-2.7.0 → data_annotations-2.8.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: data-annotations
-Version: 2.7.0
+Version: 2.8.0
 Summary: Annotate data artifacts with provenance and descriptions
 Keywords: annotations,data,metadata,provenance,reproducibility
 Author: Rodrigo C. G. Pena
@@ -126,13 +126,13 @@ Git tags and `git_describe` are human-friendly hints only. For Git sources,
 The recommended way to annotate your data artifacts is to decorate pipeline
 functions that consume some inputs and parameters, then write those artifacts.
 This keeps the artifact-writing logic explicit while letting `data-annotations` capture
-provenance and emit sidecars automatically.
+provenance and emit the annotation JSON sidecar automatically.
 For example, here is a complete file-level annotation workflow using the
 `record_file_annotation(...)` decorator. Once `write_participants` is called, it
-automatically generates sidecars `participants.csv.annotation.json` and `participants.csv.README.md`.
-The JSON sidecar will contain provenance and description metadata, and the Markdown sidecar
-will have a human-friendly rendering of the description provided in the decorator.
+automatically generates `participants.csv.annotation.json`. Set
+`write_readme=True` when you also want `participants.csv.README.md`, a
+human-friendly Markdown rendering of the description provided in the decorator.
 ```python
 from pathlib import Path
@@ -165,6 +165,7 @@ from data_annotations.description import AllowedValue, FieldDefinition
     artifact_kind="dataset",
     acquisition_context={"source": "Study A registry export"},
     generation_context={"pipeline": "baseline-v1"},
+    write_readme=True,
 )
 def write_participants(
     artifact_path: Path,
@@ -285,6 +286,7 @@ from data_annotations.provenance import ProducedFile
     summary="Directory-level documentation for the validation run outputs.",
     acquisition_context={"source": "Study A registry export"},
     generation_context={"pipeline": "baseline-v1"},
+    write_readme=True,
 )
 def build_outputs(
     output_dir: Path,
@@ -397,7 +399,11 @@ annotate_file(
     answers={"title": "Run Summary", "summary": "Validation run summary."},
 )
-@record_file_annotation(answers="participants.yaml")
+# Add write_readme=True when you also want Markdown README sidecars.
+annotate_file(answers="participants.yaml", write_readme=True)
+annotate_directory(answers="run-001.yaml", write_readme=True)
+@record_file_annotation(answers="participants.yaml", write_readme=True)
 def write_participants(artifact_path, input_path):
     ...
 ```
@@ -707,9 +713,14 @@ data-annotations annotate directory path/to/run-001 \
   --group-kind plot
 ```
-These commands prompt for missing details, write `*.annotation.json` or `data-annotations.json`,
-and optionally derive README sidecars. Post-hoc records are marked with
-`capture_mode="post_hoc"`.
+These commands prompt for missing details and write `*.annotation.json` or
+`data-annotations.json`. Post-hoc records are marked with
+`capture_mode="post_hoc"`. README sidecars are opt-in:
+```bash
+data-annotations annotate file path/to/participants.csv --write-readme
+data-annotations annotate directory path/to/run-001 --write-readme
+```
 For shell workflows, you can move the prompt answers into a YAML file and run
 the command non-interactively:
@@ -1044,4 +1055,5 @@ uv run python examples/publish_cli.py
 ```
 Each example writes its outputs to a fresh temporary directory and prints the
-location so you can inspect the generated annotation documents and README sidecars.
+location so you can inspect the generated annotation documents and any requested
+README sidecars.

{data_annotations-2.7.0 → data_annotations-2.8.0}/README.md RENAMED Viewed

@@ -96,13 +96,13 @@ Git tags and `git_describe` are human-friendly hints only. For Git sources,
 The recommended way to annotate your data artifacts is to decorate pipeline
 functions that consume some inputs and parameters, then write those artifacts.
 This keeps the artifact-writing logic explicit while letting `data-annotations` capture
-provenance and emit sidecars automatically.
+provenance and emit the annotation JSON sidecar automatically.
 For example, here is a complete file-level annotation workflow using the
 `record_file_annotation(...)` decorator. Once `write_participants` is called, it
-automatically generates sidecars `participants.csv.annotation.json` and `participants.csv.README.md`.
-The JSON sidecar will contain provenance and description metadata, and the Markdown sidecar
-will have a human-friendly rendering of the description provided in the decorator.
+automatically generates `participants.csv.annotation.json`. Set
+`write_readme=True` when you also want `participants.csv.README.md`, a
+human-friendly Markdown rendering of the description provided in the decorator.
 ```python
 from pathlib import Path
@@ -135,6 +135,7 @@ from data_annotations.description import AllowedValue, FieldDefinition
     artifact_kind="dataset",
     acquisition_context={"source": "Study A registry export"},
     generation_context={"pipeline": "baseline-v1"},
+    write_readme=True,
 )
 def write_participants(
     artifact_path: Path,
@@ -255,6 +256,7 @@ from data_annotations.provenance import ProducedFile
     summary="Directory-level documentation for the validation run outputs.",
     acquisition_context={"source": "Study A registry export"},
     generation_context={"pipeline": "baseline-v1"},
+    write_readme=True,
 )
 def build_outputs(
     output_dir: Path,
@@ -367,7 +369,11 @@ annotate_file(
     answers={"title": "Run Summary", "summary": "Validation run summary."},
 )
-@record_file_annotation(answers="participants.yaml")
+# Add write_readme=True when you also want Markdown README sidecars.
+annotate_file(answers="participants.yaml", write_readme=True)
+annotate_directory(answers="run-001.yaml", write_readme=True)
+@record_file_annotation(answers="participants.yaml", write_readme=True)
 def write_participants(artifact_path, input_path):
     ...
 ```
@@ -677,9 +683,14 @@ data-annotations annotate directory path/to/run-001 \
   --group-kind plot
 ```
-These commands prompt for missing details, write `*.annotation.json` or `data-annotations.json`,
-and optionally derive README sidecars. Post-hoc records are marked with
-`capture_mode="post_hoc"`.
+These commands prompt for missing details and write `*.annotation.json` or
+`data-annotations.json`. Post-hoc records are marked with
+`capture_mode="post_hoc"`. README sidecars are opt-in:
+```bash
+data-annotations annotate file path/to/participants.csv --write-readme
+data-annotations annotate directory path/to/run-001 --write-readme
+```
 For shell workflows, you can move the prompt answers into a YAML file and run
 the command non-interactively:
@@ -1014,4 +1025,5 @@ uv run python examples/publish_cli.py
 ```
 Each example writes its outputs to a fresh temporary directory and prints the
-location so you can inspect the generated annotation documents and README sidecars.
+location so you can inspect the generated annotation documents and any requested
+README sidecars.

{data_annotations-2.7.0 → data_annotations-2.8.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "data-annotations"
-version = "2.7.0"
+version = "2.8.0"
 description = "Annotate data artifacts with provenance and descriptions"
 readme = "README.md"
 authors = [

{data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/annotations/decorators.py RENAMED Viewed

@@ -103,7 +103,7 @@ def record_file_annotation(
     generation_context: dict[str, Any] | None = None,
     artifact_kind: ArtifactKind | None = None,
     artifact_sha256: str | None = None,
-    write_readme: bool = True,
+    write_readme: bool = False,
     annotation_suffix: str = ".annotation.json",
     readme_suffix: str = ".README.md",
     checksum_policy: ChecksumPolicy = "auto",
@@ -172,7 +172,7 @@ def record_directory_annotation(
     summary: str | None = None,
     acquisition_context: dict[str, Any] | None = None,
     generation_context: dict[str, Any] | None = None,
-    write_readme: bool = True,
+    write_readme: bool = False,
     annotation_filename: str = "data-annotations.json",
     readme_filename: str = "README.md",
     checksum_policy: ChecksumPolicy = "auto",

{data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/annotations/writers.py RENAMED Viewed

@@ -528,7 +528,7 @@ def annotate_file(
     params: dict[str, Any] | None = None,
     inputs: Sequence[str | Path] | None = None,
     function: Callable[..., Any] | None = None,
-    write_readme: bool = True,
+    write_readme: bool = False,
     annotation_suffix: str = ".annotation.json",
     readme_suffix: str = ".README.md",
     checksum_policy: ChecksumPolicy = "auto",
@@ -645,7 +645,7 @@ def annotate_directory(
     params: dict[str, Any] | None = None,
     inputs: Sequence[str | Path] | None = None,
     function: Callable[..., Any] | None = None,
-    write_readme: bool = True,
+    write_readme: bool = False,
     annotation_filename: str = "data-annotations.json",
     readme_filename: str = "README.md",
     checksum_policy: ChecksumPolicy = "auto",

{data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/cli_app/annotate/__init__.py RENAMED Viewed

@@ -129,6 +129,11 @@ def annotate_file_command(
     max_checksum_bytes: MaxChecksumBytesOption = 10 * 1024**3,
     sha256: Sha256Option = None,
     checksum_values: ChecksumValuesOption = None,
+    write_readme: bool = typer.Option(
+        False,
+        "--write-readme",
+        help="Write the optional Markdown README sidecar.",
+    ),
     force: ForceOption = False,
 ) -> None:
     file_answers = _load_file_answers(answers_path)
@@ -152,8 +157,9 @@ def annotate_file_command(
         except answer_files.AnswersError as exc:
             _error(str(exc))
+    annotation_paths = _annotation_paths_for_file(artifact_path)
     _ensure_annotation_outputs_available(
-        _annotation_paths_for_file(artifact_path),
+        annotation_paths if write_readme else annotation_paths[:1],
         force=force,
     )
@@ -242,10 +248,12 @@ def annotate_file_command(
         checksum_policy=selected_checksum_policy,
         max_checksum_bytes=max_checksum_bytes,
         checksum_overrides=checksum_overrides,
+        write_readme=write_readme,
     )
     typer.echo("")
     typer.echo(f"Annotation: {annotation_path}")
-    typer.echo(f"README: {readme_path}")
+    if readme_path is not None:
+        typer.echo(f"README: {readme_path}")
 @annotate_app.command("directory")
@@ -321,6 +329,11 @@ def annotate_directory_command(
     checksum_policy: ChecksumPolicyOption = "auto",
     max_checksum_bytes: MaxChecksumBytesOption = 10 * 1024**3,
     checksum_values: ChecksumValuesOption = None,
+    write_readme: bool = typer.Option(
+        False,
+        "--write-readme",
+        help="Write the optional Markdown README sidecar.",
+    ),
     force: ForceOption = False,
 ) -> None:
     directory_answers = _load_directory_answers(answers_path)
@@ -380,8 +393,9 @@ def annotate_directory_command(
             code=1,
         )
+    annotation_paths = _annotation_paths_for_directory(output_dir)
     _ensure_annotation_outputs_available(
-        _annotation_paths_for_directory(output_dir),
+        annotation_paths if write_readme else annotation_paths[:1],
         force=force,
     )
@@ -518,7 +532,9 @@ def annotate_directory_command(
         checksum_policy=selected_checksum_policy,
         max_checksum_bytes=max_checksum_bytes,
         checksum_overrides=checksum_overrides,
+        write_readme=write_readme,
     )
     typer.echo(f"Annotation: {annotation_path}")
-    typer.echo(f"README: {readme_path}")
+    if readme_path is not None:
+        typer.echo(f"README: {readme_path}")

{data_annotations-2.7.0 → data_annotations-2.8.0}/src/data_annotations/cli_app/annotate/helpers.py RENAMED Viewed

@@ -50,8 +50,9 @@ def _write_post_hoc_file_bundle(
     checksum_policy: ChecksumPolicy = "auto",
     max_checksum_bytes: int = provenance_writers.DEFAULT_MAX_CHECKSUM_BYTES,
     checksum_overrides: Mapping[str | Path, str] | None = None,
-) -> tuple[Path, Path]:
-    annotation_path, readme_path = _annotation_paths_for_file(artifact_path)
+    write_readme: bool = False,
+) -> tuple[Path, Path | None]:
+    annotation_path, default_readme_path = _annotation_paths_for_file(artifact_path)
     write_file_annotation(
         artifact_path,
         title=title,
@@ -70,13 +71,15 @@ def _write_post_hoc_file_bundle(
         max_checksum_bytes=max_checksum_bytes,
         checksum_overrides=checksum_overrides,
     )
-    document = provenance_recovery._load_annotation_document(annotation_path)
-    write_file_readme(
-        readme_path,
-        artifact_path=document.subject.path,
-        artifact_kind=document.subject.kind,
-        description=document.description,
-    )
+    readme_path: Path | None = None
+    if write_readme:
+        document = provenance_recovery._load_annotation_document(annotation_path)
+        readme_path = write_file_readme(
+            default_readme_path,
+            artifact_path=document.subject.path,
+            artifact_kind=document.subject.kind,
+            description=document.description,
+        )
     return annotation_path, readme_path
@@ -94,8 +97,9 @@ def _write_post_hoc_directory_bundle(
     checksum_policy: ChecksumPolicy = "auto",
     max_checksum_bytes: int = provenance_writers.DEFAULT_MAX_CHECKSUM_BYTES,
     checksum_overrides: Mapping[str | Path, str] | None = None,
-) -> tuple[Path, Path]:
-    annotation_path, readme_path = _annotation_paths_for_directory(output_dir)
+    write_readme: bool = False,
+) -> tuple[Path, Path | None]:
+    annotation_path, default_readme_path = _annotation_paths_for_directory(output_dir)
     write_directory_annotation(
         output_dir,
         artifacts=artifacts,
@@ -112,14 +116,16 @@ def _write_post_hoc_directory_bundle(
         max_checksum_bytes=max_checksum_bytes,
         checksum_overrides=checksum_overrides,
     )
-    document = provenance_recovery._load_annotation_document(annotation_path)
-    write_directory_readme(
-        readme_path,
-        output_dir=document.subject.path,
-        produced_files=document.subject.produced_files,
-        child_bundles=document.subject.child_bundles,
-        description=document.description,
-    )
+    readme_path: Path | None = None
+    if write_readme:
+        document = provenance_recovery._load_annotation_document(annotation_path)
+        readme_path = write_directory_readme(
+            default_readme_path,
+            output_dir=document.subject.path,
+            produced_files=document.subject.produced_files,
+            child_bundles=document.subject.child_bundles,
+            description=document.description,
+        )
     return annotation_path, readme_path