data-annotations 2.1.2__tar.gz → 2.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_annotations-2.1.2 → data_annotations-2.3.0}/PKG-INFO +168 -29
- {data_annotations-2.1.2 → data_annotations-2.3.0}/README.md +167 -28
- {data_annotations-2.1.2 → data_annotations-2.3.0}/pyproject.toml +1 -1
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/_decorators.py +83 -19
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/annotations/decorators.py +14 -3
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/annotations/models.py +5 -3
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/annotations/writers.py +118 -23
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/cli_app/annotate.py +185 -9
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/cli_app/common.py +99 -13
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/cli_app/prompts.py +92 -2
- data_annotations-2.3.0/src/data_annotations/cli_app/provenance_commands.py +321 -0
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/description/__init__.py +4 -0
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/description/decorators.py +29 -2
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/description/models.py +42 -1
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/description/writers.py +58 -1
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/provenance/__init__.py +18 -1
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/provenance/decorators.py +9 -3
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/provenance/git.py +10 -0
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/provenance/models.py +20 -0
- data_annotations-2.3.0/src/data_annotations/provenance/recovery.py +926 -0
- data_annotations-2.3.0/src/data_annotations/provenance/writers.py +439 -0
- data_annotations-2.1.2/src/data_annotations/cli_app/provenance_commands.py +0 -107
- data_annotations-2.1.2/src/data_annotations/provenance/recovery.py +0 -473
- data_annotations-2.1.2/src/data_annotations/provenance/writers.py +0 -206
- {data_annotations-2.1.2 → data_annotations-2.3.0}/LICENSE +0 -0
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/__init__.py +0 -0
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/annotations/__init__.py +0 -0
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/cli.py +0 -0
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/cli_app/__init__.py +0 -0
- {data_annotations-2.1.2 → data_annotations-2.3.0}/src/data_annotations/provenance/runtime.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: data-annotations
|
|
3
|
-
Version: 2.1.2
|
|
3
|
+
Version: 2.3.0
|
|
4
4
|
Summary: Annotate generated data artifacts
|
|
5
5
|
Keywords: annotations,data,metadata,provenance,reproducibility
|
|
6
6
|
Author: Rodrigo C. G. Pena
|
|
@@ -29,7 +29,7 @@ Description-Content-Type: text/markdown
|
|
|
29
29
|
|
|
30
30
|
# data-annotations
|
|
31
31
|
|
|
32
|
-
A
|
|
32
|
+
A Python package for attaching provenance and structured descriptions to the
|
|
33
33
|
files and directories your workflows produce.
|
|
34
34
|
|
|
35
35
|
It is designed for lightweight research and reproducibility pipelines where you want
|
|
@@ -37,11 +37,11 @@ generated datasets, tables, plots, or reports to carry enough context to explain
|
|
|
37
37
|
where they came from and what they contain.
|
|
38
38
|
|
|
39
39
|
The package captures common provenance automatically and writes plain JSON and
|
|
40
|
-
Markdown artifacts that are easy to inspect or archive. The canonical on-disk
|
|
41
|
-
|
|
40
|
+
Markdown artifacts that are easy to inspect or archive. The canonical on-disk
|
|
41
|
+
format uses one JSON annotation document per artifact:
|
|
42
42
|
|
|
43
|
-
- Files use `artifact.ext.
|
|
44
|
-
- Directories
|
|
43
|
+
- Files use `artifact.ext.annotation.json`
|
|
44
|
+
- Directories carry `data-annotations.json` at their root
|
|
45
45
|
|
|
46
46
|
Each annotation document stores four top-level sections:
|
|
47
47
|
|
|
@@ -50,6 +50,10 @@ Each annotation document stores four top-level sections:
|
|
|
50
50
|
- `provenance`
|
|
51
51
|
- `description`
|
|
52
52
|
|
|
53
|
+
Here's the mental model: files get a visible sibling annotation, and
|
|
54
|
+
directories carry one visible annotation at their root. Treat the annotation as
|
|
55
|
+
part of the research output bundle.
|
|
56
|
+
|
|
53
57
|
See the [changelog](CHANGELOG.md) for release history and upgrade-oriented notes.
|
|
54
58
|
|
|
55
59
|
## Installation
|
|
@@ -95,12 +99,18 @@ Every annotation document includes provenance with:
|
|
|
95
99
|
- Hostname and username
|
|
96
100
|
- The script path and command-line arguments
|
|
97
101
|
- The script path relative to the Git repo root when it can be determined
|
|
98
|
-
- Git commit, branch, dirty state,
|
|
102
|
+
- Git commit, branch, dirty state, canonical repository remote, exact tags, and
|
|
103
|
+
`git describe` output when available
|
|
99
104
|
- The current `SLURM_JOB_ID` when available
|
|
105
|
+
- Structured snapshots for recorded local inputs, including file checksums,
|
|
106
|
+
directory content digests, and upstream annotation sidecar references when
|
|
107
|
+
present
|
|
100
108
|
|
|
101
109
|
You can also attach your own parameters, input file paths, and function names.
|
|
102
110
|
Local filesystem paths in provenance are stored as absolute paths. URI-style inputs
|
|
103
111
|
such as `s3://...` or `https://...` are preserved as provided.
|
|
112
|
+
Git tags and `git_describe` are human-friendly hints only; `git_sha` remains the
|
|
113
|
+
source of truth for reproducibility, matching, and source checkout.
|
|
104
114
|
|
|
105
115
|
## Quick Start
|
|
106
116
|
|
|
@@ -111,7 +121,7 @@ provenance and emit sidecars automatically.
|
|
|
111
121
|
|
|
112
122
|
For example, here is a complete file-level annotation workflow using the
|
|
113
123
|
`record_file_annotation(...)` decorator. Once `write_participants` is called, it
|
|
114
|
-
automatically generates sidecars `participants.csv.
|
|
124
|
+
automatically generates sidecars `participants.csv.annotation.json` and `participants.csv.README.md`.
|
|
115
125
|
The JSON sidecar will contain provenance and description metadata, and the Markdown sidecar
|
|
116
126
|
will have a human-friendly rendering of the description provided in the decorator.
|
|
117
127
|
|
|
@@ -182,7 +192,7 @@ write_participants(
|
|
|
182
192
|
split="validation",
|
|
183
193
|
)
|
|
184
194
|
|
|
185
|
-
print(f"{artifact_path}.
|
|
195
|
+
print(f"{artifact_path}.annotation.json")
|
|
186
196
|
print(f"{artifact_path}.README.md")
|
|
187
197
|
```
|
|
188
198
|
|
|
@@ -235,7 +245,12 @@ Accepted directory return items are:
|
|
|
235
245
|
|
|
236
246
|
- `DocumentedArtifact` when you want per-artifact title, summary, fields,
|
|
237
247
|
keys, or missing-value metadata.
|
|
248
|
+
- `DocumentedArtifactGroup` for `record_directory_annotation(...)` and
|
|
249
|
+
`record_directory_description(...)` when many files share one title, summary,
|
|
250
|
+
kind, and optional schema metadata.
|
|
238
251
|
- `ProducedFile` when you only need path, kind, and optional precomputed hash.
|
|
252
|
+
- `ChildBundle` when an annotated child directory should be referenced as its
|
|
253
|
+
own independently shareable bundle.
|
|
239
254
|
- `(path, kind)` tuples when path and artifact kind are enough.
|
|
240
255
|
- plain path-like values when the artifact kind can default to `"other"`.
|
|
241
256
|
|
|
@@ -249,7 +264,11 @@ Here is another decorator pattern example with `record_directory_annotation(...)
|
|
|
249
264
|
from pathlib import Path
|
|
250
265
|
|
|
251
266
|
from data_annotations.annotations import record_directory_annotation
|
|
252
|
-
from data_annotations.description import
|
|
267
|
+
from data_annotations.description import (
|
|
268
|
+
DocumentedArtifact,
|
|
269
|
+
DocumentedArtifactGroup,
|
|
270
|
+
FieldDefinition,
|
|
271
|
+
)
|
|
253
272
|
from data_annotations.provenance import ProducedFile
|
|
254
273
|
|
|
255
274
|
@record_directory_annotation(
|
|
@@ -294,13 +313,16 @@ def build_outputs(
|
|
|
294
313
|
encoding="utf-8",
|
|
295
314
|
)
|
|
296
315
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
316
|
+
plot_paths = []
|
|
317
|
+
for day in ["2024-01-01", "2024-01-02", "2024-01-03"]:
|
|
318
|
+
plot_path = output_dir / f"sma_{day}.png"
|
|
319
|
+
plot_path.write_bytes(
|
|
320
|
+
(
|
|
321
|
+
f"plot placeholder for the SMA variable on {day}, "
|
|
322
|
+
f"derived from {input_path.name}\n"
|
|
323
|
+
).encode("utf-8")
|
|
324
|
+
)
|
|
325
|
+
plot_paths.append(plot_path)
|
|
304
326
|
|
|
305
327
|
return [
|
|
306
328
|
DocumentedArtifact(
|
|
@@ -321,7 +343,13 @@ def build_outputs(
|
|
|
321
343
|
],
|
|
322
344
|
),
|
|
323
345
|
ProducedFile(path=str(report_path), kind="report"),
|
|
324
|
-
(
|
|
346
|
+
DocumentedArtifactGroup(
|
|
347
|
+
title="Daily SMA plots",
|
|
348
|
+
summary="Plots of the same variable on different days.",
|
|
349
|
+
kind="plot",
|
|
350
|
+
paths=[str(path) for path in plot_paths],
|
|
351
|
+
selector="sma_*.png",
|
|
352
|
+
),
|
|
325
353
|
]
|
|
326
354
|
|
|
327
355
|
|
|
@@ -332,7 +360,7 @@ build_outputs(
|
|
|
332
360
|
split="validation",
|
|
333
361
|
)
|
|
334
362
|
|
|
335
|
-
print(output_dir / "
|
|
363
|
+
print(output_dir / "data-annotations.json")
|
|
336
364
|
print(output_dir / "README.md")
|
|
337
365
|
```
|
|
338
366
|
|
|
@@ -355,6 +383,7 @@ File annotations store:
|
|
|
355
383
|
- `subject.kind`
|
|
356
384
|
- `subject.sha256`
|
|
357
385
|
- `provenance.*`
|
|
386
|
+
- `provenance.input_artifacts[]`
|
|
358
387
|
- `description.title`
|
|
359
388
|
- `description.summary`
|
|
360
389
|
- `description.fields`
|
|
@@ -368,16 +397,67 @@ Directory annotations store:
|
|
|
368
397
|
|
|
369
398
|
- `subject.path`
|
|
370
399
|
- `subject.produced_files[]`
|
|
400
|
+
- `subject.child_bundles[]`
|
|
401
|
+
- `subject.content_digest`
|
|
371
402
|
- `provenance.*`
|
|
403
|
+
- `provenance.input_artifacts[]`
|
|
372
404
|
- `description.title`
|
|
373
405
|
- `description.summary`
|
|
406
|
+
- `description.artifact_groups[]`
|
|
374
407
|
- `description.artifacts[]`
|
|
375
408
|
- `description.acquisition_context`
|
|
376
409
|
- `description.generation_context`
|
|
377
410
|
- `description.description_updated_at`
|
|
378
411
|
|
|
379
|
-
|
|
380
|
-
|
|
412
|
+
Use `description.artifact_groups[]` when many files have the same meaning, and
|
|
413
|
+
use `description.artifacts[]` only for file-specific notes, overrides, or schema.
|
|
414
|
+
Groups are descriptive only. Integrity still lives in `subject.produced_files[]`,
|
|
415
|
+
which tracks every concrete file by path, kind, and checksum.
|
|
416
|
+
|
|
417
|
+
The `description` section intentionally excludes provenance linkage fields.
|
|
418
|
+
Directory `produced_files[].path` values are stored relative to `subject.path`,
|
|
419
|
+
which keeps verification stable when a complete output directory is copied or
|
|
420
|
+
archived elsewhere. `subject.content_digest` is computed from sorted tracked file
|
|
421
|
+
paths, file checksums, and referenced child bundle digests.
|
|
422
|
+
|
|
423
|
+
## Artifact Groups
|
|
424
|
+
|
|
425
|
+
Artifact groups are for homogeneous sets of files that researchers naturally
|
|
426
|
+
understand as one output family: for example, 100 PNG plots of the same variable,
|
|
427
|
+
one per acquisition day. A group stores the shared title, summary, kind, optional
|
|
428
|
+
schema fields, and the concrete member paths. It can also store an informational
|
|
429
|
+
`selector`, such as `plots/*.png`, to show how the group was chosen.
|
|
430
|
+
|
|
431
|
+
Rules of thumb:
|
|
432
|
+
|
|
433
|
+
- Use artifact groups when many files have the same meaning.
|
|
434
|
+
- Use individual artifacts for file-specific notes, exceptions, or overrides.
|
|
435
|
+
- It is OK for an individual artifact to also appear in a group.
|
|
436
|
+
- Do not rely on groups for integrity. `subject.produced_files[]` remains the
|
|
437
|
+
complete checksum inventory.
|
|
438
|
+
|
|
439
|
+
## Nested Directory Policy
|
|
440
|
+
|
|
441
|
+
Annotate the smallest thing you would share as a unit. If a directory is one
|
|
442
|
+
research output, give that directory one `data-annotations.json`, even when its
|
|
443
|
+
tracked files live in nested subdirectories.
|
|
444
|
+
|
|
445
|
+
Use recursive directory annotations for one bundle with nested files:
|
|
446
|
+
|
|
447
|
+
```bash
|
|
448
|
+
data-annotations annotate directory path/to/run-001 --recursive
|
|
449
|
+
data-annotations annotate directory path/to/run-001 --max-depth 2
|
|
450
|
+
```
|
|
451
|
+
|
|
452
|
+
Use child bundle annotations when a subdirectory is independently meaningful,
|
|
453
|
+
shareable, or reusable. In that case, annotate the child directory first, then
|
|
454
|
+
annotate the parent. The parent records a compact `child_bundles[]` reference
|
|
455
|
+
with the child path, child annotation path, and child content digest; it does not
|
|
456
|
+
copy the child file inventory into the parent JSON.
|
|
457
|
+
|
|
458
|
+
Post-hoc directory discovery follows the same rule. `--recursive` discovers
|
|
459
|
+
nested files, but it stops at annotated child directories containing
|
|
460
|
+
`data-annotations.json` and records them as child bundles.
|
|
381
461
|
|
|
382
462
|
## Provenance Decorators And Writers
|
|
383
463
|
|
|
@@ -412,7 +492,9 @@ write_report(
|
|
|
412
492
|
|
|
413
493
|
Use `record_directory_manifest(...)` for directory outputs. Directory decorators
|
|
414
494
|
accept `DocumentedArtifact`, `ProducedFile`, `(path, kind)`, and plain path-like
|
|
415
|
-
return values.
|
|
495
|
+
return values. Provenance-only APIs do not accept description groups; use
|
|
496
|
+
unified annotation or description APIs when groups should appear in the JSON or
|
|
497
|
+
README.
|
|
416
498
|
|
|
417
499
|
If you want the direct writer approach instead, use `write_file_manifest(...)` and
|
|
418
500
|
`write_directory_manifest(...)` (see `examples/`).
|
|
@@ -428,7 +510,9 @@ Key public description models:
|
|
|
428
510
|
- `AllowedValue`
|
|
429
511
|
- `FieldDefinition`
|
|
430
512
|
- `DocumentedArtifact`
|
|
513
|
+
- `DocumentedArtifactGroup`
|
|
431
514
|
- `ArtifactDescription`
|
|
515
|
+
- `ArtifactGroupDescription`
|
|
432
516
|
- `FileDescription`
|
|
433
517
|
- `DirectoryDescription`
|
|
434
518
|
|
|
@@ -450,21 +534,26 @@ per call.
|
|
|
450
534
|
## Recovery Helpers
|
|
451
535
|
|
|
452
536
|
Use `artifact_matches_manifest(...)` to verify whether a detached artifact still
|
|
453
|
-
matches an annotation document
|
|
454
|
-
recorded
|
|
537
|
+
matches an annotation document. Use `analyze_provenance_chain(...)` when you also
|
|
538
|
+
want to verify recorded inputs and recursively follow upstream annotation
|
|
539
|
+
sidecars. Use `checkout_manifest_source(...)` to recover the recorded code state
|
|
540
|
+
from Git metadata.
|
|
455
541
|
|
|
456
542
|
```python
|
|
457
543
|
from pathlib import Path
|
|
458
544
|
|
|
459
545
|
from data_annotations.provenance import (
|
|
546
|
+
analyze_provenance_chain,
|
|
460
547
|
artifact_matches_manifest,
|
|
461
548
|
checkout_manifest_source,
|
|
462
549
|
)
|
|
463
550
|
|
|
464
|
-
annotation_path = Path("outputs/participants.csv.
|
|
551
|
+
annotation_path = Path("outputs/participants.csv.annotation.json")
|
|
465
552
|
artifact_path = Path("downloads/participants.csv")
|
|
466
553
|
|
|
467
554
|
if artifact_matches_manifest(artifact_path, annotation_path):
|
|
555
|
+
chain = analyze_provenance_chain(artifact_path)
|
|
556
|
+
print(chain.status)
|
|
468
557
|
recovered = checkout_manifest_source(annotation_path)
|
|
469
558
|
print(recovered.checkout_path)
|
|
470
559
|
print(recovered.script_path)
|
|
@@ -483,8 +572,8 @@ still attach provenance and description after the fact.
|
|
|
483
572
|
Post-hoc descriptions can still be very useful, but the quality of post-hoc
|
|
484
573
|
provenance depends on how exact the supplied answers are. In particular, fields
|
|
485
574
|
such as the generating script, command, function, Git commit, repository path,
|
|
486
|
-
inputs, and parameters are only as reliable as
|
|
487
|
-
annotation.
|
|
575
|
+
Git tags, `git describe` output, inputs, and parameters are only as reliable as
|
|
576
|
+
the information entered during annotation.
|
|
488
577
|
|
|
489
578
|
## CLI Workflow
|
|
490
579
|
|
|
@@ -496,22 +585,51 @@ For post-hoc annotation:
|
|
|
496
585
|
```bash
|
|
497
586
|
data-annotations annotate file path/to/participants.csv
|
|
498
587
|
data-annotations annotate directory path/to/run-001
|
|
588
|
+
data-annotations annotate directory path/to/run-001 --recursive
|
|
589
|
+
data-annotations annotate directory path/to/run-001 --max-depth 2
|
|
590
|
+
data-annotations annotate directory path/to/run-001 \
|
|
591
|
+
--recursive \
|
|
592
|
+
--group-selector "plots/*.png" \
|
|
593
|
+
--group-title "Daily SMA plots" \
|
|
594
|
+
--group-summary "Plots of the same variable on different days." \
|
|
595
|
+
--group-kind plot
|
|
499
596
|
```
|
|
500
597
|
|
|
501
|
-
These commands prompt for missing details, write `*.
|
|
598
|
+
These commands prompt for missing details, write `*.annotation.json` or `data-annotations.json`,
|
|
502
599
|
and optionally derive README sidecars. Post-hoc records are marked with
|
|
503
600
|
`capture_mode="post_hoc"`.
|
|
504
601
|
|
|
602
|
+
When group selectors are provided, the CLI expands them to concrete member paths
|
|
603
|
+
at annotation time. Grouped files are tracked in `subject.produced_files[]` but
|
|
604
|
+
are skipped by the per-file prompt flow, so you do not have to answer the same
|
|
605
|
+
questions for every matching file.
|
|
606
|
+
|
|
607
|
+
For post-hoc provenance, use repeatable `--git-tag` and optional
|
|
608
|
+
`--git-describe` when you know the original code state. These values are stored
|
|
609
|
+
as human-readable hints; `--git-sha` remains the field used for recovery.
|
|
610
|
+
|
|
505
611
|
For provenance inspection and source recovery:
|
|
506
612
|
|
|
507
613
|
```bash
|
|
508
614
|
data-annotations provenance match path/to/artifact
|
|
615
|
+
data-annotations provenance chain path/to/artifact
|
|
616
|
+
data-annotations provenance chain path/to/artifact --full-paths
|
|
509
617
|
data-annotations provenance checkout path/to/artifact
|
|
510
618
|
```
|
|
511
619
|
|
|
512
|
-
Command `match` auto-discovers `*.
|
|
620
|
+
The `match` command auto-discovers `*.annotation.json` for files and `data-annotations.json` for
|
|
513
621
|
directories, prints a verification summary, and suggests the exact `checkout`
|
|
514
622
|
command to run next when Git recovery metadata is available.
|
|
623
|
+
The `chain` command uses the same sidecar discovery, then verifies the artifact,
|
|
624
|
+
recorded input snapshots, and any upstream annotation sidecars reachable from
|
|
625
|
+
those inputs. Its default output shows a compact relative-path tree and lists
|
|
626
|
+
stale, missing, or unverifiable nodes first; use `--full-paths` when you need
|
|
627
|
+
absolute paths.
|
|
628
|
+
|
|
629
|
+
If `data-annotations provenance --help` does not list `chain`, your shell is
|
|
630
|
+
resolving an older installed command. From a source checkout, use
|
|
631
|
+
`uv run data-annotations provenance chain ...`, or reinstall the CLI from the
|
|
632
|
+
updated source before using the bare `data-annotations` command.
|
|
515
633
|
|
|
516
634
|
### Run With `uvx`
|
|
517
635
|
|
|
@@ -536,6 +654,7 @@ the project environment. You can then run:
|
|
|
536
654
|
uv run data-annotations annotate file path/to/participants.csv
|
|
537
655
|
uv run data-annotations annotate directory path/to/run-001
|
|
538
656
|
uv run data-annotations provenance match path/to/participants.csv
|
|
657
|
+
uv run data-annotations provenance chain path/to/participants.csv
|
|
539
658
|
uv run data-annotations provenance checkout path/to/participants.csv
|
|
540
659
|
```
|
|
541
660
|
|
|
@@ -562,6 +681,17 @@ uv run data-annotations provenance checkout path/to/participants.csv
|
|
|
562
681
|
- `annotate_file(...)`
|
|
563
682
|
- `annotate_directory(...)`
|
|
564
683
|
|
|
684
|
+
### Description Models
|
|
685
|
+
|
|
686
|
+
- `AllowedValue`
|
|
687
|
+
- `FieldDefinition`
|
|
688
|
+
- `DocumentedArtifact`
|
|
689
|
+
- `DocumentedArtifactGroup`
|
|
690
|
+
- `ArtifactDescription`
|
|
691
|
+
- `ArtifactGroupDescription`
|
|
692
|
+
- `FileDescription`
|
|
693
|
+
- `DirectoryDescription`
|
|
694
|
+
|
|
565
695
|
### Description Functions
|
|
566
696
|
|
|
567
697
|
- `record_file_description(...)`
|
|
@@ -576,9 +706,13 @@ uv run data-annotations provenance checkout path/to/participants.csv
|
|
|
576
706
|
### Provenance Models
|
|
577
707
|
|
|
578
708
|
- `ProducedFile`
|
|
709
|
+
- `ChildBundle`
|
|
710
|
+
- `InputArtifact`
|
|
579
711
|
- `BaseProvenance`
|
|
580
712
|
- `FileManifest`
|
|
581
713
|
- `DirectoryManifest`
|
|
714
|
+
- `ProvenanceChainNode`
|
|
715
|
+
- `ProvenanceChainReport`
|
|
582
716
|
- `RecoveredSource`
|
|
583
717
|
|
|
584
718
|
### Provenance Functions
|
|
@@ -587,6 +721,9 @@ uv run data-annotations provenance checkout path/to/participants.csv
|
|
|
587
721
|
- `record_directory_manifest(...)`
|
|
588
722
|
- `write_file_manifest(...)`
|
|
589
723
|
- `write_directory_manifest(...)`
|
|
724
|
+
- `directory_content_digest(...)`
|
|
725
|
+
- `analyze_provenance_chain(...)`
|
|
726
|
+
- `provenance_chain_is_fresh(...)`
|
|
590
727
|
- `artifact_matches_manifest(...)`
|
|
591
728
|
- `checkout_manifest_source(...)`
|
|
592
729
|
|
|
@@ -608,6 +745,8 @@ uv run python examples/write_file_manifest.py
|
|
|
608
745
|
uv run python examples/write_directory_manifest.py
|
|
609
746
|
uv run python examples/write_file_description.py
|
|
610
747
|
uv run python examples/write_directory_description.py
|
|
748
|
+
uv run python examples/provenance_chain.py
|
|
749
|
+
uv run python examples/provenance_chain_cli.py
|
|
611
750
|
uv run python examples/recover_provenance.py
|
|
612
751
|
uv run python examples/recover_provenance_cli.py
|
|
613
752
|
```
|