data-annotations 2.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_annotations/__init__.py +2 -0
- data_annotations/_decorators.py +140 -0
- data_annotations/annotations/__init__.py +30 -0
- data_annotations/annotations/decorators.py +147 -0
- data_annotations/annotations/models.py +45 -0
- data_annotations/annotations/writers.py +368 -0
- data_annotations/cli.py +37 -0
- data_annotations/cli_app/__init__.py +1 -0
- data_annotations/cli_app/annotate.py +307 -0
- data_annotations/cli_app/common.py +276 -0
- data_annotations/cli_app/prompts.py +534 -0
- data_annotations/cli_app/provenance_commands.py +107 -0
- data_annotations/description/__init__.py +37 -0
- data_annotations/description/decorators.py +145 -0
- data_annotations/description/models.py +63 -0
- data_annotations/description/writers.py +321 -0
- data_annotations/provenance/__init__.py +37 -0
- data_annotations/provenance/decorators.py +111 -0
- data_annotations/provenance/git.py +121 -0
- data_annotations/provenance/models.py +50 -0
- data_annotations/provenance/recovery.py +473 -0
- data_annotations/provenance/runtime.py +248 -0
- data_annotations/provenance/writers.py +206 -0
- data_annotations-2.1.2.dist-info/METADATA +616 -0
- data_annotations-2.1.2.dist-info/RECORD +28 -0
- data_annotations-2.1.2.dist-info/WHEEL +4 -0
- data_annotations-2.1.2.dist-info/entry_points.txt +3 -0
- data_annotations-2.1.2.dist-info/licenses/LICENSE +28 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Callable
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
from data_annotations.description.models import DocumentedArtifact
|
|
8
|
+
from data_annotations.provenance.models import ProducedFile
|
|
9
|
+
|
|
10
|
+
DEFAULT_INPUT_ARGS = ("input_path", "input_paths")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def bind_arguments(
    fn: Callable[..., Any],
    args: tuple[Any, ...],
    kwargs: dict[str, Any],
) -> inspect.BoundArguments:
    """Map a call's positional and keyword arguments onto ``fn``'s signature.

    Defaults are applied, so parameters the caller omitted are present in the
    returned mapping too. ``inspect.Signature.bind`` raises ``TypeError`` when
    the supplied arguments do not fit the signature.
    """
    signature = inspect.signature(fn)
    call = signature.bind(*args, **kwargs)
    call.apply_defaults()
    return call
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def normalized_path(path: str | Path) -> Path:
    """Return ``path`` as a ``Path`` after the provenance writers' local-path normalisation."""
    # Imported inside the function rather than at module import time
    # (presumably to avoid a circular import -- confirm before hoisting).
    from data_annotations.provenance import writers as provenance_writers

    cleaned = provenance_writers._normalize_local_path(path)
    return Path(cleaned)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def argument_path(
    bound: inspect.BoundArguments,
    *,
    argument_name: str,
) -> Path:
    """Fetch the bound argument ``argument_name`` and return it as a normalised path.

    Raises:
        ValueError: if the wrapped call has no argument with that name. The
            underlying ``KeyError`` is chained as the cause.
    """
    try:
        raw = bound.arguments[argument_name]
    except KeyError as missing:
        message = (
            f"decorator argument {argument_name!r} was not found in the wrapped call"
        )
        raise ValueError(message) from missing
    return normalized_path(raw)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def extract_params(
    bound: inspect.BoundArguments,
    *,
    target_args: Iterable[str],
    input_args: Iterable[str],
) -> dict[str, Any]:
    """Return the bound arguments that are neither output targets nor inputs.

    Every argument whose name appears in ``target_args`` or ``input_args`` is
    dropped; the rest are returned in signature order.
    """
    skipped = {*target_args, *input_args}
    params: dict[str, Any] = {}
    for key, val in bound.arguments.items():
        if key in skipped:
            continue
        params[key] = val
    return params
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def extract_inputs(
    bound: inspect.BoundArguments,
    *,
    input_args: Iterable[str],
) -> list[str]:
    """Collect the values of the named input arguments as a flat list of strings.

    Args:
        bound: Arguments of the wrapped call.
        input_args: Names of the arguments that carry inputs. Names that are
            not part of the call, or whose value is ``None``, are skipped.

    Returns:
        One string per input. ``list`` and ``tuple`` values are expanded in
        their own order; ``set`` and ``frozenset`` values are expanded in
        sorted order; any other value contributes ``str(value)``.
    """
    inputs: list[str] = []
    for name in input_args:
        value = bound.arguments.get(name)
        if value is None:
            continue
        if isinstance(value, (set, frozenset)):
            # Sets have no stable iteration order (string hashing is
            # randomised per process), so sort to keep the recorded inputs
            # reproducible from run to run.
            inputs.extend(sorted(str(item) for item in value))
        elif isinstance(value, (list, tuple)):
            inputs.extend(str(item) for item in value)
        else:
            inputs.append(str(value))
    return inputs
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def coerce_produced_file(item: Any) -> "ProducedFile":
    """Convert one returned item into a ``ProducedFile`` with a normalised path.

    Accepted shapes:
    - ``DocumentedArtifact``: path and kind are taken from it.
    - ``ProducedFile``: copied with only its path normalised; its other
      fields, including any existing ``sha256``, are kept as they are.
    - ``(path, kind)`` two-tuple.
    - anything else is treated as a path with kind ``"other"``.

    Except for the ``ProducedFile`` case, ``sha256`` is computed when the file
    exists at the time of the call and left as ``None`` otherwise.
    """
    from data_annotations.description.models import DocumentedArtifact
    from data_annotations.provenance import writers as provenance_writers
    from data_annotations.provenance.models import ProducedFile

    def hashed(raw_path, kind):
        # Shared by every branch that builds a fresh ProducedFile.
        location = normalized_path(raw_path)
        digest = None
        if location.exists():
            digest = provenance_writers.sha256_file(location)
        return ProducedFile(path=str(location), kind=kind, sha256=digest)

    if isinstance(item, DocumentedArtifact):
        return hashed(item.path, item.kind)

    if isinstance(item, ProducedFile):
        clean = str(normalized_path(item.path))
        return item.model_copy(update={"path": clean})

    if isinstance(item, tuple) and len(item) == 2:
        raw_path, kind = item
        return hashed(raw_path, kind)

    return hashed(item, "other")
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def coerce_produced_files(items: Iterable[Any]) -> list["ProducedFile"]:
    """Apply ``coerce_produced_file`` to every item, preserving order."""
    return list(map(coerce_produced_file, items))
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def coerce_documented_artifact(item: Any) -> "DocumentedArtifact":
    """Convert one returned item into a ``DocumentedArtifact`` with a normalised path.

    Accepted shapes:
    - ``DocumentedArtifact``: copied with only its path normalised.
    - ``ProducedFile``: path and kind are carried over.
    - ``(path, kind)`` two-tuple.
    - anything else is treated as a path and the model's default kind applies.
    """
    from data_annotations.description.models import DocumentedArtifact
    from data_annotations.provenance.models import ProducedFile

    if isinstance(item, DocumentedArtifact):
        clean = str(normalized_path(item.path))
        return item.model_copy(update={"path": clean})

    if isinstance(item, ProducedFile):
        raw_path, kind = item.path, item.kind
    elif isinstance(item, tuple) and len(item) == 2:
        raw_path, kind = item
    else:
        # Bare path-like value: no kind is passed.
        return DocumentedArtifact(path=str(normalized_path(item)))

    return DocumentedArtifact(path=str(normalized_path(raw_path)), kind=kind)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def coerce_documented_artifacts(items: Iterable[Any]) -> list["DocumentedArtifact"]:
    """Apply ``coerce_documented_artifact`` to every item, preserving order."""
    return list(map(coerce_documented_artifact, items))
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from .models import (
|
|
2
|
+
DirectoryAnnotationDocument,
|
|
3
|
+
DirectoryAnnotationResult,
|
|
4
|
+
DirectoryArtifactSubject,
|
|
5
|
+
FileAnnotationDocument,
|
|
6
|
+
FileAnnotationResult,
|
|
7
|
+
FileArtifactSubject,
|
|
8
|
+
)
|
|
9
|
+
from .decorators import record_directory_annotation, record_file_annotation
|
|
10
|
+
from .writers import (
|
|
11
|
+
annotate_directory,
|
|
12
|
+
annotate_file,
|
|
13
|
+
write_directory_annotation,
|
|
14
|
+
write_file_annotation,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"annotate_directory",
|
|
19
|
+
"annotate_file",
|
|
20
|
+
"record_directory_annotation",
|
|
21
|
+
"record_file_annotation",
|
|
22
|
+
"write_directory_annotation",
|
|
23
|
+
"write_file_annotation",
|
|
24
|
+
"DirectoryAnnotationDocument",
|
|
25
|
+
"DirectoryAnnotationResult",
|
|
26
|
+
"DirectoryArtifactSubject",
|
|
27
|
+
"FileAnnotationDocument",
|
|
28
|
+
"FileAnnotationResult",
|
|
29
|
+
"FileArtifactSubject",
|
|
30
|
+
]
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
from functools import wraps
|
|
2
|
+
from typing import Any, Callable
|
|
3
|
+
|
|
4
|
+
from data_annotations._decorators import (
|
|
5
|
+
DEFAULT_INPUT_ARGS,
|
|
6
|
+
argument_path,
|
|
7
|
+
bind_arguments,
|
|
8
|
+
coerce_documented_artifacts,
|
|
9
|
+
extract_inputs,
|
|
10
|
+
extract_params,
|
|
11
|
+
)
|
|
12
|
+
from data_annotations.description.models import DocumentedArtifact, FieldDefinition
|
|
13
|
+
from data_annotations.provenance.models import ArtifactKind
|
|
14
|
+
|
|
15
|
+
from .writers import annotate_directory, annotate_file
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def record_file_annotation(
    *,
    artifact_path_arg: str = "artifact_path",
    input_args: tuple[str, ...] = DEFAULT_INPUT_ARGS,
    title: str | None = None,
    summary: str | None = None,
    fields: list[FieldDefinition] | None = None,
    primary_key: list[str] | None = None,
    missing_value_codes: dict[str, str] | None = None,
    acquisition_context: dict[str, Any] | None = None,
    generation_context: dict[str, Any] | None = None,
    artifact_kind: ArtifactKind = "other",
    write_readme: bool = True,
    write_schema: bool | None = None,
    annotation_suffix: str = ".meta.json",
    readme_suffix: str = ".README.md",
):
    """
    Build a decorator for a function that writes a single annotated artifact.

    What the wrapped function must do:
    - Take a local output path argument (``artifact_path`` unless
      ``artifact_path_arg`` says otherwise).

    What the decorator does on each call:
    - Binds the call arguments, then runs the wrapped function.
    - Passes bound arguments named in ``input_args`` to ``annotate_file`` as
      provenance inputs, and all remaining bound arguments (other than the
      artifact path) as provenance params.
    - Calls ``annotate_file`` for the artifact path with the options given here.
    - Returns the wrapped function's result untouched, without inspecting it.
    """
    # Options fixed at decoration time; forwarded unchanged on every call.
    static_options = dict(
        title=title,
        summary=summary,
        fields=fields,
        primary_key=primary_key,
        missing_value_codes=missing_value_codes,
        acquisition_context=acquisition_context,
        generation_context=generation_context,
        artifact_kind=artifact_kind,
        write_readme=write_readme,
        write_schema=write_schema,
        annotation_suffix=annotation_suffix,
        readme_suffix=readme_suffix,
    )

    def deco(fn: Callable[..., Any]):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            # Bind first so a call that does not match the signature fails
            # before the wrapped function runs.
            call = bind_arguments(fn, args, kwargs)
            outcome = fn(*args, **kwargs)

            target = argument_path(call, argument_name=artifact_path_arg)
            call_params = extract_params(
                call,
                target_args=(artifact_path_arg,),
                input_args=input_args,
            )
            call_inputs = extract_inputs(call, input_args=input_args)
            annotate_file(
                target,
                params=call_params,
                inputs=call_inputs,
                function=fn,
                **static_options,
            )
            return outcome

        return wrapper

    return deco
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def record_directory_annotation(
    *,
    output_dir_arg: str = "output_dir",
    input_args: tuple[str, ...] = DEFAULT_INPUT_ARGS,
    title: str | None = None,
    summary: str | None = None,
    acquisition_context: dict[str, Any] | None = None,
    generation_context: dict[str, Any] | None = None,
    write_readme: bool = True,
    write_schema: bool | None = None,
    annotation_filename: str = "manifest.json",
    readme_filename: str = "README.md",
):
    """
    Build a decorator for a function that writes several outputs into one directory.

    What the wrapped function must do:
    - Take a local output directory argument (``output_dir`` unless
      ``output_dir_arg`` says otherwise).
    - Return a materialized iterable, typically a ``list`` or ``tuple``. The
      decorator iterates the return value once, so a one-shot iterator would
      be consumed before it reaches the caller.
    - Each returned item may be:
        - DocumentedArtifact
        - ProducedFile
        - (path, kind)
        - a path-like object (kind defaults to ``"other"``)

    What the decorator does on each call:
    - Passes bound arguments named in ``input_args`` to ``annotate_directory``
      as provenance inputs, and all remaining bound arguments (other than the
      output directory) as provenance params.
    - Hands the original return value back to the caller unchanged.
    """
    # Options fixed at decoration time; forwarded unchanged on every call.
    static_options = dict(
        title=title,
        summary=summary,
        acquisition_context=acquisition_context,
        generation_context=generation_context,
        write_readme=write_readme,
        write_schema=write_schema,
        annotation_filename=annotation_filename,
        readme_filename=readme_filename,
    )

    def deco(fn: Callable[..., Any]):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            # Bind first so a call that does not match the signature fails
            # before the wrapped function runs.
            call = bind_arguments(fn, args, kwargs)
            outcome = fn(*args, **kwargs)

            returned = list(outcome)
            directory = argument_path(call, argument_name=output_dir_arg)
            documented: list[DocumentedArtifact] = coerce_documented_artifacts(returned)
            call_params = extract_params(
                call,
                target_args=(output_dir_arg,),
                input_args=input_args,
            )
            call_inputs = extract_inputs(call, input_args=input_args)
            annotate_directory(
                directory,
                artifacts=documented,
                params=call_params,
                inputs=call_inputs,
                function=fn,
                **static_options,
            )
            return outcome

        return wrapper

    return deco
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
from data_annotations.description import DirectoryDescription, FileDescription
|
|
7
|
+
from data_annotations.provenance import BaseProvenance, ProducedFile
|
|
8
|
+
from data_annotations.provenance.models import ArtifactKind
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Subject record for a single file.
# Documented with comments rather than a docstring so the generated JSON
# schema description stays as it was.
class FileArtifactSubject(BaseModel):
    # Path of the file, stored as a string.
    path: str
    # Artifact kind; "other" when not specified.
    kind: ArtifactKind = Field(default="other")
    # Optional SHA-256 of the file (presumably a hex digest -- confirm against the writers).
    sha256: str | None = Field(default=None)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Subject record for a directory.
# Documented with comments rather than a docstring so the generated JSON
# schema description stays as it was.
class DirectoryArtifactSubject(BaseModel):
    # Path of the directory, stored as a string.
    path: str
    # Files associated with the directory; a fresh empty list per instance by default.
    produced_files: list[ProducedFile] = Field(default_factory=list)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Annotation document for a single file: subject + provenance + description.
# Documented with comments rather than a docstring so the generated JSON
# schema description stays as it was.
class FileAnnotationDocument(BaseModel):
    # Format version marker; only the literal "3" validates.
    annotation_version: Literal["3"] = Field(default="3")
    subject: FileArtifactSubject
    provenance: BaseProvenance
    description: FileDescription
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Annotation document for a directory: subject + provenance + description.
# Documented with comments rather than a docstring so the generated JSON
# schema description stays as it was.
class DirectoryAnnotationDocument(BaseModel):
    # Format version marker; only the literal "3" validates.
    annotation_version: Literal["3"] = Field(default="3")
    subject: DirectoryArtifactSubject
    provenance: BaseProvenance
    description: DirectoryDescription
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Paths involved in annotating one file.
# Documented with comments rather than a docstring so the generated JSON
# schema description stays as it was.
class FileAnnotationResult(BaseModel):
    # The annotated artifact itself.
    artifact_path: Path
    # The annotation file.
    annotation_path: Path
    # The README file; None by default (presumably when no README is written -- confirm in the writers).
    readme_path: Path | None = Field(default=None)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Paths involved in annotating one directory.
# Documented with comments rather than a docstring so the generated JSON
# schema description stays as it was.
class DirectoryAnnotationResult(BaseModel):
    # The annotated output directory.
    output_dir: Path
    # The annotation file.
    annotation_path: Path
    # The README file; None by default (presumably when no README is written -- confirm in the writers).
    readme_path: Path | None = Field(default=None)
|