data-annotations 2.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
def main() -> None:
    """Print the package greeting to standard output."""
    greeting = "Hello from data-annotations!"
    print(greeting)
@@ -0,0 +1,140 @@
1
+ import inspect
2
+ from collections.abc import Iterable
3
+ from pathlib import Path
4
+ from typing import TYPE_CHECKING, Any, Callable
5
+
6
+ if TYPE_CHECKING:
7
+ from data_annotations.description.models import DocumentedArtifact
8
+ from data_annotations.provenance.models import ProducedFile
9
+
10
# Argument names treated as provenance inputs when a caller does not supply
# its own ``input_args``.
DEFAULT_INPUT_ARGS = ("input_path", "input_paths")
11
+
12
+
13
def bind_arguments(
    fn: Callable[..., Any],
    args: tuple[Any, ...],
    kwargs: dict[str, Any],
) -> inspect.BoundArguments:
    """Map a call's positional and keyword arguments onto ``fn``'s parameters.

    Parameters the call left out are filled in from the signature's defaults,
    so the returned mapping covers every parameter of ``fn``.

    Raises:
        TypeError: If ``args``/``kwargs`` do not fit the signature of ``fn``.
    """
    signature = inspect.signature(fn)
    call = signature.bind(*args, **kwargs)
    call.apply_defaults()
    return call
21
+
22
+
23
def normalized_path(path: str | Path) -> Path:
    """Return ``path`` as a ``Path`` after the provenance writers' local-path normalization."""
    # Imported at call time rather than at module import time
    # (presumably to avoid a circular import -- confirm before hoisting).
    from data_annotations.provenance import writers as provenance_writers

    normalized = provenance_writers._normalize_local_path(path)
    return Path(normalized)
27
+
28
+
29
def argument_path(
    bound: inspect.BoundArguments,
    *,
    argument_name: str,
) -> Path:
    """Look up a path-valued argument of a bound call and normalize it.

    Args:
        bound: Bound arguments of the wrapped call.
        argument_name: Name of the parameter that carries the path.

    Returns:
        The argument's value passed through ``normalized_path``.

    Raises:
        ValueError: If ``argument_name`` is not among the bound arguments.
    """
    arguments = bound.arguments
    try:
        raw_path = arguments[argument_name]
    except KeyError as exc:
        raise ValueError(
            f"decorator argument {argument_name!r} was not found in the wrapped call"
        ) from exc
    else:
        return normalized_path(raw_path)
41
+
42
+
43
def extract_params(
    bound: inspect.BoundArguments,
    *,
    target_args: Iterable[str],
    input_args: Iterable[str],
) -> dict[str, Any]:
    """Return the bound arguments that are neither targets nor inputs.

    Args:
        bound: Bound arguments of the wrapped call.
        target_args: Names of the output-path arguments to leave out.
        input_args: Names of the input arguments to leave out.

    Returns:
        A new dict of the remaining ``name -> value`` pairs, in the order
        they appear in ``bound.arguments``.
    """
    skipped = {*target_args, *input_args}
    params: dict[str, Any] = {}
    for name, value in bound.arguments.items():
        if name in skipped:
            continue
        params[name] = value
    return params
53
+
54
+
55
def extract_inputs(
    bound: inspect.BoundArguments,
    *,
    input_args: Iterable[str],
) -> list[str]:
    """Collect the provenance inputs named by ``input_args`` as strings.

    Args:
        bound: Bound arguments of the wrapped call.
        input_args: Names of the arguments that hold inputs. Names that are
            not bound, or whose value is ``None``, are skipped.

    Returns:
        One string per input, following the order of ``input_args``.
        Lists and tuples contribute their items in order. Sets and
        frozensets contribute their items sorted by string value, because
        their iteration order is not stable between runs and the result is
        recorded as provenance. Any other value contributes ``str(value)``.
    """
    inputs: list[str] = []
    for name in input_args:
        # ``get`` folds "not bound" and "bound to None" into one skip.
        value = bound.arguments.get(name)
        if value is None:
            continue
        if isinstance(value, (set, frozenset)):
            # Unordered collection: sort so repeated runs record the same list.
            inputs.extend(sorted(str(item) for item in value))
        elif isinstance(value, (list, tuple)):
            inputs.extend(str(item) for item in value)
        else:
            inputs.append(str(value))
    return inputs
72
+
73
+
74
def coerce_produced_file(item: Any) -> "ProducedFile":
    """Convert one supported item into a ``ProducedFile``.

    Accepted shapes, checked in this order:

    - ``DocumentedArtifact``: its path and kind are carried over.
    - ``ProducedFile``: copied with only ``path`` replaced by its normalized
      form; the other fields are left as they are.
    - 2-tuple ``(path, kind)``.
    - anything else is treated as a path with kind ``"other"``.

    Except in the ``ProducedFile`` case, ``sha256`` comes from
    ``sha256_file`` when the normalized path exists and is ``None`` otherwise.
    """
    from data_annotations.description.models import DocumentedArtifact
    from data_annotations.provenance import writers as provenance_writers
    from data_annotations.provenance.models import ProducedFile

    def build(raw_path: Any, kind: Any) -> "ProducedFile":
        # Shared by every branch that has to create a fresh record.
        location = normalized_path(raw_path)
        digest = None
        if location.exists():
            digest = provenance_writers.sha256_file(location)
        return ProducedFile(path=str(location), kind=kind, sha256=digest)

    if isinstance(item, DocumentedArtifact):
        return build(item.path, item.kind)

    if isinstance(item, ProducedFile):
        clean_path = str(normalized_path(item.path))
        return item.model_copy(update={"path": clean_path})

    if isinstance(item, tuple) and len(item) == 2:
        raw_path, kind = item
        return build(raw_path, kind)

    return build(item, "other")
110
+
111
+
112
def coerce_produced_files(items: Iterable[Any]) -> list["ProducedFile"]:
    """Apply ``coerce_produced_file`` to every item, keeping the input order."""
    produced = []
    for entry in items:
        produced.append(coerce_produced_file(entry))
    return produced
114
+
115
+
116
def coerce_documented_artifact(item: Any) -> "DocumentedArtifact":
    """Convert one supported item into a ``DocumentedArtifact``.

    Accepted shapes, checked in this order:

    - ``DocumentedArtifact``: copied with only ``path`` replaced by its
      normalized form.
    - ``ProducedFile``: its path and kind are carried over.
    - 2-tuple ``(path, kind)``.
    - anything else is treated as a path and ``kind`` is left to the model's
      default.
    """
    from data_annotations.description.models import DocumentedArtifact
    from data_annotations.provenance.models import ProducedFile

    if isinstance(item, DocumentedArtifact):
        clean_path = str(normalized_path(item.path))
        return item.model_copy(update={"path": clean_path})

    if isinstance(item, ProducedFile):
        raw_path, kind = item.path, item.kind
    elif isinstance(item, tuple) and len(item) == 2:
        raw_path, kind = item
    else:
        return DocumentedArtifact(path=str(normalized_path(item)))

    return DocumentedArtifact(path=str(normalized_path(raw_path)), kind=kind)
137
+
138
+
139
def coerce_documented_artifacts(items: Iterable[Any]) -> list["DocumentedArtifact"]:
    """Apply ``coerce_documented_artifact`` to every item, keeping the input order."""
    artifacts = []
    for entry in items:
        artifacts.append(coerce_documented_artifact(entry))
    return artifacts
@@ -0,0 +1,30 @@
1
+ from .models import (
2
+ DirectoryAnnotationDocument,
3
+ DirectoryAnnotationResult,
4
+ DirectoryArtifactSubject,
5
+ FileAnnotationDocument,
6
+ FileAnnotationResult,
7
+ FileArtifactSubject,
8
+ )
9
+ from .decorators import record_directory_annotation, record_file_annotation
10
+ from .writers import (
11
+ annotate_directory,
12
+ annotate_file,
13
+ write_directory_annotation,
14
+ write_file_annotation,
15
+ )
16
+
17
# Names re-exported as the public API of this package.
__all__ = [
    # from .writers
    "annotate_directory",
    "annotate_file",
    # from .decorators
    "record_directory_annotation",
    "record_file_annotation",
    # from .writers
    "write_directory_annotation",
    "write_file_annotation",
    # from .models
    "DirectoryAnnotationDocument",
    "DirectoryAnnotationResult",
    "DirectoryArtifactSubject",
    "FileAnnotationDocument",
    "FileAnnotationResult",
    "FileArtifactSubject",
]
@@ -0,0 +1,147 @@
1
+ from functools import wraps
2
+ from typing import Any, Callable
3
+
4
+ from data_annotations._decorators import (
5
+ DEFAULT_INPUT_ARGS,
6
+ argument_path,
7
+ bind_arguments,
8
+ coerce_documented_artifacts,
9
+ extract_inputs,
10
+ extract_params,
11
+ )
12
+ from data_annotations.description.models import DocumentedArtifact, FieldDefinition
13
+ from data_annotations.provenance.models import ArtifactKind
14
+
15
+ from .writers import annotate_directory, annotate_file
16
+
17
+
18
def record_file_annotation(
    *,
    artifact_path_arg: str = "artifact_path",
    input_args: tuple[str, ...] = DEFAULT_INPUT_ARGS,
    title: str | None = None,
    summary: str | None = None,
    fields: list[FieldDefinition] | None = None,
    primary_key: list[str] | None = None,
    missing_value_codes: dict[str, str] | None = None,
    acquisition_context: dict[str, Any] | None = None,
    generation_context: dict[str, Any] | None = None,
    artifact_kind: ArtifactKind = "other",
    write_readme: bool = True,
    write_schema: bool | None = None,
    annotation_suffix: str = ".meta.json",
    readme_suffix: str = ".README.md",
):
    """
    Build a decorator for a function that writes a single annotated artifact.

    What the wrapped function must look like:
    - It takes the local output path as an argument; the argument is looked
      up by the name given in ``artifact_path_arg`` (``artifact_path`` by
      default).
    - Bound arguments whose names appear in ``input_args`` are recorded as
      provenance inputs.
    - Every other bound argument is recorded as a provenance param.

    What happens on each call:
    - The wrapped function runs first; afterwards ``annotate_file`` is called
      for the path argument with the options given here.
    - The wrapped function's return value is not inspected and is handed
      back unchanged.
    """
    # Options that stay the same for every call of the decorated function.
    fixed_options: dict[str, Any] = {
        "title": title,
        "summary": summary,
        "fields": fields,
        "primary_key": primary_key,
        "missing_value_codes": missing_value_codes,
        "acquisition_context": acquisition_context,
        "generation_context": generation_context,
        "artifact_kind": artifact_kind,
        "write_readme": write_readme,
        "write_schema": write_schema,
        "annotation_suffix": annotation_suffix,
        "readme_suffix": readme_suffix,
    }

    def decorator(fn: Callable[..., Any]):
        @wraps(fn)
        def annotated_call(*args, **kwargs):
            # Bind before running so a call that does not fit the signature
            # fails before the wrapped function does any work.
            call = bind_arguments(fn, args, kwargs)
            outcome = fn(*args, **kwargs)

            target = argument_path(call, argument_name=artifact_path_arg)
            call_params = extract_params(
                call,
                target_args=(artifact_path_arg,),
                input_args=input_args,
            )
            call_inputs = extract_inputs(call, input_args=input_args)
            annotate_file(
                target,
                params=call_params,
                inputs=call_inputs,
                function=fn,
                **fixed_options,
            )
            return outcome

        return annotated_call

    return decorator
82
+
83
+
84
def record_directory_annotation(
    *,
    output_dir_arg: str = "output_dir",
    input_args: tuple[str, ...] = DEFAULT_INPUT_ARGS,
    title: str | None = None,
    summary: str | None = None,
    acquisition_context: dict[str, Any] | None = None,
    generation_context: dict[str, Any] | None = None,
    write_readme: bool = True,
    write_schema: bool | None = None,
    annotation_filename: str = "manifest.json",
    readme_filename: str = "README.md",
):
    """
    Build a decorator for a function that writes several outputs into one directory.

    What the wrapped function must look like:
    - It takes the local output directory as an argument; the argument is
      looked up by the name given in ``output_dir_arg`` (``output_dir`` by
      default).
    - It returns a materialized iterable, usually a ``list`` or ``tuple``.
      The decorator iterates the result once, so a one-shot iterator would
      reach the caller already consumed.
    - Each returned item is one of:
        - DocumentedArtifact
        - ProducedFile
        - (path, kind)
        - a path-like object (kind defaults to ``"other"``)
    - Bound arguments whose names appear in ``input_args`` are recorded as
      provenance inputs.
    - Every other bound argument is recorded as a provenance param.

    What happens on each call:
    - The wrapped function runs first; afterwards ``annotate_directory`` is
      called for the directory argument with the returned items and the
      options given here.
    - The original return value is handed back unchanged.
    """
    # Options that stay the same for every call of the decorated function.
    fixed_options: dict[str, Any] = {
        "title": title,
        "summary": summary,
        "acquisition_context": acquisition_context,
        "generation_context": generation_context,
        "write_readme": write_readme,
        "write_schema": write_schema,
        "annotation_filename": annotation_filename,
        "readme_filename": readme_filename,
    }

    def decorator(fn: Callable[..., Any]):
        @wraps(fn)
        def annotated_call(*args, **kwargs):
            # Bind before running so a call that does not fit the signature
            # fails before the wrapped function does any work.
            call = bind_arguments(fn, args, kwargs)
            outcome = fn(*args, **kwargs)

            returned_items = list(outcome)
            target_dir = argument_path(call, argument_name=output_dir_arg)
            documented: list[DocumentedArtifact] = coerce_documented_artifacts(
                returned_items
            )
            call_params = extract_params(
                call,
                target_args=(output_dir_arg,),
                input_args=input_args,
            )
            call_inputs = extract_inputs(call, input_args=input_args)
            annotate_directory(
                target_dir,
                artifacts=documented,
                params=call_params,
                inputs=call_inputs,
                function=fn,
                **fixed_options,
            )
            return outcome

        return annotated_call

    return decorator
@@ -0,0 +1,45 @@
1
+ from pathlib import Path
2
+ from typing import Literal
3
+
4
+ from pydantic import BaseModel, Field
5
+
6
+ from data_annotations.description import DirectoryDescription, FileDescription
7
+ from data_annotations.provenance import BaseProvenance, ProducedFile
8
+ from data_annotations.provenance.models import ArtifactKind
9
+
10
+
11
# Subject section of a file annotation document: identifies the annotated file.
class FileArtifactSubject(BaseModel):
    # Location of the file, kept as a plain string.
    path: str
    # Artifact category; "other" when the caller does not specify one.
    kind: ArtifactKind = "other"
    # Optional digest of the file (presumably a SHA-256 hex string -- confirm
    # against the writer); None when not supplied.
    sha256: str | None = None
15
+
16
+
17
# Subject section of a directory annotation document: the directory plus the
# files recorded for it.
class DirectoryArtifactSubject(BaseModel):
    # Location of the directory, kept as a plain string.
    path: str
    # Files recorded for the directory; each instance starts with its own
    # empty list.
    produced_files: list[ProducedFile] = Field(default_factory=list)
20
+
21
+
22
# Complete annotation document for a single file: subject, provenance and
# description under one versioned envelope.
class FileAnnotationDocument(BaseModel):
    # Format version tag; the only accepted value is the string "3".
    annotation_version: Literal["3"] = "3"
    # The file being annotated.
    subject: FileArtifactSubject
    # Provenance record for the file.
    provenance: BaseProvenance
    # Description of the file.
    description: FileDescription
27
+
28
+
29
# Complete annotation document for a directory: subject, provenance and
# description under one versioned envelope.
class DirectoryAnnotationDocument(BaseModel):
    # Format version tag; the only accepted value is the string "3".
    annotation_version: Literal["3"] = "3"
    # The directory being annotated.
    subject: DirectoryArtifactSubject
    # Provenance record for the directory.
    provenance: BaseProvenance
    # Description of the directory.
    description: DirectoryDescription
34
+
35
+
36
# Paths involved in annotating one file (presumably what the file writers
# return -- confirm against .writers).
class FileAnnotationResult(BaseModel):
    # The annotated artifact itself.
    artifact_path: Path
    # The annotation file for the artifact.
    annotation_path: Path
    # The README for the artifact; None when there is none.
    readme_path: Path | None = None
40
+
41
+
42
# Paths involved in annotating one output directory (presumably what the
# directory writers return -- confirm against .writers).
class DirectoryAnnotationResult(BaseModel):
    # The annotated directory.
    output_dir: Path
    # The annotation file for the directory.
    annotation_path: Path
    # The README for the directory; None when there is none.
    readme_path: Path | None = None