data-annotations 2.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_annotations/__init__.py +2 -0
- data_annotations/_decorators.py +140 -0
- data_annotations/annotations/__init__.py +30 -0
- data_annotations/annotations/decorators.py +147 -0
- data_annotations/annotations/models.py +45 -0
- data_annotations/annotations/writers.py +368 -0
- data_annotations/cli.py +37 -0
- data_annotations/cli_app/__init__.py +1 -0
- data_annotations/cli_app/annotate.py +307 -0
- data_annotations/cli_app/common.py +276 -0
- data_annotations/cli_app/prompts.py +534 -0
- data_annotations/cli_app/provenance_commands.py +107 -0
- data_annotations/description/__init__.py +37 -0
- data_annotations/description/decorators.py +145 -0
- data_annotations/description/models.py +63 -0
- data_annotations/description/writers.py +321 -0
- data_annotations/provenance/__init__.py +37 -0
- data_annotations/provenance/decorators.py +111 -0
- data_annotations/provenance/git.py +121 -0
- data_annotations/provenance/models.py +50 -0
- data_annotations/provenance/recovery.py +473 -0
- data_annotations/provenance/runtime.py +248 -0
- data_annotations/provenance/writers.py +206 -0
- data_annotations-2.1.2.dist-info/METADATA +616 -0
- data_annotations-2.1.2.dist-info/RECORD +28 -0
- data_annotations-2.1.2.dist-info/WHEEL +4 -0
- data_annotations-2.1.2.dist-info/entry_points.txt +3 -0
- data_annotations-2.1.2.dist-info/licenses/LICENSE +28 -0
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import shlex
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Annotated, Any, NoReturn, cast
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
|
|
8
|
+
from data_annotations.provenance.models import ArtifactKind
|
|
9
|
+
from data_annotations.provenance import git as provenance_git
|
|
10
|
+
from data_annotations.provenance import recovery as provenance_recovery
|
|
11
|
+
|
|
12
|
+
# Accepted artifact kind names; _validate_artifact_kind lists them in this
# order in its error message.
_ARTIFACT_KINDS: tuple[ArtifactKind, ...] = (
    "plot", "model", "table",
    "dataset", "report", "other",
)
# File names that count as annotation sidecars when matched exactly.
_DIRECTORY_SIDECARS = frozenset(("manifest.json", "README.md", "schema.json"))
# File-name endings that count as annotation sidecars.
_ANNOTATION_SUFFIXES = (
    ".meta.json",
    ".README.md",
    ".prov.json",
    ".schema.json",
)
# Sentinel string; its consumers are outside this section of the file.
_UNKNOWN_SELECTION = "__unknown__"
|
|
23
|
+
|
|
24
|
+
# Shared typer option declarations. Each one is bound to a value type by the
# Annotated aliases that follow these definitions.

# --- How the artifact was generated -------------------------------------
INPUT_OPTION = typer.Option(
    "--input", help="Input file path or URI used to generate the artifact. Repeatable."
)
PARAM_OPTION = typer.Option(
    "--param", help="Generation parameter as KEY=VALUE. Repeatable."
)
SCRIPT_OPTION = typer.Option(
    "--script", help="Original script path used to generate the artifact."
)
SCRIPT_REPO_PATH_OPTION = typer.Option(
    "--script-repo-path",
    help="Original generating script path relative to the repository root.",
)
COMMAND_OPTION = typer.Option(
    "--command",
    help="Original command used to generate the artifact, as one quoted string.",
)
FUNCTION_OPTION = typer.Option(
    "--function", help="Generating function to record in the manifest."
)

# --- Git state at generation time ---------------------------------------
GIT_SHA_OPTION = typer.Option(
    "--git-sha",
    help="Original Git SHA.",
)
GIT_BRANCH_OPTION = typer.Option("--git-branch", help="Original Git branch.")
GIT_REMOTE_NAME_OPTION = typer.Option(
    "--git-remote-name", help="Original Git remote name."
)
GIT_REMOTE_URL_OPTION = typer.Option(
    "--git-remote-url", help="Original Git remote URL."
)
GIT_DIRTY_OPTION = typer.Option(
    "--git-dirty/--no-git-dirty", help="Whether the original working tree was dirty."
)

# --- Write behaviour ----------------------------------------------------
FORCE_OPTION = typer.Option(
    "--force", help="Overwrite any existing annotation sidecars."
)
|
|
69
|
+
|
|
70
|
+
# Annotated aliases that pair each option declaration with its value type.

# Repeatable options: a list of strings, or None when the flag is absent.
InputValuesOption = Annotated[list[str] | None, INPUT_OPTION]
ParamValuesOption = Annotated[list[str] | None, PARAM_OPTION]

# Optional single-string options.
ScriptOption = Annotated[str | None, SCRIPT_OPTION]
ScriptRepoPathOption = Annotated[str | None, SCRIPT_REPO_PATH_OPTION]
CommandOption = Annotated[str | None, COMMAND_OPTION]
FunctionOption = Annotated[str | None, FUNCTION_OPTION]
GitShaOption = Annotated[str | None, GIT_SHA_OPTION]
GitBranchOption = Annotated[str | None, GIT_BRANCH_OPTION]
GitRemoteNameOption = Annotated[str | None, GIT_REMOTE_NAME_OPTION]
GitRemoteUrlOption = Annotated[str | None, GIT_REMOTE_URL_OPTION]

# Boolean options; the dirty flag is tri-state (None when not supplied).
GitDirtyOption = Annotated[bool | None, GIT_DIRTY_OPTION]
ForceOption = Annotated[bool, FORCE_OPTION]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _error(message: str, *, code: int = 2) -> NoReturn:
    """Print ``Error: <message>`` in red on stderr and abort the command.

    Args:
        message: Text shown after the ``Error: `` prefix.
        code: Exit code carried by the raised ``typer.Exit`` (default 2).

    Raises:
        typer.Exit: Always.
    """
    rendered = f"Error: {message}"
    typer.secho(rendered, fg=typer.colors.RED, err=True)
    raise typer.Exit(code=code)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _is_manifest_path(path: Path) -> bool:
    """Tell whether *path* is named like an annotation manifest.

    A path qualifies when its final component is exactly ``manifest.json``
    or when its string form ends with ``.meta.json``. Only the name is
    inspected; the file system is not touched.
    """
    if path.name == "manifest.json":
        return True
    return str(path).endswith(".meta.json")
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _resolved_path(path: str | Path) -> Path:
|
|
94
|
+
return Path(path).expanduser().resolve()
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _resolve_manifest_path(target: Path, manifest: Path | None) -> Path:
    """Locate the manifest file that describes *target*.

    Lookup order:
      1. The explicit *manifest*, when one is given.
      2. *target* itself, when it is named like a manifest.
      3. ``<target>.meta.json`` when *target* is an existing file.
      4. ``<target>/manifest.json`` when *target* is an existing directory.

    Args:
        target: Artifact file, artifact directory, or manifest path.
        manifest: Explicit manifest path, or None to derive it from *target*.

    Returns:
        The resolved path of an existing manifest file.

    Raises:
        typer.Exit: Through ``_error`` when no manifest can be found or the
            target does not exist.
    """
    if manifest is not None:
        explicit = _resolved_path(manifest)
        if explicit.is_file():
            return explicit
        _error(f"manifest not found: {explicit}")

    resolved_target = _resolved_path(target)

    # The target may itself be the manifest.
    if _is_manifest_path(resolved_target):
        if resolved_target.is_file():
            return resolved_target
        _error(f"manifest not found: {resolved_target}")

    # File artifact: the sidecar sits next to it with ".meta.json" appended.
    if resolved_target.is_file():
        sidecar = Path(str(resolved_target) + ".meta.json")
        if not sidecar.is_file():
            _error(f"no annotation sidecar found for file: {resolved_target}")
        return sidecar.resolve()

    # Directory artifact: the manifest lives inside the directory.
    if resolved_target.is_dir():
        directory_manifest = (resolved_target / "manifest.json").resolve()
        if not directory_manifest.is_file():
            _error(f"no manifest.json found in directory: {resolved_target}")
        return directory_manifest

    _error(f"target does not exist: {resolved_target}")
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _match_target_path(target: Path, manifest: Path | None) -> Path:
    """Work out which local artifact path a command refers to.

    When an explicit *manifest* is given, or *target* is not named like a
    manifest, the resolved *target* is the artifact and must exist.
    Otherwise *target* is loaded as a manifest and the artifact path recorded
    in it (``artifact_path`` for a ``FileManifest``, ``output_dir`` for any
    other manifest type) is resolved and must exist locally.

    Raises:
        typer.Exit: Through ``_error`` when the chosen path does not exist.
    """
    resolved_target = _resolved_path(target)

    if manifest is not None or not _is_manifest_path(resolved_target):
        if not resolved_target.exists():
            _error(f"target does not exist: {resolved_target}")
        return resolved_target

    # The target is a manifest: follow the artifact location it records.
    loaded = provenance_recovery._load_manifest(resolved_target)
    if isinstance(loaded, provenance_recovery.FileManifest):
        recorded_location = loaded.artifact_path
    else:
        recorded_location = loaded.output_dir
    derived_target = _resolved_path(recorded_location)

    if derived_target.exists():
        return derived_target
    _error(
        f"recorded artifact path from manifest does not exist locally: {derived_target}"
    )
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _echo_entries(title: str, entries: tuple[str, ...]) -> None:
    """Print ``<title>:`` followed by one dash-prefixed line per entry.

    Nothing at all is printed when *entries* is empty.
    """
    if entries:
        typer.echo(f"{title}:")
        for entry in entries:
            typer.echo(f" - {entry}")
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _checkout_hint(target: str, manifest: str | None) -> str:
|
|
160
|
+
parts = [
|
|
161
|
+
"data-annotations",
|
|
162
|
+
"provenance",
|
|
163
|
+
"checkout",
|
|
164
|
+
shlex.quote(target),
|
|
165
|
+
]
|
|
166
|
+
if manifest is not None:
|
|
167
|
+
parts.extend(["--manifest", shlex.quote(manifest)])
|
|
168
|
+
return " ".join(parts)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _missing_checkout_fields(
    manifest: provenance_recovery.FileManifest | provenance_recovery.DirectoryManifest,
) -> tuple[str, ...]:
    """Name the Git fields of *manifest* that are None.

    Only ``git_remote_url`` and ``git_sha`` are checked, and the result
    keeps that order. An empty tuple means both are set.
    """
    checked_fields = ("git_remote_url", "git_sha")
    return tuple(
        field_name
        for field_name in checked_fields
        if getattr(manifest, field_name) is None
    )
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _annotation_paths_for_file(artifact_path: Path) -> tuple[Path, Path]:
    """Return the ``.meta.json`` and ``.README.md`` paths for a file artifact.

    Each suffix is appended to the complete path string, so the artifact's
    own extension is kept (``plot.png`` -> ``plot.png.meta.json``).
    """
    base = str(artifact_path)
    meta_path, readme_path = (
        Path(base + suffix) for suffix in (".meta.json", ".README.md")
    )
    return meta_path, readme_path
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _annotation_paths_for_directory(output_dir: Path) -> tuple[Path, Path]:
    """Return the ``manifest.json`` and ``README.md`` paths inside *output_dir*."""
    manifest_path = output_dir.joinpath("manifest.json")
    readme_path = output_dir.joinpath("README.md")
    return manifest_path, readme_path
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _ensure_annotation_outputs_available(
    paths: tuple[Path, ...],
    *,
    force: bool,
) -> None:
    """Refuse to proceed when sidecar files already exist, unless forced.

    Args:
        paths: Sidecar paths that are about to be written.
        force: When True, existing files are tolerated.

    Raises:
        typer.Exit: Through ``_error`` with exit code 1 when any path exists
            and *force* is False. The message lists every existing path.
    """
    already_present = [candidate for candidate in paths if candidate.exists()]
    if force or not already_present:
        return

    listing = ", ".join(map(str, already_present))
    _error(
        "annotation sidecars already exist; rerun with --force to overwrite: "
        + listing,
        code=1,
    )
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _is_annotation_sidecar(path: Path) -> bool:
    """Tell whether *path* is an annotation sidecar, judged by file name only.

    True when the name is one of ``_DIRECTORY_SIDECARS`` or ends with one of
    ``_ANNOTATION_SUFFIXES``.
    """
    filename = path.name
    if filename in _DIRECTORY_SIDECARS:
        return True
    return filename.endswith(_ANNOTATION_SUFFIXES)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _discover_directory_files(output_dir: Path) -> list[Path]:
    """List the non-sidecar files directly inside *output_dir*.

    Only immediate children are considered; subdirectories are not
    descended into. Each returned path is resolved, and the list is sorted
    by file name.
    """
    artifacts: list[Path] = []
    for child in output_dir.iterdir():
        if not child.is_file() or _is_annotation_sidecar(child):
            continue
        artifacts.append(child.resolve())
    artifacts.sort(key=lambda found: found.name)
    return artifacts
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _validate_artifact_kind(value: str) -> ArtifactKind:
    """Normalise *value* and check that it is a known artifact kind.

    The value is stripped and lower-cased before it is compared against
    ``_ARTIFACT_KINDS``.

    Returns:
        The normalised kind.

    Raises:
        typer.Exit: Through ``_error`` when the kind is not recognised. The
            message quotes the original, un-normalised value.
    """
    candidate = value.strip().lower()
    if candidate in _ARTIFACT_KINDS:
        return cast(ArtifactKind, candidate)

    allowed = ", ".join(_ARTIFACT_KINDS)
    _error(f"invalid artifact kind {value!r}; expected one of: {allowed}")
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _parse_command_string(value: str) -> list[str]:
    """Split a shell-style command string into its argument list.

    Raises:
        typer.Exit: Through ``_error`` when ``shlex.split`` rejects the
            string with a ``ValueError``.
    """
    try:
        tokens = shlex.split(value)
    except ValueError as exc:
        _error(f"invalid shell command: {exc}")
    else:
        return tokens
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def _parse_param_entry(entry: str) -> tuple[str, Any]:
    """Parse one ``KEY=VALUE`` string from ``--param``.

    The entry is split at the first ``=``. The key is stripped of
    surrounding whitespace. The value is decoded as JSON when possible and
    otherwise kept as the raw string.

    Returns:
        A ``(key, value)`` pair.

    Raises:
        typer.Exit: Through ``_error`` when there is no ``=`` or the key is
            empty after stripping.
    """
    name, separator, raw_value = entry.partition("=")
    if not separator:
        _error(f"invalid --param value {entry!r}; expected KEY=VALUE")

    name = name.strip()
    if not name:
        _error(f"invalid --param value {entry!r}; key cannot be empty")

    try:
        return name, json.loads(raw_value)
    except json.JSONDecodeError:
        # Not valid JSON: keep the text exactly as typed.
        return name, raw_value
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _collect_params(param_entries: list[str] | None) -> dict[str, Any]:
|
|
256
|
+
params: dict[str, Any] = {}
|
|
257
|
+
for entry in param_entries or []:
|
|
258
|
+
key, value = _parse_param_entry(entry)
|
|
259
|
+
params[key] = value
|
|
260
|
+
return params
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _normalize_script_path(value: str) -> str:
    """Return *value* as the string form of its resolved path."""
    script_path = _resolved_path(value)
    return str(script_path)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def _normalize_remote_url(value: str) -> str:
    """Return the sanitized form of a Git remote URL.

    Falls back to *value* unchanged when
    ``provenance_git._sanitize_remote_url`` returns None.
    """
    sanitized = provenance_git._sanitize_remote_url(value)
    if sanitized is None:
        return value
    return sanitized
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _is_current_annotation_command(command: list[str]) -> bool:
    """Tell whether *command* is a ``data-annotations annotate`` invocation.

    Only the base name of the first element is compared, so any directory
    prefix on the executable is ignored. The second element must be exactly
    ``annotate``. Commands with fewer than two elements never match.
    """
    if len(command) < 2:
        return False
    program, subcommand, *_ = command
    program_name = Path(program).name
    return (program_name, subcommand) == ("data-annotations", "annotate")
|