data-annotations 2.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_annotations/__init__.py +2 -0
- data_annotations/_decorators.py +140 -0
- data_annotations/annotations/__init__.py +30 -0
- data_annotations/annotations/decorators.py +147 -0
- data_annotations/annotations/models.py +45 -0
- data_annotations/annotations/writers.py +368 -0
- data_annotations/cli.py +37 -0
- data_annotations/cli_app/__init__.py +1 -0
- data_annotations/cli_app/annotate.py +307 -0
- data_annotations/cli_app/common.py +276 -0
- data_annotations/cli_app/prompts.py +534 -0
- data_annotations/cli_app/provenance_commands.py +107 -0
- data_annotations/description/__init__.py +37 -0
- data_annotations/description/decorators.py +145 -0
- data_annotations/description/models.py +63 -0
- data_annotations/description/writers.py +321 -0
- data_annotations/provenance/__init__.py +37 -0
- data_annotations/provenance/decorators.py +111 -0
- data_annotations/provenance/git.py +121 -0
- data_annotations/provenance/models.py +50 -0
- data_annotations/provenance/recovery.py +473 -0
- data_annotations/provenance/runtime.py +248 -0
- data_annotations/provenance/writers.py +206 -0
- data_annotations-2.1.2.dist-info/METADATA +616 -0
- data_annotations-2.1.2.dist-info/RECORD +28 -0
- data_annotations-2.1.2.dist-info/WHEEL +4 -0
- data_annotations-2.1.2.dist-info/entry_points.txt +3 -0
- data_annotations-2.1.2.dist-info/licenses/LICENSE +28 -0
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Any, Callable
|
|
3
|
+
|
|
4
|
+
from data_annotations.description import (
|
|
5
|
+
ArtifactDescription,
|
|
6
|
+
DirectoryDescription,
|
|
7
|
+
DocumentedArtifact,
|
|
8
|
+
FieldDefinition,
|
|
9
|
+
FileDescription,
|
|
10
|
+
write_directory_readme,
|
|
11
|
+
write_file_readme,
|
|
12
|
+
)
|
|
13
|
+
from data_annotations.provenance import ArtifactKind, BaseProvenance, ProducedFile
|
|
14
|
+
from data_annotations.provenance import writers as provenance_writers
|
|
15
|
+
|
|
16
|
+
from .models import (
|
|
17
|
+
DirectoryAnnotationDocument,
|
|
18
|
+
DirectoryAnnotationResult,
|
|
19
|
+
DirectoryArtifactSubject,
|
|
20
|
+
FileAnnotationDocument,
|
|
21
|
+
FileAnnotationResult,
|
|
22
|
+
FileArtifactSubject,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _validated_file_readme_fields(
|
|
27
|
+
*, title: str | None, summary: str | None
|
|
28
|
+
) -> tuple[str, str]:
|
|
29
|
+
if title is None:
|
|
30
|
+
raise ValueError("title is required when write_readme=True")
|
|
31
|
+
if summary is None:
|
|
32
|
+
raise ValueError("summary is required when write_readme=True")
|
|
33
|
+
return title, summary
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _validated_directory_readme_fields(
|
|
37
|
+
*, title: str | None, summary: str | None
|
|
38
|
+
) -> tuple[str, str]:
|
|
39
|
+
if title is None:
|
|
40
|
+
raise ValueError("title is required when write_readme=True")
|
|
41
|
+
if summary is None:
|
|
42
|
+
raise ValueError("summary is required when write_readme=True")
|
|
43
|
+
return title, summary
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _base_provenance_from_manifest(
    manifest: Any,
    *,
    excluded_fields: set[str],
) -> BaseProvenance:
    """Build a ``BaseProvenance`` from a manifest, leaving out selected fields.

    Args:
        manifest: Object exposing ``model_dump(exclude=...)``.
        excluded_fields: Field names passed as ``exclude`` to ``model_dump``.

    Returns:
        The result of ``BaseProvenance.model_validate`` on the dumped data.
    """
    provenance_payload = manifest.model_dump(exclude=excluded_fields)
    return BaseProvenance.model_validate(provenance_payload)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _artifact_description(artifact: DocumentedArtifact) -> ArtifactDescription:
    """Convert a ``DocumentedArtifact`` into an ``ArtifactDescription``.

    Every entry of ``artifact.fields`` is passed through
    ``FieldDefinition.model_validate``; ``primary_key`` and
    ``missing_value_codes`` are copied into a fresh list and dict.

    Args:
        artifact: The documented artifact to describe.

    Returns:
        A new ``ArtifactDescription`` carrying the artifact's path (as a
        string), title, summary, fields, primary key and missing-value codes.
    """
    validated_fields = []
    for raw_field in artifact.fields:
        validated_fields.append(FieldDefinition.model_validate(raw_field))

    # Round-trip through Path so the stored string is pathlib's rendering.
    path_text = str(Path(artifact.path))
    return ArtifactDescription(
        path=path_text,
        title=artifact.title,
        summary=artifact.summary,
        fields=validated_fields,
        primary_key=list(artifact.primary_key),
        missing_value_codes=dict(artifact.missing_value_codes),
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _to_produced_file(artifact: DocumentedArtifact) -> ProducedFile:
    """Create the ``ProducedFile`` record for a documented artifact.

    Args:
        artifact: The documented artifact whose path and kind are recorded.

    Returns:
        A ``ProducedFile`` whose ``sha256`` is the value returned by
        ``provenance_writers.sha256_file`` when the path exists, else ``None``.
    """
    artifact_file = Path(artifact.path)
    recorded_path = str(artifact_file)
    recorded_kind = artifact.kind

    # Hash only paths that exist; a missing path is recorded without a digest.
    if artifact_file.exists():
        digest = provenance_writers.sha256_file(artifact_file)
    else:
        digest = None

    return ProducedFile(path=recorded_path, kind=recorded_kind, sha256=digest)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _coerce_fields(
    fields: list[FieldDefinition] | None,
) -> list[FieldDefinition]:
    """Validate field definitions, treating ``None`` or empty as no fields.

    Args:
        fields: Field definitions to validate, or ``None``.

    Returns:
        A new list holding ``FieldDefinition.model_validate(item)`` for each
        item, or an empty list when ``fields`` is falsy.
    """
    if not fields:
        return []
    return list(map(FieldDefinition.model_validate, fields))
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _build_file_annotation_document(
    artifact_path: str | Path,
    *,
    title: str | None = None,
    summary: str | None = None,
    fields: list[FieldDefinition] | None = None,
    primary_key: list[str] | None = None,
    missing_value_codes: dict[str, str] | None = None,
    acquisition_context: dict[str, Any] | None = None,
    generation_context: dict[str, Any] | None = None,
    artifact_kind: ArtifactKind = "other",
    params: dict[str, Any] | None = None,
    inputs: list[str] | None = None,
    function: Callable[..., Any] | None = None,
    capture_mode: str = "runtime",
    provenance_overrides: dict[str, Any] | None = None,
    normalize_inputs: bool = True,
) -> FileAnnotationDocument:
    """Assemble the in-memory annotation document for a single file.

    A file manifest is obtained from ``provenance_writers._build_file_manifest``
    and split into the document's three sections: subject, provenance and
    description. Nothing is written to disk by this function itself.

    Args:
        artifact_path: Path of the file being annotated.
        title: Description title.
        summary: Description summary.
        fields: Field definitions; validated through ``_coerce_fields``.
        primary_key: Primary-key names; ``None`` becomes an empty list.
        missing_value_codes: Missing-value codes; ``None`` becomes ``{}``.
        acquisition_context: Acquisition context; ``None`` becomes ``{}``.
        generation_context: Generation context; ``None`` becomes ``{}``.
        artifact_kind: Forwarded to the manifest builder.
        params: Forwarded to the manifest builder.
        inputs: Forwarded to the manifest builder.
        function: Forwarded to the manifest builder.
        capture_mode: Forwarded to the manifest builder.
        provenance_overrides: Forwarded to the manifest builder as ``overrides``.
        normalize_inputs: Forwarded to the manifest builder.

    Returns:
        The assembled ``FileAnnotationDocument``.
    """
    file_manifest = provenance_writers._build_file_manifest(
        artifact_path,
        artifact_kind=artifact_kind,
        params=params,
        inputs=inputs,
        function=function,
        capture_mode=capture_mode,
        overrides=provenance_overrides,
        normalize_inputs=normalize_inputs,
    )

    # The artifact-identifying manifest fields form the subject; the same
    # fields are excluded from the provenance section below.
    subject = FileArtifactSubject(
        path=file_manifest.artifact_path,
        kind=file_manifest.artifact_kind,
        sha256=file_manifest.artifact_sha256,
    )
    provenance = _base_provenance_from_manifest(
        file_manifest,
        excluded_fields={"artifact_path", "artifact_kind", "artifact_sha256"},
    )
    description = FileDescription(
        title=title,
        summary=summary,
        fields=_coerce_fields(fields),
        primary_key=primary_key or [],
        missing_value_codes=missing_value_codes or {},
        acquisition_context=acquisition_context or {},
        generation_context=generation_context or {},
        # The description timestamp reuses the manifest's creation time.
        description_updated_at=file_manifest.created_at,
    )
    return FileAnnotationDocument(
        subject=subject,
        provenance=provenance,
        description=description,
    )
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _build_directory_annotation_document(
    output_dir: str | Path,
    *,
    artifacts: list[DocumentedArtifact],
    title: str | None = None,
    summary: str | None = None,
    acquisition_context: dict[str, Any] | None = None,
    generation_context: dict[str, Any] | None = None,
    params: dict[str, Any] | None = None,
    inputs: list[str] | None = None,
    function: Callable[..., Any] | None = None,
    capture_mode: str = "runtime",
    provenance_overrides: dict[str, Any] | None = None,
    normalize_inputs: bool = True,
) -> DirectoryAnnotationDocument:
    """Assemble the in-memory annotation document for a directory.

    Each artifact is turned into a produced-file record, a directory manifest
    is obtained from ``provenance_writers._build_directory_manifest``, and the
    manifest is split into subject, provenance and description sections.

    Args:
        output_dir: Directory being annotated.
        artifacts: Documented artifacts of the directory.
        title: Description title.
        summary: Description summary.
        acquisition_context: Acquisition context; ``None`` becomes ``{}``.
        generation_context: Generation context; ``None`` becomes ``{}``.
        params: Forwarded to the manifest builder.
        inputs: Forwarded to the manifest builder.
        function: Forwarded to the manifest builder.
        capture_mode: Forwarded to the manifest builder.
        provenance_overrides: Forwarded to the manifest builder as ``overrides``.
        normalize_inputs: Forwarded to the manifest builder.

    Returns:
        The assembled ``DirectoryAnnotationDocument``.
    """
    produced = list(map(_to_produced_file, artifacts))
    directory_manifest = provenance_writers._build_directory_manifest(
        output_dir,
        produced_files=produced,
        params=params,
        inputs=inputs,
        function=function,
        capture_mode=capture_mode,
        overrides=provenance_overrides,
        normalize_inputs=normalize_inputs,
    )

    # Subject data is read back from the manifest rather than from the
    # arguments, and the same fields are excluded from the provenance section.
    subject = DirectoryArtifactSubject(
        path=directory_manifest.output_dir,
        produced_files=list(directory_manifest.produced_files),
    )
    provenance = _base_provenance_from_manifest(
        directory_manifest,
        excluded_fields={"output_dir", "produced_files"},
    )
    artifact_descriptions = list(map(_artifact_description, artifacts))
    description = DirectoryDescription(
        title=title,
        summary=summary,
        artifacts=artifact_descriptions,
        acquisition_context=acquisition_context or {},
        generation_context=generation_context or {},
        # The description timestamp reuses the manifest's creation time.
        description_updated_at=directory_manifest.created_at,
    )
    return DirectoryAnnotationDocument(
        subject=subject,
        provenance=provenance,
        description=description,
    )
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _write_annotation_document(
    document: FileAnnotationDocument | DirectoryAnnotationDocument,
    path: Path,
) -> Path:
    """Serialize an annotation document to ``path`` as indented JSON.

    The parent directory is created first (including missing ancestors), then
    the document is dumped with ``model_dump_json(indent=2)`` and written as
    UTF-8 text.

    Args:
        document: The annotation document to serialize.
        path: Destination file.

    Returns:
        ``path``, unchanged.
    """
    destination_dir = path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    payload = document.model_dump_json(indent=2)
    path.write_text(payload, encoding="utf-8")
    return path
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def write_file_annotation(
    artifact_path: str | Path,
    *,
    title: str | None = None,
    summary: str | None = None,
    fields: list[FieldDefinition] | None = None,
    primary_key: list[str] | None = None,
    missing_value_codes: dict[str, str] | None = None,
    acquisition_context: dict[str, Any] | None = None,
    generation_context: dict[str, Any] | None = None,
    artifact_kind: ArtifactKind = "other",
    params: dict[str, Any] | None = None,
    inputs: list[str] | None = None,
    function: Callable[..., Any] | None = None,
    capture_mode: str = "runtime",
    provenance_overrides: dict[str, Any] | None = None,
    normalize_inputs: bool = True,
    suffix: str = ".meta.json",
) -> Path:
    """Build a file annotation document and write it next to the artifact.

    All arguments except ``suffix`` are forwarded unchanged to
    ``_build_file_annotation_document``.

    Args:
        artifact_path: Path of the file being annotated.
        suffix: Text appended to the document's subject path to form the
            annotation file path.

    Returns:
        The path of the written annotation file.
    """
    builder_options = {
        "title": title,
        "summary": summary,
        "fields": fields,
        "primary_key": primary_key,
        "missing_value_codes": missing_value_codes,
        "acquisition_context": acquisition_context,
        "generation_context": generation_context,
        "artifact_kind": artifact_kind,
        "params": params,
        "inputs": inputs,
        "function": function,
        "capture_mode": capture_mode,
        "provenance_overrides": provenance_overrides,
        "normalize_inputs": normalize_inputs,
    }
    document = _build_file_annotation_document(artifact_path, **builder_options)

    # The sidecar name is derived from the subject path stored in the
    # document, not from the raw argument.
    subject_path_text = str(document.subject.path)
    sidecar_path = Path(subject_path_text + suffix)
    return _write_annotation_document(document, sidecar_path)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def write_directory_annotation(
    output_dir: str | Path,
    *,
    artifacts: list[DocumentedArtifact],
    title: str | None = None,
    summary: str | None = None,
    acquisition_context: dict[str, Any] | None = None,
    generation_context: dict[str, Any] | None = None,
    params: dict[str, Any] | None = None,
    inputs: list[str] | None = None,
    function: Callable[..., Any] | None = None,
    capture_mode: str = "runtime",
    provenance_overrides: dict[str, Any] | None = None,
    normalize_inputs: bool = True,
    filename: str = "manifest.json",
) -> Path:
    """Build a directory annotation document and write it into the directory.

    All arguments except ``filename`` are forwarded unchanged to
    ``_build_directory_annotation_document``.

    Args:
        output_dir: Directory being annotated.
        artifacts: Documented artifacts of the directory.
        filename: Name of the annotation file, joined onto the document's
            subject path.

    Returns:
        The path of the written annotation file.
    """
    builder_options = {
        "artifacts": artifacts,
        "title": title,
        "summary": summary,
        "acquisition_context": acquisition_context,
        "generation_context": generation_context,
        "params": params,
        "inputs": inputs,
        "function": function,
        "capture_mode": capture_mode,
        "provenance_overrides": provenance_overrides,
        "normalize_inputs": normalize_inputs,
    }
    document = _build_directory_annotation_document(output_dir, **builder_options)

    # The manifest location is derived from the subject path stored in the
    # document, not from the raw argument.
    subject_dir = Path(document.subject.path)
    return _write_annotation_document(document, subject_dir / filename)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def annotate_file(
    artifact_path: str | Path,
    *,
    title: str | None = None,
    summary: str | None = None,
    fields: list[FieldDefinition] | None = None,
    primary_key: list[str] | None = None,
    missing_value_codes: dict[str, str] | None = None,
    acquisition_context: dict[str, Any] | None = None,
    generation_context: dict[str, Any] | None = None,
    artifact_kind: ArtifactKind = "other",
    params: dict[str, Any] | None = None,
    inputs: list[str] | None = None,
    function: Callable[..., Any] | None = None,
    write_readme: bool = True,
    write_schema: bool | None = None,
    annotation_suffix: str = ".meta.json",
    readme_suffix: str = ".README.md",
) -> FileAnnotationResult:
    """Write the annotation sidecar for a file and, optionally, its README.

    Args:
        artifact_path: Path of the file being annotated.
        title: Description title; required when ``write_readme`` is true.
        summary: Description summary; required when ``write_readme`` is true.
        fields: Forwarded to ``_build_file_annotation_document``.
        primary_key: Forwarded to ``_build_file_annotation_document``.
        missing_value_codes: Forwarded to ``_build_file_annotation_document``.
        acquisition_context: Forwarded to ``_build_file_annotation_document``.
        generation_context: Forwarded to ``_build_file_annotation_document``.
        artifact_kind: Forwarded to ``_build_file_annotation_document``.
        params: Forwarded to ``_build_file_annotation_document``.
        inputs: Forwarded to ``_build_file_annotation_document``.
        function: Forwarded to ``_build_file_annotation_document``.
        write_readme: When true, also write a README via ``write_file_readme``.
        write_schema: Accepted for interface compatibility; not referenced in
            this function's body.
        annotation_suffix: Appended to the subject path to name the sidecar.
        readme_suffix: Appended to the subject path to name the README.

    Returns:
        A ``FileAnnotationResult`` with the artifact path, the annotation
        path, and the README path (``None`` when ``write_readme`` is false).

    Raises:
        ValueError: If ``write_readme`` is true and ``title`` or ``summary``
            is ``None``. Raised before any file is written.
    """
    if write_readme:
        # Fail fast: checking before the sidecar is written means a call that
        # ends in ValueError does not leave an annotation file on disk.
        _validated_file_readme_fields(title=title, summary=summary)

    document = _build_file_annotation_document(
        artifact_path,
        title=title,
        summary=summary,
        fields=fields,
        primary_key=primary_key,
        missing_value_codes=missing_value_codes,
        acquisition_context=acquisition_context,
        generation_context=generation_context,
        artifact_kind=artifact_kind,
        params=params,
        inputs=inputs,
        function=function,
    )
    # From here on, use the subject path stored in the document.
    artifact_path = Path(document.subject.path)
    annotation_path = _write_annotation_document(
        document,
        Path(str(artifact_path) + annotation_suffix),
    )

    readme_path: Path | None = None
    if write_readme:
        readme_path = write_file_readme(
            Path(str(artifact_path) + readme_suffix),
            artifact_path=document.subject.path,
            artifact_kind=document.subject.kind,
            description=document.description,
        )

    return FileAnnotationResult(
        artifact_path=artifact_path,
        annotation_path=annotation_path,
        readme_path=readme_path,
    )
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def annotate_directory(
    output_dir: str | Path,
    *,
    artifacts: list[DocumentedArtifact],
    title: str | None = None,
    summary: str | None = None,
    acquisition_context: dict[str, Any] | None = None,
    generation_context: dict[str, Any] | None = None,
    params: dict[str, Any] | None = None,
    inputs: list[str] | None = None,
    function: Callable[..., Any] | None = None,
    write_readme: bool = True,
    write_schema: bool | None = None,
    annotation_filename: str = "manifest.json",
    readme_filename: str = "README.md",
) -> DirectoryAnnotationResult:
    """Write the annotation manifest for a directory and, optionally, a README.

    Args:
        output_dir: Directory being annotated.
        artifacts: Documented artifacts of the directory.
        title: Description title; required when ``write_readme`` is true.
        summary: Description summary; required when ``write_readme`` is true.
        acquisition_context: Forwarded to
            ``_build_directory_annotation_document``.
        generation_context: Forwarded to
            ``_build_directory_annotation_document``.
        params: Forwarded to ``_build_directory_annotation_document``.
        inputs: Forwarded to ``_build_directory_annotation_document``.
        function: Forwarded to ``_build_directory_annotation_document``.
        write_readme: When true, also write a README via
            ``write_directory_readme``.
        write_schema: Accepted for interface compatibility; not referenced in
            this function's body.
        annotation_filename: Manifest file name inside the subject directory.
        readme_filename: README file name inside the subject directory.

    Returns:
        A ``DirectoryAnnotationResult`` with the directory path, the
        annotation path, and the README path (``None`` when ``write_readme``
        is false).

    Raises:
        ValueError: If ``write_readme`` is true and ``title`` or ``summary``
            is ``None``. Raised before any file is written.
    """
    if write_readme:
        # Fail fast: checking before the manifest is written means a call that
        # ends in ValueError does not leave a manifest file on disk.
        _validated_directory_readme_fields(title=title, summary=summary)

    document = _build_directory_annotation_document(
        output_dir,
        artifacts=artifacts,
        title=title,
        summary=summary,
        acquisition_context=acquisition_context,
        generation_context=generation_context,
        params=params,
        inputs=inputs,
        function=function,
    )
    # From here on, use the subject path stored in the document.
    output_dir = Path(document.subject.path)
    annotation_path = _write_annotation_document(
        document,
        output_dir / annotation_filename,
    )

    readme_path: Path | None = None
    if write_readme:
        readme_path = write_directory_readme(
            output_dir / readme_filename,
            output_dir=document.subject.path,
            produced_files=document.subject.produced_files,
            description=document.description,
        )

    return DirectoryAnnotationResult(
        output_dir=output_dir,
        annotation_path=annotation_path,
        readme_path=readme_path,
    )
|
data_annotations/cli.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
# Top-level module names whose absence is tolerated at import time; main()
# reports the missing one instead of letting the import crash.
_CLI_OPTIONAL_DEPENDENCIES = {"questionary", "typer"}
# Holds the swallowed import failure, or None when every CLI import succeeded.
_CLI_IMPORT_ERROR: ModuleNotFoundError | None = None
# The Typer application; stays None when an optional dependency is missing.
app: Any = None

try:
    import typer

    from data_annotations.cli_app.annotate import annotate_app
    from data_annotations.cli_app.provenance_commands import provenance_app
except ModuleNotFoundError as import_failure:
    # Only a missing optional dependency is deferred; any other missing
    # module is re-raised immediately.
    if import_failure.name in _CLI_OPTIONAL_DEPENDENCIES:
        _CLI_IMPORT_ERROR = import_failure
    else:
        raise
else:
    app = typer.Typer(no_args_is_help=True)
    app.add_typer(annotate_app, name="annotate")
    app.add_typer(provenance_app, name="provenance")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def main() -> None:
    """Run the CLI application, or exit with install instructions.

    Raises:
        SystemExit: With an explanatory message when an optional CLI
            dependency failed to import at module load.
    """
    if _CLI_IMPORT_ERROR is None:
        app()
        return

    # ImportError.name may be None, so fall back to a generic label.
    missing_dependency = _CLI_IMPORT_ERROR.name or "an optional CLI dependency"
    install_hint = (
        "The data-annotations command requires the optional CLI dependencies.\n"
        f"Missing dependency: {missing_dependency}\n\n"
        "Install them with one of:\n"
        ' pip install "data-annotations[cli]"\n'
        ' uv tool install "data-annotations[cli]"\n'
        ' uv add "data-annotations[cli]"'
    )
    raise SystemExit(install_hint)


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI command modules for data-annotations."""
|
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
import typer
|
|
5
|
+
|
|
6
|
+
from data_annotations.annotations import (
|
|
7
|
+
write_directory_annotation,
|
|
8
|
+
write_file_annotation,
|
|
9
|
+
)
|
|
10
|
+
from data_annotations.description import (
|
|
11
|
+
ArtifactKind,
|
|
12
|
+
DocumentedArtifact,
|
|
13
|
+
FieldDefinition,
|
|
14
|
+
write_directory_readme,
|
|
15
|
+
write_file_readme,
|
|
16
|
+
)
|
|
17
|
+
from data_annotations.provenance import recovery as provenance_recovery
|
|
18
|
+
|
|
19
|
+
from . import prompts
|
|
20
|
+
from .common import (
|
|
21
|
+
CommandOption,
|
|
22
|
+
ForceOption,
|
|
23
|
+
FunctionOption,
|
|
24
|
+
GitBranchOption,
|
|
25
|
+
GitDirtyOption,
|
|
26
|
+
GitRemoteNameOption,
|
|
27
|
+
GitRemoteUrlOption,
|
|
28
|
+
GitShaOption,
|
|
29
|
+
InputValuesOption,
|
|
30
|
+
ParamValuesOption,
|
|
31
|
+
ScriptOption,
|
|
32
|
+
ScriptRepoPathOption,
|
|
33
|
+
_annotation_paths_for_directory,
|
|
34
|
+
_annotation_paths_for_file,
|
|
35
|
+
_discover_directory_files,
|
|
36
|
+
_ensure_annotation_outputs_available,
|
|
37
|
+
_error,
|
|
38
|
+
_resolved_path,
|
|
39
|
+
_validate_artifact_kind,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
# Sub-application that groups the annotation commands defined in this module;
# invoking it with no arguments shows help.
annotate_app = typer.Typer(
    help="Create unified annotation sidecars for existing artifacts.",
    no_args_is_help=True,
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _write_post_hoc_file_bundle(
    artifact_path: Path,
    *,
    title: str,
    summary: str,
    artifact_kind: ArtifactKind,
    fields: list[FieldDefinition],
    primary_key: list[str],
    missing_value_codes: dict[str, str],
    inputs: list[str],
    params: dict[str, Any],
    provenance_overrides: dict[str, Any],
) -> tuple[Path, Path]:
    """Write the annotation sidecar and README for an existing file.

    The annotation is written with ``capture_mode="post_hoc"`` and
    ``normalize_inputs=False``. It is then loaded back from disk, and the
    README is rendered from the loaded document.

    Args:
        artifact_path: File being annotated.
        title: Description title.
        summary: Description summary.
        artifact_kind: Kind recorded for the artifact.
        fields: Field definitions for the description.
        primary_key: Primary-key names.
        missing_value_codes: Missing-value codes.
        inputs: Provenance inputs.
        params: Provenance parameters.
        provenance_overrides: Extra provenance values passed to the writer.

    Returns:
        ``(annotation_path, readme_path)`` as given by
        ``_annotation_paths_for_file``.
    """
    sidecar_path, readme_target = _annotation_paths_for_file(artifact_path)

    annotation_options = {
        "title": title,
        "summary": summary,
        "fields": fields,
        "primary_key": primary_key,
        "missing_value_codes": missing_value_codes,
        "artifact_kind": artifact_kind,
        "params": params,
        "inputs": inputs,
        "capture_mode": "post_hoc",
        "provenance_overrides": provenance_overrides,
        "normalize_inputs": False,
    }
    write_file_annotation(artifact_path, **annotation_options)

    # Render the README from what was actually persisted.
    persisted = provenance_recovery._load_annotation_document(sidecar_path)
    write_file_readme(
        readme_target,
        artifact_path=persisted.subject.path,
        artifact_kind=persisted.subject.kind,
        description=persisted.description,
    )
    return sidecar_path, readme_target
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _write_post_hoc_directory_bundle(
    output_dir: Path,
    *,
    title: str,
    summary: str,
    artifacts: list[DocumentedArtifact],
    inputs: list[str],
    params: dict[str, Any],
    provenance_overrides: dict[str, Any],
) -> tuple[Path, Path]:
    """Write the annotation manifest and README for an existing directory.

    The annotation is written with ``capture_mode="post_hoc"`` and
    ``normalize_inputs=False``. It is then loaded back from disk, and the
    README is rendered from the loaded document.

    Args:
        output_dir: Directory being annotated.
        title: Description title.
        summary: Description summary.
        artifacts: Documented artifacts of the directory.
        inputs: Provenance inputs.
        params: Provenance parameters.
        provenance_overrides: Extra provenance values passed to the writer.

    Returns:
        ``(annotation_path, readme_path)`` as given by
        ``_annotation_paths_for_directory``.
    """
    manifest_path, readme_target = _annotation_paths_for_directory(output_dir)

    annotation_options = {
        "artifacts": artifacts,
        "title": title,
        "summary": summary,
        "params": params,
        "inputs": inputs,
        "capture_mode": "post_hoc",
        "provenance_overrides": provenance_overrides,
        "normalize_inputs": False,
    }
    write_directory_annotation(output_dir, **annotation_options)

    # Render the README from what was actually persisted.
    persisted = provenance_recovery._load_annotation_document(manifest_path)
    write_directory_readme(
        readme_target,
        output_dir=persisted.subject.path,
        produced_files=persisted.subject.produced_files,
        description=persisted.description,
    )
    return manifest_path, readme_target
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# "annotate file" command: annotate one existing file after the fact.
# Values not supplied as options (kind, title, summary) are prompted for, and
# schema details are prompted only for "dataset" and "table" kinds.
# NOTE: documented with comments rather than a docstring on purpose, since
# Typer uses a command function's docstring as its help text.
@annotate_app.command("file")
def annotate_file_command(
    target: Path = typer.Argument(..., help="Existing file to annotate."),
    title: str | None = typer.Option(None, "--title", help="Human-readable title."),
    summary: str | None = typer.Option(
        None, "--summary", help="Human-readable summary."
    ),
    kind: str | None = typer.Option(None, "--kind", help="Artifact kind."),
    input_values: InputValuesOption = None,
    param_values: ParamValuesOption = None,
    script: ScriptOption = None,
    script_repo_path: ScriptRepoPathOption = None,
    command: CommandOption = None,
    function: FunctionOption = None,
    git_sha: GitShaOption = None,
    git_branch: GitBranchOption = None,
    git_remote_name: GitRemoteNameOption = None,
    git_remote_url: GitRemoteUrlOption = None,
    git_dirty: GitDirtyOption = None,
    force: ForceOption = False,
) -> None:
    artifact_path = _resolved_path(target)
    if not artifact_path.exists():
        _error(f"target does not exist: {artifact_path}")
    if not artifact_path.is_file():
        _error(f"target is not a file: {artifact_path}")

    # Check the output locations before asking the user any questions.
    output_paths = _annotation_paths_for_file(artifact_path)
    _ensure_annotation_outputs_available(output_paths, force=force)

    typer.echo("\nGeneral questions: optional fields may be skipped.")

    if kind is None:
        artifact_kind = prompts._prompt_artifact_kind(
            "Artifact kind",
            default="other",
        )
    else:
        artifact_kind = _validate_artifact_kind(kind)

    # An empty --title/--summary counts as not given and triggers the prompt.
    if title:
        title_value = title
    else:
        title_value = prompts._prompt_required_text(
            "Title", default=artifact_path.stem
        )
    if summary:
        summary_value = summary
    else:
        summary_value = prompts._prompt_required_text("Summary")

    provenance_answers = prompts._collect_post_hoc_provenance(
        input_values=input_values,
        param_values=param_values,
        script=script,
        script_repo_path=script_repo_path,
        command=command,
        function=function,
        git_sha=git_sha,
        git_branch=git_branch,
        git_remote_name=git_remote_name,
        git_remote_url=git_remote_url,
        git_dirty=git_dirty,
    )
    inputs, params, provenance_overrides = provenance_answers

    if artifact_kind in {"dataset", "table"}:
        schema_fields, key_columns, missing_codes = prompts._prompt_schema_details()
    else:
        schema_fields, key_columns, missing_codes = [], [], {}

    annotation_path, readme_path = _write_post_hoc_file_bundle(
        artifact_path,
        title=title_value,
        summary=summary_value,
        artifact_kind=artifact_kind,
        fields=schema_fields,
        primary_key=key_columns,
        missing_value_codes=missing_codes,
        inputs=inputs,
        params=params,
        provenance_overrides=provenance_overrides,
    )
    typer.echo("")
    typer.echo(f"Annotation: {annotation_path}")
    typer.echo(f"README: {readme_path}")
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
# "annotate directory" command: annotate an existing directory after the fact.
# Files come from _discover_directory_files; the user selects which to include
# and is prompted per file for kind (unless --kind is given), title, summary
# and, for "dataset"/"table" kinds, schema details.
# NOTE: documented with comments rather than a docstring on purpose, since
# Typer uses a command function's docstring as its help text.
@annotate_app.command("directory")
def annotate_directory_command(
    target: Path = typer.Argument(..., help="Existing directory to annotate."),
    title: str | None = typer.Option(None, "--title", help="Directory title."),
    summary: str | None = typer.Option(None, "--summary", help="Directory summary."),
    kind: str | None = typer.Option(
        None,
        "--kind",
        help="Default artifact kind to apply to all included files.",
    ),
    input_values: InputValuesOption = None,
    param_values: ParamValuesOption = None,
    script: ScriptOption = None,
    script_repo_path: ScriptRepoPathOption = None,
    command: CommandOption = None,
    function: FunctionOption = None,
    git_sha: GitShaOption = None,
    git_branch: GitBranchOption = None,
    git_remote_name: GitRemoteNameOption = None,
    git_remote_url: GitRemoteUrlOption = None,
    git_dirty: GitDirtyOption = None,
    force: ForceOption = False,
) -> None:
    output_dir = _resolved_path(target)
    if not output_dir.exists():
        _error(f"target does not exist: {output_dir}")
    if not output_dir.is_dir():
        _error(f"target is not a directory: {output_dir}")

    discovered_files = _discover_directory_files(output_dir)
    if not discovered_files:
        _error(
            "no immediate child files found in directory; nested files are intentionally ignored and should be annotated from their own subdirectory",
            code=1,
        )

    # Check the output locations before asking the user any questions.
    output_paths = _annotation_paths_for_directory(output_dir)
    _ensure_annotation_outputs_available(output_paths, force=force)

    # An empty --title/--summary counts as not given and triggers the prompt.
    if title:
        title_value = title
    else:
        title_value = prompts._prompt_required_text(
            "Directory title", default=output_dir.name
        )
    if summary:
        summary_value = summary
    else:
        summary_value = prompts._prompt_required_text("Directory summary")

    provenance_answers = prompts._collect_post_hoc_provenance(
        input_values=input_values,
        param_values=param_values,
        script=script,
        script_repo_path=script_repo_path,
        command=command,
        function=function,
        git_sha=git_sha,
        git_branch=git_branch,
        git_remote_name=git_remote_name,
        git_remote_url=git_remote_url,
        git_dirty=git_dirty,
    )
    inputs, params, provenance_overrides = provenance_answers

    # With --kind, every selected file gets that kind and no per-file kind
    # prompt is shown.
    if kind is None:
        default_kind = None
    else:
        default_kind = _validate_artifact_kind(kind)

    artifacts: list[DocumentedArtifact] = []
    selected_files = prompts._prompt_directory_file_selection(discovered_files)
    for candidate in selected_files:
        if default_kind is None:
            artifact_kind = prompts._prompt_artifact_kind(
                f"Artifact kind for {candidate.name}",
                default="other",
            )
        else:
            artifact_kind = default_kind

        artifact_title = prompts._prompt_required_text(
            f"Title for {candidate.name}",
            default=candidate.name,
        )
        artifact_summary = prompts._prompt_optional_text(
            f"Summary for {candidate.name}"
        )

        if artifact_kind in {"dataset", "table"}:
            schema_fields, key_columns, missing_codes = (
                prompts._prompt_schema_details()
            )
        else:
            schema_fields, key_columns, missing_codes = [], [], {}

        documented = DocumentedArtifact(
            path=str(candidate),
            kind=artifact_kind,
            title=artifact_title,
            summary=artifact_summary,
            fields=schema_fields,
            primary_key=key_columns,
            missing_value_codes=missing_codes,
        )
        artifacts.append(documented)

    annotation_path, readme_path = _write_post_hoc_directory_bundle(
        output_dir,
        title=title_value,
        summary=summary_value,
        artifacts=artifacts,
        inputs=inputs,
        params=params,
        provenance_overrides=provenance_overrides,
    )

    typer.echo(f"Annotation: {annotation_path}")
    typer.echo(f"README: {readme_path}")
|