datadoom 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datadoom/__init__.py +23 -0
- datadoom/adapters/__init__.py +29 -0
- datadoom/adapters/frameworks.py +94 -0
- datadoom/adapters/loaders.py +72 -0
- datadoom/api/__init__.py +11 -0
- datadoom/api/app.py +109 -0
- datadoom/api/deps.py +30 -0
- datadoom/api/errors.py +89 -0
- datadoom/api/estimate.py +82 -0
- datadoom/api/routes/__init__.py +7 -0
- datadoom/api/routes/artifacts.py +147 -0
- datadoom/api/routes/datasets.py +180 -0
- datadoom/api/routes/meta.py +45 -0
- datadoom/api/routes/plugins.py +22 -0
- datadoom/api/routes/runs.py +144 -0
- datadoom/api/routes/specs.py +73 -0
- datadoom/api/routes/templates.py +30 -0
- datadoom/api/schemas.py +230 -0
- datadoom/api/serializers.py +143 -0
- datadoom/api/state.py +24 -0
- datadoom/api/store_helpers.py +56 -0
- datadoom/api/ws.py +72 -0
- datadoom/cli/__init__.py +1 -0
- datadoom/cli/main.py +313 -0
- datadoom/config.py +108 -0
- datadoom/engine/__init__.py +38 -0
- datadoom/engine/advice.py +289 -0
- datadoom/engine/audit.py +290 -0
- datadoom/engine/causal/__init__.py +15 -0
- datadoom/engine/causal/execute.py +116 -0
- datadoom/engine/causal/functions.py +116 -0
- datadoom/engine/causal/graph.py +54 -0
- datadoom/engine/difficulty/__init__.py +36 -0
- datadoom/engine/difficulty/calibrate.py +235 -0
- datadoom/engine/difficulty/knobs.py +171 -0
- datadoom/engine/difficulty/probes.py +181 -0
- datadoom/engine/dist/__init__.py +35 -0
- datadoom/engine/dist/base.py +46 -0
- datadoom/engine/dist/builtins.py +172 -0
- datadoom/engine/dist/compliance.py +344 -0
- datadoom/engine/dist/providers.py +117 -0
- datadoom/engine/errors.py +32 -0
- datadoom/engine/export/__init__.py +27 -0
- datadoom/engine/export/base.py +49 -0
- datadoom/engine/export/checksums.py +18 -0
- datadoom/engine/export/csv_exporter.py +34 -0
- datadoom/engine/export/json_exporter.py +67 -0
- datadoom/engine/export/metadata.py +58 -0
- datadoom/engine/export/parquet_exporter.py +45 -0
- datadoom/engine/failure/__init__.py +18 -0
- datadoom/engine/failure/apply.py +37 -0
- datadoom/engine/failure/base.py +116 -0
- datadoom/engine/failure/modes.py +442 -0
- datadoom/engine/pipeline.py +418 -0
- datadoom/engine/profile.py +327 -0
- datadoom/engine/progress.py +14 -0
- datadoom/engine/reference.py +338 -0
- datadoom/engine/reports.py +206 -0
- datadoom/engine/rng.py +79 -0
- datadoom/engine/spec/__init__.py +45 -0
- datadoom/engine/spec/hashing.py +57 -0
- datadoom/engine/spec/models.py +238 -0
- datadoom/engine/spec/validate.py +345 -0
- datadoom/engine/timeseries.py +88 -0
- datadoom/jobs/__init__.py +14 -0
- datadoom/jobs/progress.py +155 -0
- datadoom/jobs/worker.py +162 -0
- datadoom/plugin.py +35 -0
- datadoom/plugins/__init__.py +47 -0
- datadoom/plugins/contracts.py +72 -0
- datadoom/plugins/loader.py +125 -0
- datadoom/plugins/registry.py +214 -0
- datadoom/plugins/scaffold.py +434 -0
- datadoom/store/__init__.py +47 -0
- datadoom/store/artifacts.py +67 -0
- datadoom/store/db.py +104 -0
- datadoom/store/migrations/__init__.py +0 -0
- datadoom/store/migrations/env.py +53 -0
- datadoom/store/migrations/script.py.mako +24 -0
- datadoom/store/migrations/versions/0001_init.py +149 -0
- datadoom/store/migrations/versions/0002_report_mutual_information.py +23 -0
- datadoom/store/migrations/versions/0003_run_name.py +23 -0
- datadoom/store/migrations/versions/0004_report_profile.py +24 -0
- datadoom/store/models.py +170 -0
- datadoom/store/repositories.py +279 -0
- datadoom/templates/__init__.py +239 -0
- datadoom/templates/ab_test.datadoom.yaml +46 -0
- datadoom/templates/clinical_deterioration.datadoom.yaml +124 -0
- datadoom/templates/credit_default_challenge.datadoom.yaml +147 -0
- datadoom/templates/customer_churn.datadoom.yaml +60 -0
- datadoom/templates/ecommerce_orders.datadoom.yaml +46 -0
- datadoom/templates/fraud_detection.datadoom.yaml +57 -0
- datadoom/templates/hospital_readmission.datadoom.yaml +61 -0
- datadoom/templates/insurance_claims.datadoom.yaml +43 -0
- datadoom/templates/iot_sensors.datadoom.yaml +44 -0
- datadoom/templates/people_directory.datadoom.yaml +56 -0
- datadoom/templates/predictive_maintenance.datadoom.yaml +107 -0
- datadoom/templates/telecom_churn_challenge.datadoom.yaml +125 -0
- datadoom/version.py +3 -0
- datadoom/webdist/assets/index-V8VAuTJG.js +445 -0
- datadoom/webdist/assets/index-doRjyG5s.css +1 -0
- datadoom/webdist/assets/inter-cyrillic-ext-wght-normal-BOeWTOD4.woff2 +0 -0
- datadoom/webdist/assets/inter-cyrillic-wght-normal-DqGufNeO.woff2 +0 -0
- datadoom/webdist/assets/inter-greek-ext-wght-normal-DlzME5K_.woff2 +0 -0
- datadoom/webdist/assets/inter-greek-wght-normal-CkhJZR-_.woff2 +0 -0
- datadoom/webdist/assets/inter-latin-ext-wght-normal-DO1Apj_S.woff2 +0 -0
- datadoom/webdist/assets/inter-latin-wght-normal-Dx4kXJAl.woff2 +0 -0
- datadoom/webdist/assets/inter-vietnamese-wght-normal-CBcvBZtf.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-cyrillic-wght-normal-D73BlboJ.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-greek-wght-normal-Bw9x6K1M.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-latin-ext-wght-normal-DBQx-q_a.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-latin-wght-normal-B9CIFXIH.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-vietnamese-wght-normal-Bt-aOZkq.woff2 +0 -0
- datadoom/webdist/assets/space-grotesk-latin-ext-wght-normal-D9tNdqV9.woff2 +0 -0
- datadoom/webdist/assets/space-grotesk-latin-wght-normal-BhU9QXUp.woff2 +0 -0
- datadoom/webdist/assets/space-grotesk-vietnamese-wght-normal-D0rl6rjA.woff2 +0 -0
- datadoom/webdist/index.html +15 -0
- datadoom-0.1.0.dev0.dist-info/METADATA +143 -0
- datadoom-0.1.0.dev0.dist-info/RECORD +122 -0
- datadoom-0.1.0.dev0.dist-info/WHEEL +4 -0
- datadoom-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- datadoom-0.1.0.dev0.dist-info/licenses/LICENSE +202 -0
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""Artifacts, preview, report, and bundle download (08 §8).
|
|
2
|
+
|
|
3
|
+
These power the Results screen: list output files, stream a download (with the
|
|
4
|
+
reproducibility checksum in a header), preview the first rows, fetch the full
|
|
5
|
+
report, or download a zip bundle (artifacts + metadata + spec).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import io
|
|
11
|
+
import zipfile
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
import pandas as pd
|
|
15
|
+
from fastapi import APIRouter, Depends, Query
|
|
16
|
+
from fastapi.responses import FileResponse, StreamingResponse
|
|
17
|
+
from sqlalchemy.orm import Session
|
|
18
|
+
|
|
19
|
+
from .. import serializers
|
|
20
|
+
from ..deps import get_session, get_state
|
|
21
|
+
from ..errors import http_error
|
|
22
|
+
from ..schemas import Artifact, PreviewResponse, Report
|
|
23
|
+
from ..state import AppState
|
|
24
|
+
from ..store_helpers import (
|
|
25
|
+
ArtifactRepository,
|
|
26
|
+
ReportRepository,
|
|
27
|
+
SpecRepository,
|
|
28
|
+
load_run,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Router for the artifact, preview, report and bundle endpoints; every route
# declared on it is served under the "/api" prefix and tagged "artifacts".
router = APIRouter(prefix="/api", tags=["artifacts"])
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@router.get("/runs/{run_id}/artifacts", response_model=list[Artifact])
def list_artifacts(run_id: str, s: Session = Depends(get_session)) -> list[Artifact]:
    """Return the serialized artifact records that belong to ``run_id``.

    ``load_run`` is called only for its lookup side effect; its result is
    discarded (presumably it rejects unknown run ids — confirm in
    ``store_helpers``).
    """
    load_run(s, run_id)
    records = ArtifactRepository(s).list_for_run(run_id)
    return list(map(serializers.artifact, records))
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@router.get("/artifacts/{artifact_id}/download")
def download_artifact(
    artifact_id: str,
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> FileResponse:
    """Send a single artifact file to the client.

    The stored SHA-256 of the artifact is exposed in the
    ``X-Checksum-SHA256`` response header.

    Raises:
        A 404 ``not_found`` error when the artifact record does not exist or
        when its file is no longer present at the resolved path.
    """
    record = ArtifactRepository(s).get(artifact_id)
    if record is None:
        raise http_error(404, "not_found", f"artifact {artifact_id} not found")

    file_path = state.artifacts.open_uri(record.storage_uri)
    if file_path.exists():
        checksum_header = {"X-Checksum-SHA256": record.checksum_sha256}
        return FileResponse(file_path, filename=file_path.name, headers=checksum_header)

    raise http_error(404, "not_found", "artifact file is missing on disk")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@router.get("/runs/{run_id}/spec.yaml")
def spec_yaml(
    run_id: str,
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> FileResponse:
    """Download the locked, resolved spec YAML (spec + baked-in seed) for a run.

    The file is the reproducibility / version-control record of a generation:
    the exact spec, including the resolved seed, that produced it. Regenerating
    from it yields byte-identical data.

    Raises:
        A 404 ``not_found`` error when ``spec.resolved.yaml`` is absent from
        the run directory.
    """
    run = load_run(s, run_id)
    resolved = state.artifacts.run_dir(run.dataset_id, run_id) / "spec.resolved.yaml"
    if resolved.exists():
        # The download name uses only the first eight characters of the run id.
        download_name = f"{run_id[:8]}.spec.datadoom.yaml"
        return FileResponse(
            resolved,
            filename=download_name,
            media_type="application/x-yaml",
        )
    raise http_error(404, "not_found", "resolved spec is not available for this run")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@router.get("/runs/{run_id}/report", response_model=Report)
def get_report(run_id: str, s: Session = Depends(get_session)) -> Report:
    """Return the serialized report stored for ``run_id``.

    Raises:
        A 404 ``not_found`` error when the run has no report row.
    """
    load_run(s, run_id)  # lookup only; the returned row is not needed
    stored = ReportRepository(s).get_for_run(run_id)
    if stored is not None:
        return serializers.report(stored)
    raise http_error(404, "not_found", "no report (run not completed)")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@router.get("/runs/{run_id}/preview", response_model=PreviewResponse)
def preview(
    run_id: str,
    version: str = "clean",
    split: str = "full",
    limit: int = Query(100, ge=1, le=5000),
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> PreviewResponse:
    """Return the first ``limit`` rows of one data artifact of a run.

    Args:
        run_id: Run whose artifacts are searched.
        version: Artifact version to match (default ``"clean"``).
        split: Artifact split to match; an artifact with no split counts as
            ``"full"``.
        limit: Maximum number of rows returned (1-5000).

    Returns:
        Column names, row values with missing cells as ``None``, and a total
        row count. The total is the ``rows`` entry of the run's spec body when
        it is present and truthy, otherwise the number of previewed rows.

    Raises:
        A 404 ``not_found`` error when no CSV/JSON/Parquet artifact matches or
        when the chosen artifact's file is missing.
    """
    run = load_run(s, run_id)
    arts = [
        a
        for a in ArtifactRepository(s).list_for_run(run_id)
        if a.version == version and (a.split or "full") == split
    ]
    # Prefer CSV (always readable); fall back to JSON, then Parquet if that's all
    # the spec exported. Keeps preview working for non-CSV format selections.
    priority = {"csv": 0, "json": 1, "parquet": 2}
    # ``min`` returns the first artifact with the best priority, which is the
    # same element a stable sort would place first.
    target = min(
        (a for a in arts if a.format in priority),
        key=lambda a: priority[a.format],
        default=None,
    )
    if target is None:
        raise http_error(404, "not_found", "no matching data artifact to preview")

    path = state.artifacts.open_uri(target.storage_uri)
    if not path.exists():
        raise http_error(404, "not_found", "artifact file is missing on disk")

    if target.format == "json":
        frame = pd.read_json(path).head(limit)
    elif target.format == "parquet":
        frame = pd.read_parquet(path).head(limit)
    else:
        frame = pd.read_csv(path, nrows=limit)

    spec_row = SpecRepository(s).get(run.spec_id)
    total = (spec_row.body.get("rows") if spec_row else None) or len(frame)

    # Cast to object first: on a float/datetime column ``where(..., None)``
    # coerces None back to NaN/NaT, which would leak non-JSON-safe values into
    # the response. Object columns keep the None.
    rows = frame.astype(object).where(pd.notna(frame), None).values.tolist()
    return PreviewResponse(columns=list(frame.columns), rows=rows, total=int(total))
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@router.get("/runs/{run_id}/bundle")
def bundle(
    run_id: str,
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> StreamingResponse:
    """Stream a zip archive of the regular files in the run's directory.

    Only direct children of the run directory are included (no recursion),
    added in sorted path order under their bare file names. The archive is
    assembled fully in memory before it is returned as ``<run_id>.zip``.
    """
    run = load_run(s, run_id)
    run_dir: Path = state.artifacts.run_dir(run.dataset_id, run_id)

    archive = io.BytesIO()
    with zipfile.ZipFile(archive, "w", zipfile.ZIP_DEFLATED) as zf:
        for entry in sorted(run_dir.glob("*")):
            if not entry.is_file():
                continue
            zf.write(entry, arcname=entry.name)
    archive.seek(0)  # rewind so the response reads from the start

    return StreamingResponse(
        archive,
        media_type="application/zip",
        headers={"Content-Disposition": f'attachment; filename="{run_id}.zip"'},
    )
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""Dataset CRUD + spec versioning (08 §4-5).
|
|
2
|
+
|
|
3
|
+
Editing a spec never mutates a row — it creates a new immutable version and
|
|
4
|
+
repoints ``current_spec_id`` (the immutability invariant, 06 §5).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from fastapi import APIRouter, Depends, Query, Response
|
|
10
|
+
from sqlalchemy.orm import Session
|
|
11
|
+
|
|
12
|
+
from datadoom.engine import parse_spec
|
|
13
|
+
|
|
14
|
+
from .. import serializers
|
|
15
|
+
from ..deps import get_session, get_state
|
|
16
|
+
from ..errors import http_error
|
|
17
|
+
from ..schemas import (
|
|
18
|
+
CreateDatasetRequest,
|
|
19
|
+
Dataset,
|
|
20
|
+
DatasetList,
|
|
21
|
+
SaveSpecResponse,
|
|
22
|
+
SpecBody,
|
|
23
|
+
SpecDetail,
|
|
24
|
+
SpecSummary,
|
|
25
|
+
UpdateDatasetRequest,
|
|
26
|
+
)
|
|
27
|
+
from ..state import AppState
|
|
28
|
+
from ..store_helpers import (
|
|
29
|
+
DatasetRepository,
|
|
30
|
+
RunRepository,
|
|
31
|
+
SpecRepository,
|
|
32
|
+
latest_run_row,
|
|
33
|
+
load_dataset,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
# Router for dataset CRUD and spec-versioning endpoints; every route declared
# on it is served under "/api/datasets" and tagged "datasets".
router = APIRouter(prefix="/api/datasets", tags=["datasets"])
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@router.get("", response_model=DatasetList)
def list_datasets(
    status: str | None = None,
    q: str | None = None,
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
    s: Session = Depends(get_session),
) -> DatasetList:
    """Return one page of dataset summaries plus the total count.

    Args:
        status: Optional status filter, passed through to the repository.
        q: Optional search text, passed through to the repository.
        limit: Page size (1-500).
        offset: Number of rows to skip (>= 0).

    Each summary is built from the dataset row, its current spec and its
    latest run.
    """
    page, total = DatasetRepository(s).list(status=status, q=q, limit=limit, offset=offset)
    spec_repo = SpecRepository(s)
    run_repo = RunRepository(s)

    summaries = []
    for row in page:
        current = spec_repo.current(row)
        latest = latest_run_row(run_repo, row)
        summaries.append(serializers.dataset_summary(row, current, latest))
    return DatasetList(items=summaries, total=total)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@router.post("", response_model=Dataset, status_code=201)
def create_dataset(
    req: CreateDatasetRequest, s: Session = Depends(get_session)
) -> Dataset:
    """Create a dataset, optionally with an initial spec version.

    Args:
        req: Requested name and description, plus an optional ``spec`` body.

    Returns:
        The serialized dataset with its first spec version (if one was given)
        and no latest run.

    Raises:
        A 409 ``conflict`` error when a dataset with the same name exists.
        Whatever ``parse_spec`` raises for an invalid spec.
    """
    datasets = DatasetRepository(s)
    if datasets.get_by_name(req.name) is not None:
        raise http_error(409, "conflict", f"a dataset named {req.name!r} already exists")

    # Validate the spec before any write, so an invalid spec can never leave a
    # dataset row behind. The name conflict is still reported first.
    spec = None
    if req.spec is not None:
        spec = parse_spec(req.spec)  # raises 422 with locator on invalid

    row = datasets.create(name=req.name, description=req.description)
    current_spec = None
    if spec is not None:
        current_spec = SpecRepository(s).create_version(
            row, spec.body(), spec.spec_hash(), spec.datadoom_version
        )
    return serializers.dataset(row, current_spec, None)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@router.get("/{dataset_id}", response_model=Dataset)
def get_dataset(dataset_id: str, s: Session = Depends(get_session)) -> Dataset:
    """Return one dataset together with its current spec and latest run."""
    row = load_dataset(s, dataset_id)
    return serializers.dataset(
        row,
        SpecRepository(s).current(row),
        latest_run_row(RunRepository(s), row),
    )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@router.patch("/{dataset_id}", response_model=Dataset)
def update_dataset(
    dataset_id: str, req: UpdateDatasetRequest, s: Session = Depends(get_session)
) -> Dataset:
    """Update a dataset's name and/or description.

    Raises:
        A 409 ``conflict`` error when the request renames the dataset to a
        name that another dataset already uses.
    """
    datasets = DatasetRepository(s)
    row = load_dataset(s, dataset_id)

    # The uniqueness lookup only runs when the name actually changes.
    is_rename = req.name is not None and req.name != row.name
    if is_rename and datasets.get_by_name(req.name) is not None:
        raise http_error(409, "conflict", f"a dataset named {req.name!r} already exists")

    datasets.update(row, name=req.name, description=req.description)
    return serializers.dataset(
        row,
        SpecRepository(s).current(row),
        latest_run_row(RunRepository(s), row),
    )
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@router.delete("/{dataset_id}", status_code=204)
def delete_dataset(
    dataset_id: str,
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> Response:
    """Delete a dataset row, then remove its artifact directory.

    Returns an empty 204 response.
    """
    doomed = load_dataset(s, dataset_id)
    # ORM cascade -> specs/runs/artifacts/reports
    DatasetRepository(s).delete(doomed)
    # Database rows first, then the files on disk.
    state.artifacts.remove_dataset(dataset_id)
    return Response(status_code=204)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@router.post("/{dataset_id}/duplicate", response_model=Dataset, status_code=201)
def duplicate_dataset(dataset_id: str, s: Session = Depends(get_session)) -> Dataset:
    """Clone a dataset under a fresh ``-copy`` name.

    The clone keeps the source description. If the source has a current spec,
    a shallow copy of its body is stored as the clone's first spec version
    with the same hash and datadoom version. Runs are not copied.
    """
    datasets = DatasetRepository(s)
    specs = SpecRepository(s)
    source = load_dataset(s, dataset_id)
    source_spec = specs.current(source)

    copy_name = _unique_copy_name(datasets, source.name)
    clone = datasets.create(name=copy_name, description=source.description)
    if source_spec is None:
        return serializers.dataset(clone, None, None)

    cloned_spec = specs.create_version(
        clone,
        dict(source_spec.body),
        source_spec.spec_hash,
        source_spec.datadoom_version,
    )
    return serializers.dataset(clone, cloned_spec, None)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# --- spec versioning ----------------------------------------------------------
|
|
131
|
+
@router.put("/{dataset_id}/spec", response_model=SaveSpecResponse)
def save_spec(
    dataset_id: str, body: SpecBody, s: Session = Depends(get_session)
) -> SaveSpecResponse:
    """Store ``body`` as a new spec version of the dataset.

    Returns the id, hash and version number of the newly created version.
    A dataset whose status is ``completed`` or ``failed`` is set back to
    ``draft``.
    """
    row = load_dataset(s, dataset_id)
    parsed = parse_spec(body)  # 422 with locator on invalid
    created = SpecRepository(s).create_version(
        row, parsed.body(), parsed.spec_hash(), parsed.datadoom_version
    )

    # A fresh edit returns the dataset to a draft state (a new run regenerates it).
    if row.status in {"completed", "failed"}:
        DatasetRepository(s).set_status(row, "draft")

    return SaveSpecResponse(
        spec_id=created.spec_id,
        spec_hash=created.spec_hash,
        version=created.version,
    )
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@router.get("/{dataset_id}/spec", response_model=SpecDetail)
def get_current_spec(dataset_id: str, s: Session = Depends(get_session)) -> SpecDetail:
    """Return the dataset's current spec in full.

    Raises:
        A 404 ``not_found`` error when the dataset has no spec.
    """
    row = load_dataset(s, dataset_id)
    current = SpecRepository(s).current(row)
    if current is not None:
        return serializers.spec_detail(current)
    raise http_error(404, "not_found", "dataset has no spec yet")
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@router.get("/{dataset_id}/spec/history", response_model=list[SpecSummary])
def spec_history(dataset_id: str, s: Session = Depends(get_session)) -> list[SpecSummary]:
    """Return a summary of every spec version recorded for the dataset."""
    load_dataset(s, dataset_id)  # lookup only; the returned row is not needed
    versions = SpecRepository(s).history(dataset_id)
    return list(map(serializers.spec_summary, versions))
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
@router.get("/{dataset_id}/spec/{version}", response_model=SpecDetail)
def get_spec_version(
    dataset_id: str, version: int, s: Session = Depends(get_session)
) -> SpecDetail:
    """Return one specific spec version of the dataset in full.

    Raises:
        A 404 ``not_found`` error when that version does not exist.
    """
    load_dataset(s, dataset_id)  # lookup only; the returned row is not needed
    found = SpecRepository(s).by_version(dataset_id, version)
    if found is not None:
        return serializers.spec_detail(found)
    raise http_error(404, "not_found", f"no spec version {version}")
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _unique_copy_name(datasets: DatasetRepository, base: str) -> str:
    """Return the first unused name among ``<base>-copy``, ``<base>-copy-2``, ...

    A name counts as unused when ``datasets.get_by_name`` returns ``None``.
    """
    attempt = 1
    while True:
        # The first attempt carries no numeric suffix; numbering starts at 2.
        name = f"{base}-copy" if attempt == 1 else f"{base}-copy-{attempt}"
        if datasets.get_by_name(name) is None:
            return name
        attempt += 1
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Meta endpoints (08 §11): health + version."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import platform
|
|
6
|
+
import sys
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from fastapi import APIRouter
|
|
10
|
+
|
|
11
|
+
from datadoom.engine.reference import build_capabilities
|
|
12
|
+
from datadoom.version import __version__
|
|
13
|
+
|
|
14
|
+
from ..schemas import HealthResponse, VersionResponse
|
|
15
|
+
|
|
16
|
+
# Router for the meta endpoints; every route declared on it is served under
# the "/api" prefix and tagged "meta".
router = APIRouter(prefix="/api", tags=["meta"])
|
|
17
|
+
|
|
18
|
+
# The spec format version DataDoom currently authors/reads (independent of the
|
|
19
|
+
# HTTP API version, 08 §13).
|
|
20
|
+
# Reported to clients as the ``datadoom_version`` field of GET /api/version.
DATADOOM_SPEC_VERSION = "1"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@router.get("/health", response_model=HealthResponse)
def health() -> HealthResponse:
    """Liveness probe: always answers with status ``"ok"``."""
    alive = HealthResponse(status="ok")
    return alive
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@router.get("/version", response_model=VersionResponse)
def version() -> VersionResponse:
    """Report the package version, spec format version and runtime details.

    ``platform`` is formatted as ``"<system> <release> (<sys.platform>)"``.
    """
    os_name = platform.system()
    os_release = platform.release()
    return VersionResponse(
        version=__version__,
        datadoom_version=DATADOOM_SPEC_VERSION,
        python=platform.python_version(),
        platform=f"{os_name} {os_release} ({sys.platform})",
    )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@router.get("/spec-reference")
def spec_reference() -> dict[str, Any]:
    """Machine-readable spec capabilities manifest (for AI/tooling authoring).

    The manifest is built from the live registries on every call, so
    capabilities registered by plugins are included. It mirrors the
    ``datadoom spec-reference`` CLI command.
    """
    manifest = build_capabilities()
    return manifest
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Plugins endpoint (08 §10).
|
|
2
|
+
|
|
3
|
+
Returns the live plugin registry — core built-ins plus anything discovered from
|
|
4
|
+
entry points or the local plugins directory at startup (09 §3). The Canvas reads
|
|
5
|
+
each entry's ``schema`` fragment to render config controls for third-party
|
|
6
|
+
capabilities with no frontend changes (09 §6).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from fastapi import APIRouter
|
|
12
|
+
|
|
13
|
+
from datadoom.plugins import get_registry
|
|
14
|
+
|
|
15
|
+
from ..schemas import PluginInfo
|
|
16
|
+
|
|
17
|
+
# Router for the plugin registry endpoint; every route declared on it is
# served under "/api/plugins" and tagged "plugins".
router = APIRouter(prefix="/api/plugins", tags=["plugins"])
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@router.get("", response_model=list[PluginInfo])
def list_plugins() -> list[PluginInfo]:
    """Return one ``PluginInfo`` per record in the live plugin registry."""
    infos = []
    for record in get_registry().records():
        infos.append(PluginInfo(**record.to_info()))
    return infos
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""Generation run endpoints (08 §6, §9).
|
|
2
|
+
|
|
3
|
+
Creating a run returns ``202`` immediately with a resolved seed and a WebSocket
|
|
4
|
+
path; the worker executes it asynchronously and streams progress. Repeated
|
|
5
|
+
``Idempotency-Key`` headers return the existing run (``200``).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from fastapi import APIRouter, Depends, Header, Response
|
|
11
|
+
from sqlalchemy.orm import Session
|
|
12
|
+
|
|
13
|
+
from datadoom.engine import parse_spec, resolve_seed
|
|
14
|
+
|
|
15
|
+
from .. import serializers
|
|
16
|
+
from ..deps import get_session, get_state
|
|
17
|
+
from ..errors import http_error
|
|
18
|
+
from ..schemas import (
|
|
19
|
+
CancelResponse,
|
|
20
|
+
CreateRunRequest,
|
|
21
|
+
CreateRunResponse,
|
|
22
|
+
RunSummary,
|
|
23
|
+
UpdateRunRequest,
|
|
24
|
+
)
|
|
25
|
+
from ..state import AppState
|
|
26
|
+
from ..store_helpers import (
|
|
27
|
+
RunRepository,
|
|
28
|
+
SpecRepository,
|
|
29
|
+
load_dataset,
|
|
30
|
+
load_run,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Router for the generation-run endpoints; every route declared on it is
# served under the "/api" prefix and tagged "runs".
router = APIRouter(prefix="/api", tags=["runs"])
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _ws_path(run_id: str) -> str:
    """Build the WebSocket path string returned to clients for ``run_id``."""
    return "/api/ws/runs/{}".format(run_id)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@router.post("/datasets/{dataset_id}/runs", response_model=CreateRunResponse, status_code=202)
def create_run(
    dataset_id: str,
    req: CreateRunRequest,
    response: Response,
    idempotency_key: str | None = Header(default=None, alias="Idempotency-Key"),
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> CreateRunResponse:
    """Queue a generation run for the dataset's current spec.

    Args:
        dataset_id: Dataset to generate from.
        req: Optional explicit seed and optional run name.
        response: Used to switch the status code to 200 on an idempotent replay.
        idempotency_key: Value of the ``Idempotency-Key`` header, if sent.

    Returns:
        The run id, status, resolved seed and WebSocket path. A new run is
        reported as ``"queued"`` (202). A replayed key returns the existing
        run with its current status (200).

    Raises:
        A 400 ``bad_request`` error when the dataset has no current spec.
    """
    dataset = load_dataset(s, dataset_id)
    spec_row = SpecRepository(s).current(dataset)
    if spec_row is None:
        raise http_error(400, "bad_request", "dataset has no spec to generate from")

    replay_slot = (dataset_id, idempotency_key)

    # Idempotency replay (08 §1): same key -> the existing run, 200.
    if idempotency_key is not None:
        known_id = state.idempotency.get(replay_slot)
        known = RunRepository(s).get(known_id) if known_id is not None else None
        if known is not None:
            response.status_code = 200
            return CreateRunResponse(
                run_id=known.run_id,
                status=known.status,
                seed=known.seed,
                ws=_ws_path(known.run_id),
            )
        # A remembered id whose row no longer exists falls through to a new run.

    spec = parse_spec(dict(spec_row.body))
    seed = resolve_seed(spec, req.seed)
    # Blank or whitespace-only names are stored as "no name".
    name = (req.name or "").strip() or None
    run = RunRepository(s).create(dataset.dataset_id, spec_row.spec_id, seed, name=name)
    run_id = run.run_id
    if idempotency_key is not None:
        state.idempotency[replay_slot] = run_id

    # Commit the queued row before handing off to the worker thread so it is
    # visible when the worker opens its own session (the request's own context
    # manager will no-op commit again at the end).
    s.commit()
    state.worker.submit(run_id)

    return CreateRunResponse(run_id=run_id, status="queued", seed=seed, ws=_ws_path(run_id))
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@router.get("/runs/{run_id}", response_model=RunSummary)
def get_run(run_id: str, s: Session = Depends(get_session)) -> RunSummary:
    """Return the serialized summary of a single run."""
    run = load_run(s, run_id)
    return serializers.run_summary(run)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@router.get("/datasets/{dataset_id}/runs", response_model=list[RunSummary])
def list_runs(dataset_id: str, s: Session = Depends(get_session)) -> list[RunSummary]:
    """Return the serialized summaries of every run of a dataset."""
    load_dataset(s, dataset_id)  # lookup only; the returned row is not needed
    runs = RunRepository(s).list_for_dataset(dataset_id)
    return list(map(serializers.run_summary, runs))
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@router.patch("/runs/{run_id}", response_model=RunSummary)
def update_run(
    run_id: str, req: UpdateRunRequest, s: Session = Depends(get_session)
) -> RunSummary:
    """Rename a run; the name is stored with surrounding whitespace removed.

    Raises:
        A 422 ``validation_error`` when the name is empty after stripping.
    """
    run = load_run(s, run_id)
    cleaned = req.name.strip()
    if cleaned:
        RunRepository(s).set_name(run, cleaned)
        return serializers.run_summary(run)
    raise http_error(422, "validation_error", "a generation name is required")
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@router.delete("/runs/{run_id}", status_code=204)
def delete_run(
    run_id: str,
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> Response:
    """Delete a finished run's row and then its files.

    Returns an empty 204 response.

    Raises:
        A 409 ``conflict`` error while the run is still ``queued`` or
        ``running``.
    """
    run = load_run(s, run_id)
    still_active = run.status in {"queued", "running"}
    if still_active:
        raise http_error(409, "conflict", "cancel the run before deleting it")

    # Read the owning dataset id before the row is deleted.
    owner_id = run.dataset_id
    RunRepository(s).delete(run)  # ORM cascade -> artifacts/report rows
    state.artifacts.remove_run(owner_id, run_id)  # remove the run's files
    return Response(status_code=204)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@router.post("/runs/{run_id}/cancel", response_model=CancelResponse)
def cancel_run(
    run_id: str,
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> CancelResponse:
    """Request cancellation of a run.

    A run that is already ``completed``, ``failed`` or ``cancelled`` is left
    alone and its current status is returned. Otherwise a cancel request is
    registered on the hub and ``"cancelling"`` is returned.
    """
    run = load_run(s, run_id)
    finished = {"completed", "failed", "cancelled"}
    if run.status not in finished:
        # Cooperative: flag the run; the worker aborts at the next stage boundary.
        state.hub.request_cancel(run_id)
        return CancelResponse(status="cancelling")
    return CancelResponse(status=run.status)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
@router.post("/runs/{run_id}/inject", status_code=501)
def inject_failures(run_id: str, s: Session = Depends(get_session)) -> Response:
    """Failure-injected variant (08 §9). The failure engine lands in P3 (task 13).

    Placeholder: after looking up the run it always raises a 501
    ``not_implemented`` error and never returns a response.
    """
    load_run(s, run_id)  # lookup happens before the 501 is raised
    message = "failure injection arrives in Phase 3 (engine/failure)"
    raise http_error(501, "not_implemented", message)
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Stateless spec helpers (08 §3): validate, hash, estimate.
|
|
2
|
+
|
|
3
|
+
These never touch the DB — they parse the posted spec through ``engine.spec``
|
|
4
|
+
(the single source of validation truth) and return derived facts.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import yaml
|
|
10
|
+
from fastapi import APIRouter
|
|
11
|
+
|
|
12
|
+
from datadoom.engine import parse_spec
|
|
13
|
+
from datadoom.engine.errors import SpecValidationError
|
|
14
|
+
|
|
15
|
+
from ..estimate import estimate as estimate_spec
|
|
16
|
+
from ..schemas import (
|
|
17
|
+
EstimateResponse,
|
|
18
|
+
HashResponse,
|
|
19
|
+
ParseResponse,
|
|
20
|
+
ParseTextRequest,
|
|
21
|
+
SpecBody,
|
|
22
|
+
ValidateResponse,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Router for the stateless spec helper endpoints; every route declared on it
# is served under "/api/specs" and tagged "specs".
router = APIRouter(prefix="/api/specs", tags=["specs"])
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@router.post("/validate", response_model=ValidateResponse)
def validate(body: SpecBody) -> ValidateResponse:
    """Validate a posted spec and return its hash.

    The success response always has ``valid=True`` and an empty ``warnings``
    list.
    """
    # parse_spec raises SpecValidationError -> 422 with locator (handled centrally).
    parsed = parse_spec(body)
    digest = parsed.spec_hash()
    return ValidateResponse(valid=True, spec_hash=digest, warnings=[])
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@router.post("/parse", response_model=ParseResponse)
def parse(body: ParseTextRequest) -> ParseResponse:
    """Parse raw YAML/JSON spec text → validated spec body (web 'New from YAML').

    The text is loaded with the same PyYAML loader the CLI uses and then
    validated through the single ``engine.spec`` path, so the web import
    accepts exactly what ``datadoom run file.yaml`` would. Syntax and
    validation errors come back as a 422 with a ``locator`` (handled
    centrally).

    Raises:
        SpecValidationError: When the text is not valid YAML (with a
            ``line N`` locator if the parser reports a position), or when the
            top-level value is not a mapping.
    """
    try:
        loaded = yaml.safe_load(body.text)
    except yaml.YAMLError as exc:
        mark = getattr(exc, "problem_mark", None)
        # PyYAML marks are zero-based; report a one-based line number.
        locator = None if mark is None else f"line {mark.line + 1}"
        raise SpecValidationError(f"invalid YAML: {exc}", locator=locator) from exc

    if isinstance(loaded, dict):
        parsed = parse_spec(loaded)
        return ParseResponse(valid=True, spec_hash=parsed.spec_hash(), spec=parsed.body())

    raise SpecValidationError("spec must be a mapping at the top level (key: value …)")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@router.post("/hash", response_model=HashResponse)
def spec_hash(body: SpecBody) -> HashResponse:
    """Validate a posted spec and return only its hash."""
    digest = parse_spec(body).spec_hash()
    return HashResponse(spec_hash=digest)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@router.post("/estimate", response_model=EstimateResponse)
def estimate(body: SpecBody) -> EstimateResponse:
    """Validate a posted spec and return its resource estimate.

    Every response field is copied from the same-named attribute of the
    object returned by ``estimate_spec``.
    """
    parsed = parse_spec(body)
    figures = estimate_spec(parsed)
    copied_fields = (
        "estimated_runtime_seconds",
        "estimated_ram_mb",
        "estimated_size_bytes",
        "features",
        "edges",
        "gpu_required",
    )
    return EstimateResponse(**{field: getattr(figures, field) for field in copied_fields})
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Templates endpoints (08 §10).
|
|
2
|
+
|
|
3
|
+
Surfaces the built-in domain templates (17 step 18). The gallery lists them; the
|
|
4
|
+
detail endpoint returns the full spec so the Canvas can create a dataset from it
|
|
5
|
+
in one click (the existing create flow accepts a ``spec``).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from fastapi import APIRouter
|
|
11
|
+
|
|
12
|
+
from datadoom.templates import get_template, list_templates, load_template_body
|
|
13
|
+
|
|
14
|
+
from ..errors import http_error
|
|
15
|
+
from ..schemas import TemplateDetail, TemplateSummary
|
|
16
|
+
|
|
17
|
+
# Router for the built-in template endpoints; every route declared on it is
# served under "/api/templates" and tagged "templates".
router = APIRouter(prefix="/api/templates", tags=["templates"])
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@router.get("", response_model=list[TemplateSummary])
def list_all() -> list[TemplateSummary]:
    """Return a summary for every template reported by ``list_templates``."""
    summaries = []
    for template in list_templates():
        summaries.append(TemplateSummary(**template.to_summary()))
    return summaries
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@router.get("/{template_id}", response_model=TemplateDetail)
def get_one(template_id: str) -> TemplateDetail:
    """Return one template's summary fields together with its full spec body.

    Raises:
        A 404 ``not_found`` error when no template has this id.
    """
    meta = get_template(template_id)
    if meta is not None:
        summary = meta.to_summary()
        spec_body = load_template_body(template_id)
        return TemplateDetail(**summary, spec=spec_body)
    raise http_error(404, "not_found", f"template {template_id!r} not found")
|