datadoom 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. datadoom/__init__.py +23 -0
  2. datadoom/adapters/__init__.py +29 -0
  3. datadoom/adapters/frameworks.py +94 -0
  4. datadoom/adapters/loaders.py +72 -0
  5. datadoom/api/__init__.py +11 -0
  6. datadoom/api/app.py +109 -0
  7. datadoom/api/deps.py +30 -0
  8. datadoom/api/errors.py +89 -0
  9. datadoom/api/estimate.py +82 -0
  10. datadoom/api/routes/__init__.py +7 -0
  11. datadoom/api/routes/artifacts.py +147 -0
  12. datadoom/api/routes/datasets.py +180 -0
  13. datadoom/api/routes/meta.py +45 -0
  14. datadoom/api/routes/plugins.py +22 -0
  15. datadoom/api/routes/runs.py +144 -0
  16. datadoom/api/routes/specs.py +73 -0
  17. datadoom/api/routes/templates.py +30 -0
  18. datadoom/api/schemas.py +230 -0
  19. datadoom/api/serializers.py +143 -0
  20. datadoom/api/state.py +24 -0
  21. datadoom/api/store_helpers.py +56 -0
  22. datadoom/api/ws.py +72 -0
  23. datadoom/cli/__init__.py +1 -0
  24. datadoom/cli/main.py +313 -0
  25. datadoom/config.py +108 -0
  26. datadoom/engine/__init__.py +38 -0
  27. datadoom/engine/advice.py +289 -0
  28. datadoom/engine/audit.py +290 -0
  29. datadoom/engine/causal/__init__.py +15 -0
  30. datadoom/engine/causal/execute.py +116 -0
  31. datadoom/engine/causal/functions.py +116 -0
  32. datadoom/engine/causal/graph.py +54 -0
  33. datadoom/engine/difficulty/__init__.py +36 -0
  34. datadoom/engine/difficulty/calibrate.py +235 -0
  35. datadoom/engine/difficulty/knobs.py +171 -0
  36. datadoom/engine/difficulty/probes.py +181 -0
  37. datadoom/engine/dist/__init__.py +35 -0
  38. datadoom/engine/dist/base.py +46 -0
  39. datadoom/engine/dist/builtins.py +172 -0
  40. datadoom/engine/dist/compliance.py +344 -0
  41. datadoom/engine/dist/providers.py +117 -0
  42. datadoom/engine/errors.py +32 -0
  43. datadoom/engine/export/__init__.py +27 -0
  44. datadoom/engine/export/base.py +49 -0
  45. datadoom/engine/export/checksums.py +18 -0
  46. datadoom/engine/export/csv_exporter.py +34 -0
  47. datadoom/engine/export/json_exporter.py +67 -0
  48. datadoom/engine/export/metadata.py +58 -0
  49. datadoom/engine/export/parquet_exporter.py +45 -0
  50. datadoom/engine/failure/__init__.py +18 -0
  51. datadoom/engine/failure/apply.py +37 -0
  52. datadoom/engine/failure/base.py +116 -0
  53. datadoom/engine/failure/modes.py +442 -0
  54. datadoom/engine/pipeline.py +418 -0
  55. datadoom/engine/profile.py +327 -0
  56. datadoom/engine/progress.py +14 -0
  57. datadoom/engine/reference.py +338 -0
  58. datadoom/engine/reports.py +206 -0
  59. datadoom/engine/rng.py +79 -0
  60. datadoom/engine/spec/__init__.py +45 -0
  61. datadoom/engine/spec/hashing.py +57 -0
  62. datadoom/engine/spec/models.py +238 -0
  63. datadoom/engine/spec/validate.py +345 -0
  64. datadoom/engine/timeseries.py +88 -0
  65. datadoom/jobs/__init__.py +14 -0
  66. datadoom/jobs/progress.py +155 -0
  67. datadoom/jobs/worker.py +162 -0
  68. datadoom/plugin.py +35 -0
  69. datadoom/plugins/__init__.py +47 -0
  70. datadoom/plugins/contracts.py +72 -0
  71. datadoom/plugins/loader.py +125 -0
  72. datadoom/plugins/registry.py +214 -0
  73. datadoom/plugins/scaffold.py +434 -0
  74. datadoom/store/__init__.py +47 -0
  75. datadoom/store/artifacts.py +67 -0
  76. datadoom/store/db.py +104 -0
  77. datadoom/store/migrations/__init__.py +0 -0
  78. datadoom/store/migrations/env.py +53 -0
  79. datadoom/store/migrations/script.py.mako +24 -0
  80. datadoom/store/migrations/versions/0001_init.py +149 -0
  81. datadoom/store/migrations/versions/0002_report_mutual_information.py +23 -0
  82. datadoom/store/migrations/versions/0003_run_name.py +23 -0
  83. datadoom/store/migrations/versions/0004_report_profile.py +24 -0
  84. datadoom/store/models.py +170 -0
  85. datadoom/store/repositories.py +279 -0
  86. datadoom/templates/__init__.py +239 -0
  87. datadoom/templates/ab_test.datadoom.yaml +46 -0
  88. datadoom/templates/clinical_deterioration.datadoom.yaml +124 -0
  89. datadoom/templates/credit_default_challenge.datadoom.yaml +147 -0
  90. datadoom/templates/customer_churn.datadoom.yaml +60 -0
  91. datadoom/templates/ecommerce_orders.datadoom.yaml +46 -0
  92. datadoom/templates/fraud_detection.datadoom.yaml +57 -0
  93. datadoom/templates/hospital_readmission.datadoom.yaml +61 -0
  94. datadoom/templates/insurance_claims.datadoom.yaml +43 -0
  95. datadoom/templates/iot_sensors.datadoom.yaml +44 -0
  96. datadoom/templates/people_directory.datadoom.yaml +56 -0
  97. datadoom/templates/predictive_maintenance.datadoom.yaml +107 -0
  98. datadoom/templates/telecom_churn_challenge.datadoom.yaml +125 -0
  99. datadoom/version.py +3 -0
  100. datadoom/webdist/assets/index-V8VAuTJG.js +445 -0
  101. datadoom/webdist/assets/index-doRjyG5s.css +1 -0
  102. datadoom/webdist/assets/inter-cyrillic-ext-wght-normal-BOeWTOD4.woff2 +0 -0
  103. datadoom/webdist/assets/inter-cyrillic-wght-normal-DqGufNeO.woff2 +0 -0
  104. datadoom/webdist/assets/inter-greek-ext-wght-normal-DlzME5K_.woff2 +0 -0
  105. datadoom/webdist/assets/inter-greek-wght-normal-CkhJZR-_.woff2 +0 -0
  106. datadoom/webdist/assets/inter-latin-ext-wght-normal-DO1Apj_S.woff2 +0 -0
  107. datadoom/webdist/assets/inter-latin-wght-normal-Dx4kXJAl.woff2 +0 -0
  108. datadoom/webdist/assets/inter-vietnamese-wght-normal-CBcvBZtf.woff2 +0 -0
  109. datadoom/webdist/assets/jetbrains-mono-cyrillic-wght-normal-D73BlboJ.woff2 +0 -0
  110. datadoom/webdist/assets/jetbrains-mono-greek-wght-normal-Bw9x6K1M.woff2 +0 -0
  111. datadoom/webdist/assets/jetbrains-mono-latin-ext-wght-normal-DBQx-q_a.woff2 +0 -0
  112. datadoom/webdist/assets/jetbrains-mono-latin-wght-normal-B9CIFXIH.woff2 +0 -0
  113. datadoom/webdist/assets/jetbrains-mono-vietnamese-wght-normal-Bt-aOZkq.woff2 +0 -0
  114. datadoom/webdist/assets/space-grotesk-latin-ext-wght-normal-D9tNdqV9.woff2 +0 -0
  115. datadoom/webdist/assets/space-grotesk-latin-wght-normal-BhU9QXUp.woff2 +0 -0
  116. datadoom/webdist/assets/space-grotesk-vietnamese-wght-normal-D0rl6rjA.woff2 +0 -0
  117. datadoom/webdist/index.html +15 -0
  118. datadoom-0.1.0.dev0.dist-info/METADATA +143 -0
  119. datadoom-0.1.0.dev0.dist-info/RECORD +122 -0
  120. datadoom-0.1.0.dev0.dist-info/WHEEL +4 -0
  121. datadoom-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  122. datadoom-0.1.0.dev0.dist-info/licenses/LICENSE +202 -0
@@ -0,0 +1,147 @@
1
+ """Artifacts, preview, report, and bundle download (08 §8).
2
+
3
+ These power the Results screen: list output files, stream a download (with the
4
+ reproducibility checksum in a header), preview the first rows, fetch the full
5
+ report, or download a zip bundle (artifacts + metadata + spec).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import io
11
+ import zipfile
12
+ from pathlib import Path
13
+
14
+ import pandas as pd
15
+ from fastapi import APIRouter, Depends, Query
16
+ from fastapi.responses import FileResponse, StreamingResponse
17
+ from sqlalchemy.orm import Session
18
+
19
+ from .. import serializers
20
+ from ..deps import get_session, get_state
21
+ from ..errors import http_error
22
+ from ..schemas import Artifact, PreviewResponse, Report
23
+ from ..state import AppState
24
+ from ..store_helpers import (
25
+ ArtifactRepository,
26
+ ReportRepository,
27
+ SpecRepository,
28
+ load_run,
29
+ )
30
+
31
+ router = APIRouter(prefix="/api", tags=["artifacts"])
32
+
33
+
34
@router.get("/runs/{run_id}/artifacts", response_model=list[Artifact])
def list_artifacts(run_id: str, s: Session = Depends(get_session)) -> list[Artifact]:
    """Return the serialized artifact records stored for ``run_id``."""
    # Result unused: called only to look the run up first (presumably raises
    # when the run is unknown -- verify in store_helpers).
    load_run(s, run_id)
    records = ArtifactRepository(s).list_for_run(run_id)
    return list(map(serializers.artifact, records))
38
+
39
+
40
@router.get("/artifacts/{artifact_id}/download")
def download_artifact(
    artifact_id: str,
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> FileResponse:
    """Send one artifact file, exposing its stored checksum in a header.

    Responds 404 when the artifact row does not exist or when the file its
    ``storage_uri`` resolves to is no longer on disk.
    """
    record = ArtifactRepository(s).get(artifact_id)
    if record is None:
        raise http_error(404, "not_found", f"artifact {artifact_id} not found")

    file_path = state.artifacts.open_uri(record.storage_uri)
    if not file_path.exists():
        raise http_error(404, "not_found", "artifact file is missing on disk")

    # The recorded SHA-256 travels with the download so a client can verify it.
    checksum_header = {"X-Checksum-SHA256": record.checksum_sha256}
    return FileResponse(file_path, filename=file_path.name, headers=checksum_header)
57
+
58
+
59
@router.get("/runs/{run_id}/spec.yaml")
def spec_yaml(
    run_id: str,
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> FileResponse:
    """Download the resolved spec YAML stored in a run's directory.

    The file served is ``spec.resolved.yaml`` from the run directory; it is the
    reproducibility record for the generation (the spec with its resolved
    seed). Responds 404 when that file is absent.
    """
    run = load_run(s, run_id)
    resolved = state.artifacts.run_dir(run.dataset_id, run_id) / "spec.resolved.yaml"
    if not resolved.exists():
        raise http_error(404, "not_found", "resolved spec is not available for this run")

    # The download name uses only the first eight characters of the run id.
    download_name = f"{run_id[:8]}.spec.datadoom.yaml"
    return FileResponse(
        resolved,
        filename=download_name,
        media_type="application/x-yaml",
    )
80
+
81
+
82
@router.get("/runs/{run_id}/report", response_model=Report)
def get_report(run_id: str, s: Session = Depends(get_session)) -> Report:
    """Return the serialized report for a run, or 404 when none is stored."""
    load_run(s, run_id)  # lookup only; the returned row is not needed here
    report_row = ReportRepository(s).get_for_run(run_id)
    if report_row is not None:
        return serializers.report(report_row)
    raise http_error(404, "not_found", "no report (run not completed)")
89
+
90
+
91
@router.get("/runs/{run_id}/preview", response_model=PreviewResponse)
def preview(
    run_id: str,
    version: str = "clean",
    split: str = "full",
    limit: int = Query(100, ge=1, le=5000),
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> PreviewResponse:
    """Return the first ``limit`` rows of one of a run's data artifacts.

    Args:
        run_id: Run whose artifacts are searched.
        version: Artifact version to match (default ``"clean"``).
        split: Artifact split to match; an artifact with no split counts as
            ``"full"``.
        limit: Maximum number of rows to return (1-5000).

    Returns:
        Column names, row values with missing cells as ``None``, and a total
        row count taken from the spec's ``rows`` entry when present (otherwise
        the number of previewed rows).

    Raises:
        A 404 ``http_error`` when no csv/json/parquet artifact matches or when
        the chosen artifact's file is missing on disk.
    """
    run = load_run(s, run_id)

    # Prefer CSV (always readable); fall back to JSON, then Parquet if that's all
    # the spec exported. Keeps preview working for non-CSV format selections.
    priority = {"csv": 0, "json": 1, "parquet": 2}
    matching = (
        a
        for a in ArtifactRepository(s).list_for_run(run_id)
        if a.version == version
        and (a.split or "full") == split
        and a.format in priority
    )
    # min() returns the first best-ranked candidate, like a stable sort would.
    target = min(matching, key=lambda a: priority[a.format], default=None)
    if target is None:
        raise http_error(404, "not_found", "no matching data artifact to preview")

    path = state.artifacts.open_uri(target.storage_uri)
    if not path.exists():
        raise http_error(404, "not_found", "artifact file is missing on disk")

    if target.format == "json":
        frame = pd.read_json(path).head(limit)
    elif target.format == "parquet":
        frame = pd.read_parquet(path).head(limit)
    else:
        frame = pd.read_csv(path, nrows=limit)

    spec_row = SpecRepository(s).get(run.spec_id)
    total = (spec_row.body.get("rows") if spec_row else None) or len(frame)

    # Cast to object before masking: on pandas >= 1.3, where(..., None) on a
    # float column leaves NaN in place, and NaN is not valid JSON. With object
    # dtype every missing cell really becomes None.
    rows = frame.astype(object).where(pd.notna(frame), None).values.tolist()
    return PreviewResponse(columns=list(frame.columns), rows=rows, total=int(total))
130
+
131
+
132
@router.get("/runs/{run_id}/bundle")
def bundle(
    run_id: str,
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> StreamingResponse:
    """Zip every regular file directly inside the run directory and return it.

    The archive is assembled in memory, entries are added in sorted path order
    under their bare file names, and the response is served as an attachment
    named ``<run_id>.zip``.
    """
    run = load_run(s, run_id)
    run_dir: Path = state.artifacts.run_dir(run.dataset_id, run_id)

    archive = io.BytesIO()
    with zipfile.ZipFile(archive, "w", zipfile.ZIP_DEFLATED) as zf:
        for member in sorted(run_dir.glob("*")):
            if not member.is_file():
                continue  # only top-level files are bundled
            zf.write(member, arcname=member.name)
    archive.seek(0)  # rewind so the response streams from the first byte

    return StreamingResponse(
        archive,
        media_type="application/zip",
        headers={"Content-Disposition": f'attachment; filename="{run_id}.zip"'},
    )
@@ -0,0 +1,180 @@
1
+ """Dataset CRUD + spec versioning (08 §4-5).
2
+
3
+ Editing a spec never mutates a row — it creates a new immutable version and
4
+ repoints ``current_spec_id`` (the immutability invariant, 06 §5).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from fastapi import APIRouter, Depends, Query, Response
10
+ from sqlalchemy.orm import Session
11
+
12
+ from datadoom.engine import parse_spec
13
+
14
+ from .. import serializers
15
+ from ..deps import get_session, get_state
16
+ from ..errors import http_error
17
+ from ..schemas import (
18
+ CreateDatasetRequest,
19
+ Dataset,
20
+ DatasetList,
21
+ SaveSpecResponse,
22
+ SpecBody,
23
+ SpecDetail,
24
+ SpecSummary,
25
+ UpdateDatasetRequest,
26
+ )
27
+ from ..state import AppState
28
+ from ..store_helpers import (
29
+ DatasetRepository,
30
+ RunRepository,
31
+ SpecRepository,
32
+ latest_run_row,
33
+ load_dataset,
34
+ )
35
+
36
+ router = APIRouter(prefix="/api/datasets", tags=["datasets"])
37
+
38
+
39
@router.get("", response_model=DatasetList)
def list_datasets(
    status: str | None = None,
    q: str | None = None,
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
    s: Session = Depends(get_session),
) -> DatasetList:
    """Return one page of dataset summaries plus the repository's total count.

    ``status`` and ``q`` are passed to the repository unchanged; each summary
    is built from the dataset row, its current spec and its latest run.
    """
    page, total = DatasetRepository(s).list(
        status=status, q=q, limit=limit, offset=offset
    )
    spec_repo = SpecRepository(s)
    run_repo = RunRepository(s)

    summaries = []
    for row in page:
        current = spec_repo.current(row)
        latest = latest_run_row(run_repo, row)
        summaries.append(serializers.dataset_summary(row, current, latest))
    return DatasetList(items=summaries, total=total)
55
+
56
+
57
@router.post("", response_model=Dataset, status_code=201)
def create_dataset(
    req: CreateDatasetRequest, s: Session = Depends(get_session)
) -> Dataset:
    """Create a dataset, optionally with an initial spec version.

    Responds 409 when another dataset already uses ``req.name``. When
    ``req.spec`` is given it is validated *before* any row is written, so an
    invalid spec fails the request without a dataset row having been created
    in the session first.
    """
    datasets = DatasetRepository(s)
    if datasets.get_by_name(req.name) is not None:
        raise http_error(409, "conflict", f"a dataset named {req.name!r} already exists")

    # Validate first (raises 422 with locator on invalid); nothing has been
    # written yet at this point.
    spec = parse_spec(req.spec) if req.spec is not None else None

    row = datasets.create(name=req.name, description=req.description)
    current_spec = None
    if spec is not None:
        current_spec = SpecRepository(s).create_version(
            row, spec.body(), spec.spec_hash(), spec.datadoom_version
        )
    # A brand-new dataset has no runs, hence no latest run to report.
    return serializers.dataset(row, current_spec, None)
73
+
74
+
75
@router.get("/{dataset_id}", response_model=Dataset)
def get_dataset(dataset_id: str, s: Session = Depends(get_session)) -> Dataset:
    """Return a dataset together with its current spec and latest run."""
    row = load_dataset(s, dataset_id)
    spec_now = SpecRepository(s).current(row)
    newest_run = latest_run_row(RunRepository(s), row)
    return serializers.dataset(row, spec_now, newest_run)
81
+
82
+
83
@router.patch("/{dataset_id}", response_model=Dataset)
def update_dataset(
    dataset_id: str, req: UpdateDatasetRequest, s: Session = Depends(get_session)
) -> Dataset:
    """Update a dataset's name and/or description.

    Responds 409 when the request renames the dataset to a name another
    dataset already holds.
    """
    datasets = DatasetRepository(s)
    row = load_dataset(s, dataset_id)

    # Only an actual rename needs the uniqueness check.
    is_rename = req.name is not None and req.name != row.name
    if is_rename and datasets.get_by_name(req.name) is not None:
        raise http_error(409, "conflict", f"a dataset named {req.name!r} already exists")

    datasets.update(row, name=req.name, description=req.description)
    spec_now = SpecRepository(s).current(row)
    newest_run = latest_run_row(RunRepository(s), row)
    return serializers.dataset(row, spec_now, newest_run)
99
+
100
+
101
@router.delete("/{dataset_id}", status_code=204)
def delete_dataset(
    dataset_id: str,
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> Response:
    """Delete a dataset row and then its artifact directory; reply 204."""
    target = load_dataset(s, dataset_id)
    # ORM cascade -> specs/runs/artifacts/reports
    DatasetRepository(s).delete(target)
    # Files go after the row: removes the dataset's artifact directory.
    state.artifacts.remove_dataset(dataset_id)
    return Response(status_code=204)
111
+
112
+
113
@router.post("/{dataset_id}/duplicate", response_model=Dataset, status_code=201)
def duplicate_dataset(dataset_id: str, s: Session = Depends(get_session)) -> Dataset:
    """Clone a dataset under a fresh ``-copy`` name.

    The clone receives the source's description and, when the source has a
    current spec, a new spec version holding a shallow copy of that spec body
    with the same hash and datadoom version. Runs are not copied.
    """
    datasets = DatasetRepository(s)
    specs = SpecRepository(s)
    original = load_dataset(s, dataset_id)
    original_spec = specs.current(original)

    clone = datasets.create(
        name=_unique_copy_name(datasets, original.name),
        description=original.description,
    )
    if original_spec is None:
        cloned_spec = None
    else:
        cloned_spec = specs.create_version(
            clone,
            dict(original_spec.body),
            original_spec.spec_hash,
            original_spec.datadoom_version,
        )
    return serializers.dataset(clone, cloned_spec, None)
128
+
129
+
130
+ # --- spec versioning ----------------------------------------------------------
131
@router.put("/{dataset_id}/spec", response_model=SaveSpecResponse)
def save_spec(
    dataset_id: str, body: SpecBody, s: Session = Depends(get_session)
) -> SaveSpecResponse:
    """Validate the posted spec and store it as a new spec version.

    Existing versions are not modified. A dataset whose status is
    ``completed`` or ``failed`` is moved back to ``draft``.
    """
    row = load_dataset(s, dataset_id)
    parsed = parse_spec(body)  # 422 with locator on invalid
    saved = SpecRepository(s).create_version(
        row, parsed.body(), parsed.spec_hash(), parsed.datadoom_version
    )

    # A fresh edit returns the dataset to a draft state (a new run regenerates it).
    needs_reset = row.status in {"completed", "failed"}
    if needs_reset:
        DatasetRepository(s).set_status(row, "draft")

    return SaveSpecResponse(
        spec_id=saved.spec_id,
        spec_hash=saved.spec_hash,
        version=saved.version,
    )
146
+
147
+
148
@router.get("/{dataset_id}/spec", response_model=SpecDetail)
def get_current_spec(dataset_id: str, s: Session = Depends(get_session)) -> SpecDetail:
    """Return the dataset's current spec, or 404 when it has none."""
    row = load_dataset(s, dataset_id)
    current = SpecRepository(s).current(row)
    if current is not None:
        return serializers.spec_detail(current)
    raise http_error(404, "not_found", "dataset has no spec yet")
155
+
156
+
157
@router.get("/{dataset_id}/spec/history", response_model=list[SpecSummary])
def spec_history(dataset_id: str, s: Session = Depends(get_session)) -> list[SpecSummary]:
    """Return a summary of every spec version the repository lists for a dataset."""
    load_dataset(s, dataset_id)  # lookup only; the row itself is not used
    versions = SpecRepository(s).history(dataset_id)
    return list(map(serializers.spec_summary, versions))
161
+
162
+
163
@router.get("/{dataset_id}/spec/{version}", response_model=SpecDetail)
def get_spec_version(
    dataset_id: str, version: int, s: Session = Depends(get_session)
) -> SpecDetail:
    """Return one numbered spec version of a dataset, or 404 if it is absent."""
    load_dataset(s, dataset_id)  # lookup only; the row itself is not used
    found = SpecRepository(s).by_version(dataset_id, version)
    if found is not None:
        return serializers.spec_detail(found)
    raise http_error(404, "not_found", f"no spec version {version}")
172
+
173
+
174
def _unique_copy_name(datasets: DatasetRepository, base: str) -> str:
    """Return the first unused name among ``<base>-copy``, ``<base>-copy-2``, ...

    A name counts as used when ``datasets.get_by_name`` returns something
    other than ``None`` for it.
    """
    stem = f"{base}-copy"
    name = stem
    suffix = 1
    while datasets.get_by_name(name) is not None:
        suffix += 1  # numbered variants start at 2
        name = f"{stem}-{suffix}"
    return name
@@ -0,0 +1,45 @@
1
+ """Meta endpoints (08 §11): health + version."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import platform
6
+ import sys
7
+ from typing import Any
8
+
9
+ from fastapi import APIRouter
10
+
11
+ from datadoom.engine.reference import build_capabilities
12
+ from datadoom.version import __version__
13
+
14
+ from ..schemas import HealthResponse, VersionResponse
15
+
16
+ router = APIRouter(prefix="/api", tags=["meta"])
17
+
18
+ # The spec format version DataDoom currently authors/reads (independent of the
19
+ # HTTP API version, 08 §13).
20
+ DATADOOM_SPEC_VERSION = "1"
21
+
22
+
23
@router.get("/health", response_model=HealthResponse)
def health() -> HealthResponse:
    """Report a constant ``ok`` status."""
    ok = HealthResponse(status="ok")
    return ok
26
+
27
+
28
@router.get("/version", response_model=VersionResponse)
def version() -> VersionResponse:
    """Report package, spec-format, Python and host platform versions."""
    host = f"{platform.system()} {platform.release()} ({sys.platform})"
    details = {
        "version": __version__,
        "datadoom_version": DATADOOM_SPEC_VERSION,
        "python": platform.python_version(),
        "platform": host,
    }
    return VersionResponse(**details)
36
+
37
+
38
@router.get("/spec-reference")
def spec_reference() -> dict[str, Any]:
    """Return the machine-readable spec capabilities manifest.

    The manifest comes straight from ``build_capabilities()``, which reads the
    live registries, so plugin-registered capabilities are included. This is
    the HTTP counterpart of the ``datadoom spec-reference`` CLI.
    """
    manifest = build_capabilities()
    return manifest
@@ -0,0 +1,22 @@
1
+ """Plugins endpoint (08 §10).
2
+
3
+ Returns the live plugin registry — core built-ins plus anything discovered from
4
+ entry points or the local plugins directory at startup (09 §3). The Canvas reads
5
+ each entry's ``schema`` fragment to render config controls for third-party
6
+ capabilities with no frontend changes (09 §6).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from fastapi import APIRouter
12
+
13
+ from datadoom.plugins import get_registry
14
+
15
+ from ..schemas import PluginInfo
16
+
17
+ router = APIRouter(prefix="/api/plugins", tags=["plugins"])
18
+
19
+
20
@router.get("", response_model=list[PluginInfo])
def list_plugins() -> list[PluginInfo]:
    """Return one ``PluginInfo`` per record in the plugin registry."""
    infos = []
    for record in get_registry().records():
        infos.append(PluginInfo(**record.to_info()))
    return infos
@@ -0,0 +1,144 @@
1
+ """Generation run endpoints (08 §6, §9).
2
+
3
+ Creating a run returns ``202`` immediately with a resolved seed and a WebSocket
4
+ path; the worker executes it asynchronously and streams progress. Repeated
5
+ ``Idempotency-Key`` headers return the existing run (``200``).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from fastapi import APIRouter, Depends, Header, Response
11
+ from sqlalchemy.orm import Session
12
+
13
+ from datadoom.engine import parse_spec, resolve_seed
14
+
15
+ from .. import serializers
16
+ from ..deps import get_session, get_state
17
+ from ..errors import http_error
18
+ from ..schemas import (
19
+ CancelResponse,
20
+ CreateRunRequest,
21
+ CreateRunResponse,
22
+ RunSummary,
23
+ UpdateRunRequest,
24
+ )
25
+ from ..state import AppState
26
+ from ..store_helpers import (
27
+ RunRepository,
28
+ SpecRepository,
29
+ load_dataset,
30
+ load_run,
31
+ )
32
+
33
+ router = APIRouter(prefix="/api", tags=["runs"])
34
+
35
+
36
def _ws_path(run_id: str) -> str:
    """Return the WebSocket path string for ``run_id``."""
    return "/api/ws/runs/{}".format(run_id)
38
+
39
+
40
@router.post("/datasets/{dataset_id}/runs", response_model=CreateRunResponse, status_code=202)
def create_run(
    dataset_id: str,
    req: CreateRunRequest,
    response: Response,
    idempotency_key: str | None = Header(default=None, alias="Idempotency-Key"),
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> CreateRunResponse:
    """Queue a generation run for the dataset's current spec.

    Replies 202 with the new run's id, resolved seed and WebSocket path. When
    an ``Idempotency-Key`` header repeats a key already recorded for this
    dataset and that run still exists, the existing run is returned with 200.
    Replies 400 when the dataset has no spec.
    """
    dataset = load_dataset(s, dataset_id)
    spec_row = SpecRepository(s).current(dataset)
    if spec_row is None:
        raise http_error(400, "bad_request", "dataset has no spec to generate from")

    has_key = idempotency_key is not None

    # Idempotency replay (08 §1): same key -> the existing run, 200.
    if has_key:
        known_id = state.idempotency.get((dataset_id, idempotency_key))
        previous = RunRepository(s).get(known_id) if known_id is not None else None
        if previous is not None:
            response.status_code = 200
            return CreateRunResponse(
                run_id=previous.run_id,
                status=previous.status,
                seed=previous.seed,
                ws=_ws_path(previous.run_id),
            )

    spec = parse_spec(dict(spec_row.body))
    seed = resolve_seed(spec, req.seed)

    # A missing, empty or whitespace-only name is stored as None.
    trimmed = req.name.strip() if req.name else ""
    name = trimmed or None

    run = RunRepository(s).create(dataset.dataset_id, spec_row.spec_id, seed, name=name)
    run_id = run.run_id
    if has_key:
        state.idempotency[(dataset_id, idempotency_key)] = run_id

    # Commit the queued row before handing off to the worker thread so it is
    # visible when the worker opens its own session (the request's own context
    # manager will no-op commit again at the end).
    s.commit()
    state.worker.submit(run_id)

    return CreateRunResponse(
        run_id=run_id, status="queued", seed=seed, ws=_ws_path(run_id)
    )
83
+
84
+
85
@router.get("/runs/{run_id}", response_model=RunSummary)
def get_run(run_id: str, s: Session = Depends(get_session)) -> RunSummary:
    """Return the serialized summary of a single run."""
    run = load_run(s, run_id)
    return serializers.run_summary(run)
88
+
89
+
90
@router.get("/datasets/{dataset_id}/runs", response_model=list[RunSummary])
def list_runs(dataset_id: str, s: Session = Depends(get_session)) -> list[RunSummary]:
    """Return a summary for every run the repository lists for a dataset."""
    load_dataset(s, dataset_id)  # lookup only; the row itself is not used
    runs = RunRepository(s).list_for_dataset(dataset_id)
    return list(map(serializers.run_summary, runs))
95
+
96
+
97
@router.patch("/runs/{run_id}", response_model=RunSummary)
def update_run(
    run_id: str, req: UpdateRunRequest, s: Session = Depends(get_session)
) -> RunSummary:
    """Rename a run; a name that is blank after stripping is rejected with 422."""
    run = load_run(s, run_id)
    new_name = req.name.strip()
    if new_name == "":
        raise http_error(422, "validation_error", "a generation name is required")
    RunRepository(s).set_name(run, new_name)
    return serializers.run_summary(run)
107
+
108
+
109
@router.delete("/runs/{run_id}", status_code=204)
def delete_run(
    run_id: str,
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> Response:
    """Delete a run row and its files; reply 204.

    A run whose status is ``queued`` or ``running`` is refused with 409: it
    has to be cancelled first.
    """
    run = load_run(s, run_id)
    still_active = run.status in {"queued", "running"}
    if still_active:
        raise http_error(409, "conflict", "cancel the run before deleting it")

    # Read the owning dataset id before the row is deleted.
    owner_id = run.dataset_id
    RunRepository(s).delete(run)  # ORM cascade -> artifacts/report rows
    state.artifacts.remove_run(owner_id, run_id)  # remove the run's files
    return Response(status_code=204)
122
+
123
+
124
@router.post("/runs/{run_id}/cancel", response_model=CancelResponse)
def cancel_run(
    run_id: str,
    s: Session = Depends(get_session),
    state: AppState = Depends(get_state),
) -> CancelResponse:
    """Request cancellation of a run.

    A run already ``completed``, ``failed`` or ``cancelled`` is left alone and
    its current status is echoed back. Otherwise a cancel request is raised on
    the hub and ``cancelling`` is returned.
    """
    run = load_run(s, run_id)
    already_finished = run.status in {"completed", "failed", "cancelled"}
    if not already_finished:
        # Cooperative: flag the run; the worker aborts at the next stage boundary.
        state.hub.request_cancel(run_id)
        return CancelResponse(status="cancelling")
    return CancelResponse(status=run.status)
136
+
137
+
138
@router.post("/runs/{run_id}/inject", status_code=501)
def inject_failures(run_id: str, s: Session = Depends(get_session)) -> Response:
    """Placeholder for the failure-injected variant (08 §9).

    Looks the run up, then always raises a 501 ``not_implemented`` error; the
    failure engine is scheduled for Phase 3 (task 13).
    """
    load_run(s, run_id)  # lookup happens before the 501 is raised
    message = "failure injection arrives in Phase 3 (engine/failure)"
    raise http_error(501, "not_implemented", message)
@@ -0,0 +1,73 @@
1
+ """Stateless spec helpers (08 §3): validate, hash, estimate.
2
+
3
+ These never touch the DB — they parse the posted spec through ``engine.spec``
4
+ (the single source of validation truth) and return derived facts.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import yaml
10
+ from fastapi import APIRouter
11
+
12
+ from datadoom.engine import parse_spec
13
+ from datadoom.engine.errors import SpecValidationError
14
+
15
+ from ..estimate import estimate as estimate_spec
16
+ from ..schemas import (
17
+ EstimateResponse,
18
+ HashResponse,
19
+ ParseResponse,
20
+ ParseTextRequest,
21
+ SpecBody,
22
+ ValidateResponse,
23
+ )
24
+
25
+ router = APIRouter(prefix="/api/specs", tags=["specs"])
26
+
27
+
28
@router.post("/validate", response_model=ValidateResponse)
def validate(body: SpecBody) -> ValidateResponse:
    """Validate a posted spec and return its hash with an empty warnings list."""
    # parse_spec raises SpecValidationError -> 422 with locator (handled centrally).
    parsed = parse_spec(body)
    digest = parsed.spec_hash()
    return ValidateResponse(valid=True, spec_hash=digest, warnings=[])
33
+
34
+
35
@router.post("/parse", response_model=ParseResponse)
def parse(body: ParseTextRequest) -> ParseResponse:
    """Turn raw YAML/JSON spec text into a validated spec body.

    The text is loaded with ``yaml.safe_load`` and then validated through
    ``parse_spec``. A YAML syntax error is re-raised as a
    ``SpecValidationError`` whose locator is the 1-based line number when the
    YAML error carries a position. Text whose top level is not a mapping is
    rejected the same way.
    """
    try:
        loaded = yaml.safe_load(body.text)
    except yaml.YAMLError as exc:
        mark = getattr(exc, "problem_mark", None)
        # mark.line is zero-based; report it one-based.
        where = None if mark is None else f"line {mark.line + 1}"
        raise SpecValidationError(f"invalid YAML: {exc}", locator=where) from exc

    if not isinstance(loaded, dict):
        raise SpecValidationError("spec must be a mapping at the top level (key: value …)")

    parsed = parse_spec(loaded)
    return ParseResponse(valid=True, spec_hash=parsed.spec_hash(), spec=parsed.body())
54
+
55
+
56
@router.post("/hash", response_model=HashResponse)
def spec_hash(body: SpecBody) -> HashResponse:
    """Validate a posted spec and return only its hash."""
    digest = parse_spec(body).spec_hash()
    return HashResponse(spec_hash=digest)
60
+
61
+
62
@router.post("/estimate", response_model=EstimateResponse)
def estimate(body: SpecBody) -> EstimateResponse:
    """Validate a posted spec and return the estimator's figures for it."""
    est = estimate_spec(parse_spec(body))
    # Fields are copied one-to-one, in this order, from the estimate object.
    copied = (
        "estimated_runtime_seconds",
        "estimated_ram_mb",
        "estimated_size_bytes",
        "features",
        "edges",
        "gpu_required",
    )
    return EstimateResponse(**{field: getattr(est, field) for field in copied})
@@ -0,0 +1,30 @@
1
+ """Templates endpoints (08 §10).
2
+
3
+ Surfaces the built-in domain templates (17 step 18). The gallery lists them; the
4
+ detail endpoint returns the full spec so the Canvas can create a dataset from it
5
+ in one click (the existing create flow accepts a ``spec``).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from fastapi import APIRouter
11
+
12
+ from datadoom.templates import get_template, list_templates, load_template_body
13
+
14
+ from ..errors import http_error
15
+ from ..schemas import TemplateDetail, TemplateSummary
16
+
17
+ router = APIRouter(prefix="/api/templates", tags=["templates"])
18
+
19
+
20
@router.get("", response_model=list[TemplateSummary])
def list_all() -> list[TemplateSummary]:
    """Return a summary for every template ``list_templates()`` yields."""
    summaries = []
    for template in list_templates():
        summaries.append(TemplateSummary(**template.to_summary()))
    return summaries
23
+
24
+
25
@router.get("/{template_id}", response_model=TemplateDetail)
def get_one(template_id: str) -> TemplateDetail:
    """Return a template's summary fields plus its full spec body, or 404."""
    meta = get_template(template_id)
    if meta is not None:
        return TemplateDetail(**meta.to_summary(), spec=load_template_body(template_id))
    raise http_error(404, "not_found", f"template {template_id!r} not found")