datadoom 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datadoom/__init__.py +23 -0
- datadoom/adapters/__init__.py +29 -0
- datadoom/adapters/frameworks.py +94 -0
- datadoom/adapters/loaders.py +72 -0
- datadoom/api/__init__.py +11 -0
- datadoom/api/app.py +109 -0
- datadoom/api/deps.py +30 -0
- datadoom/api/errors.py +89 -0
- datadoom/api/estimate.py +82 -0
- datadoom/api/routes/__init__.py +7 -0
- datadoom/api/routes/artifacts.py +147 -0
- datadoom/api/routes/datasets.py +180 -0
- datadoom/api/routes/meta.py +45 -0
- datadoom/api/routes/plugins.py +22 -0
- datadoom/api/routes/runs.py +144 -0
- datadoom/api/routes/specs.py +73 -0
- datadoom/api/routes/templates.py +30 -0
- datadoom/api/schemas.py +230 -0
- datadoom/api/serializers.py +143 -0
- datadoom/api/state.py +24 -0
- datadoom/api/store_helpers.py +56 -0
- datadoom/api/ws.py +72 -0
- datadoom/cli/__init__.py +1 -0
- datadoom/cli/main.py +313 -0
- datadoom/config.py +108 -0
- datadoom/engine/__init__.py +38 -0
- datadoom/engine/advice.py +289 -0
- datadoom/engine/audit.py +290 -0
- datadoom/engine/causal/__init__.py +15 -0
- datadoom/engine/causal/execute.py +116 -0
- datadoom/engine/causal/functions.py +116 -0
- datadoom/engine/causal/graph.py +54 -0
- datadoom/engine/difficulty/__init__.py +36 -0
- datadoom/engine/difficulty/calibrate.py +235 -0
- datadoom/engine/difficulty/knobs.py +171 -0
- datadoom/engine/difficulty/probes.py +181 -0
- datadoom/engine/dist/__init__.py +35 -0
- datadoom/engine/dist/base.py +46 -0
- datadoom/engine/dist/builtins.py +172 -0
- datadoom/engine/dist/compliance.py +344 -0
- datadoom/engine/dist/providers.py +117 -0
- datadoom/engine/errors.py +32 -0
- datadoom/engine/export/__init__.py +27 -0
- datadoom/engine/export/base.py +49 -0
- datadoom/engine/export/checksums.py +18 -0
- datadoom/engine/export/csv_exporter.py +34 -0
- datadoom/engine/export/json_exporter.py +67 -0
- datadoom/engine/export/metadata.py +58 -0
- datadoom/engine/export/parquet_exporter.py +45 -0
- datadoom/engine/failure/__init__.py +18 -0
- datadoom/engine/failure/apply.py +37 -0
- datadoom/engine/failure/base.py +116 -0
- datadoom/engine/failure/modes.py +442 -0
- datadoom/engine/pipeline.py +418 -0
- datadoom/engine/profile.py +327 -0
- datadoom/engine/progress.py +14 -0
- datadoom/engine/reference.py +338 -0
- datadoom/engine/reports.py +206 -0
- datadoom/engine/rng.py +79 -0
- datadoom/engine/spec/__init__.py +45 -0
- datadoom/engine/spec/hashing.py +57 -0
- datadoom/engine/spec/models.py +238 -0
- datadoom/engine/spec/validate.py +345 -0
- datadoom/engine/timeseries.py +88 -0
- datadoom/jobs/__init__.py +14 -0
- datadoom/jobs/progress.py +155 -0
- datadoom/jobs/worker.py +162 -0
- datadoom/plugin.py +35 -0
- datadoom/plugins/__init__.py +47 -0
- datadoom/plugins/contracts.py +72 -0
- datadoom/plugins/loader.py +125 -0
- datadoom/plugins/registry.py +214 -0
- datadoom/plugins/scaffold.py +434 -0
- datadoom/store/__init__.py +47 -0
- datadoom/store/artifacts.py +67 -0
- datadoom/store/db.py +104 -0
- datadoom/store/migrations/__init__.py +0 -0
- datadoom/store/migrations/env.py +53 -0
- datadoom/store/migrations/script.py.mako +24 -0
- datadoom/store/migrations/versions/0001_init.py +149 -0
- datadoom/store/migrations/versions/0002_report_mutual_information.py +23 -0
- datadoom/store/migrations/versions/0003_run_name.py +23 -0
- datadoom/store/migrations/versions/0004_report_profile.py +24 -0
- datadoom/store/models.py +170 -0
- datadoom/store/repositories.py +279 -0
- datadoom/templates/__init__.py +239 -0
- datadoom/templates/ab_test.datadoom.yaml +46 -0
- datadoom/templates/clinical_deterioration.datadoom.yaml +124 -0
- datadoom/templates/credit_default_challenge.datadoom.yaml +147 -0
- datadoom/templates/customer_churn.datadoom.yaml +60 -0
- datadoom/templates/ecommerce_orders.datadoom.yaml +46 -0
- datadoom/templates/fraud_detection.datadoom.yaml +57 -0
- datadoom/templates/hospital_readmission.datadoom.yaml +61 -0
- datadoom/templates/insurance_claims.datadoom.yaml +43 -0
- datadoom/templates/iot_sensors.datadoom.yaml +44 -0
- datadoom/templates/people_directory.datadoom.yaml +56 -0
- datadoom/templates/predictive_maintenance.datadoom.yaml +107 -0
- datadoom/templates/telecom_churn_challenge.datadoom.yaml +125 -0
- datadoom/version.py +3 -0
- datadoom/webdist/assets/index-V8VAuTJG.js +445 -0
- datadoom/webdist/assets/index-doRjyG5s.css +1 -0
- datadoom/webdist/assets/inter-cyrillic-ext-wght-normal-BOeWTOD4.woff2 +0 -0
- datadoom/webdist/assets/inter-cyrillic-wght-normal-DqGufNeO.woff2 +0 -0
- datadoom/webdist/assets/inter-greek-ext-wght-normal-DlzME5K_.woff2 +0 -0
- datadoom/webdist/assets/inter-greek-wght-normal-CkhJZR-_.woff2 +0 -0
- datadoom/webdist/assets/inter-latin-ext-wght-normal-DO1Apj_S.woff2 +0 -0
- datadoom/webdist/assets/inter-latin-wght-normal-Dx4kXJAl.woff2 +0 -0
- datadoom/webdist/assets/inter-vietnamese-wght-normal-CBcvBZtf.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-cyrillic-wght-normal-D73BlboJ.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-greek-wght-normal-Bw9x6K1M.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-latin-ext-wght-normal-DBQx-q_a.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-latin-wght-normal-B9CIFXIH.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-vietnamese-wght-normal-Bt-aOZkq.woff2 +0 -0
- datadoom/webdist/assets/space-grotesk-latin-ext-wght-normal-D9tNdqV9.woff2 +0 -0
- datadoom/webdist/assets/space-grotesk-latin-wght-normal-BhU9QXUp.woff2 +0 -0
- datadoom/webdist/assets/space-grotesk-vietnamese-wght-normal-D0rl6rjA.woff2 +0 -0
- datadoom/webdist/index.html +15 -0
- datadoom-0.1.0.dev0.dist-info/METADATA +143 -0
- datadoom-0.1.0.dev0.dist-info/RECORD +122 -0
- datadoom-0.1.0.dev0.dist-info/WHEEL +4 -0
- datadoom-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- datadoom-0.1.0.dev0.dist-info/licenses/LICENSE +202 -0
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
"""Repositories — the only way the app reads/writes metadata rows.
|
|
2
|
+
|
|
3
|
+
Each repository wraps a live :class:`~sqlalchemy.orm.Session`. They enforce the
|
|
4
|
+
domain invariants from doc 06 §5 — most importantly **spec immutability**: an
|
|
5
|
+
edit never updates a spec row, it creates a new version and repoints the
|
|
6
|
+
dataset's ``current_spec_id``.
|
|
7
|
+
|
|
8
|
+
UUID PKs are generated here with ``uuid4``. This is the persistence layer, NOT
|
|
9
|
+
the engine data path, so the determinism ban on ``uuid4`` does not apply (DB
|
|
10
|
+
identity has no bearing on reproducible artifact bytes).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import uuid
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from sqlalchemy import func, select
|
|
19
|
+
from sqlalchemy.orm import Session
|
|
20
|
+
|
|
21
|
+
from .db import utcnow_iso
|
|
22
|
+
from .models import (
|
|
23
|
+
ArtifactRow,
|
|
24
|
+
DatasetRow,
|
|
25
|
+
GenerationRunRow,
|
|
26
|
+
ReportRow,
|
|
27
|
+
SpecRow,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _uid() -> str:
    """Return a fresh random (version 4) UUID rendered as its canonical string."""
    fresh = uuid.uuid4()
    return str(fresh)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class DatasetRepository:
    """Read/write access to dataset metadata rows through a live session.

    Methods only stage changes on the wrapped session (``create`` also
    flushes); nothing in this class commits.
    """

    def __init__(self, session: Session) -> None:
        self.s = session

    def create(
        self, name: str, description: str | None = None, owner_id: str | None = None
    ) -> DatasetRow:
        """Insert a new dataset in ``"draft"`` status, flush it and return the row.

        ``created_at`` and ``updated_at`` share a single timestamp.
        """
        now = utcnow_iso()
        row = DatasetRow(
            dataset_id=_uid(),
            name=name,
            description=description,
            status="draft",
            owner_id=owner_id,
            created_at=now,
            updated_at=now,
        )
        self.s.add(row)
        self.s.flush()
        return row

    def get(self, dataset_id: str) -> DatasetRow | None:
        """Look a dataset up by primary key; ``None`` when it does not exist."""
        return self.s.get(DatasetRow, dataset_id)

    def get_by_name(self, name: str, owner_id: str | None = None) -> DatasetRow | None:
        """Return the first dataset with this ``name`` and ``owner_id``, if any.

        ``owner_id=None`` matches rows whose owner is NULL.
        """
        # ``==`` rather than ``.is_()``: SQLAlchemy renders ``IS NULL`` for a
        # None operand and a plain ``=`` otherwise. ``.is_(value)`` would emit
        # ``owner_id IS :param`` for real owner ids, which SQLite tolerates but
        # other backends (e.g. PostgreSQL) reject.
        stmt = select(DatasetRow).where(
            DatasetRow.name == name, DatasetRow.owner_id == owner_id
        )
        return self.s.scalars(stmt).first()

    def list(
        self,
        status: str | None = None,
        q: str | None = None,
        limit: int = 50,
        offset: int = 0,
    ) -> tuple[list[DatasetRow], int]:
        """Return one page of datasets plus the total number of matches.

        Args:
            status: When truthy, keep only rows with exactly this status.
            q: When truthy, keep only rows whose name contains this text. The
                text is matched literally (``%`` and ``_`` are escaped).
            limit: Maximum number of rows in the page.
            offset: Number of matching rows to skip.

        Returns:
            ``(rows, total)`` where ``rows`` is ordered most recently updated
            first and ``total`` counts all matches ignoring ``limit``/``offset``.
        """
        stmt = select(DatasetRow)
        count_stmt = select(func.count()).select_from(DatasetRow)
        if status:
            by_status = DatasetRow.status == status
            stmt = stmt.where(by_status)
            count_stmt = count_stmt.where(by_status)
        if q:
            # autoescape keeps user-typed ``%`` / ``_`` from acting as LIKE
            # wildcards (e.g. "customer_churn" must not match "customerXchurn").
            by_name = DatasetRow.name.contains(q, autoescape=True)
            stmt = stmt.where(by_name)
            count_stmt = count_stmt.where(by_name)
        total = self.s.scalar(count_stmt) or 0
        # dataset_id breaks ties between equal timestamps so limit/offset
        # paging is stable from one request to the next.
        stmt = (
            stmt.order_by(DatasetRow.updated_at.desc(), DatasetRow.dataset_id)
            .limit(limit)
            .offset(offset)
        )
        return list(self.s.scalars(stmt).all()), int(total)

    def touch(self, row: DatasetRow) -> None:
        """Stamp ``row.updated_at`` with the current time."""
        row.updated_at = utcnow_iso()

    def update(
        self, row: DatasetRow, name: str | None = None, description: str | None = None
    ) -> DatasetRow:
        """Apply the non-``None`` fields to ``row``, touch it and return it.

        Passing ``None`` leaves that field unchanged, so this method cannot be
        used to clear a description.
        """
        if name is not None:
            row.name = name
        if description is not None:
            row.description = description
        self.touch(row)
        return row

    def set_status(self, row: DatasetRow, status: str) -> None:
        """Set ``row.status`` and refresh its ``updated_at`` timestamp."""
        row.status = status
        self.touch(row)

    def delete(self, row: DatasetRow) -> None:
        """Mark ``row`` for deletion on the session."""
        # ORM cascade removes specs/runs/artifacts/reports rows.
        self.s.delete(row)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class SpecRepository:
    """Persistence operations for versioned, append-only spec snapshots."""

    def __init__(self, session: Session) -> None:
        self.s = session

    def create_version(
        self,
        dataset: DatasetRow,
        body: dict[str, Any],
        spec_hash: str,
        datadoom_version: str,
    ) -> SpecRow:
        """Append the next spec version for ``dataset`` and make it current.

        The new version number is one more than the highest version stored for
        the dataset (1 when there is none). After the row is flushed the
        dataset's ``current_spec_id`` and ``updated_at`` are updated.
        """
        highest_stmt = select(func.coalesce(func.max(SpecRow.version), 0)).where(
            SpecRow.dataset_id == dataset.dataset_id
        )
        highest = self.s.scalar(highest_stmt) or 0
        snapshot = SpecRow(
            spec_id=_uid(),
            dataset_id=dataset.dataset_id,
            spec_hash=spec_hash,
            body=body,
            datadoom_version=datadoom_version,
            version=highest + 1,
            created_at=utcnow_iso(),
        )
        self.s.add(snapshot)
        self.s.flush()
        # Repoint the dataset only once the snapshot row has been flushed.
        dataset.current_spec_id = snapshot.spec_id
        dataset.updated_at = utcnow_iso()
        return snapshot

    def get(self, spec_id: str) -> SpecRow | None:
        """Fetch a spec by primary key, or ``None`` if absent."""
        return self.s.get(SpecRow, spec_id)

    def current(self, dataset: DatasetRow) -> SpecRow | None:
        """Return the spec ``dataset`` currently points at, or ``None`` if unset."""
        pointer = dataset.current_spec_id
        return None if pointer is None else self.s.get(SpecRow, pointer)

    def history(self, dataset_id: str) -> list[SpecRow]:
        """All spec versions of a dataset, newest version first."""
        newest_first = SpecRow.version.desc()
        stmt = select(SpecRow).where(SpecRow.dataset_id == dataset_id).order_by(newest_first)
        return list(self.s.scalars(stmt).all())

    def by_version(self, dataset_id: str, version: int) -> SpecRow | None:
        """Return the spec of ``dataset_id`` with this version number, if any."""
        same_dataset = SpecRow.dataset_id == dataset_id
        same_version = SpecRow.version == version
        return self.s.scalars(select(SpecRow).where(same_dataset, same_version)).first()
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class RunRepository:
    """Persistence operations for generation-run rows."""

    def __init__(self, session: Session) -> None:
        self.s = session

    def create(
        self, dataset_id: str, spec_id: str, seed: int, name: str | None = None
    ) -> GenerationRunRow:
        """Insert a new run in ``"queued"`` status at 0% progress and flush it."""
        queued = GenerationRunRow(
            run_id=_uid(),
            dataset_id=dataset_id,
            spec_id=spec_id,
            name=name,
            seed=seed,
            status="queued",
            progress_pct=0,
            created_at=utcnow_iso(),
        )
        self.s.add(queued)
        self.s.flush()
        return queued

    def get(self, run_id: str) -> GenerationRunRow | None:
        """Fetch a run by primary key, or ``None`` if absent."""
        return self.s.get(GenerationRunRow, run_id)

    def set_name(self, row: GenerationRunRow, name: str) -> GenerationRunRow:
        """Assign ``name`` to the run and hand the same row back."""
        row.name = name
        return row

    def delete(self, row: GenerationRunRow) -> None:
        """Mark the run for deletion, first clearing a dataset pointer to it."""
        # ``latest_run_id`` is a soft reference, so the cascade will not reset
        # it; clear it by hand when it names the run being removed.
        owner = self.s.get(DatasetRow, row.dataset_id)
        points_here = owner is not None and owner.latest_run_id == row.run_id
        if points_here:
            owner.latest_run_id = None
        # ORM cascade removes artifact/report rows.
        self.s.delete(row)

    def list_for_dataset(self, dataset_id: str) -> list[GenerationRunRow]:
        """Every run of a dataset, most recently created first."""
        belongs = GenerationRunRow.dataset_id == dataset_id
        stmt = select(GenerationRunRow).where(belongs).order_by(
            GenerationRunRow.created_at.desc()
        )
        return list(self.s.scalars(stmt).all())

    def find_repro(self, spec_id: str, seed: int) -> GenerationRunRow | None:
        """Return the newest run with this exact ``spec_id`` and ``seed``, if any."""
        same_spec = GenerationRunRow.spec_id == spec_id
        same_seed = GenerationRunRow.seed == seed
        stmt = select(GenerationRunRow).where(same_spec, same_seed).order_by(
            GenerationRunRow.created_at.desc()
        )
        return self.s.scalars(stmt).first()
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
class ArtifactRepository:
    """Persistence operations for artifact rows attached to a run."""

    def __init__(self, session: Session) -> None:
        self.s = session

    def add(
        self,
        run_id: str,
        version: str,
        fmt: str,
        storage_uri: str,
        checksum_sha256: str,
        size_bytes: int,
        split: str | None = None,
    ) -> ArtifactRow:
        """Insert an artifact row for ``run_id``, flush it and return it.

        ``fmt`` is stored in the row's ``format`` column.
        """
        columns = {
            "artifact_id": _uid(),
            "run_id": run_id,
            "version": version,
            "split": split,
            "format": fmt,
            "storage_uri": storage_uri,
            "checksum_sha256": checksum_sha256,
            "size_bytes": size_bytes,
            "created_at": utcnow_iso(),
        }
        artifact = ArtifactRow(**columns)
        self.s.add(artifact)
        self.s.flush()
        return artifact

    def get(self, artifact_id: str) -> ArtifactRow | None:
        """Fetch an artifact by primary key, or ``None`` if absent."""
        return self.s.get(ArtifactRow, artifact_id)

    def list_for_run(self, run_id: str) -> list[ArtifactRow]:
        """All artifact rows recorded for ``run_id`` (no explicit ordering)."""
        for_run = select(ArtifactRow).where(ArtifactRow.run_id == run_id)
        return list(self.s.scalars(for_run).all())
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
class ReportRepository:
    """Persistence operations for the report row attached to a run."""

    def __init__(self, session: Session) -> None:
        self.s = session

    def upsert(self, run_id: str, sections: dict[str, Any]) -> ReportRow:
        """Create or overwrite the report for ``run_id`` and flush.

        Every known section column is assigned from ``sections``; a section
        missing from the dict is stored as ``None``, and unknown keys are
        ignored.
        """
        report = self.get_for_run(run_id)
        if report is None:
            report = ReportRow(report_id=_uid(), run_id=run_id, created_at=utcnow_iso())
            self.s.add(report)
        # Assigned in a fixed order, one column per report section.
        section_columns = (
            "compliance_score",
            "distribution",
            "correlation",
            "mutual_information",
            "causal_truth",
            "difficulty",
            "failures",
            "profile",
            "determinism",
        )
        for column in section_columns:
            setattr(report, column, sections.get(column))
        self.s.flush()
        return report

    def get_for_run(self, run_id: str) -> ReportRow | None:
        """Return the first report row recorded for ``run_id``, or ``None``."""
        matching = select(ReportRow).where(ReportRow.run_id == run_id)
        return self.s.scalars(matching).first()
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
"""Built-in domain templates (17 step 18, 09 §4.6).
|
|
2
|
+
|
|
3
|
+
A template is a curated, ready-to-run spec plus catalog metadata. The web gallery
|
|
4
|
+
and ``datadoom template`` surface them so a user can start from a realistic
|
|
5
|
+
domain dataset in one click. Templates are *data only* (no code); this module is
|
|
6
|
+
a thin loader that reads the bundled YAML via :mod:`importlib.resources`, so it
|
|
7
|
+
works the same from the source tree and an installed wheel.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from importlib import resources
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
import yaml
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(frozen=True)
class TemplateMeta:
    """Immutable catalog record describing one bundled template."""

    id: str
    name: str
    domain: str
    description: str
    tags: tuple[str, ...]
    filename: str
    # Engine features the template showcases (causal/failures/…).
    features: tuple[str, ...] = ()
    # "starter" (learn one feature) | "hackathon" (full enterprise challenge).
    level: str = "starter"

    def to_summary(self) -> dict[str, Any]:
        """Return the summary dict for this entry.

        ``tags`` in the result is ``tags`` followed by ``features`` as one
        list; ``filename`` is not part of the summary.
        """
        merged_tags = [*self.tags, *self.features]
        return dict(
            id=self.id,
            name=self.name,
            domain=self.domain,
            description=self.description,
            tags=merged_tags,
            level=self.level,
        )
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Ordered catalog of every built-in template: the hackathon flagships come
# first, followed by the starter templates. Each entry's ``filename`` names the
# bundled spec file that ``load_template_text`` reads from this package.
CATALOG: tuple[TemplateMeta, ...] = (
    # ── Hackathon flagships ──────────────────────────────────────────────────
    # Enterprise-grade ML challenges: each composes a deep causal DAG, a latent
    # confounder, a stacked data-quality failure profile and (where applicable)
    # a calibrated difficulty band — a realistic dataset to build a model on,
    # carrying a `meta.challenge` brief (target / metric / split / gotchas).
    TemplateMeta(
        id="credit-default-challenge",
        name="Credit default (challenge)",
        domain="Finance",
        description=(
            "Consumer credit default. Demographics and employment drive income, which "
            "feeds a latent risk score behind the default label — calibrated to the "
            "'advanced' AUROC band, then corrupted with MNAR income, drifting debt-to-"
            "income, a leaked collections proxy and label noise. Train/test split included."
        ),
        tags=("credit-risk", "classification"),
        features=("causal", "latent", "difficulty", "failure-injection", "leakage"),
        filename="credit_default_challenge.datadoom.yaml",
        level="hackathon",
    ),
    TemplateMeta(
        id="clinical-deterioration",
        name="Clinical deterioration (challenge)",
        domain="Healthcare",
        description=(
            "ICU early-warning with a hidden confounder: a latent illness severity drives "
            "both the observed vitals (heart rate, lactate, BP) and the outcome, so the "
            "vitals are confounded proxies. Calibrated to 'advanced', with realistic "
            "MNAR/MAR/MCAR clinical missingness."
        ),
        tags=("clinical", "classification"),
        features=("causal", "latent", "confounder", "difficulty", "missingness"),
        filename="clinical_deterioration.datadoom.yaml",
        level="hackathon",
    ),
    TemplateMeta(
        id="predictive-maintenance",
        name="Predictive maintenance (challenge)",
        domain="Industrial IoT",
        description=(
            "Turbine maintenance on multi-sensor time-series (vibration, bearing temp, oil "
            "pressure) plus load and component grade, driving a latent wear index behind a "
            "maintenance label. The load regime drifts, gains sensor noise and drops "
            "readings; a leaked alarm proxy is planted. Sequential — preserve row order."
        ),
        tags=("predictive-maintenance", "classification"),
        features=("time-series", "causal", "latent", "drift", "leakage"),
        filename="predictive_maintenance.datadoom.yaml",
        level="hackathon",
    ),
    TemplateMeta(
        id="telecom-churn-challenge",
        name="Telecom churn (challenge)",
        domain="Telecom",
        description=(
            "Customer churn with realistic records: believable identity fields (name, "
            "email, city) sit beside the real signal — tenure, charges, support load, "
            "contract and usage feed a latent dissatisfaction score. Calibrated to the hard "
            "'kaggle' AUROC band, with MNAR usage and noisy labels. Drop the identifiers."
        ),
        tags=("churn", "classification"),
        features=("causal", "latent", "difficulty", "realistic-text", "missingness"),
        filename="telecom_churn_challenge.datadoom.yaml",
        level="hackathon",
    ),
    # ── Starter templates (learn one capability at a time) ───────────────────
    TemplateMeta(
        id="fraud-detection",
        name="Transaction fraud",
        domain="Finance",
        description=(
            "Customer age and card type drive monthly spend, which drives a fraud-risk "
            "label — then realistic data-quality failures (under-reported spend, random "
            "gaps, mislabels) corrupt a copy so you can study robustness."
        ),
        tags=("classification",),
        features=("causal", "failure-injection"),
        filename="fraud_detection.datadoom.yaml",
    ),
    TemplateMeta(
        id="customer-churn",
        name="Customer churn",
        domain="SaaS",
        description=(
            "Tenure, monthly charges and support load feed a latent satisfaction score "
            "behind a churn label, calibrated down to an 'intermediate' baseline-AUROC "
            "band so the dataset is hard in a measured, honest way."
        ),
        tags=("classification",),
        features=("difficulty", "latent", "causal"),
        filename="customer_churn.datadoom.yaml",
    ),
    TemplateMeta(
        id="hospital-readmission",
        name="Hospital readmission",
        domain="Healthcare",
        description=(
            "Patient age, diagnosis count, length of stay and prior admissions drive a "
            "latent severity score behind a 30-day readmission label — a clean causal "
            "starter with a hidden confounder in the true graph."
        ),
        tags=("classification",),
        features=("causal", "latent"),
        filename="hospital_readmission.datadoom.yaml",
    ),
    TemplateMeta(
        id="ecommerce-orders",
        name="E-commerce orders",
        domain="E-commerce",
        description=(
            "A fast orders table — lognormal order value, basket quantity, product "
            "category, channel, order date and a return flag. Distribution-only, so it "
            "generates instantly."
        ),
        tags=("tabular",),
        features=("distributions", "datetime"),
        filename="ecommerce_orders.datadoom.yaml",
    ),
    TemplateMeta(
        id="iot-sensors",
        name="IoT sensor readings",
        domain="IoT",
        description=(
            "Hourly multi-sensor telemetry — temperature, humidity, pressure and battery "
            "by device, with bounded distributions that stay physically plausible."
        ),
        tags=("tabular",),
        features=("numeric", "datetime"),
        filename="iot_sensors.datadoom.yaml",
    ),
    TemplateMeta(
        id="people-directory",
        name="People directory",
        domain="People",
        description=(
            "Believable identities — names, emails, phones, companies, job titles, "
            "cities and countries — via deterministic mimesis providers. Great for demos "
            "and UIs that need realistic-looking records."
        ),
        tags=("tabular",),
        features=("realistic-text", "datetime"),
        filename="people_directory.datadoom.yaml",
    ),
    TemplateMeta(
        id="marketing-ab-test",
        name="Marketing A/B test",
        domain="Marketing",
        description=(
            "A web experiment — 50/50 control vs. treatment, session engagement "
            "(exponential dwell, Poisson pageviews), conversion and revenue. "
            "Distribution-only and instant."
        ),
        tags=("experiment",),
        features=("distributions",),
        filename="ab_test.datadoom.yaml",
    ),
    TemplateMeta(
        id="insurance-claims",
        name="Insurance claims",
        domain="Insurance",
        description=(
            "Claims with a heavy-tailed (Pareto) claim amount — most small, a few very "
            "large — plus policyholder age, region, prior-claim count and a fraud flag."
        ),
        tags=("tabular",),
        features=("heavy-tail",),
        filename="insurance_claims.datadoom.yaml",
    ),
)
|
|
213
|
+
|
|
214
|
+
# Lookup index over CATALOG keyed by template id.
_BY_ID = {entry.id: entry for entry in CATALOG}
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def list_templates() -> list[TemplateMeta]:
    """Return a new list holding every built-in template, in catalog order."""
    return [*CATALOG]
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def get_template(template_id: str) -> TemplateMeta | None:
    """Return the catalog entry for ``template_id``, or ``None`` if unknown."""
    try:
        return _BY_ID[template_id]
    except KeyError:
        return None
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def load_template_text(template_id: str) -> str:
    """Return the template's spec file as raw text (read as UTF-8, unparsed).

    Raises:
        KeyError: if ``template_id`` is not in the catalog.
    """
    if (meta := _BY_ID.get(template_id)) is None:
        raise KeyError(f"unknown template {template_id!r}")
    # Resolve the bundled file through importlib.resources, relative to this package.
    package_root = resources.files(__package__)
    spec_file = package_root / meta.filename
    return spec_file.read_text(encoding="utf-8")
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def load_template_body(template_id: str) -> dict[str, Any]:
    """Load a template's spec YAML and return it as a raw, unvalidated dict.

    Raises:
        KeyError: if ``template_id`` is not in the catalog.
        ValueError: if the YAML document's top level is not a mapping.
    """
    raw_text = load_template_text(template_id)
    parsed = yaml.safe_load(raw_text)
    if isinstance(parsed, dict):
        return parsed
    raise ValueError(f"template {template_id!r} did not parse to a mapping")
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
datadoom_version: "1"
|
|
2
|
+
name: "marketing-ab-test"
|
|
3
|
+
description: >
|
|
4
|
+
Marketing A/B-test starter — a fast experiment table: a 50/50 control vs.
|
|
5
|
+
treatment assignment, session engagement (exponential dwell time, Poisson
|
|
6
|
+
pageviews), a conversion flag and order revenue. Distribution-only, so it
|
|
7
|
+
generates instantly.
|
|
8
|
+
seed: 27
|
|
9
|
+
rows: 9000
|
|
10
|
+
|
|
11
|
+
features:
|
|
12
|
+
variant:
|
|
13
|
+
type: categorical
|
|
14
|
+
categories: [control, treatment]
|
|
15
|
+
weights: [0.5, 0.5]
|
|
16
|
+
device:
|
|
17
|
+
type: categorical
|
|
18
|
+
categories: [desktop, mobile, tablet]
|
|
19
|
+
weights: [0.5, 0.42, 0.08]
|
|
20
|
+
session_minutes:
|
|
21
|
+
type: numeric
|
|
22
|
+
dist: exponential
|
|
23
|
+
params: { scale: 6.0 }
|
|
24
|
+
min: 0
|
|
25
|
+
pageviews:
|
|
26
|
+
type: numeric
|
|
27
|
+
dist: poisson
|
|
28
|
+
params: { lam: 5 }
|
|
29
|
+
min: 0
|
|
30
|
+
dtype: int
|
|
31
|
+
revenue:
|
|
32
|
+
type: numeric
|
|
33
|
+
dist: lognormal
|
|
34
|
+
params: { mu: 2.6, sigma: 1.1 }
|
|
35
|
+
min: 0
|
|
36
|
+
converted:
|
|
37
|
+
type: boolean
|
|
38
|
+
rate: 0.12
|
|
39
|
+
|
|
40
|
+
export:
|
|
41
|
+
formats: [csv]
|
|
42
|
+
versions: [clean]
|
|
43
|
+
|
|
44
|
+
meta:
|
|
45
|
+
problem_statement: "Web A/B experiment data (assignment, engagement, conversion)."
|
|
46
|
+
tags: [marketing, experiment, tabular]
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
datadoom_version: "1"
|
|
2
|
+
name: "clinical-deterioration"
|
|
3
|
+
description: >
|
|
4
|
+
HACKATHON — ICU clinical deterioration with a hidden confounder. A latent
|
|
5
|
+
illness `severity` (emit: false) is driven by age, comorbidity burden and
|
|
6
|
+
admission type, and severity in turn drives BOTH the observed vitals
|
|
7
|
+
(heart rate ↑, lactate ↑, systolic BP ↓) AND the `deterioration` label. Because
|
|
8
|
+
the true cause is unobserved, the vitals are only *proxies* — a textbook
|
|
9
|
+
latent-confounder problem. Calibrated to the 'advanced' baseline-AUROC band.
|
|
10
|
+
Realistic clinical missingness corrupts a copy: lactate is MNAR (extreme assay
|
|
11
|
+
values dropped), systolic BP is MAR (missing for older patients), heart-rate
|
|
12
|
+
telemetry is MCAR, and 2% of outcomes are mislabelled. `sex` is included but
|
|
13
|
+
carries no causal signal — a fairness/feature-selection check.
|
|
14
|
+
seed: 202
|
|
15
|
+
rows: 7000
|
|
16
|
+
|
|
17
|
+
features:
|
|
18
|
+
patient_age:
|
|
19
|
+
type: numeric
|
|
20
|
+
dist: normal
|
|
21
|
+
params: { mean: 63, std: 17 }
|
|
22
|
+
min: 18
|
|
23
|
+
max: 100
|
|
24
|
+
dtype: int
|
|
25
|
+
sex:
|
|
26
|
+
type: categorical
|
|
27
|
+
categories: [female, male]
|
|
28
|
+
weights: [0.49, 0.51]
|
|
29
|
+
admission_type:
|
|
30
|
+
type: categorical
|
|
31
|
+
categories: [elective, urgent, emergency]
|
|
32
|
+
weights: [0.4, 0.35, 0.25]
|
|
33
|
+
num_comorbidities:
|
|
34
|
+
type: numeric
|
|
35
|
+
dist: poisson
|
|
36
|
+
params: { lam: 2.6 }
|
|
37
|
+
min: 0
|
|
38
|
+
dtype: int
|
|
39
|
+
severity:
|
|
40
|
+
type: numeric # LATENT confounder — the true generative driver
|
|
41
|
+
dtype: float
|
|
42
|
+
emit: false # not shipped; drives both vitals and outcome
|
|
43
|
+
heart_rate:
|
|
44
|
+
type: numeric # derived: observed proxy of severity (bpm)
|
|
45
|
+
dtype: float
|
|
46
|
+
min: 0
|
|
47
|
+
lactate:
|
|
48
|
+
type: numeric # derived: observed proxy of severity (mmol/L)
|
|
49
|
+
dtype: float
|
|
50
|
+
min: 0
|
|
51
|
+
systolic_bp:
|
|
52
|
+
type: numeric # derived: observed proxy of severity (mmHg)
|
|
53
|
+
dtype: float
|
|
54
|
+
min: 0
|
|
55
|
+
deterioration:
|
|
56
|
+
type: boolean # label (derived): logistic of severity
|
|
57
|
+
|
|
58
|
+
causal:
|
|
59
|
+
edges:
|
|
60
|
+
# roots → latent severity
|
|
61
|
+
- { from: patient_age, to: severity, fn: linear, weight: 0.025 }
|
|
62
|
+
- { from: num_comorbidities, to: severity, fn: linear, weight: 0.55 }
|
|
63
|
+
- { from: admission_type, to: severity, fn: map, mapping: { elective: -1.0, urgent: 0.5, emergency: 2.0 } }
|
|
64
|
+
# latent severity → observed vitals (severity is a common cause of vitals and outcome → confounding)
|
|
65
|
+
- { from: severity, to: heart_rate, fn: linear, weight: 9.0, bias: 78 }
|
|
66
|
+
- { from: severity, to: lactate, fn: linear, weight: 0.6, bias: 1.4 }
|
|
67
|
+
- { from: severity, to: systolic_bp, fn: linear, weight: -7.0, bias: 124 }
|
|
68
|
+
# latent severity → outcome
|
|
69
|
+
- { from: severity, to: deterioration, fn: logistic, weight: 1.2, bias: -5.5 }
|
|
70
|
+
noise:
|
|
71
|
+
severity: { dist: normal, params: { mean: 0, std: 0.5 } }
|
|
72
|
+
heart_rate: { dist: normal, params: { mean: 0, std: 8 } }
|
|
73
|
+
lactate: { dist: normal, params: { mean: 0, std: 0.4 } }
|
|
74
|
+
systolic_bp: { dist: normal, params: { mean: 0, std: 10 } }
|
|
75
|
+
deterioration: { dist: none }
|
|
76
|
+
|
|
77
|
+
difficulty:
|
|
78
|
+
target: advanced # baseline ROC-AUC calibrated into [0.72, 0.80]
|
|
79
|
+
label: deterioration
|
|
80
|
+
probe: logreg
|
|
81
|
+
max_iters: 10
|
|
82
|
+
knobs: [noise, label_noise]
|
|
83
|
+
|
|
84
|
+
failures:
|
|
85
|
+
- type: mnar # extreme lactate values not recorded
|
|
86
|
+
column: lactate
|
|
87
|
+
rate: 0.12
|
|
88
|
+
strength: 2.0
|
|
89
|
+
- type: mar # BP missing more often for older patients
|
|
90
|
+
column: systolic_bp
|
|
91
|
+
driver: patient_age
|
|
92
|
+
rate: 0.10
|
|
93
|
+
strength: 2.0
|
|
94
|
+
- type: mcar # random telemetry gaps in heart rate
|
|
95
|
+
columns: [heart_rate]
|
|
96
|
+
rate: 0.05
|
|
97
|
+
- type: label_noise # 2% chart-coding errors
|
|
98
|
+
column: deterioration
|
|
99
|
+
rate: 0.02
|
|
100
|
+
|
|
101
|
+
export:
|
|
102
|
+
formats: [csv]
|
|
103
|
+
versions: [clean, injected]
|
|
104
|
+
splits: { train: 0.8, test: 0.2 }
|
|
105
|
+
|
|
106
|
+
meta:
|
|
107
|
+
level: hackathon
|
|
108
|
+
challenge:
|
|
109
|
+
title: "Early warning for clinical deterioration"
|
|
110
|
+
task: classification
|
|
111
|
+
target: deterioration
|
|
112
|
+
metric: ROC-AUC
|
|
113
|
+
difficulty: advanced
|
|
114
|
+
baseline_auroc_band: [0.72, 0.80]
|
|
115
|
+
train_test_split: "80 / 20"
|
|
116
|
+
hidden_structure: >
|
|
117
|
+
The true cause (severity) is latent. It drives the observed vitals AND the
|
|
118
|
+
outcome, so heart_rate / lactate / systolic_bp are correlated proxies, not
|
|
119
|
+
independent causes.
|
|
120
|
+
gotchas:
|
|
121
|
+
- "lactate is MNAR (extreme values missing) and systolic_bp is MAR (by age) — imputation choice matters."
|
|
122
|
+
- "sex carries no causal signal; including it should not help — watch for spurious importance."
|
|
123
|
+
- "Vitals are confounded proxies of a hidden severity, not independent predictors."
|
|
124
|
+
tags: [healthcare, clinical, causal, latent, confounder, difficulty, missingness, classification]
|