datadoom 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datadoom/__init__.py +23 -0
- datadoom/adapters/__init__.py +29 -0
- datadoom/adapters/frameworks.py +94 -0
- datadoom/adapters/loaders.py +72 -0
- datadoom/api/__init__.py +11 -0
- datadoom/api/app.py +109 -0
- datadoom/api/deps.py +30 -0
- datadoom/api/errors.py +89 -0
- datadoom/api/estimate.py +82 -0
- datadoom/api/routes/__init__.py +7 -0
- datadoom/api/routes/artifacts.py +147 -0
- datadoom/api/routes/datasets.py +180 -0
- datadoom/api/routes/meta.py +45 -0
- datadoom/api/routes/plugins.py +22 -0
- datadoom/api/routes/runs.py +144 -0
- datadoom/api/routes/specs.py +73 -0
- datadoom/api/routes/templates.py +30 -0
- datadoom/api/schemas.py +230 -0
- datadoom/api/serializers.py +143 -0
- datadoom/api/state.py +24 -0
- datadoom/api/store_helpers.py +56 -0
- datadoom/api/ws.py +72 -0
- datadoom/cli/__init__.py +1 -0
- datadoom/cli/main.py +313 -0
- datadoom/config.py +108 -0
- datadoom/engine/__init__.py +38 -0
- datadoom/engine/advice.py +289 -0
- datadoom/engine/audit.py +290 -0
- datadoom/engine/causal/__init__.py +15 -0
- datadoom/engine/causal/execute.py +116 -0
- datadoom/engine/causal/functions.py +116 -0
- datadoom/engine/causal/graph.py +54 -0
- datadoom/engine/difficulty/__init__.py +36 -0
- datadoom/engine/difficulty/calibrate.py +235 -0
- datadoom/engine/difficulty/knobs.py +171 -0
- datadoom/engine/difficulty/probes.py +181 -0
- datadoom/engine/dist/__init__.py +35 -0
- datadoom/engine/dist/base.py +46 -0
- datadoom/engine/dist/builtins.py +172 -0
- datadoom/engine/dist/compliance.py +344 -0
- datadoom/engine/dist/providers.py +117 -0
- datadoom/engine/errors.py +32 -0
- datadoom/engine/export/__init__.py +27 -0
- datadoom/engine/export/base.py +49 -0
- datadoom/engine/export/checksums.py +18 -0
- datadoom/engine/export/csv_exporter.py +34 -0
- datadoom/engine/export/json_exporter.py +67 -0
- datadoom/engine/export/metadata.py +58 -0
- datadoom/engine/export/parquet_exporter.py +45 -0
- datadoom/engine/failure/__init__.py +18 -0
- datadoom/engine/failure/apply.py +37 -0
- datadoom/engine/failure/base.py +116 -0
- datadoom/engine/failure/modes.py +442 -0
- datadoom/engine/pipeline.py +418 -0
- datadoom/engine/profile.py +327 -0
- datadoom/engine/progress.py +14 -0
- datadoom/engine/reference.py +338 -0
- datadoom/engine/reports.py +206 -0
- datadoom/engine/rng.py +79 -0
- datadoom/engine/spec/__init__.py +45 -0
- datadoom/engine/spec/hashing.py +57 -0
- datadoom/engine/spec/models.py +238 -0
- datadoom/engine/spec/validate.py +345 -0
- datadoom/engine/timeseries.py +88 -0
- datadoom/jobs/__init__.py +14 -0
- datadoom/jobs/progress.py +155 -0
- datadoom/jobs/worker.py +162 -0
- datadoom/plugin.py +35 -0
- datadoom/plugins/__init__.py +47 -0
- datadoom/plugins/contracts.py +72 -0
- datadoom/plugins/loader.py +125 -0
- datadoom/plugins/registry.py +214 -0
- datadoom/plugins/scaffold.py +434 -0
- datadoom/store/__init__.py +47 -0
- datadoom/store/artifacts.py +67 -0
- datadoom/store/db.py +104 -0
- datadoom/store/migrations/__init__.py +0 -0
- datadoom/store/migrations/env.py +53 -0
- datadoom/store/migrations/script.py.mako +24 -0
- datadoom/store/migrations/versions/0001_init.py +149 -0
- datadoom/store/migrations/versions/0002_report_mutual_information.py +23 -0
- datadoom/store/migrations/versions/0003_run_name.py +23 -0
- datadoom/store/migrations/versions/0004_report_profile.py +24 -0
- datadoom/store/models.py +170 -0
- datadoom/store/repositories.py +279 -0
- datadoom/templates/__init__.py +239 -0
- datadoom/templates/ab_test.datadoom.yaml +46 -0
- datadoom/templates/clinical_deterioration.datadoom.yaml +124 -0
- datadoom/templates/credit_default_challenge.datadoom.yaml +147 -0
- datadoom/templates/customer_churn.datadoom.yaml +60 -0
- datadoom/templates/ecommerce_orders.datadoom.yaml +46 -0
- datadoom/templates/fraud_detection.datadoom.yaml +57 -0
- datadoom/templates/hospital_readmission.datadoom.yaml +61 -0
- datadoom/templates/insurance_claims.datadoom.yaml +43 -0
- datadoom/templates/iot_sensors.datadoom.yaml +44 -0
- datadoom/templates/people_directory.datadoom.yaml +56 -0
- datadoom/templates/predictive_maintenance.datadoom.yaml +107 -0
- datadoom/templates/telecom_churn_challenge.datadoom.yaml +125 -0
- datadoom/version.py +3 -0
- datadoom/webdist/assets/index-V8VAuTJG.js +445 -0
- datadoom/webdist/assets/index-doRjyG5s.css +1 -0
- datadoom/webdist/assets/inter-cyrillic-ext-wght-normal-BOeWTOD4.woff2 +0 -0
- datadoom/webdist/assets/inter-cyrillic-wght-normal-DqGufNeO.woff2 +0 -0
- datadoom/webdist/assets/inter-greek-ext-wght-normal-DlzME5K_.woff2 +0 -0
- datadoom/webdist/assets/inter-greek-wght-normal-CkhJZR-_.woff2 +0 -0
- datadoom/webdist/assets/inter-latin-ext-wght-normal-DO1Apj_S.woff2 +0 -0
- datadoom/webdist/assets/inter-latin-wght-normal-Dx4kXJAl.woff2 +0 -0
- datadoom/webdist/assets/inter-vietnamese-wght-normal-CBcvBZtf.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-cyrillic-wght-normal-D73BlboJ.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-greek-wght-normal-Bw9x6K1M.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-latin-ext-wght-normal-DBQx-q_a.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-latin-wght-normal-B9CIFXIH.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-vietnamese-wght-normal-Bt-aOZkq.woff2 +0 -0
- datadoom/webdist/assets/space-grotesk-latin-ext-wght-normal-D9tNdqV9.woff2 +0 -0
- datadoom/webdist/assets/space-grotesk-latin-wght-normal-BhU9QXUp.woff2 +0 -0
- datadoom/webdist/assets/space-grotesk-vietnamese-wght-normal-D0rl6rjA.woff2 +0 -0
- datadoom/webdist/index.html +15 -0
- datadoom-0.1.0.dev0.dist-info/METADATA +143 -0
- datadoom-0.1.0.dev0.dist-info/RECORD +122 -0
- datadoom-0.1.0.dev0.dist-info/WHEEL +4 -0
- datadoom-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- datadoom-0.1.0.dev0.dist-info/licenses/LICENSE +202 -0
datadoom/cli/main.py
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
"""DataDoom CLI — launcher + headless automation (17 step 6).
|
|
2
|
+
|
|
3
|
+
In P0 the CLI is the only surface: ``run``, ``validate`` and ``verify`` exercise
|
|
4
|
+
the deterministic core. The web Canvas (``datadoom`` with no subcommand) arrives
|
|
5
|
+
in P1.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import tempfile
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
import typer
|
|
15
|
+
|
|
16
|
+
from ..engine import generate, load_spec
|
|
17
|
+
from ..engine.errors import DataDoomError, ReproducibilityError
|
|
18
|
+
from ..engine.export.checksums import sha256_file
|
|
19
|
+
from ..plugins import (
|
|
20
|
+
PluginError,
|
|
21
|
+
check_plugin,
|
|
22
|
+
get_registry,
|
|
23
|
+
load_plugins,
|
|
24
|
+
scaffold_plugin,
|
|
25
|
+
)
|
|
26
|
+
from ..templates import get_template, list_templates, load_template_text
|
|
27
|
+
from ..version import __version__
|
|
28
|
+
|
|
29
|
+
app = typer.Typer(
|
|
30
|
+
add_completion=False,
|
|
31
|
+
help="DataDoom — reproducible synthetic data.",
|
|
32
|
+
no_args_is_help=True,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
plugin_app = typer.Typer(
|
|
36
|
+
help="Author and inspect DataDoom plugins (distributions, fns, failures, …).",
|
|
37
|
+
no_args_is_help=True,
|
|
38
|
+
)
|
|
39
|
+
app.add_typer(plugin_app, name="plugin")
|
|
40
|
+
|
|
41
|
+
template_app = typer.Typer(
|
|
42
|
+
help="Browse and start from built-in domain templates.",
|
|
43
|
+
no_args_is_help=True,
|
|
44
|
+
)
|
|
45
|
+
app.add_typer(template_app, name="template")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@app.command()
def version() -> None:
    """Print the installed DataDoom version."""
    # The docstring above doubles as the command's --help text.
    installed_version = __version__
    typer.echo(installed_version)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@app.command(name="spec-reference")
def spec_reference(
    out: Path = typer.Option(None, "--out", help="Write to a file instead of stdout"),
    pretty: bool = typer.Option(True, "--pretty/--compact", help="Indent the JSON"),
) -> None:
    """Emit the machine-readable spec capabilities manifest (for AI/tooling).

    Lists every distribution, structural function, failure mode, difficulty tier,
    feature type, exporter, and validation rule a spec accepts — built from the
    live registries, so installed plugins are included. Feed it to an LLM/agent so
    it can author a valid ``*.datadoom.yaml`` without guessing.
    """
    # Plugins are loaded first so their registrations show up in the manifest.
    load_plugins()
    from ..engine.reference import build_capabilities

    indent = 2 if pretty else None
    manifest = build_capabilities()
    payload = json.dumps(manifest, indent=indent, sort_keys=False)

    # No --out: the manifest goes straight to stdout.
    if out is None:
        typer.echo(payload)
        return

    # File output is newline-terminated; the confirmation goes to the terminal.
    out.write_text(payload + "\n", encoding="utf-8")
    typer.secho(f"wrote spec reference → {out}", fg=typer.colors.GREEN)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@app.command()
def serve(
    host: str = typer.Option(None, "--host", help="Bind host (default 127.0.0.1)"),
    port: int = typer.Option(None, "--port", help="Bind port (default 8000)"),
    reload: bool = typer.Option(False, "--reload", help="Auto-reload (development)"),
) -> None:
    """Launch the local web server (REST + WebSocket) and the bundled Canvas UI.

    Requires the server extra: ``pip install datadoom[server]``.
    """
    # Imported inside the command so a missing server extra only affects `serve`
    # and is reported with an install hint instead of an import traceback.
    try:
        import uvicorn

        from ..api import create_app
        from ..config import load_config
    except ImportError as exc:  # pragma: no cover - only without the server extra
        typer.secho(
            "The web server needs extra deps. Install with: pip install 'datadoom[server]'",
            fg=typer.colors.RED,
            err=True,
        )
        raise typer.Exit(code=1) from exc

    cfg = load_config()
    # CLI flags override the resolved configuration.
    bind_host = host or cfg.host
    # Test against None, not truthiness: an explicit `--port 0` must be passed
    # through rather than silently replaced by the configured port.
    bind_port = cfg.port if port is None else port
    cfg.ensure_dirs()

    typer.secho(
        f"DataDoom serving on http://{bind_host}:{bind_port} (data: {cfg.home})",
        fg=typer.colors.GREEN,
    )
    if reload:
        # Reload needs an import string; create_app is the factory.
        uvicorn.run(
            "datadoom.api:create_app", host=bind_host, port=bind_port, reload=True, factory=True
        )
    else:
        uvicorn.run(create_app(cfg), host=bind_host, port=bind_port)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@app.command()
def validate(spec_path: Path = typer.Argument(..., help="Path to a *.datadoom.yaml spec")) -> None:
    """Validate a spec file (shape + cross-field) without generating."""
    load_plugins()
    try:
        loaded = load_spec(str(spec_path))
    except DataDoomError as exc:
        # Report the failure on stderr and exit non-zero.
        typer.secho(f"INVALID: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc
    else:
        digest = loaded.spec_hash()
        typer.secho(f"OK spec_hash={digest}", fg=typer.colors.GREEN)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@app.command()
def run(
    spec_path: Path = typer.Argument(..., help="Path to a *.datadoom.yaml spec"),
    seed: int = typer.Option(None, "--seed", help="Override/resolve the random seed"),
    out: Path = typer.Option(..., "--out", help="Output directory for artifacts"),
) -> None:
    """Generate a dataset from a spec and write CSV + metadata to --out."""
    load_plugins()
    try:
        spec = load_spec(str(spec_path))
        result = generate(spec, seed=seed, out_dir=out)
    except DataDoomError as exc:
        typer.secho(f"ERROR: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc

    compliance = result.compliance
    assessed = sum(1 for feature in compliance.features if feature.applicable)
    not_applicable = len(compliance.features) - assessed

    # The "(N assessed[, M n/a])" suffix is only shown when there are features.
    if compliance.features:
        na_note = f", {not_applicable} n/a" if not_applicable else ""
        suffix = f" ({assessed} assessed" + na_note + ")"
    else:
        suffix = ""

    typer.echo(f"spec_hash {result.spec_hash}")
    typer.echo(f"seed {result.seed}")
    typer.echo(f"rows {len(result.frame)}")
    typer.echo(f"compliance {compliance.score:.3f}{suffix}")
    for artifact in result.artifacts:
        # Only the first 16 hex characters of each checksum are printed.
        short_digest = artifact.checksum_sha256[:16]
        typer.echo(f"artifact {artifact.path} sha256={short_digest}…")
    typer.secho(f"written to {out}", fg=typer.colors.GREEN)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@app.command()
def verify(
    spec_path: Path = typer.Argument(..., help="Path to a *.datadoom.yaml spec"),
    seed: int = typer.Option(..., "--seed", help="Seed to verify reproducibility for"),
    against: Path = typer.Option(
        None,
        "--against",
        help="An existing run dir to compare against; omit to self-check (run twice).",
    ),
) -> None:
    """Prove (spec_hash, seed) -> identical checksum.

    With --against, regenerate and compare to that bundle's recorded checksum.
    Without it, generate twice and assert the two checksums match.
    """
    load_plugins()
    try:
        spec = load_spec(str(spec_path))

        # First generation: its data.csv checksum is the value under test.
        with tempfile.TemporaryDirectory() as first_dir:
            fresh = generate(spec, seed=seed, out_dir=first_dir)
            fresh_checksum = sha256_file(Path(first_dir) / "data.csv")

        if against is None:
            # Self-check: generate a second time with the same spec and seed.
            with tempfile.TemporaryDirectory() as second_dir:
                generate(spec, seed=seed, out_dir=second_dir)
                expected = sha256_file(Path(second_dir) / "data.csv")
            source = "second run"
        else:
            # Compare against the checksum recorded in an existing run dir.
            expected = _recorded_checksum(against)
            source = str(against)
    except DataDoomError as exc:
        typer.secho(f"ERROR: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc

    if fresh_checksum == expected:
        typer.secho(
            f"OK reproducible (spec_hash={fresh.spec_hash}, seed={seed})\n"
            f" sha256={fresh_checksum}",
            fg=typer.colors.GREEN,
        )
        return

    typer.secho(
        f"MISMATCH\n this run: {fresh_checksum}\n {source}: {expected}",
        fg=typer.colors.RED,
        err=True,
    )
    # Raised outside the try block, so it is not converted into typer.Exit.
    raise ReproducibilityError("checksum mismatch")  # noqa: TRY003
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
@plugin_app.command("list")
def plugin_list() -> None:
    """List every registered capability — core built-ins and discovered plugins."""
    load_plugins()
    records = get_registry().records()
    # Pad the name column to the longest name (4 when nothing is registered).
    name_width = max((len(record.name) for record in records), default=4)
    for record in records:
        if record.builtin:
            origin = "core"
        else:
            origin = record.source
        version_note = f" v{record.version}" if record.version else ""
        typer.echo(f"{record.kind:<14} {record.name:<{name_width}} [{origin}]{version_note}")
    typer.secho(f"{len(records)} capabilities", fg=typer.colors.GREEN)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
@plugin_app.command("new")
def plugin_new(
    kind: str = typer.Argument(
        ..., help="distribution | structural_fn | failure_mode | exporter | probe_model"
    ),
    name: str = typer.Argument(..., help="Capability name, a lowercase identifier (e.g. weibull)"),
    dir: Path = typer.Option(Path("."), "--dir", help="Where to create the package"),
) -> None:
    """Scaffold a ready-to-publish ``datadoom-plugin-*`` package."""
    try:
        package_root = scaffold_plugin(kind, name, dir)
    except PluginError as exc:
        typer.secho(f"ERROR: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc
    else:
        typer.secho(f"created {package_root}", fg=typer.colors.GREEN)
        # Hint at the follow-up commands for the freshly scaffolded package.
        typer.echo("next: pip install -e . && datadoom plugin check .")
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
@plugin_app.command("check")
def plugin_check(
    target: Path = typer.Argument(..., help="A plugin .py file, package directory, or module"),
) -> None:
    """Run the plugin contract checks (interface, schema, determinism, RNG hygiene)."""
    try:
        reports = check_plugin(target)
    except PluginError as exc:
        typer.secho(f"ERROR: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc

    # Every report is printed; the overall verdict fails as soon as one fails.
    all_passed = True
    for report in reports:
        typer.echo(report.summary())
        if all_passed and not report.ok:
            all_passed = False

    if not all_passed:
        typer.secho("FAILED one or more contract checks failed", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1)

    typer.secho(f"OK {len(reports)} plugin(s) pass the contract", fg=typer.colors.GREEN)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
@template_app.command("list")
def template_list(
    level: str = typer.Option(
        "all",
        "--level",
        help="Filter by level: all | hackathon | starter.",
    ),
) -> None:
    """List the built-in domain templates."""
    selected = list_templates()
    show_everything = level == "all"
    if not show_everything:
        # Keep only templates whose level matches the requested one exactly.
        selected = [template for template in selected if template.level == level]

    # Pad the id column to the longest id (4 when nothing matches).
    id_width = max((len(template.id) for template in selected), default=4)
    for template in selected:
        marker = " [hackathon]" if template.level == "hackathon" else ""
        typer.echo(f"{template.id:<{id_width}} {template.domain:<15} {template.name}{marker}")
    typer.secho(f"{len(selected)} templates", fg=typer.colors.GREEN)
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
@template_app.command("show")
def template_show(
    template_id: str = typer.Argument(..., help="Template id (see `datadoom template list`)"),
) -> None:
    """Print a template's spec YAML to stdout."""
    try:
        spec_text = load_template_text(template_id)
        typer.echo(spec_text)
    except KeyError as exc:
        # A KeyError is reported as an error message plus a non-zero exit.
        typer.secho(f"ERROR: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
@template_app.command("use")
def template_use(
    template_id: str = typer.Argument(..., help="Template id (see `datadoom template list`)"),
    out: Path = typer.Option(..., "--out", help="Where to write the spec (e.g. my.datadoom.yaml)"),
) -> None:
    """Write a template's spec to a file so you can edit and `datadoom run` it."""
    template = get_template(template_id)
    if template is None:
        # Unknown id: report it and stop before touching the output path.
        typer.secho(f"ERROR: unknown template {template_id!r}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1)

    spec_text = load_template_text(template_id)
    out.write_text(spec_text, encoding="utf-8")
    typer.secho(f"wrote {out}", fg=typer.colors.GREEN)
    typer.echo(f"next: datadoom run {out} --seed 1 --out .tmp_run")
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def _recorded_checksum(run_dir: Path) -> str:
    """Return the ``data.csv`` checksum recorded in a run directory's metadata.

    Reads ``<run_dir>/metadata.json`` and returns the value stored under
    ``determinism -> artifact_checksums -> data.csv``.

    Raises:
        DataDoomError: If ``metadata.json`` is missing, is not valid JSON, or
            does not contain the expected checksum entry.
    """
    meta_path = run_dir / "metadata.json"
    if not meta_path.exists():
        raise DataDoomError(f"no metadata.json in {run_dir}")  # noqa: TRY003
    try:
        meta = json.loads(meta_path.read_text(encoding="utf-8"))
        return meta["determinism"]["artifact_checksums"]["data.csv"]
    except (json.JSONDecodeError, KeyError, TypeError) as exc:
        # Surface malformed or incomplete metadata as a DataDoomError so the
        # `verify` command reports it cleanly instead of showing a traceback.
        raise DataDoomError(  # noqa: TRY003
            f"metadata.json in {run_dir} has no usable data.csv checksum: {exc!r}"
        ) from exc
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
if __name__ == "__main__":
|
|
313
|
+
app()
|
datadoom/config.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Layered application configuration (03 §7).
|
|
2
|
+
|
|
3
|
+
Resolution order (lowest precedence first): built-in defaults -> config file
|
|
4
|
+
(``<home>/config.toml``) -> environment variables. CLI flags override at the
|
|
5
|
+
call site. This module is intentionally tiny and dependency-light so both the
|
|
6
|
+
server (``api``/``store``/``jobs``) and the CLI can import it without pulling in
|
|
7
|
+
the engine or a web framework.
|
|
8
|
+
|
|
9
|
+
DataDoom home: ``$DATADOOM_HOME`` or the platform default ``~/.datadoom``.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
import tomllib
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from functools import lru_cache
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def datadoom_home() -> Path:
    """Return the DataDoom home directory without creating it.

    A non-empty ``DATADOOM_HOME`` environment variable wins (with ``~``
    expanded); otherwise the home is ``~/.datadoom``.
    """
    override = os.environ.get("DATADOOM_HOME")
    if not override:
        return Path.home() / ".datadoom"
    return Path(override).expanduser()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class Config:
    """Resolved runtime configuration (a plain, mutable dataclass)."""

    # Required locations and connection string.
    home: Path
    db_url: str
    artifacts_dir: Path
    # Server bind address.
    host: str = "127.0.0.1"
    port: int = 8000
    # `determinism.pinned` mirrors 03 §5; informational here (the engine honors it).
    pinned: bool = False
    telemetry: bool = False
    # Free-form extra settings; each instance gets its own dict.
    extra: dict = field(default_factory=dict)

    @property
    def db_path(self) -> Path:
        """Path of ``datadoom.db`` directly inside :attr:`home`."""
        return self.home.joinpath("datadoom.db")

    def ensure_dirs(self) -> None:
        """Create the home and artifacts directories (with parents) if absent."""
        for directory in (self.home, self.artifacts_dir):
            directory.mkdir(parents=True, exist_ok=True)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _load_file(home: Path) -> dict:
|
|
53
|
+
path = home / "config.toml"
|
|
54
|
+
if not path.exists():
|
|
55
|
+
return {}
|
|
56
|
+
with open(path, "rb") as fh:
|
|
57
|
+
return tomllib.load(fh)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def load_config() -> Config:
    """Build a :class:`Config` from defaults, the config file, then the env.

    Values are read from ``<home>/config.toml`` (tables ``server``, ``storage``,
    ``determinism`` and ``telemetry``) and overridden by the ``DATADOOM_*``
    environment variables. The whole parsed file is kept on ``Config.extra``.

    Raises:
        ValueError: If the resolved port is not a valid integer.
    """
    home = datadoom_home()
    file_cfg = _load_file(home)

    server = file_cfg.get("server", {})
    storage = file_cfg.get("storage", {})
    determinism = file_cfg.get("determinism", {})
    telemetry = file_cfg.get("telemetry", {})

    artifacts_dir = Path(
        os.environ.get("DATADOOM_ARTIFACTS")
        or storage.get("artifacts_dir")
        or (home / "artifacts")
    ).expanduser()

    db_url = (
        os.environ.get("DATADOOM_DB_URL")
        or storage.get("db_url")
        or f"sqlite:///{(home / 'datadoom.db').as_posix()}"
    )

    host = os.environ.get("DATADOOM_HOST") or server.get("host", "127.0.0.1")

    raw_port = os.environ.get("DATADOOM_PORT") or server.get("port", 8000)
    try:
        port = int(raw_port)
    except ValueError as exc:
        # Same exception type as the bare int() call, but the message names the
        # setting so a typo in the environment or config file is easy to trace.
        raise ValueError(
            f"invalid port {raw_port!r}: DATADOOM_PORT / [server].port must be an integer"
        ) from exc

    pinned = _as_bool(os.environ.get("DATADOOM_PINNED"), determinism.get("pinned", False))
    tele = _as_bool(os.environ.get("DATADOOM_TELEMETRY"), telemetry.get("enabled", False))

    return Config(
        home=home,
        db_url=db_url,
        artifacts_dir=artifacts_dir,
        host=host,
        port=port,
        pinned=pinned,
        telemetry=tele,
        extra=file_cfg,
    )
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _as_bool(env_value: str | None, default: bool) -> bool:
    """Interpret an optional environment string as a boolean.

    Args:
        env_value: Raw environment value, or ``None`` when the variable is unset.
        default: Value to use when the environment provides nothing.

    Returns:
        ``bool(default)`` when ``env_value`` is ``None``, empty or whitespace-only;
        otherwise ``True`` only for ``1``, ``true``, ``yes`` or ``on``
        (case-insensitive, surrounding whitespace ignored).
    """
    if env_value is None:
        return bool(default)
    normalized = env_value.strip().lower()
    if not normalized:
        # An empty variable counts as "not set", so it does not silently
        # override the fallback value with False.
        return bool(default)
    return normalized in {"1", "true", "yes", "on"}
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@lru_cache(maxsize=1)
def get_config() -> Config:
    """Return the memoized :class:`Config` for this process.

    The first call delegates to ``load_config``; later calls reuse the cached
    result. Code that needs a fresh, isolated configuration (e.g. tests) should
    call ``load_config`` directly.
    """
    config = load_config()
    return config
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""The pure DataDoom engine — no web/DB/framework imports.
|
|
2
|
+
|
|
3
|
+
This package is an independently installable library: spec parsing, the seeded
|
|
4
|
+
RNG, distributions, export and the deterministic pipeline.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from .errors import (
|
|
10
|
+
DataDoomError,
|
|
11
|
+
DistributionError,
|
|
12
|
+
ReproducibilityError,
|
|
13
|
+
SpecValidationError,
|
|
14
|
+
)
|
|
15
|
+
from .pipeline import RunContext, RunResult, generate, resolve_seed
|
|
16
|
+
from .reference import build_capabilities
|
|
17
|
+
from .reports import ReportBundle, build_report
|
|
18
|
+
from .rng import RNGFactory
|
|
19
|
+
from .spec import Spec, load_spec, parse_spec, validate_spec
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"generate",
|
|
23
|
+
"resolve_seed",
|
|
24
|
+
"RunContext",
|
|
25
|
+
"RunResult",
|
|
26
|
+
"ReportBundle",
|
|
27
|
+
"build_report",
|
|
28
|
+
"build_capabilities",
|
|
29
|
+
"RNGFactory",
|
|
30
|
+
"Spec",
|
|
31
|
+
"load_spec",
|
|
32
|
+
"parse_spec",
|
|
33
|
+
"validate_spec",
|
|
34
|
+
"DataDoomError",
|
|
35
|
+
"SpecValidationError",
|
|
36
|
+
"DistributionError",
|
|
37
|
+
"ReproducibilityError",
|
|
38
|
+
]
|