datadoom 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. datadoom/__init__.py +23 -0
  2. datadoom/adapters/__init__.py +29 -0
  3. datadoom/adapters/frameworks.py +94 -0
  4. datadoom/adapters/loaders.py +72 -0
  5. datadoom/api/__init__.py +11 -0
  6. datadoom/api/app.py +109 -0
  7. datadoom/api/deps.py +30 -0
  8. datadoom/api/errors.py +89 -0
  9. datadoom/api/estimate.py +82 -0
  10. datadoom/api/routes/__init__.py +7 -0
  11. datadoom/api/routes/artifacts.py +147 -0
  12. datadoom/api/routes/datasets.py +180 -0
  13. datadoom/api/routes/meta.py +45 -0
  14. datadoom/api/routes/plugins.py +22 -0
  15. datadoom/api/routes/runs.py +144 -0
  16. datadoom/api/routes/specs.py +73 -0
  17. datadoom/api/routes/templates.py +30 -0
  18. datadoom/api/schemas.py +230 -0
  19. datadoom/api/serializers.py +143 -0
  20. datadoom/api/state.py +24 -0
  21. datadoom/api/store_helpers.py +56 -0
  22. datadoom/api/ws.py +72 -0
  23. datadoom/cli/__init__.py +1 -0
  24. datadoom/cli/main.py +313 -0
  25. datadoom/config.py +108 -0
  26. datadoom/engine/__init__.py +38 -0
  27. datadoom/engine/advice.py +289 -0
  28. datadoom/engine/audit.py +290 -0
  29. datadoom/engine/causal/__init__.py +15 -0
  30. datadoom/engine/causal/execute.py +116 -0
  31. datadoom/engine/causal/functions.py +116 -0
  32. datadoom/engine/causal/graph.py +54 -0
  33. datadoom/engine/difficulty/__init__.py +36 -0
  34. datadoom/engine/difficulty/calibrate.py +235 -0
  35. datadoom/engine/difficulty/knobs.py +171 -0
  36. datadoom/engine/difficulty/probes.py +181 -0
  37. datadoom/engine/dist/__init__.py +35 -0
  38. datadoom/engine/dist/base.py +46 -0
  39. datadoom/engine/dist/builtins.py +172 -0
  40. datadoom/engine/dist/compliance.py +344 -0
  41. datadoom/engine/dist/providers.py +117 -0
  42. datadoom/engine/errors.py +32 -0
  43. datadoom/engine/export/__init__.py +27 -0
  44. datadoom/engine/export/base.py +49 -0
  45. datadoom/engine/export/checksums.py +18 -0
  46. datadoom/engine/export/csv_exporter.py +34 -0
  47. datadoom/engine/export/json_exporter.py +67 -0
  48. datadoom/engine/export/metadata.py +58 -0
  49. datadoom/engine/export/parquet_exporter.py +45 -0
  50. datadoom/engine/failure/__init__.py +18 -0
  51. datadoom/engine/failure/apply.py +37 -0
  52. datadoom/engine/failure/base.py +116 -0
  53. datadoom/engine/failure/modes.py +442 -0
  54. datadoom/engine/pipeline.py +418 -0
  55. datadoom/engine/profile.py +327 -0
  56. datadoom/engine/progress.py +14 -0
  57. datadoom/engine/reference.py +338 -0
  58. datadoom/engine/reports.py +206 -0
  59. datadoom/engine/rng.py +79 -0
  60. datadoom/engine/spec/__init__.py +45 -0
  61. datadoom/engine/spec/hashing.py +57 -0
  62. datadoom/engine/spec/models.py +238 -0
  63. datadoom/engine/spec/validate.py +345 -0
  64. datadoom/engine/timeseries.py +88 -0
  65. datadoom/jobs/__init__.py +14 -0
  66. datadoom/jobs/progress.py +155 -0
  67. datadoom/jobs/worker.py +162 -0
  68. datadoom/plugin.py +35 -0
  69. datadoom/plugins/__init__.py +47 -0
  70. datadoom/plugins/contracts.py +72 -0
  71. datadoom/plugins/loader.py +125 -0
  72. datadoom/plugins/registry.py +214 -0
  73. datadoom/plugins/scaffold.py +434 -0
  74. datadoom/store/__init__.py +47 -0
  75. datadoom/store/artifacts.py +67 -0
  76. datadoom/store/db.py +104 -0
  77. datadoom/store/migrations/__init__.py +0 -0
  78. datadoom/store/migrations/env.py +53 -0
  79. datadoom/store/migrations/script.py.mako +24 -0
  80. datadoom/store/migrations/versions/0001_init.py +149 -0
  81. datadoom/store/migrations/versions/0002_report_mutual_information.py +23 -0
  82. datadoom/store/migrations/versions/0003_run_name.py +23 -0
  83. datadoom/store/migrations/versions/0004_report_profile.py +24 -0
  84. datadoom/store/models.py +170 -0
  85. datadoom/store/repositories.py +279 -0
  86. datadoom/templates/__init__.py +239 -0
  87. datadoom/templates/ab_test.datadoom.yaml +46 -0
  88. datadoom/templates/clinical_deterioration.datadoom.yaml +124 -0
  89. datadoom/templates/credit_default_challenge.datadoom.yaml +147 -0
  90. datadoom/templates/customer_churn.datadoom.yaml +60 -0
  91. datadoom/templates/ecommerce_orders.datadoom.yaml +46 -0
  92. datadoom/templates/fraud_detection.datadoom.yaml +57 -0
  93. datadoom/templates/hospital_readmission.datadoom.yaml +61 -0
  94. datadoom/templates/insurance_claims.datadoom.yaml +43 -0
  95. datadoom/templates/iot_sensors.datadoom.yaml +44 -0
  96. datadoom/templates/people_directory.datadoom.yaml +56 -0
  97. datadoom/templates/predictive_maintenance.datadoom.yaml +107 -0
  98. datadoom/templates/telecom_churn_challenge.datadoom.yaml +125 -0
  99. datadoom/version.py +3 -0
  100. datadoom/webdist/assets/index-V8VAuTJG.js +445 -0
  101. datadoom/webdist/assets/index-doRjyG5s.css +1 -0
  102. datadoom/webdist/assets/inter-cyrillic-ext-wght-normal-BOeWTOD4.woff2 +0 -0
  103. datadoom/webdist/assets/inter-cyrillic-wght-normal-DqGufNeO.woff2 +0 -0
  104. datadoom/webdist/assets/inter-greek-ext-wght-normal-DlzME5K_.woff2 +0 -0
  105. datadoom/webdist/assets/inter-greek-wght-normal-CkhJZR-_.woff2 +0 -0
  106. datadoom/webdist/assets/inter-latin-ext-wght-normal-DO1Apj_S.woff2 +0 -0
  107. datadoom/webdist/assets/inter-latin-wght-normal-Dx4kXJAl.woff2 +0 -0
  108. datadoom/webdist/assets/inter-vietnamese-wght-normal-CBcvBZtf.woff2 +0 -0
  109. datadoom/webdist/assets/jetbrains-mono-cyrillic-wght-normal-D73BlboJ.woff2 +0 -0
  110. datadoom/webdist/assets/jetbrains-mono-greek-wght-normal-Bw9x6K1M.woff2 +0 -0
  111. datadoom/webdist/assets/jetbrains-mono-latin-ext-wght-normal-DBQx-q_a.woff2 +0 -0
  112. datadoom/webdist/assets/jetbrains-mono-latin-wght-normal-B9CIFXIH.woff2 +0 -0
  113. datadoom/webdist/assets/jetbrains-mono-vietnamese-wght-normal-Bt-aOZkq.woff2 +0 -0
  114. datadoom/webdist/assets/space-grotesk-latin-ext-wght-normal-D9tNdqV9.woff2 +0 -0
  115. datadoom/webdist/assets/space-grotesk-latin-wght-normal-BhU9QXUp.woff2 +0 -0
  116. datadoom/webdist/assets/space-grotesk-vietnamese-wght-normal-D0rl6rjA.woff2 +0 -0
  117. datadoom/webdist/index.html +15 -0
  118. datadoom-0.1.0.dev0.dist-info/METADATA +143 -0
  119. datadoom-0.1.0.dev0.dist-info/RECORD +122 -0
  120. datadoom-0.1.0.dev0.dist-info/WHEEL +4 -0
  121. datadoom-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  122. datadoom-0.1.0.dev0.dist-info/licenses/LICENSE +202 -0
datadoom/cli/main.py ADDED
@@ -0,0 +1,313 @@
1
+ """DataDoom CLI — launcher + headless automation (17 step 6).
2
+
3
+ In P0 the CLI is the only surface: ``run``, ``validate`` and ``verify`` exercise
4
+ the deterministic core. The web Canvas (``datadoom`` with no subcommand) arrives
5
+ in P1.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import tempfile
12
+ from pathlib import Path
13
+
14
+ import typer
15
+
16
+ from ..engine import generate, load_spec
17
+ from ..engine.errors import DataDoomError, ReproducibilityError
18
+ from ..engine.export.checksums import sha256_file
19
+ from ..plugins import (
20
+ PluginError,
21
+ check_plugin,
22
+ get_registry,
23
+ load_plugins,
24
+ scaffold_plugin,
25
+ )
26
+ from ..templates import get_template, list_templates, load_template_text
27
+ from ..version import __version__
28
+
29
# Root command group. `no_args_is_help=True` makes a bare `datadoom` show help.
app = typer.Typer(
    help="DataDoom — reproducible synthetic data.",
    no_args_is_help=True,
    add_completion=False,
)

# Sub-command groups; they are mounted on the root group below.
plugin_app = typer.Typer(
    no_args_is_help=True,
    help="Author and inspect DataDoom plugins (distributions, fns, failures, …).",
)
template_app = typer.Typer(
    no_args_is_help=True,
    help="Browse and start from built-in domain templates.",
)

# Registration order is kept: `plugin` first, then `template`.
app.add_typer(plugin_app, name="plugin")
app.add_typer(template_app, name="template")
46
+
47
+
48
@app.command()
def version() -> None:
    """Print the installed DataDoom version."""
    # The docstring above doubles as the CLI help text, so it is kept verbatim.
    installed = __version__
    typer.echo(installed)
52
+
53
+
54
@app.command(name="spec-reference")
def spec_reference(
    out: Path = typer.Option(None, "--out", help="Write to a file instead of stdout"),
    pretty: bool = typer.Option(True, "--pretty/--compact", help="Indent the JSON"),
) -> None:
    """Emit the machine-readable spec capabilities manifest (for AI/tooling).

    Lists every distribution, structural function, failure mode, difficulty tier,
    feature type, exporter, and validation rule a spec accepts — built from the
    live registries, so installed plugins are included. Feed it to an LLM/agent so
    it can author a valid ``*.datadoom.yaml`` without guessing.
    """
    # Load plugins first so plugin-registered capabilities appear in the manifest.
    load_plugins()
    from ..engine.reference import build_capabilities

    manifest = build_capabilities()
    indent = 2 if pretty else None
    payload = json.dumps(manifest, indent=indent, sort_keys=False)

    # No --out given: the manifest goes to stdout and we are done.
    if out is None:
        typer.echo(payload)
        return

    # File output gets a trailing newline; stdout output relies on echo's own.
    out.write_text(f"{payload}\n", encoding="utf-8")
    typer.secho(f"wrote spec reference → {out}", fg=typer.colors.GREEN)
75
+
76
+
77
@app.command()
def serve(
    host: str = typer.Option(None, "--host", help="Bind host (default 127.0.0.1)"),
    port: int = typer.Option(None, "--port", help="Bind port (default 8000)"),
    reload: bool = typer.Option(False, "--reload", help="Auto-reload (development)"),
) -> None:
    """Launch the local web server (REST + WebSocket) and the bundled Canvas UI.

    Requires the server extra: ``pip install datadoom[server]``.
    """
    # The server stack is optional; import lazily so the rest of the CLI works
    # without it and a missing extra becomes a clean exit instead of a traceback.
    try:
        import uvicorn

        from ..api import create_app
        from ..config import load_config
    except ImportError as exc:  # pragma: no cover - only without the server extra
        typer.secho(
            "The web server needs extra deps. Install with: pip install 'datadoom[server]'",
            fg=typer.colors.RED,
            err=True,
        )
        raise typer.Exit(code=1) from exc

    cfg = load_config()
    # CLI flags win over the resolved config. An empty host is meaningless, so a
    # truthiness test is fine there.
    bind_host = host or cfg.host
    # The port must be compared against None: `--port 0` is a legitimate request
    # and `port or cfg.port` would silently discard it because 0 is falsy.
    bind_port = cfg.port if port is None else port
    cfg.ensure_dirs()

    typer.secho(
        f"DataDoom serving on http://{bind_host}:{bind_port} (data: {cfg.home})",
        fg=typer.colors.GREEN,
    )
    if reload:
        # Reload needs an import string; create_app is the factory.
        uvicorn.run(
            "datadoom.api:create_app", host=bind_host, port=bind_port, reload=True, factory=True
        )
    else:
        uvicorn.run(create_app(cfg), host=bind_host, port=bind_port)
116
+
117
+
118
@app.command()
def validate(spec_path: Path = typer.Argument(..., help="Path to a *.datadoom.yaml spec")) -> None:
    """Validate a spec file (shape + cross-field) without generating."""
    # Plugins are loaded before parsing, matching the other spec-consuming commands.
    load_plugins()
    try:
        spec = load_spec(str(spec_path))
    except DataDoomError as exc:
        # Report on stderr and exit non-zero instead of showing a traceback.
        typer.secho(f"INVALID: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc
    else:
        digest = spec.spec_hash()
        typer.secho(f"OK spec_hash={digest}", fg=typer.colors.GREEN)
128
+
129
+
130
@app.command()
def run(
    spec_path: Path = typer.Argument(..., help="Path to a *.datadoom.yaml spec"),
    seed: int = typer.Option(None, "--seed", help="Override/resolve the random seed"),
    out: Path = typer.Option(..., "--out", help="Output directory for artifacts"),
) -> None:
    """Generate a dataset from a spec and write CSV + metadata to --out."""
    load_plugins()
    try:
        spec = load_spec(str(spec_path))
        result = generate(spec, seed=seed, out_dir=out)
    except DataDoomError as exc:
        typer.secho(f"ERROR: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc

    report = result.compliance

    # The parenthesised breakdown is only shown when at least one feature was
    # reported; the "n/a" part only when some features were not applicable.
    suffix = ""
    if report.features:
        assessed = sum(1 for feature in report.features if feature.applicable)
        not_applicable = len(report.features) - assessed
        na_part = f", {not_applicable} n/a" if not_applicable else ""
        suffix = f" ({assessed} assessed{na_part})"

    summary_lines = (
        f"spec_hash {result.spec_hash}",
        f"seed {result.seed}",
        f"rows {len(result.frame)}",
        f"compliance {report.score:.3f}{suffix}",
    )
    for line in summary_lines:
        typer.echo(line)

    # Checksums are shortened to their first 16 characters for display only.
    for artifact in result.artifacts:
        short_digest = artifact.checksum_sha256[:16]
        typer.echo(f"artifact {artifact.path} sha256={short_digest}…")
    typer.secho(f"written to {out}", fg=typer.colors.GREEN)
156
+
157
+
158
@app.command()
def verify(
    spec_path: Path = typer.Argument(..., help="Path to a *.datadoom.yaml spec"),
    seed: int = typer.Option(..., "--seed", help="Seed to verify reproducibility for"),
    against: Path = typer.Option(
        None,
        "--against",
        help="An existing run dir to compare against; omit to self-check (run twice).",
    ),
) -> None:
    """Prove (spec_hash, seed) -> identical checksum.

    With --against, regenerate and compare to that bundle's recorded checksum.
    Without it, generate twice and assert the two checksums match.
    """
    load_plugins()
    try:
        spec = load_spec(str(spec_path))

        # Reference generation: hash the CSV while the scratch directory still exists.
        with tempfile.TemporaryDirectory() as first_dir:
            fresh = generate(spec, seed=seed, out_dir=first_dir)
            fresh_checksum = sha256_file(Path(first_dir) / "data.csv")

        if against is None:
            # Self-check: a second, independent generation with the same inputs.
            with tempfile.TemporaryDirectory() as second_dir:
                generate(spec, seed=seed, out_dir=second_dir)
                expected = sha256_file(Path(second_dir) / "data.csv")
            source = "second run"
        else:
            # Compare against the checksum recorded in an existing run directory.
            expected = _recorded_checksum(against)
            source = str(against)
    except DataDoomError as exc:
        typer.secho(f"ERROR: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc

    if fresh_checksum == expected:
        typer.secho(
            f"OK reproducible (spec_hash={fresh.spec_hash}, seed={seed})\n"
            f" sha256={fresh_checksum}",
            fg=typer.colors.GREEN,
        )
        return

    typer.secho(
        f"MISMATCH\n this run: {fresh_checksum}\n {source}: {expected}",
        fg=typer.colors.RED,
        err=True,
    )
    # NOTE(review): unlike the other failure paths this raises past the command
    # instead of exiting via typer.Exit — confirm that is intended.
    raise ReproducibilityError("checksum mismatch")  # noqa: TRY003
204
+
205
+
206
@plugin_app.command("list")
def plugin_list() -> None:
    """List every registered capability — core built-ins and discovered plugins."""
    load_plugins()
    records = get_registry().records()

    # Pad the name column to the longest name; 4 is the width for an empty registry.
    name_width = max([len(record.name) for record in records], default=4)

    for record in records:
        if record.builtin:
            origin = "core"
        else:
            origin = record.source
        version_part = f" v{record.version}" if record.version else ""
        typer.echo(f"{record.kind:<14} {record.name:<{name_width}} [{origin}]{version_part}")

    typer.secho(f"{len(records)} capabilities", fg=typer.colors.GREEN)
217
+
218
+
219
@plugin_app.command("new")
def plugin_new(
    kind: str = typer.Argument(
        ..., help="distribution | structural_fn | failure_mode | exporter | probe_model"
    ),
    name: str = typer.Argument(..., help="Capability name, a lowercase identifier (e.g. weibull)"),
    dir: Path = typer.Option(Path("."), "--dir", help="Where to create the package"),
) -> None:
    """Scaffold a ready-to-publish ``datadoom-plugin-*`` package."""
    try:
        created_root = scaffold_plugin(kind, name, dir)
    except PluginError as exc:
        # Scaffolding problems are reported on stderr with a non-zero exit.
        typer.secho(f"ERROR: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc
    else:
        typer.secho(f"created {created_root}", fg=typer.colors.GREEN)
        typer.echo("next: pip install -e . && datadoom plugin check .")
235
+
236
+
237
@plugin_app.command("check")
def plugin_check(
    target: Path = typer.Argument(..., help="A plugin .py file, package directory, or module"),
) -> None:
    """Run the plugin contract checks (interface, schema, determinism, RNG hygiene)."""
    try:
        reports = check_plugin(target)
    except PluginError as exc:
        typer.secho(f"ERROR: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc

    # Every report's summary is printed, even after a failure has been seen;
    # `report.ok` is only consulted until the first failure is recorded.
    any_failed = False
    for report in reports:
        typer.echo(report.summary())
        if not any_failed and not report.ok:
            any_failed = True

    if any_failed:
        typer.secho("FAILED one or more contract checks failed", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1)

    typer.secho(f"OK {len(reports)} plugin(s) pass the contract", fg=typer.colors.GREEN)
257
+
258
+
259
@template_app.command("list")
def template_list(
    level: str = typer.Option(
        "all",
        "--level",
        help="Filter by level: all | hackathon | starter.",
    ),
) -> None:
    """List the built-in domain templates."""
    # "all" disables filtering; any other value must equal the template's level.
    show_everything = level == "all"
    selected = [tpl for tpl in list_templates() if show_everything or tpl.level == level]

    # Pad the id column to the longest id; 4 is the width when nothing matches.
    id_width = max([len(tpl.id) for tpl in selected], default=4)

    for tpl in selected:
        marker = " [hackathon]" if tpl.level == "hackathon" else ""
        typer.echo(f"{tpl.id:<{id_width}} {tpl.domain:<15} {tpl.name}{marker}")

    typer.secho(f"{len(selected)} templates", fg=typer.colors.GREEN)
276
+
277
+
278
@template_app.command("show")
def template_show(
    template_id: str = typer.Argument(..., help="Template id (see `datadoom template list`)"),
) -> None:
    """Print a template's spec YAML to stdout."""
    try:
        spec_text = load_template_text(template_id)
        typer.echo(spec_text)
    except KeyError as exc:
        # A KeyError from the lookup is reported as a clean CLI error.
        typer.secho(f"ERROR: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc
288
+
289
+
290
@template_app.command("use")
def template_use(
    template_id: str = typer.Argument(..., help="Template id (see `datadoom template list`)"),
    out: Path = typer.Option(..., "--out", help="Where to write the spec (e.g. my.datadoom.yaml)"),
) -> None:
    """Write a template's spec to a file so you can edit and `datadoom run` it."""
    # Reject unknown ids up front so nothing is written for a bad template name.
    template_known = get_template(template_id) is not None
    if not template_known:
        typer.secho(f"ERROR: unknown template {template_id!r}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1)

    spec_text = load_template_text(template_id)
    out.write_text(spec_text, encoding="utf-8")

    typer.secho(f"wrote {out}", fg=typer.colors.GREEN)
    typer.echo(f"next: datadoom run {out} --seed 1 --out .tmp_run")
302
+
303
+
304
+ def _recorded_checksum(run_dir: Path) -> str:
305
+ meta_path = run_dir / "metadata.json"
306
+ if not meta_path.exists():
307
+ raise DataDoomError(f"no metadata.json in {run_dir}") # noqa: TRY003
308
+ meta = json.loads(meta_path.read_text(encoding="utf-8"))
309
+ return meta["determinism"]["artifact_checksums"]["data.csv"]
310
+
311
+
312
+ if __name__ == "__main__":
313
+ app()
datadoom/config.py ADDED
@@ -0,0 +1,108 @@
1
+ """Layered application configuration (03 §7).
2
+
3
+ Resolution order (lowest precedence first): built-in defaults -> config file
4
+ (``<home>/config.toml``) -> environment variables. CLI flags override at the
5
+ call site. This module is intentionally tiny and dependency-light so both the
6
+ server (``api``/``store``/``jobs``) and the CLI can import it without pulling in
7
+ the engine or a web framework.
8
+
9
+ DataDoom home: ``$DATADOOM_HOME`` or the platform default ``~/.datadoom``.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import os
15
+ import tomllib
16
+ from dataclasses import dataclass, field
17
+ from functools import lru_cache
18
+ from pathlib import Path
19
+
20
+
21
def datadoom_home() -> Path:
    """Return the DataDoom home directory without creating it.

    A non-empty ``DATADOOM_HOME`` environment variable wins (with ``~``
    expanded); otherwise the result is ``.datadoom`` under the user's home.
    """
    override = os.environ.get("DATADOOM_HOME", "")
    if not override:
        # Unset and empty are treated alike: use the per-user default.
        return Path.home().joinpath(".datadoom")
    return Path(override).expanduser()
27
+
28
+
29
@dataclass
class Config:
    """Runtime settings after all configuration layers have been resolved.

    The dataclass is not frozen, so treat instances as read-only by convention.
    """

    home: Path
    db_url: str
    artifacts_dir: Path
    host: str = "127.0.0.1"
    port: int = 8000
    # `determinism.pinned` mirrors 03 §5; informational here (the engine honors it).
    pinned: bool = False
    telemetry: bool = False
    # Defaults to a fresh dict per instance (never shared between instances).
    extra: dict = field(default_factory=dict)

    @property
    def db_path(self) -> Path:
        """Path of ``datadoom.db`` inside ``home``.

        NOTE(review): this is derived from ``home`` only and does not look at
        ``db_url`` — confirm callers expect that when ``db_url`` is overridden.
        """
        return self.home.joinpath("datadoom.db")

    def ensure_dirs(self) -> None:
        """Create ``home`` and ``artifacts_dir`` (with parents) if they are missing."""
        for directory in (self.home, self.artifacts_dir):
            directory.mkdir(parents=True, exist_ok=True)
50
+
51
+
52
+ def _load_file(home: Path) -> dict:
53
+ path = home / "config.toml"
54
+ if not path.exists():
55
+ return {}
56
+ with open(path, "rb") as fh:
57
+ return tomllib.load(fh)
58
+
59
+
60
def load_config() -> Config:
    """Resolve a fresh :class:`Config` from defaults, ``config.toml`` and the env.

    For each setting, a non-empty environment variable takes precedence over
    the config file, which takes precedence over the built-in default.
    """
    env = os.environ
    home = datadoom_home()
    file_cfg = _load_file(home)

    # Absent tables are treated as empty so the lookups below need no guards.
    server, storage, determinism, telemetry = (
        file_cfg.get(section, {})
        for section in ("server", "storage", "determinism", "telemetry")
    )

    raw_artifacts = env.get("DATADOOM_ARTIFACTS") or storage.get("artifacts_dir")
    if not raw_artifacts:
        raw_artifacts = home / "artifacts"
    artifacts_dir = Path(raw_artifacts).expanduser()

    db_url = env.get("DATADOOM_DB_URL") or storage.get("db_url")
    if not db_url:
        # Default database: a SQLite file inside the home directory.
        db_url = f"sqlite:///{(home / 'datadoom.db').as_posix()}"

    host = env.get("DATADOOM_HOST") or server.get("host", "127.0.0.1")
    raw_port = env.get("DATADOOM_PORT") or server.get("port", 8000)
    port = int(raw_port)

    pinned = _as_bool(env.get("DATADOOM_PINNED"), determinism.get("pinned", False))
    tele = _as_bool(env.get("DATADOOM_TELEMETRY"), telemetry.get("enabled", False))

    # The raw parsed file is kept on `extra` alongside the resolved fields.
    return Config(
        home=home,
        db_url=db_url,
        artifacts_dir=artifacts_dir,
        host=host,
        port=port,
        pinned=pinned,
        telemetry=tele,
        extra=file_cfg,
    )
97
+
98
+
99
+ def _as_bool(env_value: str | None, default: bool) -> bool:
100
+ if env_value is None:
101
+ return bool(default)
102
+ return env_value.strip().lower() in {"1", "true", "yes", "on"}
103
+
104
+
105
@lru_cache(maxsize=1)
def get_config() -> Config:
    """Return the shared configuration, resolving it on the first call only.

    The result is memoized by ``lru_cache``, so later calls return the same
    object. Code that needs a fresh resolution (for example isolated tests)
    should call ``load_config`` directly.
    """
    resolved = load_config()
    return resolved
datadoom/engine/__init__.py ADDED
@@ -0,0 +1,38 @@
1
+ """The pure DataDoom engine — no web/DB/framework imports.
2
+
3
+ This package is an independently installable library: spec parsing, the seeded
4
+ RNG, distributions, export and the deterministic pipeline.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from .errors import (
10
+ DataDoomError,
11
+ DistributionError,
12
+ ReproducibilityError,
13
+ SpecValidationError,
14
+ )
15
+ from .pipeline import RunContext, RunResult, generate, resolve_seed
16
+ from .reference import build_capabilities
17
+ from .reports import ReportBundle, build_report
18
+ from .rng import RNGFactory
19
+ from .spec import Spec, load_spec, parse_spec, validate_spec
20
+
21
+ __all__ = [
22
+ "generate",
23
+ "resolve_seed",
24
+ "RunContext",
25
+ "RunResult",
26
+ "ReportBundle",
27
+ "build_report",
28
+ "build_capabilities",
29
+ "RNGFactory",
30
+ "Spec",
31
+ "load_spec",
32
+ "parse_spec",
33
+ "validate_spec",
34
+ "DataDoomError",
35
+ "SpecValidationError",
36
+ "DistributionError",
37
+ "ReproducibilityError",
38
+ ]