datadoom 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. datadoom/__init__.py +23 -0
  2. datadoom/adapters/__init__.py +29 -0
  3. datadoom/adapters/frameworks.py +94 -0
  4. datadoom/adapters/loaders.py +72 -0
  5. datadoom/api/__init__.py +11 -0
  6. datadoom/api/app.py +109 -0
  7. datadoom/api/deps.py +30 -0
  8. datadoom/api/errors.py +89 -0
  9. datadoom/api/estimate.py +82 -0
  10. datadoom/api/routes/__init__.py +7 -0
  11. datadoom/api/routes/artifacts.py +147 -0
  12. datadoom/api/routes/datasets.py +180 -0
  13. datadoom/api/routes/meta.py +45 -0
  14. datadoom/api/routes/plugins.py +22 -0
  15. datadoom/api/routes/runs.py +144 -0
  16. datadoom/api/routes/specs.py +73 -0
  17. datadoom/api/routes/templates.py +30 -0
  18. datadoom/api/schemas.py +230 -0
  19. datadoom/api/serializers.py +143 -0
  20. datadoom/api/state.py +24 -0
  21. datadoom/api/store_helpers.py +56 -0
  22. datadoom/api/ws.py +72 -0
  23. datadoom/cli/__init__.py +1 -0
  24. datadoom/cli/main.py +313 -0
  25. datadoom/config.py +108 -0
  26. datadoom/engine/__init__.py +38 -0
  27. datadoom/engine/advice.py +289 -0
  28. datadoom/engine/audit.py +290 -0
  29. datadoom/engine/causal/__init__.py +15 -0
  30. datadoom/engine/causal/execute.py +116 -0
  31. datadoom/engine/causal/functions.py +116 -0
  32. datadoom/engine/causal/graph.py +54 -0
  33. datadoom/engine/difficulty/__init__.py +36 -0
  34. datadoom/engine/difficulty/calibrate.py +235 -0
  35. datadoom/engine/difficulty/knobs.py +171 -0
  36. datadoom/engine/difficulty/probes.py +181 -0
  37. datadoom/engine/dist/__init__.py +35 -0
  38. datadoom/engine/dist/base.py +46 -0
  39. datadoom/engine/dist/builtins.py +172 -0
  40. datadoom/engine/dist/compliance.py +344 -0
  41. datadoom/engine/dist/providers.py +117 -0
  42. datadoom/engine/errors.py +32 -0
  43. datadoom/engine/export/__init__.py +27 -0
  44. datadoom/engine/export/base.py +49 -0
  45. datadoom/engine/export/checksums.py +18 -0
  46. datadoom/engine/export/csv_exporter.py +34 -0
  47. datadoom/engine/export/json_exporter.py +67 -0
  48. datadoom/engine/export/metadata.py +58 -0
  49. datadoom/engine/export/parquet_exporter.py +45 -0
  50. datadoom/engine/failure/__init__.py +18 -0
  51. datadoom/engine/failure/apply.py +37 -0
  52. datadoom/engine/failure/base.py +116 -0
  53. datadoom/engine/failure/modes.py +442 -0
  54. datadoom/engine/pipeline.py +418 -0
  55. datadoom/engine/profile.py +327 -0
  56. datadoom/engine/progress.py +14 -0
  57. datadoom/engine/reference.py +338 -0
  58. datadoom/engine/reports.py +206 -0
  59. datadoom/engine/rng.py +79 -0
  60. datadoom/engine/spec/__init__.py +45 -0
  61. datadoom/engine/spec/hashing.py +57 -0
  62. datadoom/engine/spec/models.py +238 -0
  63. datadoom/engine/spec/validate.py +345 -0
  64. datadoom/engine/timeseries.py +88 -0
  65. datadoom/jobs/__init__.py +14 -0
  66. datadoom/jobs/progress.py +155 -0
  67. datadoom/jobs/worker.py +162 -0
  68. datadoom/plugin.py +35 -0
  69. datadoom/plugins/__init__.py +47 -0
  70. datadoom/plugins/contracts.py +72 -0
  71. datadoom/plugins/loader.py +125 -0
  72. datadoom/plugins/registry.py +214 -0
  73. datadoom/plugins/scaffold.py +434 -0
  74. datadoom/store/__init__.py +47 -0
  75. datadoom/store/artifacts.py +67 -0
  76. datadoom/store/db.py +104 -0
  77. datadoom/store/migrations/__init__.py +0 -0
  78. datadoom/store/migrations/env.py +53 -0
  79. datadoom/store/migrations/script.py.mako +24 -0
  80. datadoom/store/migrations/versions/0001_init.py +149 -0
  81. datadoom/store/migrations/versions/0002_report_mutual_information.py +23 -0
  82. datadoom/store/migrations/versions/0003_run_name.py +23 -0
  83. datadoom/store/migrations/versions/0004_report_profile.py +24 -0
  84. datadoom/store/models.py +170 -0
  85. datadoom/store/repositories.py +279 -0
  86. datadoom/templates/__init__.py +239 -0
  87. datadoom/templates/ab_test.datadoom.yaml +46 -0
  88. datadoom/templates/clinical_deterioration.datadoom.yaml +124 -0
  89. datadoom/templates/credit_default_challenge.datadoom.yaml +147 -0
  90. datadoom/templates/customer_churn.datadoom.yaml +60 -0
  91. datadoom/templates/ecommerce_orders.datadoom.yaml +46 -0
  92. datadoom/templates/fraud_detection.datadoom.yaml +57 -0
  93. datadoom/templates/hospital_readmission.datadoom.yaml +61 -0
  94. datadoom/templates/insurance_claims.datadoom.yaml +43 -0
  95. datadoom/templates/iot_sensors.datadoom.yaml +44 -0
  96. datadoom/templates/people_directory.datadoom.yaml +56 -0
  97. datadoom/templates/predictive_maintenance.datadoom.yaml +107 -0
  98. datadoom/templates/telecom_churn_challenge.datadoom.yaml +125 -0
  99. datadoom/version.py +3 -0
  100. datadoom/webdist/assets/index-V8VAuTJG.js +445 -0
  101. datadoom/webdist/assets/index-doRjyG5s.css +1 -0
  102. datadoom/webdist/assets/inter-cyrillic-ext-wght-normal-BOeWTOD4.woff2 +0 -0
  103. datadoom/webdist/assets/inter-cyrillic-wght-normal-DqGufNeO.woff2 +0 -0
  104. datadoom/webdist/assets/inter-greek-ext-wght-normal-DlzME5K_.woff2 +0 -0
  105. datadoom/webdist/assets/inter-greek-wght-normal-CkhJZR-_.woff2 +0 -0
  106. datadoom/webdist/assets/inter-latin-ext-wght-normal-DO1Apj_S.woff2 +0 -0
  107. datadoom/webdist/assets/inter-latin-wght-normal-Dx4kXJAl.woff2 +0 -0
  108. datadoom/webdist/assets/inter-vietnamese-wght-normal-CBcvBZtf.woff2 +0 -0
  109. datadoom/webdist/assets/jetbrains-mono-cyrillic-wght-normal-D73BlboJ.woff2 +0 -0
  110. datadoom/webdist/assets/jetbrains-mono-greek-wght-normal-Bw9x6K1M.woff2 +0 -0
  111. datadoom/webdist/assets/jetbrains-mono-latin-ext-wght-normal-DBQx-q_a.woff2 +0 -0
  112. datadoom/webdist/assets/jetbrains-mono-latin-wght-normal-B9CIFXIH.woff2 +0 -0
  113. datadoom/webdist/assets/jetbrains-mono-vietnamese-wght-normal-Bt-aOZkq.woff2 +0 -0
  114. datadoom/webdist/assets/space-grotesk-latin-ext-wght-normal-D9tNdqV9.woff2 +0 -0
  115. datadoom/webdist/assets/space-grotesk-latin-wght-normal-BhU9QXUp.woff2 +0 -0
  116. datadoom/webdist/assets/space-grotesk-vietnamese-wght-normal-D0rl6rjA.woff2 +0 -0
  117. datadoom/webdist/index.html +15 -0
  118. datadoom-0.1.0.dev0.dist-info/METADATA +143 -0
  119. datadoom-0.1.0.dev0.dist-info/RECORD +122 -0
  120. datadoom-0.1.0.dev0.dist-info/WHEEL +4 -0
  121. datadoom-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  122. datadoom-0.1.0.dev0.dist-info/licenses/LICENSE +202 -0
@@ -0,0 +1,53 @@
1
+ """Alembic environment (07 §5).
2
+
3
+ Driven programmatically from ``store.db.init_database`` — the URL is injected via
4
+ ``Config.set_main_option("sqlalchemy.url", ...)``. We bind ``target_metadata`` to
5
+ the ORM ``Base`` so ``alembic revision --autogenerate`` stays usable, but the
6
+ checked-in linear history is authoritative.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from alembic import context
12
+ from sqlalchemy import engine_from_config, pool
13
+
14
+ from datadoom.store.models import Base
15
+
16
+ config = context.config
17
+ target_metadata = Base.metadata
18
+
19
+
20
def run_migrations_offline() -> None:
    """Run the migrations in offline mode.

    The context is configured from the ``sqlalchemy.url`` main option only
    (no engine or connection is created here), with literal bind rendering
    and batch-mode rendering enabled.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
        "render_as_batch": True,
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
31
+
32
+
33
def run_migrations_online() -> None:
    """Run the migrations against a live database connection.

    An engine is built from the ``sqlalchemy.*`` keys of the main config
    section (an empty mapping is used when the section is absent) with
    ``NullPool``. The migrations run inside a single transaction on one
    connection.

    The engine is disposed in a ``finally`` clause so it is released even
    when a migration raises; the exception itself still propagates.
    """
    connectable = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )
    try:
        with connectable.connect() as connection:
            context.configure(
                connection=connection,
                target_metadata=target_metadata,
                render_as_batch=True,
            )
            with context.begin_transaction():
                context.run_migrations()
    finally:
        # Previously skipped on failure; always release the engine.
        connectable.dispose()
48
+
49
+
50
# Module-level entry point: pick the runner matching the current Alembic mode.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
@@ -0,0 +1,24 @@
1
+ """${message}
2
+
3
+ Revision ID: ${up_revision}
4
+ Revises: ${down_revision | comma,n}
5
+ Create Date: ${create_date}
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from alembic import op
10
+ import sqlalchemy as sa
11
+ ${imports if imports else ""}
12
+
13
+ revision = ${repr(up_revision)}
14
+ down_revision = ${repr(down_revision)}
15
+ branch_labels = ${repr(branch_labels)}
16
+ depends_on = ${repr(depends_on)}
17
+
18
+
19
+ def upgrade() -> None:
20
+ ${upgrades if upgrades else "pass"}
21
+
22
+
23
+ def downgrade() -> None:
24
+ ${downgrades if downgrades else "pass"}
@@ -0,0 +1,149 @@
1
+ """0001 init — core tables (07 §2, no team-mode ``users``).
2
+
3
+ Revision ID: 0001_init
4
+ Revises:
5
+ Create Date: 2026-06-01
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import sqlalchemy as sa
10
+ from alembic import op
11
+
12
+ revision = "0001_init"
13
+ down_revision = None
14
+ branch_labels = None
15
+ depends_on = None
16
+
17
+
18
def upgrade() -> None:
    """Create the initial schema: six tables plus their secondary indexes.

    Tables are created parent-first (``datasets``, ``specs``,
    ``generation_runs``, then ``artifacts`` and ``reports``) so every hard
    foreign key targets a table that already exists. ``plugins`` has no
    foreign keys. Each table's indexes are created right after the table.
    """

    def uuid_str() -> sa.String:
        # One fresh 36-character string type per identifier column.
        return sa.String(length=36)

    def cascading(target: str) -> sa.ForeignKey:
        # Hard FK whose rows are removed together with the referenced row.
        return sa.ForeignKey(target, ondelete="CASCADE")

    def build_indexes(table: str, definitions) -> None:
        # ``definitions`` holds (index name, column names, unique flag) triples.
        for index_name, columns, unique in definitions:
            op.create_index(index_name, table, columns, unique=unique)

    # --- datasets -----------------------------------------------------------
    dataset_columns = [
        sa.Column("dataset_id", uuid_str(), primary_key=True),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("description", sa.String(), nullable=True),
        # current_spec_id / latest_run_id are soft references maintained by
        # the application (no FK constraint), breaking the datasets/specs cycle.
        sa.Column("current_spec_id", uuid_str(), nullable=True),
        sa.Column("latest_run_id", uuid_str(), nullable=True),
        sa.Column("status", sa.String(), nullable=False, server_default="draft"),
        sa.Column("owner_id", uuid_str(), nullable=True),
        sa.Column("created_at", sa.String(), nullable=False),
        sa.Column("updated_at", sa.String(), nullable=False),
    ]
    op.create_table("datasets", *dataset_columns)
    build_indexes(
        "datasets",
        [
            ("ux_datasets_owner_name", ["owner_id", "name"], True),
            ("ix_datasets_status", ["status"], False),
        ],
    )

    # --- specs --------------------------------------------------------------
    spec_columns = [
        sa.Column("spec_id", uuid_str(), primary_key=True),
        sa.Column(
            "dataset_id",
            uuid_str(),
            cascading("datasets.dataset_id"),
            nullable=False,
        ),
        sa.Column("spec_hash", sa.String(length=64), nullable=False),
        sa.Column("body", sa.JSON(), nullable=False),
        sa.Column("datadoom_version", sa.String(), nullable=False),
        sa.Column("version", sa.Integer(), nullable=False),
        sa.Column("created_at", sa.String(), nullable=False),
    ]
    op.create_table("specs", *spec_columns)
    build_indexes(
        "specs",
        [
            ("ix_specs_dataset", ["dataset_id"], False),
            ("ix_specs_hash", ["spec_hash"], False),
            ("ux_specs_dataset_version", ["dataset_id", "version"], True),
        ],
    )

    # --- generation_runs ----------------------------------------------------
    run_columns = [
        sa.Column("run_id", uuid_str(), primary_key=True),
        sa.Column(
            "dataset_id",
            uuid_str(),
            cascading("datasets.dataset_id"),
            nullable=False,
        ),
        sa.Column(
            "spec_id",
            uuid_str(),
            cascading("specs.spec_id"),
            nullable=False,
        ),
        sa.Column("seed", sa.Integer(), nullable=False),
        sa.Column("status", sa.String(), nullable=False, server_default="queued"),
        sa.Column("stage", sa.String(), nullable=True),
        sa.Column("progress_pct", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("error", sa.JSON(), nullable=True),
        sa.Column("metrics", sa.JSON(), nullable=True),
        sa.Column("started_at", sa.String(), nullable=True),
        sa.Column("finished_at", sa.String(), nullable=True),
        sa.Column("created_at", sa.String(), nullable=False),
    ]
    op.create_table("generation_runs", *run_columns)
    build_indexes(
        "generation_runs",
        [
            ("ix_runs_dataset", ["dataset_id"], False),
            ("ix_runs_status", ["status"], False),
            ("ix_runs_repro", ["spec_id", "seed"], False),
        ],
    )

    # --- artifacts ----------------------------------------------------------
    artifact_columns = [
        sa.Column("artifact_id", uuid_str(), primary_key=True),
        sa.Column(
            "run_id",
            uuid_str(),
            cascading("generation_runs.run_id"),
            nullable=False,
        ),
        sa.Column("version", sa.String(), nullable=False),
        sa.Column("split", sa.String(), nullable=True),
        sa.Column("format", sa.String(), nullable=False),
        sa.Column("storage_uri", sa.String(), nullable=False),
        sa.Column("checksum_sha256", sa.String(length=64), nullable=False),
        sa.Column("size_bytes", sa.Integer(), nullable=False),
        sa.Column("created_at", sa.String(), nullable=False),
    ]
    op.create_table("artifacts", *artifact_columns)
    build_indexes("artifacts", [("ix_artifacts_run", ["run_id"], False)])

    # --- reports ------------------------------------------------------------
    # All report sections are nullable JSON columns, in this column order.
    report_json_fields = (
        "distribution",
        "correlation",
        "causal_truth",
        "difficulty",
        "failures",
        "determinism",
    )
    report_columns = [
        sa.Column("report_id", uuid_str(), primary_key=True),
        sa.Column(
            "run_id",
            uuid_str(),
            cascading("generation_runs.run_id"),
            nullable=False,
            unique=True,
        ),
        sa.Column("compliance_score", sa.Float(), nullable=True),
        *[sa.Column(field, sa.JSON(), nullable=True) for field in report_json_fields],
        sa.Column("created_at", sa.String(), nullable=False),
    ]
    op.create_table("reports", *report_columns)

    # --- plugins ------------------------------------------------------------
    plugin_columns = [
        sa.Column("name", sa.String(), primary_key=True),
        sa.Column("kind", sa.String(), nullable=False),
        sa.Column("version", sa.String(), nullable=True),
        sa.Column("schema", sa.JSON(), nullable=True),
        sa.Column("enabled", sa.Integer(), nullable=False, server_default="1"),
    ]
    op.create_table("plugins", *plugin_columns)
132
+
133
+
134
def downgrade() -> None:
    """Drop every table created by this revision, children before parents.

    For each table its named indexes are dropped first, then the table.
    """
    # (table, indexes to drop before the table), in teardown order.
    teardown_plan = (
        ("plugins", ()),
        ("reports", ()),
        ("artifacts", ("ix_artifacts_run",)),
        (
            "generation_runs",
            ("ix_runs_repro", "ix_runs_status", "ix_runs_dataset"),
        ),
        (
            "specs",
            ("ux_specs_dataset_version", "ix_specs_hash", "ix_specs_dataset"),
        ),
        ("datasets", ("ix_datasets_status", "ux_datasets_owner_name")),
    )
    for table, index_names in teardown_plan:
        for index_name in index_names:
            op.drop_index(index_name, table_name=table)
        op.drop_table(table)
@@ -0,0 +1,23 @@
1
+ """0002 — add ``reports.mutual_information`` (P2 causal: MI matrix, 05 §7).
2
+
3
+ Adds the JSON column that ``ReportRepository.upsert`` writes the mutual-information
4
+ matrix into. Separate from ``0001_init`` so databases created before this column
5
+ existed are upgraded in place on startup (``alembic upgrade head``).
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import sqlalchemy as sa
10
+ from alembic import op
11
+
12
+ revision = "0002_report_mutual_information"
13
+ down_revision = "0001_init"
14
+ branch_labels = None
15
+ depends_on = None
16
+
17
+
18
def upgrade() -> None:
    """Add the nullable JSON column ``reports.mutual_information``."""
    mi_column = sa.Column("mutual_information", sa.JSON(), nullable=True)
    op.add_column("reports", mi_column)
20
+
21
+
22
def downgrade() -> None:
    """Drop ``reports.mutual_information`` again."""
    op.drop_column(table_name="reports", column_name="mutual_information")
@@ -0,0 +1,23 @@
1
+ """0003 — add ``generation_runs.name`` (named generations).
2
+
3
+ Each generation gets an optional human label. Nullable so databases created
4
+ before naming existed upgrade in place; the UI requires a name on new runs and
5
+ falls back to the run id for older, unnamed rows.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import sqlalchemy as sa
10
+ from alembic import op
11
+
12
+ revision = "0003_run_name"
13
+ down_revision = "0002_report_mutual_information"
14
+ branch_labels = None
15
+ depends_on = None
16
+
17
+
18
def upgrade() -> None:
    """Add the nullable string column ``generation_runs.name``."""
    name_column = sa.Column("name", sa.String(), nullable=True)
    op.add_column("generation_runs", name_column)
20
+
21
+
22
def downgrade() -> None:
    """Drop ``generation_runs.name`` again."""
    op.drop_column(table_name="generation_runs", column_name="name")
@@ -0,0 +1,24 @@
1
+ """0004 — add ``reports.profile`` (per-column data profile / "Column Guide").
2
+
3
+ Adds the JSON column ``ReportRepository.upsert`` writes the per-column profile
4
+ into: summary statistics, role, causal parents, failure attribution, and
5
+ ML-handling advice per column. Separate from earlier revisions so databases
6
+ created before this column existed upgrade in place on startup.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import sqlalchemy as sa
11
+ from alembic import op
12
+
13
+ revision = "0004_report_profile"
14
+ down_revision = "0003_run_name"
15
+ branch_labels = None
16
+ depends_on = None
17
+
18
+
19
def upgrade() -> None:
    """Add the nullable JSON column ``reports.profile``."""
    profile_column = sa.Column("profile", sa.JSON(), nullable=True)
    op.add_column("reports", profile_column)
21
+
22
+
23
def downgrade() -> None:
    """Drop ``reports.profile`` again."""
    op.drop_column(table_name="reports", column_name="profile")
@@ -0,0 +1,170 @@
1
+ """SQLAlchemy ORM models — mirror docs 06 §3 and 07 §2.
2
+
3
+ Portable DDL (SQLite default, Postgres-compatible): UUID string PKs, ISO-8601 UTC
4
+ timestamps stored as TEXT, JSON columns via SQLAlchemy's generic ``JSON`` type
5
+ (SQLite ``JSON1`` / Postgres ``JSONB``).
6
+
7
+ Circular references (``datasets.current_spec_id`` ⇄ ``specs.dataset_id`` and
8
+ ``datasets.latest_run_id``) are kept as plain indexed columns — "soft" FKs the
9
+ application maintains — exactly as doc 07 notes, to avoid SQLite's
10
+ create-order / ALTER-FK limitations. The hard, cascade-enforced FKs all point
11
+ "downward" (spec → dataset, run → dataset/spec, artifact/report → run).
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from typing import Any
17
+
18
+ from sqlalchemy import JSON as SAJSON
19
+ from sqlalchemy import ForeignKey, Index, Integer, String
20
+ from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
21
+
22
+
23
class Base(DeclarativeBase):
    """Declarative base shared by every ORM row class in this module."""
25
+
26
+
27
class DatasetRow(Base):
    """ORM mapping for the ``datasets`` table.

    Owns its specs and generation runs; deleting a dataset relies on the
    database's ``ON DELETE CASCADE`` to remove them.
    """

    __tablename__ = "datasets"
    # NOTE(review): owner_id is nullable, and SQLite/PostgreSQL unique indexes
    # treat NULLs as distinct, so this index does not constrain names among
    # rows whose owner_id is NULL — confirm that is the intended behaviour.
    __table_args__ = (
        Index("ux_datasets_owner_name", "owner_id", "name", unique=True),
        Index("ix_datasets_status", "status"),
    )

    dataset_id: Mapped[str] = mapped_column(String(length=36), primary_key=True)
    name: Mapped[str] = mapped_column(String(), nullable=False)
    description: Mapped[str | None] = mapped_column(String(), nullable=True)
    # Soft references: plain columns without FK constraints, kept up to date
    # by the application.
    current_spec_id: Mapped[str | None] = mapped_column(
        String(length=36), nullable=True
    )
    latest_run_id: Mapped[str | None] = mapped_column(
        String(length=36), nullable=True
    )
    status: Mapped[str] = mapped_column(String(), nullable=False, default="draft")
    owner_id: Mapped[str | None] = mapped_column(String(length=36), nullable=True)
    created_at: Mapped[str] = mapped_column(String(), nullable=False)
    updated_at: Mapped[str] = mapped_column(String(), nullable=False)

    # passive_deletes=True leaves child removal to the database cascade, which
    # sidesteps ORM delete-ordering problems across the specs/runs FK.
    specs: Mapped[list[SpecRow]] = relationship(
        back_populates="dataset",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )
    runs: Mapped[list[GenerationRunRow]] = relationship(
        back_populates="dataset",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )
54
+
55
+
56
class SpecRow(Base):
    """ORM mapping for the ``specs`` table: one versioned spec of a dataset."""

    __tablename__ = "specs"
    __table_args__ = (
        Index("ix_specs_dataset", "dataset_id"),
        Index("ix_specs_hash", "spec_hash"),
        # A dataset cannot hold two specs with the same version number.
        Index("ux_specs_dataset_version", "dataset_id", "version", unique=True),
    )

    spec_id: Mapped[str] = mapped_column(String(length=36), primary_key=True)
    dataset_id: Mapped[str] = mapped_column(
        String(length=36),
        ForeignKey("datasets.dataset_id", ondelete="CASCADE"),
        nullable=False,
    )
    spec_hash: Mapped[str] = mapped_column(String(length=64), nullable=False)
    body: Mapped[dict[str, Any]] = mapped_column(SAJSON, nullable=False)
    datadoom_version: Mapped[str] = mapped_column(String(), nullable=False)
    version: Mapped[int] = mapped_column(Integer, nullable=False)
    created_at: Mapped[str] = mapped_column(String(), nullable=False)

    # Parent dataset; the reverse side is ``DatasetRow.specs``.
    dataset: Mapped[DatasetRow] = relationship(back_populates="specs")
76
+
77
+
78
class GenerationRunRow(Base):
    """ORM mapping for the ``generation_runs`` table.

    A run belongs to one dataset and one spec, and owns its artifacts and
    its (at most one) report.
    """

    __tablename__ = "generation_runs"
    __table_args__ = (
        Index("ix_runs_dataset", "dataset_id"),
        Index("ix_runs_status", "status"),
        Index("ix_runs_repro", "spec_id", "seed"),
    )

    run_id: Mapped[str] = mapped_column(String(length=36), primary_key=True)
    dataset_id: Mapped[str] = mapped_column(
        String(length=36),
        ForeignKey("datasets.dataset_id", ondelete="CASCADE"),
        nullable=False,
    )
    spec_id: Mapped[str] = mapped_column(
        String(length=36),
        ForeignKey("specs.spec_id", ondelete="CASCADE"),
        nullable=False,
    )
    # Optional human-readable label. Rows written before naming existed have
    # none; the UI requires one for new runs and displays the id otherwise.
    name: Mapped[str | None] = mapped_column(String(), nullable=True)
    seed: Mapped[int] = mapped_column(Integer, nullable=False)
    status: Mapped[str] = mapped_column(String(), nullable=False, default="queued")
    stage: Mapped[str | None] = mapped_column(String(), nullable=True)
    progress_pct: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    error: Mapped[dict[str, Any] | None] = mapped_column(SAJSON, nullable=True)
    metrics: Mapped[dict[str, Any] | None] = mapped_column(SAJSON, nullable=True)
    started_at: Mapped[str | None] = mapped_column(String(), nullable=True)
    finished_at: Mapped[str | None] = mapped_column(String(), nullable=True)
    created_at: Mapped[str] = mapped_column(String(), nullable=False)

    dataset: Mapped[DatasetRow] = relationship(back_populates="runs")
    # View-only link to the spec snapshot the run was generated from, letting
    # callers read its ``spec_hash`` (the version-control anchor).
    spec: Mapped[SpecRow] = relationship(viewonly=True)
    artifacts: Mapped[list[ArtifactRow]] = relationship(
        back_populates="run",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )
    report: Mapped[ReportRow | None] = relationship(
        back_populates="run",
        cascade="all, delete-orphan",
        uselist=False,
        passive_deletes=True,
    )
117
+
118
+
119
class ArtifactRow(Base):
    """ORM mapping for the ``artifacts`` table: one stored output of a run."""

    __tablename__ = "artifacts"
    __table_args__ = (Index("ix_artifacts_run", "run_id"),)

    artifact_id: Mapped[str] = mapped_column(String(length=36), primary_key=True)
    run_id: Mapped[str] = mapped_column(
        String(length=36),
        ForeignKey("generation_runs.run_id", ondelete="CASCADE"),
        nullable=False,
    )
    # Either "clean" or "injected".
    version: Mapped[str] = mapped_column(String(), nullable=False)
    split: Mapped[str | None] = mapped_column(String(), nullable=True)
    format: Mapped[str] = mapped_column(String(), nullable=False)
    storage_uri: Mapped[str] = mapped_column(String(), nullable=False)
    checksum_sha256: Mapped[str] = mapped_column(String(length=64), nullable=False)
    size_bytes: Mapped[int] = mapped_column(Integer, nullable=False)
    created_at: Mapped[str] = mapped_column(String(), nullable=False)

    # Owning run; the reverse side is ``GenerationRunRow.artifacts``.
    run: Mapped[GenerationRunRow] = relationship(back_populates="artifacts")
137
+
138
+
139
class ReportRow(Base):
    """ORM mapping for the ``reports`` table: at most one report per run.

    The one-per-run rule comes from the unique constraint on ``run_id``.
    Every report section is stored as a nullable JSON column.
    """

    __tablename__ = "reports"

    report_id: Mapped[str] = mapped_column(String(length=36), primary_key=True)
    run_id: Mapped[str] = mapped_column(
        String(length=36),
        ForeignKey("generation_runs.run_id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
    )
    # No explicit column type here: it is derived from the ``float`` annotation.
    compliance_score: Mapped[float | None] = mapped_column(nullable=True)
    distribution: Mapped[dict[str, Any] | None] = mapped_column(
        SAJSON, nullable=True
    )
    correlation: Mapped[dict[str, Any] | None] = mapped_column(
        SAJSON, nullable=True
    )
    mutual_information: Mapped[dict[str, Any] | None] = mapped_column(
        SAJSON, nullable=True
    )
    causal_truth: Mapped[dict[str, Any] | None] = mapped_column(
        SAJSON, nullable=True
    )
    difficulty: Mapped[dict[str, Any] | None] = mapped_column(
        SAJSON, nullable=True
    )
    failures: Mapped[dict[str, Any] | None] = mapped_column(
        SAJSON, nullable=True
    )
    profile: Mapped[dict[str, Any] | None] = mapped_column(
        SAJSON, nullable=True
    )
    determinism: Mapped[dict[str, Any] | None] = mapped_column(
        SAJSON, nullable=True
    )
    created_at: Mapped[str] = mapped_column(String(), nullable=False)

    # Owning run; the reverse side is ``GenerationRunRow.report``.
    run: Mapped[GenerationRunRow] = relationship(back_populates="report")
161
+
162
+
163
class PluginRow(Base):
    """ORM mapping for the ``plugins`` table, keyed by plugin name."""

    __tablename__ = "plugins"

    name: Mapped[str] = mapped_column(String(), primary_key=True)
    kind: Mapped[str] = mapped_column(String(), nullable=False)
    version: Mapped[str | None] = mapped_column(String(), nullable=True)
    schema: Mapped[dict[str, Any] | None] = mapped_column(SAJSON, nullable=True)
    # Stored as an integer column; the ORM-side default is 1.
    enabled: Mapped[int] = mapped_column(Integer, nullable=False, default=1)