datadoom 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datadoom/__init__.py +23 -0
- datadoom/adapters/__init__.py +29 -0
- datadoom/adapters/frameworks.py +94 -0
- datadoom/adapters/loaders.py +72 -0
- datadoom/api/__init__.py +11 -0
- datadoom/api/app.py +109 -0
- datadoom/api/deps.py +30 -0
- datadoom/api/errors.py +89 -0
- datadoom/api/estimate.py +82 -0
- datadoom/api/routes/__init__.py +7 -0
- datadoom/api/routes/artifacts.py +147 -0
- datadoom/api/routes/datasets.py +180 -0
- datadoom/api/routes/meta.py +45 -0
- datadoom/api/routes/plugins.py +22 -0
- datadoom/api/routes/runs.py +144 -0
- datadoom/api/routes/specs.py +73 -0
- datadoom/api/routes/templates.py +30 -0
- datadoom/api/schemas.py +230 -0
- datadoom/api/serializers.py +143 -0
- datadoom/api/state.py +24 -0
- datadoom/api/store_helpers.py +56 -0
- datadoom/api/ws.py +72 -0
- datadoom/cli/__init__.py +1 -0
- datadoom/cli/main.py +313 -0
- datadoom/config.py +108 -0
- datadoom/engine/__init__.py +38 -0
- datadoom/engine/advice.py +289 -0
- datadoom/engine/audit.py +290 -0
- datadoom/engine/causal/__init__.py +15 -0
- datadoom/engine/causal/execute.py +116 -0
- datadoom/engine/causal/functions.py +116 -0
- datadoom/engine/causal/graph.py +54 -0
- datadoom/engine/difficulty/__init__.py +36 -0
- datadoom/engine/difficulty/calibrate.py +235 -0
- datadoom/engine/difficulty/knobs.py +171 -0
- datadoom/engine/difficulty/probes.py +181 -0
- datadoom/engine/dist/__init__.py +35 -0
- datadoom/engine/dist/base.py +46 -0
- datadoom/engine/dist/builtins.py +172 -0
- datadoom/engine/dist/compliance.py +344 -0
- datadoom/engine/dist/providers.py +117 -0
- datadoom/engine/errors.py +32 -0
- datadoom/engine/export/__init__.py +27 -0
- datadoom/engine/export/base.py +49 -0
- datadoom/engine/export/checksums.py +18 -0
- datadoom/engine/export/csv_exporter.py +34 -0
- datadoom/engine/export/json_exporter.py +67 -0
- datadoom/engine/export/metadata.py +58 -0
- datadoom/engine/export/parquet_exporter.py +45 -0
- datadoom/engine/failure/__init__.py +18 -0
- datadoom/engine/failure/apply.py +37 -0
- datadoom/engine/failure/base.py +116 -0
- datadoom/engine/failure/modes.py +442 -0
- datadoom/engine/pipeline.py +418 -0
- datadoom/engine/profile.py +327 -0
- datadoom/engine/progress.py +14 -0
- datadoom/engine/reference.py +338 -0
- datadoom/engine/reports.py +206 -0
- datadoom/engine/rng.py +79 -0
- datadoom/engine/spec/__init__.py +45 -0
- datadoom/engine/spec/hashing.py +57 -0
- datadoom/engine/spec/models.py +238 -0
- datadoom/engine/spec/validate.py +345 -0
- datadoom/engine/timeseries.py +88 -0
- datadoom/jobs/__init__.py +14 -0
- datadoom/jobs/progress.py +155 -0
- datadoom/jobs/worker.py +162 -0
- datadoom/plugin.py +35 -0
- datadoom/plugins/__init__.py +47 -0
- datadoom/plugins/contracts.py +72 -0
- datadoom/plugins/loader.py +125 -0
- datadoom/plugins/registry.py +214 -0
- datadoom/plugins/scaffold.py +434 -0
- datadoom/store/__init__.py +47 -0
- datadoom/store/artifacts.py +67 -0
- datadoom/store/db.py +104 -0
- datadoom/store/migrations/__init__.py +0 -0
- datadoom/store/migrations/env.py +53 -0
- datadoom/store/migrations/script.py.mako +24 -0
- datadoom/store/migrations/versions/0001_init.py +149 -0
- datadoom/store/migrations/versions/0002_report_mutual_information.py +23 -0
- datadoom/store/migrations/versions/0003_run_name.py +23 -0
- datadoom/store/migrations/versions/0004_report_profile.py +24 -0
- datadoom/store/models.py +170 -0
- datadoom/store/repositories.py +279 -0
- datadoom/templates/__init__.py +239 -0
- datadoom/templates/ab_test.datadoom.yaml +46 -0
- datadoom/templates/clinical_deterioration.datadoom.yaml +124 -0
- datadoom/templates/credit_default_challenge.datadoom.yaml +147 -0
- datadoom/templates/customer_churn.datadoom.yaml +60 -0
- datadoom/templates/ecommerce_orders.datadoom.yaml +46 -0
- datadoom/templates/fraud_detection.datadoom.yaml +57 -0
- datadoom/templates/hospital_readmission.datadoom.yaml +61 -0
- datadoom/templates/insurance_claims.datadoom.yaml +43 -0
- datadoom/templates/iot_sensors.datadoom.yaml +44 -0
- datadoom/templates/people_directory.datadoom.yaml +56 -0
- datadoom/templates/predictive_maintenance.datadoom.yaml +107 -0
- datadoom/templates/telecom_churn_challenge.datadoom.yaml +125 -0
- datadoom/version.py +3 -0
- datadoom/webdist/assets/index-V8VAuTJG.js +445 -0
- datadoom/webdist/assets/index-doRjyG5s.css +1 -0
- datadoom/webdist/assets/inter-cyrillic-ext-wght-normal-BOeWTOD4.woff2 +0 -0
- datadoom/webdist/assets/inter-cyrillic-wght-normal-DqGufNeO.woff2 +0 -0
- datadoom/webdist/assets/inter-greek-ext-wght-normal-DlzME5K_.woff2 +0 -0
- datadoom/webdist/assets/inter-greek-wght-normal-CkhJZR-_.woff2 +0 -0
- datadoom/webdist/assets/inter-latin-ext-wght-normal-DO1Apj_S.woff2 +0 -0
- datadoom/webdist/assets/inter-latin-wght-normal-Dx4kXJAl.woff2 +0 -0
- datadoom/webdist/assets/inter-vietnamese-wght-normal-CBcvBZtf.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-cyrillic-wght-normal-D73BlboJ.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-greek-wght-normal-Bw9x6K1M.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-latin-ext-wght-normal-DBQx-q_a.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-latin-wght-normal-B9CIFXIH.woff2 +0 -0
- datadoom/webdist/assets/jetbrains-mono-vietnamese-wght-normal-Bt-aOZkq.woff2 +0 -0
- datadoom/webdist/assets/space-grotesk-latin-ext-wght-normal-D9tNdqV9.woff2 +0 -0
- datadoom/webdist/assets/space-grotesk-latin-wght-normal-BhU9QXUp.woff2 +0 -0
- datadoom/webdist/assets/space-grotesk-vietnamese-wght-normal-D0rl6rjA.woff2 +0 -0
- datadoom/webdist/index.html +15 -0
- datadoom-0.1.0.dev0.dist-info/METADATA +143 -0
- datadoom-0.1.0.dev0.dist-info/RECORD +122 -0
- datadoom-0.1.0.dev0.dist-info/WHEEL +4 -0
- datadoom-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- datadoom-0.1.0.dev0.dist-info/licenses/LICENSE +202 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Alembic environment (07 §5).
|
|
2
|
+
|
|
3
|
+
Driven programmatically from ``store.db.init_database`` — the URL is injected via
|
|
4
|
+
``Config.set_main_option("sqlalchemy.url", ...)``. We bind ``target_metadata`` to
|
|
5
|
+
the ORM ``Base`` so ``alembic revision --autogenerate`` stays usable, but the
|
|
6
|
+
checked-in linear history is authoritative.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from alembic import context
|
|
12
|
+
from sqlalchemy import engine_from_config, pool
|
|
13
|
+
|
|
14
|
+
from datadoom.store.models import Base
|
|
15
|
+
|
|
16
|
+
# Alembic configuration object for the current migration run.
config = context.config
# Metadata of the ORM declarative ``Base``; handed to ``context.configure`` below.
target_metadata = Base.metadata
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def run_migrations_offline() -> None:
    """Run the migrations in offline mode.

    The context is configured from the ``sqlalchemy.url`` main option only —
    no engine or connection is created here — and the migrations then run
    inside a single ``begin_transaction()`` block.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
        "render_as_batch": True,
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def run_migrations_online() -> None:
    """Run the migrations in online mode against a live connection.

    An engine is built from the ``sqlalchemy.``-prefixed options of the active
    ini section using ``NullPool``, the migrations run on one connection inside
    a transaction, and the engine is disposed afterwards.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
            render_as_batch=True,
        )
        with context.begin_transaction():
            context.run_migrations()

    engine.dispose()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Module-level entry point: run whichever mode the Alembic context reports.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
## Mako template for new Alembic revision files.
## Lines starting with "##" are Mako comments and are not written to the output.
## Every ${...} placeholder below is filled in by Alembic at render time.
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from __future__ import annotations

from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

## Revision identifiers of the generated migration.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}


def upgrade() -> None:
    ## Falls back to "pass" so the generated function body is never empty.
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    ## Falls back to "pass" so the generated function body is never empty.
    ${downgrades if downgrades else "pass"}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""0001 init — core tables (07 §2, no team-mode ``users``).
|
|
2
|
+
|
|
3
|
+
Revision ID: 0001_init
|
|
4
|
+
Revises:
|
|
5
|
+
Create Date: 2026-06-01
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import sqlalchemy as sa
|
|
10
|
+
from alembic import op
|
|
11
|
+
|
|
12
|
+
# Alembic revision identifiers. ``down_revision = None`` makes this the root
# of the migration history.
revision = "0001_init"
down_revision = None
branch_labels = None
depends_on = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def upgrade() -> None:
    """Create the initial tables and their secondary indexes.

    Tables are created parent-first (``datasets`` → ``specs`` →
    ``generation_runs`` → ``artifacts`` / ``reports``) so that every foreign
    key targets a table that already exists; ``plugins`` has no references.
    """

    def id_column(column_name):
        # 36-character string primary key.
        return sa.Column(column_name, sa.String(length=36), primary_key=True)

    def cascade_ref(column_name, target, **extra):
        # Required 36-character foreign key declared with ON DELETE CASCADE.
        return sa.Column(
            column_name,
            sa.String(length=36),
            sa.ForeignKey(target, ondelete="CASCADE"),
            nullable=False,
            **extra,
        )

    def text_column(column_name, *, nullable):
        # Unbounded string column.
        return sa.Column(column_name, sa.String(), nullable=nullable)

    def json_column(column_name, *, nullable=True):
        # Generic JSON column; nullable unless stated otherwise.
        return sa.Column(column_name, sa.JSON(), nullable=nullable)

    # --- datasets -----------------------------------------------------------
    op.create_table(
        "datasets",
        id_column("dataset_id"),
        text_column("name", nullable=False),
        text_column("description", nullable=True),
        # Soft references (app-maintained) to break the datasets⇄specs cycle.
        sa.Column("current_spec_id", sa.String(length=36), nullable=True),
        sa.Column("latest_run_id", sa.String(length=36), nullable=True),
        sa.Column("status", sa.String(), nullable=False, server_default="draft"),
        sa.Column("owner_id", sa.String(length=36), nullable=True),
        text_column("created_at", nullable=False),
        text_column("updated_at", nullable=False),
    )
    op.create_index(
        "ux_datasets_owner_name", "datasets", ["owner_id", "name"], unique=True
    )
    op.create_index("ix_datasets_status", "datasets", ["status"])

    # --- specs --------------------------------------------------------------
    op.create_table(
        "specs",
        id_column("spec_id"),
        cascade_ref("dataset_id", "datasets.dataset_id"),
        sa.Column("spec_hash", sa.String(length=64), nullable=False),
        json_column("body", nullable=False),
        text_column("datadoom_version", nullable=False),
        sa.Column("version", sa.Integer(), nullable=False),
        text_column("created_at", nullable=False),
    )
    op.create_index("ix_specs_dataset", "specs", ["dataset_id"])
    op.create_index("ix_specs_hash", "specs", ["spec_hash"])
    op.create_index(
        "ux_specs_dataset_version", "specs", ["dataset_id", "version"], unique=True
    )

    # --- generation_runs ----------------------------------------------------
    op.create_table(
        "generation_runs",
        id_column("run_id"),
        cascade_ref("dataset_id", "datasets.dataset_id"),
        cascade_ref("spec_id", "specs.spec_id"),
        sa.Column("seed", sa.Integer(), nullable=False),
        sa.Column("status", sa.String(), nullable=False, server_default="queued"),
        text_column("stage", nullable=True),
        sa.Column("progress_pct", sa.Integer(), nullable=False, server_default="0"),
        json_column("error"),
        json_column("metrics"),
        text_column("started_at", nullable=True),
        text_column("finished_at", nullable=True),
        text_column("created_at", nullable=False),
    )
    op.create_index("ix_runs_dataset", "generation_runs", ["dataset_id"])
    op.create_index("ix_runs_status", "generation_runs", ["status"])
    op.create_index("ix_runs_repro", "generation_runs", ["spec_id", "seed"])

    # --- artifacts ----------------------------------------------------------
    op.create_table(
        "artifacts",
        id_column("artifact_id"),
        cascade_ref("run_id", "generation_runs.run_id"),
        text_column("version", nullable=False),
        text_column("split", nullable=True),
        text_column("format", nullable=False),
        text_column("storage_uri", nullable=False),
        sa.Column("checksum_sha256", sa.String(length=64), nullable=False),
        sa.Column("size_bytes", sa.Integer(), nullable=False),
        text_column("created_at", nullable=False),
    )
    op.create_index("ix_artifacts_run", "artifacts", ["run_id"])

    # --- reports (``run_id`` is unique: at most one report per run) ---------
    op.create_table(
        "reports",
        id_column("report_id"),
        cascade_ref("run_id", "generation_runs.run_id", unique=True),
        sa.Column("compliance_score", sa.Float(), nullable=True),
        json_column("distribution"),
        json_column("correlation"),
        json_column("causal_truth"),
        json_column("difficulty"),
        json_column("failures"),
        json_column("determinism"),
        text_column("created_at", nullable=False),
    )

    # --- plugins ------------------------------------------------------------
    op.create_table(
        "plugins",
        sa.Column("name", sa.String(), primary_key=True),
        text_column("kind", nullable=False),
        text_column("version", nullable=True),
        json_column("schema"),
        sa.Column("enabled", sa.Integer(), nullable=False, server_default="1"),
    )
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def downgrade() -> None:
    """Drop everything ``upgrade`` created, children before parents.

    For each table its explicitly created indexes are dropped first (in the
    reverse of their creation order), then the table itself.
    """
    teardown_plan = (
        ("plugins", ()),
        ("reports", ()),
        ("artifacts", ("ix_artifacts_run",)),
        ("generation_runs", ("ix_runs_repro", "ix_runs_status", "ix_runs_dataset")),
        ("specs", ("ux_specs_dataset_version", "ix_specs_hash", "ix_specs_dataset")),
        ("datasets", ("ix_datasets_status", "ux_datasets_owner_name")),
    )
    for table, index_names in teardown_plan:
        for index_name in index_names:
            op.drop_index(index_name, table_name=table)
        op.drop_table(table)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""0002 — add ``reports.mutual_information`` (P2 causal: MI matrix, 05 §7).
|
|
2
|
+
|
|
3
|
+
Adds the JSON column that ``ReportRepository.upsert`` writes the mutual-information
|
|
4
|
+
matrix into. Separate from ``0001_init`` so databases created before this column
|
|
5
|
+
existed are upgraded in place on startup (``alembic upgrade head``).
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import sqlalchemy as sa
|
|
10
|
+
from alembic import op
|
|
11
|
+
|
|
12
|
+
# Alembic revision identifiers; this revision follows ``0001_init``.
revision = "0002_report_mutual_information"
down_revision = "0001_init"
branch_labels = None
depends_on = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def upgrade() -> None:
    """Add the nullable JSON column ``mutual_information`` to ``reports``."""
    mutual_information = sa.Column("mutual_information", sa.JSON(), nullable=True)
    op.add_column("reports", mutual_information)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def downgrade() -> None:
    """Drop ``reports.mutual_information`` again."""
    op.drop_column(table_name="reports", column_name="mutual_information")
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""0003 — add ``generation_runs.name`` (named generations).
|
|
2
|
+
|
|
3
|
+
Each generation gets an optional human label. Nullable so databases created
|
|
4
|
+
before naming existed upgrade in place; the UI requires a name on new runs and
|
|
5
|
+
falls back to the run id for older, unnamed rows.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import sqlalchemy as sa
|
|
10
|
+
from alembic import op
|
|
11
|
+
|
|
12
|
+
# Alembic revision identifiers; this revision follows
# ``0002_report_mutual_information``.
revision = "0003_run_name"
down_revision = "0002_report_mutual_information"
branch_labels = None
depends_on = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def upgrade() -> None:
    """Add the nullable string column ``name`` to ``generation_runs``."""
    run_name = sa.Column("name", sa.String(), nullable=True)
    op.add_column("generation_runs", run_name)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def downgrade() -> None:
    """Drop ``generation_runs.name`` again."""
    op.drop_column(table_name="generation_runs", column_name="name")
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""0004 — add ``reports.profile`` (per-column data profile / "Column Guide").
|
|
2
|
+
|
|
3
|
+
Adds the JSON column ``ReportRepository.upsert`` writes the per-column profile
|
|
4
|
+
into: summary statistics, role, causal parents, failure attribution, and
|
|
5
|
+
ML-handling advice per column. Separate from earlier revisions so databases
|
|
6
|
+
created before this column existed upgrade in place on startup.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import sqlalchemy as sa
|
|
11
|
+
from alembic import op
|
|
12
|
+
|
|
13
|
+
# Alembic revision identifiers; this revision follows ``0003_run_name``.
revision = "0004_report_profile"
down_revision = "0003_run_name"
branch_labels = None
depends_on = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def upgrade() -> None:
    """Add the nullable JSON column ``profile`` to ``reports``."""
    profile = sa.Column("profile", sa.JSON(), nullable=True)
    op.add_column("reports", profile)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def downgrade() -> None:
    """Drop ``reports.profile`` again."""
    op.drop_column(table_name="reports", column_name="profile")
|
datadoom/store/models.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""SQLAlchemy ORM models — mirror docs 06 §3 and 07 §2.
|
|
2
|
+
|
|
3
|
+
Portable DDL (SQLite default, Postgres-compatible): UUID string PKs, ISO-8601 UTC
|
|
4
|
+
timestamps stored as TEXT, JSON columns via SQLAlchemy's generic ``JSON`` type
|
|
5
|
+
(SQLite ``JSON1`` / Postgres ``JSONB``).
|
|
6
|
+
|
|
7
|
+
Circular references (``datasets.current_spec_id`` ⇄ ``specs.dataset_id`` and
|
|
8
|
+
``datasets.latest_run_id``) are kept as plain indexed columns — "soft" FKs the
|
|
9
|
+
application maintains — exactly as doc 07 notes, to avoid SQLite's
|
|
10
|
+
create-order / ALTER-FK limitations. The hard, cascade-enforced FKs all point
|
|
11
|
+
"downward" (spec → dataset, run → dataset/spec, artifact/report → run).
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from sqlalchemy import JSON as SAJSON
|
|
19
|
+
from sqlalchemy import ForeignKey, Index, Integer, String
|
|
20
|
+
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Base(DeclarativeBase):
    """Declarative base class shared by every ORM model in this module."""
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class DatasetRow(Base):
    """ORM row for the ``datasets`` table, parent of specs and generation runs."""

    __tablename__ = "datasets"
    __table_args__ = (
        # Unique over the (owner_id, name) pair.
        Index("ux_datasets_owner_name", "owner_id", "name", unique=True),
        Index("ix_datasets_status", "status"),
    )

    dataset_id: Mapped[str] = mapped_column(String(36), primary_key=True)
    name: Mapped[str] = mapped_column(String, nullable=False)
    description: Mapped[str | None] = mapped_column(String, nullable=True)

    # Soft references: plain columns without a ForeignKey, maintained by the
    # application rather than the database (see module docstring).
    current_spec_id: Mapped[str | None] = mapped_column(String(36), nullable=True)
    latest_run_id: Mapped[str | None] = mapped_column(String(36), nullable=True)

    status: Mapped[str] = mapped_column(String, nullable=False, default="draft")
    owner_id: Mapped[str | None] = mapped_column(String(36), nullable=True)
    created_at: Mapped[str] = mapped_column(String, nullable=False)
    updated_at: Mapped[str] = mapped_column(String, nullable=False)

    # With passive_deletes the database's ON DELETE CASCADE removes the child
    # rows in one statement, which sidesteps ORM delete-ordering issues across
    # the specs⇄runs foreign key.
    specs: Mapped[list[SpecRow]] = relationship(
        back_populates="dataset",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )
    runs: Mapped[list[GenerationRunRow]] = relationship(
        back_populates="dataset",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class SpecRow(Base):
    """ORM row for the ``specs`` table: one versioned spec body per dataset."""

    __tablename__ = "specs"
    __table_args__ = (
        Index("ix_specs_dataset", "dataset_id"),
        Index("ix_specs_hash", "spec_hash"),
        # A version number may appear only once per dataset.
        Index("ux_specs_dataset_version", "dataset_id", "version", unique=True),
    )

    spec_id: Mapped[str] = mapped_column(String(36), primary_key=True)
    # Hard foreign key: declared with ON DELETE CASCADE on the owning dataset.
    dataset_id: Mapped[str] = mapped_column(
        String(36),
        ForeignKey("datasets.dataset_id", ondelete="CASCADE"),
        nullable=False,
    )
    spec_hash: Mapped[str] = mapped_column(String(64), nullable=False)
    body: Mapped[dict[str, Any]] = mapped_column(SAJSON, nullable=False)
    datadoom_version: Mapped[str] = mapped_column(String, nullable=False)
    version: Mapped[int] = mapped_column(Integer, nullable=False)
    created_at: Mapped[str] = mapped_column(String, nullable=False)

    dataset: Mapped[DatasetRow] = relationship(back_populates="specs")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class GenerationRunRow(Base):
    """ORM row for the ``generation_runs`` table: one run of a spec with a seed."""

    __tablename__ = "generation_runs"
    __table_args__ = (
        Index("ix_runs_dataset", "dataset_id"),
        Index("ix_runs_status", "status"),
        Index("ix_runs_repro", "spec_id", "seed"),
    )

    run_id: Mapped[str] = mapped_column(String(36), primary_key=True)
    dataset_id: Mapped[str] = mapped_column(
        String(36),
        ForeignKey("datasets.dataset_id", ondelete="CASCADE"),
        nullable=False,
    )
    spec_id: Mapped[str] = mapped_column(
        String(36),
        ForeignKey("specs.spec_id", ondelete="CASCADE"),
        nullable=False,
    )
    # Human-readable label. Nullable because rows created before naming existed
    # have none; the UI requires one on new runs and shows the id otherwise.
    name: Mapped[str | None] = mapped_column(String, nullable=True)
    seed: Mapped[int] = mapped_column(Integer, nullable=False)
    status: Mapped[str] = mapped_column(String, nullable=False, default="queued")
    stage: Mapped[str | None] = mapped_column(String, nullable=True)
    progress_pct: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    error: Mapped[dict[str, Any] | None] = mapped_column(SAJSON, nullable=True)
    metrics: Mapped[dict[str, Any] | None] = mapped_column(SAJSON, nullable=True)
    started_at: Mapped[str | None] = mapped_column(String, nullable=True)
    finished_at: Mapped[str | None] = mapped_column(String, nullable=True)
    created_at: Mapped[str] = mapped_column(String, nullable=False)

    dataset: Mapped[DatasetRow] = relationship(back_populates="runs")
    # View-only link to the immutable spec snapshot this run was generated
    # from, so callers can read its ``spec_hash`` (the version-control anchor).
    spec: Mapped[SpecRow] = relationship(viewonly=True)
    artifacts: Mapped[list[ArtifactRow]] = relationship(
        back_populates="run",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )
    # ``uselist=False`` makes this a scalar: a run has at most one report.
    report: Mapped[ReportRow | None] = relationship(
        back_populates="run",
        cascade="all, delete-orphan",
        uselist=False,
        passive_deletes=True,
    )
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class ArtifactRow(Base):
    """ORM row for the ``artifacts`` table: one stored output file of a run."""

    __tablename__ = "artifacts"
    __table_args__ = (Index("ix_artifacts_run", "run_id"),)

    artifact_id: Mapped[str] = mapped_column(String(36), primary_key=True)
    run_id: Mapped[str] = mapped_column(
        String(36),
        ForeignKey("generation_runs.run_id", ondelete="CASCADE"),
        nullable=False,
    )
    # Which variant of the data this file holds: clean | injected.
    version: Mapped[str] = mapped_column(String, nullable=False)
    split: Mapped[str | None] = mapped_column(String, nullable=True)
    format: Mapped[str] = mapped_column(String, nullable=False)
    storage_uri: Mapped[str] = mapped_column(String, nullable=False)
    checksum_sha256: Mapped[str] = mapped_column(String(64), nullable=False)
    size_bytes: Mapped[int] = mapped_column(Integer, nullable=False)
    created_at: Mapped[str] = mapped_column(String, nullable=False)

    run: Mapped[GenerationRunRow] = relationship(back_populates="artifacts")
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class ReportRow(Base):
    """ORM row for the ``reports`` table: the report attached to one run.

    ``run_id`` is unique, so a generation run has at most one report. All
    report sections are optional JSON documents.
    """

    __tablename__ = "reports"

    report_id: Mapped[str] = mapped_column(String(36), primary_key=True)
    run_id: Mapped[str] = mapped_column(
        String(36),
        ForeignKey("generation_runs.run_id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
    )
    # No explicit column type: it is derived from the ``float | None`` annotation.
    compliance_score: Mapped[float | None] = mapped_column(nullable=True)

    # Report sections, each stored as a nullable JSON document.
    distribution: Mapped[dict[str, Any] | None] = mapped_column(SAJSON, nullable=True)
    correlation: Mapped[dict[str, Any] | None] = mapped_column(SAJSON, nullable=True)
    mutual_information: Mapped[dict[str, Any] | None] = mapped_column(
        SAJSON, nullable=True
    )
    causal_truth: Mapped[dict[str, Any] | None] = mapped_column(SAJSON, nullable=True)
    difficulty: Mapped[dict[str, Any] | None] = mapped_column(SAJSON, nullable=True)
    failures: Mapped[dict[str, Any] | None] = mapped_column(SAJSON, nullable=True)
    profile: Mapped[dict[str, Any] | None] = mapped_column(SAJSON, nullable=True)
    determinism: Mapped[dict[str, Any] | None] = mapped_column(SAJSON, nullable=True)

    created_at: Mapped[str] = mapped_column(String, nullable=False)

    run: Mapped[GenerationRunRow] = relationship(back_populates="report")
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class PluginRow(Base):
    """ORM row for the ``plugins`` table, keyed by plugin name."""

    __tablename__ = "plugins"

    name: Mapped[str] = mapped_column(String, primary_key=True)
    kind: Mapped[str] = mapped_column(String, nullable=False)
    version: Mapped[str | None] = mapped_column(String, nullable=True)
    schema: Mapped[dict[str, Any] | None] = mapped_column(SAJSON, nullable=True)
    # Stored as an integer column; defaults to 1.
    enabled: Mapped[int] = mapped_column(Integer, nullable=False, default=1)
|