interloper-db 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,57 @@
1
+ from interloper_db.engine import get_engine, init_engine
2
+ from interloper_db.models import (
3
+ Asset,
4
+ AssetDependency,
5
+ AssetDestination,
6
+ AssetResource,
7
+ Backfill,
8
+ Destination,
9
+ DestinationResource,
10
+ Event,
11
+ Invitation,
12
+ Job,
13
+ JobAsset,
14
+ JobSource,
15
+ Organisation,
16
+ Profile,
17
+ Resource,
18
+ Run,
19
+ Session,
20
+ Source,
21
+ SourceDestination,
22
+ SourceResource,
23
+ UserOrganisation,
24
+ )
25
+ from interloper_db.provision import create_all, downgrade, ensure_database, upgrade
26
+ from interloper_db.store import Store
27
+
28
# Public names re-exported from the package root.
__all__ = [
    # ORM models and the Store facade.
    "Asset",
    "AssetDependency",
    "AssetDestination",
    "AssetResource",
    "Backfill",
    "Destination",
    "DestinationResource",
    "Event",
    "Invitation",
    "Job",
    "JobAsset",
    "JobSource",
    "Organisation",
    "Profile",
    "Resource",
    "Run",
    "Session",
    "Source",
    "SourceDestination",
    "SourceResource",
    "Store",
    "UserOrganisation",
    # Engine lifecycle and schema provisioning helpers.
    "create_all",
    "downgrade",
    "ensure_database",
    "get_engine",
    "init_engine",
    "upgrade",
]
@@ -0,0 +1,46 @@
1
+ """Database engine singleton."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+
7
+ from sqlalchemy import Engine, create_engine
8
+
9
+ _engine: Engine | None = None
10
+
11
+
12
def init_engine(dsn: str | None = None, **kwargs: object) -> Engine:
    """Initialize (or re-initialize) the global database engine.

    Args:
        dsn: PostgreSQL connection string. Falls back to the ``DATABASE_URL``
            environment variable when omitted or empty.
        **kwargs: Additional kwargs forwarded to ``create_engine``.

    Returns:
        The newly created SQLAlchemy engine, which is also stored as the
        module-level singleton returned by ``get_engine``.

    Raises:
        ConfigError: If no DSN is provided and ``DATABASE_URL`` is not set.
    """
    global _engine  # noqa: PLW0603
    dsn = dsn or os.getenv("DATABASE_URL")
    if not dsn:
        # Imported lazily so the core package is only needed on the error path.
        from interloper.errors import ConfigError

        raise ConfigError("Database DSN required: pass dsn= or set DATABASE_URL")

    previous = _engine
    # Create the replacement first: if create_engine raises, the existing
    # singleton stays in place and untouched.
    _engine = create_engine(dsn, **kwargs)
    if previous is not None:
        # Release the pooled connections of the engine being replaced so that
        # repeated initialisation does not leak a connection pool.
        previous.dispose()
    return _engine
33
+
34
+
35
def get_engine() -> Engine:
    """Fetch the process-wide engine created by ``init_engine``.

    Returns:
        The SQLAlchemy engine singleton.

    Raises:
        RuntimeError: If ``init_engine`` has not been called.
    """
    engine = _engine
    if engine is not None:
        return engine
    raise RuntimeError("Database engine not initialized. Call init_engine() first.")
@@ -0,0 +1,335 @@
1
+ """Hydration: translates DB rows into ``ComponentSpec`` trees.
2
+
3
+ This module is a pure transformation layer. It reads rows from the
4
+ database and builds the ``ComponentSpec`` tree that ``Component.from_spec``
5
+ expects. No framework classes are instantiated here — reconstruction
6
+ happens at the call site via ``spec.reconstruct()``::
7
+
8
+ hydrator = Hydrator(catalog, decrypt=decrypt_fn)
9
+ with Session(engine) as session:
10
+ db_source = session.get(Source, source_id, options=[...])
11
+ spec = hydrator.build_source_spec(session, db_source)
12
+ source = spec.reconstruct()
13
+
14
+ The Store wraps this pattern in thin ``load_*`` convenience methods, but
15
+ any caller can use the hydrator directly to assemble a spec (for example,
16
+ to serialize it to JSON and send it across a process boundary).
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ from collections.abc import Callable, Iterable
23
+ from typing import Any
24
+ from uuid import UUID
25
+
26
+ from interloper.catalog.base import Catalog
27
+ from interloper.component.base import ComponentSpec
28
+ from interloper.errors import CatalogKeyError
29
+ from sqlmodel import Session, select
30
+
31
+ from interloper_db.models import (
32
+ Asset,
33
+ AssetDependency,
34
+ AssetResource,
35
+ Destination,
36
+ DestinationResource,
37
+ Resource,
38
+ Source,
39
+ SourceResource,
40
+ )
41
+
42
+
43
class Hydrator:
    """Builds ``ComponentSpec`` trees from DB rows.

    The hydrator holds a catalog (for import-path lookups) and an optional
    decrypt callable (for resource data). All methods are pure
    transformations — they read rows and return specs without ever
    instantiating framework classes. Reconstruction is the caller's job.
    """

    def __init__(
        self,
        catalog: Catalog,
        decrypt: Callable[[bytes], bytes] | None = None,
    ) -> None:
        """Initialize the hydrator.

        Args:
            catalog: Catalog used to resolve ``key → import path``.
            decrypt: Optional ``(bytes) -> bytes`` callable for decrypting
                resource data blobs marked ``encrypted=True``.
        """
        self._catalog = catalog
        self._decrypt = decrypt

    # ------------------------------------------------------------------
    # Resource
    # ------------------------------------------------------------------

    def build_resource_spec(self, db_resource: Resource) -> ComponentSpec:
        """Build a spec from a Resource row.

        Resources are leaves — they carry their full state in ``data``
        and have no nested specs. No session is needed.

        Args:
            db_resource: The Resource row.

        Returns:
            A ``ComponentSpec`` with the row's ``id`` and decoded data.

        Raises:
            CatalogKeyError: If the catalog has no entry for the row's key.
        """
        path = self._resolve_path(db_resource.key, kind="resource")
        init = self.decode_resource_data(db_resource)
        return ComponentSpec(
            path=path,
            id=self._spec_id(db_resource.id),
            init=init or None,
        )

    def decode_resource_data(self, db_resource: Resource) -> dict[str, Any]:
        """Decrypt (when needed) and JSON-decode a resource's data blob.

        Args:
            db_resource: The Resource row whose ``data`` bytes should be
                decoded.

        Returns:
            The decoded configuration dict, or ``{}`` if the row carries
            no data.

        Raises:
            ValueError: If the blob is not valid JSON. When the row is
                marked encrypted and no decrypt callable was configured,
                the message says so instead of surfacing a bare decode
                error.
        """
        if db_resource.data is None:
            return {}
        raw = db_resource.data
        if db_resource.encrypted and self._decrypt:
            raw = self._decrypt(raw)
        try:
            return json.loads(raw)
        except ValueError as err:
            # JSONDecodeError and UnicodeDecodeError both derive from
            # ValueError, so callers catching ValueError are unaffected.
            if db_resource.encrypted and not self._decrypt:
                raise ValueError(
                    "Resource data is marked encrypted but the Hydrator was "
                    "created without a decrypt callable"
                ) from err
            raise

    # ------------------------------------------------------------------
    # Destination
    # ------------------------------------------------------------------

    def build_destination_spec(
        self,
        session: Session,
        db_destination: Destination,
    ) -> ComponentSpec:
        """Build a spec from a Destination row.

        Captures the destination's ``config`` dict and resolves its
        resource bindings as nested resource specs under ``init.resources``.

        Args:
            session: Active DB session.
            db_destination: The Destination row.

        Returns:
            A ``ComponentSpec`` with the row's ``id`` and resolved bindings.

        Raises:
            CatalogKeyError: If the catalog has no entry for the row's key.
        """
        path = self._resolve_path(db_destination.key, kind="destination")

        # Copy so the row's own config dict is never mutated below.
        init: dict[str, Any] = dict(db_destination.config) if db_destination.config else {}

        bindings = session.exec(
            select(DestinationResource).where(DestinationResource.destination_id == db_destination.id)
        ).all()
        resources = self._resource_specs_from_bindings(session, bindings)
        if resources:
            init["resources"] = resources

        return ComponentSpec(
            path=path,
            id=self._spec_id(db_destination.id),
            init=init or None,
        )

    # ------------------------------------------------------------------
    # Asset
    # ------------------------------------------------------------------

    def build_asset_spec(
        self,
        session: Session,
        db_asset: Asset,
    ) -> ComponentSpec:
        """Build a spec from a standalone Asset row.

        Source-owned assets should not be built through this method —
        they're embedded in the parent source's spec via
        :meth:`build_source_spec`, which handles the
        ``"source_path:asset_key"`` path convention.

        Args:
            session: Active DB session.
            db_asset: A standalone Asset row (``source_id`` is ``None``).

        Returns:
            A ``ComponentSpec`` with the row's ``id`` and all per-asset
            state (materializable, config, resources, destinations, deps).

        Raises:
            CatalogKeyError: If the catalog has no entry for the row's key.
        """
        path = self._resolve_path(db_asset.key, kind="asset")
        init = self._build_asset_init(session, db_asset)
        return ComponentSpec(
            path=path,
            id=self._spec_id(db_asset.id),
            init=init or None,
        )

    def _build_asset_init(
        self,
        session: Session,
        db_asset: Asset,
    ) -> dict[str, Any]:
        """Build the ``init`` dict for an asset spec.

        Captures materializable, per-field config overrides, resource
        bindings, destination bindings, and cross-asset dependencies.
        Resource bindings and dependencies are only looked up when the row
        has an ``id``; without one there is nothing they could reference.

        Args:
            session: Active DB session.
            db_asset: The Asset row.

        Returns:
            A dict suitable for use as a ``ComponentSpec.init``.
        """
        init: dict[str, Any] = {"materializable": db_asset.materializable}

        if db_asset.config:
            init.update(db_asset.config)

        has_id = bool(db_asset.id)

        if has_id:
            resource_bindings = session.exec(
                select(AssetResource).where(AssetResource.asset_id == db_asset.id)
            ).all()
            resources = self._resource_specs_from_bindings(session, resource_bindings)
            if resources:
                init["resources"] = resources

        destination = self._destination_payload(session, db_asset.destinations)
        if destination is not None:
            init["destination"] = destination

        if has_id:
            # Guarded like the resource lookup above: querying with a missing
            # id would otherwise compile to an ``asset_id IS NULL`` filter.
            deps = self._deps_for_asset(session, db_asset.id)
            if deps:
                init["deps"] = deps

        return init

    def _deps_for_asset(
        self,
        session: Session,
        asset_id: UUID,
    ) -> dict[str, str]:
        """Return ``{param_name: upstream_uuid}`` from ``AssetDependency`` rows."""
        dependency_rows = session.exec(
            select(AssetDependency).where(AssetDependency.asset_id == asset_id)
        ).all()
        return {d.param_name: str(d.upstream_asset_id) for d in dependency_rows}

    # ------------------------------------------------------------------
    # Source
    # ------------------------------------------------------------------

    def build_source_spec(
        self,
        session: Session,
        db_source: Source,
    ) -> ComponentSpec:
        """Build a spec from a Source row including per-asset overrides.

        The returned spec is self-contained: it carries the source's own
        config, resource bindings, destination bindings, plus an
        ``assets`` override map keyed by asset key. Each entry is the
        sparse init payload for one asset (materializable, config,
        resources, destinations, deps). Reconstruction via
        ``ComponentSpec.reconstruct()`` produces a live ``Source`` whose
        ``_apply_asset_overrides`` validator materialises each asset from
        ``asset_types`` with the overrides applied.

        Args:
            session: Active DB session.
            db_source: The Source row (relationships should be eagerly
                loaded via ``selectinload``).

        Returns:
            A ``ComponentSpec`` capturing the source and its assets.

        Raises:
            CatalogKeyError: If the catalog has no entry for the row's key.
        """
        path = self._resolve_path(db_source.key, kind="source")

        init: dict[str, Any] = {}

        if db_source.config:
            init.update(db_source.config)

        resource_bindings = session.exec(
            select(SourceResource).where(SourceResource.source_id == db_source.id)
        ).all()
        resources = self._resource_specs_from_bindings(session, resource_bindings)
        if resources:
            init["resources"] = resources

        destination = self._destination_payload(session, db_source.destinations)
        if destination is not None:
            init["destination"] = destination

        # "id" is listed first so the asset's own init payload keeps the same
        # precedence it had before; the id uses the same empty-string fallback
        # as every other spec instead of the literal string "None".
        asset_overrides: dict[str, dict[str, Any]] = {
            db_asset.key: {
                "id": self._spec_id(db_asset.id),
                **self._build_asset_init(session, db_asset),
            }
            for db_asset in db_source.assets
        }
        if asset_overrides:
            init["assets"] = asset_overrides

        return ComponentSpec(
            path=path,
            id=self._spec_id(db_source.id),
            init=init or None,
        )

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _spec_id(row_id: UUID | None) -> str:
        """Render a row id for a spec, using ``""`` when the row has none."""
        return str(row_id) if row_id else ""

    def _destination_payload(
        self,
        session: Session,
        destinations: Iterable[Destination],
    ) -> dict[str, Any] | list[dict[str, Any]] | None:
        """Serialize destination rows into the value stored under ``destination``.

        Args:
            session: Active DB session.
            destinations: Destination rows bound to a source or an asset.

        Returns:
            ``None`` when there are no destinations, a single spec dict when
            there is exactly one, otherwise a list of spec dicts.
        """
        specs = [
            self.build_destination_spec(session, db_dest).model_dump(mode="json")
            for db_dest in destinations
        ]
        if not specs:
            return None
        return specs if len(specs) > 1 else specs[0]

    def _resolve_path(self, key: str, *, kind: str) -> str:
        """Look up a component's import path via the catalog.

        Args:
            key: The catalog key.
            kind: Component kind, used only in the error message.

        Returns:
            The resolved import path.

        Raises:
            CatalogKeyError: If the catalog has no entry for *key*.
        """
        definition = self._catalog.get(key)
        if not definition:
            raise CatalogKeyError(f"Unknown {kind} key: {key}")
        return definition.path

    def _resource_specs_from_bindings(
        self,
        session: Session,
        bindings: Iterable[Any],
    ) -> dict[str, dict[str, Any]]:
        """Build a ``{slot: resource_spec_dict}`` map from binding rows.

        Each binding row must expose ``resource_id`` and ``key`` attributes.
        Bindings whose resource row cannot be loaded are left out of the
        result.

        Args:
            session: Active DB session.
            bindings: Binding rows linking an owner to its resources.

        Returns:
            A dict mapping slot name → resource spec (as a JSON-safe dict).
        """
        result: dict[str, dict[str, Any]] = {}
        for binding in bindings:
            db_resource = session.get(Resource, binding.resource_id)
            if db_resource:
                spec = self.build_resource_spec(db_resource)
                result[binding.key] = spec.model_dump(mode="json")
        return result
@@ -0,0 +1,34 @@
1
+ """Alembic environment configuration.
2
+
3
+ Reads the database URL from the engine singleton initialised by
4
+ ``init_engine()``, so migrations share the exact same connection as the
5
+ rest of the application.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from alembic import context
11
+ from sqlmodel import SQLModel
12
+
13
+ import interloper_db.models as _models # noqa: F401 — register all models
14
+
15
+ target_metadata = SQLModel.metadata
16
+
17
+
18
def run_migrations_online() -> None:
    """Apply migrations over a connection from the shared engine singleton.

    The engine comes from ``get_engine()``, so ``init_engine()`` must have
    been called before Alembic loads this environment.
    """
    # Imported here rather than at module top, as in the original layout.
    from interloper_db.engine import get_engine

    engine = get_engine()
    with engine.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
            compare_type=True,
        )
        with context.begin_transaction():
            context.run_migrations()


# Alembic executes this module on load; run the migrations right away.
run_migrations_online()
@@ -0,0 +1,25 @@
1
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""

from __future__ import annotations

from alembic import op
${imports if imports else ""}

# revision identifiers, used by Alembic.
## down_revision is rendered as a tuple for merge revisions; branch_labels and
## depends_on are rendered from the values given to ``alembic revision``
## instead of being hard-coded to None.
revision: str = ${repr(up_revision)}
down_revision: str | tuple[str, ...] | None = ${repr(down_revision)}
branch_labels: str | tuple[str, ...] | None = ${repr(branch_labels)}
depends_on: str | tuple[str, ...] | None = ${repr(depends_on)}


def upgrade() -> None:
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    ${downgrades if downgrades else "pass"}
@@ -0,0 +1,23 @@
1
+ """Enable the pgcrypto extension.
2
+
3
+ Revision ID: 001
4
+ Revises: None
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from alembic import op
10
+
11
+ # revision identifiers, used by Alembic.
12
+ revision: str = "001"
13
+ down_revision: str | None = None
14
+ branch_labels: str | None = None
15
+ depends_on: str | None = None
16
+
17
+
18
def upgrade() -> None:
    """Install the ``pgcrypto`` extension unless it is already present."""
    create_extension_sql = "CREATE EXTENSION IF NOT EXISTS pgcrypto"
    op.execute(create_extension_sql)
20
+
21
+
22
def downgrade() -> None:
    """Remove the ``pgcrypto`` extension if it is installed."""
    drop_extension_sql = "DROP EXTENSION IF EXISTS pgcrypto"
    op.execute(drop_extension_sql)
@@ -0,0 +1,79 @@
1
+ """Create asset_executions view.
2
+
3
+ Revision ID: 002
4
+ Revises: 001
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from alembic import op
10
+
11
+ # revision identifiers, used by Alembic.
12
+ revision: str = "002"
13
+ down_revision: str | None = "001"
14
+ branch_labels: str | None = None
15
+ depends_on: str | None = None
16
+
17
+
18
def upgrade() -> None:
    """Create (or replace) the ``asset_executions`` view.

    The view reads ``events`` rows that have an ``asset_id`` and keeps one row
    per ``(run_id, asset_id)``. The kept row is the one whose event type ranks
    first in the ``CASE`` ordering below (failed, canceled, completed, started,
    skipped, queued), with the latest timestamp breaking ties. That event type
    is mapped to ``status``; the queued/started/terminal timestamps are
    computed over the same partition.
    """
    create_view_sql = """
        CREATE OR REPLACE VIEW asset_executions AS
        WITH ranked AS (
            SELECT
                e.run_id,
                e.org_id,
                e.asset_id,
                e.asset_key,
                e.event_type,
                e.timestamp,
                row_number() OVER (
                    PARTITION BY e.run_id, e.asset_id
                    ORDER BY
                        CASE e.event_type
                            WHEN 'asset_failed' THEN 1
                            WHEN 'asset_canceled' THEN 2
                            WHEN 'asset_completed' THEN 3
                            WHEN 'asset_started' THEN 4
                            WHEN 'asset_skipped' THEN 5
                            WHEN 'asset_queued' THEN 6
                        END,
                        e.timestamp DESC
                ) AS rn,
                min(CASE WHEN e.event_type = 'asset_queued' THEN e.timestamp END) OVER (
                    PARTITION BY e.run_id, e.asset_id
                ) AS queued_at,
                min(CASE WHEN e.event_type = 'asset_started' THEN e.timestamp END) OVER (
                    PARTITION BY e.run_id, e.asset_id
                ) AS started_at,
                max(CASE WHEN e.event_type IN ('asset_completed', 'asset_failed', 'asset_canceled')
                    THEN e.timestamp END) OVER (
                    PARTITION BY e.run_id, e.asset_id
                ) AS completed_at
            FROM events e
            WHERE e.asset_id IS NOT NULL
        )
        SELECT
            r.run_id,
            r.org_id,
            r.asset_id,
            r.asset_key,
            CASE r.event_type
                WHEN 'asset_failed' THEN 'failed'
                WHEN 'asset_canceled' THEN 'canceled'
                WHEN 'asset_completed' THEN 'success'
                WHEN 'asset_started' THEN 'running'
                WHEN 'asset_skipped' THEN 'skipped'
                WHEN 'asset_queued' THEN 'queued'
            END AS status,
            r.started_at,
            r.completed_at,
            r.queued_at AS created_at
        FROM ranked r
        WHERE r.rn = 1
        """
    op.execute(create_view_sql)
76
+
77
+
78
def downgrade() -> None:
    """Drop the ``asset_executions`` view together with anything depending on it."""
    drop_view_sql = "DROP VIEW IF EXISTS asset_executions CASCADE"
    op.execute(drop_view_sql)
@@ -0,0 +1,60 @@
1
+ """Add notify_table_change() function and triggers for realtime.
2
+
3
+ Revision ID: 003
4
+ Revises: 002
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from alembic import op
10
+
11
+ # revision identifiers, used by Alembic.
12
+ revision: str = "003"
13
+ down_revision: str | None = "002"
14
+ branch_labels: str | None = None
15
+ depends_on: str | None = None
16
+
17
# One entry per trigger: (trigger name, table it is attached to, firing events).
_TRIGGERS = [
    ("trg_runs_notify", "runs", "INSERT OR UPDATE"),
    ("trg_events_notify", "events", "INSERT"),
    ("trg_backfills_notify", "backfills", "INSERT OR UPDATE"),
]


def upgrade() -> None:
    """Install ``notify_table_change()`` and attach it to the realtime tables.

    The function publishes a JSON payload (table name, operation, ``org_id``
    and the full row) on the ``table_changes`` channel; one row-level AFTER
    trigger per ``_TRIGGERS`` entry invokes it.
    """
    # NOTE(review): the payload embeds the whole row. PostgreSQL limits NOTIFY
    # payloads (8000 bytes by default per its docs), so a very large row could
    # make the triggering statement fail — confirm row sizes stay below that.
    # NOTE(review): CREATE OR REPLACE TRIGGER appears to need PostgreSQL 14+;
    # verify against the supported server versions.
    create_function_sql = """
        CREATE OR REPLACE FUNCTION notify_table_change()
        RETURNS trigger
        LANGUAGE plpgsql
        AS $$
        DECLARE
            payload jsonb;
            rec RECORD;
        BEGIN
            rec := COALESCE(NEW, OLD);
            payload := jsonb_build_object(
                'table', TG_TABLE_NAME,
                'op', TG_OP,
                'org_id', rec.org_id,
                'record', row_to_json(rec)::jsonb
            );
            PERFORM pg_notify('table_changes', payload::text);
            RETURN rec;
        END;
        $$
        """
    op.execute(create_function_sql)

    for name, table_name, firing_events in _TRIGGERS:
        create_trigger_sql = (
            f"CREATE OR REPLACE TRIGGER {name} "
            f"AFTER {firing_events} ON {table_name} "
            "FOR EACH ROW EXECUTE FUNCTION notify_table_change()"
        )
        op.execute(create_trigger_sql)
55
+
56
+
57
def downgrade() -> None:
    """Detach every notify trigger, then drop ``notify_table_change()``."""
    # Triggers are dropped before the function they call.
    for name, table_name, _ in _TRIGGERS:
        drop_trigger_sql = f"DROP TRIGGER IF EXISTS {name} ON {table_name}"
        op.execute(drop_trigger_sql)
    op.execute("DROP FUNCTION IF EXISTS notify_table_change()")
@@ -0,0 +1,28 @@
1
+ """Add traceback column to events table.
2
+
3
+ Stores the formatted Python traceback alongside the error message so
4
+ that failure diagnostics are available in the UI without parsing logs.
5
+
6
+ Revision ID: 004
7
+ Revises: 003
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from alembic import op
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = "004"
16
+ down_revision: str | None = "003"
17
+ branch_labels: str | None = None
18
+ depends_on: str | None = None
19
+
20
+
21
def upgrade() -> None:
    """Add a ``traceback`` TEXT column to ``events`` unless it already exists."""
    add_column_sql = "ALTER TABLE events ADD COLUMN IF NOT EXISTS traceback TEXT;"
    op.execute(add_column_sql)
24
+
25
+
26
def downgrade() -> None:
    """Drop the ``traceback`` column from ``events`` if it is present."""
    drop_column_sql = "ALTER TABLE events DROP COLUMN IF EXISTS traceback;"
    op.execute(drop_column_sql)