interloper-db 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ Metadata-Version: 2.3
2
+ Name: interloper-db
3
+ Version: 0.2.0
4
+ Summary: Interloper database persistence layer
5
+ Author: Guillaume Onfroy
6
+ Author-email: Guillaume Onfroy <guillaume@digitlcloud.com>
7
+ Requires-Dist: interloper-core
8
+ Requires-Dist: sqlmodel>=0.0.27
9
+ Requires-Dist: sqlalchemy>=2.0.0
10
+ Requires-Dist: psycopg2-binary>=2.9.0
11
+ Requires-Dist: alembic>=1.18.4
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+
File without changes
@@ -0,0 +1,54 @@
1
+ # ###############
2
+ # PROJECT / UV
3
+ # ###############
4
+ [project]
5
+ name = "interloper-db"
6
+ version = "0.2.0"
7
+ description = "Interloper database persistence layer"
8
+ readme = "README.md"
9
+ authors = [{ name = "Guillaume Onfroy", email = "guillaume@digitlcloud.com" }]
10
+ requires-python = ">=3.10"
11
+ dependencies = [
12
+ "interloper-core",
13
+ "sqlmodel>=0.0.27",
14
+ "sqlalchemy>=2.0.0",
15
+ "psycopg2-binary>=2.9.0",
16
+ "alembic>=1.18.4",
17
+ ]
18
+
19
+ [build-system]
20
+ requires = ["uv_build>=0.11.5,<0.12"]
21
+ build-backend = "uv_build"
22
+
23
+ [tool.uv.sources]
24
+ interloper-core = { workspace = true }
25
+
26
+ # ###############
27
+ # RUFF
28
+ # ###############
29
+ [tool.ruff]
30
+ line-length = 120
31
+
32
+ [tool.ruff.lint]
33
+ extend-select = ["E", "I", "UP", "ANN001", "ANN201", "ANN202"]
34
+
35
+ [tool.ruff.lint.per-file-ignores]
36
+ "__init__.py" = ["F401", "F403"]
37
+ "tests/**" = ["ANN", "F811"]
38
+
39
+ # ###############
40
+ # ALEMBIC
41
+ # ###############
42
+ [tool.alembic]
43
+ script_location = "src/interloper_db/migrations"
44
+ file_template = "%(rev)s_%(slug)s"
45
+ # No sqlalchemy.url is configured here: env.py connects through the engine singleton (see init_engine()).
46
+
47
+ # ###############
48
+ # PYRIGHT
49
+ # ###############
50
+ [tool.pyright]
51
+ include = ["src"]
52
+ typeCheckingMode = "basic"
53
+ reportMissingParameterType = true
54
+ ignore = ["tests/**"]
@@ -0,0 +1,57 @@
1
+ from interloper_db.engine import get_engine, init_engine
2
+ from interloper_db.models import (
3
+ Asset,
4
+ AssetDependency,
5
+ AssetDestination,
6
+ AssetResource,
7
+ Backfill,
8
+ Destination,
9
+ DestinationResource,
10
+ Event,
11
+ Invitation,
12
+ Job,
13
+ JobAsset,
14
+ JobSource,
15
+ Organisation,
16
+ Profile,
17
+ Resource,
18
+ Run,
19
+ Session,
20
+ Source,
21
+ SourceDestination,
22
+ SourceResource,
23
+ UserOrganisation,
24
+ )
25
+ from interloper_db.provision import create_all, downgrade, ensure_database, upgrade
26
+ from interloper_db.store import Store
27
+
28
# Public API of the package: every name re-exported by the imports above
# (ORM models, the Store, provisioning helpers and the engine accessors).
__all__ = [
    "Asset",
    "AssetDependency",
    "AssetDestination",
    "AssetResource",
    "Backfill",
    "Destination",
    "DestinationResource",
    "Event",
    "Invitation",
    "Job",
    "JobAsset",
    "JobSource",
    "Organisation",
    "Profile",
    "Resource",
    "Run",
    "Session",
    "Source",
    "SourceDestination",
    "SourceResource",
    "Store",
    "UserOrganisation",
    "create_all",
    "downgrade",
    "ensure_database",
    "upgrade",
    "get_engine",
    "init_engine",
]
@@ -0,0 +1,46 @@
1
+ """Database engine singleton."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+
7
+ from sqlalchemy import Engine, create_engine
8
+
9
# Module-level singleton: assigned by ``init_engine`` and read by ``get_engine``.
# ``None`` means ``init_engine`` has not been called yet.
_engine: Engine | None = None
10
+
11
+
12
def init_engine(dsn: str | None = None, **kwargs: object) -> Engine:
    """Initialize (or re-initialize) the global database engine.

    Args:
        dsn: PostgreSQL connection string. Falls back to the ``DATABASE_URL``
            environment variable when omitted or empty.
        **kwargs: Additional kwargs forwarded to ``create_engine``.

    Returns:
        The newly created SQLAlchemy engine, which is also stored as the
        module-level singleton returned by ``get_engine``.

    Raises:
        ConfigError: If no DSN is provided and ``DATABASE_URL`` is not set.
    """
    global _engine  # noqa: PLW0603
    dsn = dsn or os.getenv("DATABASE_URL")
    if not dsn:
        from interloper.errors import ConfigError

        raise ConfigError("Database DSN required: pass dsn= or set DATABASE_URL")

    # Build the replacement first so a failing ``create_engine`` leaves the
    # current singleton untouched.
    new_engine = create_engine(dsn, **kwargs)
    previous, _engine = _engine, new_engine
    if previous is not None:
        # Re-initialisation: release the replaced engine's pooled connections
        # instead of leaving them open until garbage collection.
        previous.dispose()
    return new_engine
33
+
34
+
35
def get_engine() -> Engine:
    """Fetch the engine previously created by ``init_engine``.

    Returns:
        The SQLAlchemy engine stored in the module-level singleton.

    Raises:
        RuntimeError: If ``init_engine`` has not been called.
    """
    engine = _engine
    if engine is not None:
        return engine
    raise RuntimeError("Database engine not initialized. Call init_engine() first.")
@@ -0,0 +1,335 @@
1
+ """Hydration: translates DB rows into ``ComponentSpec`` trees.
2
+
3
+ This module is a pure transformation layer. It reads rows from the
4
+ database and builds the ``ComponentSpec`` tree that ``Component.from_spec``
5
+ expects. No framework classes are instantiated here — reconstruction
6
+ happens at the call site via ``spec.reconstruct()``::
7
+
8
+ hydrator = Hydrator(catalog, decrypt=decrypt_fn)
9
+ with Session(engine) as session:
10
+ db_source = session.get(Source, source_id, options=[...])
11
+ spec = hydrator.build_source_spec(session, db_source)
12
+ source = spec.reconstruct()
13
+
14
+ The Store wraps this pattern in thin ``load_*`` convenience methods, but
15
+ any caller can use the hydrator directly to assemble a spec (for example,
16
+ to serialize it to JSON and send it across a process boundary).
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ from collections.abc import Callable, Iterable
23
+ from typing import Any
24
+ from uuid import UUID
25
+
26
+ from interloper.catalog.base import Catalog
27
+ from interloper.component.base import ComponentSpec
28
+ from interloper.errors import CatalogKeyError
29
+ from sqlmodel import Session, select
30
+
31
+ from interloper_db.models import (
32
+ Asset,
33
+ AssetDependency,
34
+ AssetResource,
35
+ Destination,
36
+ DestinationResource,
37
+ Resource,
38
+ Source,
39
+ SourceResource,
40
+ )
41
+
42
+
43
class Hydrator:
    """Builds ``ComponentSpec`` trees from DB rows.

    The hydrator holds a catalog (used to look up import paths by key) and an
    optional decrypt callable (applied to encrypted resource data). Its
    methods read rows and return specs; they never call
    ``ComponentSpec.reconstruct()`` — reconstruction is the caller's job.
    """

    def __init__(
        self,
        catalog: Catalog,
        decrypt: Callable[[bytes], bytes] | None = None,
    ) -> None:
        """Initialize the hydrator.

        Args:
            catalog: Catalog used to resolve ``key → import path``.
            decrypt: Optional ``(bytes) -> bytes`` callable for decrypting
                resource data blobs marked ``encrypted=True``.
        """
        self._catalog = catalog
        self._decrypt = decrypt

    # ------------------------------------------------------------------
    # Resource
    # ------------------------------------------------------------------

    def build_resource_spec(self, db_resource: Resource) -> ComponentSpec:
        """Build a spec from a Resource row.

        Resources are leaves — they carry their full state in ``data``
        and have no nested specs. No session is needed.

        Args:
            db_resource: The Resource row.

        Returns:
            A ``ComponentSpec`` with the row's ``id`` and decoded data.

        Raises:
            CatalogKeyError: If the catalog has no entry for the row's key.
            ValueError: If the row's data cannot be decoded (see
                :meth:`decode_resource_data`).
        """
        path = self._resolve_path(db_resource.key, kind="resource")
        init = self.decode_resource_data(db_resource)
        return ComponentSpec(
            path=path,
            id=self._spec_id(db_resource.id),
            # An empty payload is stored as ``None`` rather than ``{}``.
            init=init or None,
        )

    def decode_resource_data(self, db_resource: Resource) -> dict[str, Any]:
        """Decrypt (when needed) and JSON-decode a resource's data blob.

        Args:
            db_resource: The Resource row whose ``data`` bytes should be
                decoded.

        Returns:
            The decoded configuration dict, or ``{}`` if the row carries
            no data.

        Raises:
            ValueError: If the data is not valid JSON. When the row is marked
                ``encrypted`` and no decrypt callable was configured, the
                message says so explicitly and the underlying decode error is
                chained as the cause.
        """
        raw = db_resource.data
        if raw is None:
            return {}
        if db_resource.encrypted and self._decrypt:
            return json.loads(self._decrypt(raw))
        try:
            return json.loads(raw)
        except ValueError as err:
            # ``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both
            # ``ValueError`` subclasses, so this covers every decode failure.
            if not db_resource.encrypted:
                raise
            # The blob is flagged encrypted but nothing could decrypt it:
            # report that instead of a bare decode error on ciphertext.
            raise ValueError(
                f"Resource {db_resource.id} is marked encrypted but no decrypt "
                "callable was provided to the Hydrator"
            ) from err

    # ------------------------------------------------------------------
    # Destination
    # ------------------------------------------------------------------

    def build_destination_spec(
        self,
        session: Session,
        db_destination: Destination,
    ) -> ComponentSpec:
        """Build a spec from a Destination row.

        Captures the destination's ``config`` dict and resolves its
        resource bindings as nested resource specs under ``init.resources``.

        Args:
            session: Active DB session.
            db_destination: The Destination row.

        Returns:
            A ``ComponentSpec`` with the row's ``id`` and resolved bindings.

        Raises:
            CatalogKeyError: If the catalog has no entry for the row's key.
        """
        path = self._resolve_path(db_destination.key, kind="destination")

        # Copy the config so adding ``resources`` never mutates the row.
        init: dict[str, Any] = dict(db_destination.config) if db_destination.config else {}

        bindings = session.exec(
            select(DestinationResource).where(DestinationResource.destination_id == db_destination.id)
        ).all()
        resources = self._resource_specs_from_bindings(session, bindings)
        if resources:
            init["resources"] = resources

        return ComponentSpec(
            path=path,
            id=self._spec_id(db_destination.id),
            init=init or None,
        )

    # ------------------------------------------------------------------
    # Asset
    # ------------------------------------------------------------------

    def build_asset_spec(
        self,
        session: Session,
        db_asset: Asset,
    ) -> ComponentSpec:
        """Build a spec from a standalone Asset row.

        Source-owned assets should not be built through this method —
        they're embedded in the parent source's spec via
        :meth:`build_source_spec`, which handles the
        ``"source_path:asset_key"`` path convention.

        Args:
            session: Active DB session.
            db_asset: A standalone Asset row (``source_id`` is ``None``).

        Returns:
            A ``ComponentSpec`` with the row's ``id`` and all per-asset
            state (materializable, config, resources, destinations, deps).

        Raises:
            CatalogKeyError: If the catalog has no entry for the row's key.
        """
        path = self._resolve_path(db_asset.key, kind="asset")
        init = self._build_asset_init(session, db_asset)
        return ComponentSpec(
            path=path,
            id=self._spec_id(db_asset.id),
            init=init or None,
        )

    def _build_asset_init(
        self,
        session: Session,
        db_asset: Asset,
    ) -> dict[str, Any]:
        """Build the ``init`` dict for an asset spec.

        Captures materializable, per-field config overrides, resource
        bindings, destination bindings, and cross-asset dependencies.

        Args:
            session: Active DB session.
            db_asset: The Asset row.

        Returns:
            A dict suitable for use as a ``ComponentSpec.init``.
        """
        init: dict[str, Any] = {"materializable": db_asset.materializable}

        if db_asset.config:
            init.update(db_asset.config)

        if db_asset.id:
            resource_bindings = session.exec(
                select(AssetResource).where(AssetResource.asset_id == db_asset.id)
            ).all()
            resources = self._resource_specs_from_bindings(session, resource_bindings)
            if resources:
                init["resources"] = resources

        destination = self._destination_init(session, db_asset.destinations)
        if destination is not None:
            init["destination"] = destination

        # A row without an id cannot own dependency rows; skip the query.
        deps = self._deps_for_asset(session, db_asset.id) if db_asset.id else {}
        if deps:
            init["deps"] = deps

        return init

    def _deps_for_asset(
        self,
        session: Session,
        asset_id: UUID,
    ) -> dict[str, str]:
        """Return ``{param_name: upstream_uuid}`` from ``AssetDependency`` rows."""
        dependency_rows = session.exec(
            select(AssetDependency).where(AssetDependency.asset_id == asset_id)
        ).all()
        return {d.param_name: str(d.upstream_asset_id) for d in dependency_rows}

    # ------------------------------------------------------------------
    # Source
    # ------------------------------------------------------------------

    def build_source_spec(
        self,
        session: Session,
        db_source: Source,
    ) -> ComponentSpec:
        """Build a spec from a Source row including per-asset overrides.

        The returned spec is self-contained: it carries the source's own
        config, resource bindings, destination bindings, plus an
        ``assets`` override map keyed by asset key. Each entry is the
        sparse init payload for one asset (materializable, config,
        resources, destinations, deps). Reconstruction via
        ``ComponentSpec.reconstruct()`` produces a live ``Source`` whose
        ``_apply_asset_overrides`` validator materialises each asset from
        ``asset_types`` with the overrides applied.

        Args:
            session: Active DB session.
            db_source: The Source row (relationships should be eagerly
                loaded via ``selectinload``).

        Returns:
            A ``ComponentSpec`` capturing the source and its assets.

        Raises:
            CatalogKeyError: If the catalog has no entry for the row's key.
        """
        path = self._resolve_path(db_source.key, kind="source")

        init: dict[str, Any] = {}

        if db_source.config:
            init.update(db_source.config)

        resource_bindings = session.exec(
            select(SourceResource).where(SourceResource.source_id == db_source.id)
        ).all()
        resources = self._resource_specs_from_bindings(session, resource_bindings)
        if resources:
            init["resources"] = resources

        destination = self._destination_init(session, db_source.destinations)
        if destination is not None:
            init["destination"] = destination

        # ``id`` comes first so keys from the asset's own init payload win on
        # a name clash, exactly as dict-unpacking order dictates.
        asset_overrides: dict[str, dict[str, Any]] = {
            db_asset.key: {"id": self._spec_id(db_asset.id), **self._build_asset_init(session, db_asset)}
            for db_asset in db_source.assets
        }
        if asset_overrides:
            init["assets"] = asset_overrides

        return ComponentSpec(
            path=path,
            id=self._spec_id(db_source.id),
            init=init or None,
        )

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _spec_id(row_id: UUID | None) -> str:
        """Render a row id for a spec: its string form, or ``""`` when unset."""
        return str(row_id) if row_id else ""

    def _destination_init(
        self,
        session: Session,
        db_destinations: Iterable[Destination],
    ) -> dict[str, Any] | list[dict[str, Any]] | None:
        """Build the value stored under ``init["destination"]``.

        Args:
            session: Active DB session.
            db_destinations: Destination rows bound to an asset or source.

        Returns:
            ``None`` when there are no destinations, the single spec dict
            when there is exactly one, otherwise the list of spec dicts.
        """
        specs = [
            self.build_destination_spec(session, db_dest).model_dump(mode="json")
            for db_dest in db_destinations
        ]
        if not specs:
            return None
        return specs if len(specs) > 1 else specs[0]

    def _resolve_path(self, key: str, *, kind: str) -> str:
        """Look up a component's import path via the catalog.

        Args:
            key: The catalog key.
            kind: Component kind, used only in the error message.

        Returns:
            The resolved import path.

        Raises:
            CatalogKeyError: If the catalog has no entry for *key*.
        """
        definition = self._catalog.get(key)
        if not definition:
            raise CatalogKeyError(f"Unknown {kind} key: {key}")
        return definition.path

    def _resource_specs_from_bindings(
        self,
        session: Session,
        bindings: Iterable[Any],
    ) -> dict[str, dict[str, Any]]:
        """Build a ``{slot: resource_spec_dict}`` map from binding rows.

        Each binding row must expose ``resource_id`` and ``key`` attributes.
        Bindings whose resource row cannot be found are skipped.

        Args:
            session: Active DB session.
            bindings: Binding rows linking an owner to its resources.

        Returns:
            A dict mapping slot name → resource spec (as a JSON-safe dict).
        """
        result: dict[str, dict[str, Any]] = {}
        for binding in bindings:
            db_resource = session.get(Resource, binding.resource_id)
            if db_resource:
                spec = self.build_resource_spec(db_resource)
                result[binding.key] = spec.model_dump(mode="json")
        return result
@@ -0,0 +1,34 @@
1
+ """Alembic environment configuration.
2
+
3
+ Connects through the engine singleton initialised by ``init_engine()``
4
+ rather than a configured URL, so migrations use the same engine (and
5
+ connection settings) as the rest of the application.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from alembic import context
11
+ from sqlmodel import SQLModel
12
+
13
+ import interloper_db.models as _models # noqa: F401 — register all models
14
+
15
# Metadata passed to ``context.configure`` below; the ``interloper_db.models``
# import above registers the model tables on it.
target_metadata = SQLModel.metadata
16
+
17
+
18
def run_migrations_online() -> None:
    """Apply migrations over a live connection taken from the shared engine."""
    from interloper_db.engine import get_engine

    engine = get_engine()
    configure_options = {"target_metadata": target_metadata, "compare_type": True}

    with engine.connect() as conn:
        context.configure(connection=conn, **configure_options)
        with context.begin_transaction():
            context.run_migrations()
32
+
33
+
34
+ run_migrations_online()
@@ -0,0 +1,25 @@
1
+ """${message}
2
+
3
+ Revision ID: ${up_revision}
4
+ Revises: ${down_revision | comma,n}
5
+ Create Date: ${create_date}
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from alembic import op
11
+ ${imports if imports else ""}
12
+
13
+ # revision identifiers, used by Alembic.
14
+ revision: str = ${repr(up_revision)}
15
+ down_revision: str | None = ${repr(down_revision)}
16
+ branch_labels: str | None = None
17
+ depends_on: str | None = None
18
+
19
+
20
+ def upgrade() -> None:
21
+ ${upgrades if upgrades else "pass"}
22
+
23
+
24
+ def downgrade() -> None:
25
+ ${downgrades if downgrades else "pass"}
@@ -0,0 +1,23 @@
1
+ """Enable the pgcrypto extension.
2
+
3
+ Revision ID: 001
4
+ Revises: None
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from alembic import op
10
+
11
# revision identifiers, used by Alembic.
revision: str = "001"
# ``None`` marks this as the base revision: it has no parent migration.
down_revision: str | None = None
branch_labels: str | None = None
depends_on: str | None = None
16
+
17
+
18
def upgrade() -> None:
    """Install the ``pgcrypto`` extension unless it is already present."""
    statement = "CREATE EXTENSION IF NOT EXISTS pgcrypto"
    op.execute(statement)
20
+
21
+
22
def downgrade() -> None:
    """Remove the ``pgcrypto`` extension if it is installed."""
    statement = "DROP EXTENSION IF EXISTS pgcrypto"
    op.execute(statement)
@@ -0,0 +1,79 @@
1
+ """Create asset_executions view.
2
+
3
+ Revision ID: 002
4
+ Revises: 001
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from alembic import op
10
+
11
# revision identifiers, used by Alembic.
revision: str = "002"
# Parent revision: this migration is applied on top of revision "001".
down_revision: str | None = "001"
branch_labels: str | None = None
depends_on: str | None = None
16
+
17
+
18
def upgrade() -> None:
    """Create (or replace) the ``asset_executions`` view over ``events``.

    The view exposes one row per ``(run_id, asset_id)`` pair for events that
    have an ``asset_id``. The row is chosen by event-type priority
    (failed, canceled, completed, started, skipped, queued — in that order),
    with the most recent timestamp breaking ties, and its event type is
    mapped to a ``status`` label. ``created_at`` is the earliest
    ``asset_queued`` timestamp, ``started_at`` the earliest ``asset_started``
    timestamp, and ``completed_at`` the latest timestamp among completed,
    failed and canceled events.
    """
    # ``ranked`` numbers the events of each (run_id, asset_id) pair and
    # computes the per-pair timestamps with window functions; the outer
    # SELECT keeps only the top-ranked row (rn = 1).
    op.execute(
        """
CREATE OR REPLACE VIEW asset_executions AS
WITH ranked AS (
SELECT
e.run_id,
e.org_id,
e.asset_id,
e.asset_key,
e.event_type,
e.timestamp,
row_number() OVER (
PARTITION BY e.run_id, e.asset_id
ORDER BY
CASE e.event_type
WHEN 'asset_failed' THEN 1
WHEN 'asset_canceled' THEN 2
WHEN 'asset_completed' THEN 3
WHEN 'asset_started' THEN 4
WHEN 'asset_skipped' THEN 5
WHEN 'asset_queued' THEN 6
END,
e.timestamp DESC
) AS rn,
min(CASE WHEN e.event_type = 'asset_queued' THEN e.timestamp END) OVER (
PARTITION BY e.run_id, e.asset_id
) AS queued_at,
min(CASE WHEN e.event_type = 'asset_started' THEN e.timestamp END) OVER (
PARTITION BY e.run_id, e.asset_id
) AS started_at,
max(CASE WHEN e.event_type IN ('asset_completed', 'asset_failed', 'asset_canceled')
THEN e.timestamp END) OVER (
PARTITION BY e.run_id, e.asset_id
) AS completed_at
FROM events e
WHERE e.asset_id IS NOT NULL
)
SELECT
r.run_id,
r.org_id,
r.asset_id,
r.asset_key,
CASE r.event_type
WHEN 'asset_failed' THEN 'failed'
WHEN 'asset_canceled' THEN 'canceled'
WHEN 'asset_completed' THEN 'success'
WHEN 'asset_started' THEN 'running'
WHEN 'asset_skipped' THEN 'skipped'
WHEN 'asset_queued' THEN 'queued'
END AS status,
r.started_at,
r.completed_at,
r.queued_at AS created_at
FROM ranked r
WHERE r.rn = 1
"""
    )
76
+
77
+
78
def downgrade() -> None:
    """Drop the ``asset_executions`` view, cascading to dependent objects."""
    statement = "DROP VIEW IF EXISTS asset_executions CASCADE"
    op.execute(statement)