contextbase-shared-plugins 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. contextbase_shared_plugins-0.2.3.dist-info/METADATA +22 -0
  2. contextbase_shared_plugins-0.2.3.dist-info/RECORD +37 -0
  3. contextbase_shared_plugins-0.2.3.dist-info/WHEEL +4 -0
  4. shared_plugins/__init__.py +12 -0
  5. shared_plugins/automation.py +11 -0
  6. shared_plugins/bindings.py +253 -0
  7. shared_plugins/control_plane.py +208 -0
  8. shared_plugins/dlt.py +84 -0
  9. shared_plugins/env.py +102 -0
  10. shared_plugins/exceptions.py +10 -0
  11. shared_plugins/google_client/__init__.py +1 -0
  12. shared_plugins/google_client/auth.py +82 -0
  13. shared_plugins/google_client/batch_retry.py +308 -0
  14. shared_plugins/google_client/http_errors.py +27 -0
  15. shared_plugins/microsoft_dataverse/__init__.py +27 -0
  16. shared_plugins/microsoft_dataverse/annotations.py +38 -0
  17. shared_plugins/microsoft_dataverse/auth.py +26 -0
  18. shared_plugins/microsoft_dataverse/binding_config.py +35 -0
  19. shared_plugins/microsoft_dataverse/client.py +456 -0
  20. shared_plugins/microsoft_dataverse/ctx.py +21 -0
  21. shared_plugins/microsoft_dataverse/identifiers.py +62 -0
  22. shared_plugins/microsoft_dataverse/ingress.py +53 -0
  23. shared_plugins/microsoft_dataverse/metadata.py +106 -0
  24. shared_plugins/microsoft_dataverse/runtime_schema.py +332 -0
  25. shared_plugins/microsoft_dataverse/source.py +250 -0
  26. shared_plugins/microsoft_dataverse/tables.py +34 -0
  27. shared_plugins/microsoft_dataverse/translators.py +128 -0
  28. shared_plugins/microsoft_dataverse/types.py +346 -0
  29. shared_plugins/models.py +91 -0
  30. shared_plugins/naming.py +83 -0
  31. shared_plugins/pg_column_comments.py +59 -0
  32. shared_plugins/pyairbyte.py +399 -0
  33. shared_plugins/resources.py +179 -0
  34. shared_plugins/scratch.py +127 -0
  35. shared_plugins/sqlalchemy_types.py +225 -0
  36. shared_plugins/sqlite.py +123 -0
  37. shared_plugins/values.py +117 -0
@@ -0,0 +1,59 @@
1
+ """Monkey-patch PostgresClient to emit COMMENT ON COLUMN for dlt column descriptions.
2
+
3
+ dlt's ``TColumnSchema`` supports a ``description`` field, but the Postgres destination
4
+ does not act on it. Databricks and Snowflake do (inline ``COMMENT`` in DDL). Postgres
5
+ uses a separate ``COMMENT ON COLUMN`` statement, so we hook into
6
+ PostgresClient's ``_build_schema_update_sql`` to append COMMENT statements for the
7
+ same tables dlt is updating — regardless of whether those tables have new columns.
8
+
9
+ ctxb docs reads ``COMMENT ON COLUMN`` metadata and surfaces it in generated schema docs
10
+ that are injected into the agent context. This closes the loop:
11
+
12
+ dlt schema (description) → COMMENT ON COLUMN → ctxb docs → agent prompt
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from typing import Iterable, List, Tuple
18
+
19
+ from dlt.common.data_writers.escape import (
20
+ escape_postgres_identifier,
21
+ escape_postgres_literal,
22
+ )
23
+ from dlt.common.schema.typing import (
24
+ TSchemaTables,
25
+ TTableSchemaColumns,
26
+ )
27
+ from dlt.destinations.impl.postgres.postgres import PostgresClient
28
+
29
+ _original_build_schema_update_sql = PostgresClient._build_schema_update_sql
30
+
31
+
32
def _build_schema_update_sql_with_comments(
    self: PostgresClient,
    storage_tables: Iterable[Tuple[str, TTableSchemaColumns]],
) -> Tuple[List[str], TSchemaTables]:
    """Run the original schema-update builder, then append column comments.

    For every table in ``storage_tables`` that is not one of the schema's
    dlt tables, one ``COMMENT ON COLUMN`` statement is appended per column of
    the prepared load table. Columns with a falsy ``description`` get
    ``IS NULL``.

    Returns the (extended) SQL statement list and the schema update produced
    by the original builder.
    """
    # The input is consumed twice (original builder + the loop below), so it
    # is materialised once up front.
    table_entries = list(storage_tables)
    statements, schema_update = _original_build_schema_update_sql(self, table_entries)

    internal_tables = set(self.schema.dlt_table_names())
    for name, _columns_in_storage in table_entries:
        if name not in internal_tables:
            load_table = self.prepare_load_table(name)
            target = self.sql_client.make_qualified_table_name(name)
            for column in load_table["columns"].values():
                text = column.get("description")
                if text:
                    literal = escape_postgres_literal(text)
                else:
                    literal = "NULL"
                identifier = escape_postgres_identifier(column["name"])
                statements.append(
                    f"COMMENT ON COLUMN {target}.{identifier} IS {literal}"
                )

    return statements, schema_update
57
+
58
+
59
+ PostgresClient._build_schema_update_sql = _build_schema_update_sql_with_comments # type: ignore[method-assign]
@@ -0,0 +1,399 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from collections.abc import Mapping, Sequence
5
+ from datetime import datetime, timezone
6
+ from math import isfinite
7
+ from typing import Any, ClassVar, cast
8
+
9
+ import airbyte as ab
10
+ import dagster as dg
11
+ import sqlalchemy
12
+ from dagster import AssetExecutionContext
13
+ from airbyte._processors.sql.postgres import PostgresSqlProcessor
14
+ from airbyte.caches.postgres import PostgresCache
15
+ from airbyte.progress import ProgressTracker
16
+ from airbyte.records import StreamRecordHandler
17
+ from airbyte_protocol.models import AirbyteRecordMessage
18
+ from pydantic import Field
19
+ from sqlalchemy.engine import make_url
20
+ from sqlalchemy.types import TypeEngine
21
+
22
+ from shared_types.dagster_binding_plan import DagsterAllPlanBinding
23
+
24
+ from .env import load_shared_python_settings
25
+ from .exceptions import PluginConfigurationError
26
+ from .naming import dagster_airbyte_sync_asset_key
27
+ from .values import parse_utc_datetime_from_str
28
+
29
+ _CTX_BINDING_ID_COLUMN = "_ctx_binding_id"
30
+ _CTX_SOURCE_UPDATED_AT_COLUMN = "_ctx_source_updated_at"
31
+ _CTX_BINDING_ID_SCHEMA: dict[str, object] = {"type": "string"}
32
+ # WARNING: This *looks* like it produces a Postgres timestamptz column, but
33
+ # pyairbyte/airbyte currently drops the timezone flag in its DDL path.
34
+ #
35
+ # In airbyte/shared/sql_processor.py, `_create_table_for_loading()`,
36
+ # `_ensure_final_table_exists()`, and `_add_column_to_table()` render SQLAlchemy
37
+ # types via `f"{sql_type}"` / `f"{column_type}"` instead of dialect-aware
38
+ # compilation. For `sqlalchemy.TIMESTAMP(timezone=True)`, `str(...)` is just
39
+ # `"TIMESTAMP"`, so Postgres creates `timestamp without time zone` instead of
40
+ # `TIMESTAMP WITH TIME ZONE`.
41
+ #
42
+ # This means `_ctx_source_updated_at` can be stored incorrectly: psycopg/Postgres
43
+ # will coerce aware datetimes through the session timezone and then drop tz info.
44
+ # The value may therefore vary by session/database timezone instead of preserving a
45
+ # stable absolute instant.
46
+ #
47
+ # We are documenting this intentionally rather than fixing it right now. A future
48
+ # fix should keep the upstream catalog/row-shaping behavior, but override the DDL
49
+ # rendering path so these types compile with the Postgres dialect (for example,
50
+ # `TIMESTAMP(timezone=True)` -> `TIMESTAMP WITH TIME ZONE`) before creating or
51
+ # altering tables.
52
+ _CTX_SOURCE_UPDATED_AT_SCHEMA: dict[str, object] = {
53
+ "type": ["string", "null"],
54
+ "format": "date-time",
55
+ }
56
+
57
+
58
+ def _coerce_source_updated_at(value: object) -> datetime | None:
59
+ if value is None:
60
+ return None
61
+
62
+ if isinstance(value, bool):
63
+ raise RuntimeError("_ctx_source_updated_at does not accept bool values.")
64
+
65
+ if isinstance(value, datetime):
66
+ if value.tzinfo is None or value.utcoffset() is None:
67
+ return value.replace(tzinfo=timezone.utc)
68
+ return value.astimezone(timezone.utc)
69
+
70
+ if isinstance(value, int):
71
+ return _coerce_unix_timestamp(value)
72
+
73
+ if isinstance(value, float):
74
+ if not isfinite(value):
75
+ raise RuntimeError(
76
+ "_ctx_source_updated_at does not accept non-finite unix timestamps."
77
+ )
78
+ return _coerce_unix_timestamp(value)
79
+
80
+ if isinstance(value, str):
81
+ try:
82
+ return parse_utc_datetime_from_str(value)
83
+ except ValueError as exc:
84
+ raise RuntimeError(
85
+ f"_ctx_source_updated_at must be an ISO-8601 timestamp, got {value!r}."
86
+ ) from exc
87
+
88
+ raise RuntimeError(
89
+ "_ctx_source_updated_at must be None, a datetime, an ISO-8601 timestamp string, "
90
+ f"or a unix timestamp, got {type(value).__name__}."
91
+ )
92
+
93
+
94
+ def _coerce_unix_timestamp(value: int | float) -> datetime | None:
95
+ if value == 0:
96
+ return None
97
+ if value < 0:
98
+ raise RuntimeError(
99
+ "_ctx_source_updated_at does not accept negative unix timestamps."
100
+ )
101
+
102
+ try:
103
+ return datetime.fromtimestamp(value, tz=timezone.utc)
104
+ except (OverflowError, OSError, ValueError) as exc:
105
+ raise RuntimeError(
106
+ "_ctx_source_updated_at must be a valid unix timestamp."
107
+ ) from exc
108
+
109
+
110
+ def _build_source_updated_at_fields(
111
+ stream_names: Sequence[str],
112
+ source_updated_at_fields: Mapping[str, str | None] | None,
113
+ ) -> dict[str, str | None]:
114
+ return {
115
+ stream_name: (
116
+ None
117
+ if source_updated_at_fields is None
118
+ else source_updated_at_fields.get(stream_name)
119
+ )
120
+ for stream_name in stream_names
121
+ }
122
+
123
+
124
+ def _extract_source_updated_at_value(
125
+ *,
126
+ stream_name: str,
127
+ record_data: Mapping[str, Any],
128
+ source_updated_at_field: str | None,
129
+ ) -> object:
130
+ if source_updated_at_field is None:
131
+ return None
132
+
133
+ if source_updated_at_field not in record_data:
134
+ raise RuntimeError(
135
+ f"PyAirbyte stream '{stream_name}' declared SOURCE_UPDATED_AT_FIELDS[{stream_name!r}] = "
136
+ f"{source_updated_at_field!r}, but that field was not present in the record payload."
137
+ )
138
+
139
+ return record_data[source_updated_at_field]
140
+
141
+
142
+ def _with_ctx_binding_primary_key(
143
+ *,
144
+ stream_name: str,
145
+ source_primary_key: Sequence[Sequence[str]] | None,
146
+ ) -> list[list[str]]:
147
+ if not source_primary_key:
148
+ raise RuntimeError(
149
+ f"PyAirbyte stream '{stream_name}' does not define a primary key. "
150
+ "ContextBase requires source-defined keys so _ctx_binding_id can be prepended safely."
151
+ )
152
+
153
+ normalized_primary_key = [[_CTX_BINDING_ID_COLUMN]]
154
+ for field_path in source_primary_key:
155
+ if len(field_path) != 1:
156
+ raise RuntimeError(
157
+ f"PyAirbyte stream '{stream_name}' uses nested primary keys {field_path!r}. "
158
+ "ContextBase only supports top-level primary-key columns."
159
+ )
160
+
161
+ field_name = field_path[0]
162
+ if field_name == _CTX_BINDING_ID_COLUMN:
163
+ continue
164
+ normalized_primary_key.append([field_name])
165
+
166
+ return normalized_primary_key
167
+
168
+
169
+ def _apply_ctx_catalog_contract(
170
+ *,
171
+ source: Any,
172
+ selected_stream_names: Sequence[str],
173
+ ) -> None:
174
+ selected_stream_name_set = set(selected_stream_names)
175
+ applied_stream_names: set[str] = set()
176
+
177
+ for stream in source.discovered_catalog.streams:
178
+ if stream.name not in selected_stream_name_set:
179
+ continue
180
+
181
+ json_schema = stream.json_schema
182
+ properties = json_schema.setdefault("properties", {})
183
+ if not isinstance(properties, dict):
184
+ raise RuntimeError(
185
+ f"PyAirbyte stream '{stream.name}' JSON schema must define a top-level "
186
+ "properties mapping."
187
+ )
188
+
189
+ properties[_CTX_BINDING_ID_COLUMN] = dict(_CTX_BINDING_ID_SCHEMA)
190
+ properties[_CTX_SOURCE_UPDATED_AT_COLUMN] = dict(_CTX_SOURCE_UPDATED_AT_SCHEMA)
191
+ stream.source_defined_primary_key = _with_ctx_binding_primary_key(
192
+ stream_name=stream.name,
193
+ source_primary_key=stream.source_defined_primary_key,
194
+ )
195
+ applied_stream_names.add(stream.name)
196
+
197
+ missing_stream_names = selected_stream_name_set - applied_stream_names
198
+ if missing_stream_names:
199
+ missing_stream_names_str = ", ".join(sorted(missing_stream_names))
200
+ raise RuntimeError(
201
+ "PyAirbyte selected streams were not found in the discovered catalog: "
202
+ f"{missing_stream_names_str}."
203
+ )
204
+
205
+
206
def _normalize_record_data_for_sql_columns(
    *,
    record_data: dict[str, Any],
    column_definitions: dict[str, TypeEngine[Any]],
) -> None:
    """Make dict/list record values fit their SQL column type, in place.

    Structured values bound for a JSON column are left untouched. Those bound
    for a String column are replaced by their ``json.dumps`` text. Scalar
    values are never modified.

    Raises:
        RuntimeError: if a dict/list value targets any other column type
            (including a column with no definition).
    """
    structured_keys = [
        key for key, value in record_data.items() if isinstance(value, (dict, list))
    ]
    for key in structured_keys:
        value = record_data[key]
        column_type = column_definitions.get(key)

        if isinstance(column_type, sqlalchemy.types.JSON):
            continue

        if not isinstance(column_type, sqlalchemy.types.String):
            raise RuntimeError(
                f"PyAirbyte record field {key!r} has structured "
                f"{type(value).__name__} data for unsupported SQL column type "
                f"{type(column_type).__name__ if column_type is not None else 'None'}."
            )

        record_data[key] = json.dumps(value)
227
+
228
+
229
class CtxPostgresProcessor(PostgresSqlProcessor):
    # Postgres processor that stamps the ctx metadata columns onto each record
    # before delegating to the stock PyAirbyte processing.

    def process_record_message(
        self,
        record_msg: AirbyteRecordMessage,
        stream_record_handler: StreamRecordHandler,
        progress_tracker: ProgressTracker,
    ) -> None:
        """Enrich ``record_msg.data`` in place, then run the parent handler.

        Adds the binding id and the coerced source-updated-at value taken from
        the cache configuration, and normalises structured values against the
        stream's SQL column definitions.
        """
        ctx_cache = cast(CtxPostgresCache, self.sql_config)
        payload = record_msg.data
        stream_name = record_msg.stream

        payload[_CTX_BINDING_ID_COLUMN] = ctx_cache.ctx_binding_id

        updated_at_field = ctx_cache.ctx_source_updated_at_fields.get(stream_name)
        payload[_CTX_SOURCE_UPDATED_AT_COLUMN] = _coerce_source_updated_at(
            _extract_source_updated_at_value(
                stream_name=stream_name,
                record_data=payload,
                source_updated_at_field=updated_at_field,
            )
        )

        _normalize_record_data_for_sql_columns(
            record_data=payload,
            column_definitions=self._get_sql_column_definitions(stream_name),
        )

        super().process_record_message(
            record_msg, stream_record_handler, progress_tracker
        )
263
+
264
+
265
class CtxPostgresCache(PostgresCache):
    # Postgres cache variant carrying the ctx settings that
    # CtxPostgresProcessor reads for every record.

    # Processor class associated with this cache.
    _sql_processor_class: ClassVar[type] = CtxPostgresProcessor

    # Value the processor writes into each record's binding-id column.
    ctx_binding_id: str = ""

    # Stream name -> record field holding the source "updated at" value.
    # None means the stream has no such field configured.
    ctx_source_updated_at_fields: dict[str, str | None] = Field(default_factory=dict)
270
+
271
+
272
def _build_cache(
    *,
    schema_name: str,
    binding_id: str,
    source_updated_at_fields: dict[str, str | None],
) -> CtxPostgresCache:
    """Create the ctx Postgres cache from the shared database URL setting.

    Connection parts missing from the URL fall back to ``localhost``, port
    ``5432``, user ``postgres`` and an empty password. The database name is
    passed through as parsed.
    """
    url = make_url(load_shared_python_settings().ctx_database_url)

    connection = {
        "host": url.host or "localhost",
        "port": url.port or 5432,
        "database": url.database,
        "username": url.username or "postgres",
        "password": url.password or "",
    }
    return CtxPostgresCache(
        **connection,
        schema_name=schema_name,
        ctx_binding_id=binding_id,
        ctx_source_updated_at_fields=source_updated_at_fields,
    )
291
+
292
+
293
+ def build_pyairbyte_source(
294
+ *,
295
+ docker_image: str,
296
+ connector_config: dict[str, Any],
297
+ ) -> Any:
298
+ image_name = docker_image.split(":")[0].split("/")[-1]
299
+ return ab.get_source(
300
+ image_name,
301
+ docker_image=docker_image,
302
+ config=connector_config,
303
+ )
304
+
305
+
306
def require_pyairbyte_selected_stream_names(
    binding: DagsterAllPlanBinding,
) -> tuple[str, ...]:
    """Return the binding's active model names as the streams to sync.

    Raises:
        PluginConfigurationError: if ``models.filter`` is set, or if
            ``models.active`` is missing or empty.
    """
    models = binding.models
    if models is not None and models.filter is not None:
        raise PluginConfigurationError(
            f"{binding.plugin_id} models.filter is not supported for PyAirbyte connectors."
        )

    active = models.active if models is not None else None
    # A missing list and an empty list are reported with the same message.
    selected = () if active is None else tuple(active)
    if selected:
        return selected

    raise PluginConfigurationError(
        f"{binding.plugin_id} models.active must include at least one stream."
    )
326
+
327
+
328
def run_pyairbyte_sync(
    *,
    context: AssetExecutionContext,
    plugin_id: str,
    binding_id: str,
    selected_stream_names: Sequence[str],
    source_updated_at_fields: Mapping[str, str | None] | None = None,
    force_full_refresh: bool = False,
    source: Any,
) -> dg.MaterializeResult:
    """Read the selected streams of ``source`` into the ctx Postgres cache.

    The cache uses ``plugin_id`` as its schema name and stamps ``binding_id``
    on every record. The discovered catalog is extended with the ctx columns
    before reading.

    Returns:
        A materialization result for the plugin's sync asset key, with the
        selected streams, their count and the full-refresh flag as metadata.
    """
    streams = tuple(selected_stream_names)
    run_label = (
        f"{plugin_id} sync for binding_id={binding_id} "
        f"selected_streams={list(streams)}"
    )
    context.log.info(f"Starting {run_label}")

    per_stream_updated_at = _build_source_updated_at_fields(
        streams,
        source_updated_at_fields,
    )
    cache = _build_cache(
        schema_name=plugin_id,
        binding_id=binding_id,
        source_updated_at_fields=per_stream_updated_at,
    )

    _apply_ctx_catalog_contract(source=source, selected_stream_names=streams)

    # Airbyte's own sync model sits below Source.read. This runtime makes no
    # per-stream incremental vs full-refresh decision; it controls only
    #   1. the raw stream IDs handed to Source.read(streams=[...]), and
    #   2. whether force_full_refresh=True is requested.
    #
    # Checked against the PyAirbyte package installed in this repo:
    #   - airbyte/sources/base.py:420 - Source.get_configured_catalog(...)
    #     builds each configured stream and assigns `sync_mode`.
    #   - airbyte/sources/base.py:449 - with force_full_refresh=True PyAirbyte
    #     prefers `full_refresh` when the stream supports it, else `incremental`.
    #   - airbyte/sources/base.py:873 - full refresh also disables state
    #     loading by setting `state_provider = None`.
    #   - airbyte/caches/_state_backend.py:31 - incremental state is stored in
    #     `_airbyte_state`.
    #
    # The seam is kept deliberately narrow: `binding.models.active` selects
    # streams and `force_full_refresh` is the single runtime override.
    source.read(
        cache=cache,
        streams=list(streams),
        force_full_refresh=force_full_refresh,
    )

    context.log.info(f"Completed {run_label}")

    asset_key = dagster_airbyte_sync_asset_key(plugin_id)
    metadata = {
        "selected_streams": dg.MetadataValue.json(list(streams)),
        "selected_stream_count": dg.MetadataValue.int(len(streams)),
        "force_full_refresh": dg.MetadataValue.bool(force_full_refresh),
    }
    return dg.MaterializeResult(asset_key=asset_key, metadata=metadata)
@@ -0,0 +1,179 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable, Iterable, Mapping, Sequence
4
+ from copy import deepcopy
5
+ from functools import wraps
6
+ from typing import Any
7
+
8
+ import dlt
9
+ from dagster_dlt import DagsterDltResource, DagsterDltTranslator
10
+
11
+ from .models import CtxModel
12
+
13
+ _CTX_METADATA_COLUMNS = (
14
+ "_ctx_binding_id",
15
+ "_ctx_source_updated_at",
16
+ )
17
+
18
+
19
+ def _normalize_primary_key(primary_key: object) -> tuple[str, ...]:
20
+ if isinstance(primary_key, str):
21
+ return (primary_key,)
22
+ if isinstance(primary_key, Sequence):
23
+ normalized = tuple(str(value) for value in primary_key)
24
+ if normalized:
25
+ return normalized
26
+ raise ValueError(
27
+ "ctx_dlt_* wrappers require a non-empty primary_key that starts with '_ctx_binding_id'."
28
+ )
29
+
30
+
31
def _validate_primary_key_prefix(primary_key: object) -> None:
    """Check that the normalized primary key begins with '_ctx_binding_id'.

    Raises:
        ValueError: if normalization fails or the first column is any other
            name.
    """
    key_columns = _normalize_primary_key(primary_key)
    if key_columns[0] == "_ctx_binding_id":
        return

    raise ValueError(
        "ctx_dlt_* wrappers require primary_key to start with '_ctx_binding_id'. "
        f"Received: {key_columns!r}"
    )
38
+
39
+
40
+ def _apply_max_table_nesting(kwargs: dict[str, Any]) -> None:
41
+ max_table_nesting = kwargs.pop("max_table_nesting", None)
42
+ if max_table_nesting not in (None, 0):
43
+ raise ValueError(
44
+ "ctx_dlt_* wrappers enforce max_table_nesting=0. Omit the argument or set it to 0."
45
+ )
46
+ kwargs["max_table_nesting"] = 0
47
+
48
+
49
+ def _with_metadata_columns(columns: object) -> dict[str, Any]:
50
+ if columns is None:
51
+ merged_columns: dict[str, Any] = {}
52
+ elif isinstance(columns, Mapping):
53
+ merged_columns = deepcopy(dict(columns))
54
+ else:
55
+ raise TypeError("ctx_dlt_* wrappers expect 'columns' to be a mapping when set.")
56
+
57
+ merged_columns.setdefault("_ctx_source_updated_at", {"data_type": "timestamp"})
58
+ return merged_columns
59
+
60
+
61
def _validate_ctx_row(
    row: object,
    *,
    wrapper_name: str,
    resource_name: str,
) -> CtxModel:
    """Check one emitted row and return it unchanged.

    The row must be a ``CtxModel`` whose alias-keyed dump contains every ctx
    metadata column and a non-blank string '_ctx_binding_id'.

    Raises:
        TypeError: if any of those conditions is not met. The message names
            the wrapper and the resource.
    """
    origin = f"{wrapper_name}('{resource_name}')"

    if not isinstance(row, CtxModel):
        raise TypeError(
            f"{origin} emitted non-CtxModel row of type {type(row).__name__}."
        )

    payload = row.model_dump(by_alias=True)
    absent = [column for column in _CTX_METADATA_COLUMNS if column not in payload]
    if absent:
        raise TypeError(
            f"{origin} emitted CtxModel row missing required "
            f"metadata columns: {', '.join(absent)}."
        )

    binding_id = payload.get("_ctx_binding_id")
    has_binding_id = isinstance(binding_id, str) and bool(binding_id.strip())
    if not has_binding_id:
        raise TypeError(f"{origin} emitted row with empty '_ctx_binding_id'.")

    return row
88
+
89
+
90
def _iter_validated_rows(
    emitted: object,
    *,
    wrapper_name: str,
    resource_name: str,
) -> Iterable[CtxModel]:
    """Normalise whatever an emitter produced into an iterable of valid rows.

    ``None`` gives an empty tuple and a single ``CtxModel`` gives a
    one-element tuple (validated immediately). Any other iterable is
    validated lazily, row by row, as it is consumed.

    Raises:
        TypeError: immediately for str/bytes/bytearray/mapping values and for
            non-iterables; row-level failures surface during iteration.
    """

    def check(candidate: object):
        return _validate_ctx_row(
            candidate,
            wrapper_name=wrapper_name,
            resource_name=resource_name,
        )

    if emitted is None:
        return ()

    if isinstance(emitted, CtxModel):
        return (check(emitted),)

    origin = f"{wrapper_name}('{resource_name}')"
    # These are iterable but can never be a collection of rows.
    if isinstance(emitted, (str, bytes, bytearray, Mapping)):
        raise TypeError(
            f"{origin} emitted invalid row container of type {type(emitted).__name__}."
        )

    if not isinstance(emitted, Iterable):
        raise TypeError(
            f"{origin} emitted non-iterable value of type {type(emitted).__name__}."
        )

    def _lazy_rows():
        for candidate in emitted:
            yield check(candidate)

    return _lazy_rows()
127
+
128
+
129
def _wrap_emitter(
    func: Callable[..., object],
    *,
    wrapper_name: str,
) -> Callable[..., Iterable[CtxModel]]:
    """Wrap ``func`` in a generator that yields only validated rows.

    The wrapper calls ``func`` with the given arguments and passes its result
    through ``_iter_validated_rows``, using ``func.__name__`` as the resource
    name in error messages.
    """

    @wraps(func)
    def wrapped(*args: Any, **kwargs: Any) -> Iterable[CtxModel]:
        produced = func(*args, **kwargs)
        validated = _iter_validated_rows(
            produced,
            wrapper_name=wrapper_name,
            resource_name=func.__name__,
        )
        yield from validated

    return wrapped
144
+
145
+
146
def ctx_dlt_resource(**kwargs: Any) -> Callable[[Callable[..., object]], Any]:
    """Decorator factory: ``dlt.resource`` with the ctx contract enforced.

    Before the decorator is created the keyword arguments are checked and
    adjusted on a copy: ``max_table_nesting`` is forced to 0, ``primary_key``
    must start with '_ctx_binding_id', and ``columns`` gains the ctx metadata
    column hint. The decorated function is wrapped so each emitted row is
    validated.

    Raises:
        ValueError / TypeError: from the argument checks described above.
    """
    options = {**kwargs}
    _apply_max_table_nesting(options)
    _validate_primary_key_prefix(options.get("primary_key"))
    options["columns"] = _with_metadata_columns(options.get("columns"))

    def decorator(func: Callable[..., object]) -> Any:
        make_resource = dlt.resource(**options)
        return make_resource(_wrap_emitter(func, wrapper_name="ctx_dlt_resource"))

    return decorator
160
+
161
+
162
def ctx_dlt_transformer(**kwargs: Any) -> Callable[[Callable[..., object]], Any]:
    """Decorator factory: ``dlt.transformer`` with the ctx contract enforced.

    Before the decorator is created the keyword arguments are checked and
    adjusted on a copy: ``max_table_nesting`` is forced to 0, ``primary_key``
    must start with '_ctx_binding_id', and ``columns`` gains the ctx metadata
    column hint. The decorated function is wrapped so each emitted row is
    validated.

    Raises:
        ValueError / TypeError: from the argument checks described above.
    """
    options = {**kwargs}
    _apply_max_table_nesting(options)
    _validate_primary_key_prefix(options.get("primary_key"))
    options["columns"] = _with_metadata_columns(options.get("columns"))

    def decorator(func: Callable[..., object]) -> Any:
        make_transformer = dlt.transformer(**options)
        return make_transformer(
            _wrap_emitter(func, wrapper_name="ctx_dlt_transformer")
        )

    return decorator
176
+
177
+
178
+ DLT_TRANSLATOR = DagsterDltTranslator()
179
+ DLT_RESOURCE = DagsterDltResource()