contextbase-shared-plugins 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. contextbase_shared_plugins-0.2.3.dist-info/METADATA +22 -0
  2. contextbase_shared_plugins-0.2.3.dist-info/RECORD +37 -0
  3. contextbase_shared_plugins-0.2.3.dist-info/WHEEL +4 -0
  4. shared_plugins/__init__.py +12 -0
  5. shared_plugins/automation.py +11 -0
  6. shared_plugins/bindings.py +253 -0
  7. shared_plugins/control_plane.py +208 -0
  8. shared_plugins/dlt.py +84 -0
  9. shared_plugins/env.py +102 -0
  10. shared_plugins/exceptions.py +10 -0
  11. shared_plugins/google_client/__init__.py +1 -0
  12. shared_plugins/google_client/auth.py +82 -0
  13. shared_plugins/google_client/batch_retry.py +308 -0
  14. shared_plugins/google_client/http_errors.py +27 -0
  15. shared_plugins/microsoft_dataverse/__init__.py +27 -0
  16. shared_plugins/microsoft_dataverse/annotations.py +38 -0
  17. shared_plugins/microsoft_dataverse/auth.py +26 -0
  18. shared_plugins/microsoft_dataverse/binding_config.py +35 -0
  19. shared_plugins/microsoft_dataverse/client.py +456 -0
  20. shared_plugins/microsoft_dataverse/ctx.py +21 -0
  21. shared_plugins/microsoft_dataverse/identifiers.py +62 -0
  22. shared_plugins/microsoft_dataverse/ingress.py +53 -0
  23. shared_plugins/microsoft_dataverse/metadata.py +106 -0
  24. shared_plugins/microsoft_dataverse/runtime_schema.py +332 -0
  25. shared_plugins/microsoft_dataverse/source.py +250 -0
  26. shared_plugins/microsoft_dataverse/tables.py +34 -0
  27. shared_plugins/microsoft_dataverse/translators.py +128 -0
  28. shared_plugins/microsoft_dataverse/types.py +346 -0
  29. shared_plugins/models.py +91 -0
  30. shared_plugins/naming.py +83 -0
  31. shared_plugins/pg_column_comments.py +59 -0
  32. shared_plugins/pyairbyte.py +399 -0
  33. shared_plugins/resources.py +179 -0
  34. shared_plugins/scratch.py +127 -0
  35. shared_plugins/sqlalchemy_types.py +225 -0
  36. shared_plugins/sqlite.py +123 -0
  37. shared_plugins/values.py +117 -0
@@ -0,0 +1,332 @@
1
+ """Per-table schema construction at warmup.
2
+
3
+ Fetches Dataverse entity + attribute metadata, builds RuntimeFieldSpecs,
4
+ validates 63-byte postgres identifier limits, and assembles per-table
5
+ runtime CtxModel subclasses + dlt column hints. The plugin's source factory
6
+ calls `build_runtime_table_schemas(...)` once per asset run.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from collections.abc import Iterable
12
+ from dataclasses import dataclass
13
+ from decimal import Decimal
14
+ from typing import Annotated, Any
15
+
16
+ from pydantic import BeforeValidator, Field, create_model
17
+ from shared_plugins.exceptions import PluginConfigurationError
18
+ from shared_plugins.models import IdStr
19
+
20
+ from .ctx import DataverseRowBase
21
+ from .identifiers import (
22
+ escape_odata_string,
23
+ pascal_case,
24
+ safe_identifier,
25
+ validate_identifier,
26
+ )
27
+ from .metadata import (
28
+ AttributeMetadataIngress,
29
+ EntityMetadataIngress,
30
+ MetadataListResponseIngress,
31
+ )
32
+ from .tables import DataverseSyncMode, DataverseTableSpec
33
+ from .types import (
34
+ RuntimeFieldSpec,
35
+ build_dlt_column_hints,
36
+ field_spec_for_attribute,
37
+ )
38
+
39
# Names of every field DataverseRowBase already declares (including those it
# inherits). pydantic.create_model would let a runtime field with one of these
# Python names silently replace the parent's field, so
# _check_no_row_base_collision refuses any attribute whose Python name appears
# in this set while the per-table model is being built.
_RESERVED_ROW_BASE_FIELDS: frozenset[str] = frozenset(DataverseRowBase.model_fields)
47
+
48
# $select list used when looking up an entity definition by entity-set name.
ENTITY_SELECT = ",".join(
    (
        "LogicalName",
        "EntitySetName",
        "PrimaryIdAttribute",
    )
)

# $select list for attribute metadata. It names base-type properties only:
# subtype properties (Precision, Targets, Format, DateTimeBehavior) belong to
# Microsoft.Dynamics.CRM.{Decimal,Lookup,DateTime,...}AttributeMetadata rather
# than the base type, and Microsoft's OData answers HTTP 400 when they are
# requested here because the cast is implicit. As a consequence Decimal columns
# default to numeric(28, 4), DateTime columns default to timestamptz
# (UserLocal), and Lookup loses its Targets metadata. The upgrade path
# (per-subtype cast queries, ~5x metadata round-trips at warmup,
# tenant-specific precision + Format + DateTimeBehavior fidelity) is described
# in internal/planned/2026-05-03-dataverse-subtype-metadata-fetch.md.
ATTRIBUTE_SELECT = ",".join(
    (
        "LogicalName",
        "AttributeType",
        "AttributeTypeName",
        "AttributeOf",
        "IsValidForRead",
        "IsCustomAttribute",
        "IsPrimaryId",
        "Description",
        "DisplayName",
    )
)
69
+
70
+
71
@dataclass(frozen=True)
class RuntimeDataverseTableSchema:
    """Immutable bundle of everything derived for one table at warmup."""

    # The table spec this schema was built for.
    spec: DataverseTableSpec
    # Field specs kept after attribute filtering and identifier validation.
    fields: tuple[RuntimeFieldSpec, ...]
    # Per-table row model generated with pydantic.create_model.
    record_model: type[DataverseRowBase]
    # Ordered, de-duplicated logical names to request via $select.
    select_columns: tuple[str, ...]
    # Column hints produced by build_dlt_column_hints.
    dlt_columns: dict[str, dict[str, Any]]
78
+
79
+
80
def build_runtime_table_schemas(
    *,
    client: Any,
    specs: Iterable[DataverseTableSpec],
) -> dict[str, RuntimeDataverseTableSchema]:
    """Build one runtime schema per table spec, keyed by entity-set name.

    Specs are processed in iteration order. If two specs share an
    ``entity_set``, the later one replaces the earlier entry.
    """
    schemas_by_entity_set: dict[str, RuntimeDataverseTableSchema] = {}
    for table_spec in specs:
        built = _build_runtime_table_schema(client=client, spec=table_spec)
        schemas_by_entity_set[built.spec.entity_set] = built
    return schemas_by_entity_set
91
+
92
+
93
def _build_runtime_table_schema(
    *,
    client: Any,
    spec: DataverseTableSpec,
) -> RuntimeDataverseTableSchema:
    """Assemble the runtime schema for a single Dataverse table.

    Looks up the entity definition for ``spec.entity_set``, fetches its
    attribute metadata, keeps each attribute that ``field_spec_for_attribute``
    maps to a field spec, and validates the identifier of every kept field and
    of each of its annotation columns. The kept fields then drive the
    ``$select`` list, the generated record model, and the dlt column hints.
    """

    def _validate(name: str) -> None:
        # The context string pinpoints the offending column in error output.
        validate_identifier(name, context=f"{spec.entity_set}.{name}")

    entity = _fetch_entity_metadata(client=client, entity_set=spec.entity_set)
    attributes = _fetch_attribute_metadata(
        client=client,
        logical_name=entity.logical_name,
    )

    kept: list[RuntimeFieldSpec] = []
    for attribute in attributes:
        candidate = field_spec_for_attribute(attribute)
        if candidate is None:
            # No field spec for this attribute: leave it out of the schema.
            continue
        _validate(candidate.logical_name)
        for annotation_column in candidate.annotation_columns:
            _validate(annotation_column.column_name)
        kept.append(candidate)

    primary_id = entity.primary_id_attribute
    select_columns = _compute_select_columns(fields=kept, primary_key=primary_id)
    record_model = _create_runtime_record_model(
        class_name=f"{pascal_case(entity.entity_set_name)}Row",
        fields=kept,
        primary_key=primary_id,
        entity_set=spec.entity_set,
    )
    dlt_columns = build_dlt_column_hints(
        tuple(kept),
        sync_mode_is_delta=spec.sync_mode is DataverseSyncMode.DELTA,
    )

    return RuntimeDataverseTableSchema(
        spec=spec,
        fields=tuple(kept),
        record_model=record_model,
        select_columns=select_columns,
        dlt_columns=dlt_columns,
    )
144
+
145
+
146
def _compute_select_columns(
    *,
    fields: list[RuntimeFieldSpec],
    primary_key: str,
) -> tuple[str, ...]:
    """Return the ordered, duplicate-free logical names for ``$select``.

    The primary key is placed first when no field already carries that
    logical name; this matches the fallback primary-key field that
    _create_runtime_record_model adds, so the two must stay in step.
    """
    logical_names = [spec.logical_name for spec in fields]
    if primary_key not in logical_names:
        logical_names = [primary_key, *logical_names]
    # dict.fromkeys keeps the first occurrence of each name, in order.
    return tuple(dict.fromkeys(logical_names))
164
+
165
+
166
def _create_runtime_record_model(
    *,
    class_name: str,
    fields: list[RuntimeFieldSpec],
    primary_key: str,
    entity_set: str,
) -> type[DataverseRowBase]:
    """Generate the per-table DataverseRowBase subclass.

    Every field spec becomes an optional pydantic field typed by
    ``_ingress_type_for(pg_type)``, and every annotation column becomes an
    optional ``str`` field. All of them default to ``None``. A field gets an
    alias equal to its source name only when ``safe_identifier`` had to change
    that name. When no field carries the primary key's logical name, an
    optional ``IdStr`` field aliased to the primary key is added first.

    Raises:
        PluginConfigurationError: a generated Python field name matches a
            field already declared on DataverseRowBase, which
            pydantic.create_model would otherwise override silently.
    """
    definitions: dict[str, tuple[Any, Any]] = {}

    def _checked_python_name(source_name: str) -> str:
        # Derive the Python attribute name and refuse reserved ones.
        python_name = safe_identifier(source_name)
        _check_no_row_base_collision(
            python_name=python_name,
            logical_name=source_name,
            entity_set=entity_set,
        )
        return python_name

    def _optional_field(
        annotation: Any, python_name: str, source_name: str
    ) -> tuple[Any, Any]:
        # Alias only when the Python name differs from the source name.
        options: dict[str, Any] = {"default": None}
        if python_name != source_name:
            options["alias"] = source_name
        return (annotation | None, Field(**options))

    if primary_key not in {spec.logical_name for spec in fields}:
        # Fallback primary-key field; _compute_select_columns injects the same
        # column on the $select side, so the two stay consistent.
        pk_python_name = _checked_python_name(primary_key)
        definitions[pk_python_name] = (
            IdStr | None,
            Field(default=None, alias=primary_key),
        )

    for spec in fields:
        python_name = _checked_python_name(spec.logical_name)
        definitions[python_name] = _optional_field(
            _ingress_type_for(spec.pg_type), python_name, spec.logical_name
        )

        for annotation_column in spec.annotation_columns:
            column_python_name = _checked_python_name(annotation_column.column_name)
            definitions[column_python_name] = _optional_field(
                str, column_python_name, annotation_column.column_name
            )

    return create_model(  # type: ignore[no-any-return]
        class_name,
        __base__=DataverseRowBase,
        __module__=__name__,
        **definitions,
    )
243
+
244
+
245
def _check_no_row_base_collision(
    *,
    python_name: str,
    logical_name: str,
    entity_set: str,
) -> None:
    """Reject a generated field name that DataverseRowBase already declares.

    Raises:
        PluginConfigurationError: ``python_name`` is one of the reserved
            DataverseRowBase field names.
    """
    if python_name not in _RESERVED_ROW_BASE_FIELDS:
        return
    message = (
        f"Dataverse attribute {logical_name!r} on entity_set "
        f"{entity_set!r} produces field name {python_name!r}, which "
        f"would silently shadow reserved {python_name!r} on "
        "DataverseRowBase. This collision is unhandled."
    )
    raise PluginConfigurationError(message)
258
+
259
+
260
+ def _ingress_type_for(pg_type: str) -> Any:
261
+ """Map dlt pg_type back to the lenient Python ingress type.
262
+
263
+ Most scalars arrive from OData as JSON-typed values; pydantic + dlt's
264
+ normalizer coerce on the way to postgres.
265
+ """
266
+ if pg_type == "bigint":
267
+ return int
268
+ if pg_type == "double":
269
+ return float
270
+ if pg_type == "bool":
271
+ return bool
272
+ if pg_type == "bigint[]":
273
+ # Dataverse v9.2 Web API serializes MultiSelectPicklist as a
274
+ # comma-separated string of option-set integers (e.g.
275
+ # "975620006,975620013"), not a JSON array. The attribute metadata
276
+ # describes the conceptual type (list of options) but is silent on
277
+ # wire format; verified empirically against a real tenant on
278
+ # 2026-05-12. A pre-validator splits the CSV before pydantic
279
+ # validates against list[int]; if Microsoft ever ships an actual
280
+ # JSON array, the passthrough branch keeps it working.
281
+ return Annotated[list[int], BeforeValidator(_coerce_multiselect_picklist_csv)]
282
+ if pg_type == "numeric":
283
+ # Decimal/Money values arrive as JSON numbers (e.g. 0.5, 8.0), not
284
+ # strings. Pydantic auto-coerces JSON numbers to Decimal in lenient
285
+ # mode (CtxModel inherits extra="forbid" but not strict=True).
286
+ return Decimal
287
+ # text, uuid, date, timestamp arrive as strings from OData.
288
+ return str
289
+
290
+
291
def _coerce_multiselect_picklist_csv(value: Any) -> Any:
    """Turn a comma-separated string of integers into a list of ints.

    Used as a pydantic ``BeforeValidator`` for MultiSelectPicklist columns.

    Args:
        value: The raw value for the column.

    Returns:
        A list of ints when ``value`` is a string (an empty list when the
        string is empty or only whitespace); otherwise ``value`` unchanged,
        so lists and ``None`` pass straight through.

    Raises:
        ValueError: a comma-separated segment is not an integer.
    """
    if not isinstance(value, str):
        return value
    text = value.strip()
    if not text:
        # "".split(",") yields [""], and int("") raises; an empty selection
        # is represented as an empty list instead.
        return []
    # int() tolerates surrounding whitespace, so "1, 2" parses cleanly.
    return [int(part) for part in text.split(",")]
295
+
296
+
297
def _fetch_entity_metadata(
    *,
    client: Any,
    entity_set: str,
) -> EntityMetadataIngress:
    """Fetch the single entity definition whose EntitySetName is ``entity_set``.

    Raises:
        PluginConfigurationError: the query matched zero or several
            entity definitions.
    """
    query = {
        "$select": ENTITY_SELECT,
        "$filter": f"EntitySetName eq '{escape_odata_string(entity_set)}'",
    }
    payload = client.get_json("EntityDefinitions", params=query)
    matches = MetadataListResponseIngress.model_validate(payload).value
    match_count = len(matches)
    if match_count == 1:
        return EntityMetadataIngress.model_validate(matches[0])
    raise PluginConfigurationError(
        f"Expected exactly one EntityDefinitions match for {entity_set}, "
        f"got {match_count}. Confirm the entity_set name in TABLE_SPECS "
        "exists in the tenant's Dataverse schema."
    )
317
+
318
+
319
def _fetch_attribute_metadata(
    *,
    client: Any,
    logical_name: str,
) -> tuple[AttributeMetadataIngress, ...]:
    """Fetch and validate attribute metadata for the entity ``logical_name``.

    Only the properties listed in ATTRIBUTE_SELECT are requested. Each item
    of the response is validated as an AttributeMetadataIngress.
    """
    escaped_name = escape_odata_string(logical_name)
    path = f"EntityDefinitions(LogicalName='{escaped_name}')/Attributes"
    payload = client.get_json(path, params={"$select": ATTRIBUTE_SELECT})
    items = MetadataListResponseIngress.model_validate(payload).value
    return tuple(map(AttributeMetadataIngress.model_validate, items))
@@ -0,0 +1,250 @@
1
+ """Dlt source factory for any Dataverse-backed plugin.
2
+
3
+ Plugin asset bodies call `build_dataverse_dlt_source(plugin_id, job, ...)`
4
+ to produce a `@dlt.source`-wrapped tuple of resources. One resource per
5
+ active table (delta or snapshot mode). The plugin's component.py is
6
+ agnostic to OData/Pydantic/runtime-schema concerns -- those all live here.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import time
13
+ from collections.abc import Iterator, Mapping
14
+ from typing import Any
15
+
16
+ import dlt
17
+ from shared_plugins.bindings import ResolvedBindingModels, iter_active_model_rows
18
+ from shared_plugins.naming import dlt_resource_name, dlt_source_name
19
+ from shared_plugins.resources import ctx_dlt_resource
20
+
21
+ from .client import DataverseClient
22
+ from .ctx import DataverseRowBase
23
+ from .runtime_schema import RuntimeDataverseTableSchema, build_runtime_table_schemas
24
+ from .tables import DataverseSyncMode, DataverseTableSpec
25
+ from .translators import dataverse_record_to_row
26
+
27
LOGGER = logging.getLogger(__name__)

# Key in the dlt resource state under which the latest deltaLink is kept.
DELTA_LINK_KEY = "delta_link"

# write_disposition and merge_key passed to every snapshot-mode resource.
SNAPSHOT_WRITE_DISPOSITION = dict(
    disposition="merge",
    strategy="delete-insert",
)
SNAPSHOT_MERGE_KEY = ("_ctx_binding_id",)
34
+
35
+
36
def build_dataverse_dlt_source(
    *,
    plugin_id: str,
    job: str,
    binding_id: str,
    client: DataverseClient,
    binding_models: ResolvedBindingModels,
    specs: tuple[DataverseTableSpec, ...],
) -> Any:
    """Create the dlt source holding one resource per active table.

    A spec is active when its ``resource_name`` is in
    ``binding_models.active``. Runtime schemas for the active specs are built
    up front, before the source function runs. Each active spec then gets a
    delta resource when its sync mode is DELTA and a snapshot resource
    otherwise. An info line is logged when no resource was produced.
    """
    active_specs = tuple(
        candidate
        for candidate in specs
        if candidate.resource_name in binding_models.active
    )
    runtime_schemas: Mapping[str, RuntimeDataverseTableSchema] = (
        build_runtime_table_schemas(client=client, specs=active_specs)
    )

    @dlt.source(name=dlt_source_name(plugin_id, job))
    def _source() -> tuple[Any, ...]:
        resources: list[Any] = []
        for table_spec in active_specs:
            table_schema = runtime_schemas[table_spec.entity_set]
            # Both builders take the same keyword arguments.
            if table_spec.sync_mode is DataverseSyncMode.DELTA:
                build_resource = _build_delta_resource
            else:
                build_resource = _build_snapshot_resource
            resources.append(
                build_resource(
                    binding_id=binding_id,
                    client=client,
                    schema=table_schema,
                    binding_models=binding_models,
                )
            )

        if not resources:
            LOGGER.info(
                "%s.%s.empty_source binding_id=%s active_models=%s",
                plugin_id,
                job,
                binding_id,
                tuple(binding_models.active),
            )

        return tuple(resources)

    return _source()
88
+
89
+
90
def _build_delta_resource(
    *,
    binding_id: str,
    client: DataverseClient,
    schema: RuntimeDataverseTableSchema,
    binding_models: ResolvedBindingModels,
) -> Any:
    """Build the merge-mode dlt resource for a delta-synced table.

    The resource reads the stored deltaLink from its dlt resource state,
    yields the rows produced by ``_iter_delta_rows`` through
    ``iter_active_model_rows``, and only after those rows are exhausted writes
    the new deltaLink back into the state.
    """
    table = schema.spec

    @ctx_dlt_resource(
        name=dlt_resource_name(table.resource_name),
        write_disposition="merge",
        primary_key=("_ctx_binding_id", table.primary_key),
        columns=schema.dlt_columns,
    )
    def delta_resource() -> Iterator[DataverseRowBase]:
        resource_state = dlt.current.resource_state()
        stored_link = resource_state.get(DELTA_LINK_KEY)
        if not (stored_link is None or isinstance(stored_link, str)):
            raise RuntimeError(
                f"Stored Dataverse deltaLink for {table.entity_set} is not a string."
            )
        # _iter_delta_rows fills this holder once its page loop has finished.
        link_holder: dict[str, str] = {}
        yield from iter_active_model_rows(
            model_name=table.resource_name,
            rows=_iter_delta_rows(
                binding_id=binding_id,
                client=client,
                schema=schema,
                initial_delta_link=stored_link,
                next_delta_link=link_holder,
            ),
            binding_models=binding_models,
        )
        resource_state[DELTA_LINK_KEY] = link_holder[DELTA_LINK_KEY]

    return delta_resource
128
+
129
+
130
def _build_snapshot_resource(
    *,
    binding_id: str,
    client: DataverseClient,
    schema: RuntimeDataverseTableSchema,
    binding_models: ResolvedBindingModels,
) -> Any:
    """Build the dlt resource for a snapshot-synced table.

    The resource uses SNAPSHOT_WRITE_DISPOSITION with SNAPSHOT_MERGE_KEY and
    yields the rows produced by ``_iter_snapshot_rows`` through
    ``iter_active_model_rows``.
    """
    table = schema.spec

    @ctx_dlt_resource(
        name=dlt_resource_name(table.resource_name),
        write_disposition=SNAPSHOT_WRITE_DISPOSITION,
        merge_key=SNAPSHOT_MERGE_KEY,
        primary_key=("_ctx_binding_id", table.primary_key),
        columns=schema.dlt_columns,
    )
    def snapshot_resource() -> Iterator[DataverseRowBase]:
        yield from iter_active_model_rows(
            model_name=table.resource_name,
            rows=_iter_snapshot_rows(
                binding_id=binding_id,
                client=client,
                schema=schema,
            ),
            binding_models=binding_models,
        )

    return snapshot_resource
159
+
160
+
161
def _iter_delta_rows(
    *,
    binding_id: str,
    client: DataverseClient,
    schema: RuntimeDataverseTableSchema,
    initial_delta_link: str | None,
    next_delta_link: dict[str, str],
) -> Iterator[DataverseRowBase]:
    """Yield translated rows from every change-tracking page of a table.

    The most recent non-empty ``delta_link`` seen on a page is remembered.
    After the last page it is stored in ``next_delta_link`` under
    DELTA_LINK_KEY. Start and completion are logged; the completion line
    carries page count, row count and elapsed seconds.

    Raises:
        RuntimeError: no deltaLink containing ``$deltatoken`` is known once
            all pages have been read.
    """
    table = schema.spec
    newest_link = initial_delta_link
    rows_seen = 0
    pages_seen = 0
    started = time.monotonic()

    LOGGER.info(
        "dataverse.delta.start entity_set=%s mode=%s",
        table.entity_set,
        "incremental" if initial_delta_link else "full",
    )

    pages = client.iter_change_tracking_pages(
        table,
        delta_link=initial_delta_link,
        select=schema.select_columns,
    )
    for pages_seen, page in enumerate(pages, start=1):
        # Keep the previous link when this page carries none.
        newest_link = page.delta_link or newest_link

        for record in page.value:
            payload = record.model_dump(by_alias=True, exclude_none=True)
            rows_seen += 1
            yield dataverse_record_to_row(
                binding_id=binding_id,
                schema=schema,
                record=record,
                raw_payload=payload,
            )

    has_delta_token = newest_link is not None and "$deltatoken" in newest_link
    if not has_delta_token:
        raise RuntimeError(
            f"Dataverse delta drain for {table.entity_set} did not finish with "
            "a deltaLink."
        )
    next_delta_link[DELTA_LINK_KEY] = newest_link

    LOGGER.info(
        "dataverse.delta.done entity_set=%s pages=%d rows=%d elapsed=%.1fs",
        table.entity_set,
        pages_seen,
        rows_seen,
        time.monotonic() - started,
    )
214
+
215
+
216
def _iter_snapshot_rows(
    *,
    binding_id: str,
    client: DataverseClient,
    schema: RuntimeDataverseTableSchema,
) -> Iterator[DataverseRowBase]:
    """Yield translated rows from every snapshot page of a table.

    Start and completion are logged; the completion line carries page count,
    row count and elapsed seconds.
    """
    table = schema.spec
    rows_seen = 0
    pages_seen = 0
    started = time.monotonic()

    LOGGER.info("dataverse.snapshot.start entity_set=%s", table.entity_set)

    pages = client.iter_snapshot_pages(
        table,
        select=schema.select_columns,
    )
    for pages_seen, page in enumerate(pages, start=1):
        for record in page.value:
            payload = record.model_dump(by_alias=True, exclude_none=True)
            rows_seen += 1
            yield dataverse_record_to_row(
                binding_id=binding_id,
                schema=schema,
                record=record,
                raw_payload=payload,
            )

    LOGGER.info(
        "dataverse.snapshot.done entity_set=%s pages=%d rows=%d elapsed=%.1fs",
        table.entity_set,
        pages_seen,
        rows_seen,
        time.monotonic() - started,
    )
@@ -0,0 +1,34 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from enum import Enum
5
+
6
+
7
class DataverseSyncMode(str, Enum):
    """How a Dataverse table is synced; members compare equal to their strings."""

    # Change-tracked sync (Prefer: odata.track-changes).
    DELTA = "delta"
    # Full pull of the table.
    SNAPSHOT = "snapshot"
10
+
11
+
12
@dataclass(frozen=True)
class DataverseTableSpec:
    """Describe one Dataverse entity set and the way it is synced.

    Attributes:
        entity_set: OData entity-set name, e.g. "msdyn_projects".
        primary_key: Dataverse logical name of the table's primary-key
            column, e.g. "msdyn_projectid". It becomes the trailing element
            of the dlt primary_key, after _ctx_binding_id. Plugin authors
            read the value from EntityDefinitions.PrimaryIdAttribute and
            hard-code it here so the spec list documents itself.
        sync_mode: DELTA (Prefer: odata.track-changes) or SNAPSHOT (full
            pull).
        resource_name: dlt resource name. By convention the same as
            entity_set, kept separate so a plugin can rename it.

    $select is deliberately absent from the spec: the selected columns are
    computed at warmup from per-tenant attribute metadata (see
    runtime_schema.py).
    """

    entity_set: str
    primary_key: str
    sync_mode: DataverseSyncMode
    resource_name: str