contextbase-shared-plugins 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextbase_shared_plugins-0.2.3.dist-info/METADATA +22 -0
- contextbase_shared_plugins-0.2.3.dist-info/RECORD +37 -0
- contextbase_shared_plugins-0.2.3.dist-info/WHEEL +4 -0
- shared_plugins/__init__.py +12 -0
- shared_plugins/automation.py +11 -0
- shared_plugins/bindings.py +253 -0
- shared_plugins/control_plane.py +208 -0
- shared_plugins/dlt.py +84 -0
- shared_plugins/env.py +102 -0
- shared_plugins/exceptions.py +10 -0
- shared_plugins/google_client/__init__.py +1 -0
- shared_plugins/google_client/auth.py +82 -0
- shared_plugins/google_client/batch_retry.py +308 -0
- shared_plugins/google_client/http_errors.py +27 -0
- shared_plugins/microsoft_dataverse/__init__.py +27 -0
- shared_plugins/microsoft_dataverse/annotations.py +38 -0
- shared_plugins/microsoft_dataverse/auth.py +26 -0
- shared_plugins/microsoft_dataverse/binding_config.py +35 -0
- shared_plugins/microsoft_dataverse/client.py +456 -0
- shared_plugins/microsoft_dataverse/ctx.py +21 -0
- shared_plugins/microsoft_dataverse/identifiers.py +62 -0
- shared_plugins/microsoft_dataverse/ingress.py +53 -0
- shared_plugins/microsoft_dataverse/metadata.py +106 -0
- shared_plugins/microsoft_dataverse/runtime_schema.py +332 -0
- shared_plugins/microsoft_dataverse/source.py +250 -0
- shared_plugins/microsoft_dataverse/tables.py +34 -0
- shared_plugins/microsoft_dataverse/translators.py +128 -0
- shared_plugins/microsoft_dataverse/types.py +346 -0
- shared_plugins/models.py +91 -0
- shared_plugins/naming.py +83 -0
- shared_plugins/pg_column_comments.py +59 -0
- shared_plugins/pyairbyte.py +399 -0
- shared_plugins/resources.py +179 -0
- shared_plugins/scratch.py +127 -0
- shared_plugins/sqlalchemy_types.py +225 -0
- shared_plugins/sqlite.py +123 -0
- shared_plugins/values.py +117 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
"""Per-table schema construction at warmup.
|
|
2
|
+
|
|
3
|
+
Fetches Dataverse entity + attribute metadata, builds RuntimeFieldSpecs,
|
|
4
|
+
validates 63-byte postgres identifier limits, and assembles per-table
|
|
5
|
+
runtime CtxModel subclasses + dlt column hints. The plugin's source factory
|
|
6
|
+
calls `build_runtime_table_schemas(...)` once per asset run.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from collections.abc import Iterable
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from decimal import Decimal
|
|
14
|
+
from typing import Annotated, Any
|
|
15
|
+
|
|
16
|
+
from pydantic import BeforeValidator, Field, create_model
|
|
17
|
+
from shared_plugins.exceptions import PluginConfigurationError
|
|
18
|
+
from shared_plugins.models import IdStr
|
|
19
|
+
|
|
20
|
+
from .ctx import DataverseRowBase
|
|
21
|
+
from .identifiers import (
|
|
22
|
+
escape_odata_string,
|
|
23
|
+
pascal_case,
|
|
24
|
+
safe_identifier,
|
|
25
|
+
validate_identifier,
|
|
26
|
+
)
|
|
27
|
+
from .metadata import (
|
|
28
|
+
AttributeMetadataIngress,
|
|
29
|
+
EntityMetadataIngress,
|
|
30
|
+
MetadataListResponseIngress,
|
|
31
|
+
)
|
|
32
|
+
from .tables import DataverseSyncMode, DataverseTableSpec
|
|
33
|
+
from .types import (
|
|
34
|
+
RuntimeFieldSpec,
|
|
35
|
+
build_dlt_column_hints,
|
|
36
|
+
field_spec_for_attribute,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# Names of every field DataverseRowBase already declares (its own plus the
# ones it inherits from CtxModel). pydantic.create_model would let a runtime
# field with one of these Python names silently replace the parent field,
# which would break tombstone semantics or the _ctx_binding_id invariant.
# _create_runtime_record_model checks generated names against this set and
# fails loudly at warmup instead.
_RESERVED_ROW_BASE_FIELDS: frozenset[str] = frozenset(DataverseRowBase.model_fields)
|
|
47
|
+
|
|
48
|
+
# $select list used when querying EntityDefinitions for a table.
ENTITY_SELECT = ",".join(("LogicalName", "EntitySetName", "PrimaryIdAttribute"))

# $select list for attribute metadata: base-type properties only.
# Subtype properties (Precision, Targets, Format, DateTimeBehavior) are
# declared on Microsoft.Dynamics.CRM.{Decimal,Lookup,DateTime,...}
# AttributeMetadata rather than on the base type, and Microsoft's OData
# answers $select=Precision,... with HTTP 400 here because the cast is
# implicit. Consequences: Decimal columns default to numeric(28, 4), DateTime
# columns default to timestamptz (UserLocal), and Lookup loses its Targets
# metadata. The upgrade path (per-subtype cast queries, ~5x metadata
# round-trips at warmup, tenant-specific precision + Format +
# DateTimeBehavior fidelity) is described in
# internal/planned/2026-05-03-dataverse-subtype-metadata-fetch.md.
ATTRIBUTE_SELECT = ",".join(
    (
        "LogicalName",
        "AttributeType",
        "AttributeTypeName",
        "AttributeOf",
        "IsValidForRead",
        "IsCustomAttribute",
        "IsPrimaryId",
        "Description",
        "DisplayName",
    )
)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass(frozen=True)
class RuntimeDataverseTableSchema:
    """Immutable bundle of the per-table artifacts assembled at warmup."""

    # The table spec this schema was built from.
    spec: DataverseTableSpec
    # Field specs kept for the table, in attribute-metadata order.
    fields: tuple[RuntimeFieldSpec, ...]
    # DataverseRowBase subclass generated for this table's rows.
    record_model: type[DataverseRowBase]
    # Logical names handed to the client as the `select` argument.
    select_columns: tuple[str, ...]
    # Column hints returned by build_dlt_column_hints.
    dlt_columns: dict[str, dict[str, Any]]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def build_runtime_table_schemas(
    *,
    client: Any,
    specs: Iterable[DataverseTableSpec],
) -> dict[str, RuntimeDataverseTableSchema]:
    """Build one runtime schema per spec and index the results by entity set.

    Specs are processed in iteration order. If two specs share an
    ``entity_set``, the later schema replaces the earlier one in the result.
    """
    schemas_by_entity_set: dict[str, RuntimeDataverseTableSchema] = {}
    for table_spec in specs:
        built = _build_runtime_table_schema(client=client, spec=table_spec)
        schemas_by_entity_set[built.spec.entity_set] = built
    return schemas_by_entity_set
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _build_runtime_table_schema(
    *,
    client: Any,
    spec: DataverseTableSpec,
) -> RuntimeDataverseTableSchema:
    """Fetch metadata for one table and assemble its runtime schema.

    Attributes for which ``field_spec_for_attribute`` returns ``None`` are
    skipped. For every kept field, the logical name and each annotation
    column name are passed through ``validate_identifier`` before the field
    is accepted.
    """
    entity = _fetch_entity_metadata(client=client, entity_set=spec.entity_set)
    attributes = _fetch_attribute_metadata(
        client=client,
        logical_name=entity.logical_name,
    )

    def _validate(column: str) -> None:
        # Context string names the table and column being checked.
        validate_identifier(column, context=f"{spec.entity_set}.{column}")

    kept: list[RuntimeFieldSpec] = []
    for attribute in attributes:
        candidate = field_spec_for_attribute(attribute)
        if candidate is None:
            continue
        _validate(candidate.logical_name)
        for annotation_column in candidate.annotation_columns:
            _validate(annotation_column.column_name)
        kept.append(candidate)

    primary_key = entity.primary_id_attribute
    frozen_fields = tuple(kept)

    select_columns = _compute_select_columns(fields=kept, primary_key=primary_key)
    record_model = _create_runtime_record_model(
        class_name=f"{pascal_case(entity.entity_set_name)}Row",
        fields=kept,
        primary_key=primary_key,
        entity_set=spec.entity_set,
    )
    dlt_columns = build_dlt_column_hints(
        frozen_fields,
        sync_mode_is_delta=spec.sync_mode is DataverseSyncMode.DELTA,
    )

    return RuntimeDataverseTableSchema(
        spec=spec,
        fields=frozen_fields,
        record_model=record_model,
        select_columns=select_columns,
        dlt_columns=dlt_columns,
    )
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _compute_select_columns(
    *,
    fields: list[RuntimeFieldSpec],
    primary_key: str,
) -> tuple[str, ...]:
    """Return the logical names to select, deduplicated in first-seen order.

    When the primary key is not among the field names it is placed first.
    This mirrors the model-side fallback in _create_runtime_record_model;
    keep the two consistent.
    """
    logical_names = [spec.logical_name for spec in fields]
    if primary_key in logical_names:
        ordered = logical_names
    else:
        ordered = [primary_key, *logical_names]
    # dict keys keep first-insertion order, so this drops repeats without
    # reordering the survivors.
    return tuple(dict.fromkeys(ordered))
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _create_runtime_record_model(
    *,
    class_name: str,
    fields: list[RuntimeFieldSpec],
    primary_key: str,
    entity_set: str,
) -> type[DataverseRowBase]:
    """Generate the per-table DataverseRowBase subclass with pydantic.create_model.

    Every field is optional with a ``None`` default. Attribute fields use the
    lenient ingress type chosen by ``_ingress_type_for``; annotation columns
    are plain strings. A field carries an alias equal to its Dataverse name
    whenever ``safe_identifier`` changed that name. If the primary key is not
    among ``fields`` it is added as an ``IdStr`` field, always aliased to the
    logical name, mirroring the select-side fallback in
    _compute_select_columns.

    Raises PluginConfigurationError when a generated Python field name
    collides with a field already declared on DataverseRowBase, because
    create_model would otherwise silently override the parent field.
    """

    def _declare(source_name: str, *, always_alias: bool = False) -> tuple[str, Any]:
        # Resolve the Python name, reject reserved names, and build the Field.
        python_name = safe_identifier(source_name)
        _check_no_row_base_collision(
            python_name=python_name,
            logical_name=source_name,
            entity_set=entity_set,
        )
        if always_alias or python_name != source_name:
            return python_name, Field(default=None, alias=source_name)
        return python_name, Field(default=None)

    definitions: dict[str, tuple[Any, Any]] = {}

    pk_is_a_field = any(spec.logical_name == primary_key for spec in fields)
    if not pk_is_a_field:
        # Declared first so the model knows the PK even when attribute
        # metadata did not yield it.
        pk_name, pk_info = _declare(primary_key, always_alias=True)
        definitions[pk_name] = (IdStr | None, pk_info)

    # NOTE(review): two source names that map to the same Python name
    # overwrite each other here, last one wins -- confirm that is acceptable.
    for spec in fields:
        name, info = _declare(spec.logical_name)
        definitions[name] = (_ingress_type_for(spec.pg_type) | None, info)

        for annotation_column in spec.annotation_columns:
            ann_name, ann_info = _declare(annotation_column.column_name)
            definitions[ann_name] = (str | None, ann_info)

    return create_model(  # type: ignore[no-any-return]
        class_name,
        __base__=DataverseRowBase,
        __module__=__name__,
        **definitions,
    )
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _check_no_row_base_collision(
    *,
    python_name: str,
    logical_name: str,
    entity_set: str,
) -> None:
    """Raise PluginConfigurationError if ``python_name`` is a reserved field.

    Reserved names are those in _RESERVED_ROW_BASE_FIELDS. ``logical_name``
    and ``entity_set`` are only used to build the error message.
    """
    if python_name not in _RESERVED_ROW_BASE_FIELDS:
        return
    message = (
        f"Dataverse attribute {logical_name!r} on entity_set "
        f"{entity_set!r} produces field name {python_name!r}, which "
        f"would silently shadow reserved {python_name!r} on "
        "DataverseRowBase. This collision is unhandled."
    )
    raise PluginConfigurationError(message)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _ingress_type_for(pg_type: str) -> Any:
|
|
261
|
+
"""Map dlt pg_type back to the lenient Python ingress type.
|
|
262
|
+
|
|
263
|
+
Most scalars arrive from OData as JSON-typed values; pydantic + dlt's
|
|
264
|
+
normalizer coerce on the way to postgres.
|
|
265
|
+
"""
|
|
266
|
+
if pg_type == "bigint":
|
|
267
|
+
return int
|
|
268
|
+
if pg_type == "double":
|
|
269
|
+
return float
|
|
270
|
+
if pg_type == "bool":
|
|
271
|
+
return bool
|
|
272
|
+
if pg_type == "bigint[]":
|
|
273
|
+
# Dataverse v9.2 Web API serializes MultiSelectPicklist as a
|
|
274
|
+
# comma-separated string of option-set integers (e.g.
|
|
275
|
+
# "975620006,975620013"), not a JSON array. The attribute metadata
|
|
276
|
+
# describes the conceptual type (list of options) but is silent on
|
|
277
|
+
# wire format; verified empirically against a real tenant on
|
|
278
|
+
# 2026-05-12. A pre-validator splits the CSV before pydantic
|
|
279
|
+
# validates against list[int]; if Microsoft ever ships an actual
|
|
280
|
+
# JSON array, the passthrough branch keeps it working.
|
|
281
|
+
return Annotated[list[int], BeforeValidator(_coerce_multiselect_picklist_csv)]
|
|
282
|
+
if pg_type == "numeric":
|
|
283
|
+
# Decimal/Money values arrive as JSON numbers (e.g. 0.5, 8.0), not
|
|
284
|
+
# strings. Pydantic auto-coerces JSON numbers to Decimal in lenient
|
|
285
|
+
# mode (CtxModel inherits extra="forbid" but not strict=True).
|
|
286
|
+
return Decimal
|
|
287
|
+
# text, uuid, date, timestamp arrive as strings from OData.
|
|
288
|
+
return str
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _coerce_multiselect_picklist_csv(value: Any) -> Any:
    """Turn a comma-separated option-set string into a list of ints.

    Used as a pydantic ``BeforeValidator``. Non-string values (``None``, an
    actual list, ...) are returned unchanged so that later validation
    decides what to do with them.

    Blank segments are skipped, so an empty or whitespace-only string yields
    ``[]`` and a trailing or doubled comma does not fail the whole row.
    A non-numeric segment still raises ``ValueError`` from ``int``.
    """
    if not isinstance(value, str):
        return value
    # int() tolerates surrounding whitespace, so only blank parts need to go.
    return [int(part) for part in value.split(",") if part.strip()]
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _fetch_entity_metadata(
    *,
    client: Any,
    entity_set: str,
) -> EntityMetadataIngress:
    """Look up the single EntityDefinitions row for ``entity_set``.

    Queries ``EntityDefinitions`` filtered on ``EntitySetName`` and validates
    the one matching item as ``EntityMetadataIngress``.

    Raises PluginConfigurationError when the query does not return exactly
    one match.
    """
    query = {
        "$select": ENTITY_SELECT,
        "$filter": f"EntitySetName eq '{escape_odata_string(entity_set)}'",
    }
    listing = MetadataListResponseIngress.model_validate(
        client.get_json("EntityDefinitions", params=query)
    )
    matches = listing.value
    match_count = len(matches)
    if match_count == 1:
        return EntityMetadataIngress.model_validate(matches[0])
    raise PluginConfigurationError(
        f"Expected exactly one EntityDefinitions match for {entity_set}, "
        f"got {match_count}. Confirm the entity_set name in TABLE_SPECS "
        "exists in the tenant's Dataverse schema."
    )
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def _fetch_attribute_metadata(
    *,
    client: Any,
    logical_name: str,
) -> tuple[AttributeMetadataIngress, ...]:
    """Fetch and validate the attribute metadata of one entity.

    Requests the ``Attributes`` collection of the entity addressed by
    ``logical_name`` with ATTRIBUTE_SELECT, and returns each item validated
    as ``AttributeMetadataIngress``, in response order.
    """
    escaped_name = escape_odata_string(logical_name)
    resource_path = f"EntityDefinitions(LogicalName='{escaped_name}')/Attributes"
    listing = MetadataListResponseIngress.model_validate(
        client.get_json(resource_path, params={"$select": ATTRIBUTE_SELECT})
    )
    validated = [
        AttributeMetadataIngress.model_validate(raw_item)
        for raw_item in listing.value
    ]
    return tuple(validated)
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
"""Dlt source factory for any Dataverse-backed plugin.
|
|
2
|
+
|
|
3
|
+
Plugin asset bodies call `build_dataverse_dlt_source(plugin_id, job, ...)`
|
|
4
|
+
to produce a `@dlt.source`-wrapped tuple of resources. One resource per
|
|
5
|
+
active table (delta or snapshot mode). The plugin's component.py is
|
|
6
|
+
agnostic to OData/Pydantic/runtime-schema concerns -- those all live here.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
import time
|
|
13
|
+
from collections.abc import Iterator, Mapping
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
import dlt
|
|
17
|
+
from shared_plugins.bindings import ResolvedBindingModels, iter_active_model_rows
|
|
18
|
+
from shared_plugins.naming import dlt_resource_name, dlt_source_name
|
|
19
|
+
from shared_plugins.resources import ctx_dlt_resource
|
|
20
|
+
|
|
21
|
+
from .client import DataverseClient
|
|
22
|
+
from .ctx import DataverseRowBase
|
|
23
|
+
from .runtime_schema import RuntimeDataverseTableSchema, build_runtime_table_schemas
|
|
24
|
+
from .tables import DataverseSyncMode, DataverseTableSpec
|
|
25
|
+
from .translators import dataverse_record_to_row
|
|
26
|
+
|
|
27
|
+
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
# Resource-state key under which the delta resource keeps its deltaLink.
DELTA_LINK_KEY = "delta_link"
# Write disposition and merge key applied to snapshot resources.
SNAPSHOT_WRITE_DISPOSITION = dict(disposition="merge", strategy="delete-insert")
SNAPSHOT_MERGE_KEY = ("_ctx_binding_id",)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def build_dataverse_dlt_source(
    *,
    plugin_id: str,
    job: str,
    binding_id: str,
    client: DataverseClient,
    binding_models: ResolvedBindingModels,
    specs: tuple[DataverseTableSpec, ...],
) -> Any:
    """Build the dlt source for one binding and return the called source.

    Only specs whose ``resource_name`` is in ``binding_models.active`` are
    used. Their runtime schemas are built once, up front; the source then
    gets one resource per active spec, delta or snapshot according to
    ``sync_mode``. An info line is logged when no resource results.
    """
    enabled_specs = tuple(
        candidate
        for candidate in specs
        if candidate.resource_name in binding_models.active
    )
    schemas_by_entity_set: Mapping[str, RuntimeDataverseTableSchema] = (
        build_runtime_table_schemas(client=client, specs=enabled_specs)
    )

    def _resource_for(table_spec: DataverseTableSpec) -> Any:
        # Pick the builder that matches the table's sync mode.
        table_schema = schemas_by_entity_set[table_spec.entity_set]
        if table_spec.sync_mode is DataverseSyncMode.DELTA:
            builder = _build_delta_resource
        else:
            builder = _build_snapshot_resource
        return builder(
            binding_id=binding_id,
            client=client,
            schema=table_schema,
            binding_models=binding_models,
        )

    @dlt.source(name=dlt_source_name(plugin_id, job))
    def _source() -> tuple[Any, ...]:
        built = tuple(_resource_for(table_spec) for table_spec in enabled_specs)
        if not built:
            LOGGER.info(
                "%s.%s.empty_source binding_id=%s active_models=%s",
                plugin_id,
                job,
                binding_id,
                tuple(binding_models.active),
            )
        return built

    return _source()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _build_delta_resource(
    *,
    binding_id: str,
    client: DataverseClient,
    schema: RuntimeDataverseTableSchema,
    binding_models: ResolvedBindingModels,
) -> Any:
    """Create the merge-disposition resource for a delta-synced table.

    The resource reads the stored deltaLink from dlt resource state, yields
    the rows produced by ``_iter_delta_rows`` through
    ``iter_active_model_rows``, and writes the new deltaLink back to state
    after the rows have been yielded.
    """
    table_spec = schema.spec

    @ctx_dlt_resource(
        name=dlt_resource_name(table_spec.resource_name),
        write_disposition="merge",
        primary_key=("_ctx_binding_id", table_spec.primary_key),
        columns=schema.dlt_columns,
    )
    def delta_resource() -> Iterator[DataverseRowBase]:
        resource_state = dlt.current.resource_state()
        stored_link = resource_state.get(DELTA_LINK_KEY)
        if not (stored_link is None or isinstance(stored_link, str)):
            raise RuntimeError(
                f"Stored Dataverse deltaLink for {table_spec.entity_set} is not a string."
            )

        # _iter_delta_rows reports the final deltaLink through this dict
        # once it has drained every page.
        link_holder: dict[str, str] = {}
        yield from iter_active_model_rows(
            model_name=table_spec.resource_name,
            rows=_iter_delta_rows(
                binding_id=binding_id,
                client=client,
                schema=schema,
                initial_delta_link=stored_link,
                next_delta_link=link_holder,
            ),
            binding_models=binding_models,
        )
        resource_state[DELTA_LINK_KEY] = link_holder[DELTA_LINK_KEY]

    return delta_resource
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _build_snapshot_resource(
    *,
    binding_id: str,
    client: DataverseClient,
    schema: RuntimeDataverseTableSchema,
    binding_models: ResolvedBindingModels,
) -> Any:
    """Create the resource for a snapshot-synced table.

    The resource uses SNAPSHOT_WRITE_DISPOSITION with SNAPSHOT_MERGE_KEY and
    yields the rows produced by ``_iter_snapshot_rows`` through
    ``iter_active_model_rows``.
    """
    table_spec = schema.spec

    @ctx_dlt_resource(
        name=dlt_resource_name(table_spec.resource_name),
        write_disposition=SNAPSHOT_WRITE_DISPOSITION,
        merge_key=SNAPSHOT_MERGE_KEY,
        primary_key=("_ctx_binding_id", table_spec.primary_key),
        columns=schema.dlt_columns,
    )
    def snapshot_resource() -> Iterator[DataverseRowBase]:
        yield from iter_active_model_rows(
            model_name=table_spec.resource_name,
            rows=_iter_snapshot_rows(
                binding_id=binding_id,
                client=client,
                schema=schema,
            ),
            binding_models=binding_models,
        )

    return snapshot_resource
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _iter_delta_rows(
    *,
    binding_id: str,
    client: DataverseClient,
    schema: RuntimeDataverseTableSchema,
    initial_delta_link: str | None,
    next_delta_link: dict[str, str],
) -> Iterator[DataverseRowBase]:
    """Yield translated rows from every change-tracking page of one table.

    Starts from ``initial_delta_link`` (a full pull when it is falsy) and
    remembers the most recent deltaLink any page carried. After the last
    page, that link is stored in ``next_delta_link[DELTA_LINK_KEY]``.

    Raises RuntimeError if, after draining, no link containing
    ``$deltatoken`` is available.
    """
    table_spec = schema.spec
    newest_link = initial_delta_link
    rows_seen = 0
    pages_seen = 0
    started_at = time.monotonic()

    run_mode = "incremental" if initial_delta_link else "full"
    LOGGER.info(
        "dataverse.delta.start entity_set=%s mode=%s",
        table_spec.entity_set,
        run_mode,
    )

    pages = client.iter_change_tracking_pages(
        table_spec,
        delta_link=initial_delta_link,
        select=schema.select_columns,
    )
    for pages_seen, page in enumerate(pages, start=1):
        # Keep the previous link when this page does not carry one.
        newest_link = page.delta_link or newest_link

        for record in page.value:
            payload = record.model_dump(by_alias=True, exclude_none=True)
            rows_seen += 1
            yield dataverse_record_to_row(
                binding_id=binding_id,
                schema=schema,
                record=record,
                raw_payload=payload,
            )

    has_usable_link = newest_link is not None and "$deltatoken" in newest_link
    if not has_usable_link:
        raise RuntimeError(
            f"Dataverse delta drain for {table_spec.entity_set} did not finish with "
            "a deltaLink."
        )
    next_delta_link[DELTA_LINK_KEY] = newest_link

    LOGGER.info(
        "dataverse.delta.done entity_set=%s pages=%d rows=%d elapsed=%.1fs",
        table_spec.entity_set,
        pages_seen,
        rows_seen,
        time.monotonic() - started_at,
    )
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _iter_snapshot_rows(
    *,
    binding_id: str,
    client: DataverseClient,
    schema: RuntimeDataverseTableSchema,
) -> Iterator[DataverseRowBase]:
    """Yield translated rows from every snapshot page of one table.

    Logs a start line before the first page and a done line, with page
    count, row count and elapsed time, after the last one.
    """
    table_spec = schema.spec
    rows_seen = 0
    pages_seen = 0
    started_at = time.monotonic()

    LOGGER.info("dataverse.snapshot.start entity_set=%s", table_spec.entity_set)

    pages = client.iter_snapshot_pages(
        table_spec,
        select=schema.select_columns,
    )
    for pages_seen, page in enumerate(pages, start=1):
        for record in page.value:
            payload = record.model_dump(by_alias=True, exclude_none=True)
            rows_seen += 1
            yield dataverse_record_to_row(
                binding_id=binding_id,
                schema=schema,
                record=record,
                raw_payload=payload,
            )

    LOGGER.info(
        "dataverse.snapshot.done entity_set=%s pages=%d rows=%d elapsed=%.1fs",
        table_spec.entity_set,
        pages_seen,
        rows_seen,
        time.monotonic() - started_at,
    )
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from enum import Enum
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DataverseSyncMode(str, Enum):
    """How a Dataverse table is synced; members are also plain strings."""

    # Change-tracked sync.
    DELTA = "delta"
    # Full pull on every run.
    SNAPSHOT = "snapshot"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True)
class DataverseTableSpec:
    """Describe one Dataverse entity set and the way it is synced.

    Attributes:
    - entity_set: OData entity-set name, e.g. "msdyn_projects".
    - primary_key: Dataverse logical name of the table's PK column, e.g.
      "msdyn_projectid". It is the trailing element of the dlt primary_key,
      after _ctx_binding_id. Plugin authors read it from Dataverse
      EntityDefinitions.PrimaryIdAttribute and hard-code it here, which
      keeps the spec list self-documenting.
    - sync_mode: DELTA (Prefer: odata.track-changes) or SNAPSHOT (full pull).
    - resource_name: dlt resource name. By convention the same as
      entity_set, but separate so a plugin can rename the resource.

    $select is deliberately absent: the selected columns are computed at
    warmup from per-tenant attribute metadata (see runtime_schema.py).
    """

    entity_set: str
    primary_key: str
    sync_mode: DataverseSyncMode
    resource_name: str
|