contextbase-shared-plugins 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextbase_shared_plugins-0.2.3.dist-info/METADATA +22 -0
- contextbase_shared_plugins-0.2.3.dist-info/RECORD +37 -0
- contextbase_shared_plugins-0.2.3.dist-info/WHEEL +4 -0
- shared_plugins/__init__.py +12 -0
- shared_plugins/automation.py +11 -0
- shared_plugins/bindings.py +253 -0
- shared_plugins/control_plane.py +208 -0
- shared_plugins/dlt.py +84 -0
- shared_plugins/env.py +102 -0
- shared_plugins/exceptions.py +10 -0
- shared_plugins/google_client/__init__.py +1 -0
- shared_plugins/google_client/auth.py +82 -0
- shared_plugins/google_client/batch_retry.py +308 -0
- shared_plugins/google_client/http_errors.py +27 -0
- shared_plugins/microsoft_dataverse/__init__.py +27 -0
- shared_plugins/microsoft_dataverse/annotations.py +38 -0
- shared_plugins/microsoft_dataverse/auth.py +26 -0
- shared_plugins/microsoft_dataverse/binding_config.py +35 -0
- shared_plugins/microsoft_dataverse/client.py +456 -0
- shared_plugins/microsoft_dataverse/ctx.py +21 -0
- shared_plugins/microsoft_dataverse/identifiers.py +62 -0
- shared_plugins/microsoft_dataverse/ingress.py +53 -0
- shared_plugins/microsoft_dataverse/metadata.py +106 -0
- shared_plugins/microsoft_dataverse/runtime_schema.py +332 -0
- shared_plugins/microsoft_dataverse/source.py +250 -0
- shared_plugins/microsoft_dataverse/tables.py +34 -0
- shared_plugins/microsoft_dataverse/translators.py +128 -0
- shared_plugins/microsoft_dataverse/types.py +346 -0
- shared_plugins/models.py +91 -0
- shared_plugins/naming.py +83 -0
- shared_plugins/pg_column_comments.py +59 -0
- shared_plugins/pyairbyte.py +399 -0
- shared_plugins/resources.py +179 -0
- shared_plugins/scratch.py +127 -0
- shared_plugins/sqlalchemy_types.py +225 -0
- shared_plugins/sqlite.py +123 -0
- shared_plugins/values.py +117 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Per-record translation: Dataverse OData payload → per-table CtxModel instance.
|
|
2
|
+
|
|
3
|
+
The translator walks the raw OData payload, splits annotation keys
|
|
4
|
+
(`<col>@<annotation>`) per the registry policy, and constructs an instance
|
|
5
|
+
of the per-table runtime CtxModel subclass. Returns a CtxModel — NOT a dict
|
|
6
|
+
— because @ctx_dlt_resource validates emitted rows are CtxModel instances.
|
|
7
|
+
|
|
8
|
+
Unknown annotations raise PluginConfigurationError. Dropped annotations are
|
|
9
|
+
silently omitted (they're triaged and intentional). Kept annotations land
|
|
10
|
+
on the companion column suffix.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from shared_plugins.exceptions import PluginConfigurationError
|
|
18
|
+
|
|
19
|
+
from .annotations import (
|
|
20
|
+
DROPPED_ODATA_ANNOTATIONS,
|
|
21
|
+
ODATA_ANNOTATION_COLUMN_SUFFIXES,
|
|
22
|
+
)
|
|
23
|
+
from .ctx import DataverseRowBase
|
|
24
|
+
from .ingress import DataverseRecordIngress, is_deleted_record
|
|
25
|
+
from .runtime_schema import RuntimeDataverseTableSchema
|
|
26
|
+
|
|
27
|
+
# Keys that DataverseRecordIngress emits via model_dump(by_alias=True): the
|
|
28
|
+
# @odata.* aliases plus the non-aliased tombstone fields (`id`, `reason`).
|
|
29
|
+
# The column-flatten step skips these so only column data (extras under
|
|
30
|
+
# DataverseRecordIngress's extra="allow") reaches the per-table row model.
|
|
31
|
+
# Derived from the ingress model so new envelope fields are picked up
|
|
32
|
+
# automatically without touching the flatten filter.
|
|
33
|
+
# One entry per field declared on DataverseRecordIngress: the field's alias
# when it has one, otherwise the plain attribute name.
_DATAVERSE_ENVELOPE_KEYS: frozenset[str] = frozenset(
    {
        info.alias if info.alias else attr_name
        for attr_name, info in DataverseRecordIngress.model_fields.items()
    }
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def dataverse_record_to_row(
    *,
    binding_id: str,
    schema: RuntimeDataverseTableSchema,
    record: DataverseRecordIngress,
    raw_payload: dict[str, Any],
) -> DataverseRowBase:
    """Build the per-table row model for a single Dataverse record.

    Args:
        binding_id: Stored on the row as `_ctx_binding_id`.
        schema: Runtime table schema; supplies `spec` (entity set, primary
            key) and the `record_model` class used for validation.
        record: Validated envelope view of the record (etag, id, reason).
        raw_payload: The raw OData dict for the same record; it is the source
            of column values, annotation keys and `modifiedon`.

    Returns:
        The result of `schema.record_model.model_validate(...)` over the
        merged envelope and column values.

    Raises:
        PluginConfigurationError: The record is a tombstone but `record.id`
            is not a string, so the deleted row cannot be identified. Unknown
            annotations found while flattening raise the same error.

    For tombstones, `record.id` is written under the schema's primary-key
    column so the `hard_delete` hint on `is_deleted` (see
    build_dlt_column_hints in types.py) can match the destination row.
    """
    tombstone = is_deleted_record(record, schema.spec, payload=raw_payload)

    row_values: dict[str, Any] = {
        "_ctx_binding_id": binding_id,
        "_ctx_source_updated_at": raw_payload.get("modifiedon"),
        "etag": record.odata_etag,
        "is_deleted": tombstone,
        "delete_reason": record.reason,
    }
    # Column data is merged last, so payload keys win on a name collision.
    row_values.update(
        _flatten_columns_and_annotations(raw_payload=raw_payload, schema=schema)
    )

    if not tombstone:
        return schema.record_model.model_validate(row_values)

    deleted_id = record.id
    if not isinstance(deleted_id, str):
        raise PluginConfigurationError(
            f"Dataverse tombstone for entity_set "
            f"{schema.spec.entity_set!r} has reason={record.reason!r} "
            "but no 'id' field on the envelope; cannot identify the "
            "deleted row."
        )
    row_values[schema.spec.primary_key] = deleted_id
    return schema.record_model.model_validate(row_values)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _flatten_columns_and_annotations(
    *,
    raw_payload: dict[str, Any],
    schema: RuntimeDataverseTableSchema,
) -> dict[str, Any]:
    """Project the raw payload onto a flat {column_name: value} mapping.

    Per key, in this order:

    - keys listed in _DATAVERSE_ENVELOPE_KEYS are skipped;
    - keys without an `@` are copied under their own name;
    - `<col>@<annotation>` keys are skipped when the annotation is in
      DROPPED_ODATA_ANNOTATIONS, otherwise stored under `<col><suffix>` using
      ODATA_ANNOTATION_COLUMN_SUFFIXES.

    `schema` is only used to name the entity set in the error message.

    Raises:
        PluginConfigurationError: An annotation is in neither registry.
    """
    flattened: dict[str, Any] = {}
    for raw_key, raw_value in raw_payload.items():
        if raw_key in _DATAVERSE_ENVELOPE_KEYS:
            continue

        # partition() yields an empty separator when the key has no "@".
        column, separator, token = raw_key.partition("@")
        if not separator:
            flattened[raw_key] = raw_value
            continue

        annotation = f"@{token}"
        if annotation in DROPPED_ODATA_ANNOTATIONS:
            continue

        suffix = ODATA_ANNOTATION_COLUMN_SUFFIXES.get(annotation)
        if suffix is None:
            raise PluginConfigurationError(
                f"Encountered unknown OData annotation {annotation!r} on "
                f"column {column!r} in entity_set {schema.spec.entity_set!r}. "
                "Triage by adding it to ODATA_ANNOTATION_COLUMN_SUFFIXES "
                "(keep) or DROPPED_ODATA_ANNOTATIONS (drop) in "
                "shared_plugins.microsoft_dataverse.annotations."
            )
        flattened[f"{column}{suffix}"] = raw_value

    return flattened
|
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
"""Dataverse AttributeType → dlt TColumnSchema mapping.
|
|
2
|
+
|
|
3
|
+
Single source of truth for every type-mapping decision the plan §9 locks in.
|
|
4
|
+
Unknown types raise loudly — new Microsoft types must be added explicitly.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from shared_plugins.exceptions import PluginConfigurationError
|
|
13
|
+
|
|
14
|
+
from .annotations import ODATA_ANNOTATION_COLUMN_SUFFIXES
|
|
15
|
+
from .identifiers import annotation_column_name
|
|
16
|
+
from .metadata import AttributeMetadataIngress, extract_label
|
|
17
|
+
|
|
18
|
+
# Existing skips preserved from plugin's runtime_ingress.py.
|
|
19
|
+
# AttributeType values that field_spec_for_attribute turns into "no column".
SKIPPED_ATTRIBUTE_TYPES: frozenset[str] = frozenset(
    (
        "Virtual",
        "File",
        "Image",
        "CalendarRules",
        "PartyList",
        "ManagedProperty",
    )
)
# Logical names skipped regardless of their type.
SKIPPED_ATTRIBUTE_NAMES: frozenset[str] = frozenset(("stageid",))

# AttributeType values routed to the lookup and picklist builders respectively.
LOOKUP_ATTRIBUTE_TYPES: frozenset[str] = frozenset(("Customer", "Lookup", "Owner"))
PICKLIST_ATTRIBUTE_TYPES: frozenset[str] = frozenset(("Picklist", "State", "Status"))
|
|
33
|
+
|
|
34
|
+
# Scalar AttributeTypes that emit @OData.Community.Display.V1.FormattedValue
|
|
35
|
+
# alongside their raw value. Empirically observed on the customer tenant
|
|
36
|
+
# across ~5000 non-null values per type (May 2026): all numeric scalars and
|
|
37
|
+
# Boolean emit a locale-rendered string; Memo / String / Uniqueidentifier /
|
|
38
|
+
# EntityName never did. If a new type starts emitting one, the translator's
|
|
39
|
+
# unknown-column path fails loud at pydantic validation — add it here then.
|
|
40
|
+
_SCALAR_TYPES_WITH_FORMATTED_VALUE: frozenset[str] = frozenset(
|
|
41
|
+
{"Integer", "BigInt", "Double", "Boolean"}
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
_ANNOTATION_DESCRIPTIONS: dict[str, str] = {
|
|
45
|
+
"_formatted_value": "Formatted value of {base}",
|
|
46
|
+
"_lookup_logical_name": "Logical-name of the entity referenced by {base}",
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass(frozen=True)
class RuntimeAnnotationColumn:
    """Immutable description of one companion (annotation) column."""

    # Destination column name for the annotation value.
    column_name: str
    # Column type label for the annotation value.
    pg_type: str
    # Optional human-readable description of the column.
    description: str | None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass(frozen=True)
class RuntimeFieldSpec:
    """Immutable per-attribute record used when emitting columns.

    Both the description and the display name are kept so that the dlt hint
    builder can apply its `description ?? display_name` fallback, and so that
    other consumers can read the richer object without going back to the raw
    attribute metadata.
    """

    # Column name the attribute is emitted under.
    logical_name: str
    # Column type label, plus any extra type arguments (e.g. precision/scale).
    pg_type: str
    pg_type_args: dict[str, Any]
    # Labels extracted from the attribute metadata; either may be absent.
    description: str | None
    display_name: str | None
    is_custom_attribute: bool
    # The Dataverse AttributeType string this spec was built from.
    attribute_type: str
    # Companion columns emitted alongside the main column.
    annotation_columns: tuple[RuntimeAnnotationColumn, ...]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def field_spec_for_attribute(
    attribute: AttributeMetadataIngress,
) -> RuntimeFieldSpec | None:
    """Translate one attribute-metadata record into a RuntimeFieldSpec.

    Returns:
        None when the attribute is filtered out: `is_valid_for_read` is
        False, `attribute_of` is set, the logical name is in
        SKIPPED_ATTRIBUTE_NAMES, or the type is in SKIPPED_ATTRIBUTE_TYPES.
        Otherwise the spec built by the matching type-specific builder, or a
        scalar spec from _SCALAR_TYPE_MAP.

    Raises:
        PluginConfigurationError: The AttributeType matches no known
            category; new types must be triaged in this module.
    """
    excluded = (
        attribute.is_valid_for_read is False
        or attribute.attribute_of
        or attribute.logical_name in SKIPPED_ATTRIBUTE_NAMES
    )
    if excluded:
        return None

    type_name = attribute.attribute_type or ""

    # MultiSelectPicklist arrives as Virtual + AttributeTypeName.Value=MultiSelectPicklistType,
    # so it has to be recognised before the skip set (which contains "Virtual").
    if attribute.type_name_value == "MultiSelectPicklistType":
        return _build_multiselect_field(attribute)
    if type_name in SKIPPED_ATTRIBUTE_TYPES:
        return None
    if type_name in LOOKUP_ATTRIBUTE_TYPES:
        return _build_lookup_field(attribute)
    if type_name in PICKLIST_ATTRIBUTE_TYPES:
        return _build_picklist_field(attribute)
    if type_name == "DateTime":
        return _build_datetime_field(attribute)
    if type_name == "Decimal" or type_name == "Money":
        return _build_numeric_field(attribute)

    scalar_pg_type = _SCALAR_TYPE_MAP.get(type_name)
    if scalar_pg_type is None:
        raise PluginConfigurationError(
            f"Unknown Dataverse AttributeType {type_name!r} for attribute "
            f"{attribute.logical_name!r}. New Microsoft types require explicit "
            "triage in shared_plugins.microsoft_dataverse.types."
        )

    # Only some scalar types get a formatted-value companion column.
    companions = (
        _build_annotation_columns(
            attribute.logical_name,
            kept_suffixes=("_formatted_value",),
            context_table=attribute.logical_name,
        )
        if type_name in _SCALAR_TYPES_WITH_FORMATTED_VALUE
        else ()
    )

    return RuntimeFieldSpec(
        logical_name=attribute.logical_name,
        pg_type=scalar_pg_type,
        pg_type_args={},
        description=extract_label(attribute.description),
        display_name=extract_label(attribute.display_name),
        is_custom_attribute=attribute.is_custom_attribute,
        attribute_type=type_name,
        annotation_columns=companions,
    )
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
_SCALAR_TYPE_MAP: dict[str, str] = {
|
|
146
|
+
"String": "text",
|
|
147
|
+
"Memo": "text",
|
|
148
|
+
"Integer": "bigint",
|
|
149
|
+
"BigInt": "bigint",
|
|
150
|
+
"Double": "double",
|
|
151
|
+
"Boolean": "bool",
|
|
152
|
+
"Uniqueidentifier": "uuid",
|
|
153
|
+
"EntityName": "text",
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _build_lookup_field(attr: AttributeMetadataIngress) -> RuntimeFieldSpec:
    """Build the spec for a lookup-style attribute.

    The column is named `_<logical_name>_value`, typed `uuid`, and carries
    two companion columns: the formatted value and the lookup logical name.
    """
    value_column = f"_{attr.logical_name}_value"
    companions = _build_annotation_columns(
        value_column,
        kept_suffixes=("_formatted_value", "_lookup_logical_name"),
        context_table=attr.logical_name,
    )
    description = extract_label(attr.description)
    display_name = extract_label(attr.display_name)
    return RuntimeFieldSpec(
        logical_name=value_column,
        pg_type="uuid",
        pg_type_args={},
        description=description,
        display_name=display_name,
        is_custom_attribute=attr.is_custom_attribute,
        attribute_type=attr.attribute_type or "",
        annotation_columns=companions,
    )
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _build_picklist_field(attr: AttributeMetadataIngress) -> RuntimeFieldSpec:
    """Build the spec for a picklist-style attribute.

    The column keeps the attribute's logical name, is typed `bigint`, and
    gets a formatted-value companion column.
    """
    column = attr.logical_name
    companions = _build_annotation_columns(
        column,
        kept_suffixes=("_formatted_value",),
        context_table=attr.logical_name,
    )
    description = extract_label(attr.description)
    display_name = extract_label(attr.display_name)
    return RuntimeFieldSpec(
        logical_name=attr.logical_name,
        pg_type="bigint",
        pg_type_args={},
        description=description,
        display_name=display_name,
        is_custom_attribute=attr.is_custom_attribute,
        attribute_type=attr.attribute_type or "",
        annotation_columns=companions,
    )
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _build_multiselect_field(attr: AttributeMetadataIngress) -> RuntimeFieldSpec:
    """Build the spec for a multi-select picklist attribute.

    The column is typed `bigint[]` with a formatted-value companion column.
    When the metadata has no attribute type, `"MultiSelectPicklist"` is
    recorded instead.
    """
    column = attr.logical_name
    companions = _build_annotation_columns(
        column,
        kept_suffixes=("_formatted_value",),
        context_table=attr.logical_name,
    )
    description = extract_label(attr.description)
    display_name = extract_label(attr.display_name)
    return RuntimeFieldSpec(
        logical_name=attr.logical_name,
        pg_type="bigint[]",
        pg_type_args={},
        description=description,
        display_name=display_name,
        is_custom_attribute=attr.is_custom_attribute,
        attribute_type=attr.attribute_type or "MultiSelectPicklist",
        annotation_columns=companions,
    )
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _build_datetime_field(attr: AttributeMetadataIngress) -> RuntimeFieldSpec:
    """Build the spec for a DateTime attribute.

    A `DateOnly` format yields a `date` column with no type arguments. Any
    other format yields a `timestamp` column whose `timezone` argument is
    False only when the behavior value is `TimeZoneIndependent`. A
    formatted-value companion column is always attached.
    """
    date_format = attr.format
    # Read unconditionally, matching the original evaluation order.
    behavior_value = (attr.date_time_behavior or {}).get("Value")

    type_args: dict[str, Any] = {}
    if date_format == "DateOnly":
        column_type = "date"
    else:
        column_type = "timestamp"
        type_args["timezone"] = behavior_value != "TimeZoneIndependent"

    companions = _build_annotation_columns(
        attr.logical_name,
        kept_suffixes=("_formatted_value",),
        context_table=attr.logical_name,
    )

    return RuntimeFieldSpec(
        logical_name=attr.logical_name,
        pg_type=column_type,
        pg_type_args=type_args,
        description=extract_label(attr.description),
        display_name=extract_label(attr.display_name),
        is_custom_attribute=attr.is_custom_attribute,
        attribute_type=attr.attribute_type or "DateTime",
        annotation_columns=companions,
    )
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _build_numeric_field(attr: AttributeMetadataIngress) -> RuntimeFieldSpec:
    """Build the spec for a Decimal or Money attribute.

    The column is `numeric` with precision 28. Money always uses scale 4;
    other types use `attr.precision` as the scale, falling back to 4 when it
    is None. A formatted-value companion column is attached.
    """
    numeric_precision = 28
    # Short-circuit keeps Money from ever consulting attr.precision.
    if attr.attribute_type == "Money" or attr.precision is None:
        numeric_scale = 4
    else:
        numeric_scale = attr.precision

    companions = _build_annotation_columns(
        attr.logical_name,
        kept_suffixes=("_formatted_value",),
        context_table=attr.logical_name,
    )

    type_args = {"precision": numeric_precision, "scale": numeric_scale}
    return RuntimeFieldSpec(
        logical_name=attr.logical_name,
        pg_type="numeric",
        pg_type_args=type_args,
        description=extract_label(attr.description),
        display_name=extract_label(attr.display_name),
        is_custom_attribute=attr.is_custom_attribute,
        attribute_type=attr.attribute_type or "",
        annotation_columns=companions,
    )
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _build_annotation_columns(
    base: str,
    *,
    kept_suffixes: tuple[str, ...],
    context_table: str,
) -> tuple[RuntimeAnnotationColumn, ...]:
    """Build the companion columns for `base`, one per kept suffix.

    Args:
        base: Name of the column the annotations belong to.
        kept_suffixes: Companion-column suffixes to emit, in output order.
        context_table: Prefix for the `context` string handed to
            `annotation_column_name` (as `<context_table>.<base>`).

    Returns:
        A tuple of `text`-typed RuntimeAnnotationColumn objects. The
        description comes from _ANNOTATION_DESCRIPTIONS; a suffix that is
        registered but has no description template gets `description=None`
        rather than failing with a bare KeyError.

    Raises:
        PluginConfigurationError: A suffix is not a value of
            ODATA_ANNOTATION_COLUMN_SUFFIXES (this catches typos).
    """
    # Loop-invariant: snapshot the registered suffixes once per call instead
    # of scanning the registry's values for every kept suffix.
    registered_suffixes = frozenset(ODATA_ANNOTATION_COLUMN_SUFFIXES.values())

    columns: list[RuntimeAnnotationColumn] = []
    for suffix in kept_suffixes:
        if suffix not in registered_suffixes:
            raise PluginConfigurationError(
                f"Annotation suffix {suffix!r} is not registered in "
                "ODATA_ANNOTATION_COLUMN_SUFFIXES."
            )
        column_name = annotation_column_name(
            base,
            suffix,
            context=f"{context_table}.{base}",
        )
        # The description is optional on RuntimeAnnotationColumn, so a missing
        # template degrades to None instead of raising.
        description_template = _ANNOTATION_DESCRIPTIONS.get(suffix)
        description = (
            description_template.format(base=base)
            if description_template is not None
            else None
        )
        columns.append(
            RuntimeAnnotationColumn(
                column_name=column_name,
                pg_type="text",
                description=description,
            )
        )
    return tuple(columns)
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
# dlt's TDataType literal (text, double, bool, timestamp, bigint, binary, json,
|
|
297
|
+
# decimal, wei, date, time) does not include "uuid", "numeric", or "bigint[]"
|
|
298
|
+
# today. We keep the richer pg_type on RuntimeFieldSpec so non-dlt consumers
|
|
299
|
+
# (LLM tool descriptions, sibling exporters, postgres-native upgrades) retain
|
|
300
|
+
# the fidelity. When dlt adds native uuid / numeric / array support, delete
|
|
301
|
+
# the corresponding entries from this mapping and pass pg_type through
|
|
302
|
+
# unchanged.
|
|
303
|
+
# Tracked: dlt's TDataType definition lives at
|
|
304
|
+
# dlt/common/data_types/typing.py (Literal); follow upstream support there.
|
|
305
|
+
_DLT_DATA_TYPE_OVERRIDES: dict[str, str] = {
|
|
306
|
+
"uuid": "text",
|
|
307
|
+
"numeric": "decimal",
|
|
308
|
+
"bigint[]": "json",
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _to_dlt_data_type(pg_type: str) -> str:
|
|
313
|
+
return _DLT_DATA_TYPE_OVERRIDES.get(pg_type, pg_type)
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def build_dlt_column_hints(
    fields: tuple[RuntimeFieldSpec, ...],
    *,
    sync_mode_is_delta: bool,
) -> dict[str, dict[str, Any]]:
    """Assemble the dlt `columns={col: {...}}` hint for `@ctx_dlt_resource`.

    Each field contributes an entry keyed by its logical name holding the
    dlt data type plus the field's type arguments. Each of its annotation
    columns contributes an entry keyed by the annotation column name.

    Description policy: `description ?? display_name`; when neither is set
    the `description` key is left out. Annotation columns use their own
    description when present.

    When `sync_mode_is_delta` is true, `is_deleted` is set to
    `{"hard_delete": True}`.
    """
    column_hints: dict[str, dict[str, Any]] = {}

    for spec in fields:
        hint: dict[str, Any] = {"data_type": _to_dlt_data_type(spec.pg_type)}
        hint.update(spec.pg_type_args)
        doc_text = spec.description or spec.display_name
        if doc_text:
            hint["description"] = doc_text
        column_hints[spec.logical_name] = hint

        for companion in spec.annotation_columns:
            companion_hint: dict[str, Any] = {
                "data_type": _to_dlt_data_type(companion.pg_type)
            }
            if companion.description:
                companion_hint["description"] = companion.description
            column_hints[companion.column_name] = companion_hint

    if sync_mode_is_delta:
        column_hints["is_deleted"] = {"hard_delete": True}

    return column_hints
|
shared_plugins/models.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from functools import lru_cache
|
|
4
|
+
from typing import Annotated, Any, Optional, TypeVar, cast
|
|
5
|
+
|
|
6
|
+
from pydantic import (
|
|
7
|
+
AwareDatetime,
|
|
8
|
+
BaseModel,
|
|
9
|
+
ConfigDict,
|
|
10
|
+
Field,
|
|
11
|
+
StringConstraints,
|
|
12
|
+
ValidationError,
|
|
13
|
+
create_model,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
# Non-empty string, validated in strict mode.
IdStr = Annotated[str, StringConstraints(strict=True, min_length=1)]
# Integer >= 0, validated in strict mode.
NonNegativeInt = Annotated[int, Field(strict=True, ge=0)]

# Type variable for helpers that accept and return a BaseModel subclass.
ModelT = TypeVar("ModelT", bound=BaseModel)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def format_validation_error(error: ValidationError) -> str:
    """Render a validation error as a single `loc: msg; loc: msg` line.

    Each issue's location is its `loc` parts joined with dots, or `root`
    when the location is empty.
    """

    def _render(issue: Any) -> str:
        dotted = ".".join(map(str, issue["loc"]))
        where = dotted if dotted else "root"
        return f"{where}: {issue['msg']}"

    return "; ".join([_render(issue) for issue in error.errors()])
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class StrictModel(BaseModel):
    """Base model for the package: extra (undeclared) fields are forbidden.

    `populate_by_name=True` is also enabled in the model configuration.
    """

    model_config = ConfigDict(populate_by_name=True, extra="forbid")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class IngressModel(StrictModel):
    """Common base class for plugin ingress contracts.

    Adds no fields or configuration of its own on top of StrictModel.
    """
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class CtxModel(StrictModel):
    """Row-metadata contract shared by plugin row models."""

    # Required, non-empty; aliased to the `_ctx_binding_id` key.
    ctx_binding_id: str = Field(alias="_ctx_binding_id", min_length=1)
    # Optional timezone-aware timestamp; aliased to `_ctx_source_updated_at`.
    ctx_source_updated_at: AwareDatetime | None = Field(
        alias="_ctx_source_updated_at",
        default=None,
    )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def partialize(model_cls: type[ModelT], *, name: str | None = None) -> type[ModelT]:
    """Return the cached partial variant of `model_cls`.

    In the partial class every field of `model_cls` is optional. The class is
    named `name` when given, otherwise `<ModelName>Partial`.
    """
    if name:
        resolved_name = name
    else:
        resolved_name = f"{model_cls.__name__}Partial"
    partial_cls = _partialize_cached(model_cls, resolved_name)
    return cast(type[ModelT], partial_cls)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _to_optional_annotation(annotation: Any) -> Any:
|
|
58
|
+
try:
|
|
59
|
+
return annotation | None
|
|
60
|
+
except TypeError:
|
|
61
|
+
return Optional[annotation] # type: ignore[valid-type]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@lru_cache(maxsize=None)
def _partialize_cached(
    model_cls: type[BaseModel], partial_name: str
) -> type[BaseModel]:
    """Create (once per `(model_cls, partial_name)`) the partial model class.

    For every field of `model_cls` the copied field attributes lose their
    `default` / `default_factory`, the annotation is widened to accept None,
    and the new default is None. The new class subclasses `model_cls` and
    reuses its `__module__`.
    """
    dropped_attributes = ("default", "default_factory")

    def _relaxed(info: Any) -> tuple[Any, Any]:
        snapshot = info.asdict()
        attributes = {
            key: value
            for key, value in dict(snapshot["attributes"]).items()
            if key not in dropped_attributes
        }
        annotated = Annotated[
            (
                _to_optional_annotation(snapshot["annotation"]),
                *snapshot["metadata"],
                Field(**attributes),
            )
        ]
        return (annotated, None)

    overrides: dict[str, tuple[Any, Any]] = {
        field_name: _relaxed(field_info)
        for field_name, field_info in model_cls.model_fields.items()
    }

    return create_model(
        partial_name,
        __base__=model_cls,
        __module__=model_cls.__module__,
        **overrides,
    )
|
shared_plugins/naming.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from collections.abc import Mapping
|
|
6
|
+
from functools import lru_cache
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _slugify(value: str) -> str:
|
|
11
|
+
return re.sub(r"[^a-zA-Z0-9]+", "_", value).strip("_").lower() or "value"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@lru_cache(maxsize=None)
|
|
15
|
+
def _plugin_id_from_dir(plugin_dir: str) -> str:
|
|
16
|
+
manifest_path = Path(plugin_dir) / "plugin.json"
|
|
17
|
+
with manifest_path.open("r", encoding="utf-8") as manifest_file:
|
|
18
|
+
manifest = json.load(manifest_file)
|
|
19
|
+
return str(manifest["plugin_id"]).strip()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def plugin_id_from_module(module_file: str) -> str:
    """Resolve the plugin id for the plugin that contains `module_file`.

    Starting at the module's directory and walking up to the filesystem
    root, the first directory holding a `plugin.json` supplies the id.

    Raises:
        FileNotFoundError: No directory on that path contains `plugin.json`.
    """
    start_dir = Path(module_file).resolve().parent

    # `parents` of a resolved path runs from the immediate parent up to the root.
    for candidate in (start_dir, *start_dir.parents):
        if (candidate / "plugin.json").exists():
            return _plugin_id_from_dir(str(candidate))

    raise FileNotFoundError(
        f"Could not resolve plugin.json for module '{module_file}'."
    )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def dlt_dataset_name(plugin_id: str) -> str:
    """Return the dlt dataset name: the plugin id with outer whitespace removed."""
    dataset = plugin_id.strip()
    return dataset
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def dagster_asset_group_name(plugin_id: str) -> str:
    """Return the Dagster asset group name: the plugin id, whitespace-stripped."""
    group = plugin_id.strip()
    return group
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def dagster_partition_def_name(plugin_id: str) -> str:
    """Return `<plugin_id>_bindings`, using the whitespace-stripped plugin id."""
    stripped = plugin_id.strip()
    return f"{stripped}_bindings"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def dagster_pool_name(plugin_id: str) -> str:
    """Return `<plugin_id>_pool`, using the whitespace-stripped plugin id."""
    stripped = plugin_id.strip()
    return f"{stripped}_pool"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def dlt_source_name(plugin_id: str, job: str) -> str:
    """Return `<plugin-slug>_<job-slug>` built from the slugified inputs."""
    plugin_slug = _slugify(plugin_id)
    job_slug = _slugify(job)
    return f"{plugin_slug}_{job_slug}"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def dlt_resource_name(entity: str) -> str:
    """Return the dlt resource name: the entity name, whitespace-stripped."""
    resource = entity.strip()
    return resource
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def dlt_pipeline_name(plugin_id: str, binding_id: str, job: str) -> str:
    """Return `<plugin>__<binding>__<job>`, each part slugified."""
    plugin_slug = _slugify(plugin_id)
    binding_slug = _slugify(binding_id)
    job_slug = _slugify(job)
    return f"{plugin_slug}__{binding_slug}__{job_slug}"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def dagster_dlt_asset_key(source_name: str, entity: str) -> str:
    """Return `dlt_<source_name>_<entity>`.

    This mirrors the default asset key produced by DagsterDltTranslator for
    a dlt resource; the inputs are used as given, without normalisation.
    """
    asset_key = f"dlt_{source_name}_{entity}"
    return asset_key
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def dagster_airbyte_sync_asset_key(plugin_id: str) -> str:
    """Return `ab_<plugin-slug>_sync` for the slugified plugin id."""
    plugin_slug = _slugify(plugin_id)
    return f"ab_{plugin_slug}_sync"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def dagster_asset_tags(
    plugin_id: str,
    *,
    extra_tags: Mapping[str, str] | None = None,
) -> dict[str, str]:
    """Return the tag dict for a plugin's assets.

    The result always starts with `plugin_id` (whitespace-stripped). Entries
    from `extra_tags` are merged on top, so an extra `plugin_id` tag
    overrides the default one.
    """
    merged: dict[str, str] = {"plugin_id": plugin_id.strip()}
    if not extra_tags:
        return merged
    for tag_name, tag_value in dict(extra_tags).items():
        merged[tag_name] = tag_value
    return merged
|