contextbase-shared-plugins 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextbase_shared_plugins-0.2.3.dist-info/METADATA +22 -0
- contextbase_shared_plugins-0.2.3.dist-info/RECORD +37 -0
- contextbase_shared_plugins-0.2.3.dist-info/WHEEL +4 -0
- shared_plugins/__init__.py +12 -0
- shared_plugins/automation.py +11 -0
- shared_plugins/bindings.py +253 -0
- shared_plugins/control_plane.py +208 -0
- shared_plugins/dlt.py +84 -0
- shared_plugins/env.py +102 -0
- shared_plugins/exceptions.py +10 -0
- shared_plugins/google_client/__init__.py +1 -0
- shared_plugins/google_client/auth.py +82 -0
- shared_plugins/google_client/batch_retry.py +308 -0
- shared_plugins/google_client/http_errors.py +27 -0
- shared_plugins/microsoft_dataverse/__init__.py +27 -0
- shared_plugins/microsoft_dataverse/annotations.py +38 -0
- shared_plugins/microsoft_dataverse/auth.py +26 -0
- shared_plugins/microsoft_dataverse/binding_config.py +35 -0
- shared_plugins/microsoft_dataverse/client.py +456 -0
- shared_plugins/microsoft_dataverse/ctx.py +21 -0
- shared_plugins/microsoft_dataverse/identifiers.py +62 -0
- shared_plugins/microsoft_dataverse/ingress.py +53 -0
- shared_plugins/microsoft_dataverse/metadata.py +106 -0
- shared_plugins/microsoft_dataverse/runtime_schema.py +332 -0
- shared_plugins/microsoft_dataverse/source.py +250 -0
- shared_plugins/microsoft_dataverse/tables.py +34 -0
- shared_plugins/microsoft_dataverse/translators.py +128 -0
- shared_plugins/microsoft_dataverse/types.py +346 -0
- shared_plugins/models.py +91 -0
- shared_plugins/naming.py +83 -0
- shared_plugins/pg_column_comments.py +59 -0
- shared_plugins/pyairbyte.py +399 -0
- shared_plugins/resources.py +179 -0
- shared_plugins/scratch.py +127 -0
- shared_plugins/sqlalchemy_types.py +225 -0
- shared_plugins/sqlite.py +123 -0
- shared_plugins/values.py +117 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: contextbase-shared-plugins
|
|
3
|
+
Version: 0.2.3
|
|
4
|
+
Summary: Shared infrastructure for ContextBase plugins
|
|
5
|
+
Author: Alizain Feerasta
|
|
6
|
+
Author-email: Alizain Feerasta <alizain.feerasta@gmail.com>
|
|
7
|
+
Requires-Dist: contextbase-shared-types==0.2.3
|
|
8
|
+
Requires-Dist: airbyte
|
|
9
|
+
Requires-Dist: azure-identity>=1.25.1
|
|
10
|
+
Requires-Dist: dagster==1.12.14
|
|
11
|
+
Requires-Dist: dagster-dlt==0.28.14
|
|
12
|
+
Requires-Dist: dlt[postgres]>=1.26.0
|
|
13
|
+
Requires-Dist: google-api-python-client>=2.185.0
|
|
14
|
+
Requires-Dist: google-auth>=2.0.0
|
|
15
|
+
Requires-Dist: httpx>=0.28.1
|
|
16
|
+
Requires-Dist: psycopg2-binary
|
|
17
|
+
Requires-Dist: pyarrow>=22.0.0,<23.0.0
|
|
18
|
+
Requires-Dist: pydantic>=2.12.0
|
|
19
|
+
Requires-Dist: sqlalchemy>=2.0.0
|
|
20
|
+
Requires-Dist: pydantic-settings>=2.13.1
|
|
21
|
+
Requires-Dist: tenacity>=8.2
|
|
22
|
+
Requires-Python: >=3.14, <3.15
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
shared_plugins/__init__.py,sha256=M6TV9owojVWO8c6E00UD1yA4-R1T_HsjnzP7sKr6Wl8,364
|
|
2
|
+
shared_plugins/automation.py,sha256=ZFpRzQUnyH_J_GoSZCrYYWNO6OcsnvbQ_kjFS2BnetA,380
|
|
3
|
+
shared_plugins/bindings.py,sha256=ex44eHvl-kePdN8ZyhVo0z2H_u-gNuZzxkcITEPfi_M,7559
|
|
4
|
+
shared_plugins/control_plane.py,sha256=U00Xv7AG4wPRwsMcGoowQ1eexm5RpfWGNveFD_LwfRw,7271
|
|
5
|
+
shared_plugins/dlt.py,sha256=YMJ3Gd-uCFuL8ppma7ghuOy3PzbulZUlZRSVy_1ibg8,2878
|
|
6
|
+
shared_plugins/env.py,sha256=YqUva_GQegPPVHe99Y-JmthSbwNfUHehDetNxiN4Vr0,3487
|
|
7
|
+
shared_plugins/exceptions.py,sha256=hV-Dn01TrvmFpMkRoNRy629KLL-Rdh3zi82z9JAodKM,318
|
|
8
|
+
shared_plugins/google_client/__init__.py,sha256=56uWeZViYLtnuD5X5iSiwmbbZVSMSuRhpOhjpPrwrAk,62
|
|
9
|
+
shared_plugins/google_client/auth.py,sha256=vHMik1ZypS2anYtgveveis3Jxk3lOFWRy_M83cFAPBo,2583
|
|
10
|
+
shared_plugins/google_client/batch_retry.py,sha256=sVK-uPXgF4a-ZDWunN_k5PUJYk1FsgvJzLYX3qmKrp8,10167
|
|
11
|
+
shared_plugins/google_client/http_errors.py,sha256=j_8vQ4URdnKYBatrkqSq2GHrRCgyHJZufyZIpvEn3S8,806
|
|
12
|
+
shared_plugins/microsoft_dataverse/__init__.py,sha256=xMLs_ICxnVkCfvpUHy-YK7d3ufk6OmUpVtweWLoudS4,840
|
|
13
|
+
shared_plugins/microsoft_dataverse/annotations.py,sha256=5Qz6RjCfcrkSUMEhdIvbcWo6r-AM449rJgDaRA_er_w,1567
|
|
14
|
+
shared_plugins/microsoft_dataverse/auth.py,sha256=PrMvAgfAT5fZt66zes6ShEzppFGSWCu5PudO0Y4wR8k,627
|
|
15
|
+
shared_plugins/microsoft_dataverse/binding_config.py,sha256=oFHYqKZHlKdcMQ98IOAdqt1KK2r2JwX716ny86lyS1c,1294
|
|
16
|
+
shared_plugins/microsoft_dataverse/client.py,sha256=dDduQF5GuYCA76wuwmbfiQUwPttZ3tihg9dJh2TZcV4,16276
|
|
17
|
+
shared_plugins/microsoft_dataverse/ctx.py,sha256=_nZYNPaDA9njvviwgMIdEKc815dnUM_CzwFGhqSMIpM,783
|
|
18
|
+
shared_plugins/microsoft_dataverse/identifiers.py,sha256=OMzQI2vVgL4AvWHMybOyltpjqW2Sr7q_gHU4doJN6iE,2300
|
|
19
|
+
shared_plugins/microsoft_dataverse/ingress.py,sha256=5PlVPxuzbY8whs6FjtTnqCVhFGHZnkzFzSgsf3RlO2U,2276
|
|
20
|
+
shared_plugins/microsoft_dataverse/metadata.py,sha256=NV_JaviYd2JdOtMx1z1sN9ezMxGPWBO1XYt7COzG8s8,4510
|
|
21
|
+
shared_plugins/microsoft_dataverse/runtime_schema.py,sha256=T1tNTX2CqOVGIBIY9YHNuc1dAFG60fD5owEH0224VD4,11554
|
|
22
|
+
shared_plugins/microsoft_dataverse/source.py,sha256=Kn6WGbucWdDXD8OtsSf2e7dThhnXKpoQf41ZgtELLKM,7584
|
|
23
|
+
shared_plugins/microsoft_dataverse/tables.py,sha256=j-20oGgYTUrc6De7_cni7nR9D9XfcHfa6zyOgc_FU1I,1192
|
|
24
|
+
shared_plugins/microsoft_dataverse/translators.py,sha256=3sKhUrhYgrDGjP_ZXFv8XNkrhvDqRA93idzMYShCFLs,5081
|
|
25
|
+
shared_plugins/microsoft_dataverse/types.py,sha256=G_e9NhvAQ6ljIwAxssH8VP6IZHEFKQ0s2DjSQ0xNiAY,11727
|
|
26
|
+
shared_plugins/models.py,sha256=m4nNrPTmwR1zV2EoUvuwfhS_WjBi05x61_bShOso3j0,2669
|
|
27
|
+
shared_plugins/naming.py,sha256=tE7oANx8SxGjfDzZT0GqDcObzFLs_RrS_B2vw_cdPjU,2208
|
|
28
|
+
shared_plugins/pg_column_comments.py,sha256=HTSC8Og4jPXdBaiEhrAN__k2UOnb3E_6g38teLp2Q7s,2346
|
|
29
|
+
shared_plugins/pyairbyte.py,sha256=iVPVjn2x1K07YVeDlJVMpuH26YRvXRJ0PxWwHlXdZQM,14038
|
|
30
|
+
shared_plugins/resources.py,sha256=i-pVKSXmSBAjKh6-KfAppAcuGiPt4cerjExM6nxEqvs,5429
|
|
31
|
+
shared_plugins/scratch.py,sha256=oZCJZSB5m80Ez7Ox8Ktl7rKV2vFIJyp8LBkZcXp6wlA,4176
|
|
32
|
+
shared_plugins/sqlalchemy_types.py,sha256=1e_OxfUQzY377VlV4gidtVroK6FkgdOyMvgZeATxv8E,7123
|
|
33
|
+
shared_plugins/sqlite.py,sha256=Pl3gXNixyQOrz19HgGojux0Ywkxukmub7rkwdkhU3QY,4301
|
|
34
|
+
shared_plugins/values.py,sha256=DhTk62AvmPcfUvCa_n-dXN5zTdu4UN9tJeyjzMhGTl8,3199
|
|
35
|
+
contextbase_shared_plugins-0.2.3.dist-info/WHEEL,sha256=i9aSRDivn5iP9LaR1BLQX2GNAuriQWPsFwbbWygTX2k,81
|
|
36
|
+
contextbase_shared_plugins-0.2.3.dist-info/METADATA,sha256=_xi9qHr6p43ZyLcTXGVf-r6iAH5vXBpEwX2nm7OlJnU,771
|
|
37
|
+
contextbase_shared_plugins-0.2.3.dist-info/RECORD,,
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from .models import (
|
|
2
|
+
CtxModel as CtxModel,
|
|
3
|
+
IngressModel as IngressModel,
|
|
4
|
+
StrictModel as StrictModel,
|
|
5
|
+
partialize as partialize,
|
|
6
|
+
)
|
|
7
|
+
from .resources import (
|
|
8
|
+
ctx_dlt_resource as ctx_dlt_resource,
|
|
9
|
+
ctx_dlt_transformer as ctx_dlt_transformer,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
import shared_plugins.pg_column_comments as _pg_column_comments # noqa: F401 # side-effect import
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import dagster as dg
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def non_overlapping_automation_condition(
    condition: dg.AutomationCondition,
) -> dg.AutomationCondition:
    """Combine ``condition`` with a "not already in progress" guard.

    Returns a condition that holds only when ``condition`` holds and
    ``AutomationCondition.in_progress()`` does not.
    """
    # Without this explicit guard, cron- and missing-based automation can keep
    # requesting work while the current partition is still active.
    not_in_progress = ~dg.AutomationCondition.in_progress()
    return condition & not_in_progress
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import importlib.util
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
from collections.abc import Callable, Collection, Iterable, Iterator
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Annotated, TypeVar
|
|
11
|
+
|
|
12
|
+
from pydantic import (
|
|
13
|
+
BaseModel,
|
|
14
|
+
BeforeValidator,
|
|
15
|
+
ConfigDict,
|
|
16
|
+
Field,
|
|
17
|
+
StringConstraints,
|
|
18
|
+
ValidationError,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
from shared_types.api_key_auth import ApiKeyAuth
|
|
22
|
+
from shared_types.authenticated_account import AuthenticatedAccountRef
|
|
23
|
+
from shared_types.binding_auth_none import BindingAuthNone
|
|
24
|
+
from shared_types.client_credentials_auth import ClientCredentialsAuth
|
|
25
|
+
from shared_types.dagster_binding_plan import DagsterAllPlanBinding
|
|
26
|
+
|
|
27
|
+
from .exceptions import PluginConfigurationError
|
|
28
|
+
from .models import CtxModel
|
|
29
|
+
|
|
30
|
+
# Signature of a ``models.filter`` callable: it is called with the model name
# and a row, and its result decides whether the row is kept.
RowFilter = Callable[[str, CtxModel], bool]
RowT = TypeVar("RowT", bound=CtxModel)

# Second-precision timestamp with a literal "Z" suffix (YYYY-MM-DDTHH:MM:SSZ).
_UTC_TIMESTAMP_PATTERN = r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$"

# Text that is whitespace-stripped and must then contain at least one character.
NonEmptyText = Annotated[str, StringConstraints(strip_whitespace=True, min_length=1)]

# Whitespace-stripped text that must match ``_UTC_TIMESTAMP_PATTERN``.
UtcTimestampText = Annotated[
    str,
    StringConstraints(
        strip_whitespace=True,
        min_length=1,
        pattern=_UTC_TIMESTAMP_PATTERN,
    ),
]

# Integer >= 0, validated in strict mode.
StrictNonNegativeInt = Annotated[int, Field(strict=True, ge=0)]

# Path that is normalised by ``_resolve_binding_path`` before validation. The
# lambda is deliberate: it defers the name lookup until validation time,
# because ``_resolve_binding_path`` is defined further down in this module.
ResolvedPath = Annotated[
    Path,
    BeforeValidator(lambda raw: _resolve_binding_path(raw)),
]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class BaseBindingConfigModel(BaseModel):
    """Shared base for plugin ``BindingConfig`` models and their sub-models.

    ``extra="forbid"`` is configured here, once, so plugins do not have to
    redeclare it and cannot accidentally accept unknown keys. Every
    ``BaseModel`` defined in a plugin's ``binding_config.py`` is meant to
    derive from this class, at every depth of the config tree.
    """

    # Reject any key that is not a declared field.
    model_config = ConfigDict(extra="forbid")


# Type variable for helpers that accept a concrete binding-config model class
# and return an instance of that same class.
BindingConfigT = TypeVar("BindingConfigT", bound=BaseBindingConfigModel)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass(frozen=True)
class ResolvedBindingModels:
    """Immutable result of resolving a binding's ``models`` section."""

    # Names of the models that are active for the binding.
    active: tuple[str, ...]
    # Optional row predicate; ``None`` means no filtering is applied.
    filter: RowFilter | None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def require_authenticated_account(
    binding: DagsterAllPlanBinding,
) -> AuthenticatedAccountRef:
    """Return ``binding.auth``, requiring it to be an ``AuthenticatedAccountRef``.

    Raises:
        RuntimeError: If the binding carries any other auth payload; the
            message names the plugin and the auth type actually received.
    """
    auth = binding.auth
    if not isinstance(auth, AuthenticatedAccountRef):
        raise RuntimeError(
            f"{binding.plugin_id} binding requires auth.type='authenticated_account', got '{auth.type}'."
        )
    return auth
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def require_api_key(binding: DagsterAllPlanBinding) -> ApiKeyAuth:
    """Return ``binding.auth``, requiring it to be an ``ApiKeyAuth``.

    Raises:
        RuntimeError: If the binding carries any other auth payload; the
            message names the plugin and the auth type actually received.
    """
    auth = binding.auth
    if not isinstance(auth, ApiKeyAuth):
        raise RuntimeError(
            f"{binding.plugin_id} binding requires auth.type='api_key', got '{auth.type}'."
        )
    return auth
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def require_client_credentials(binding: DagsterAllPlanBinding) -> ClientCredentialsAuth:
    """Return ``binding.auth``, requiring it to be a ``ClientCredentialsAuth``.

    Raises:
        RuntimeError: If the binding carries any other auth payload; the
            message names the plugin and the auth type actually received.
    """
    auth = binding.auth
    if not isinstance(auth, ClientCredentialsAuth):
        raise RuntimeError(
            f"{binding.plugin_id} binding requires auth.type='client_credentials', got '{auth.type}'."
        )
    return auth
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def require_binding_auth_none(binding: DagsterAllPlanBinding) -> BindingAuthNone:
    """Return ``binding.auth``, requiring it to be a ``BindingAuthNone``.

    Raises:
        RuntimeError: If the binding carries any other auth payload; the
            message names the plugin and the auth type actually received.
    """
    auth = binding.auth
    if not isinstance(auth, BindingAuthNone):
        raise RuntimeError(
            f"{binding.plugin_id} binding requires auth.type='none', got '{auth.type}'."
        )
    return auth
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def parse_binding_config(
    binding: DagsterAllPlanBinding,
    config_model: type[BindingConfigT],
) -> BindingConfigT:
    """Validate ``binding.config`` against ``config_model``.

    Args:
        binding: Binding whose ``config`` payload is validated.
        config_model: Model class used for validation.

    Returns:
        The validated ``config_model`` instance.

    Raises:
        PluginConfigurationError: If validation fails. The message is prefixed
            with the plugin id and the original ``ValidationError`` is chained
            as the cause.
    """
    try:
        parsed = config_model.model_validate(binding.config)
    except ValidationError as exc:
        message = f"{binding.plugin_id} binding.config is invalid: {exc}"
        raise PluginConfigurationError(message) from exc
    else:
        return parsed
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def resolve_binding_models(
    binding: DagsterAllPlanBinding,
    *,
    supported_models: Collection[str],
    default_active: Collection[str] = (),
) -> ResolvedBindingModels:
    """Resolve which models are active for ``binding`` and load its row filter.

    Args:
        binding: Binding whose ``models`` section is read.
        supported_models: Model names the plugin supports.
        default_active: Names used when the binding has no ``models`` section
            or its ``active`` list is ``None``.

    Returns:
        The de-duplicated active model names (first occurrence wins, order
        preserved) together with the loaded row filter, if one is configured.

    Raises:
        PluginConfigurationError: If an active model is not in
            ``supported_models``, or if the row filter cannot be loaded.
    """
    payload = binding.models
    if payload is None or payload.active is None:
        requested = default_active
    else:
        requested = payload.active

    # dict.fromkeys drops duplicates while keeping first-seen order.
    active = tuple(dict.fromkeys(requested))
    rejected = tuple(name for name in active if name not in supported_models)
    if rejected:
        supported = ", ".join(sorted(supported_models))
        unsupported = ", ".join(rejected)
        raise PluginConfigurationError(
            f"{binding.plugin_id} models.active contains unsupported models [{unsupported}]. "
            f"Supported models: [{supported}]."
        )

    filter_ref = payload.filter if payload is not None else None
    row_filter = _load_row_filter(filter_ref, plugin_id=binding.plugin_id)
    return ResolvedBindingModels(active=active, filter=row_filter)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def iter_active_model_rows(
    *,
    model_name: str,
    rows: Iterable[RowT],
    binding_models: ResolvedBindingModels,
) -> Iterator[RowT]:
    """Lazily yield the rows of ``model_name`` that the binding keeps.

    Yields nothing, and does not touch ``rows``, when ``model_name`` is not
    among ``binding_models.active``. Otherwise each row is yielded only if
    ``_should_keep_model_row`` accepts it.
    """
    if model_name in binding_models.active:
        for candidate in rows:
            if not _should_keep_model_row(
                model_name=model_name,
                row=candidate,
                binding_models=binding_models,
            ):
                continue
            yield candidate
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _should_keep_model_row(
    *,
    model_name: str,
    row: RowT,
    binding_models: ResolvedBindingModels,
) -> bool:
    """Decide whether ``row`` passes the binding's row filter.

    Returns ``True`` when no filter is configured. Otherwise returns whatever
    the filter returns for ``(model_name, <deep copy of row>)``.
    """
    predicate = binding_models.filter
    # The filter receives a deep copy, so it cannot mutate the row that the
    # caller goes on to use.
    return True if predicate is None else predicate(model_name, row.model_copy(deep=True))
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _load_row_filter(
    filter_ref: str | None,
    *,
    plugin_id: str,
) -> RowFilter | None:
    """Load the callable referenced by a binding's ``models.filter`` setting.

    Args:
        filter_ref: Reference of the form ``'<path.py>:<callable>'``, or
            ``None`` when no filter is configured. ``~`` is expanded and
            relative paths are resolved against the current working directory.
        plugin_id: Plugin identifier, used only in error messages.

    Returns:
        The referenced callable, or ``None`` when ``filter_ref`` is ``None``.

    Raises:
        PluginConfigurationError: If the reference is malformed, the file does
            not exist, no import spec can be built for it, or the named
            attribute is missing or not callable.

    Any exception raised while executing the filter module propagates
    unchanged.
    """
    if filter_ref is None:
        return None

    # Split on the LAST colon: a callable name can never contain one, but the
    # path can (for example a Windows drive letter such as ``C:\filters.py``).
    module_path_raw, separator, attr_name = filter_ref.rpartition(":")
    if separator != ":" or not module_path_raw or not attr_name:
        raise PluginConfigurationError(
            f"Invalid {plugin_id} models.filter. Expected '<path.py>:<callable>'."
        )

    # resolve() already anchors relative paths at the current working directory.
    module_path = Path(module_path_raw).expanduser().resolve()
    if not module_path.is_file():
        raise PluginConfigurationError(
            f"{plugin_id} models.filter path does not exist: {module_path}"
        )

    module_name = _filter_module_name(module_path)
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    if spec is None or spec.loader is None:
        raise PluginConfigurationError(
            f"Unable to load {plugin_id} models.filter module: {module_path}"
        )

    module = importlib.util.module_from_spec(spec)
    # The module is registered before execution so code inside it can look
    # itself up in sys.modules. If execution fails, the registration is undone
    # so that no half-initialised module is left behind.
    previous_module = sys.modules.get(module_name)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        if previous_module is None:
            sys.modules.pop(module_name, None)
        else:
            sys.modules[module_name] = previous_module
        raise

    row_filter = getattr(module, attr_name, None)
    if not callable(row_filter):
        raise PluginConfigurationError(
            f"{plugin_id} models.filter '{filter_ref}' does not resolve to a callable"
        )
    return row_filter
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _filter_module_name(module_path: Path) -> str:
    """Return the module name under which a filter file is registered.

    The name is ``shared_plugins_binding_filter_`` followed by the first 12 hex
    characters of the SHA-1 of the path string, so the same path always maps to
    the same name.
    """
    # The hash is only a stable identifier, not a security measure. Declaring
    # that keeps the call usable on Python builds that restrict SHA-1 (FIPS).
    digest = hashlib.sha1(
        str(module_path).encode("utf-8"),
        usedforsecurity=False,
    ).hexdigest()
    return f"shared_plugins_binding_filter_{digest[:12]}"
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
class _BindingPathTypeError(TypeError, ValueError):
    """Raised when a binding path value is neither a ``str`` nor a ``Path``.

    It stays a ``TypeError`` for existing callers. It is also a ``ValueError``
    because pydantic only turns ``ValueError``/``AssertionError`` raised inside
    a validator into a ``ValidationError``; a plain ``TypeError`` would escape
    validation as a raw exception.
    """


def _resolve_binding_path(value: object) -> Path:
    """Normalise a binding path value to an absolute ``Path``.

    Args:
        value: A ``Path`` (used as is) or a ``str``. Strings are stripped, then
            environment variables and ``~`` are expanded.

    Returns:
        The path resolved with ``strict=False``, so it does not have to exist.

    Raises:
        ValueError: If ``value`` is an empty or whitespace-only string.
        TypeError: If ``value`` is neither ``str`` nor ``Path``. The raised
            exception is also a ``ValueError``.
    """
    if isinstance(value, Path):
        path = value
    elif isinstance(value, str):
        stripped = value.strip()
        if not stripped:
            raise ValueError("Path values must not be empty.")
        path = Path(os.path.expandvars(stripped)).expanduser()
    else:
        raise _BindingPathTypeError(
            f"Expected path-like value, got {type(value).__name__}"
        )

    return path.resolve(strict=False)
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from datetime import timezone
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
from pydantic import ValidationError
|
|
11
|
+
from shared_types.authenticated_account import AuthenticatedAccountRef
|
|
12
|
+
from shared_types.dagster_binding_plan import (
|
|
13
|
+
DagsterAllPlanBinding,
|
|
14
|
+
DagsterBindingPlanAll,
|
|
15
|
+
)
|
|
16
|
+
from shared_types.oauth_access_token import OAuthAccessTokenSuccess
|
|
17
|
+
|
|
18
|
+
from .models import format_validation_error
|
|
19
|
+
|
|
20
|
+
# Control-plane API routes; each is joined onto the configured base URL.
_DAGSTER_BINDING_PLAN_PATH = "/api/v1/binding-plan"
_OAUTH_ACCESS_TOKEN_PATH = "/api/v1/oauth-access-token"
_OAUTH_REFRESH_ACCESS_TOKEN_PATH = "/api/v1/oauth-refresh-access-token"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _control_plane_endpoint(
    control_plane_base_url: httpx.URL,
    path: str,
    *,
    query: dict[str, str] | None = None,
) -> str:
    """Build the endpoint URL for ``path`` on the control plane.

    Args:
        control_plane_base_url: Base URL that ``path`` is joined onto.
        path: Route to join.
        query: Optional query parameters; ignored when ``None`` or empty.

    Returns:
        The resulting URL as a string.
    """
    url = control_plane_base_url.join(path)
    return str(url.copy_with(params=query) if query else url)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _parse_json_response(response: Any, *, endpoint: str, label: str) -> Any:
    """Decode the JSON body of ``response``.

    Args:
        response: Object exposing a ``json()`` method.
        endpoint: URL the response came from; used only in the error message.
        label: Human-readable name of the response; used only in the error
            message.

    Returns:
        Whatever ``response.json()`` returns.

    Raises:
        RuntimeError: If decoding raises ``json.JSONDecodeError``. The message
            carries the decoder's message, line and column, and the original
            error is chained as the cause.
    """
    try:
        payload = response.json()
    except json.JSONDecodeError as exc:
        position = f"line {exc.lineno}, column {exc.colno}"
        raise RuntimeError(
            f"{label} response from {endpoint} is not valid JSON: {exc.msg} ({position})."
        ) from exc
    return payload
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class ControlPlaneClient:
    """HTTP client for the control-plane API used by plugins.

    It fetches the Dagster binding plan and obtains or refreshes OAuth access
    tokens. Every request carries ``root_dir`` and uses a fixed timeout.
    Failures are reported as ``RuntimeError`` with the endpoint in the message.
    """

    # Timeout, in seconds, applied to every control-plane request.
    _REQUEST_TIMEOUT_SECONDS = 10.0

    def __init__(
        self,
        *,
        control_plane_url: str,
        root_dir: Path,
        get: Callable[..., Any] | None = None,
        post: Callable[..., Any] | None = None,
    ) -> None:
        """Validate the connection settings and store the request callables.

        Args:
            control_plane_url: Base URL of the control plane. Surrounding
                whitespace and trailing slashes are removed.
            root_dir: Absolute directory sent along with every request.
            get: Callable used for GET requests; defaults to ``httpx.get``.
            post: Callable used for POST requests; defaults to ``httpx.post``.

        Raises:
            RuntimeError: If the URL is blank or does not start with
                ``http://``/``https://``, or if ``root_dir`` is not absolute.
            TypeError: If ``root_dir`` is not a ``pathlib.Path``.
        """
        normalized_control_plane_url = control_plane_url.strip()
        if not normalized_control_plane_url:
            raise RuntimeError("control_plane_url cannot be blank.")
        if not normalized_control_plane_url.startswith(("http://", "https://")):
            raise RuntimeError("control_plane_url must use http:// or https://.")

        if not isinstance(root_dir, Path):
            raise TypeError("root_dir must be a pathlib.Path.")
        if not root_dir.is_absolute():
            raise RuntimeError("root_dir must be an absolute path.")

        self._control_plane_base_url = httpx.URL(
            normalized_control_plane_url.rstrip("/")
        )
        self._root_dir = root_dir
        self._get = httpx.get if get is None else get
        self._post = httpx.post if post is None else post

    def load_binding_plan(self) -> DagsterBindingPlanAll:
        """Fetch and validate the Dagster binding plan.

        Returns:
            The validated plan with ``generated_at`` converted to UTC.

        Raises:
            RuntimeError: If the request fails, the status is not 2xx, the body
                is not valid JSON, or the payload fails model validation.
        """
        endpoint = _control_plane_endpoint(
            self._control_plane_base_url,
            _DAGSTER_BINDING_PLAN_PATH,
            query={"mode": "dagster", "root_dir": str(self._root_dir)},
        )

        try:
            response = self._get(endpoint, timeout=self._REQUEST_TIMEOUT_SECONDS)
        except httpx.RequestError as exc:
            raise RuntimeError(
                f"Failed to fetch binding plan from {endpoint}: {exc}"
            ) from exc

        if not 200 <= response.status_code < 300:
            detail = response.text.strip() or "no response body"
            raise RuntimeError(
                f"Failed to fetch binding plan from {endpoint} (status={response.status_code}): {detail}"
            )

        payload = _parse_json_response(
            response, endpoint=endpoint, label="Binding plan"
        )

        try:
            plan = DagsterBindingPlanAll.model_validate(payload)
        except ValidationError as exc:
            error_message = format_validation_error(exc)
            raise RuntimeError(
                f"Binding plan response from {endpoint} failed model validation: {error_message}."
            ) from exc

        return plan.model_copy(
            update={"generated_at": plan.generated_at.astimezone(timezone.utc)}
        )

    def list_active_bindings(self, plugin_id: str) -> list[DagsterAllPlanBinding]:
        """Return the plan's bindings whose ``plugin_id`` matches.

        The binding plan is fetched anew on every call.
        """
        plan = self.load_binding_plan()
        return [binding for binding in plan.bindings if binding.plugin_id == plugin_id]

    def get_binding(self, plugin_id: str, binding_id: str) -> DagsterAllPlanBinding:
        """Return the binding of ``plugin_id`` whose id equals ``binding_id``.

        Ids are compared via ``str(binding.binding_id)``.

        Raises:
            RuntimeError: If no such binding exists, or if loading the plan
                fails.
        """
        for binding in self.list_active_bindings(plugin_id):
            if str(binding.binding_id) == binding_id:
                return binding

        raise RuntimeError(
            f"Binding '{binding_id}' was not found for plugin_id '{plugin_id}'."
        )

    def get_access_token(
        self,
        auth: AuthenticatedAccountRef,
    ) -> OAuthAccessTokenSuccess:
        """Request an OAuth access token for the account referenced by ``auth``.

        Raises:
            RuntimeError: If the request fails, the status is not 200, the body
                is not valid JSON, or the payload fails model validation.
        """
        return self._request_oauth_token(
            auth,
            path=_OAUTH_ACCESS_TOKEN_PATH,
            action="fetch",
            label="OAuth access token",
        )

    def refresh_access_token(
        self,
        auth: AuthenticatedAccountRef,
    ) -> OAuthAccessTokenSuccess:
        """Request a refreshed OAuth access token for the account in ``auth``.

        Raises:
            RuntimeError: If the request fails, the status is not 200, the body
                is not valid JSON, or the payload fails model validation.
        """
        return self._request_oauth_token(
            auth,
            path=_OAUTH_REFRESH_ACCESS_TOKEN_PATH,
            action="refresh",
            label="OAuth refresh access token",
        )

    def _request_oauth_token(
        self,
        auth: AuthenticatedAccountRef,
        *,
        path: str,
        action: str,
        label: str,
    ) -> OAuthAccessTokenSuccess:
        """POST an OAuth token request to ``path`` and validate the response.

        Shared by ``get_access_token`` and ``refresh_access_token``, which
        differ only in route and error wording.

        Args:
            auth: Account reference supplying ``account_id`` and
                ``provider_id``.
            path: Control-plane route to post to.
            action: Verb used in failure messages (``"fetch"``/``"refresh"``).
            label: Response name used in JSON and validation error messages.
        """
        endpoint = _control_plane_endpoint(self._control_plane_base_url, path)

        try:
            response = self._post(
                endpoint,
                json={
                    "account_id": auth.account_id,
                    "provider_id": auth.provider_id,
                    "root_dir": str(self._root_dir),
                },
                timeout=self._REQUEST_TIMEOUT_SECONDS,
            )
        except httpx.RequestError as exc:
            raise RuntimeError(
                f"Failed to {action} OAuth access token from {endpoint}: {exc}"
            ) from exc

        # Unlike the binding-plan request, only an exact 200 counts as success.
        if response.status_code != 200:
            detail = response.text.strip() or "no response body"
            raise RuntimeError(
                f"Failed to {action} OAuth access token from {endpoint} (status={response.status_code}): {detail}"
            )

        payload = _parse_json_response(response, endpoint=endpoint, label=label)

        try:
            return OAuthAccessTokenSuccess.model_validate(payload)
        except ValidationError as exc:
            error_message = format_validation_error(exc)
            raise RuntimeError(
                f"{label} response from {endpoint} failed model validation: {error_message}."
            ) from exc
|
shared_plugins/dlt.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterator
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import dlt
|
|
7
|
+
from dagster import AssetExecutionContext
|
|
8
|
+
from dagster_dlt import DagsterDltResource
|
|
9
|
+
from dlt.common.schema import Schema
|
|
10
|
+
from dlt.destinations.sql_client import SqlClientBase
|
|
11
|
+
from shared_types.dagster_binding_plan import DagsterAllPlanBinding
|
|
12
|
+
|
|
13
|
+
from .control_plane import ControlPlaneClient
|
|
14
|
+
from .naming import dlt_dataset_name, dlt_pipeline_name
|
|
15
|
+
from .resources import DLT_TRANSLATOR
|
|
16
|
+
|
|
17
|
+
# Name of the dlt destination passed to ``dlt.pipeline`` by ``run_dlt_pipeline``.
DLT_DESTINATION = "postgres"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def pre_register_source_schema(pipeline: Any, source: Any) -> None:
    """Make sure the pipeline has a schema named after ``source.name``.

    If the pipeline does not yet hold a schema with that name, an empty
    ``Schema`` is saved for it; otherwise nothing happens, so the call is
    idempotent.

    This lets resources call ``pipe.sql_client(schema_name=<source>)`` on a
    cold start. Without it, dlt's ``_get_schema_or_create`` falls through to
    ``Schema(pipeline_name)``, which raises ``InvalidSchemaName`` whenever the
    pipeline name exceeds dlt's 64-character schema-name limit. On warm runs
    ``has_schema`` short-circuits because the schema is already on disk or was
    restored from destination state.
    """
    schema_storage = pipeline.schemas
    schema_name = source.name
    if schema_storage.has_schema(schema_name):
        return
    schema_storage.save_schema(Schema(schema_name))
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def resolve_partition_binding(
    *,
    context: AssetExecutionContext,
    control_plane: ControlPlaneClient,
    plugin_id: str,
) -> DagsterAllPlanBinding:
    """Look up the binding identified by the run's partition key.

    Args:
        context: Execution context; its partition key is used as the
            ``binding_id``.
        control_plane: Client used to fetch the binding.
        plugin_id: Plugin the binding must belong to.

    Returns:
        The binding returned by ``control_plane.get_binding``.

    Raises:
        RuntimeError: If the run has no partition key, or if the control plane
            cannot provide the binding.
    """
    # Dagster's ``partition_key`` property raises for non-partitioned runs
    # instead of returning None, so ask ``has_partition_key`` first; otherwise
    # the guard below could never produce its message.
    partition_key = context.partition_key if context.has_partition_key else None
    if partition_key is None:
        raise RuntimeError("Missing partition key; expected binding_id partition.")

    return control_plane.get_binding(plugin_id=plugin_id, binding_id=partition_key)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def run_dlt_pipeline(
    *,
    context: AssetExecutionContext,
    dlt_resource: DagsterDltResource,
    source: Any,
    plugin_id: str,
    binding_id: str,
    job_name: str,
) -> Iterator[Any]:
    """Run ``source`` through a dlt pipeline and yield the resulting events.

    The pipeline is named from ``plugin_id``, ``binding_id`` and ``job_name``,
    writes to ``DLT_DESTINATION`` and uses a dataset named from ``plugin_id``.
    A schema for the source is pre-registered before the run starts.

    This is a generator: nothing runs until it is iterated, and the
    "Completed" message is logged only once every event has been consumed.
    """
    pipeline_name = dlt_pipeline_name(plugin_id, binding_id, job_name)
    dataset_name = dlt_dataset_name(plugin_id)
    pipeline = dlt.pipeline(
        pipeline_name=pipeline_name,
        destination=DLT_DESTINATION,
        dataset_name=dataset_name,
        progress="log",
    )
    pre_register_source_schema(pipeline, source)

    run_label = f"{plugin_id} job={job_name} for binding_id={binding_id}"
    context.log.info(f"Starting {run_label}")
    events = dlt_resource.run(
        context=context,
        dlt_source=source,
        dlt_pipeline=pipeline,
        dagster_dlt_translator=DLT_TRANSLATOR,
    )
    yield from events
    context.log.info(f"Completed {run_label}")
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def destination_has_table(
    sql_client: SqlClientBase[Any],
    table_name: str,
) -> bool:
    """Report whether ``table_name`` exists in the connection's current schema.

    Runs a parameterised lookup against ``information_schema.tables``
    restricted to ``current_schema()`` and returns ``True`` when it yields a
    row.
    """
    query = " ".join(
        (
            "SELECT 1 FROM information_schema.tables",
            "WHERE table_schema = current_schema() AND table_name = %s",
            "LIMIT 1",
        )
    )
    with sql_client.execute_query(query, table_name) as cursor:
        first_row = cursor.fetchone()
    return first_row is not None
|