dagster-airbyte 0.24.3__py3-none-any.whl → 0.28.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_airbyte/__init__.py +26 -9
- dagster_airbyte/asset_decorator.py +123 -0
- dagster_airbyte/asset_defs.py +334 -202
- dagster_airbyte/components/__init__.py +0 -0
- dagster_airbyte/components/workspace_component/__init__.py +0 -0
- dagster_airbyte/components/workspace_component/component.py +433 -0
- dagster_airbyte/components/workspace_component/scaffolder.py +30 -0
- dagster_airbyte/legacy_resources.py +826 -0
- dagster_airbyte/managed/__init__.py +2 -2
- dagster_airbyte/managed/generated/__init__.py +1 -1
- dagster_airbyte/managed/generated/sources.py +35 -35
- dagster_airbyte/managed/reconciliation.py +34 -44
- dagster_airbyte/managed/types.py +8 -7
- dagster_airbyte/ops.py +5 -4
- dagster_airbyte/resources.py +855 -601
- dagster_airbyte/translator.py +255 -0
- dagster_airbyte/types.py +8 -3
- dagster_airbyte/utils.py +36 -2
- dagster_airbyte/version.py +1 -1
- {dagster_airbyte-0.24.3.dist-info → dagster_airbyte-0.28.3.dist-info}/METADATA +19 -10
- dagster_airbyte-0.28.3.dist-info/RECORD +28 -0
- {dagster_airbyte-0.24.3.dist-info → dagster_airbyte-0.28.3.dist-info}/WHEEL +1 -1
- {dagster_airbyte-0.24.3.dist-info → dagster_airbyte-0.28.3.dist-info}/entry_points.txt +3 -0
- {dagster_airbyte-0.24.3.dist-info → dagster_airbyte-0.28.3.dist-info/licenses}/LICENSE +1 -1
- dagster_airbyte-0.24.3.dist-info/RECORD +0 -21
- {dagster_airbyte-0.24.3.dist-info → dagster_airbyte-0.28.3.dist-info}/top_level.txt +0 -0
dagster_airbyte/asset_defs.py
CHANGED
|
@@ -1,54 +1,53 @@
|
|
|
1
1
|
import hashlib
|
|
2
2
|
import inspect
|
|
3
3
|
import os
|
|
4
|
-
import re
|
|
5
4
|
from abc import abstractmethod
|
|
5
|
+
from collections.abc import Callable, Iterable, Mapping, Sequence
|
|
6
6
|
from functools import partial
|
|
7
7
|
from itertools import chain
|
|
8
|
-
from typing import
|
|
9
|
-
Any,
|
|
10
|
-
Callable,
|
|
11
|
-
Dict,
|
|
12
|
-
Iterable,
|
|
13
|
-
List,
|
|
14
|
-
Mapping,
|
|
15
|
-
NamedTuple,
|
|
16
|
-
Optional,
|
|
17
|
-
Sequence,
|
|
18
|
-
Set,
|
|
19
|
-
Tuple,
|
|
20
|
-
Union,
|
|
21
|
-
cast,
|
|
22
|
-
)
|
|
8
|
+
from typing import Any, NamedTuple, Optional, Union, cast
|
|
23
9
|
|
|
24
10
|
import yaml
|
|
25
11
|
from dagster import (
|
|
12
|
+
AssetExecutionContext,
|
|
26
13
|
AssetKey,
|
|
27
14
|
AssetOut,
|
|
28
15
|
AutoMaterializePolicy,
|
|
29
|
-
|
|
16
|
+
LegacyFreshnessPolicy,
|
|
30
17
|
Nothing,
|
|
31
18
|
Output,
|
|
32
19
|
ResourceDefinition,
|
|
33
20
|
SourceAsset,
|
|
34
21
|
_check as check,
|
|
35
22
|
)
|
|
36
|
-
from dagster._annotations import
|
|
23
|
+
from dagster._annotations import beta, hidden_param, only_allow_hidden_params_in_kwargs, superseded
|
|
37
24
|
from dagster._core.definitions import AssetsDefinition, multi_asset
|
|
38
|
-
from dagster._core.definitions.
|
|
25
|
+
from dagster._core.definitions.assets.definition.cacheable_assets_definition import (
|
|
39
26
|
AssetsDefinitionCacheableData,
|
|
40
27
|
CacheableAssetsDefinition,
|
|
41
28
|
)
|
|
42
29
|
from dagster._core.definitions.events import CoercibleToAssetKey, CoercibleToAssetKeyPrefix
|
|
43
|
-
from dagster._core.definitions.metadata import
|
|
30
|
+
from dagster._core.definitions.metadata.metadata_set import TableMetadataSet
|
|
44
31
|
from dagster._core.definitions.metadata.table import TableSchema
|
|
45
32
|
from dagster._core.errors import DagsterInvalidDefinitionError, DagsterInvalidInvocationError
|
|
46
33
|
from dagster._core.execution.context.init import build_init_resource_context
|
|
47
34
|
from dagster._utils.merger import merge_dicts
|
|
48
35
|
|
|
49
|
-
from dagster_airbyte.
|
|
36
|
+
from dagster_airbyte.asset_decorator import airbyte_assets
|
|
37
|
+
from dagster_airbyte.legacy_resources import (
|
|
38
|
+
AirbyteCloudResource,
|
|
39
|
+
AirbyteResource,
|
|
40
|
+
BaseAirbyteResource,
|
|
41
|
+
)
|
|
42
|
+
from dagster_airbyte.resources import AirbyteCloudWorkspace, AirbyteWorkspace, BaseAirbyteWorkspace
|
|
43
|
+
from dagster_airbyte.translator import (
|
|
44
|
+
AirbyteConnection,
|
|
45
|
+
AirbyteMetadataSet,
|
|
46
|
+
DagsterAirbyteTranslator,
|
|
47
|
+
)
|
|
50
48
|
from dagster_airbyte.types import AirbyteTableMetadata
|
|
51
49
|
from dagster_airbyte.utils import (
|
|
50
|
+
clean_name,
|
|
52
51
|
generate_materializations,
|
|
53
52
|
generate_table_schema,
|
|
54
53
|
is_basic_normalization_operation,
|
|
@@ -62,14 +61,18 @@ def _table_to_output_name_fn(table: str) -> str:
|
|
|
62
61
|
def _build_airbyte_asset_defn_metadata(
|
|
63
62
|
connection_id: str,
|
|
64
63
|
destination_tables: Sequence[str],
|
|
64
|
+
destination_raw_table_names_by_table: Mapping[str, str],
|
|
65
|
+
destination_database: Optional[str],
|
|
66
|
+
destination_schema: Optional[str],
|
|
65
67
|
table_to_asset_key_fn: Callable[[str], AssetKey],
|
|
66
68
|
asset_key_prefix: Optional[Sequence[str]] = None,
|
|
67
|
-
normalization_tables: Optional[Mapping[str,
|
|
69
|
+
normalization_tables: Optional[Mapping[str, set[str]]] = None,
|
|
70
|
+
normalization_raw_table_names_by_table: Optional[Mapping[str, str]] = None,
|
|
68
71
|
upstream_assets: Optional[Iterable[AssetKey]] = None,
|
|
69
72
|
group_name: Optional[str] = None,
|
|
70
73
|
io_manager_key: Optional[str] = None,
|
|
71
74
|
schema_by_table_name: Optional[Mapping[str, TableSchema]] = None,
|
|
72
|
-
|
|
75
|
+
legacy_freshness_policy: Optional[LegacyFreshnessPolicy] = None,
|
|
73
76
|
auto_materialize_policy: Optional[AutoMaterializePolicy] = None,
|
|
74
77
|
) -> AssetsDefinitionCacheableData:
|
|
75
78
|
asset_key_prefix = (
|
|
@@ -93,7 +96,7 @@ def _build_airbyte_asset_defn_metadata(
|
|
|
93
96
|
for table in tables
|
|
94
97
|
}
|
|
95
98
|
|
|
96
|
-
internal_deps:
|
|
99
|
+
internal_deps: dict[str, set[AssetKey]] = {}
|
|
97
100
|
|
|
98
101
|
metadata_encodable_normalization_tables = (
|
|
99
102
|
{k: list(v) for k, v in normalization_tables.items()} if normalization_tables else {}
|
|
@@ -112,6 +115,30 @@ def _build_airbyte_asset_defn_metadata(
|
|
|
112
115
|
for table in destination_tables:
|
|
113
116
|
internal_deps[table] = set(upstream_assets or [])
|
|
114
117
|
|
|
118
|
+
table_names: dict[str, str] = {}
|
|
119
|
+
for table in destination_tables:
|
|
120
|
+
if destination_database and destination_schema and table:
|
|
121
|
+
# Use the destination raw table name to create the table name
|
|
122
|
+
table_names[table] = ".".join(
|
|
123
|
+
[
|
|
124
|
+
destination_database,
|
|
125
|
+
destination_schema,
|
|
126
|
+
destination_raw_table_names_by_table[table],
|
|
127
|
+
]
|
|
128
|
+
)
|
|
129
|
+
if normalization_tables and normalization_raw_table_names_by_table:
|
|
130
|
+
for normalization_table in normalization_tables.get(table, set()):
|
|
131
|
+
table_names[normalization_table] = ".".join(
|
|
132
|
+
[
|
|
133
|
+
destination_database,
|
|
134
|
+
destination_schema,
|
|
135
|
+
destination_raw_table_names_by_table[table],
|
|
136
|
+
normalization_raw_table_names_by_table[normalization_table],
|
|
137
|
+
]
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
schema_by_table_name = schema_by_table_name if schema_by_table_name else {}
|
|
141
|
+
|
|
115
142
|
return AssetsDefinitionCacheableData(
|
|
116
143
|
keys_by_input_name=(
|
|
117
144
|
{asset_key.path[-1]: asset_key for asset_key in upstream_assets}
|
|
@@ -125,14 +152,19 @@ def _build_airbyte_asset_defn_metadata(
|
|
|
125
152
|
can_subset=False,
|
|
126
153
|
metadata_by_output_name=(
|
|
127
154
|
{
|
|
128
|
-
table: {
|
|
155
|
+
table: {
|
|
156
|
+
**TableMetadataSet(
|
|
157
|
+
column_schema=schema_by_table_name.get(table),
|
|
158
|
+
table_name=table_names.get(table),
|
|
159
|
+
),
|
|
160
|
+
}
|
|
129
161
|
for table in tables
|
|
130
162
|
}
|
|
131
|
-
if schema_by_table_name
|
|
132
|
-
else None
|
|
133
163
|
),
|
|
134
|
-
|
|
135
|
-
{output:
|
|
164
|
+
legacy_freshness_policies_by_output_name=(
|
|
165
|
+
{output: legacy_freshness_policy for output in outputs}
|
|
166
|
+
if legacy_freshness_policy
|
|
167
|
+
else None
|
|
136
168
|
),
|
|
137
169
|
auto_materialize_policies_by_output_name=(
|
|
138
170
|
{output: auto_materialize_policy for output in outputs}
|
|
@@ -153,31 +185,28 @@ def _build_airbyte_assets_from_metadata(
|
|
|
153
185
|
assets_defn_meta: AssetsDefinitionCacheableData,
|
|
154
186
|
resource_defs: Optional[Mapping[str, ResourceDefinition]],
|
|
155
187
|
) -> AssetsDefinition:
|
|
156
|
-
metadata = cast(Mapping[str, Any], assets_defn_meta.extra_metadata)
|
|
157
|
-
connection_id = cast(str, metadata["connection_id"])
|
|
158
|
-
group_name = cast(Optional[str], metadata["group_name"])
|
|
159
|
-
destination_tables = cast(
|
|
160
|
-
normalization_tables = cast(Mapping[str,
|
|
161
|
-
io_manager_key = cast(Optional[str], metadata["io_manager_key"])
|
|
188
|
+
metadata = cast("Mapping[str, Any]", assets_defn_meta.extra_metadata)
|
|
189
|
+
connection_id = cast("str", metadata["connection_id"])
|
|
190
|
+
group_name = cast("Optional[str]", metadata["group_name"])
|
|
191
|
+
destination_tables = cast("list[str]", metadata["destination_tables"])
|
|
192
|
+
normalization_tables = cast("Mapping[str, list[str]]", metadata["normalization_tables"])
|
|
193
|
+
io_manager_key = cast("Optional[str]", metadata["io_manager_key"])
|
|
162
194
|
|
|
163
195
|
@multi_asset(
|
|
164
|
-
name=f"airbyte_sync_{connection_id
|
|
196
|
+
name=f"airbyte_sync_{connection_id.replace('-', '_')}",
|
|
165
197
|
deps=list((assets_defn_meta.keys_by_input_name or {}).values()),
|
|
166
198
|
outs={
|
|
167
199
|
k: AssetOut(
|
|
168
200
|
key=v,
|
|
169
201
|
metadata=(
|
|
170
|
-
|
|
171
|
-
k: cast(TableSchemaMetadataValue, v)
|
|
172
|
-
for k, v in assets_defn_meta.metadata_by_output_name.get(k, {}).items()
|
|
173
|
-
}
|
|
202
|
+
assets_defn_meta.metadata_by_output_name.get(k)
|
|
174
203
|
if assets_defn_meta.metadata_by_output_name
|
|
175
204
|
else None
|
|
176
205
|
),
|
|
177
206
|
io_manager_key=io_manager_key,
|
|
178
|
-
|
|
179
|
-
assets_defn_meta.
|
|
180
|
-
if assets_defn_meta.
|
|
207
|
+
legacy_freshness_policy=(
|
|
208
|
+
assets_defn_meta.legacy_freshness_policies_by_output_name.get(k)
|
|
209
|
+
if assets_defn_meta.legacy_freshness_policies_by_output_name
|
|
181
210
|
else None
|
|
182
211
|
),
|
|
183
212
|
dagster_type=Nothing,
|
|
@@ -222,18 +251,27 @@ def _build_airbyte_assets_from_metadata(
|
|
|
222
251
|
return _assets
|
|
223
252
|
|
|
224
253
|
|
|
254
|
+
@hidden_param(
|
|
255
|
+
param="legacy_freshness_policy",
|
|
256
|
+
breaking_version="1.13.0",
|
|
257
|
+
)
|
|
258
|
+
@hidden_param(
|
|
259
|
+
param="auto_materialize_policy",
|
|
260
|
+
breaking_version="1.10.0",
|
|
261
|
+
)
|
|
225
262
|
def build_airbyte_assets(
|
|
226
263
|
connection_id: str,
|
|
227
264
|
destination_tables: Sequence[str],
|
|
265
|
+
destination_database: Optional[str] = None,
|
|
266
|
+
destination_schema: Optional[str] = None,
|
|
228
267
|
asset_key_prefix: Optional[Sequence[str]] = None,
|
|
229
268
|
group_name: Optional[str] = None,
|
|
230
|
-
normalization_tables: Optional[Mapping[str,
|
|
269
|
+
normalization_tables: Optional[Mapping[str, set[str]]] = None,
|
|
231
270
|
deps: Optional[Iterable[Union[CoercibleToAssetKey, AssetsDefinition, SourceAsset]]] = None,
|
|
232
|
-
upstream_assets: Optional[
|
|
271
|
+
upstream_assets: Optional[set[AssetKey]] = None,
|
|
233
272
|
schema_by_table_name: Optional[Mapping[str, TableSchema]] = None,
|
|
234
|
-
freshness_policy: Optional[FreshnessPolicy] = None,
|
|
235
273
|
stream_to_asset_map: Optional[Mapping[str, str]] = None,
|
|
236
|
-
|
|
274
|
+
**kwargs,
|
|
237
275
|
) -> Sequence[AssetsDefinition]:
|
|
238
276
|
"""Builds a set of assets representing the tables created by an Airbyte sync operation.
|
|
239
277
|
|
|
@@ -243,6 +281,8 @@ def build_airbyte_assets(
|
|
|
243
281
|
destination_tables (List[str]): The names of the tables that you want to be represented
|
|
244
282
|
in the Dagster asset graph for this sync. This will generally map to the name of the
|
|
245
283
|
stream in Airbyte, unless a stream prefix has been specified in Airbyte.
|
|
284
|
+
destination_database (Optional[str]): The name of the destination database.
|
|
285
|
+
destination_schema (Optional[str]): The name of the destination schema.
|
|
246
286
|
normalization_tables (Optional[Mapping[str, List[str]]]): If you are using Airbyte's
|
|
247
287
|
normalization feature, you may specify a mapping of destination table to a list of
|
|
248
288
|
derived tables that will be created by the normalization process.
|
|
@@ -251,11 +291,13 @@ def build_airbyte_assets(
|
|
|
251
291
|
deps (Optional[Sequence[Union[AssetsDefinition, SourceAsset, str, AssetKey]]]):
|
|
252
292
|
A list of assets to add as sources.
|
|
253
293
|
upstream_assets (Optional[Set[AssetKey]]): Deprecated, use deps instead. A list of assets to add as sources.
|
|
254
|
-
freshness_policy (Optional[FreshnessPolicy]): A freshness policy to apply to the assets
|
|
255
294
|
stream_to_asset_map (Optional[Mapping[str, str]]): A mapping of an Airbyte stream name to a Dagster asset.
|
|
256
295
|
This allows the use of the "prefix" setting in Airbyte with special characters that aren't valid asset names.
|
|
257
|
-
auto_materialize_policy (Optional[AutoMaterializePolicy]): An auto materialization policy to apply to the assets.
|
|
258
296
|
"""
|
|
297
|
+
only_allow_hidden_params_in_kwargs(build_airbyte_assets, kwargs)
|
|
298
|
+
legacy_freshness_policy = kwargs.get("legacy_freshness_policy")
|
|
299
|
+
auto_materialize_policy = kwargs.get("auto_materialize_policy")
|
|
300
|
+
|
|
259
301
|
if upstream_assets is not None and deps is not None:
|
|
260
302
|
raise DagsterInvalidDefinitionError(
|
|
261
303
|
"Cannot specify both deps and upstream_assets to build_airbyte_assets. Use only deps"
|
|
@@ -269,15 +311,36 @@ def build_airbyte_assets(
|
|
|
269
311
|
tables = chain.from_iterable(
|
|
270
312
|
chain([destination_tables], normalization_tables.values() if normalization_tables else [])
|
|
271
313
|
)
|
|
314
|
+
|
|
315
|
+
table_names: dict[str, str] = {}
|
|
316
|
+
for table in destination_tables:
|
|
317
|
+
if destination_database and destination_schema and table:
|
|
318
|
+
table_names[table] = ".".join([destination_database, destination_schema, table])
|
|
319
|
+
if normalization_tables:
|
|
320
|
+
for normalization_table in normalization_tables.get(table, set()):
|
|
321
|
+
table_names[normalization_table] = ".".join(
|
|
322
|
+
[
|
|
323
|
+
destination_database,
|
|
324
|
+
destination_schema,
|
|
325
|
+
table,
|
|
326
|
+
normalization_table,
|
|
327
|
+
]
|
|
328
|
+
)
|
|
329
|
+
|
|
330
|
+
schema_by_table_name = schema_by_table_name if schema_by_table_name else {}
|
|
331
|
+
|
|
272
332
|
outputs = {
|
|
273
333
|
table: AssetOut(
|
|
274
334
|
key=AssetKey([*asset_key_prefix, table]),
|
|
275
335
|
metadata=(
|
|
276
|
-
{
|
|
277
|
-
|
|
278
|
-
|
|
336
|
+
{
|
|
337
|
+
**TableMetadataSet(
|
|
338
|
+
column_schema=schema_by_table_name.get(table),
|
|
339
|
+
table_name=table_names.get(table),
|
|
340
|
+
),
|
|
341
|
+
}
|
|
279
342
|
),
|
|
280
|
-
|
|
343
|
+
legacy_freshness_policy=legacy_freshness_policy,
|
|
281
344
|
auto_materialize_policy=auto_materialize_policy,
|
|
282
345
|
)
|
|
283
346
|
for table in tables
|
|
@@ -301,7 +364,7 @@ def build_airbyte_assets(
|
|
|
301
364
|
internal_deps[table] = set(upstream_deps) if upstream_deps else set()
|
|
302
365
|
|
|
303
366
|
@multi_asset(
|
|
304
|
-
name=f"airbyte_sync_{connection_id
|
|
367
|
+
name=f"airbyte_sync_{connection_id.replace('-', '_')}",
|
|
305
368
|
deps=upstream_deps,
|
|
306
369
|
outs=outputs,
|
|
307
370
|
internal_asset_deps=internal_deps,
|
|
@@ -377,7 +440,7 @@ def _get_normalization_tables_for_schema(
|
|
|
377
440
|
For more information on Airbyte's normalization process, see:
|
|
378
441
|
https://docs.airbyte.com/understanding-airbyte/basic-normalization/#nesting
|
|
379
442
|
"""
|
|
380
|
-
out:
|
|
443
|
+
out: dict[str, AirbyteTableMetadata] = {}
|
|
381
444
|
# Object types are broken into a new table, as long as they have children
|
|
382
445
|
|
|
383
446
|
sub_schemas = _get_sub_schemas(schema)
|
|
@@ -389,7 +452,7 @@ def _get_normalization_tables_for_schema(
|
|
|
389
452
|
|
|
390
453
|
if "object" in schema_types and len(sub_schema.get("properties", {})) > 0:
|
|
391
454
|
out[prefix + key] = AirbyteTableMetadata(
|
|
392
|
-
schema=generate_table_schema(sub_schema.get("properties", {}))
|
|
455
|
+
raw_table_name=key, schema=generate_table_schema(sub_schema.get("properties", {}))
|
|
393
456
|
)
|
|
394
457
|
for k, v in sub_schema["properties"].items():
|
|
395
458
|
out = merge_dicts(
|
|
@@ -398,7 +461,8 @@ def _get_normalization_tables_for_schema(
|
|
|
398
461
|
# Array types are also broken into a new table
|
|
399
462
|
elif "array" in schema_types:
|
|
400
463
|
out[prefix + key] = AirbyteTableMetadata(
|
|
401
|
-
|
|
464
|
+
raw_table_name=key,
|
|
465
|
+
schema=generate_table_schema(sub_schema.get("items", {}).get("properties", {})),
|
|
402
466
|
)
|
|
403
467
|
if sub_schema.get("items", {}).get("properties"):
|
|
404
468
|
for k, v in sub_schema["items"]["properties"].items():
|
|
@@ -409,11 +473,6 @@ def _get_normalization_tables_for_schema(
|
|
|
409
473
|
return out
|
|
410
474
|
|
|
411
475
|
|
|
412
|
-
def _clean_name(name: str) -> str:
|
|
413
|
-
"""Cleans an input to be a valid Dagster asset name."""
|
|
414
|
-
return re.sub(r"[^a-z0-9]+", "_", name.lower())
|
|
415
|
-
|
|
416
|
-
|
|
417
476
|
class AirbyteConnectionMetadata(
|
|
418
477
|
NamedTuple(
|
|
419
478
|
"_AirbyteConnectionMetadata",
|
|
@@ -421,13 +480,14 @@ class AirbyteConnectionMetadata(
|
|
|
421
480
|
("name", str),
|
|
422
481
|
("stream_prefix", str),
|
|
423
482
|
("has_basic_normalization", bool),
|
|
424
|
-
("stream_data",
|
|
483
|
+
("stream_data", list[Mapping[str, Any]]),
|
|
484
|
+
("destination", Mapping[str, Any]),
|
|
425
485
|
],
|
|
426
486
|
)
|
|
427
487
|
):
|
|
428
488
|
"""Contains information about an Airbyte connection.
|
|
429
489
|
|
|
430
|
-
|
|
490
|
+
Args:
|
|
431
491
|
name (str): The name of the connection.
|
|
432
492
|
stream_prefix (str): A prefix to add to all stream names.
|
|
433
493
|
has_basic_normalization (bool): Whether or not the connection has basic normalization enabled.
|
|
@@ -436,7 +496,10 @@ class AirbyteConnectionMetadata(
|
|
|
436
496
|
|
|
437
497
|
@classmethod
|
|
438
498
|
def from_api_json(
|
|
439
|
-
cls,
|
|
499
|
+
cls,
|
|
500
|
+
contents: Mapping[str, Any],
|
|
501
|
+
operations: Mapping[str, Any],
|
|
502
|
+
destination: Mapping[str, Any],
|
|
440
503
|
) -> "AirbyteConnectionMetadata":
|
|
441
504
|
return cls(
|
|
442
505
|
name=contents["name"],
|
|
@@ -446,11 +509,14 @@ class AirbyteConnectionMetadata(
|
|
|
446
509
|
for op in operations.get("operations", [])
|
|
447
510
|
),
|
|
448
511
|
stream_data=contents.get("syncCatalog", {}).get("streams", []),
|
|
512
|
+
destination=destination,
|
|
449
513
|
)
|
|
450
514
|
|
|
451
515
|
@classmethod
|
|
452
|
-
def from_config(
|
|
453
|
-
|
|
516
|
+
def from_config(
|
|
517
|
+
cls, contents: Mapping[str, Any], destination: Mapping[str, Any]
|
|
518
|
+
) -> "AirbyteConnectionMetadata":
|
|
519
|
+
config_contents = cast("Mapping[str, Any]", contents.get("configuration"))
|
|
454
520
|
check.invariant(
|
|
455
521
|
config_contents is not None, "Airbyte connection config is missing 'configuration' key"
|
|
456
522
|
)
|
|
@@ -463,6 +529,7 @@ class AirbyteConnectionMetadata(
|
|
|
463
529
|
for op in config_contents.get("operations", [])
|
|
464
530
|
),
|
|
465
531
|
stream_data=config_contents.get("sync_catalog", {}).get("streams", []),
|
|
532
|
+
destination=destination,
|
|
466
533
|
)
|
|
467
534
|
|
|
468
535
|
def parse_stream_tables(
|
|
@@ -472,14 +539,14 @@ class AirbyteConnectionMetadata(
|
|
|
472
539
|
tables associated with each enabled stream and values representing any affiliated
|
|
473
540
|
tables created by Airbyte's normalization process, if enabled.
|
|
474
541
|
"""
|
|
475
|
-
tables:
|
|
542
|
+
tables: dict[str, AirbyteTableMetadata] = {}
|
|
476
543
|
|
|
477
544
|
enabled_streams = [
|
|
478
545
|
stream for stream in self.stream_data if stream.get("config", {}).get("selected", False)
|
|
479
546
|
]
|
|
480
547
|
|
|
481
548
|
for stream in enabled_streams:
|
|
482
|
-
name = cast(str, stream.get("stream", {}).get("name"))
|
|
549
|
+
name = cast("str", stream.get("stream", {}).get("name"))
|
|
483
550
|
prefixed_name = f"{self.stream_prefix}{name}"
|
|
484
551
|
|
|
485
552
|
schema = (
|
|
@@ -487,7 +554,7 @@ class AirbyteConnectionMetadata(
|
|
|
487
554
|
if "json_schema" in stream["stream"]
|
|
488
555
|
else stream["stream"]["jsonSchema"]
|
|
489
556
|
)
|
|
490
|
-
normalization_tables:
|
|
557
|
+
normalization_tables: dict[str, AirbyteTableMetadata] = {}
|
|
491
558
|
schema_props = schema.get("properties", schema.get("items", {}).get("properties", {}))
|
|
492
559
|
if self.has_basic_normalization and return_normalization_tables:
|
|
493
560
|
for k, v in schema_props.items():
|
|
@@ -497,6 +564,7 @@ class AirbyteConnectionMetadata(
|
|
|
497
564
|
prefixed_norm_table_name = f"{self.stream_prefix}{normalization_table_name}"
|
|
498
565
|
normalization_tables[prefixed_norm_table_name] = meta
|
|
499
566
|
tables[prefixed_name] = AirbyteTableMetadata(
|
|
567
|
+
raw_table_name=name,
|
|
500
568
|
schema=generate_table_schema(schema_props),
|
|
501
569
|
normalization_tables=normalization_tables,
|
|
502
570
|
)
|
|
@@ -514,7 +582,7 @@ def _get_schema_by_table_name(
|
|
|
514
582
|
[
|
|
515
583
|
(k, v.schema)
|
|
516
584
|
for k, v in cast(
|
|
517
|
-
|
|
585
|
+
"dict[str, AirbyteTableMetadata]", meta.normalization_tables
|
|
518
586
|
).items()
|
|
519
587
|
]
|
|
520
588
|
for meta in stream_table_metadata.values()
|
|
@@ -535,7 +603,7 @@ class AirbyteCoreCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
535
603
|
connection_filter: Optional[Callable[[AirbyteConnectionMetadata], bool]],
|
|
536
604
|
connection_to_asset_key_fn: Optional[Callable[[AirbyteConnectionMetadata, str], AssetKey]],
|
|
537
605
|
connection_to_freshness_policy_fn: Optional[
|
|
538
|
-
Callable[[AirbyteConnectionMetadata], Optional[
|
|
606
|
+
Callable[[AirbyteConnectionMetadata], Optional[LegacyFreshnessPolicy]]
|
|
539
607
|
],
|
|
540
608
|
connection_to_auto_materialize_policy_fn: Optional[
|
|
541
609
|
Callable[[AirbyteConnectionMetadata], Optional[AutoMaterializePolicy]]
|
|
@@ -565,25 +633,46 @@ class AirbyteCoreCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
565
633
|
super().__init__(unique_id=f"airbyte-{contents.hexdigest()}")
|
|
566
634
|
|
|
567
635
|
@abstractmethod
|
|
568
|
-
def _get_connections(self) -> Sequence[
|
|
636
|
+
def _get_connections(self) -> Sequence[tuple[str, AirbyteConnectionMetadata]]:
|
|
569
637
|
pass
|
|
570
638
|
|
|
571
639
|
def compute_cacheable_data(self) -> Sequence[AssetsDefinitionCacheableData]:
|
|
572
|
-
asset_defn_data:
|
|
640
|
+
asset_defn_data: list[AssetsDefinitionCacheableData] = []
|
|
573
641
|
for connection_id, connection in self._get_connections():
|
|
574
642
|
stream_table_metadata = connection.parse_stream_tables(
|
|
575
643
|
self._create_assets_for_normalization_tables
|
|
576
644
|
)
|
|
577
645
|
schema_by_table_name = _get_schema_by_table_name(stream_table_metadata)
|
|
578
646
|
|
|
647
|
+
destination_database = connection.destination.get("configuration", {}).get("database")
|
|
648
|
+
destination_schema = connection.destination.get("configuration", {}).get("schema")
|
|
649
|
+
|
|
579
650
|
table_to_asset_key = partial(self._connection_to_asset_key_fn, connection)
|
|
651
|
+
|
|
652
|
+
destination_tables = list(stream_table_metadata.keys())
|
|
653
|
+
destination_raw_table_names_by_table = {
|
|
654
|
+
table: metadata.raw_table_name for table, metadata in stream_table_metadata.items()
|
|
655
|
+
}
|
|
656
|
+
normalization_tables = {
|
|
657
|
+
table: set(metadata.normalization_tables.keys())
|
|
658
|
+
for table, metadata in stream_table_metadata.items()
|
|
659
|
+
}
|
|
660
|
+
normalization_raw_table_names_by_table = {
|
|
661
|
+
normalization_table: metadata.normalization_tables[
|
|
662
|
+
normalization_table
|
|
663
|
+
].raw_table_name
|
|
664
|
+
for table, metadata in stream_table_metadata.items()
|
|
665
|
+
for normalization_table in normalization_tables[table]
|
|
666
|
+
}
|
|
667
|
+
|
|
580
668
|
asset_data_for_conn = _build_airbyte_asset_defn_metadata(
|
|
581
669
|
connection_id=connection_id,
|
|
582
|
-
destination_tables=
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
670
|
+
destination_tables=destination_tables,
|
|
671
|
+
destination_raw_table_names_by_table=destination_raw_table_names_by_table,
|
|
672
|
+
destination_database=destination_database,
|
|
673
|
+
destination_schema=destination_schema,
|
|
674
|
+
normalization_tables=normalization_tables,
|
|
675
|
+
normalization_raw_table_names_by_table=normalization_raw_table_names_by_table,
|
|
587
676
|
asset_key_prefix=self._key_prefix,
|
|
588
677
|
group_name=(
|
|
589
678
|
self._connection_meta_to_group_fn(connection)
|
|
@@ -597,7 +686,7 @@ class AirbyteCoreCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
597
686
|
),
|
|
598
687
|
schema_by_table_name=schema_by_table_name,
|
|
599
688
|
table_to_asset_key_fn=table_to_asset_key,
|
|
600
|
-
|
|
689
|
+
legacy_freshness_policy=self._connection_to_freshness_policy_fn(connection),
|
|
601
690
|
auto_materialize_policy=self._connection_to_auto_materialize_policy_fn(connection),
|
|
602
691
|
)
|
|
603
692
|
|
|
@@ -630,7 +719,7 @@ class AirbyteInstanceCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinit
|
|
|
630
719
|
connection_filter: Optional[Callable[[AirbyteConnectionMetadata], bool]],
|
|
631
720
|
connection_to_asset_key_fn: Optional[Callable[[AirbyteConnectionMetadata, str], AssetKey]],
|
|
632
721
|
connection_to_freshness_policy_fn: Optional[
|
|
633
|
-
Callable[[AirbyteConnectionMetadata], Optional[
|
|
722
|
+
Callable[[AirbyteConnectionMetadata], Optional[LegacyFreshnessPolicy]]
|
|
634
723
|
],
|
|
635
724
|
connection_to_auto_materialize_policy_fn: Optional[
|
|
636
725
|
Callable[[AirbyteConnectionMetadata], Optional[AutoMaterializePolicy]]
|
|
@@ -662,11 +751,11 @@ class AirbyteInstanceCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinit
|
|
|
662
751
|
)
|
|
663
752
|
self._airbyte_instance: AirbyteResource = self._partially_initialized_airbyte_instance
|
|
664
753
|
|
|
665
|
-
def _get_connections(self) -> Sequence[
|
|
754
|
+
def _get_connections(self) -> Sequence[tuple[str, AirbyteConnectionMetadata]]:
|
|
666
755
|
workspace_id = self._workspace_id
|
|
667
756
|
if not workspace_id:
|
|
668
757
|
workspaces = cast(
|
|
669
|
-
|
|
758
|
+
"list[dict[str, Any]]",
|
|
670
759
|
check.not_none(
|
|
671
760
|
self._airbyte_instance.make_request(endpoint="/workspaces/list", data={})
|
|
672
761
|
).get("workspaces", []),
|
|
@@ -678,7 +767,7 @@ class AirbyteInstanceCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinit
|
|
|
678
767
|
workspace_id = workspaces[0].get("workspaceId")
|
|
679
768
|
|
|
680
769
|
connections = cast(
|
|
681
|
-
|
|
770
|
+
"list[dict[str, Any]]",
|
|
682
771
|
check.not_none(
|
|
683
772
|
self._airbyte_instance.make_request(
|
|
684
773
|
endpoint="/connections/list", data={"workspaceId": workspace_id}
|
|
@@ -686,12 +775,12 @@ class AirbyteInstanceCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinit
|
|
|
686
775
|
).get("connections", []),
|
|
687
776
|
)
|
|
688
777
|
|
|
689
|
-
output_connections:
|
|
778
|
+
output_connections: list[tuple[str, AirbyteConnectionMetadata]] = []
|
|
690
779
|
for connection_json in connections:
|
|
691
|
-
connection_id = cast(str, connection_json.get("connectionId"))
|
|
780
|
+
connection_id = cast("str", connection_json.get("connectionId"))
|
|
692
781
|
|
|
693
782
|
operations_json = cast(
|
|
694
|
-
|
|
783
|
+
"dict[str, Any]",
|
|
695
784
|
check.not_none(
|
|
696
785
|
self._airbyte_instance.make_request(
|
|
697
786
|
endpoint="/operations/list",
|
|
@@ -699,7 +788,21 @@ class AirbyteInstanceCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinit
|
|
|
699
788
|
)
|
|
700
789
|
),
|
|
701
790
|
)
|
|
702
|
-
|
|
791
|
+
|
|
792
|
+
destination_id = cast("str", connection_json.get("destinationId"))
|
|
793
|
+
destination_json = cast(
|
|
794
|
+
"dict[str, Any]",
|
|
795
|
+
check.not_none(
|
|
796
|
+
self._airbyte_instance.make_request(
|
|
797
|
+
endpoint="/destinations/get",
|
|
798
|
+
data={"destinationId": destination_id},
|
|
799
|
+
)
|
|
800
|
+
),
|
|
801
|
+
)
|
|
802
|
+
|
|
803
|
+
connection = AirbyteConnectionMetadata.from_api_json(
|
|
804
|
+
connection_json, operations_json, destination_json
|
|
805
|
+
)
|
|
703
806
|
|
|
704
807
|
# Filter out connections that don't match the filter function
|
|
705
808
|
if self._connection_filter and not self._connection_filter(connection):
|
|
@@ -730,7 +833,7 @@ class AirbyteYAMLCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinition)
|
|
|
730
833
|
connection_directories: Optional[Sequence[str]],
|
|
731
834
|
connection_to_asset_key_fn: Optional[Callable[[AirbyteConnectionMetadata, str], AssetKey]],
|
|
732
835
|
connection_to_freshness_policy_fn: Optional[
|
|
733
|
-
Callable[[AirbyteConnectionMetadata], Optional[
|
|
836
|
+
Callable[[AirbyteConnectionMetadata], Optional[LegacyFreshnessPolicy]]
|
|
734
837
|
],
|
|
735
838
|
connection_to_auto_materialize_policy_fn: Optional[
|
|
736
839
|
Callable[[AirbyteConnectionMetadata], Optional[AutoMaterializePolicy]]
|
|
@@ -750,16 +853,26 @@ class AirbyteYAMLCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinition)
|
|
|
750
853
|
self._project_dir = project_dir
|
|
751
854
|
self._connection_directories = connection_directories
|
|
752
855
|
|
|
753
|
-
def _get_connections(self) -> Sequence[
|
|
856
|
+
def _get_connections(self) -> Sequence[tuple[str, AirbyteConnectionMetadata]]:
|
|
754
857
|
connections_dir = os.path.join(self._project_dir, "connections")
|
|
755
858
|
|
|
756
|
-
output_connections:
|
|
859
|
+
output_connections: list[tuple[str, AirbyteConnectionMetadata]] = []
|
|
757
860
|
|
|
758
861
|
connection_directories = self._connection_directories or os.listdir(connections_dir)
|
|
759
862
|
for connection_name in connection_directories:
|
|
760
863
|
connection_dir = os.path.join(connections_dir, connection_name)
|
|
761
864
|
with open(os.path.join(connection_dir, "configuration.yaml"), encoding="utf-8") as f:
|
|
762
|
-
|
|
865
|
+
connection_data = yaml.safe_load(f.read())
|
|
866
|
+
|
|
867
|
+
destination_configuration_path = cast(
|
|
868
|
+
"str", connection_data.get("destination_configuration_path")
|
|
869
|
+
)
|
|
870
|
+
with open(
|
|
871
|
+
os.path.join(self._project_dir, destination_configuration_path), encoding="utf-8"
|
|
872
|
+
) as f:
|
|
873
|
+
destination_data = yaml.safe_load(f.read())
|
|
874
|
+
|
|
875
|
+
connection = AirbyteConnectionMetadata.from_config(connection_data, destination_data)
|
|
763
876
|
|
|
764
877
|
# Filter out connections that don't match the filter function
|
|
765
878
|
if self._connection_filter and not self._connection_filter(connection):
|
|
@@ -788,7 +901,7 @@ class AirbyteYAMLCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinition)
|
|
|
788
901
|
)
|
|
789
902
|
state_file = state_files[0]
|
|
790
903
|
|
|
791
|
-
with open(os.path.join(connection_dir, cast(str, state_file)), encoding="utf-8") as f:
|
|
904
|
+
with open(os.path.join(connection_dir, cast("str", state_file)), encoding="utf-8") as f:
|
|
792
905
|
state = yaml.safe_load(f.read())
|
|
793
906
|
connection_id = state.get("resource_id")
|
|
794
907
|
|
|
@@ -796,12 +909,17 @@ class AirbyteYAMLCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinition)
|
|
|
796
909
|
return output_connections
|
|
797
910
|
|
|
798
911
|
|
|
912
|
+
@superseded(
|
|
913
|
+
additional_warn_text=(
|
|
914
|
+
"If you are using Airbyte 1.6.0 or higher, please see the migration guide: https://docs.dagster.io/integrations/libraries/airbyte/migration-guide"
|
|
915
|
+
)
|
|
916
|
+
)
|
|
799
917
|
def load_assets_from_airbyte_instance(
|
|
800
918
|
airbyte: Union[AirbyteResource, ResourceDefinition],
|
|
801
919
|
workspace_id: Optional[str] = None,
|
|
802
920
|
key_prefix: Optional[CoercibleToAssetKeyPrefix] = None,
|
|
803
921
|
create_assets_for_normalization_tables: bool = True,
|
|
804
|
-
connection_to_group_fn: Optional[Callable[[str], Optional[str]]] =
|
|
922
|
+
connection_to_group_fn: Optional[Callable[[str], Optional[str]]] = clean_name,
|
|
805
923
|
connection_meta_to_group_fn: Optional[
|
|
806
924
|
Callable[[AirbyteConnectionMetadata], Optional[str]]
|
|
807
925
|
] = None,
|
|
@@ -812,7 +930,7 @@ def load_assets_from_airbyte_instance(
|
|
|
812
930
|
Callable[[AirbyteConnectionMetadata, str], AssetKey]
|
|
813
931
|
] = None,
|
|
814
932
|
connection_to_freshness_policy_fn: Optional[
|
|
815
|
-
Callable[[AirbyteConnectionMetadata], Optional[
|
|
933
|
+
Callable[[AirbyteConnectionMetadata], Optional[LegacyFreshnessPolicy]]
|
|
816
934
|
] = None,
|
|
817
935
|
connection_to_auto_materialize_policy_fn: Optional[
|
|
818
936
|
Callable[[AirbyteConnectionMetadata], Optional[AutoMaterializePolicy]]
|
|
@@ -906,7 +1024,7 @@ def load_assets_from_airbyte_instance(
|
|
|
906
1024
|
check.invariant(
|
|
907
1025
|
not connection_meta_to_group_fn
|
|
908
1026
|
or not connection_to_group_fn
|
|
909
|
-
or connection_to_group_fn ==
|
|
1027
|
+
or connection_to_group_fn == clean_name,
|
|
910
1028
|
"Cannot specify both connection_meta_to_group_fn and connection_to_group_fn",
|
|
911
1029
|
)
|
|
912
1030
|
|
|
@@ -927,127 +1045,141 @@ def load_assets_from_airbyte_instance(
|
|
|
927
1045
|
)
|
|
928
1046
|
|
|
929
1047
|
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
)
|
|
934
|
-
def load_assets_from_airbyte_project(
|
|
935
|
-
project_dir: str,
|
|
936
|
-
workspace_id: Optional[str] = None,
|
|
937
|
-
key_prefix: Optional[CoercibleToAssetKeyPrefix] = None,
|
|
938
|
-
create_assets_for_normalization_tables: bool = True,
|
|
939
|
-
connection_to_group_fn: Optional[Callable[[str], Optional[str]]] = _clean_name,
|
|
940
|
-
connection_meta_to_group_fn: Optional[
|
|
941
|
-
Callable[[AirbyteConnectionMetadata], Optional[str]]
|
|
942
|
-
] = None,
|
|
943
|
-
io_manager_key: Optional[str] = None,
|
|
944
|
-
connection_to_io_manager_key_fn: Optional[Callable[[str], Optional[str]]] = None,
|
|
945
|
-
connection_filter: Optional[Callable[[AirbyteConnectionMetadata], bool]] = None,
|
|
946
|
-
connection_directories: Optional[Sequence[str]] = None,
|
|
947
|
-
connection_to_asset_key_fn: Optional[
|
|
948
|
-
Callable[[AirbyteConnectionMetadata, str], AssetKey]
|
|
949
|
-
] = None,
|
|
950
|
-
connection_to_freshness_policy_fn: Optional[
|
|
951
|
-
Callable[[AirbyteConnectionMetadata], Optional[FreshnessPolicy]]
|
|
952
|
-
] = None,
|
|
953
|
-
connection_to_auto_materialize_policy_fn: Optional[
|
|
954
|
-
Callable[[AirbyteConnectionMetadata], Optional[AutoMaterializePolicy]]
|
|
955
|
-
] = None,
|
|
956
|
-
) -> CacheableAssetsDefinition:
|
|
957
|
-
"""Loads an Airbyte project into a set of Dagster assets.
|
|
1048
|
+
# -----------------------
|
|
1049
|
+
# Reworked assets factory
|
|
1050
|
+
# -----------------------
|
|
958
1051
|
|
|
959
|
-
|
|
960
|
-
|
|
1052
|
+
|
|
1053
|
+
@beta
|
|
1054
|
+
def build_airbyte_assets_definitions(
|
|
1055
|
+
*,
|
|
1056
|
+
workspace: Union[AirbyteWorkspace, AirbyteCloudWorkspace],
|
|
1057
|
+
dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
|
|
1058
|
+
connection_selector_fn: Optional[Callable[[AirbyteConnection], bool]] = None,
|
|
1059
|
+
) -> Sequence[AssetsDefinition]:
|
|
1060
|
+
"""The list of AssetsDefinition for all connections in the Airbyte workspace.
|
|
961
1061
|
|
|
962
1062
|
Args:
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
created by Airbyte's normalization feature. If False, only the destination tables
|
|
970
|
-
will be created. Defaults to True.
|
|
971
|
-
connection_to_group_fn (Optional[Callable[[str], Optional[str]]]): Function which returns an asset
|
|
972
|
-
group name for a given Airbyte connection name. If None, no groups will be created. Defaults
|
|
973
|
-
to a basic sanitization function.
|
|
974
|
-
connection_meta_to_group_fn (Optional[Callable[[AirbyteConnectionMetadata], Optional[str]]]): Function
|
|
975
|
-
which returns an asset group name for a given Airbyte connection metadata. If None and connection_to_group_fn
|
|
976
|
-
is None, no groups will be created. Defaults to None.
|
|
977
|
-
io_manager_key (Optional[str]): The I/O manager key to use for all assets. Defaults to "io_manager".
|
|
978
|
-
Use this if all assets should be loaded from the same source, otherwise use connection_to_io_manager_key_fn.
|
|
979
|
-
connection_to_io_manager_key_fn (Optional[Callable[[str], Optional[str]]]): Function which returns an
|
|
980
|
-
I/O manager key for a given Airbyte connection name. When other ops are downstream of the loaded assets,
|
|
981
|
-
the IOManager specified determines how the inputs to those ops are loaded. Defaults to "io_manager".
|
|
982
|
-
connection_filter (Optional[Callable[[AirbyteConnectionMetadata], bool]]): Optional function which
|
|
983
|
-
takes in connection metadata and returns False if the connection should be excluded from the output assets.
|
|
984
|
-
connection_directories (Optional[List[str]]): Optional list of connection directories to load assets from.
|
|
985
|
-
If omitted, all connections in the Airbyte project are loaded. May be faster than connection_filter
|
|
986
|
-
if the project has many connections or if the connection yaml files are large.
|
|
987
|
-
connection_to_asset_key_fn (Optional[Callable[[AirbyteConnectionMetadata, str], AssetKey]]): Optional function which
|
|
988
|
-
takes in connection metadata and table name and returns an asset key for the table. If None, the default asset
|
|
989
|
-
key is based on the table name. Any asset key prefix will be applied to the output of this function.
|
|
990
|
-
connection_to_freshness_policy_fn (Optional[Callable[[AirbyteConnectionMetadata], Optional[FreshnessPolicy]]]):
|
|
991
|
-
Optional function which takes in connection metadata and returns a freshness policy for the connection's assets.
|
|
992
|
-
If None, no freshness policies will be applied to the assets.
|
|
993
|
-
connection_to_auto_materialize_policy_fn (Optional[Callable[[AirbyteConnectionMetadata], Optional[AutoMaterializePolicy]]]):
|
|
994
|
-
Optional function which takes in connection metadata and returns an auto materialization policy for the connection's assets.
|
|
995
|
-
If None, no auto materialization policies will be applied to the assets.
|
|
1063
|
+
workspace (Union[AirbyteWorkspace, AirbyteCloudWorkspace]): The Airbyte workspace to fetch assets from.
|
|
1064
|
+
dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
|
|
1065
|
+
to convert Airbyte content into :py:class:`dagster.AssetSpec`.
|
|
1066
|
+
Defaults to :py:class:`DagsterAirbyteTranslator`.
|
|
1067
|
+
connection_selector_fn (Optional[Callable[[AirbyteConnection], bool]]): A function that allows for filtering
|
|
1068
|
+
which Airbyte connections assets are created for.
|
|
996
1069
|
|
|
997
|
-
|
|
1070
|
+
Returns:
|
|
1071
|
+
List[AssetsDefinition]: The list of AssetsDefinition for all connections in the Airbyte workspace.
|
|
998
1072
|
|
|
999
|
-
|
|
1073
|
+
Examples:
|
|
1074
|
+
Sync the tables of an Airbyte connection:
|
|
1000
1075
|
|
|
1001
|
-
|
|
1076
|
+
.. code-block:: python
|
|
1002
1077
|
|
|
1003
|
-
|
|
1078
|
+
from dagster_airbyte import AirbyteCloudWorkspace, build_airbyte_assets_definitions
|
|
1004
1079
|
|
|
1005
|
-
|
|
1006
|
-
project_dir="path/to/airbyte/project",
|
|
1007
|
-
)
|
|
1080
|
+
import dagster as dg
|
|
1008
1081
|
|
|
1009
|
-
|
|
1082
|
+
airbyte_workspace = AirbyteCloudWorkspace(
|
|
1083
|
+
workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
|
|
1084
|
+
client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
|
|
1085
|
+
client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
|
|
1086
|
+
)
|
|
1010
1087
|
|
|
1011
|
-
|
|
1088
|
+
airbyte_assets = build_airbyte_assets_definitions(workspace=airbyte_workspace)
|
|
1012
1089
|
|
|
1013
|
-
|
|
1090
|
+
defs = dg.Definitions(
|
|
1091
|
+
assets=airbyte_assets,
|
|
1092
|
+
resources={"airbyte": airbyte_workspace},
|
|
1093
|
+
)
|
|
1014
1094
|
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1095
|
+
Sync the tables of an Airbyte connection with a custom translator:
|
|
1096
|
+
|
|
1097
|
+
.. code-block:: python
|
|
1098
|
+
|
|
1099
|
+
from dagster_airbyte import (
|
|
1100
|
+
DagsterAirbyteTranslator,
|
|
1101
|
+
AirbyteConnectionTableProps,
|
|
1102
|
+
AirbyteCloudWorkspace,
|
|
1103
|
+
build_airbyte_assets_definitions
|
|
1104
|
+
)
|
|
1105
|
+
|
|
1106
|
+
import dagster as dg
|
|
1107
|
+
|
|
1108
|
+
class CustomDagsterAirbyteTranslator(DagsterAirbyteTranslator):
|
|
1109
|
+
def get_asset_spec(self, props: AirbyteConnectionTableProps) -> dg.AssetSpec:
|
|
1110
|
+
default_spec = super().get_asset_spec(props)
|
|
1111
|
+
return default_spec.merge_attributes(
|
|
1112
|
+
metadata={"custom": "metadata"},
|
|
1113
|
+
)
|
|
1114
|
+
|
|
1115
|
+
airbyte_workspace = AirbyteCloudWorkspace(
|
|
1116
|
+
workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
|
|
1117
|
+
client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
|
|
1118
|
+
client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
|
|
1119
|
+
)
|
|
1120
|
+
|
|
1121
|
+
airbyte_assets = build_airbyte_assets_definitions(
|
|
1122
|
+
workspace=airbyte_workspace,
|
|
1123
|
+
dagster_airbyte_translator=CustomDagsterAirbyteTranslator()
|
|
1124
|
+
)
|
|
1125
|
+
|
|
1126
|
+
defs = dg.Definitions(
|
|
1127
|
+
assets=airbyte_assets,
|
|
1128
|
+
resources={"airbyte": airbyte_workspace},
|
|
1129
|
+
)
|
|
1130
|
+
|
|
1131
|
+
Filter connections by name:
|
|
1132
|
+
|
|
1133
|
+
.. code-block:: python
|
|
1134
|
+
|
|
1135
|
+
from dagster_airbyte import AirbyteCloudWorkspace, build_airbyte_assets_definitions
|
|
1136
|
+
|
|
1137
|
+
import dagster as dg
|
|
1138
|
+
|
|
1139
|
+
airbyte_workspace = AirbyteCloudWorkspace(
|
|
1140
|
+
workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
|
|
1141
|
+
client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
|
|
1142
|
+
client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
|
|
1143
|
+
)
|
|
1144
|
+
|
|
1145
|
+
airbyte_assets = build_airbyte_assets_definitions(
|
|
1146
|
+
workspace=airbyte_workspace,
|
|
1147
|
+
connection_selector_fn=lambda connection: connection.name in ["connection1", "connection2"]
|
|
1148
|
+
)
|
|
1149
|
+
|
|
1150
|
+
defs = dg.Definitions(
|
|
1151
|
+
assets=airbyte_assets,
|
|
1152
|
+
resources={"airbyte": airbyte_workspace},
|
|
1153
|
+
)
|
|
1019
1154
|
"""
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
key_prefix = check.list_param(key_prefix or [], "key_prefix", of_type=str)
|
|
1155
|
+
dagster_airbyte_translator = dagster_airbyte_translator or DagsterAirbyteTranslator()
|
|
1156
|
+
connection_selector_fn = connection_selector_fn or (lambda connection: True)
|
|
1023
1157
|
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1158
|
+
all_asset_specs = workspace.load_asset_specs(
|
|
1159
|
+
dagster_airbyte_translator=dagster_airbyte_translator,
|
|
1160
|
+
connection_selector_fn=connection_selector_fn,
|
|
1027
1161
|
)
|
|
1028
|
-
if not connection_to_io_manager_key_fn:
|
|
1029
|
-
connection_to_io_manager_key_fn = lambda _: io_manager_key
|
|
1030
1162
|
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1163
|
+
connections = {
|
|
1164
|
+
(
|
|
1165
|
+
check.not_none(AirbyteMetadataSet.extract(spec.metadata).connection_id),
|
|
1166
|
+
check.not_none(AirbyteMetadataSet.extract(spec.metadata).connection_name),
|
|
1167
|
+
)
|
|
1168
|
+
for spec in all_asset_specs
|
|
1169
|
+
}
|
|
1037
1170
|
|
|
1038
|
-
|
|
1039
|
-
|
|
1171
|
+
_asset_fns = []
|
|
1172
|
+
for connection_id, connection_name in connections:
|
|
1040
1173
|
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
)
|
|
1174
|
+
@airbyte_assets(
|
|
1175
|
+
connection_id=connection_id,
|
|
1176
|
+
workspace=workspace,
|
|
1177
|
+
name=f"airbyte_{clean_name(connection_name)}",
|
|
1178
|
+
dagster_airbyte_translator=dagster_airbyte_translator,
|
|
1179
|
+
)
|
|
1180
|
+
def _asset_fn(context: AssetExecutionContext, airbyte: BaseAirbyteWorkspace):
|
|
1181
|
+
yield from airbyte.sync_and_poll(context=context)
|
|
1182
|
+
|
|
1183
|
+
_asset_fns.append(_asset_fn)
|
|
1184
|
+
|
|
1185
|
+
return _asset_fns
|