dagster-airbyte 0.25.1__py3-none-any.whl → 0.25.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dagster-airbyte might be problematic. Click here for more details.
- dagster_airbyte/__init__.py +10 -1
- dagster_airbyte/asset_decorator.py +113 -0
- dagster_airbyte/asset_defs.py +152 -47
- dagster_airbyte/managed/generated/sources.py +33 -33
- dagster_airbyte/managed/reconciliation.py +22 -34
- dagster_airbyte/managed/types.py +8 -7
- dagster_airbyte/ops.py +3 -2
- dagster_airbyte/resources.py +626 -36
- dagster_airbyte/translator.py +236 -0
- dagster_airbyte/types.py +2 -1
- dagster_airbyte/utils.py +38 -2
- dagster_airbyte/version.py +1 -1
- {dagster_airbyte-0.25.1.dist-info → dagster_airbyte-0.25.10.dist-info}/METADATA +4 -3
- dagster_airbyte-0.25.10.dist-info/RECORD +23 -0
- dagster_airbyte-0.25.1.dist-info/RECORD +0 -21
- {dagster_airbyte-0.25.1.dist-info → dagster_airbyte-0.25.10.dist-info}/LICENSE +0 -0
- {dagster_airbyte-0.25.1.dist-info → dagster_airbyte-0.25.10.dist-info}/WHEEL +0 -0
- {dagster_airbyte-0.25.1.dist-info → dagster_airbyte-0.25.10.dist-info}/entry_points.txt +0 -0
- {dagster_airbyte-0.25.1.dist-info → dagster_airbyte-0.25.10.dist-info}/top_level.txt +0 -0
dagster_airbyte/__init__.py
CHANGED
|
@@ -14,17 +14,26 @@ try:
|
|
|
14
14
|
except ImportError:
|
|
15
15
|
pass
|
|
16
16
|
|
|
17
|
+
from dagster_airbyte.asset_decorator import airbyte_assets as airbyte_assets
|
|
17
18
|
from dagster_airbyte.asset_defs import (
|
|
18
19
|
build_airbyte_assets as build_airbyte_assets,
|
|
20
|
+
build_airbyte_assets_definitions as build_airbyte_assets_definitions,
|
|
19
21
|
load_assets_from_airbyte_instance as load_assets_from_airbyte_instance,
|
|
20
22
|
)
|
|
21
23
|
from dagster_airbyte.ops import airbyte_sync_op as airbyte_sync_op
|
|
22
24
|
from dagster_airbyte.resources import (
|
|
23
25
|
AirbyteCloudResource as AirbyteCloudResource,
|
|
26
|
+
AirbyteCloudWorkspace as AirbyteCloudWorkspace,
|
|
24
27
|
AirbyteResource as AirbyteResource,
|
|
25
|
-
AirbyteState as AirbyteState,
|
|
26
28
|
airbyte_cloud_resource as airbyte_cloud_resource,
|
|
27
29
|
airbyte_resource as airbyte_resource,
|
|
30
|
+
load_airbyte_cloud_asset_specs as load_airbyte_cloud_asset_specs,
|
|
31
|
+
)
|
|
32
|
+
from dagster_airbyte.translator import (
|
|
33
|
+
AirbyteConnectionTableProps as AirbyteConnectionTableProps,
|
|
34
|
+
AirbyteJobStatusType as AirbyteJobStatusType,
|
|
35
|
+
AirbyteState as AirbyteState,
|
|
36
|
+
DagsterAirbyteTranslator as DagsterAirbyteTranslator,
|
|
28
37
|
)
|
|
29
38
|
from dagster_airbyte.types import AirbyteOutput as AirbyteOutput
|
|
30
39
|
from dagster_airbyte.version import __version__ as __version__
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
from typing import Any, Callable, Optional
|
|
2
|
+
|
|
3
|
+
from dagster import AssetsDefinition, multi_asset
|
|
4
|
+
from dagster._annotations import experimental
|
|
5
|
+
|
|
6
|
+
from dagster_airbyte.resources import AirbyteCloudWorkspace
|
|
7
|
+
from dagster_airbyte.translator import AirbyteMetadataSet, DagsterAirbyteTranslator
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@experimental
|
|
11
|
+
def airbyte_assets(
|
|
12
|
+
*,
|
|
13
|
+
connection_id: str,
|
|
14
|
+
workspace: AirbyteCloudWorkspace,
|
|
15
|
+
name: Optional[str] = None,
|
|
16
|
+
group_name: Optional[str] = None,
|
|
17
|
+
dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
|
|
18
|
+
) -> Callable[[Callable[..., Any]], AssetsDefinition]:
|
|
19
|
+
"""Create a definition for how to sync the tables of a given Airbyte connection.
|
|
20
|
+
|
|
21
|
+
Args:
|
|
22
|
+
connection_id (str): The Airbyte Connection ID.
|
|
23
|
+
workspace (AirbyteCloudWorkspace): The Airbyte workspace to fetch assets from.
|
|
24
|
+
name (Optional[str], optional): The name of the op.
|
|
25
|
+
group_name (Optional[str], optional): The name of the asset group.
|
|
26
|
+
dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
|
|
27
|
+
to convert Airbyte content into :py:class:`dagster.AssetSpec`.
|
|
28
|
+
Defaults to :py:class:`DagsterAirbyteTranslator`.
|
|
29
|
+
|
|
30
|
+
Examples:
|
|
31
|
+
Sync the tables of an Airbyte connection:
|
|
32
|
+
|
|
33
|
+
.. code-block:: python
|
|
34
|
+
|
|
35
|
+
from dagster_airbyte import AirbyteCloudWorkspace, airbyte_assets
|
|
36
|
+
|
|
37
|
+
import dagster as dg
|
|
38
|
+
|
|
39
|
+
airbyte_workspace = AirbyteCloudWorkspace(
|
|
40
|
+
workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
|
|
41
|
+
client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
|
|
42
|
+
client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@airbyte_assets(
|
|
47
|
+
connection_id="airbyte_connection_id",
|
|
48
|
+
workspace=airbyte_workspace,
|
|
49
|
+
)
|
|
50
|
+
def airbyte_connection_assets(context: dg.AssetExecutionContext, airbyte: AirbyteCloudWorkspace):
|
|
51
|
+
yield from airbyte.sync_and_poll(context=context)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
defs = dg.Definitions(
|
|
55
|
+
assets=[airbyte_connection_assets],
|
|
56
|
+
resources={"airbyte": airbyte_workspace},
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
Sync the tables of an Airbyte connection with a custom translator:
|
|
60
|
+
|
|
61
|
+
.. code-block:: python
|
|
62
|
+
|
|
63
|
+
from dagster_airbyte import (
|
|
64
|
+
DagsterAirbyteTranslator,
|
|
65
|
+
AirbyteConnectionTableProps,
|
|
66
|
+
AirbyteCloudWorkspace,
|
|
67
|
+
airbyte_assets
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
import dagster as dg
|
|
71
|
+
|
|
72
|
+
class CustomDagsterAirbyteTranslator(DagsterAirbyteTranslator):
|
|
73
|
+
def get_asset_spec(self, props: AirbyteConnectionTableProps) -> dg.AssetSpec:
|
|
74
|
+
default_spec = super().get_asset_spec(props)
|
|
75
|
+
return default_spec.merge_attributes(
|
|
76
|
+
metadata={"custom": "metadata"},
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
airbyte_workspace = AirbyteCloudWorkspace(
|
|
80
|
+
workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
|
|
81
|
+
client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
|
|
82
|
+
client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@airbyte_assets(
|
|
87
|
+
connection_id="airbyte_connection_id",
|
|
88
|
+
workspace=airbyte_workspace,
|
|
89
|
+
dagster_airbyte_translator=CustomDagsterAirbyteTranslator()
|
|
90
|
+
)
|
|
91
|
+
def airbyte_connection_assets(context: dg.AssetExecutionContext, airbyte: AirbyteCloudWorkspace):
|
|
92
|
+
yield from airbyte.sync_and_poll(context=context)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
defs = dg.Definitions(
|
|
96
|
+
assets=[airbyte_connection_assets],
|
|
97
|
+
resources={"airbyte": airbyte_workspace},
|
|
98
|
+
)
|
|
99
|
+
"""
|
|
100
|
+
dagster_airbyte_translator = dagster_airbyte_translator or DagsterAirbyteTranslator()
|
|
101
|
+
|
|
102
|
+
return multi_asset(
|
|
103
|
+
name=name,
|
|
104
|
+
group_name=group_name,
|
|
105
|
+
can_subset=True,
|
|
106
|
+
specs=[
|
|
107
|
+
spec
|
|
108
|
+
for spec in workspace.load_asset_specs(
|
|
109
|
+
dagster_airbyte_translator=dagster_airbyte_translator
|
|
110
|
+
)
|
|
111
|
+
if AirbyteMetadataSet.extract(spec.metadata).connection_id == connection_id
|
|
112
|
+
],
|
|
113
|
+
)
|
dagster_airbyte/asset_defs.py
CHANGED
|
@@ -1,28 +1,15 @@
|
|
|
1
1
|
import hashlib
|
|
2
2
|
import inspect
|
|
3
3
|
import os
|
|
4
|
-
import re
|
|
5
4
|
from abc import abstractmethod
|
|
5
|
+
from collections.abc import Iterable, Mapping, Sequence
|
|
6
6
|
from functools import partial
|
|
7
7
|
from itertools import chain
|
|
8
|
-
from typing import (
|
|
9
|
-
Any,
|
|
10
|
-
Callable,
|
|
11
|
-
Dict,
|
|
12
|
-
Iterable,
|
|
13
|
-
List,
|
|
14
|
-
Mapping,
|
|
15
|
-
NamedTuple,
|
|
16
|
-
Optional,
|
|
17
|
-
Sequence,
|
|
18
|
-
Set,
|
|
19
|
-
Tuple,
|
|
20
|
-
Union,
|
|
21
|
-
cast,
|
|
22
|
-
)
|
|
8
|
+
from typing import Any, Callable, NamedTuple, Optional, Union, cast
|
|
23
9
|
|
|
24
10
|
import yaml
|
|
25
11
|
from dagster import (
|
|
12
|
+
AssetExecutionContext,
|
|
26
13
|
AssetKey,
|
|
27
14
|
AssetOut,
|
|
28
15
|
AutoMaterializePolicy,
|
|
@@ -33,6 +20,7 @@ from dagster import (
|
|
|
33
20
|
SourceAsset,
|
|
34
21
|
_check as check,
|
|
35
22
|
)
|
|
23
|
+
from dagster._annotations import experimental
|
|
36
24
|
from dagster._core.definitions import AssetsDefinition, multi_asset
|
|
37
25
|
from dagster._core.definitions.cacheable_assets import (
|
|
38
26
|
AssetsDefinitionCacheableData,
|
|
@@ -45,9 +33,17 @@ from dagster._core.errors import DagsterInvalidDefinitionError, DagsterInvalidIn
|
|
|
45
33
|
from dagster._core.execution.context.init import build_init_resource_context
|
|
46
34
|
from dagster._utils.merger import merge_dicts
|
|
47
35
|
|
|
48
|
-
from dagster_airbyte.
|
|
36
|
+
from dagster_airbyte.asset_decorator import airbyte_assets
|
|
37
|
+
from dagster_airbyte.resources import (
|
|
38
|
+
AirbyteCloudResource,
|
|
39
|
+
AirbyteCloudWorkspace,
|
|
40
|
+
AirbyteResource,
|
|
41
|
+
BaseAirbyteResource,
|
|
42
|
+
)
|
|
43
|
+
from dagster_airbyte.translator import AirbyteMetadataSet, DagsterAirbyteTranslator
|
|
49
44
|
from dagster_airbyte.types import AirbyteTableMetadata
|
|
50
45
|
from dagster_airbyte.utils import (
|
|
46
|
+
clean_name,
|
|
51
47
|
generate_materializations,
|
|
52
48
|
generate_table_schema,
|
|
53
49
|
is_basic_normalization_operation,
|
|
@@ -66,7 +62,7 @@ def _build_airbyte_asset_defn_metadata(
|
|
|
66
62
|
destination_schema: Optional[str],
|
|
67
63
|
table_to_asset_key_fn: Callable[[str], AssetKey],
|
|
68
64
|
asset_key_prefix: Optional[Sequence[str]] = None,
|
|
69
|
-
normalization_tables: Optional[Mapping[str, Set[str]]] = None,
|
|
65
|
+
normalization_tables: Optional[Mapping[str, set[str]]] = None,
|
|
70
66
|
normalization_raw_table_names_by_table: Optional[Mapping[str, str]] = None,
|
|
71
67
|
upstream_assets: Optional[Iterable[AssetKey]] = None,
|
|
72
68
|
group_name: Optional[str] = None,
|
|
@@ -96,7 +92,7 @@ def _build_airbyte_asset_defn_metadata(
|
|
|
96
92
|
for table in tables
|
|
97
93
|
}
|
|
98
94
|
|
|
99
|
-
internal_deps:
|
|
95
|
+
internal_deps: dict[str, set[AssetKey]] = {}
|
|
100
96
|
|
|
101
97
|
metadata_encodable_normalization_tables = (
|
|
102
98
|
{k: list(v) for k, v in normalization_tables.items()} if normalization_tables else {}
|
|
@@ -115,7 +111,7 @@ def _build_airbyte_asset_defn_metadata(
|
|
|
115
111
|
for table in destination_tables:
|
|
116
112
|
internal_deps[table] = set(upstream_assets or [])
|
|
117
113
|
|
|
118
|
-
table_names:
|
|
114
|
+
table_names: dict[str, str] = {}
|
|
119
115
|
for table in destination_tables:
|
|
120
116
|
if destination_database and destination_schema and table:
|
|
121
117
|
# Use the destination raw table name to create the table name
|
|
@@ -186,8 +182,8 @@ def _build_airbyte_assets_from_metadata(
|
|
|
186
182
|
metadata = cast(Mapping[str, Any], assets_defn_meta.extra_metadata)
|
|
187
183
|
connection_id = cast(str, metadata["connection_id"])
|
|
188
184
|
group_name = cast(Optional[str], metadata["group_name"])
|
|
189
|
-
destination_tables = cast(
|
|
190
|
-
normalization_tables = cast(Mapping[str,
|
|
185
|
+
destination_tables = cast(list[str], metadata["destination_tables"])
|
|
186
|
+
normalization_tables = cast(Mapping[str, list[str]], metadata["normalization_tables"])
|
|
191
187
|
io_manager_key = cast(Optional[str], metadata["io_manager_key"])
|
|
192
188
|
|
|
193
189
|
@multi_asset(
|
|
@@ -256,9 +252,9 @@ def build_airbyte_assets(
|
|
|
256
252
|
destination_schema: Optional[str] = None,
|
|
257
253
|
asset_key_prefix: Optional[Sequence[str]] = None,
|
|
258
254
|
group_name: Optional[str] = None,
|
|
259
|
-
normalization_tables: Optional[Mapping[str, Set[str]]] = None,
|
|
255
|
+
normalization_tables: Optional[Mapping[str, set[str]]] = None,
|
|
260
256
|
deps: Optional[Iterable[Union[CoercibleToAssetKey, AssetsDefinition, SourceAsset]]] = None,
|
|
261
|
-
upstream_assets: Optional[
|
|
257
|
+
upstream_assets: Optional[set[AssetKey]] = None,
|
|
262
258
|
schema_by_table_name: Optional[Mapping[str, TableSchema]] = None,
|
|
263
259
|
freshness_policy: Optional[FreshnessPolicy] = None,
|
|
264
260
|
stream_to_asset_map: Optional[Mapping[str, str]] = None,
|
|
@@ -301,7 +297,7 @@ def build_airbyte_assets(
|
|
|
301
297
|
chain([destination_tables], normalization_tables.values() if normalization_tables else [])
|
|
302
298
|
)
|
|
303
299
|
|
|
304
|
-
table_names:
|
|
300
|
+
table_names: dict[str, str] = {}
|
|
305
301
|
for table in destination_tables:
|
|
306
302
|
if destination_database and destination_schema and table:
|
|
307
303
|
table_names[table] = ".".join([destination_database, destination_schema, table])
|
|
@@ -429,7 +425,7 @@ def _get_normalization_tables_for_schema(
|
|
|
429
425
|
For more information on Airbyte's normalization process, see:
|
|
430
426
|
https://docs.airbyte.com/understanding-airbyte/basic-normalization/#nesting
|
|
431
427
|
"""
|
|
432
|
-
out:
|
|
428
|
+
out: dict[str, AirbyteTableMetadata] = {}
|
|
433
429
|
# Object types are broken into a new table, as long as they have children
|
|
434
430
|
|
|
435
431
|
sub_schemas = _get_sub_schemas(schema)
|
|
@@ -462,11 +458,6 @@ def _get_normalization_tables_for_schema(
|
|
|
462
458
|
return out
|
|
463
459
|
|
|
464
460
|
|
|
465
|
-
def _clean_name(name: str) -> str:
|
|
466
|
-
"""Cleans an input to be a valid Dagster asset name."""
|
|
467
|
-
return re.sub(r"[^a-z0-9]+", "_", name.lower())
|
|
468
|
-
|
|
469
|
-
|
|
470
461
|
class AirbyteConnectionMetadata(
|
|
471
462
|
NamedTuple(
|
|
472
463
|
"_AirbyteConnectionMetadata",
|
|
@@ -474,7 +465,7 @@ class AirbyteConnectionMetadata(
|
|
|
474
465
|
("name", str),
|
|
475
466
|
("stream_prefix", str),
|
|
476
467
|
("has_basic_normalization", bool),
|
|
477
|
-
("stream_data",
|
|
468
|
+
("stream_data", list[Mapping[str, Any]]),
|
|
478
469
|
("destination", Mapping[str, Any]),
|
|
479
470
|
],
|
|
480
471
|
)
|
|
@@ -533,7 +524,7 @@ class AirbyteConnectionMetadata(
|
|
|
533
524
|
tables associated with each enabled stream and values representing any affiliated
|
|
534
525
|
tables created by Airbyte's normalization process, if enabled.
|
|
535
526
|
"""
|
|
536
|
-
tables:
|
|
527
|
+
tables: dict[str, AirbyteTableMetadata] = {}
|
|
537
528
|
|
|
538
529
|
enabled_streams = [
|
|
539
530
|
stream for stream in self.stream_data if stream.get("config", {}).get("selected", False)
|
|
@@ -548,7 +539,7 @@ class AirbyteConnectionMetadata(
|
|
|
548
539
|
if "json_schema" in stream["stream"]
|
|
549
540
|
else stream["stream"]["jsonSchema"]
|
|
550
541
|
)
|
|
551
|
-
normalization_tables:
|
|
542
|
+
normalization_tables: dict[str, AirbyteTableMetadata] = {}
|
|
552
543
|
schema_props = schema.get("properties", schema.get("items", {}).get("properties", {}))
|
|
553
544
|
if self.has_basic_normalization and return_normalization_tables:
|
|
554
545
|
for k, v in schema_props.items():
|
|
@@ -576,7 +567,7 @@ def _get_schema_by_table_name(
|
|
|
576
567
|
[
|
|
577
568
|
(k, v.schema)
|
|
578
569
|
for k, v in cast(
|
|
579
|
-
|
|
570
|
+
dict[str, AirbyteTableMetadata], meta.normalization_tables
|
|
580
571
|
).items()
|
|
581
572
|
]
|
|
582
573
|
for meta in stream_table_metadata.values()
|
|
@@ -627,11 +618,11 @@ class AirbyteCoreCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
627
618
|
super().__init__(unique_id=f"airbyte-{contents.hexdigest()}")
|
|
628
619
|
|
|
629
620
|
@abstractmethod
|
|
630
|
-
def _get_connections(self) -> Sequence[
|
|
621
|
+
def _get_connections(self) -> Sequence[tuple[str, AirbyteConnectionMetadata]]:
|
|
631
622
|
pass
|
|
632
623
|
|
|
633
624
|
def compute_cacheable_data(self) -> Sequence[AssetsDefinitionCacheableData]:
|
|
634
|
-
asset_defn_data:
|
|
625
|
+
asset_defn_data: list[AssetsDefinitionCacheableData] = []
|
|
635
626
|
for connection_id, connection in self._get_connections():
|
|
636
627
|
stream_table_metadata = connection.parse_stream_tables(
|
|
637
628
|
self._create_assets_for_normalization_tables
|
|
@@ -745,11 +736,11 @@ class AirbyteInstanceCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinit
|
|
|
745
736
|
)
|
|
746
737
|
self._airbyte_instance: AirbyteResource = self._partially_initialized_airbyte_instance
|
|
747
738
|
|
|
748
|
-
def _get_connections(self) -> Sequence[
|
|
739
|
+
def _get_connections(self) -> Sequence[tuple[str, AirbyteConnectionMetadata]]:
|
|
749
740
|
workspace_id = self._workspace_id
|
|
750
741
|
if not workspace_id:
|
|
751
742
|
workspaces = cast(
|
|
752
|
-
|
|
743
|
+
list[dict[str, Any]],
|
|
753
744
|
check.not_none(
|
|
754
745
|
self._airbyte_instance.make_request(endpoint="/workspaces/list", data={})
|
|
755
746
|
).get("workspaces", []),
|
|
@@ -761,7 +752,7 @@ class AirbyteInstanceCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinit
|
|
|
761
752
|
workspace_id = workspaces[0].get("workspaceId")
|
|
762
753
|
|
|
763
754
|
connections = cast(
|
|
764
|
-
|
|
755
|
+
list[dict[str, Any]],
|
|
765
756
|
check.not_none(
|
|
766
757
|
self._airbyte_instance.make_request(
|
|
767
758
|
endpoint="/connections/list", data={"workspaceId": workspace_id}
|
|
@@ -769,12 +760,12 @@ class AirbyteInstanceCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinit
|
|
|
769
760
|
).get("connections", []),
|
|
770
761
|
)
|
|
771
762
|
|
|
772
|
-
output_connections:
|
|
763
|
+
output_connections: list[tuple[str, AirbyteConnectionMetadata]] = []
|
|
773
764
|
for connection_json in connections:
|
|
774
765
|
connection_id = cast(str, connection_json.get("connectionId"))
|
|
775
766
|
|
|
776
767
|
operations_json = cast(
|
|
777
|
-
|
|
768
|
+
dict[str, Any],
|
|
778
769
|
check.not_none(
|
|
779
770
|
self._airbyte_instance.make_request(
|
|
780
771
|
endpoint="/operations/list",
|
|
@@ -785,7 +776,7 @@ class AirbyteInstanceCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinit
|
|
|
785
776
|
|
|
786
777
|
destination_id = cast(str, connection_json.get("destinationId"))
|
|
787
778
|
destination_json = cast(
|
|
788
|
-
|
|
779
|
+
dict[str, Any],
|
|
789
780
|
check.not_none(
|
|
790
781
|
self._airbyte_instance.make_request(
|
|
791
782
|
endpoint="/destinations/get",
|
|
@@ -847,10 +838,10 @@ class AirbyteYAMLCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinition)
|
|
|
847
838
|
self._project_dir = project_dir
|
|
848
839
|
self._connection_directories = connection_directories
|
|
849
840
|
|
|
850
|
-
def _get_connections(self) -> Sequence[
|
|
841
|
+
def _get_connections(self) -> Sequence[tuple[str, AirbyteConnectionMetadata]]:
|
|
851
842
|
connections_dir = os.path.join(self._project_dir, "connections")
|
|
852
843
|
|
|
853
|
-
output_connections:
|
|
844
|
+
output_connections: list[tuple[str, AirbyteConnectionMetadata]] = []
|
|
854
845
|
|
|
855
846
|
connection_directories = self._connection_directories or os.listdir(connections_dir)
|
|
856
847
|
for connection_name in connection_directories:
|
|
@@ -908,7 +899,7 @@ def load_assets_from_airbyte_instance(
|
|
|
908
899
|
workspace_id: Optional[str] = None,
|
|
909
900
|
key_prefix: Optional[CoercibleToAssetKeyPrefix] = None,
|
|
910
901
|
create_assets_for_normalization_tables: bool = True,
|
|
911
|
-
connection_to_group_fn: Optional[Callable[[str], Optional[str]]] = _clean_name,
|
|
902
|
+
connection_to_group_fn: Optional[Callable[[str], Optional[str]]] = clean_name,
|
|
912
903
|
connection_meta_to_group_fn: Optional[
|
|
913
904
|
Callable[[AirbyteConnectionMetadata], Optional[str]]
|
|
914
905
|
] = None,
|
|
@@ -1013,7 +1004,7 @@ def load_assets_from_airbyte_instance(
|
|
|
1013
1004
|
check.invariant(
|
|
1014
1005
|
not connection_meta_to_group_fn
|
|
1015
1006
|
or not connection_to_group_fn
|
|
1016
|
-
or connection_to_group_fn == _clean_name,
|
|
1007
|
+
or connection_to_group_fn == clean_name,
|
|
1017
1008
|
"Cannot specify both connection_meta_to_group_fn and connection_to_group_fn",
|
|
1018
1009
|
)
|
|
1019
1010
|
|
|
@@ -1032,3 +1023,117 @@ def load_assets_from_airbyte_instance(
|
|
|
1032
1023
|
connection_to_freshness_policy_fn=connection_to_freshness_policy_fn,
|
|
1033
1024
|
connection_to_auto_materialize_policy_fn=connection_to_auto_materialize_policy_fn,
|
|
1034
1025
|
)
|
|
1026
|
+
|
|
1027
|
+
|
|
1028
|
+
# -----------------------
|
|
1029
|
+
# Reworked assets factory
|
|
1030
|
+
# -----------------------
|
|
1031
|
+
|
|
1032
|
+
|
|
1033
|
+
@experimental
|
|
1034
|
+
def build_airbyte_assets_definitions(
|
|
1035
|
+
*,
|
|
1036
|
+
workspace: AirbyteCloudWorkspace,
|
|
1037
|
+
dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
|
|
1038
|
+
) -> Sequence[AssetsDefinition]:
|
|
1039
|
+
"""The list of AssetsDefinition for all connections in the Airbyte workspace.
|
|
1040
|
+
|
|
1041
|
+
Args:
|
|
1042
|
+
workspace (AirbyteCloudWorkspace): The Airbyte workspace to fetch assets from.
|
|
1043
|
+
dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
|
|
1044
|
+
to convert Airbyte content into :py:class:`dagster.AssetSpec`.
|
|
1045
|
+
Defaults to :py:class:`DagsterAirbyteTranslator`.
|
|
1046
|
+
|
|
1047
|
+
Returns:
|
|
1048
|
+
Sequence[AssetsDefinition]: The list of AssetsDefinition for all connections in the Airbyte workspace.
|
|
1049
|
+
|
|
1050
|
+
Examples:
|
|
1051
|
+
Sync the tables of an Airbyte connection:
|
|
1052
|
+
|
|
1053
|
+
.. code-block:: python
|
|
1054
|
+
|
|
1055
|
+
from dagster_airbyte import AirbyteCloudWorkspace, build_airbyte_assets_definitions
|
|
1056
|
+
|
|
1057
|
+
import dagster as dg
|
|
1058
|
+
|
|
1059
|
+
airbyte_workspace = AirbyteCloudWorkspace(
|
|
1060
|
+
workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
|
|
1061
|
+
client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
|
|
1062
|
+
client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
|
|
1063
|
+
)
|
|
1064
|
+
|
|
1065
|
+
|
|
1066
|
+
airbyte_assets = build_airbyte_assets_definitions(workspace=airbyte_workspace)
|
|
1067
|
+
|
|
1068
|
+
defs = dg.Definitions(
|
|
1069
|
+
assets=airbyte_assets,
|
|
1070
|
+
resources={"airbyte": airbyte_workspace},
|
|
1071
|
+
)
|
|
1072
|
+
|
|
1073
|
+
Sync the tables of an Airbyte connection with a custom translator:
|
|
1074
|
+
|
|
1075
|
+
.. code-block:: python
|
|
1076
|
+
|
|
1077
|
+
from dagster_airbyte import (
|
|
1078
|
+
DagsterAirbyteTranslator,
|
|
1079
|
+
AirbyteConnectionTableProps,
|
|
1080
|
+
AirbyteCloudWorkspace,
|
|
1081
|
+
build_airbyte_assets_definitions
|
|
1082
|
+
)
|
|
1083
|
+
|
|
1084
|
+
import dagster as dg
|
|
1085
|
+
|
|
1086
|
+
class CustomDagsterAirbyteTranslator(DagsterAirbyteTranslator):
|
|
1087
|
+
def get_asset_spec(self, props: AirbyteConnectionTableProps) -> dg.AssetSpec:
|
|
1088
|
+
default_spec = super().get_asset_spec(props)
|
|
1089
|
+
return default_spec.merge_attributes(
|
|
1090
|
+
metadata={"custom": "metadata"},
|
|
1091
|
+
)
|
|
1092
|
+
|
|
1093
|
+
airbyte_workspace = AirbyteCloudWorkspace(
|
|
1094
|
+
workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
|
|
1095
|
+
client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
|
|
1096
|
+
client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
|
|
1097
|
+
)
|
|
1098
|
+
|
|
1099
|
+
|
|
1100
|
+
airbyte_assets = build_airbyte_assets_definitions(
|
|
1101
|
+
workspace=airbyte_workspace,
|
|
1102
|
+
dagster_airbyte_translator=CustomDagsterAirbyteTranslator()
|
|
1103
|
+
)
|
|
1104
|
+
|
|
1105
|
+
defs = dg.Definitions(
|
|
1106
|
+
assets=airbyte_assets,
|
|
1107
|
+
resources={"airbyte": airbyte_workspace},
|
|
1108
|
+
)
|
|
1109
|
+
"""
|
|
1110
|
+
dagster_airbyte_translator = dagster_airbyte_translator or DagsterAirbyteTranslator()
|
|
1111
|
+
|
|
1112
|
+
all_asset_specs = workspace.load_asset_specs(
|
|
1113
|
+
dagster_airbyte_translator=dagster_airbyte_translator
|
|
1114
|
+
)
|
|
1115
|
+
|
|
1116
|
+
connections = {
|
|
1117
|
+
(
|
|
1118
|
+
check.not_none(AirbyteMetadataSet.extract(spec.metadata).connection_id),
|
|
1119
|
+
check.not_none(AirbyteMetadataSet.extract(spec.metadata).connection_name),
|
|
1120
|
+
)
|
|
1121
|
+
for spec in all_asset_specs
|
|
1122
|
+
}
|
|
1123
|
+
|
|
1124
|
+
_asset_fns = []
|
|
1125
|
+
for connection_id, connection_name in connections:
|
|
1126
|
+
|
|
1127
|
+
@airbyte_assets(
|
|
1128
|
+
connection_id=connection_id,
|
|
1129
|
+
workspace=workspace,
|
|
1130
|
+
name=clean_name(connection_name),
|
|
1131
|
+
group_name=clean_name(connection_name),
|
|
1132
|
+
dagster_airbyte_translator=dagster_airbyte_translator,
|
|
1133
|
+
)
|
|
1134
|
+
def _asset_fn(context: AssetExecutionContext, airbyte: AirbyteCloudWorkspace):
|
|
1135
|
+
yield from airbyte.sync_and_poll(context=context)
|
|
1136
|
+
|
|
1137
|
+
_asset_fns.append(_asset_fn)
|
|
1138
|
+
|
|
1139
|
+
return _asset_fns
|