dagster-airbyte 0.25.6__py3-none-any.whl → 0.25.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dagster-airbyte might be problematic. Click here for more details.
- dagster_airbyte/__init__.py +8 -2
- dagster_airbyte/asset_decorator.py +113 -0
- dagster_airbyte/asset_defs.py +127 -9
- dagster_airbyte/managed/reconciliation.py +3 -4
- dagster_airbyte/resources.py +275 -29
- dagster_airbyte/translator.py +44 -3
- dagster_airbyte/utils.py +33 -2
- dagster_airbyte/version.py +1 -1
- {dagster_airbyte-0.25.6.dist-info → dagster_airbyte-0.25.8.dist-info}/METADATA +3 -3
- dagster_airbyte-0.25.8.dist-info/RECORD +23 -0
- dagster_airbyte-0.25.6.dist-info/RECORD +0 -22
- {dagster_airbyte-0.25.6.dist-info → dagster_airbyte-0.25.8.dist-info}/LICENSE +0 -0
- {dagster_airbyte-0.25.6.dist-info → dagster_airbyte-0.25.8.dist-info}/WHEEL +0 -0
- {dagster_airbyte-0.25.6.dist-info → dagster_airbyte-0.25.8.dist-info}/entry_points.txt +0 -0
- {dagster_airbyte-0.25.6.dist-info → dagster_airbyte-0.25.8.dist-info}/top_level.txt +0 -0
dagster_airbyte/__init__.py
CHANGED
|
@@ -14,8 +14,10 @@ try:
|
|
|
14
14
|
except ImportError:
|
|
15
15
|
pass
|
|
16
16
|
|
|
17
|
+
from dagster_airbyte.asset_decorator import airbyte_assets as airbyte_assets
|
|
17
18
|
from dagster_airbyte.asset_defs import (
|
|
18
19
|
build_airbyte_assets as build_airbyte_assets,
|
|
20
|
+
build_airbyte_assets_definitions as build_airbyte_assets_definitions,
|
|
19
21
|
load_assets_from_airbyte_instance as load_assets_from_airbyte_instance,
|
|
20
22
|
)
|
|
21
23
|
from dagster_airbyte.ops import airbyte_sync_op as airbyte_sync_op
|
|
@@ -23,12 +25,16 @@ from dagster_airbyte.resources import (
|
|
|
23
25
|
AirbyteCloudResource as AirbyteCloudResource,
|
|
24
26
|
AirbyteCloudWorkspace as AirbyteCloudWorkspace,
|
|
25
27
|
AirbyteResource as AirbyteResource,
|
|
26
|
-
AirbyteState as AirbyteState,
|
|
27
28
|
airbyte_cloud_resource as airbyte_cloud_resource,
|
|
28
29
|
airbyte_resource as airbyte_resource,
|
|
29
30
|
load_airbyte_cloud_asset_specs as load_airbyte_cloud_asset_specs,
|
|
30
31
|
)
|
|
31
|
-
from dagster_airbyte.translator import
|
|
32
|
+
from dagster_airbyte.translator import (
|
|
33
|
+
AirbyteConnectionTableProps as AirbyteConnectionTableProps,
|
|
34
|
+
AirbyteJobStatusType as AirbyteJobStatusType,
|
|
35
|
+
AirbyteState as AirbyteState,
|
|
36
|
+
DagsterAirbyteTranslator as DagsterAirbyteTranslator,
|
|
37
|
+
)
|
|
32
38
|
from dagster_airbyte.types import AirbyteOutput as AirbyteOutput
|
|
33
39
|
from dagster_airbyte.version import __version__ as __version__
|
|
34
40
|
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
from typing import Any, Callable, Optional
|
|
2
|
+
|
|
3
|
+
from dagster import AssetsDefinition, multi_asset
|
|
4
|
+
from dagster._annotations import experimental
|
|
5
|
+
|
|
6
|
+
from dagster_airbyte.resources import AirbyteCloudWorkspace
|
|
7
|
+
from dagster_airbyte.translator import AirbyteMetadataSet, DagsterAirbyteTranslator
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@experimental
|
|
11
|
+
def airbyte_assets(
|
|
12
|
+
*,
|
|
13
|
+
connection_id: str,
|
|
14
|
+
workspace: AirbyteCloudWorkspace,
|
|
15
|
+
name: Optional[str] = None,
|
|
16
|
+
group_name: Optional[str] = None,
|
|
17
|
+
dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
|
|
18
|
+
) -> Callable[[Callable[..., Any]], AssetsDefinition]:
|
|
19
|
+
"""Create a definition for how to sync the tables of a given Airbyte connection.
|
|
20
|
+
|
|
21
|
+
Args:
|
|
22
|
+
connection_id (str): The Airbyte Connection ID.
|
|
23
|
+
workspace (AirbyteCloudWorkspace): The Airbyte workspace to fetch assets from.
|
|
24
|
+
name (Optional[str], optional): The name of the op.
|
|
25
|
+
group_name (Optional[str], optional): The name of the asset group.
|
|
26
|
+
dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
|
|
27
|
+
to convert Airbyte content into :py:class:`dagster.AssetSpec`.
|
|
28
|
+
Defaults to :py:class:`DagsterAirbyteTranslator`.
|
|
29
|
+
|
|
30
|
+
Examples:
|
|
31
|
+
Sync the tables of an Airbyte connection:
|
|
32
|
+
|
|
33
|
+
.. code-block:: python
|
|
34
|
+
|
|
35
|
+
from dagster_airbyte import AirbyteCloudWorkspace, airbyte_assets
|
|
36
|
+
|
|
37
|
+
import dagster as dg
|
|
38
|
+
|
|
39
|
+
airbyte_workspace = AirbyteCloudWorkspace(
|
|
40
|
+
workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
|
|
41
|
+
client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
|
|
42
|
+
client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@airbyte_assets(
|
|
47
|
+
connection_id="airbyte_connection_id",
|
|
48
|
+
workspace=airbyte_workspace,
|
|
49
|
+
)
|
|
50
|
+
def airbyte_connection_assets(context: dg.AssetExecutionContext, airbyte: AirbyteCloudWorkspace):
|
|
51
|
+
yield from airbyte.sync_and_poll(context=context)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
defs = dg.Definitions(
|
|
55
|
+
assets=[airbyte_connection_assets],
|
|
56
|
+
resources={"airbyte": airbyte_workspace},
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
Sync the tables of an Airbyte connection with a custom translator:
|
|
60
|
+
|
|
61
|
+
.. code-block:: python
|
|
62
|
+
|
|
63
|
+
from dagster_airbyte import (
|
|
64
|
+
DagsterAirbyteTranslator,
|
|
65
|
+
AirbyteConnectionTableProps,
|
|
66
|
+
AirbyteCloudWorkspace,
|
|
67
|
+
airbyte_assets
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
import dagster as dg
|
|
71
|
+
|
|
72
|
+
class CustomDagsterAirbyteTranslator(DagsterAirbyteTranslator):
|
|
73
|
+
def get_asset_spec(self, props: AirbyteConnectionTableProps) -> dg.AssetSpec:
|
|
74
|
+
default_spec = super().get_asset_spec(props)
|
|
75
|
+
return default_spec.merge_attributes(
|
|
76
|
+
metadata={"custom": "metadata"},
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
airbyte_workspace = AirbyteCloudWorkspace(
|
|
80
|
+
workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
|
|
81
|
+
client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
|
|
82
|
+
client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@airbyte_assets(
|
|
87
|
+
connection_id="airbyte_connection_id",
|
|
88
|
+
workspace=airbyte_workspace,
|
|
89
|
+
dagster_airbyte_translator=CustomDagsterAirbyteTranslator()
|
|
90
|
+
)
|
|
91
|
+
def airbyte_connection_assets(context: dg.AssetExecutionContext, airbyte: AirbyteCloudWorkspace):
|
|
92
|
+
yield from airbyte.sync_and_poll(context=context)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
defs = dg.Definitions(
|
|
96
|
+
assets=[airbyte_connection_assets],
|
|
97
|
+
resources={"airbyte": airbyte_workspace},
|
|
98
|
+
)
|
|
99
|
+
"""
|
|
100
|
+
dagster_airbyte_translator = dagster_airbyte_translator or DagsterAirbyteTranslator()
|
|
101
|
+
|
|
102
|
+
return multi_asset(
|
|
103
|
+
name=name,
|
|
104
|
+
group_name=group_name,
|
|
105
|
+
can_subset=True,
|
|
106
|
+
specs=[
|
|
107
|
+
spec
|
|
108
|
+
for spec in workspace.load_asset_specs(
|
|
109
|
+
dagster_airbyte_translator=dagster_airbyte_translator
|
|
110
|
+
)
|
|
111
|
+
if AirbyteMetadataSet.extract(spec.metadata).connection_id == connection_id
|
|
112
|
+
],
|
|
113
|
+
)
|
dagster_airbyte/asset_defs.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import hashlib
|
|
2
2
|
import inspect
|
|
3
3
|
import os
|
|
4
|
-
import re
|
|
5
4
|
from abc import abstractmethod
|
|
6
5
|
from functools import partial
|
|
7
6
|
from itertools import chain
|
|
@@ -23,6 +22,7 @@ from typing import (
|
|
|
23
22
|
|
|
24
23
|
import yaml
|
|
25
24
|
from dagster import (
|
|
25
|
+
AssetExecutionContext,
|
|
26
26
|
AssetKey,
|
|
27
27
|
AssetOut,
|
|
28
28
|
AutoMaterializePolicy,
|
|
@@ -33,6 +33,7 @@ from dagster import (
|
|
|
33
33
|
SourceAsset,
|
|
34
34
|
_check as check,
|
|
35
35
|
)
|
|
36
|
+
from dagster._annotations import experimental
|
|
36
37
|
from dagster._core.definitions import AssetsDefinition, multi_asset
|
|
37
38
|
from dagster._core.definitions.cacheable_assets import (
|
|
38
39
|
AssetsDefinitionCacheableData,
|
|
@@ -45,9 +46,17 @@ from dagster._core.errors import DagsterInvalidDefinitionError, DagsterInvalidIn
|
|
|
45
46
|
from dagster._core.execution.context.init import build_init_resource_context
|
|
46
47
|
from dagster._utils.merger import merge_dicts
|
|
47
48
|
|
|
48
|
-
from dagster_airbyte.
|
|
49
|
+
from dagster_airbyte.asset_decorator import airbyte_assets
|
|
50
|
+
from dagster_airbyte.resources import (
|
|
51
|
+
AirbyteCloudResource,
|
|
52
|
+
AirbyteCloudWorkspace,
|
|
53
|
+
AirbyteResource,
|
|
54
|
+
BaseAirbyteResource,
|
|
55
|
+
)
|
|
56
|
+
from dagster_airbyte.translator import AirbyteMetadataSet, DagsterAirbyteTranslator
|
|
49
57
|
from dagster_airbyte.types import AirbyteTableMetadata
|
|
50
58
|
from dagster_airbyte.utils import (
|
|
59
|
+
clean_name,
|
|
51
60
|
generate_materializations,
|
|
52
61
|
generate_table_schema,
|
|
53
62
|
is_basic_normalization_operation,
|
|
@@ -462,11 +471,6 @@ def _get_normalization_tables_for_schema(
|
|
|
462
471
|
return out
|
|
463
472
|
|
|
464
473
|
|
|
465
|
-
def _clean_name(name: str) -> str:
|
|
466
|
-
"""Cleans an input to be a valid Dagster asset name."""
|
|
467
|
-
return re.sub(r"[^a-z0-9]+", "_", name.lower())
|
|
468
|
-
|
|
469
|
-
|
|
470
474
|
class AirbyteConnectionMetadata(
|
|
471
475
|
NamedTuple(
|
|
472
476
|
"_AirbyteConnectionMetadata",
|
|
@@ -908,7 +912,7 @@ def load_assets_from_airbyte_instance(
|
|
|
908
912
|
workspace_id: Optional[str] = None,
|
|
909
913
|
key_prefix: Optional[CoercibleToAssetKeyPrefix] = None,
|
|
910
914
|
create_assets_for_normalization_tables: bool = True,
|
|
911
|
-
connection_to_group_fn: Optional[Callable[[str], Optional[str]]] =
|
|
915
|
+
connection_to_group_fn: Optional[Callable[[str], Optional[str]]] = clean_name,
|
|
912
916
|
connection_meta_to_group_fn: Optional[
|
|
913
917
|
Callable[[AirbyteConnectionMetadata], Optional[str]]
|
|
914
918
|
] = None,
|
|
@@ -1013,7 +1017,7 @@ def load_assets_from_airbyte_instance(
|
|
|
1013
1017
|
check.invariant(
|
|
1014
1018
|
not connection_meta_to_group_fn
|
|
1015
1019
|
or not connection_to_group_fn
|
|
1016
|
-
or connection_to_group_fn ==
|
|
1020
|
+
or connection_to_group_fn == clean_name,
|
|
1017
1021
|
"Cannot specify both connection_meta_to_group_fn and connection_to_group_fn",
|
|
1018
1022
|
)
|
|
1019
1023
|
|
|
@@ -1032,3 +1036,117 @@ def load_assets_from_airbyte_instance(
|
|
|
1032
1036
|
connection_to_freshness_policy_fn=connection_to_freshness_policy_fn,
|
|
1033
1037
|
connection_to_auto_materialize_policy_fn=connection_to_auto_materialize_policy_fn,
|
|
1034
1038
|
)
|
|
1039
|
+
|
|
1040
|
+
|
|
1041
|
+
# -----------------------
|
|
1042
|
+
# Reworked assets factory
|
|
1043
|
+
# -----------------------
|
|
1044
|
+
|
|
1045
|
+
|
|
1046
|
+
@experimental
|
|
1047
|
+
def build_airbyte_assets_definitions(
|
|
1048
|
+
*,
|
|
1049
|
+
workspace: AirbyteCloudWorkspace,
|
|
1050
|
+
dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
|
|
1051
|
+
) -> Sequence[AssetsDefinition]:
|
|
1052
|
+
"""The list of AssetsDefinition for all connections in the Airbyte workspace.
|
|
1053
|
+
|
|
1054
|
+
Args:
|
|
1055
|
+
workspace (AirbyteCloudWorkspace): The Airbyte workspace to fetch assets from.
|
|
1056
|
+
dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
|
|
1057
|
+
to convert Airbyte content into :py:class:`dagster.AssetSpec`.
|
|
1058
|
+
Defaults to :py:class:`DagsterAirbyteTranslator`.
|
|
1059
|
+
|
|
1060
|
+
Returns:
|
|
1061
|
+
List[AssetsDefinition]: The list of AssetsDefinition for all connections in the Airbyte workspace.
|
|
1062
|
+
|
|
1063
|
+
Examples:
|
|
1064
|
+
Sync the tables of an Airbyte connection:
|
|
1065
|
+
|
|
1066
|
+
.. code-block:: python
|
|
1067
|
+
|
|
1068
|
+
from dagster_airbyte import AirbyteCloudWorkspace, build_airbyte_assets_definitions
|
|
1069
|
+
|
|
1070
|
+
import dagster as dg
|
|
1071
|
+
|
|
1072
|
+
airbyte_workspace = AirbyteCloudWorkspace(
|
|
1073
|
+
workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
|
|
1074
|
+
client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
|
|
1075
|
+
client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
|
|
1076
|
+
)
|
|
1077
|
+
|
|
1078
|
+
|
|
1079
|
+
airbyte_assets = build_airbyte_assets_definitions(workspace=airbyte_workspace)
|
|
1080
|
+
|
|
1081
|
+
defs = dg.Definitions(
|
|
1082
|
+
assets=airbyte_assets,
|
|
1083
|
+
resources={"airbyte": airbyte_workspace},
|
|
1084
|
+
)
|
|
1085
|
+
|
|
1086
|
+
Sync the tables of an Airbyte connection with a custom translator:
|
|
1087
|
+
|
|
1088
|
+
.. code-block:: python
|
|
1089
|
+
|
|
1090
|
+
from dagster_airbyte import (
|
|
1091
|
+
DagsterAirbyteTranslator,
|
|
1092
|
+
AirbyteConnectionTableProps,
|
|
1093
|
+
AirbyteCloudWorkspace,
|
|
1094
|
+
build_airbyte_assets_definitions
|
|
1095
|
+
)
|
|
1096
|
+
|
|
1097
|
+
import dagster as dg
|
|
1098
|
+
|
|
1099
|
+
class CustomDagsterAirbyteTranslator(DagsterAirbyteTranslator):
|
|
1100
|
+
def get_asset_spec(self, props: AirbyteConnectionTableProps) -> dg.AssetSpec:
|
|
1101
|
+
default_spec = super().get_asset_spec(props)
|
|
1102
|
+
return default_spec.merge_attributes(
|
|
1103
|
+
metadata={"custom": "metadata"},
|
|
1104
|
+
)
|
|
1105
|
+
|
|
1106
|
+
airbyte_workspace = AirbyteCloudWorkspace(
|
|
1107
|
+
workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
|
|
1108
|
+
client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
|
|
1109
|
+
client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
|
|
1110
|
+
)
|
|
1111
|
+
|
|
1112
|
+
|
|
1113
|
+
airbyte_assets = build_airbyte_assets_definitions(
|
|
1114
|
+
workspace=airbyte_workspace,
|
|
1115
|
+
dagster_airbyte_translator=CustomDagsterAirbyteTranslator()
|
|
1116
|
+
)
|
|
1117
|
+
|
|
1118
|
+
defs = dg.Definitions(
|
|
1119
|
+
assets=airbyte_assets,
|
|
1120
|
+
resources={"airbyte": airbyte_workspace},
|
|
1121
|
+
)
|
|
1122
|
+
"""
|
|
1123
|
+
dagster_airbyte_translator = dagster_airbyte_translator or DagsterAirbyteTranslator()
|
|
1124
|
+
|
|
1125
|
+
all_asset_specs = workspace.load_asset_specs(
|
|
1126
|
+
dagster_airbyte_translator=dagster_airbyte_translator
|
|
1127
|
+
)
|
|
1128
|
+
|
|
1129
|
+
connections = {
|
|
1130
|
+
(
|
|
1131
|
+
check.not_none(AirbyteMetadataSet.extract(spec.metadata).connection_id),
|
|
1132
|
+
check.not_none(AirbyteMetadataSet.extract(spec.metadata).connection_name),
|
|
1133
|
+
)
|
|
1134
|
+
for spec in all_asset_specs
|
|
1135
|
+
}
|
|
1136
|
+
|
|
1137
|
+
_asset_fns = []
|
|
1138
|
+
for connection_id, connection_name in connections:
|
|
1139
|
+
|
|
1140
|
+
@airbyte_assets(
|
|
1141
|
+
connection_id=connection_id,
|
|
1142
|
+
workspace=workspace,
|
|
1143
|
+
name=clean_name(connection_name),
|
|
1144
|
+
group_name=clean_name(connection_name),
|
|
1145
|
+
dagster_airbyte_translator=dagster_airbyte_translator,
|
|
1146
|
+
)
|
|
1147
|
+
def _asset_fn(context: AssetExecutionContext, airbyte: AirbyteCloudWorkspace):
|
|
1148
|
+
yield from airbyte.sync_and_poll(context=context)
|
|
1149
|
+
|
|
1150
|
+
_asset_fns.append(_asset_fn)
|
|
1151
|
+
|
|
1152
|
+
return _asset_fns
|
|
@@ -36,7 +36,6 @@ from dagster_managed_elements.utils import UNSET, diff_dicts
|
|
|
36
36
|
from dagster_airbyte.asset_defs import (
|
|
37
37
|
AirbyteConnectionMetadata,
|
|
38
38
|
AirbyteInstanceCacheableAssetsDefinition,
|
|
39
|
-
_clean_name,
|
|
40
39
|
)
|
|
41
40
|
from dagster_airbyte.managed.types import (
|
|
42
41
|
MANAGED_ELEMENTS_DEPRECATION_MSG,
|
|
@@ -50,7 +49,7 @@ from dagster_airbyte.managed.types import (
|
|
|
50
49
|
InitializedAirbyteSource,
|
|
51
50
|
)
|
|
52
51
|
from dagster_airbyte.resources import AirbyteResource
|
|
53
|
-
from dagster_airbyte.utils import is_basic_normalization_operation
|
|
52
|
+
from dagster_airbyte.utils import clean_name, is_basic_normalization_operation
|
|
54
53
|
|
|
55
54
|
|
|
56
55
|
def gen_configured_stream_json(
|
|
@@ -746,7 +745,7 @@ def load_assets_from_connections(
|
|
|
746
745
|
connections: Iterable[AirbyteConnection],
|
|
747
746
|
key_prefix: Optional[CoercibleToAssetKeyPrefix] = None,
|
|
748
747
|
create_assets_for_normalization_tables: bool = True,
|
|
749
|
-
connection_to_group_fn: Optional[Callable[[str], Optional[str]]] =
|
|
748
|
+
connection_to_group_fn: Optional[Callable[[str], Optional[str]]] = clean_name,
|
|
750
749
|
connection_meta_to_group_fn: Optional[
|
|
751
750
|
Callable[[AirbyteConnectionMetadata], Optional[str]]
|
|
752
751
|
] = None,
|
|
@@ -821,7 +820,7 @@ def load_assets_from_connections(
|
|
|
821
820
|
check.invariant(
|
|
822
821
|
not connection_meta_to_group_fn
|
|
823
822
|
or not connection_to_group_fn
|
|
824
|
-
or connection_to_group_fn ==
|
|
823
|
+
or connection_to_group_fn == clean_name,
|
|
825
824
|
"Cannot specify both connection_meta_to_group_fn and connection_to_group_fn",
|
|
826
825
|
)
|
|
827
826
|
|
dagster_airbyte/resources.py
CHANGED
|
@@ -10,15 +10,18 @@ from typing import Any, Dict, List, Mapping, Optional, Sequence, cast
|
|
|
10
10
|
|
|
11
11
|
import requests
|
|
12
12
|
from dagster import (
|
|
13
|
+
AssetExecutionContext,
|
|
14
|
+
AssetMaterialization,
|
|
13
15
|
ConfigurableResource,
|
|
14
16
|
Definitions,
|
|
15
17
|
Failure,
|
|
16
18
|
InitResourceContext,
|
|
19
|
+
MaterializeResult,
|
|
17
20
|
_check as check,
|
|
18
21
|
get_dagster_logger,
|
|
19
22
|
resource,
|
|
20
23
|
)
|
|
21
|
-
from dagster._annotations import experimental
|
|
24
|
+
from dagster._annotations import experimental, public
|
|
22
25
|
from dagster._config.pythonic_config import infer_schema_from_config_class
|
|
23
26
|
from dagster._core.definitions.asset_spec import AssetSpec
|
|
24
27
|
from dagster._core.definitions.definitions_load_context import StateBackedDefinitionsLoader
|
|
@@ -32,11 +35,20 @@ from requests.exceptions import RequestException
|
|
|
32
35
|
|
|
33
36
|
from dagster_airbyte.translator import (
|
|
34
37
|
AirbyteConnection,
|
|
38
|
+
AirbyteConnectionTableProps,
|
|
35
39
|
AirbyteDestination,
|
|
40
|
+
AirbyteJob,
|
|
41
|
+
AirbyteJobStatusType,
|
|
42
|
+
AirbyteMetadataSet,
|
|
36
43
|
AirbyteWorkspaceData,
|
|
37
44
|
DagsterAirbyteTranslator,
|
|
38
45
|
)
|
|
39
46
|
from dagster_airbyte.types import AirbyteOutput
|
|
47
|
+
from dagster_airbyte.utils import (
|
|
48
|
+
DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY,
|
|
49
|
+
get_airbyte_connection_table_name,
|
|
50
|
+
get_translator_from_airbyte_assets,
|
|
51
|
+
)
|
|
40
52
|
|
|
41
53
|
AIRBYTE_REST_API_BASE = "https://api.airbyte.com"
|
|
42
54
|
AIRBYTE_REST_API_VERSION = "v1"
|
|
@@ -53,16 +65,6 @@ AIRBYTE_CLOUD_REFRESH_TIMEDELTA_SECONDS = 150
|
|
|
53
65
|
AIRBYTE_RECONSTRUCTION_METADATA_KEY_PREFIX = "dagster-airbyte/reconstruction_metadata"
|
|
54
66
|
|
|
55
67
|
|
|
56
|
-
class AirbyteState:
|
|
57
|
-
RUNNING = "running"
|
|
58
|
-
SUCCEEDED = "succeeded"
|
|
59
|
-
CANCELLED = "cancelled"
|
|
60
|
-
PENDING = "pending"
|
|
61
|
-
FAILED = "failed"
|
|
62
|
-
ERROR = "error"
|
|
63
|
-
INCOMPLETE = "incomplete"
|
|
64
|
-
|
|
65
|
-
|
|
66
68
|
class AirbyteResourceState:
|
|
67
69
|
def __init__(self) -> None:
|
|
68
70
|
self.request_cache: Dict[str, Optional[Mapping[str, object]]] = {}
|
|
@@ -252,13 +254,17 @@ class BaseAirbyteResource(ConfigurableResource):
|
|
|
252
254
|
job_info = cast(Dict[str, object], job_details.get("job", {}))
|
|
253
255
|
state = job_info.get("status")
|
|
254
256
|
|
|
255
|
-
if state in (
|
|
257
|
+
if state in (
|
|
258
|
+
AirbyteJobStatusType.RUNNING,
|
|
259
|
+
AirbyteJobStatusType.PENDING,
|
|
260
|
+
AirbyteJobStatusType.INCOMPLETE,
|
|
261
|
+
):
|
|
256
262
|
continue
|
|
257
|
-
elif state ==
|
|
263
|
+
elif state == AirbyteJobStatusType.SUCCEEDED:
|
|
258
264
|
break
|
|
259
|
-
elif state ==
|
|
265
|
+
elif state == AirbyteJobStatusType.ERROR:
|
|
260
266
|
raise Failure(f"Job failed: {job_id}")
|
|
261
|
-
elif state ==
|
|
267
|
+
elif state == AirbyteJobStatusType.CANCELLED:
|
|
262
268
|
raise Failure(f"Job was cancelled: {job_id}")
|
|
263
269
|
else:
|
|
264
270
|
raise Failure(f"Encountered unexpected state `{state}` for job_id {job_id}")
|
|
@@ -266,7 +272,12 @@ class BaseAirbyteResource(ConfigurableResource):
|
|
|
266
272
|
# if Airbyte sync has not completed, make sure to cancel it so that it doesn't outlive
|
|
267
273
|
# the python process
|
|
268
274
|
if (
|
|
269
|
-
state
|
|
275
|
+
state
|
|
276
|
+
not in (
|
|
277
|
+
AirbyteJobStatusType.SUCCEEDED,
|
|
278
|
+
AirbyteJobStatusType.ERROR,
|
|
279
|
+
AirbyteJobStatusType.CANCELLED,
|
|
280
|
+
)
|
|
270
281
|
and self.cancel_sync_on_run_termination
|
|
271
282
|
):
|
|
272
283
|
self.cancel_job(job_id)
|
|
@@ -742,13 +753,17 @@ class AirbyteResource(BaseAirbyteResource):
|
|
|
742
753
|
job_info = cast(Dict[str, object], job_details.get("job", {}))
|
|
743
754
|
state = job_info.get("status")
|
|
744
755
|
|
|
745
|
-
if state in (
|
|
756
|
+
if state in (
|
|
757
|
+
AirbyteJobStatusType.RUNNING,
|
|
758
|
+
AirbyteJobStatusType.PENDING,
|
|
759
|
+
AirbyteJobStatusType.INCOMPLETE,
|
|
760
|
+
):
|
|
746
761
|
continue
|
|
747
|
-
elif state ==
|
|
762
|
+
elif state == AirbyteJobStatusType.SUCCEEDED:
|
|
748
763
|
break
|
|
749
|
-
elif state ==
|
|
764
|
+
elif state == AirbyteJobStatusType.ERROR:
|
|
750
765
|
raise Failure(f"Job failed: {job_id}")
|
|
751
|
-
elif state ==
|
|
766
|
+
elif state == AirbyteJobStatusType.CANCELLED:
|
|
752
767
|
raise Failure(f"Job was cancelled: {job_id}")
|
|
753
768
|
else:
|
|
754
769
|
raise Failure(f"Encountered unexpected state `{state}` for job_id {job_id}")
|
|
@@ -756,7 +771,12 @@ class AirbyteResource(BaseAirbyteResource):
|
|
|
756
771
|
# if Airbyte sync has not completed, make sure to cancel it so that it doesn't outlive
|
|
757
772
|
# the python process
|
|
758
773
|
if (
|
|
759
|
-
state
|
|
774
|
+
state
|
|
775
|
+
not in (
|
|
776
|
+
AirbyteJobStatusType.SUCCEEDED,
|
|
777
|
+
AirbyteJobStatusType.ERROR,
|
|
778
|
+
AirbyteJobStatusType.CANCELLED,
|
|
779
|
+
)
|
|
760
780
|
and self.cancel_sync_on_run_termination
|
|
761
781
|
):
|
|
762
782
|
self.cancel_job(job_id)
|
|
@@ -992,6 +1012,101 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
992
1012
|
base_url=self.rest_api_base_url,
|
|
993
1013
|
)
|
|
994
1014
|
|
|
1015
|
+
def start_sync_job(self, connection_id: str) -> Mapping[str, Any]:
|
|
1016
|
+
return self._make_request(
|
|
1017
|
+
method="POST",
|
|
1018
|
+
endpoint="jobs",
|
|
1019
|
+
base_url=self.rest_api_base_url,
|
|
1020
|
+
data={
|
|
1021
|
+
"connectionId": connection_id,
|
|
1022
|
+
"jobType": "sync",
|
|
1023
|
+
},
|
|
1024
|
+
)
|
|
1025
|
+
|
|
1026
|
+
def get_job_details(self, job_id: int) -> Mapping[str, Any]:
|
|
1027
|
+
return self._make_request(
|
|
1028
|
+
method="GET", endpoint=f"jobs/{job_id}", base_url=self.rest_api_base_url
|
|
1029
|
+
)
|
|
1030
|
+
|
|
1031
|
+
def cancel_job(self, job_id: int) -> Mapping[str, Any]:
|
|
1032
|
+
return self._make_request(
|
|
1033
|
+
method="DELETE", endpoint=f"jobs/{job_id}", base_url=self.rest_api_base_url
|
|
1034
|
+
)
|
|
1035
|
+
|
|
1036
|
+
def sync_and_poll(
|
|
1037
|
+
self,
|
|
1038
|
+
connection_id: str,
|
|
1039
|
+
poll_interval: Optional[float] = None,
|
|
1040
|
+
poll_timeout: Optional[float] = None,
|
|
1041
|
+
cancel_on_termination: bool = True,
|
|
1042
|
+
) -> AirbyteOutput:
|
|
1043
|
+
"""Initializes a sync operation for the given connection, and polls until it completes.
|
|
1044
|
+
|
|
1045
|
+
Args:
|
|
1046
|
+
connection_id (str): The Airbyte Connection ID. You can retrieve this value from the
|
|
1047
|
+
"Connection" tab of a given connection in the Airbyte UI.
|
|
1048
|
+
poll_interval (float): The time (in seconds) that will be waited between successive polls.
|
|
1049
|
+
poll_timeout (float): The maximum time that will wait before this operation is timed
|
|
1050
|
+
out. By default, this will never time out.
|
|
1051
|
+
cancel_on_termination (bool): Whether to cancel a sync in Airbyte if the Dagster runner is terminated.
|
|
1052
|
+
This may be useful to disable if using Airbyte sources that cannot be cancelled and
|
|
1053
|
+
resumed easily, or if your Dagster deployment may experience runner interruptions
|
|
1054
|
+
that do not impact your Airbyte deployment.
|
|
1055
|
+
|
|
1056
|
+
Returns:
|
|
1057
|
+
:py:class:`~AirbyteOutput`:
|
|
1058
|
+
Details of the sync job.
|
|
1059
|
+
"""
|
|
1060
|
+
connection_details = self.get_connection_details(connection_id)
|
|
1061
|
+
start_job_details = self.start_sync_job(connection_id)
|
|
1062
|
+
job = AirbyteJob.from_job_details(job_details=start_job_details)
|
|
1063
|
+
|
|
1064
|
+
self._log.info(f"Job {job.id} initialized for connection_id={connection_id}.")
|
|
1065
|
+
poll_start = datetime.now()
|
|
1066
|
+
poll_interval = (
|
|
1067
|
+
poll_interval if poll_interval is not None else DEFAULT_POLL_INTERVAL_SECONDS
|
|
1068
|
+
)
|
|
1069
|
+
try:
|
|
1070
|
+
while True:
|
|
1071
|
+
if poll_timeout and datetime.now() > poll_start + timedelta(seconds=poll_timeout):
|
|
1072
|
+
raise Failure(
|
|
1073
|
+
f"Timeout: Airbyte job {job.id} is not ready after the timeout"
|
|
1074
|
+
f" {poll_timeout} seconds"
|
|
1075
|
+
)
|
|
1076
|
+
|
|
1077
|
+
time.sleep(poll_interval)
|
|
1078
|
+
# We return these job details in the AirbyteOutput when the job succeeds
|
|
1079
|
+
poll_job_details = self.get_job_details(job.id)
|
|
1080
|
+
job = AirbyteJob.from_job_details(job_details=poll_job_details)
|
|
1081
|
+
if job.status in (
|
|
1082
|
+
AirbyteJobStatusType.RUNNING,
|
|
1083
|
+
AirbyteJobStatusType.PENDING,
|
|
1084
|
+
AirbyteJobStatusType.INCOMPLETE,
|
|
1085
|
+
):
|
|
1086
|
+
continue
|
|
1087
|
+
elif job.status == AirbyteJobStatusType.SUCCEEDED:
|
|
1088
|
+
break
|
|
1089
|
+
elif job.status in [AirbyteJobStatusType.ERROR, AirbyteJobStatusType.FAILED]:
|
|
1090
|
+
raise Failure(f"Job failed: {job.id}")
|
|
1091
|
+
elif job.status == AirbyteJobStatusType.CANCELLED:
|
|
1092
|
+
raise Failure(f"Job was cancelled: {job.id}")
|
|
1093
|
+
else:
|
|
1094
|
+
raise Failure(
|
|
1095
|
+
f"Encountered unexpected state `{job.status}` for job_id {job.id}"
|
|
1096
|
+
)
|
|
1097
|
+
finally:
|
|
1098
|
+
# if Airbyte sync has not completed, make sure to cancel it so that it doesn't outlive
|
|
1099
|
+
# the python process
|
|
1100
|
+
if cancel_on_termination and job.status not in (
|
|
1101
|
+
AirbyteJobStatusType.SUCCEEDED,
|
|
1102
|
+
AirbyteJobStatusType.ERROR,
|
|
1103
|
+
AirbyteJobStatusType.CANCELLED,
|
|
1104
|
+
AirbyteJobStatusType.FAILED,
|
|
1105
|
+
):
|
|
1106
|
+
self.cancel_job(job.id)
|
|
1107
|
+
|
|
1108
|
+
return AirbyteOutput(job_details=poll_job_details, connection_details=connection_details)
|
|
1109
|
+
|
|
995
1110
|
|
|
996
1111
|
@experimental
|
|
997
1112
|
class AirbyteCloudWorkspace(ConfigurableResource):
|
|
@@ -1067,6 +1182,130 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1067
1182
|
destinations_by_id=destinations_by_id,
|
|
1068
1183
|
)
|
|
1069
1184
|
|
|
1185
|
+
@cached_method
|
|
1186
|
+
def load_asset_specs(
|
|
1187
|
+
self,
|
|
1188
|
+
dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
|
|
1189
|
+
) -> Sequence[AssetSpec]:
|
|
1190
|
+
"""Returns a list of AssetSpecs representing the Airbyte content in the workspace.
|
|
1191
|
+
|
|
1192
|
+
Args:
|
|
1193
|
+
dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
|
|
1194
|
+
to convert Airbyte content into :py:class:`dagster.AssetSpec`.
|
|
1195
|
+
Defaults to :py:class:`DagsterAirbyteTranslator`.
|
|
1196
|
+
|
|
1197
|
+
Returns:
|
|
1198
|
+
List[AssetSpec]: The set of assets representing the Airbyte content in the workspace.
|
|
1199
|
+
|
|
1200
|
+
Examples:
|
|
1201
|
+
Loading the asset specs for a given Airbyte workspace:
|
|
1202
|
+
.. code-block:: python
|
|
1203
|
+
|
|
1204
|
+
from dagster_airbyte import AirbyteCloudWorkspace
|
|
1205
|
+
|
|
1206
|
+
import dagster as dg
|
|
1207
|
+
|
|
1208
|
+
airbyte_workspace = AirbyteCloudWorkspace(
|
|
1209
|
+
workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
|
|
1210
|
+
client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
|
|
1211
|
+
client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
|
|
1212
|
+
)
|
|
1213
|
+
|
|
1214
|
+
airbyte_specs = airbyte_workspace.load_asset_specs()
|
|
1215
|
+
defs = dg.Definitions(assets=airbyte_specs, resources={"airbyte": airbyte_workspace})
|
|
1216
|
+
"""
|
|
1217
|
+
dagster_airbyte_translator = dagster_airbyte_translator or DagsterAirbyteTranslator()
|
|
1218
|
+
|
|
1219
|
+
return load_airbyte_cloud_asset_specs(
|
|
1220
|
+
workspace=self, dagster_airbyte_translator=dagster_airbyte_translator
|
|
1221
|
+
)
|
|
1222
|
+
|
|
1223
|
+
def _generate_materialization(
|
|
1224
|
+
self,
|
|
1225
|
+
airbyte_output: AirbyteOutput,
|
|
1226
|
+
dagster_airbyte_translator: DagsterAirbyteTranslator,
|
|
1227
|
+
):
|
|
1228
|
+
connection = AirbyteConnection.from_connection_details(
|
|
1229
|
+
connection_details=airbyte_output.connection_details
|
|
1230
|
+
)
|
|
1231
|
+
|
|
1232
|
+
for stream in connection.streams.values():
|
|
1233
|
+
if stream.selected:
|
|
1234
|
+
connection_table_name = get_airbyte_connection_table_name(
|
|
1235
|
+
stream_prefix=connection.stream_prefix,
|
|
1236
|
+
stream_name=stream.name,
|
|
1237
|
+
)
|
|
1238
|
+
stream_asset_spec = dagster_airbyte_translator.get_asset_spec(
|
|
1239
|
+
props=AirbyteConnectionTableProps(
|
|
1240
|
+
table_name=connection_table_name,
|
|
1241
|
+
stream_prefix=connection.stream_prefix,
|
|
1242
|
+
stream_name=stream.name,
|
|
1243
|
+
json_schema=stream.json_schema,
|
|
1244
|
+
connection_id=connection.id,
|
|
1245
|
+
connection_name=connection.name,
|
|
1246
|
+
destination_type=None,
|
|
1247
|
+
database=None,
|
|
1248
|
+
schema=None,
|
|
1249
|
+
)
|
|
1250
|
+
)
|
|
1251
|
+
|
|
1252
|
+
yield AssetMaterialization(
|
|
1253
|
+
asset_key=stream_asset_spec.key,
|
|
1254
|
+
description=(
|
|
1255
|
+
f"Table generated via Airbyte Cloud sync "
|
|
1256
|
+
f"for connection {connection.name}: {connection_table_name}"
|
|
1257
|
+
),
|
|
1258
|
+
metadata=stream_asset_spec.metadata,
|
|
1259
|
+
)
|
|
1260
|
+
|
|
1261
|
+
@public
|
|
1262
|
+
@experimental
|
|
1263
|
+
def sync_and_poll(self, context: AssetExecutionContext):
|
|
1264
|
+
"""Executes a sync and poll process to materialize Airbyte Cloud assets.
|
|
1265
|
+
This method can only be used in the context of an asset execution.
|
|
1266
|
+
|
|
1267
|
+
Args:
|
|
1268
|
+
context (AssetExecutionContext): The execution context
|
|
1269
|
+
from within `@airbyte_assets`.
|
|
1270
|
+
|
|
1271
|
+
Returns:
|
|
1272
|
+
Iterator[Union[AssetMaterialization, MaterializeResult]]: An iterator of MaterializeResult
|
|
1273
|
+
or AssetMaterialization.
|
|
1274
|
+
"""
|
|
1275
|
+
assets_def = context.assets_def
|
|
1276
|
+
dagster_airbyte_translator = get_translator_from_airbyte_assets(assets_def)
|
|
1277
|
+
connection_id = next(
|
|
1278
|
+
check.not_none(AirbyteMetadataSet.extract(spec.metadata).connection_id)
|
|
1279
|
+
for spec in assets_def.specs
|
|
1280
|
+
)
|
|
1281
|
+
|
|
1282
|
+
client = self.get_client()
|
|
1283
|
+
airbyte_output = client.sync_and_poll(
|
|
1284
|
+
connection_id=connection_id,
|
|
1285
|
+
)
|
|
1286
|
+
|
|
1287
|
+
materialized_asset_keys = set()
|
|
1288
|
+
for materialization in self._generate_materialization(
|
|
1289
|
+
airbyte_output=airbyte_output, dagster_airbyte_translator=dagster_airbyte_translator
|
|
1290
|
+
):
|
|
1291
|
+
# Scan through all tables actually created, if it was expected then emit a MaterializeResult.
|
|
1292
|
+
# Otherwise, emit a runtime AssetMaterialization.
|
|
1293
|
+
if materialization.asset_key in context.selected_asset_keys:
|
|
1294
|
+
yield MaterializeResult(
|
|
1295
|
+
asset_key=materialization.asset_key, metadata=materialization.metadata
|
|
1296
|
+
)
|
|
1297
|
+
materialized_asset_keys.add(materialization.asset_key)
|
|
1298
|
+
else:
|
|
1299
|
+
context.log.warning(
|
|
1300
|
+
f"An unexpected asset was materialized: {materialization.asset_key}. "
|
|
1301
|
+
f"Yielding a materialization event."
|
|
1302
|
+
)
|
|
1303
|
+
yield materialization
|
|
1304
|
+
|
|
1305
|
+
unmaterialized_asset_keys = context.selected_asset_keys - materialized_asset_keys
|
|
1306
|
+
if unmaterialized_asset_keys:
|
|
1307
|
+
context.log.warning(f"Assets were not materialized: {unmaterialized_asset_keys}")
|
|
1308
|
+
|
|
1070
1309
|
|
|
1071
1310
|
@experimental
|
|
1072
1311
|
def load_airbyte_cloud_asset_specs(
|
|
@@ -1103,16 +1342,23 @@ def load_airbyte_cloud_asset_specs(
|
|
|
1103
1342
|
airbyte_cloud_specs = load_airbyte_cloud_asset_specs(airbyte_cloud_workspace)
|
|
1104
1343
|
defs = dg.Definitions(assets=airbyte_cloud_specs)
|
|
1105
1344
|
"""
|
|
1345
|
+
dagster_airbyte_translator = dagster_airbyte_translator or DagsterAirbyteTranslator()
|
|
1346
|
+
|
|
1106
1347
|
with workspace.process_config_and_initialize_cm() as initialized_workspace:
|
|
1107
|
-
return
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
translator=dagster_airbyte_translator or DagsterAirbyteTranslator(),
|
|
1348
|
+
return [
|
|
1349
|
+
spec.merge_attributes(
|
|
1350
|
+
metadata={DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY: dagster_airbyte_translator}
|
|
1111
1351
|
)
|
|
1112
|
-
.
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1352
|
+
for spec in check.is_list(
|
|
1353
|
+
AirbyteCloudWorkspaceDefsLoader(
|
|
1354
|
+
workspace=initialized_workspace,
|
|
1355
|
+
translator=dagster_airbyte_translator,
|
|
1356
|
+
)
|
|
1357
|
+
.build_defs()
|
|
1358
|
+
.assets,
|
|
1359
|
+
AssetSpec,
|
|
1360
|
+
)
|
|
1361
|
+
]
|
|
1116
1362
|
|
|
1117
1363
|
|
|
1118
1364
|
@record
|
dagster_airbyte/translator.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
from enum import Enum
|
|
1
2
|
from typing import Any, List, Mapping, Optional, Sequence
|
|
2
3
|
|
|
3
|
-
from dagster._annotations import experimental
|
|
4
|
+
from dagster._annotations import deprecated, experimental
|
|
4
5
|
from dagster._core.definitions.asset_key import AssetKey
|
|
5
6
|
from dagster._core.definitions.asset_spec import AssetSpec
|
|
6
7
|
from dagster._core.definitions.metadata.metadata_set import NamespacedMetadataSet, TableMetadataSet
|
|
@@ -11,6 +12,27 @@ from dagster._utils.cached_method import cached_method
|
|
|
11
12
|
from dagster_airbyte.utils import generate_table_schema, get_airbyte_connection_table_name
|
|
12
13
|
|
|
13
14
|
|
|
15
|
+
class AirbyteJobStatusType(str, Enum):
    """String-valued enumeration of Airbyte job statuses.

    Members also subclass ``str``, so each one compares equal to its
    lowercase string value (e.g. ``AirbyteJobStatusType.RUNNING == "running"``).
    """

    # NOTE(review): presumably these mirror the status strings reported by the
    # Airbyte API -- confirm against the API reference before adding members.
    RUNNING = "running"
    SUCCEEDED = "succeeded"
    CANCELLED = "cancelled"
    PENDING = "pending"
    FAILED = "failed"
    ERROR = "error"
    INCOMPLETE = "incomplete"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@deprecated(breaking_version="1.10", additional_warn_text="Use `AirbyteJobStatusType` instead.")
class AirbyteState:
    """Deprecated namespace of job-status constants; use `AirbyteJobStatusType` instead.

    Every attribute is bound to the `AirbyteJobStatusType` member of the same name.
    """

    RUNNING = AirbyteJobStatusType.RUNNING
    SUCCEEDED = AirbyteJobStatusType.SUCCEEDED
    CANCELLED = AirbyteJobStatusType.CANCELLED
    PENDING = AirbyteJobStatusType.PENDING
    FAILED = AirbyteJobStatusType.FAILED
    ERROR = AirbyteJobStatusType.ERROR
    INCOMPLETE = AirbyteJobStatusType.INCOMPLETE
|
|
34
|
+
|
|
35
|
+
|
|
14
36
|
@record
|
|
15
37
|
class AirbyteConnectionTableProps:
|
|
16
38
|
table_name: str
|
|
@@ -19,7 +41,7 @@ class AirbyteConnectionTableProps:
|
|
|
19
41
|
json_schema: Mapping[str, Any]
|
|
20
42
|
connection_id: str
|
|
21
43
|
connection_name: str
|
|
22
|
-
destination_type: str
|
|
44
|
+
destination_type: Optional[str]
|
|
23
45
|
database: Optional[str]
|
|
24
46
|
schema: Optional[str]
|
|
25
47
|
|
|
@@ -108,6 +130,25 @@ class AirbyteStream:
|
|
|
108
130
|
)
|
|
109
131
|
|
|
110
132
|
|
|
133
|
+
@whitelist_for_serdes
@record
class AirbyteJob:
    """An Airbyte job, reduced to the fields read from an API job payload."""

    id: int
    status: str

    @classmethod
    def from_job_details(cls, job_details: Mapping[str, Any]) -> "AirbyteJob":
        """Build an `AirbyteJob` from a job-details mapping.

        Args:
            job_details (Mapping[str, Any]): Mapping that must contain the
                ``"jobId"`` and ``"status"`` keys.

        Returns:
            AirbyteJob: A record holding the job id and its status.

        Raises:
            KeyError: If ``"jobId"`` or ``"status"`` is missing.
        """
        job_id = job_details["jobId"]
        job_status = job_details["status"]
        return cls(id=job_id, status=job_status)
|
|
150
|
+
|
|
151
|
+
|
|
111
152
|
@whitelist_for_serdes
|
|
112
153
|
@record
|
|
113
154
|
class AirbyteWorkspaceData:
|
|
@@ -190,5 +231,5 @@ class DagsterAirbyteTranslator:
|
|
|
190
231
|
return AssetSpec(
|
|
191
232
|
key=AssetKey(props.table_name),
|
|
192
233
|
metadata=metadata,
|
|
193
|
-
kinds={"airbyte", props.destination_type},
|
|
234
|
+
kinds={"airbyte", *({props.destination_type} if props.destination_type else set())},
|
|
194
235
|
)
|
dagster_airbyte/utils.py
CHANGED
|
@@ -1,10 +1,26 @@
|
|
|
1
|
-
|
|
1
|
+
import re
|
|
2
|
+
from typing import TYPE_CHECKING, Any, Iterator, Mapping, Optional, Sequence
|
|
2
3
|
|
|
3
|
-
from dagster import
|
|
4
|
+
from dagster import (
|
|
5
|
+
AssetMaterialization,
|
|
6
|
+
AssetsDefinition,
|
|
7
|
+
DagsterInvariantViolationError,
|
|
8
|
+
MetadataValue,
|
|
9
|
+
)
|
|
4
10
|
from dagster._core.definitions.metadata.table import TableColumn, TableSchema
|
|
5
11
|
|
|
6
12
|
from dagster_airbyte.types import AirbyteOutput
|
|
7
13
|
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from dagster_airbyte import DagsterAirbyteTranslator
|
|
16
|
+
|
|
17
|
+
DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY = "dagster-airbyte/dagster_airbyte_translator"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def clean_name(name: str) -> str:
    """Normalize ``name`` into a string usable as a Dagster asset name.

    The input is lowercased, then every run of characters outside ``a-z`` and
    ``0-9`` is collapsed into a single underscore.
    """
    lowered = name.lower()
    return re.sub(r"[^a-z0-9]+", "_", lowered)
|
|
23
|
+
|
|
8
24
|
|
|
9
25
|
def get_airbyte_connection_table_name(stream_prefix: Optional[str], stream_name: str) -> str:
|
|
10
26
|
return f"{stream_prefix if stream_prefix else ''}{stream_name}"
|
|
@@ -78,3 +94,18 @@ def generate_materializations(
|
|
|
78
94
|
all_stream_stats.get(stream_name, {}),
|
|
79
95
|
asset_key_prefix=asset_key_prefix,
|
|
80
96
|
)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def get_translator_from_airbyte_assets(
    airbyte_assets: AssetsDefinition,
) -> "DagsterAirbyteTranslator":
    """Return the translator stored in the metadata of an Airbyte assets definition.

    The translator is read from the metadata of the first asset key of
    ``airbyte_assets``, under ``DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY``.

    Args:
        airbyte_assets (AssetsDefinition): The assets definition to inspect.

    Returns:
        DagsterAirbyteTranslator: The translator found in the first asset's metadata.

    Raises:
        DagsterInvariantViolationError: If the definition exposes no asset
            metadata at all, or if the first asset's metadata carries no translator.
    """
    metadata_by_key = airbyte_assets.metadata_by_key or {}

    # Iterate the normalized mapping and give `next` a default: a bare `next()`
    # on an empty iterator raises StopIteration, which turns into an opaque
    # RuntimeError when this helper is called from inside a generator.
    first_asset_key = next(iter(metadata_by_key), None)
    if first_asset_key is None:
        raise DagsterInvariantViolationError(
            "Expected to find airbyte translator metadata on the given assets definition,"
            " but it has no assets. Did you pass in assets that weren't generated by @airbyte_assets?"
        )

    first_metadata = metadata_by_key.get(first_asset_key, {})
    dagster_airbyte_translator = first_metadata.get(DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY)
    if dagster_airbyte_translator is None:
        raise DagsterInvariantViolationError(
            f"Expected to find airbyte translator metadata on asset {first_asset_key.to_user_string()},"
            " but did not. Did you pass in assets that weren't generated by @airbyte_assets?"
        )
    return dagster_airbyte_translator
|
dagster_airbyte/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.25.
|
|
1
|
+
__version__ = "0.25.8"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: dagster-airbyte
|
|
3
|
-
Version: 0.25.6
|
|
3
|
+
Version: 0.25.8
|
|
4
4
|
Summary: Package for integrating Airbyte with Dagster.
|
|
5
5
|
Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-airbyte
|
|
6
6
|
Author: Dagster Labs
|
|
@@ -14,10 +14,10 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
14
14
|
Classifier: Operating System :: OS Independent
|
|
15
15
|
Requires-Python: >=3.9,<3.13
|
|
16
16
|
License-File: LICENSE
|
|
17
|
-
Requires-Dist: dagster ==1.9.6
|
|
17
|
+
Requires-Dist: dagster ==1.9.8
|
|
18
18
|
Requires-Dist: requests
|
|
19
19
|
Provides-Extra: managed
|
|
20
|
-
Requires-Dist: dagster-managed-elements ==0.25.6 ; extra == 'managed'
|
|
20
|
+
Requires-Dist: dagster-managed-elements ==0.25.8 ; extra == 'managed'
|
|
21
21
|
Provides-Extra: test
|
|
22
22
|
Requires-Dist: requests-mock ; extra == 'test'
|
|
23
23
|
Requires-Dist: flaky ; extra == 'test'
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
dagster_airbyte/__init__.py,sha256=T3mFUhqESk-F0FISreWfuLzULKYCO67WtGzbxGVUxdY,1719
|
|
2
|
+
dagster_airbyte/asset_decorator.py,sha256=s0wFVFqe8kOGSiwyZx9n96C-_tnYfGuxsf_qefeFjJU,4209
|
|
3
|
+
dagster_airbyte/asset_defs.py,sha256=UXp4CE6dVsWdbFn_iaHf-YE4Q3AAn6MgKxnvPY9d_Hk,50113
|
|
4
|
+
dagster_airbyte/cli.py,sha256=HErteP1MjfHozKKSrznh0yAreKETbXp5NDHzXGsdvvE,425
|
|
5
|
+
dagster_airbyte/ops.py,sha256=pq6mp7vN2wXgo3gJMuWaAcxTmfkZ7d1zWzPyL_auSEY,4208
|
|
6
|
+
dagster_airbyte/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
|
|
7
|
+
dagster_airbyte/resources.py,sha256=dmInsuuhYYo1ys4V7V4CGoTp-0uy05sk4HtNJVW9i9A,53537
|
|
8
|
+
dagster_airbyte/translator.py,sha256=jqURbuSRu4dtpwDXIO6hGdcVoQFmWOBzXCLHtPiDOfQ,7525
|
|
9
|
+
dagster_airbyte/types.py,sha256=w1DyTcXyuzrG3wfkOPYFtwj7snHcgqf-dC7_pRjiE1Q,1544
|
|
10
|
+
dagster_airbyte/utils.py,sha256=qRAZY2MGpWmFHfU7ibUHXPyye2dZFz0MabusOeEbnSI,4130
|
|
11
|
+
dagster_airbyte/version.py,sha256=bkf--LdvzFbtaf_d7GSQn5A2t-yDZ-SX33wKaaeqHsY,23
|
|
12
|
+
dagster_airbyte/managed/__init__.py,sha256=6SBtyNOMJ9Cu2UIwFExJHpL_ZVFo3rPMvyIxVOsKvWE,469
|
|
13
|
+
dagster_airbyte/managed/reconciliation.py,sha256=Im5fiT2qWRKqO4vYugpsSiCICoCKo3_gjXRO2UcR0XM,34855
|
|
14
|
+
dagster_airbyte/managed/types.py,sha256=ja056Wm7_ZFw1XGSNmdxmBy2TcOxbnylJCpRA2ng2TE,14596
|
|
15
|
+
dagster_airbyte/managed/generated/__init__.py,sha256=eYq-yfXEeffuKAVFXY8plD0se1wHjFNVqklpbu9gljw,108
|
|
16
|
+
dagster_airbyte/managed/generated/destinations.py,sha256=x1wmWlXvOJHtfaZva3ErdKuVS--sDvfidSXR5ji9G5w,119692
|
|
17
|
+
dagster_airbyte/managed/generated/sources.py,sha256=wyNoGJiNvW8mjRRs6b-_lWFs0Fgy-MZlRaxiN6bP-4s,282691
|
|
18
|
+
dagster_airbyte-0.25.8.dist-info/LICENSE,sha256=TMatHW4_G9ldRdodEAp-l2Xa2WvsdeOh60E3v1R2jis,11349
|
|
19
|
+
dagster_airbyte-0.25.8.dist-info/METADATA,sha256=Za13GjtnFwapDTDkI-L4CCDnSz3mlIIp8UkcW4c6ljQ,915
|
|
20
|
+
dagster_airbyte-0.25.8.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
21
|
+
dagster_airbyte-0.25.8.dist-info/entry_points.txt,sha256=XrbLOz3LpgPV5fdwMmgdP6Rp1AfSG07KeWIddLqh7Lw,61
|
|
22
|
+
dagster_airbyte-0.25.8.dist-info/top_level.txt,sha256=HLwIRQCzqItn88_KbPP8DNTKKQEBUVKk6NCn4PrCtqY,16
|
|
23
|
+
dagster_airbyte-0.25.8.dist-info/RECORD,,
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
dagster_airbyte/__init__.py,sha256=deK1ieUOzZr63mpkWLapn9RLAI1uNnsXFZpNMfm-bgw,1445
|
|
2
|
-
dagster_airbyte/asset_defs.py,sha256=UFzHdNRgFPNU8xqDGcD-ce-J9I82HIj3N2GN5tg8G_Y,46039
|
|
3
|
-
dagster_airbyte/cli.py,sha256=HErteP1MjfHozKKSrznh0yAreKETbXp5NDHzXGsdvvE,425
|
|
4
|
-
dagster_airbyte/ops.py,sha256=pq6mp7vN2wXgo3gJMuWaAcxTmfkZ7d1zWzPyL_auSEY,4208
|
|
5
|
-
dagster_airbyte/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
|
|
6
|
-
dagster_airbyte/resources.py,sha256=HG5Cps1WbMLH-iKFOYmIVDQoHMuYhH7OFv4vMVyqqE0,42936
|
|
7
|
-
dagster_airbyte/translator.py,sha256=Pr6N5Btkp83XvB-5Llra_xNqw3LTHrGtP3sxaQjqj3o,6409
|
|
8
|
-
dagster_airbyte/types.py,sha256=w1DyTcXyuzrG3wfkOPYFtwj7snHcgqf-dC7_pRjiE1Q,1544
|
|
9
|
-
dagster_airbyte/utils.py,sha256=hRUURJiVeximSfFP6pWxb0beh9PsOPdW4obX1pqBVt4,2987
|
|
10
|
-
dagster_airbyte/version.py,sha256=mgi0NAwnQWP7UdrUf2L9Xln12B8NoCQbqDnalCRZMpE,23
|
|
11
|
-
dagster_airbyte/managed/__init__.py,sha256=6SBtyNOMJ9Cu2UIwFExJHpL_ZVFo3rPMvyIxVOsKvWE,469
|
|
12
|
-
dagster_airbyte/managed/reconciliation.py,sha256=HgrLT-Xs8vWY9SfbdBXuorMf60KCn5Qz7bPITW5MxJo,34862
|
|
13
|
-
dagster_airbyte/managed/types.py,sha256=ja056Wm7_ZFw1XGSNmdxmBy2TcOxbnylJCpRA2ng2TE,14596
|
|
14
|
-
dagster_airbyte/managed/generated/__init__.py,sha256=eYq-yfXEeffuKAVFXY8plD0se1wHjFNVqklpbu9gljw,108
|
|
15
|
-
dagster_airbyte/managed/generated/destinations.py,sha256=x1wmWlXvOJHtfaZva3ErdKuVS--sDvfidSXR5ji9G5w,119692
|
|
16
|
-
dagster_airbyte/managed/generated/sources.py,sha256=wyNoGJiNvW8mjRRs6b-_lWFs0Fgy-MZlRaxiN6bP-4s,282691
|
|
17
|
-
dagster_airbyte-0.25.6.dist-info/LICENSE,sha256=TMatHW4_G9ldRdodEAp-l2Xa2WvsdeOh60E3v1R2jis,11349
|
|
18
|
-
dagster_airbyte-0.25.6.dist-info/METADATA,sha256=gNscK29rWFUo3-Z-KmvthzMS9S-N3oxKO-GyVEOBepg,915
|
|
19
|
-
dagster_airbyte-0.25.6.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
20
|
-
dagster_airbyte-0.25.6.dist-info/entry_points.txt,sha256=XrbLOz3LpgPV5fdwMmgdP6Rp1AfSG07KeWIddLqh7Lw,61
|
|
21
|
-
dagster_airbyte-0.25.6.dist-info/top_level.txt,sha256=HLwIRQCzqItn88_KbPP8DNTKKQEBUVKk6NCn4PrCtqY,16
|
|
22
|
-
dagster_airbyte-0.25.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|