dagster-airbyte 0.27.10__py3-none-any.whl → 0.27.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dagster-airbyte might be problematic. Click here for more details.
- dagster_airbyte/__init__.py +3 -0
- dagster_airbyte/asset_decorator.py +4 -4
- dagster_airbyte/asset_defs.py +5 -3
- dagster_airbyte/components/workspace_component/component.py +120 -23
- dagster_airbyte/components/workspace_component/scaffolder.py +1 -1
- dagster_airbyte/resources.py +404 -116
- dagster_airbyte/utils.py +1 -1
- dagster_airbyte/version.py +1 -1
- {dagster_airbyte-0.27.10.dist-info → dagster_airbyte-0.27.12.dist-info}/METADATA +3 -3
- {dagster_airbyte-0.27.10.dist-info → dagster_airbyte-0.27.12.dist-info}/RECORD +14 -14
- {dagster_airbyte-0.27.10.dist-info → dagster_airbyte-0.27.12.dist-info}/WHEEL +0 -0
- {dagster_airbyte-0.27.10.dist-info → dagster_airbyte-0.27.12.dist-info}/entry_points.txt +0 -0
- {dagster_airbyte-0.27.10.dist-info → dagster_airbyte-0.27.12.dist-info}/licenses/LICENSE +0 -0
- {dagster_airbyte-0.27.10.dist-info → dagster_airbyte-0.27.12.dist-info}/top_level.txt +0 -0
dagster_airbyte/__init__.py
CHANGED
|
@@ -2,6 +2,7 @@ from dagster_shared.libraries import DagsterLibraryRegistry
|
|
|
2
2
|
|
|
3
3
|
from dagster_airbyte.components.workspace_component.component import (
|
|
4
4
|
AirbyteCloudWorkspaceComponent as AirbyteCloudWorkspaceComponent,
|
|
5
|
+
AirbyteWorkspaceComponent as AirbyteWorkspaceComponent,
|
|
5
6
|
)
|
|
6
7
|
|
|
7
8
|
try:
|
|
@@ -29,8 +30,10 @@ from dagster_airbyte.resources import (
|
|
|
29
30
|
AirbyteCloudResource as AirbyteCloudResource,
|
|
30
31
|
AirbyteCloudWorkspace as AirbyteCloudWorkspace,
|
|
31
32
|
AirbyteResource as AirbyteResource,
|
|
33
|
+
AirbyteWorkspace as AirbyteWorkspace,
|
|
32
34
|
airbyte_cloud_resource as airbyte_cloud_resource,
|
|
33
35
|
airbyte_resource as airbyte_resource,
|
|
36
|
+
load_airbyte_asset_specs as load_airbyte_asset_specs,
|
|
34
37
|
load_airbyte_cloud_asset_specs as load_airbyte_cloud_asset_specs,
|
|
35
38
|
)
|
|
36
39
|
from dagster_airbyte.translator import (
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from typing import Any, Callable, Optional
|
|
1
|
+
from typing import Any, Callable, Optional, Union
|
|
2
2
|
|
|
3
3
|
from dagster import AssetsDefinition, multi_asset
|
|
4
4
|
from dagster._annotations import beta
|
|
5
5
|
from dagster._core.errors import DagsterInvariantViolationError
|
|
6
6
|
|
|
7
|
-
from dagster_airbyte.resources import AirbyteCloudWorkspace
|
|
7
|
+
from dagster_airbyte.resources import AirbyteCloudWorkspace, AirbyteWorkspace
|
|
8
8
|
from dagster_airbyte.translator import AirbyteMetadataSet, DagsterAirbyteTranslator
|
|
9
9
|
|
|
10
10
|
|
|
@@ -12,7 +12,7 @@ from dagster_airbyte.translator import AirbyteMetadataSet, DagsterAirbyteTransla
|
|
|
12
12
|
def airbyte_assets(
|
|
13
13
|
*,
|
|
14
14
|
connection_id: str,
|
|
15
|
-
workspace: AirbyteCloudWorkspace,
|
|
15
|
+
workspace: Union[AirbyteWorkspace, AirbyteCloudWorkspace],
|
|
16
16
|
name: Optional[str] = None,
|
|
17
17
|
group_name: Optional[str] = None,
|
|
18
18
|
dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
|
|
@@ -21,7 +21,7 @@ def airbyte_assets(
|
|
|
21
21
|
|
|
22
22
|
Args:
|
|
23
23
|
connection_id (str): The Airbyte Connection ID.
|
|
24
|
-
workspace (AirbyteCloudWorkspace): The Airbyte workspace to fetch assets from.
|
|
24
|
+
workspace (Union[AirbyteWorkspace, AirbyteCloudWorkspace]): The Airbyte workspace to fetch assets from.
|
|
25
25
|
name (Optional[str], optional): The name of the op.
|
|
26
26
|
group_name (Optional[str], optional): The name of the asset group.
|
|
27
27
|
dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
|
dagster_airbyte/asset_defs.py
CHANGED
|
@@ -38,7 +38,9 @@ from dagster_airbyte.resources import (
|
|
|
38
38
|
AirbyteCloudResource,
|
|
39
39
|
AirbyteCloudWorkspace,
|
|
40
40
|
AirbyteResource,
|
|
41
|
+
AirbyteWorkspace,
|
|
41
42
|
BaseAirbyteResource,
|
|
43
|
+
BaseAirbyteWorkspace,
|
|
42
44
|
)
|
|
43
45
|
from dagster_airbyte.translator import (
|
|
44
46
|
AirbyteConnection,
|
|
@@ -1040,14 +1042,14 @@ def load_assets_from_airbyte_instance(
|
|
|
1040
1042
|
@beta
|
|
1041
1043
|
def build_airbyte_assets_definitions(
|
|
1042
1044
|
*,
|
|
1043
|
-
workspace: AirbyteCloudWorkspace,
|
|
1045
|
+
workspace: Union[AirbyteWorkspace, AirbyteCloudWorkspace],
|
|
1044
1046
|
dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
|
|
1045
1047
|
connection_selector_fn: Optional[Callable[[AirbyteConnection], bool]] = None,
|
|
1046
1048
|
) -> Sequence[AssetsDefinition]:
|
|
1047
1049
|
"""The list of AssetsDefinition for all connections in the Airbyte workspace.
|
|
1048
1050
|
|
|
1049
1051
|
Args:
|
|
1050
|
-
workspace (AirbyteCloudWorkspace): The Airbyte workspace to fetch assets from.
|
|
1052
|
+
workspace (Union[AirbyteWorkspace, AirbyteCloudWorkspace]): The Airbyte workspace to fetch assets from.
|
|
1051
1053
|
dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
|
|
1052
1054
|
to convert Airbyte content into :py:class:`dagster.AssetSpec`.
|
|
1053
1055
|
Defaults to :py:class:`DagsterAirbyteTranslator`.
|
|
@@ -1164,7 +1166,7 @@ def build_airbyte_assets_definitions(
|
|
|
1164
1166
|
name=f"airbyte_{clean_name(connection_name)}",
|
|
1165
1167
|
dagster_airbyte_translator=dagster_airbyte_translator,
|
|
1166
1168
|
)
|
|
1167
|
-
def _asset_fn(context: AssetExecutionContext, airbyte:
|
|
1169
|
+
def _asset_fn(context: AssetExecutionContext, airbyte: BaseAirbyteWorkspace):
|
|
1168
1170
|
yield from airbyte.sync_and_poll(context=context)
|
|
1169
1171
|
|
|
1170
1172
|
_asset_fns.append(_asset_fn)
|
|
@@ -4,6 +4,7 @@ from typing import Annotated, Callable, Optional, Union
|
|
|
4
4
|
|
|
5
5
|
import dagster as dg
|
|
6
6
|
import pydantic
|
|
7
|
+
from dagster._annotations import superseded
|
|
7
8
|
from dagster._core.definitions.job_definition import default_job_io_manager
|
|
8
9
|
from dagster.components.resolved.base import resolve_fields
|
|
9
10
|
from dagster.components.utils.translation import TranslationFn, TranslationFnResolver
|
|
@@ -11,9 +12,9 @@ from dagster_shared import check
|
|
|
11
12
|
|
|
12
13
|
from dagster_airbyte.asset_defs import build_airbyte_assets_definitions
|
|
13
14
|
from dagster_airbyte.components.workspace_component.scaffolder import (
|
|
14
|
-
|
|
15
|
+
AirbyteWorkspaceComponentScaffolder,
|
|
15
16
|
)
|
|
16
|
-
from dagster_airbyte.resources import AirbyteCloudWorkspace
|
|
17
|
+
from dagster_airbyte.resources import AirbyteCloudWorkspace, AirbyteWorkspace
|
|
17
18
|
from dagster_airbyte.translator import (
|
|
18
19
|
AirbyteConnection,
|
|
19
20
|
AirbyteConnectionTableProps,
|
|
@@ -32,13 +33,100 @@ class ProxyDagsterAirbyteTranslator(DagsterAirbyteTranslator):
|
|
|
32
33
|
return spec
|
|
33
34
|
|
|
34
35
|
|
|
35
|
-
class
|
|
36
|
-
|
|
36
|
+
class BaseAirbyteWorkspaceModel(dg.Model, dg.Resolvable):
|
|
37
|
+
request_max_retries: Annotated[
|
|
38
|
+
int,
|
|
39
|
+
pydantic.Field(
|
|
40
|
+
default=3,
|
|
41
|
+
description=(
|
|
42
|
+
"The maximum number of times requests to the Airbyte API should be retried "
|
|
43
|
+
"before failing."
|
|
44
|
+
),
|
|
45
|
+
),
|
|
46
|
+
]
|
|
47
|
+
request_retry_delay: Annotated[
|
|
48
|
+
float,
|
|
49
|
+
pydantic.Field(
|
|
50
|
+
default=0.25,
|
|
51
|
+
description="Time (in seconds) to wait between each request retry.",
|
|
52
|
+
),
|
|
53
|
+
]
|
|
54
|
+
request_timeout: Annotated[
|
|
55
|
+
int,
|
|
56
|
+
pydantic.Field(
|
|
57
|
+
default=15,
|
|
58
|
+
description="Time (in seconds) after which the requests to Airbyte are declared timed out.",
|
|
59
|
+
),
|
|
60
|
+
]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class AirbyteWorkspaceModel(BaseAirbyteWorkspaceModel):
|
|
64
|
+
rest_api_base_url: Annotated[
|
|
65
|
+
str,
|
|
66
|
+
pydantic.Field(
|
|
67
|
+
...,
|
|
68
|
+
description=(
|
|
69
|
+
"The base URL for the Airbyte REST API. "
|
|
70
|
+
"For Airbyte Cloud, leave this as the default. "
|
|
71
|
+
"For self-managed Airbyte, this is usually <your Airbyte host>/api/public/v1."
|
|
72
|
+
),
|
|
73
|
+
examples=[
|
|
74
|
+
"http://localhost:8000/api/public/v1",
|
|
75
|
+
"https://my-airbyte-server.com/api/public/v1",
|
|
76
|
+
"http://airbyte-airbyte-server-svc.airbyte.svc.cluster.local:8001/api/public/v1",
|
|
77
|
+
],
|
|
78
|
+
),
|
|
79
|
+
]
|
|
80
|
+
configuration_api_base_url: Annotated[
|
|
81
|
+
str,
|
|
82
|
+
pydantic.Field(
|
|
83
|
+
...,
|
|
84
|
+
description=(
|
|
85
|
+
"The base URL for the Airbyte Configuration API. "
|
|
86
|
+
"For Airbyte Cloud, leave this as the default. "
|
|
87
|
+
"For self-managed Airbyte, this is usually <your Airbyte host>/api/v1."
|
|
88
|
+
),
|
|
89
|
+
examples=[
|
|
90
|
+
"http://localhost:8000/api/v1",
|
|
91
|
+
"https://my-airbyte-server.com/api/v1",
|
|
92
|
+
"http://airbyte-airbyte-server-svc.airbyte.svc.cluster.local:8001/api/v1",
|
|
93
|
+
],
|
|
94
|
+
),
|
|
95
|
+
]
|
|
96
|
+
workspace_id: Annotated[str, pydantic.Field(..., description="The Airbyte workspace ID.")]
|
|
97
|
+
client_id: Annotated[
|
|
98
|
+
Optional[str],
|
|
99
|
+
pydantic.Field(None, description="Client ID used to authenticate to Airbyte."),
|
|
100
|
+
]
|
|
101
|
+
client_secret: Annotated[
|
|
102
|
+
Optional[str],
|
|
103
|
+
pydantic.Field(None, description="Client secret used to authenticate to Airbyte."),
|
|
104
|
+
]
|
|
105
|
+
username: Annotated[
|
|
106
|
+
Optional[str],
|
|
107
|
+
pydantic.Field(
|
|
108
|
+
None,
|
|
109
|
+
description="Username used to authenticate to Airbyte. Used for self-managed Airbyte with basic auth.",
|
|
110
|
+
),
|
|
111
|
+
]
|
|
112
|
+
password: Annotated[
|
|
113
|
+
Optional[str],
|
|
114
|
+
pydantic.Field(
|
|
115
|
+
None,
|
|
116
|
+
description="Password used to authenticate to Airbyte. Used for self-managed Airbyte with basic auth.",
|
|
117
|
+
),
|
|
118
|
+
]
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class AirbyteCloudWorkspaceModel(BaseAirbyteWorkspaceModel):
|
|
122
|
+
workspace_id: Annotated[str, pydantic.Field(..., description="The Airbyte workspace ID.")]
|
|
37
123
|
client_id: Annotated[
|
|
38
|
-
str,
|
|
124
|
+
Optional[str],
|
|
125
|
+
pydantic.Field(..., description="Client ID used to authenticate to Airbyte."),
|
|
39
126
|
]
|
|
40
127
|
client_secret: Annotated[
|
|
41
|
-
str,
|
|
128
|
+
Optional[str],
|
|
129
|
+
pydantic.Field(..., description="Client secret used to authenticate to Airbyte."),
|
|
42
130
|
]
|
|
43
131
|
|
|
44
132
|
|
|
@@ -70,19 +158,27 @@ def resolve_connection_selector(
|
|
|
70
158
|
check.failed(f"Unknown connection target type: {type(model)}")
|
|
71
159
|
|
|
72
160
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
161
|
+
def resolve_airbyte_workspace_type(context: dg.ResolutionContext, model):
|
|
162
|
+
if isinstance(model, AirbyteWorkspaceModel):
|
|
163
|
+
return AirbyteWorkspace(**resolve_fields(model, AirbyteWorkspaceModel, context))
|
|
164
|
+
elif isinstance(model, AirbyteCloudWorkspaceModel):
|
|
165
|
+
return AirbyteCloudWorkspace(**resolve_fields(model, AirbyteCloudWorkspaceModel, context))
|
|
166
|
+
else:
|
|
167
|
+
check.failed(f"Unknown Airbyte workspace type: {type(model)}")
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@dg.scaffold_with(AirbyteWorkspaceComponentScaffolder)
|
|
171
|
+
class AirbyteWorkspaceComponent(dg.Component, dg.Model, dg.Resolvable):
|
|
172
|
+
"""Loads Airbyte connections from a given Airbyte workspace as Dagster assets.
|
|
173
|
+
Materializing these assets will trigger a sync of the Airbyte connection, enabling
|
|
174
|
+
you to schedule Airbyte syncs using Dagster.
|
|
78
175
|
"""
|
|
79
176
|
|
|
80
177
|
workspace: Annotated[
|
|
81
|
-
AirbyteCloudWorkspace,
|
|
178
|
+
Union[AirbyteWorkspace, AirbyteCloudWorkspace],
|
|
82
179
|
dg.Resolver(
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
)
|
|
180
|
+
resolve_airbyte_workspace_type,
|
|
181
|
+
model_field_type=Union[AirbyteWorkspaceModel, AirbyteCloudWorkspaceModel],
|
|
86
182
|
),
|
|
87
183
|
]
|
|
88
184
|
connection_selector: Annotated[
|
|
@@ -92,7 +188,7 @@ class AirbyteCloudWorkspaceComponent(dg.Component, dg.Model, dg.Resolvable):
|
|
|
92
188
|
model_field_type=Union[
|
|
93
189
|
str, AirbyteConnectionSelectorByName, AirbyteConnectionSelectorById
|
|
94
190
|
],
|
|
95
|
-
description="Function used to select Airbyte
|
|
191
|
+
description="Function used to select Airbyte connections to pull into Dagster.",
|
|
96
192
|
),
|
|
97
193
|
] = None
|
|
98
194
|
translation: Optional[
|
|
@@ -102,13 +198,9 @@ class AirbyteCloudWorkspaceComponent(dg.Component, dg.Model, dg.Resolvable):
|
|
|
102
198
|
]
|
|
103
199
|
] = pydantic.Field(
|
|
104
200
|
None,
|
|
105
|
-
description="Function used to translate Airbyte
|
|
201
|
+
description="Function used to translate Airbyte connection table properties into Dagster asset specs.",
|
|
106
202
|
)
|
|
107
203
|
|
|
108
|
-
@cached_property
|
|
109
|
-
def workspace_resource(self) -> AirbyteCloudWorkspace:
|
|
110
|
-
return self.workspace
|
|
111
|
-
|
|
112
204
|
@cached_property
|
|
113
205
|
def translator(self) -> DagsterAirbyteTranslator:
|
|
114
206
|
if self.translation:
|
|
@@ -117,17 +209,22 @@ class AirbyteCloudWorkspaceComponent(dg.Component, dg.Model, dg.Resolvable):
|
|
|
117
209
|
|
|
118
210
|
def build_defs(self, context: dg.ComponentLoadContext) -> dg.Definitions:
|
|
119
211
|
airbyte_assets = build_airbyte_assets_definitions(
|
|
120
|
-
workspace=self.
|
|
212
|
+
workspace=self.workspace,
|
|
121
213
|
dagster_airbyte_translator=self.translator,
|
|
122
214
|
connection_selector_fn=self.connection_selector,
|
|
123
215
|
)
|
|
124
216
|
assets_with_resource = [
|
|
125
217
|
airbyte_asset.with_resources(
|
|
126
218
|
{
|
|
127
|
-
"airbyte": self.
|
|
219
|
+
"airbyte": self.workspace.get_resource_definition(),
|
|
128
220
|
"io_manager": default_job_io_manager,
|
|
129
221
|
}
|
|
130
222
|
)
|
|
131
223
|
for airbyte_asset in airbyte_assets
|
|
132
224
|
]
|
|
133
225
|
return dg.Definitions(assets=assets_with_resource)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
# Subclassing to create the alias to be able to use the superseded decorator.
|
|
229
|
+
@superseded(additional_warn_text="Superseded. Use AirbyteWorkspaceComponent instead.")
|
|
230
|
+
class AirbyteCloudWorkspaceComponent(AirbyteWorkspaceComponent): ...
|
|
@@ -12,7 +12,7 @@ class AirbyteScaffolderParams(BaseModel):
|
|
|
12
12
|
client_secret: Optional[str] = None
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
class
|
|
15
|
+
class AirbyteWorkspaceComponentScaffolder(Scaffolder[AirbyteScaffolderParams]):
|
|
16
16
|
@classmethod
|
|
17
17
|
def get_scaffold_params(cls) -> type[AirbyteScaffolderParams]:
|
|
18
18
|
return AirbyteScaffolderParams
|
dagster_airbyte/resources.py
CHANGED
|
@@ -7,12 +7,14 @@ from abc import abstractmethod
|
|
|
7
7
|
from collections.abc import Iterator, Mapping, Sequence
|
|
8
8
|
from contextlib import contextmanager
|
|
9
9
|
from datetime import datetime, timedelta
|
|
10
|
-
from typing import Any, Callable, Optional, cast
|
|
10
|
+
from typing import Any, Callable, ClassVar, Optional, Union, cast
|
|
11
|
+
from urllib.parse import parse_qsl, urlparse
|
|
11
12
|
|
|
12
13
|
import requests
|
|
13
14
|
from dagster import (
|
|
14
15
|
AssetExecutionContext,
|
|
15
16
|
AssetMaterialization,
|
|
17
|
+
AssetSpec,
|
|
16
18
|
ConfigurableResource,
|
|
17
19
|
Definitions,
|
|
18
20
|
Failure,
|
|
@@ -22,16 +24,16 @@ from dagster import (
|
|
|
22
24
|
get_dagster_logger,
|
|
23
25
|
resource,
|
|
24
26
|
)
|
|
25
|
-
from dagster._annotations import
|
|
27
|
+
from dagster._annotations import superseded
|
|
26
28
|
from dagster._config.pythonic_config import infer_schema_from_config_class
|
|
27
|
-
from dagster._core.definitions.assets.definition.asset_spec import AssetSpec
|
|
28
29
|
from dagster._core.definitions.definitions_load_context import StateBackedDefinitionsLoader
|
|
29
30
|
from dagster._core.definitions.resource_definition import dagster_maintained_resource
|
|
30
|
-
from dagster.
|
|
31
|
-
from dagster._utils.cached_method import cached_method
|
|
31
|
+
from dagster._symbol_annotations import beta, public
|
|
32
32
|
from dagster._utils.merger import deep_merge_dicts
|
|
33
33
|
from dagster_shared.dagster_model import DagsterModel
|
|
34
|
-
from
|
|
34
|
+
from dagster_shared.record import record
|
|
35
|
+
from dagster_shared.utils.cached_method import cached_method
|
|
36
|
+
from pydantic import Field, PrivateAttr, model_validator
|
|
35
37
|
from requests.exceptions import RequestException
|
|
36
38
|
|
|
37
39
|
from dagster_airbyte.translator import (
|
|
@@ -51,17 +53,20 @@ from dagster_airbyte.utils import (
|
|
|
51
53
|
get_translator_from_airbyte_assets,
|
|
52
54
|
)
|
|
53
55
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
56
|
+
AIRBYTE_CLOUD_REST_API_BASE = "https://api.airbyte.com"
|
|
57
|
+
AIRBYTE_CLOUD_REST_API_VERSION = "v1"
|
|
58
|
+
AIRBYTE_CLOUD_REST_API_BASE_URL = f"{AIRBYTE_CLOUD_REST_API_BASE}/{AIRBYTE_CLOUD_REST_API_VERSION}"
|
|
59
|
+
AIRBYTE_CLOUD_CONFIGURATION_API_BASE = "https://cloud.airbyte.com/api"
|
|
60
|
+
AIRBYTE_CLOUD_CONFIGURATION_API_VERSION = "v1"
|
|
61
|
+
AIRBYTE_CLOUD_CONFIGURATION_API_BASE_URL = (
|
|
62
|
+
f"{AIRBYTE_CLOUD_CONFIGURATION_API_BASE}/{AIRBYTE_CLOUD_CONFIGURATION_API_VERSION}"
|
|
63
|
+
)
|
|
59
64
|
|
|
60
65
|
DEFAULT_POLL_INTERVAL_SECONDS = 10
|
|
61
66
|
|
|
62
67
|
# The access token expires every 3 minutes in Airbyte Cloud.
|
|
63
68
|
# Refresh is needed after 2.5 minutes to avoid the "token expired" error message.
|
|
64
|
-
|
|
69
|
+
AIRBYTE_REFRESH_TIMEDELTA_SECONDS = 150
|
|
65
70
|
|
|
66
71
|
AIRBYTE_RECONSTRUCTION_METADATA_KEY_PREFIX = "dagster-airbyte/reconstruction_metadata"
|
|
67
72
|
|
|
@@ -410,7 +415,7 @@ class AirbyteCloudResource(BaseAirbyteResource):
|
|
|
410
415
|
or not self._access_token_timestamp
|
|
411
416
|
or self._access_token_timestamp
|
|
412
417
|
<= datetime.timestamp(
|
|
413
|
-
datetime.now() - timedelta(seconds=
|
|
418
|
+
datetime.now() - timedelta(seconds=AIRBYTE_REFRESH_TIMEDELTA_SECONDS)
|
|
414
419
|
)
|
|
415
420
|
)
|
|
416
421
|
|
|
@@ -849,12 +854,36 @@ def airbyte_cloud_resource(context) -> AirbyteCloudResource:
|
|
|
849
854
|
|
|
850
855
|
|
|
851
856
|
@beta
|
|
852
|
-
class
|
|
853
|
-
"""This class exposes methods on top of the Airbyte APIs for Airbyte
|
|
857
|
+
class AirbyteClient(DagsterModel):
|
|
858
|
+
"""This class exposes methods on top of the Airbyte APIs."""
|
|
854
859
|
|
|
860
|
+
rest_api_base_url: str = Field(
|
|
861
|
+
default=AIRBYTE_CLOUD_REST_API_BASE_URL,
|
|
862
|
+
description=(
|
|
863
|
+
"The base URL for the Airbyte REST API. "
|
|
864
|
+
"For Airbyte Cloud, leave this as the default. "
|
|
865
|
+
"For self-managed Airbyte, this is usually <your Airbyte host>/api/public/v1."
|
|
866
|
+
),
|
|
867
|
+
)
|
|
868
|
+
configuration_api_base_url: str = Field(
|
|
869
|
+
default=AIRBYTE_CLOUD_CONFIGURATION_API_BASE_URL,
|
|
870
|
+
description=(
|
|
871
|
+
"The base URL for the Airbyte Configuration API. "
|
|
872
|
+
"For Airbyte Cloud, leave this as the default. "
|
|
873
|
+
"For self-managed Airbyte, this is usually <your Airbyte host>/api/v1."
|
|
874
|
+
),
|
|
875
|
+
)
|
|
855
876
|
workspace_id: str = Field(..., description="The Airbyte workspace ID")
|
|
856
|
-
client_id: str = Field(
|
|
857
|
-
client_secret: str = Field(
|
|
877
|
+
client_id: Optional[str] = Field(default=None, description="The Airbyte client ID.")
|
|
878
|
+
client_secret: Optional[str] = Field(default=None, description="The Airbyte client secret.")
|
|
879
|
+
username: Optional[str] = Field(
|
|
880
|
+
default=None,
|
|
881
|
+
description="The Airbyte username for authentication. Used for self-managed Airbyte with basic auth.",
|
|
882
|
+
)
|
|
883
|
+
password: Optional[str] = Field(
|
|
884
|
+
default=None,
|
|
885
|
+
description="The Airbyte password for authentication. Used for self-managed Airbyte with basic auth.",
|
|
886
|
+
)
|
|
858
887
|
request_max_retries: int = Field(
|
|
859
888
|
...,
|
|
860
889
|
description=(
|
|
@@ -874,19 +903,34 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
874
903
|
_access_token_value: Optional[str] = PrivateAttr(default=None)
|
|
875
904
|
_access_token_timestamp: Optional[float] = PrivateAttr(default=None)
|
|
876
905
|
|
|
906
|
+
@model_validator(mode="before")
|
|
907
|
+
def validate_authentication(cls, values):
|
|
908
|
+
has_client_id = values.get("client_id") is not None
|
|
909
|
+
has_client_secret = values.get("client_secret") is not None
|
|
910
|
+
has_username = values.get("username") is not None
|
|
911
|
+
has_password = values.get("password") is not None
|
|
912
|
+
|
|
913
|
+
check.invariant(
|
|
914
|
+
has_username == has_password,
|
|
915
|
+
"Missing config: both username and password are required for Airbyte authentication.",
|
|
916
|
+
)
|
|
917
|
+
|
|
918
|
+
check.invariant(
|
|
919
|
+
has_client_id == has_client_secret,
|
|
920
|
+
"Missing config: both client_id and client_secret are required for Airbyte authentication.",
|
|
921
|
+
)
|
|
922
|
+
|
|
923
|
+
check.invariant(
|
|
924
|
+
not ((has_client_id or has_client_secret) and (has_username or has_password)),
|
|
925
|
+
"Invalid config: cannot provide both client_id/client_secret and username/password for Airbyte authentication.",
|
|
926
|
+
)
|
|
927
|
+
return values
|
|
928
|
+
|
|
877
929
|
@property
|
|
878
930
|
@cached_method
|
|
879
931
|
def _log(self) -> logging.Logger:
|
|
880
932
|
return get_dagster_logger()
|
|
881
933
|
|
|
882
|
-
@property
|
|
883
|
-
def rest_api_base_url(self) -> str:
|
|
884
|
-
return f"{AIRBYTE_REST_API_BASE}/{AIRBYTE_REST_API_VERSION}"
|
|
885
|
-
|
|
886
|
-
@property
|
|
887
|
-
def configuration_api_base_url(self) -> str:
|
|
888
|
-
return f"{AIRBYTE_CONFIGURATION_API_BASE}/{AIRBYTE_CONFIGURATION_API_VERSION}"
|
|
889
|
-
|
|
890
934
|
@property
|
|
891
935
|
def all_additional_request_params(self) -> Mapping[str, Any]:
|
|
892
936
|
return {**self.authorization_request_params, **self.user_agent_request_params}
|
|
@@ -894,6 +938,9 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
894
938
|
@property
|
|
895
939
|
def authorization_request_params(self) -> Mapping[str, Any]:
|
|
896
940
|
# Make sure the access token is refreshed before using it when calling the API.
|
|
941
|
+
if not (self.client_id and self.client_secret):
|
|
942
|
+
return {}
|
|
943
|
+
|
|
897
944
|
if self._needs_refreshed_access_token():
|
|
898
945
|
self._refresh_access_token()
|
|
899
946
|
return {
|
|
@@ -908,10 +955,9 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
908
955
|
|
|
909
956
|
def _refresh_access_token(self) -> None:
|
|
910
957
|
response = check.not_none(
|
|
911
|
-
self.
|
|
958
|
+
self._single_request(
|
|
912
959
|
method="POST",
|
|
913
|
-
|
|
914
|
-
base_url=self.rest_api_base_url,
|
|
960
|
+
url=f"{self.rest_api_base_url}/applications/token",
|
|
915
961
|
data={
|
|
916
962
|
"client_id": self.client_id,
|
|
917
963
|
"client_secret": self.client_secret,
|
|
@@ -928,9 +974,7 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
928
974
|
not self._access_token_value
|
|
929
975
|
or not self._access_token_timestamp
|
|
930
976
|
or self._access_token_timestamp
|
|
931
|
-
<= (
|
|
932
|
-
datetime.now() - timedelta(seconds=AIRBYTE_CLOUD_REFRESH_TIMEDELTA_SECONDS)
|
|
933
|
-
).timestamp()
|
|
977
|
+
<= (datetime.now() - timedelta(seconds=AIRBYTE_REFRESH_TIMEDELTA_SECONDS)).timestamp()
|
|
934
978
|
)
|
|
935
979
|
|
|
936
980
|
def _get_session(self, include_additional_request_params: bool) -> requests.Session:
|
|
@@ -942,33 +986,21 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
942
986
|
}
|
|
943
987
|
session = requests.Session()
|
|
944
988
|
session.headers.update(headers)
|
|
989
|
+
|
|
990
|
+
if self.username and self.password:
|
|
991
|
+
session.auth = (self.username, self.password)
|
|
992
|
+
|
|
945
993
|
return session
|
|
946
994
|
|
|
947
|
-
def
|
|
995
|
+
def _single_request(
|
|
948
996
|
self,
|
|
949
997
|
method: str,
|
|
950
|
-
|
|
951
|
-
base_url: str,
|
|
998
|
+
url: str,
|
|
952
999
|
data: Optional[Mapping[str, Any]] = None,
|
|
953
1000
|
params: Optional[Mapping[str, Any]] = None,
|
|
954
1001
|
include_additional_request_params: bool = True,
|
|
955
1002
|
) -> Mapping[str, Any]:
|
|
956
|
-
"""
|
|
957
|
-
|
|
958
|
-
Args:
|
|
959
|
-
method (str): The http method to use for this request (e.g. "POST", "GET", "PATCH").
|
|
960
|
-
endpoint (str): The Airbyte API endpoint to send this request to.
|
|
961
|
-
base_url (str): The base url to the Airbyte API to use.
|
|
962
|
-
data (Optional[Dict[str, Any]]): JSON-formatted data string to be included in the request.
|
|
963
|
-
params (Optional[Dict[str, Any]]): JSON-formatted query params to be included in the request.
|
|
964
|
-
include_additional_request_params (bool): Whether to include authorization and user-agent headers
|
|
965
|
-
to the request parameters. Defaults to True.
|
|
966
|
-
|
|
967
|
-
Returns:
|
|
968
|
-
Dict[str, Any]: Parsed json data from the response to this request
|
|
969
|
-
"""
|
|
970
|
-
url = f"{base_url}/{endpoint}"
|
|
971
|
-
|
|
1003
|
+
"""Execute a single HTTP request with retry logic."""
|
|
972
1004
|
num_retries = 0
|
|
973
1005
|
while True:
|
|
974
1006
|
try:
|
|
@@ -989,14 +1021,54 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
989
1021
|
num_retries += 1
|
|
990
1022
|
time.sleep(self.request_retry_delay)
|
|
991
1023
|
|
|
992
|
-
|
|
1024
|
+
raise Failure(f"Max retries ({self.request_max_retries}) exceeded with url: {url}.")
|
|
1025
|
+
|
|
1026
|
+
return {}
|
|
993
1027
|
|
|
994
|
-
def
|
|
1028
|
+
def _paginated_request(
|
|
1029
|
+
self,
|
|
1030
|
+
method: str,
|
|
1031
|
+
url: str,
|
|
1032
|
+
params: dict[str, Any],
|
|
1033
|
+
data: Optional[Mapping[str, Any]] = None,
|
|
1034
|
+
include_additional_request_params: bool = True,
|
|
1035
|
+
) -> Sequence[Mapping[str, Any]]:
|
|
1036
|
+
"""Execute paginated requests and return all items as a list."""
|
|
1037
|
+
result_data = []
|
|
1038
|
+
while True:
|
|
1039
|
+
response = self._single_request(
|
|
1040
|
+
method=method,
|
|
1041
|
+
url=url,
|
|
1042
|
+
data=data,
|
|
1043
|
+
params=params,
|
|
1044
|
+
include_additional_request_params=include_additional_request_params,
|
|
1045
|
+
)
|
|
1046
|
+
|
|
1047
|
+
# Handle different response structures
|
|
1048
|
+
result_data.extend(response.get("data", []))
|
|
1049
|
+
next_url = response.get("next", "")
|
|
1050
|
+
if not next_url:
|
|
1051
|
+
break
|
|
1052
|
+
|
|
1053
|
+
# Parse the query string for the next page
|
|
1054
|
+
next_params = parse_qsl(urlparse(next_url).query)
|
|
1055
|
+
# Overwrite the pagination params with the ones for the next page
|
|
1056
|
+
params.update(dict(next_params))
|
|
1057
|
+
|
|
1058
|
+
return result_data
|
|
1059
|
+
|
|
1060
|
+
def validate_workspace_id(self) -> None:
|
|
1061
|
+
"""Fetches workspace details. This is used to validate that the workspace exists."""
|
|
1062
|
+
self._single_request(
|
|
1063
|
+
method="GET",
|
|
1064
|
+
url=f"{self.rest_api_base_url}/workspaces/{self.workspace_id}",
|
|
1065
|
+
)
|
|
1066
|
+
|
|
1067
|
+
def get_connections(self) -> Sequence[Mapping[str, Any]]:
|
|
995
1068
|
"""Fetches all connections of an Airbyte workspace from the Airbyte REST API."""
|
|
996
|
-
return self.
|
|
1069
|
+
return self._paginated_request(
|
|
997
1070
|
method="GET",
|
|
998
|
-
|
|
999
|
-
base_url=self.rest_api_base_url,
|
|
1071
|
+
url=f"{self.rest_api_base_url}/connections",
|
|
1000
1072
|
params={"workspaceIds": self.workspace_id},
|
|
1001
1073
|
)
|
|
1002
1074
|
|
|
@@ -1007,26 +1079,23 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
1007
1079
|
# Using the Airbyte Configuration API to get the connection details, including streams and their configs.
|
|
1008
1080
|
# https://airbyte-public-api-docs.s3.us-east-2.amazonaws.com/rapidoc-api-docs.html#post-/v1/connections/get
|
|
1009
1081
|
# https://github.com/airbytehq/airbyte-platform/blob/v1.0.0/airbyte-api/server-api/src/main/openapi/config.yaml
|
|
1010
|
-
return self.
|
|
1082
|
+
return self._single_request(
|
|
1011
1083
|
method="POST",
|
|
1012
|
-
|
|
1013
|
-
base_url=self.configuration_api_base_url,
|
|
1084
|
+
url=f"{self.configuration_api_base_url}/connections/get",
|
|
1014
1085
|
data={"connectionId": connection_id},
|
|
1015
1086
|
)
|
|
1016
1087
|
|
|
1017
1088
|
def get_destination_details(self, destination_id: str) -> Mapping[str, Any]:
|
|
1018
1089
|
"""Fetches details about a given destination from the Airbyte REST API."""
|
|
1019
|
-
return self.
|
|
1090
|
+
return self._single_request(
|
|
1020
1091
|
method="GET",
|
|
1021
|
-
|
|
1022
|
-
base_url=self.rest_api_base_url,
|
|
1092
|
+
url=f"{self.rest_api_base_url}/destinations/{destination_id}",
|
|
1023
1093
|
)
|
|
1024
1094
|
|
|
1025
1095
|
def start_sync_job(self, connection_id: str) -> Mapping[str, Any]:
|
|
1026
|
-
return self.
|
|
1096
|
+
return self._single_request(
|
|
1027
1097
|
method="POST",
|
|
1028
|
-
|
|
1029
|
-
base_url=self.rest_api_base_url,
|
|
1098
|
+
url=f"{self.rest_api_base_url}/jobs",
|
|
1030
1099
|
data={
|
|
1031
1100
|
"connectionId": connection_id,
|
|
1032
1101
|
"jobType": "sync",
|
|
@@ -1034,13 +1103,15 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
1034
1103
|
)
|
|
1035
1104
|
|
|
1036
1105
|
def get_job_details(self, job_id: int) -> Mapping[str, Any]:
|
|
1037
|
-
return self.
|
|
1038
|
-
method="GET",
|
|
1106
|
+
return self._single_request(
|
|
1107
|
+
method="GET",
|
|
1108
|
+
url=f"{self.rest_api_base_url}/jobs/{job_id}",
|
|
1039
1109
|
)
|
|
1040
1110
|
|
|
1041
1111
|
def cancel_job(self, job_id: int) -> Mapping[str, Any]:
|
|
1042
|
-
return self.
|
|
1043
|
-
method="DELETE",
|
|
1112
|
+
return self._single_request(
|
|
1113
|
+
method="DELETE",
|
|
1114
|
+
url=f"{self.rest_api_base_url}/jobs/{job_id}",
|
|
1044
1115
|
)
|
|
1045
1116
|
|
|
1046
1117
|
def sync_and_poll(
|
|
@@ -1119,14 +1190,11 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
1119
1190
|
|
|
1120
1191
|
|
|
1121
1192
|
@beta
|
|
1122
|
-
class
|
|
1123
|
-
"""This class represents a Airbyte
|
|
1193
|
+
class BaseAirbyteWorkspace(ConfigurableResource):
|
|
1194
|
+
"""This class represents an Airbyte workspace and provides utilities
|
|
1124
1195
|
to interact with Airbyte APIs.
|
|
1125
1196
|
"""
|
|
1126
1197
|
|
|
1127
|
-
workspace_id: str = Field(..., description="The Airbyte Cloud workspace ID")
|
|
1128
|
-
client_id: str = Field(..., description="The Airbyte Cloud client ID.")
|
|
1129
|
-
client_secret: str = Field(..., description="The Airbyte Cloud client secret.")
|
|
1130
1198
|
request_max_retries: int = Field(
|
|
1131
1199
|
default=3,
|
|
1132
1200
|
description=(
|
|
@@ -1142,18 +1210,7 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1142
1210
|
default=15,
|
|
1143
1211
|
description="Time (in seconds) after which the requests to Airbyte are declared timed out.",
|
|
1144
1212
|
)
|
|
1145
|
-
_client:
|
|
1146
|
-
|
|
1147
|
-
@cached_method
|
|
1148
|
-
def get_client(self) -> AirbyteCloudClient:
|
|
1149
|
-
return AirbyteCloudClient(
|
|
1150
|
-
workspace_id=self.workspace_id,
|
|
1151
|
-
client_id=self.client_id,
|
|
1152
|
-
client_secret=self.client_secret,
|
|
1153
|
-
request_max_retries=self.request_max_retries,
|
|
1154
|
-
request_retry_delay=self.request_retry_delay,
|
|
1155
|
-
request_timeout=self.request_timeout,
|
|
1156
|
-
)
|
|
1213
|
+
_client: AirbyteClient = PrivateAttr(default=None) # type: ignore
|
|
1157
1214
|
|
|
1158
1215
|
@cached_method
|
|
1159
1216
|
def fetch_airbyte_workspace_data(
|
|
@@ -1168,7 +1225,10 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1168
1225
|
destinations_by_id = {}
|
|
1169
1226
|
|
|
1170
1227
|
client = self.get_client()
|
|
1171
|
-
|
|
1228
|
+
|
|
1229
|
+
client.validate_workspace_id()
|
|
1230
|
+
|
|
1231
|
+
connections = client.get_connections()
|
|
1172
1232
|
|
|
1173
1233
|
for partial_connection_details in connections:
|
|
1174
1234
|
full_connection_details = client.get_connection_details(
|
|
@@ -1214,14 +1274,14 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1214
1274
|
Loading the asset specs for a given Airbyte workspace:
|
|
1215
1275
|
.. code-block:: python
|
|
1216
1276
|
|
|
1217
|
-
from dagster_airbyte import
|
|
1277
|
+
from dagster_airbyte import AirbyteWorkspace
|
|
1218
1278
|
|
|
1219
1279
|
import dagster as dg
|
|
1220
1280
|
|
|
1221
|
-
airbyte_workspace =
|
|
1222
|
-
workspace_id=dg.EnvVar("
|
|
1223
|
-
client_id=dg.EnvVar("
|
|
1224
|
-
client_secret=dg.EnvVar("
|
|
1281
|
+
airbyte_workspace = AirbyteWorkspace(
|
|
1282
|
+
workspace_id=dg.EnvVar("AIRBYTE_WORKSPACE_ID"),
|
|
1283
|
+
client_id=dg.EnvVar("AIRBYTE_CLIENT_ID"),
|
|
1284
|
+
client_secret=dg.EnvVar("AIRBYTE_CLIENT_SECRET"),
|
|
1225
1285
|
)
|
|
1226
1286
|
|
|
1227
1287
|
airbyte_specs = airbyte_workspace.load_asset_specs()
|
|
@@ -1229,7 +1289,7 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1229
1289
|
"""
|
|
1230
1290
|
dagster_airbyte_translator = dagster_airbyte_translator or DagsterAirbyteTranslator()
|
|
1231
1291
|
|
|
1232
|
-
return
|
|
1292
|
+
return load_airbyte_asset_specs(
|
|
1233
1293
|
workspace=self,
|
|
1234
1294
|
dagster_airbyte_translator=dagster_airbyte_translator,
|
|
1235
1295
|
connection_selector_fn=connection_selector_fn,
|
|
@@ -1267,7 +1327,7 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1267
1327
|
yield AssetMaterialization(
|
|
1268
1328
|
asset_key=stream_asset_spec.key,
|
|
1269
1329
|
description=(
|
|
1270
|
-
f"Table generated via Airbyte
|
|
1330
|
+
f"Table generated via Airbyte sync "
|
|
1271
1331
|
f"for connection {connection.name}: {connection_table_name}"
|
|
1272
1332
|
),
|
|
1273
1333
|
metadata=stream_asset_spec.metadata,
|
|
@@ -1276,7 +1336,7 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1276
1336
|
@public
|
|
1277
1337
|
@beta
|
|
1278
1338
|
def sync_and_poll(self, context: AssetExecutionContext):
|
|
1279
|
-
"""Executes a sync and poll process to materialize Airbyte
|
|
1339
|
+
"""Executes a sync and poll process to materialize Airbyte assets.
|
|
1280
1340
|
This method can only be used in the context of an asset execution.
|
|
1281
1341
|
|
|
1282
1342
|
Args:
|
|
@@ -1322,9 +1382,9 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1322
1382
|
context.log.warning(f"Assets were not materialized: {unmaterialized_asset_keys}")
|
|
1323
1383
|
|
|
1324
1384
|
@contextmanager
|
|
1325
|
-
def process_config_and_initialize_cm_cached(self) -> Iterator["
|
|
1385
|
+
def process_config_and_initialize_cm_cached(self) -> Iterator["AirbyteWorkspace"]:
|
|
1326
1386
|
# Hack to avoid reconstructing initialized copies of this resource, which invalidates
|
|
1327
|
-
# @cached_method caches. This means that multiple calls to
|
|
1387
|
+
# @cached_method caches. This means that multiple calls to load_airbyte_asset_specs
|
|
1328
1388
|
# will not trigger multiple API calls to fetch the workspace data.
|
|
1329
1389
|
# Bespoke impl since @cached_method doesn't play nice with iterators; it's exhausted after
|
|
1330
1390
|
# the first call.
|
|
@@ -1338,6 +1398,247 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1338
1398
|
|
|
1339
1399
|
|
|
1340
1400
|
@beta
|
|
1401
|
+
class AirbyteWorkspace(BaseAirbyteWorkspace):
|
|
1402
|
+
"""This resource allows users to programmatically interface with the Airbyte REST API to launch
|
|
1403
|
+
syncs and monitor their progress for a given Airbyte workspace.
|
|
1404
|
+
|
|
1405
|
+
**Examples:**
|
|
1406
|
+
Using OAuth client credentials:
|
|
1407
|
+
|
|
1408
|
+
.. code-block:: python
|
|
1409
|
+
|
|
1410
|
+
import dagster as dg
|
|
1411
|
+
from dagster_airbyte import AirbyteWorkspace, build_airbyte_assets_definitions
|
|
1412
|
+
|
|
1413
|
+
airbyte_workspace = AirbyteWorkspace(
|
|
1414
|
+
rest_api_base_url=dg.EnvVar("AIRBYTE_REST_API_BASE_URL"),
|
|
1415
|
+
configuration_api_base_url=dg.EnvVar("AIRBYTE_CONFIGURATION_API_BASE_URL"),
|
|
1416
|
+
workspace_id=dg.EnvVar("AIRBYTE_WORKSPACE_ID"),
|
|
1417
|
+
client_id=dg.EnvVar("AIRBYTE_CLIENT_ID"),
|
|
1418
|
+
client_secret=dg.EnvVar("AIRBYTE_CLIENT_SECRET"),
|
|
1419
|
+
)
|
|
1420
|
+
|
|
1421
|
+
all_airbyte_assets = build_airbyte_assets_definitions(workspace=airbyte_workspace)
|
|
1422
|
+
|
|
1423
|
+
defs = dg.Definitions(
|
|
1424
|
+
assets=all_airbyte_assets,
|
|
1425
|
+
resources={"airbyte": airbyte_workspace},
|
|
1426
|
+
)
|
|
1427
|
+
|
|
1428
|
+
Using basic authentication:
|
|
1429
|
+
|
|
1430
|
+
.. code-block:: python
|
|
1431
|
+
|
|
1432
|
+
import dagster as dg
|
|
1433
|
+
from dagster_airbyte import AirbyteWorkspace, build_airbyte_assets_definitions
|
|
1434
|
+
|
|
1435
|
+
airbyte_workspace = AirbyteWorkspace(
|
|
1436
|
+
rest_api_base_url=dg.EnvVar("AIRBYTE_REST_API_BASE_URL"),
|
|
1437
|
+
configuration_api_base_url=dg.EnvVar("AIRBYTE_CONFIGURATION_API_BASE_URL"),
|
|
1438
|
+
workspace_id=dg.EnvVar("AIRBYTE_WORKSPACE_ID"),
|
|
1439
|
+
username=dg.EnvVar("AIRBYTE_USERNAME"),
|
|
1440
|
+
password=dg.EnvVar("AIRBYTE_PASSWORD"),
|
|
1441
|
+
)
|
|
1442
|
+
|
|
1443
|
+
all_airbyte_assets = build_airbyte_assets_definitions(workspace=airbyte_workspace)
|
|
1444
|
+
|
|
1445
|
+
defs = dg.Definitions(
|
|
1446
|
+
assets=all_airbyte_assets,
|
|
1447
|
+
resources={"airbyte": airbyte_workspace},
|
|
1448
|
+
)
|
|
1449
|
+
|
|
1450
|
+
Using no authentication:
|
|
1451
|
+
|
|
1452
|
+
.. code-block:: python
|
|
1453
|
+
|
|
1454
|
+
import dagster as dg
|
|
1455
|
+
from dagster_airbyte import AirbyteWorkspace, build_airbyte_assets_definitions
|
|
1456
|
+
|
|
1457
|
+
airbyte_workspace = AirbyteWorkspace(
|
|
1458
|
+
rest_api_base_url=dg.EnvVar("AIRBYTE_REST_API_BASE_URL"),
|
|
1459
|
+
configuration_api_base_url=dg.EnvVar("AIRBYTE_CONFIGURATION_API_BASE_URL"),
|
|
1460
|
+
workspace_id=dg.EnvVar("AIRBYTE_WORKSPACE_ID"),
|
|
1461
|
+
)
|
|
1462
|
+
|
|
1463
|
+
all_airbyte_assets = build_airbyte_assets_definitions(workspace=airbyte_workspace)
|
|
1464
|
+
|
|
1465
|
+
defs = dg.Definitions(
|
|
1466
|
+
assets=all_airbyte_assets,
|
|
1467
|
+
resources={"airbyte": airbyte_workspace},
|
|
1468
|
+
)
|
|
1469
|
+
"""
|
|
1470
|
+
|
|
1471
|
+
rest_api_base_url: str = Field(
|
|
1472
|
+
...,
|
|
1473
|
+
description="The base URL for the Airbyte REST API.",
|
|
1474
|
+
examples=[
|
|
1475
|
+
"http://localhost:8000/api/public/v1",
|
|
1476
|
+
"https://my-airbyte-server.com/api/public/v1",
|
|
1477
|
+
"http://airbyte-airbyte-server-svc.airbyte.svc.cluster.local:8001/api/public/v1",
|
|
1478
|
+
],
|
|
1479
|
+
)
|
|
1480
|
+
configuration_api_base_url: str = Field(
|
|
1481
|
+
...,
|
|
1482
|
+
description="The base URL for the Airbyte Configuration API.",
|
|
1483
|
+
examples=[
|
|
1484
|
+
"http://localhost:8000/api/v1",
|
|
1485
|
+
"https://my-airbyte-server.com/api/v1",
|
|
1486
|
+
"http://airbyte-airbyte-server-svc.airbyte.svc.cluster.local:8001/api/v1",
|
|
1487
|
+
],
|
|
1488
|
+
)
|
|
1489
|
+
workspace_id: str = Field(..., description="The Airbyte workspace ID")
|
|
1490
|
+
client_id: Optional[str] = Field(default=None, description="The Airbyte client ID.")
|
|
1491
|
+
client_secret: Optional[str] = Field(default=None, description="The Airbyte client secret.")
|
|
1492
|
+
username: Optional[str] = Field(
|
|
1493
|
+
default=None, description="The Airbyte username for authentication."
|
|
1494
|
+
)
|
|
1495
|
+
password: Optional[str] = Field(
|
|
1496
|
+
default=None, description="The Airbyte password for authentication."
|
|
1497
|
+
)
|
|
1498
|
+
|
|
1499
|
+
@cached_method
|
|
1500
|
+
def get_client(self) -> AirbyteClient:
|
|
1501
|
+
return AirbyteClient(
|
|
1502
|
+
rest_api_base_url=self.rest_api_base_url,
|
|
1503
|
+
configuration_api_base_url=self.configuration_api_base_url,
|
|
1504
|
+
workspace_id=self.workspace_id,
|
|
1505
|
+
client_id=self.client_id,
|
|
1506
|
+
client_secret=self.client_secret,
|
|
1507
|
+
username=self.username,
|
|
1508
|
+
password=self.password,
|
|
1509
|
+
request_max_retries=self.request_max_retries,
|
|
1510
|
+
request_retry_delay=self.request_retry_delay,
|
|
1511
|
+
request_timeout=self.request_timeout,
|
|
1512
|
+
)
|
|
1513
|
+
|
|
1514
|
+
|
|
1515
|
+
@beta
|
|
1516
|
+
class AirbyteCloudWorkspace(BaseAirbyteWorkspace):
|
|
1517
|
+
"""This resource allows users to programmatically interface with the Airbyte Cloud REST API to launch
|
|
1518
|
+
syncs and monitor their progress for a given Airbyte Cloud workspace.
|
|
1519
|
+
|
|
1520
|
+
**Examples:**
|
|
1521
|
+
|
|
1522
|
+
.. code-block:: python
|
|
1523
|
+
|
|
1524
|
+
from dagster_airbyte import AirbyteCloudWorkspace, build_airbyte_assets_definitions
|
|
1525
|
+
|
|
1526
|
+
import dagster as dg
|
|
1527
|
+
|
|
1528
|
+
airbyte_workspace = AirbyteCloudWorkspace(
|
|
1529
|
+
workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
|
|
1530
|
+
client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
|
|
1531
|
+
client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
|
|
1532
|
+
)
|
|
1533
|
+
|
|
1534
|
+
all_airbyte_assets = build_airbyte_assets_definitions(workspace=airbyte_workspace)
|
|
1535
|
+
|
|
1536
|
+
defs = dg.Definitions(
|
|
1537
|
+
assets=all_airbyte_assets,
|
|
1538
|
+
resources={"airbyte": airbyte_workspace},
|
|
1539
|
+
)
|
|
1540
|
+
"""
|
|
1541
|
+
|
|
1542
|
+
rest_api_base_url: ClassVar[str] = AIRBYTE_CLOUD_REST_API_BASE_URL
|
|
1543
|
+
configuration_api_base_url: ClassVar[str] = AIRBYTE_CLOUD_CONFIGURATION_API_BASE_URL
|
|
1544
|
+
workspace_id: str = Field(..., description="The Airbyte workspace ID")
|
|
1545
|
+
client_id: str = Field(..., description="The Airbyte client ID.")
|
|
1546
|
+
client_secret: str = Field(..., description="The Airbyte client secret.")
|
|
1547
|
+
|
|
1548
|
+
@cached_method
|
|
1549
|
+
def get_client(self) -> AirbyteClient:
|
|
1550
|
+
return AirbyteClient(
|
|
1551
|
+
rest_api_base_url=self.rest_api_base_url,
|
|
1552
|
+
configuration_api_base_url=self.configuration_api_base_url,
|
|
1553
|
+
workspace_id=self.workspace_id,
|
|
1554
|
+
client_id=self.client_id,
|
|
1555
|
+
client_secret=self.client_secret,
|
|
1556
|
+
request_max_retries=self.request_max_retries,
|
|
1557
|
+
request_retry_delay=self.request_retry_delay,
|
|
1558
|
+
request_timeout=self.request_timeout,
|
|
1559
|
+
)
|
|
1560
|
+
|
|
1561
|
+
|
|
1562
|
+
@public
|
|
1563
|
+
@beta
|
|
1564
|
+
def load_airbyte_asset_specs(
|
|
1565
|
+
workspace: BaseAirbyteWorkspace,
|
|
1566
|
+
dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
|
|
1567
|
+
connection_selector_fn: Optional[Callable[[AirbyteConnection], bool]] = None,
|
|
1568
|
+
) -> Sequence[AssetSpec]:
|
|
1569
|
+
"""Returns a list of AssetSpecs representing the Airbyte content in the workspace.
|
|
1570
|
+
|
|
1571
|
+
Args:
|
|
1572
|
+
workspace (BaseAirbyteWorkspace): The Airbyte workspace to fetch assets from.
|
|
1573
|
+
dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
|
|
1574
|
+
to convert Airbyte content into :py:class:`dagster.AssetSpec`.
|
|
1575
|
+
Defaults to :py:class:`DagsterAirbyteTranslator`.
|
|
1576
|
+
connection_selector_fn (Optional[Callable[[AirbyteConnection], bool]]): A function that allows for filtering
|
|
1577
|
+
the Airbyte connections for which assets are created.
|
|
1578
|
+
|
|
1579
|
+
Returns:
|
|
1580
|
+
List[AssetSpec]: The set of assets representing the Airbyte content in the workspace.
|
|
1581
|
+
|
|
1582
|
+
Examples:
|
|
1583
|
+
Loading the asset specs for a given Airbyte workspace:
|
|
1584
|
+
|
|
1585
|
+
.. code-block:: python
|
|
1586
|
+
|
|
1587
|
+
from dagster_airbyte import AirbyteWorkspace, load_airbyte_asset_specs
|
|
1588
|
+
|
|
1589
|
+
import dagster as dg
|
|
1590
|
+
|
|
1591
|
+
airbyte_workspace = AirbyteWorkspace(
|
|
1592
|
+
workspace_id=dg.EnvVar("AIRBYTE_WORKSPACE_ID"),
|
|
1593
|
+
client_id=dg.EnvVar("AIRBYTE_CLIENT_ID"),
|
|
1594
|
+
client_secret=dg.EnvVar("AIRBYTE_CLIENT_SECRET"),
|
|
1595
|
+
)
|
|
1596
|
+
|
|
1597
|
+
airbyte_specs = load_airbyte_asset_specs(airbyte_workspace)
|
|
1598
|
+
dg.Definitions(assets=airbyte_specs)
|
|
1599
|
+
|
|
1600
|
+
Filter connections by name:
|
|
1601
|
+
|
|
1602
|
+
.. code-block:: python
|
|
1603
|
+
|
|
1604
|
+
from dagster_airbyte import AirbyteWorkspace, load_airbyte_asset_specs
|
|
1605
|
+
|
|
1606
|
+
import dagster as dg
|
|
1607
|
+
|
|
1608
|
+
airbyte_workspace = AirbyteWorkspace(
|
|
1609
|
+
workspace_id=dg.EnvVar("AIRBYTE_WORKSPACE_ID"),
|
|
1610
|
+
client_id=dg.EnvVar("AIRBYTE_CLIENT_ID"),
|
|
1611
|
+
client_secret=dg.EnvVar("AIRBYTE_CLIENT_SECRET"),
|
|
1612
|
+
)
|
|
1613
|
+
|
|
1614
|
+
airbyte_specs = load_airbyte_asset_specs(
|
|
1615
|
+
workspace=airbyte_workspace,
|
|
1616
|
+
connection_selector_fn=lambda connection: connection.name in ["connection1", "connection2"]
|
|
1617
|
+
)
|
|
1618
|
+
dg.Definitions(assets=airbyte_specs)
|
|
1619
|
+
"""
|
|
1620
|
+
dagster_airbyte_translator = dagster_airbyte_translator or DagsterAirbyteTranslator()
|
|
1621
|
+
|
|
1622
|
+
with workspace.process_config_and_initialize_cm_cached() as initialized_workspace:
|
|
1623
|
+
return [
|
|
1624
|
+
spec.merge_attributes(
|
|
1625
|
+
metadata={DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY: dagster_airbyte_translator}
|
|
1626
|
+
)
|
|
1627
|
+
for spec in check.is_list(
|
|
1628
|
+
AirbyteWorkspaceDefsLoader(
|
|
1629
|
+
workspace=initialized_workspace,
|
|
1630
|
+
translator=dagster_airbyte_translator,
|
|
1631
|
+
connection_selector_fn=connection_selector_fn,
|
|
1632
|
+
)
|
|
1633
|
+
.build_defs()
|
|
1634
|
+
.assets,
|
|
1635
|
+
AssetSpec,
|
|
1636
|
+
)
|
|
1637
|
+
]
|
|
1638
|
+
|
|
1639
|
+
|
|
1640
|
+
@public
|
|
1641
|
+
@superseded(additional_warn_text="Use load_airbyte_asset_specs instead.")
|
|
1341
1642
|
def load_airbyte_cloud_asset_specs(
|
|
1342
1643
|
workspace: AirbyteCloudWorkspace,
|
|
1343
1644
|
dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
|
|
@@ -1394,29 +1695,16 @@ def load_airbyte_cloud_asset_specs(
|
|
|
1394
1695
|
)
|
|
1395
1696
|
dg.Definitions(assets=airbyte_cloud_specs)
|
|
1396
1697
|
"""
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
metadata={DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY: dagster_airbyte_translator}
|
|
1403
|
-
)
|
|
1404
|
-
for spec in check.is_list(
|
|
1405
|
-
AirbyteCloudWorkspaceDefsLoader(
|
|
1406
|
-
workspace=initialized_workspace,
|
|
1407
|
-
translator=dagster_airbyte_translator,
|
|
1408
|
-
connection_selector_fn=connection_selector_fn,
|
|
1409
|
-
)
|
|
1410
|
-
.build_defs()
|
|
1411
|
-
.assets,
|
|
1412
|
-
AssetSpec,
|
|
1413
|
-
)
|
|
1414
|
-
]
|
|
1698
|
+
return load_airbyte_asset_specs(
|
|
1699
|
+
workspace=workspace,
|
|
1700
|
+
dagster_airbyte_translator=dagster_airbyte_translator,
|
|
1701
|
+
connection_selector_fn=connection_selector_fn,
|
|
1702
|
+
)
|
|
1415
1703
|
|
|
1416
1704
|
|
|
1417
1705
|
@record
|
|
1418
|
-
class
|
|
1419
|
-
workspace: AirbyteCloudWorkspace
|
|
1706
|
+
class AirbyteWorkspaceDefsLoader(StateBackedDefinitionsLoader[AirbyteWorkspaceData]):
|
|
1707
|
+
workspace: Union[AirbyteWorkspace, AirbyteCloudWorkspace]
|
|
1420
1708
|
translator: DagsterAirbyteTranslator
|
|
1421
1709
|
connection_selector_fn: Optional[Callable[[AirbyteConnection], bool]]
|
|
1422
1710
|
|
dagster_airbyte/utils.py
CHANGED
|
@@ -13,7 +13,7 @@ from dagster._utils.names import clean_name_lower
|
|
|
13
13
|
from dagster_airbyte.types import AirbyteOutput
|
|
14
14
|
|
|
15
15
|
if TYPE_CHECKING:
|
|
16
|
-
from dagster_airbyte import DagsterAirbyteTranslator
|
|
16
|
+
from dagster_airbyte.translator import DagsterAirbyteTranslator
|
|
17
17
|
|
|
18
18
|
DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY = "dagster-airbyte/dagster_airbyte_translator"
|
|
19
19
|
|
dagster_airbyte/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.27.
|
|
1
|
+
__version__ = "0.27.12"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dagster-airbyte
|
|
3
|
-
Version: 0.27.
|
|
3
|
+
Version: 0.27.12
|
|
4
4
|
Summary: Package for integrating Airbyte with Dagster.
|
|
5
5
|
Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-airbyte
|
|
6
6
|
Author: Dagster Labs
|
|
@@ -15,13 +15,13 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
15
15
|
Classifier: Operating System :: OS Independent
|
|
16
16
|
Requires-Python: >=3.9,<3.14
|
|
17
17
|
License-File: LICENSE
|
|
18
|
-
Requires-Dist: dagster==1.11.
|
|
18
|
+
Requires-Dist: dagster==1.11.12
|
|
19
19
|
Requires-Dist: requests
|
|
20
20
|
Provides-Extra: test
|
|
21
21
|
Requires-Dist: requests-mock; extra == "test"
|
|
22
22
|
Requires-Dist: flaky; extra == "test"
|
|
23
23
|
Provides-Extra: managed
|
|
24
|
-
Requires-Dist: dagster-managed-elements==0.27.
|
|
24
|
+
Requires-Dist: dagster-managed-elements==0.27.12; extra == "managed"
|
|
25
25
|
Dynamic: author
|
|
26
26
|
Dynamic: author-email
|
|
27
27
|
Dynamic: classifier
|
|
@@ -1,27 +1,27 @@
|
|
|
1
|
-
dagster_airbyte/__init__.py,sha256=
|
|
2
|
-
dagster_airbyte/asset_decorator.py,sha256=
|
|
3
|
-
dagster_airbyte/asset_defs.py,sha256=
|
|
1
|
+
dagster_airbyte/__init__.py,sha256=Bc3BuYTIcz5XU6NbKHqIemxgFSydLsqVLyLcx6K2pVA,2024
|
|
2
|
+
dagster_airbyte/asset_decorator.py,sha256=ziG1U5qleIIR8xbYmoKSA00gLazbrd09FzcgmGdZmkY,4640
|
|
3
|
+
dagster_airbyte/asset_defs.py,sha256=0lGOKSAlTxOYFonDsEI4FzuiVAViV8pr2Zgx3R8fAvQ,51694
|
|
4
4
|
dagster_airbyte/cli.py,sha256=HErteP1MjfHozKKSrznh0yAreKETbXp5NDHzXGsdvvE,425
|
|
5
5
|
dagster_airbyte/ops.py,sha256=oOEczVYpqVRTc1-0osfpR5FZbPjNJDYihgRjk_qtOQA,4229
|
|
6
6
|
dagster_airbyte/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
|
|
7
|
-
dagster_airbyte/resources.py,sha256=
|
|
7
|
+
dagster_airbyte/resources.py,sha256=AXN_mtNl_JKauoOY29wyuiv9jjAiCNF9M4baVCQPMYo,67193
|
|
8
8
|
dagster_airbyte/translator.py,sha256=RY2LhGPACIfyd2zOSH1swdLz4koX6_bvclW-alS3Bic,7547
|
|
9
9
|
dagster_airbyte/types.py,sha256=TYUjI3skjLYeANjesgJ-IAJNu8bAnL1ymsUfz5LsRTE,1565
|
|
10
|
-
dagster_airbyte/utils.py,sha256=
|
|
11
|
-
dagster_airbyte/version.py,sha256=
|
|
10
|
+
dagster_airbyte/utils.py,sha256=wG9119kXi87JgcOjK7iNozr-svZocJBQYoHBMmnXZcE,4092
|
|
11
|
+
dagster_airbyte/version.py,sha256=LsrV9vLeQGPkCzZUAe_cL-EObzt7m1nJHGhyU7GcAI0,24
|
|
12
12
|
dagster_airbyte/components/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
13
|
dagster_airbyte/components/workspace_component/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
-
dagster_airbyte/components/workspace_component/component.py,sha256=
|
|
15
|
-
dagster_airbyte/components/workspace_component/scaffolder.py,sha256=
|
|
14
|
+
dagster_airbyte/components/workspace_component/component.py,sha256=TIl8Ja6BHwpyRmkzkn-o75sH1dOUbbHtTftAb68Ag_c,8445
|
|
15
|
+
dagster_airbyte/components/workspace_component/scaffolder.py,sha256=zk5HDJ6C8zcfUWKslJVyqyBB0LV3JzNAocY2ptT517s,1046
|
|
16
16
|
dagster_airbyte/managed/__init__.py,sha256=6SBtyNOMJ9Cu2UIwFExJHpL_ZVFo3rPMvyIxVOsKvWE,469
|
|
17
17
|
dagster_airbyte/managed/reconciliation.py,sha256=xoVfqPBpNSldkiqOLIPnc7ei8CppHKWtzv8bvxjdqlI,34859
|
|
18
18
|
dagster_airbyte/managed/types.py,sha256=isPfX8L9YwtZAf9Vk4hhxBePLR00AEldsdK2TsM1H2o,14611
|
|
19
19
|
dagster_airbyte/managed/generated/__init__.py,sha256=eYq-yfXEeffuKAVFXY8plD0se1wHjFNVqklpbu9gljw,108
|
|
20
20
|
dagster_airbyte/managed/generated/destinations.py,sha256=x1wmWlXvOJHtfaZva3ErdKuVS--sDvfidSXR5ji9G5w,119692
|
|
21
21
|
dagster_airbyte/managed/generated/sources.py,sha256=y0TPNvcRd8c9mhje-NoXsHeKRPt1nXcpww8mNAtqCps,282685
|
|
22
|
-
dagster_airbyte-0.27.
|
|
23
|
-
dagster_airbyte-0.27.
|
|
24
|
-
dagster_airbyte-0.27.
|
|
25
|
-
dagster_airbyte-0.27.
|
|
26
|
-
dagster_airbyte-0.27.
|
|
27
|
-
dagster_airbyte-0.27.
|
|
22
|
+
dagster_airbyte-0.27.12.dist-info/licenses/LICENSE,sha256=4lsMW-RCvfVD4_F57wrmpe3vX1xwUk_OAKKmV_XT7Z0,11348
|
|
23
|
+
dagster_airbyte-0.27.12.dist-info/METADATA,sha256=EqAF0V1BTHFPRJn3dL8OLbZen9irkb8W1IpWKmno2Sw,1169
|
|
24
|
+
dagster_airbyte-0.27.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
25
|
+
dagster_airbyte-0.27.12.dist-info/entry_points.txt,sha256=096yvfMP-gNsCgDg9vDQtinis5QGpD-e_kHEhcHaML8,120
|
|
26
|
+
dagster_airbyte-0.27.12.dist-info/top_level.txt,sha256=HLwIRQCzqItn88_KbPP8DNTKKQEBUVKk6NCn4PrCtqY,16
|
|
27
|
+
dagster_airbyte-0.27.12.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|