dagster-airbyte 0.27.9__py3-none-any.whl → 0.27.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dagster-airbyte might be problematic. Click here for more details.
- dagster_airbyte/__init__.py +3 -0
- dagster_airbyte/asset_decorator.py +4 -4
- dagster_airbyte/asset_defs.py +5 -3
- dagster_airbyte/components/workspace_component/component.py +120 -23
- dagster_airbyte/components/workspace_component/scaffolder.py +1 -1
- dagster_airbyte/resources.py +397 -116
- dagster_airbyte/utils.py +1 -1
- dagster_airbyte/version.py +1 -1
- {dagster_airbyte-0.27.9.dist-info → dagster_airbyte-0.27.11.dist-info}/METADATA +3 -3
- {dagster_airbyte-0.27.9.dist-info → dagster_airbyte-0.27.11.dist-info}/RECORD +14 -14
- {dagster_airbyte-0.27.9.dist-info → dagster_airbyte-0.27.11.dist-info}/WHEEL +0 -0
- {dagster_airbyte-0.27.9.dist-info → dagster_airbyte-0.27.11.dist-info}/entry_points.txt +0 -0
- {dagster_airbyte-0.27.9.dist-info → dagster_airbyte-0.27.11.dist-info}/licenses/LICENSE +0 -0
- {dagster_airbyte-0.27.9.dist-info → dagster_airbyte-0.27.11.dist-info}/top_level.txt +0 -0
dagster_airbyte/__init__.py
CHANGED
|
@@ -2,6 +2,7 @@ from dagster_shared.libraries import DagsterLibraryRegistry
|
|
|
2
2
|
|
|
3
3
|
from dagster_airbyte.components.workspace_component.component import (
|
|
4
4
|
AirbyteCloudWorkspaceComponent as AirbyteCloudWorkspaceComponent,
|
|
5
|
+
AirbyteWorkspaceComponent as AirbyteWorkspaceComponent,
|
|
5
6
|
)
|
|
6
7
|
|
|
7
8
|
try:
|
|
@@ -29,8 +30,10 @@ from dagster_airbyte.resources import (
|
|
|
29
30
|
AirbyteCloudResource as AirbyteCloudResource,
|
|
30
31
|
AirbyteCloudWorkspace as AirbyteCloudWorkspace,
|
|
31
32
|
AirbyteResource as AirbyteResource,
|
|
33
|
+
AirbyteWorkspace as AirbyteWorkspace,
|
|
32
34
|
airbyte_cloud_resource as airbyte_cloud_resource,
|
|
33
35
|
airbyte_resource as airbyte_resource,
|
|
36
|
+
load_airbyte_asset_specs as load_airbyte_asset_specs,
|
|
34
37
|
load_airbyte_cloud_asset_specs as load_airbyte_cloud_asset_specs,
|
|
35
38
|
)
|
|
36
39
|
from dagster_airbyte.translator import (
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from typing import Any, Callable, Optional
|
|
1
|
+
from typing import Any, Callable, Optional, Union
|
|
2
2
|
|
|
3
3
|
from dagster import AssetsDefinition, multi_asset
|
|
4
4
|
from dagster._annotations import beta
|
|
5
5
|
from dagster._core.errors import DagsterInvariantViolationError
|
|
6
6
|
|
|
7
|
-
from dagster_airbyte.resources import AirbyteCloudWorkspace
|
|
7
|
+
from dagster_airbyte.resources import AirbyteCloudWorkspace, AirbyteWorkspace
|
|
8
8
|
from dagster_airbyte.translator import AirbyteMetadataSet, DagsterAirbyteTranslator
|
|
9
9
|
|
|
10
10
|
|
|
@@ -12,7 +12,7 @@ from dagster_airbyte.translator import AirbyteMetadataSet, DagsterAirbyteTransla
|
|
|
12
12
|
def airbyte_assets(
|
|
13
13
|
*,
|
|
14
14
|
connection_id: str,
|
|
15
|
-
workspace: AirbyteCloudWorkspace,
|
|
15
|
+
workspace: Union[AirbyteWorkspace, AirbyteCloudWorkspace],
|
|
16
16
|
name: Optional[str] = None,
|
|
17
17
|
group_name: Optional[str] = None,
|
|
18
18
|
dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
|
|
@@ -21,7 +21,7 @@ def airbyte_assets(
|
|
|
21
21
|
|
|
22
22
|
Args:
|
|
23
23
|
connection_id (str): The Airbyte Connection ID.
|
|
24
|
-
workspace (AirbyteCloudWorkspace): The Airbyte workspace to fetch assets from.
|
|
24
|
+
workspace (Union[AirbyteWorkspace, AirbyteCloudWorkspace]): The Airbyte workspace to fetch assets from.
|
|
25
25
|
name (Optional[str], optional): The name of the op.
|
|
26
26
|
group_name (Optional[str], optional): The name of the asset group.
|
|
27
27
|
dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
|
dagster_airbyte/asset_defs.py
CHANGED
|
@@ -38,7 +38,9 @@ from dagster_airbyte.resources import (
|
|
|
38
38
|
AirbyteCloudResource,
|
|
39
39
|
AirbyteCloudWorkspace,
|
|
40
40
|
AirbyteResource,
|
|
41
|
+
AirbyteWorkspace,
|
|
41
42
|
BaseAirbyteResource,
|
|
43
|
+
BaseAirbyteWorkspace,
|
|
42
44
|
)
|
|
43
45
|
from dagster_airbyte.translator import (
|
|
44
46
|
AirbyteConnection,
|
|
@@ -1040,14 +1042,14 @@ def load_assets_from_airbyte_instance(
|
|
|
1040
1042
|
@beta
|
|
1041
1043
|
def build_airbyte_assets_definitions(
|
|
1042
1044
|
*,
|
|
1043
|
-
workspace: AirbyteCloudWorkspace,
|
|
1045
|
+
workspace: Union[AirbyteWorkspace, AirbyteCloudWorkspace],
|
|
1044
1046
|
dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
|
|
1045
1047
|
connection_selector_fn: Optional[Callable[[AirbyteConnection], bool]] = None,
|
|
1046
1048
|
) -> Sequence[AssetsDefinition]:
|
|
1047
1049
|
"""The list of AssetsDefinition for all connections in the Airbyte workspace.
|
|
1048
1050
|
|
|
1049
1051
|
Args:
|
|
1050
|
-
workspace (AirbyteCloudWorkspace): The Airbyte workspace to fetch assets from.
|
|
1052
|
+
workspace (Union[AirbyteWorkspace, AirbyteCloudWorkspace]): The Airbyte workspace to fetch assets from.
|
|
1051
1053
|
dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
|
|
1052
1054
|
to convert Airbyte content into :py:class:`dagster.AssetSpec`.
|
|
1053
1055
|
Defaults to :py:class:`DagsterAirbyteTranslator`.
|
|
@@ -1164,7 +1166,7 @@ def build_airbyte_assets_definitions(
|
|
|
1164
1166
|
name=f"airbyte_{clean_name(connection_name)}",
|
|
1165
1167
|
dagster_airbyte_translator=dagster_airbyte_translator,
|
|
1166
1168
|
)
|
|
1167
|
-
def _asset_fn(context: AssetExecutionContext, airbyte: AirbyteCloudWorkspace):
|
|
1169
|
+
def _asset_fn(context: AssetExecutionContext, airbyte: BaseAirbyteWorkspace):
|
|
1168
1170
|
yield from airbyte.sync_and_poll(context=context)
|
|
1169
1171
|
|
|
1170
1172
|
_asset_fns.append(_asset_fn)
|
|
@@ -4,6 +4,7 @@ from typing import Annotated, Callable, Optional, Union
|
|
|
4
4
|
|
|
5
5
|
import dagster as dg
|
|
6
6
|
import pydantic
|
|
7
|
+
from dagster._annotations import superseded
|
|
7
8
|
from dagster._core.definitions.job_definition import default_job_io_manager
|
|
8
9
|
from dagster.components.resolved.base import resolve_fields
|
|
9
10
|
from dagster.components.utils.translation import TranslationFn, TranslationFnResolver
|
|
@@ -11,9 +12,9 @@ from dagster_shared import check
|
|
|
11
12
|
|
|
12
13
|
from dagster_airbyte.asset_defs import build_airbyte_assets_definitions
|
|
13
14
|
from dagster_airbyte.components.workspace_component.scaffolder import (
|
|
14
|
-
|
|
15
|
+
AirbyteWorkspaceComponentScaffolder,
|
|
15
16
|
)
|
|
16
|
-
from dagster_airbyte.resources import AirbyteCloudWorkspace
|
|
17
|
+
from dagster_airbyte.resources import AirbyteCloudWorkspace, AirbyteWorkspace
|
|
17
18
|
from dagster_airbyte.translator import (
|
|
18
19
|
AirbyteConnection,
|
|
19
20
|
AirbyteConnectionTableProps,
|
|
@@ -32,13 +33,100 @@ class ProxyDagsterAirbyteTranslator(DagsterAirbyteTranslator):
|
|
|
32
33
|
return spec
|
|
33
34
|
|
|
34
35
|
|
|
35
|
-
class
|
|
36
|
-
|
|
36
|
+
class BaseAirbyteWorkspaceModel(dg.Model, dg.Resolvable):
|
|
37
|
+
request_max_retries: Annotated[
|
|
38
|
+
int,
|
|
39
|
+
pydantic.Field(
|
|
40
|
+
default=3,
|
|
41
|
+
description=(
|
|
42
|
+
"The maximum number of times requests to the Airbyte API should be retried "
|
|
43
|
+
"before failing."
|
|
44
|
+
),
|
|
45
|
+
),
|
|
46
|
+
]
|
|
47
|
+
request_retry_delay: Annotated[
|
|
48
|
+
float,
|
|
49
|
+
pydantic.Field(
|
|
50
|
+
default=0.25,
|
|
51
|
+
description="Time (in seconds) to wait between each request retry.",
|
|
52
|
+
),
|
|
53
|
+
]
|
|
54
|
+
request_timeout: Annotated[
|
|
55
|
+
int,
|
|
56
|
+
pydantic.Field(
|
|
57
|
+
default=15,
|
|
58
|
+
description="Time (in seconds) after which the requests to Airbyte are declared timed out.",
|
|
59
|
+
),
|
|
60
|
+
]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class AirbyteWorkspaceModel(BaseAirbyteWorkspaceModel):
|
|
64
|
+
rest_api_base_url: Annotated[
|
|
65
|
+
str,
|
|
66
|
+
pydantic.Field(
|
|
67
|
+
...,
|
|
68
|
+
description=(
|
|
69
|
+
"The base URL for the Airbyte REST API. "
|
|
70
|
+
"For Airbyte Cloud, leave this as the default. "
|
|
71
|
+
"For self-managed Airbyte, this is usually <your Airbyte host>/api/public/v1."
|
|
72
|
+
),
|
|
73
|
+
examples=[
|
|
74
|
+
"http://localhost:8000/api/public/v1",
|
|
75
|
+
"https://my-airbyte-server.com/api/public/v1",
|
|
76
|
+
"http://airbyte-airbyte-server-svc.airbyte.svc.cluster.local:8001/api/public/v1",
|
|
77
|
+
],
|
|
78
|
+
),
|
|
79
|
+
]
|
|
80
|
+
configuration_api_base_url: Annotated[
|
|
81
|
+
str,
|
|
82
|
+
pydantic.Field(
|
|
83
|
+
...,
|
|
84
|
+
description=(
|
|
85
|
+
"The base URL for the Airbyte Configuration API. "
|
|
86
|
+
"For Airbyte Cloud, leave this as the default. "
|
|
87
|
+
"For self-managed Airbyte, this is usually <your Airbyte host>/api/v1."
|
|
88
|
+
),
|
|
89
|
+
examples=[
|
|
90
|
+
"http://localhost:8000/api/v1",
|
|
91
|
+
"https://my-airbyte-server.com/api/v1",
|
|
92
|
+
"http://airbyte-airbyte-server-svc.airbyte.svc.cluster.local:8001/api/v1",
|
|
93
|
+
],
|
|
94
|
+
),
|
|
95
|
+
]
|
|
96
|
+
workspace_id: Annotated[str, pydantic.Field(..., description="The Airbyte workspace ID.")]
|
|
97
|
+
client_id: Annotated[
|
|
98
|
+
Optional[str],
|
|
99
|
+
pydantic.Field(None, description="Client ID used to authenticate to Airbyte."),
|
|
100
|
+
]
|
|
101
|
+
client_secret: Annotated[
|
|
102
|
+
Optional[str],
|
|
103
|
+
pydantic.Field(None, description="Client secret used to authenticate to Airbyte."),
|
|
104
|
+
]
|
|
105
|
+
username: Annotated[
|
|
106
|
+
Optional[str],
|
|
107
|
+
pydantic.Field(
|
|
108
|
+
None,
|
|
109
|
+
description="Username used to authenticate to Airbyte. Used for self-managed Airbyte with basic auth.",
|
|
110
|
+
),
|
|
111
|
+
]
|
|
112
|
+
password: Annotated[
|
|
113
|
+
Optional[str],
|
|
114
|
+
pydantic.Field(
|
|
115
|
+
None,
|
|
116
|
+
description="Password used to authenticate to Airbyte. Used for self-managed Airbyte with basic auth.",
|
|
117
|
+
),
|
|
118
|
+
]
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class AirbyteCloudWorkspaceModel(BaseAirbyteWorkspaceModel):
|
|
122
|
+
workspace_id: Annotated[str, pydantic.Field(..., description="The Airbyte workspace ID.")]
|
|
37
123
|
client_id: Annotated[
|
|
38
|
-
str,
|
|
124
|
+
Optional[str],
|
|
125
|
+
pydantic.Field(..., description="Client ID used to authenticate to Airbyte."),
|
|
39
126
|
]
|
|
40
127
|
client_secret: Annotated[
|
|
41
|
-
str,
|
|
128
|
+
Optional[str],
|
|
129
|
+
pydantic.Field(..., description="Client secret used to authenticate to Airbyte."),
|
|
42
130
|
]
|
|
43
131
|
|
|
44
132
|
|
|
@@ -70,19 +158,27 @@ def resolve_connection_selector(
|
|
|
70
158
|
check.failed(f"Unknown connection target type: {type(model)}")
|
|
71
159
|
|
|
72
160
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
161
|
+
def resolve_airbyte_workspace_type(context: dg.ResolutionContext, model):
|
|
162
|
+
if isinstance(model, AirbyteWorkspaceModel):
|
|
163
|
+
return AirbyteWorkspace(**resolve_fields(model, AirbyteWorkspaceModel, context))
|
|
164
|
+
elif isinstance(model, AirbyteCloudWorkspaceModel):
|
|
165
|
+
return AirbyteCloudWorkspace(**resolve_fields(model, AirbyteCloudWorkspaceModel, context))
|
|
166
|
+
else:
|
|
167
|
+
check.failed(f"Unknown Airbyte workspace type: {type(model)}")
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@dg.scaffold_with(AirbyteWorkspaceComponentScaffolder)
|
|
171
|
+
class AirbyteWorkspaceComponent(dg.Component, dg.Model, dg.Resolvable):
|
|
172
|
+
"""Loads Airbyte connections from a given Airbyte workspace as Dagster assets.
|
|
173
|
+
Materializing these assets will trigger a sync of the Airbyte connection, enabling
|
|
174
|
+
you to schedule Airbyte syncs using Dagster.
|
|
78
175
|
"""
|
|
79
176
|
|
|
80
177
|
workspace: Annotated[
|
|
81
|
-
AirbyteCloudWorkspace,
|
|
178
|
+
Union[AirbyteWorkspace, AirbyteCloudWorkspace],
|
|
82
179
|
dg.Resolver(
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
)
|
|
180
|
+
resolve_airbyte_workspace_type,
|
|
181
|
+
model_field_type=Union[AirbyteWorkspaceModel, AirbyteCloudWorkspaceModel],
|
|
86
182
|
),
|
|
87
183
|
]
|
|
88
184
|
connection_selector: Annotated[
|
|
@@ -92,7 +188,7 @@ class AirbyteCloudWorkspaceComponent(dg.Component, dg.Model, dg.Resolvable):
|
|
|
92
188
|
model_field_type=Union[
|
|
93
189
|
str, AirbyteConnectionSelectorByName, AirbyteConnectionSelectorById
|
|
94
190
|
],
|
|
95
|
-
description="Function used to select Airbyte
|
|
191
|
+
description="Function used to select Airbyte connections to pull into Dagster.",
|
|
96
192
|
),
|
|
97
193
|
] = None
|
|
98
194
|
translation: Optional[
|
|
@@ -102,13 +198,9 @@ class AirbyteCloudWorkspaceComponent(dg.Component, dg.Model, dg.Resolvable):
|
|
|
102
198
|
]
|
|
103
199
|
] = pydantic.Field(
|
|
104
200
|
None,
|
|
105
|
-
description="Function used to translate Airbyte
|
|
201
|
+
description="Function used to translate Airbyte connection table properties into Dagster asset specs.",
|
|
106
202
|
)
|
|
107
203
|
|
|
108
|
-
@cached_property
|
|
109
|
-
def workspace_resource(self) -> AirbyteCloudWorkspace:
|
|
110
|
-
return self.workspace
|
|
111
|
-
|
|
112
204
|
@cached_property
|
|
113
205
|
def translator(self) -> DagsterAirbyteTranslator:
|
|
114
206
|
if self.translation:
|
|
@@ -117,17 +209,22 @@ class AirbyteCloudWorkspaceComponent(dg.Component, dg.Model, dg.Resolvable):
|
|
|
117
209
|
|
|
118
210
|
def build_defs(self, context: dg.ComponentLoadContext) -> dg.Definitions:
|
|
119
211
|
airbyte_assets = build_airbyte_assets_definitions(
|
|
120
|
-
workspace=self.workspace_resource,
|
|
212
|
+
workspace=self.workspace,
|
|
121
213
|
dagster_airbyte_translator=self.translator,
|
|
122
214
|
connection_selector_fn=self.connection_selector,
|
|
123
215
|
)
|
|
124
216
|
assets_with_resource = [
|
|
125
217
|
airbyte_asset.with_resources(
|
|
126
218
|
{
|
|
127
|
-
"airbyte": self.
|
|
219
|
+
"airbyte": self.workspace.get_resource_definition(),
|
|
128
220
|
"io_manager": default_job_io_manager,
|
|
129
221
|
}
|
|
130
222
|
)
|
|
131
223
|
for airbyte_asset in airbyte_assets
|
|
132
224
|
]
|
|
133
225
|
return dg.Definitions(assets=assets_with_resource)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
# Subclassing to create the alias to be able to use the superseded decorator.
|
|
229
|
+
@superseded(additional_warn_text="Superseded. Use AirbyteWorkspaceComponent instead.")
|
|
230
|
+
class AirbyteCloudWorkspaceComponent(AirbyteWorkspaceComponent): ...
|
|
@@ -12,7 +12,7 @@ class AirbyteScaffolderParams(BaseModel):
|
|
|
12
12
|
client_secret: Optional[str] = None
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
class
|
|
15
|
+
class AirbyteWorkspaceComponentScaffolder(Scaffolder[AirbyteScaffolderParams]):
|
|
16
16
|
@classmethod
|
|
17
17
|
def get_scaffold_params(cls) -> type[AirbyteScaffolderParams]:
|
|
18
18
|
return AirbyteScaffolderParams
|
dagster_airbyte/resources.py
CHANGED
|
@@ -7,12 +7,13 @@ from abc import abstractmethod
|
|
|
7
7
|
from collections.abc import Iterator, Mapping, Sequence
|
|
8
8
|
from contextlib import contextmanager
|
|
9
9
|
from datetime import datetime, timedelta
|
|
10
|
-
from typing import Any, Callable, Optional, cast
|
|
10
|
+
from typing import Any, Callable, ClassVar, Optional, Union, cast
|
|
11
11
|
|
|
12
12
|
import requests
|
|
13
13
|
from dagster import (
|
|
14
14
|
AssetExecutionContext,
|
|
15
15
|
AssetMaterialization,
|
|
16
|
+
AssetSpec,
|
|
16
17
|
ConfigurableResource,
|
|
17
18
|
Definitions,
|
|
18
19
|
Failure,
|
|
@@ -22,16 +23,16 @@ from dagster import (
|
|
|
22
23
|
get_dagster_logger,
|
|
23
24
|
resource,
|
|
24
25
|
)
|
|
25
|
-
from dagster._annotations import
|
|
26
|
+
from dagster._annotations import superseded
|
|
26
27
|
from dagster._config.pythonic_config import infer_schema_from_config_class
|
|
27
|
-
from dagster._core.definitions.assets.definition.asset_spec import AssetSpec
|
|
28
28
|
from dagster._core.definitions.definitions_load_context import StateBackedDefinitionsLoader
|
|
29
29
|
from dagster._core.definitions.resource_definition import dagster_maintained_resource
|
|
30
|
-
from dagster.
|
|
31
|
-
from dagster._utils.cached_method import cached_method
|
|
30
|
+
from dagster._symbol_annotations import beta, public
|
|
32
31
|
from dagster._utils.merger import deep_merge_dicts
|
|
33
32
|
from dagster_shared.dagster_model import DagsterModel
|
|
34
|
-
from
|
|
33
|
+
from dagster_shared.record import record
|
|
34
|
+
from dagster_shared.utils.cached_method import cached_method
|
|
35
|
+
from pydantic import Field, PrivateAttr, model_validator
|
|
35
36
|
from requests.exceptions import RequestException
|
|
36
37
|
|
|
37
38
|
from dagster_airbyte.translator import (
|
|
@@ -51,17 +52,20 @@ from dagster_airbyte.utils import (
|
|
|
51
52
|
get_translator_from_airbyte_assets,
|
|
52
53
|
)
|
|
53
54
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
55
|
+
AIRBYTE_CLOUD_REST_API_BASE = "https://api.airbyte.com"
|
|
56
|
+
AIRBYTE_CLOUD_REST_API_VERSION = "v1"
|
|
57
|
+
AIRBYTE_CLOUD_REST_API_BASE_URL = f"{AIRBYTE_CLOUD_REST_API_BASE}/{AIRBYTE_CLOUD_REST_API_VERSION}"
|
|
58
|
+
AIRBYTE_CLOUD_CONFIGURATION_API_BASE = "https://cloud.airbyte.com/api"
|
|
59
|
+
AIRBYTE_CLOUD_CONFIGURATION_API_VERSION = "v1"
|
|
60
|
+
AIRBYTE_CLOUD_CONFIGURATION_API_BASE_URL = (
|
|
61
|
+
f"{AIRBYTE_CLOUD_CONFIGURATION_API_BASE}/{AIRBYTE_CLOUD_CONFIGURATION_API_VERSION}"
|
|
62
|
+
)
|
|
59
63
|
|
|
60
64
|
DEFAULT_POLL_INTERVAL_SECONDS = 10
|
|
61
65
|
|
|
62
66
|
# The access token expire every 3 minutes in Airbyte Cloud.
|
|
63
67
|
# Refresh is needed after 2.5 minutes to avoid the "token expired" error message.
|
|
64
|
-
|
|
68
|
+
AIRBYTE_REFRESH_TIMEDELTA_SECONDS = 150
|
|
65
69
|
|
|
66
70
|
AIRBYTE_RECONSTRUCTION_METADATA_KEY_PREFIX = "dagster-airbyte/reconstruction_metadata"
|
|
67
71
|
|
|
@@ -410,7 +414,7 @@ class AirbyteCloudResource(BaseAirbyteResource):
|
|
|
410
414
|
or not self._access_token_timestamp
|
|
411
415
|
or self._access_token_timestamp
|
|
412
416
|
<= datetime.timestamp(
|
|
413
|
-
datetime.now() - timedelta(seconds=AIRBYTE_CLOUD_REFRESH_TIMEDELTA_SECONDS)
|
|
417
|
+
datetime.now() - timedelta(seconds=AIRBYTE_REFRESH_TIMEDELTA_SECONDS)
|
|
414
418
|
)
|
|
415
419
|
)
|
|
416
420
|
|
|
@@ -849,12 +853,36 @@ def airbyte_cloud_resource(context) -> AirbyteCloudResource:
|
|
|
849
853
|
|
|
850
854
|
|
|
851
855
|
@beta
|
|
852
|
-
class AirbyteCloudClient(DagsterModel):
|
|
853
|
-
"""This class exposes methods on top of the Airbyte APIs for Airbyte
|
|
856
|
+
class AirbyteClient(DagsterModel):
|
|
857
|
+
"""This class exposes methods on top of the Airbyte APIs for Airbyte."""
|
|
854
858
|
|
|
859
|
+
rest_api_base_url: str = Field(
|
|
860
|
+
default=AIRBYTE_CLOUD_REST_API_BASE_URL,
|
|
861
|
+
description=(
|
|
862
|
+
"The base URL for the Airbyte REST API. "
|
|
863
|
+
"For Airbyte Cloud, leave this as the default. "
|
|
864
|
+
"For self-managed Airbyte, this is usually <your Airbyte host>/api/public/v1."
|
|
865
|
+
),
|
|
866
|
+
)
|
|
867
|
+
configuration_api_base_url: str = Field(
|
|
868
|
+
default=AIRBYTE_CLOUD_CONFIGURATION_API_BASE_URL,
|
|
869
|
+
description=(
|
|
870
|
+
"The base URL for the Airbyte Configuration API. "
|
|
871
|
+
"For Airbyte Cloud, leave this as the default. "
|
|
872
|
+
"For self-managed Airbyte, this is usually <your Airbyte host>/api/v1."
|
|
873
|
+
),
|
|
874
|
+
)
|
|
855
875
|
workspace_id: str = Field(..., description="The Airbyte workspace ID")
|
|
856
|
-
client_id: str = Field(
|
|
857
|
-
client_secret: str = Field(
|
|
876
|
+
client_id: Optional[str] = Field(default=None, description="The Airbyte client ID.")
|
|
877
|
+
client_secret: Optional[str] = Field(default=None, description="The Airbyte client secret.")
|
|
878
|
+
username: Optional[str] = Field(
|
|
879
|
+
default=None,
|
|
880
|
+
description="The Airbyte username for authentication. Used for self-managed Airbyte with basic auth.",
|
|
881
|
+
)
|
|
882
|
+
password: Optional[str] = Field(
|
|
883
|
+
default=None,
|
|
884
|
+
description="The Airbyte password for authentication. Used for self-managed Airbyte with basic auth.",
|
|
885
|
+
)
|
|
858
886
|
request_max_retries: int = Field(
|
|
859
887
|
...,
|
|
860
888
|
description=(
|
|
@@ -874,19 +902,34 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
874
902
|
_access_token_value: Optional[str] = PrivateAttr(default=None)
|
|
875
903
|
_access_token_timestamp: Optional[float] = PrivateAttr(default=None)
|
|
876
904
|
|
|
905
|
+
@model_validator(mode="before")
|
|
906
|
+
def validate_authentication(cls, values):
|
|
907
|
+
has_client_id = values.get("client_id") is not None
|
|
908
|
+
has_client_secret = values.get("client_secret") is not None
|
|
909
|
+
has_username = values.get("username") is not None
|
|
910
|
+
has_password = values.get("password") is not None
|
|
911
|
+
|
|
912
|
+
check.invariant(
|
|
913
|
+
has_username == has_password,
|
|
914
|
+
"Missing config: both username and password are required for Airbyte authentication.",
|
|
915
|
+
)
|
|
916
|
+
|
|
917
|
+
check.invariant(
|
|
918
|
+
has_client_id == has_client_secret,
|
|
919
|
+
"Missing config: both client_id and client_secret are required for Airbyte authentication.",
|
|
920
|
+
)
|
|
921
|
+
|
|
922
|
+
check.invariant(
|
|
923
|
+
not ((has_client_id or has_client_secret) and (has_username or has_password)),
|
|
924
|
+
"Invalid config: cannot provide both client_id/client_secret and username/password for Airbyte authentication.",
|
|
925
|
+
)
|
|
926
|
+
return values
|
|
927
|
+
|
|
877
928
|
@property
|
|
878
929
|
@cached_method
|
|
879
930
|
def _log(self) -> logging.Logger:
|
|
880
931
|
return get_dagster_logger()
|
|
881
932
|
|
|
882
|
-
@property
|
|
883
|
-
def rest_api_base_url(self) -> str:
|
|
884
|
-
return f"{AIRBYTE_REST_API_BASE}/{AIRBYTE_REST_API_VERSION}"
|
|
885
|
-
|
|
886
|
-
@property
|
|
887
|
-
def configuration_api_base_url(self) -> str:
|
|
888
|
-
return f"{AIRBYTE_CONFIGURATION_API_BASE}/{AIRBYTE_CONFIGURATION_API_VERSION}"
|
|
889
|
-
|
|
890
933
|
@property
|
|
891
934
|
def all_additional_request_params(self) -> Mapping[str, Any]:
|
|
892
935
|
return {**self.authorization_request_params, **self.user_agent_request_params}
|
|
@@ -894,6 +937,9 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
894
937
|
@property
|
|
895
938
|
def authorization_request_params(self) -> Mapping[str, Any]:
|
|
896
939
|
# Make sure the access token is refreshed before using it when calling the API.
|
|
940
|
+
if not (self.client_id and self.client_secret):
|
|
941
|
+
return {}
|
|
942
|
+
|
|
897
943
|
if self._needs_refreshed_access_token():
|
|
898
944
|
self._refresh_access_token()
|
|
899
945
|
return {
|
|
@@ -908,10 +954,9 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
908
954
|
|
|
909
955
|
def _refresh_access_token(self) -> None:
|
|
910
956
|
response = check.not_none(
|
|
911
|
-
self.
|
|
957
|
+
self._single_request(
|
|
912
958
|
method="POST",
|
|
913
|
-
|
|
914
|
-
base_url=self.rest_api_base_url,
|
|
959
|
+
url=f"{self.rest_api_base_url}/applications/token",
|
|
915
960
|
data={
|
|
916
961
|
"client_id": self.client_id,
|
|
917
962
|
"client_secret": self.client_secret,
|
|
@@ -928,9 +973,7 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
928
973
|
not self._access_token_value
|
|
929
974
|
or not self._access_token_timestamp
|
|
930
975
|
or self._access_token_timestamp
|
|
931
|
-
<= (
|
|
932
|
-
datetime.now() - timedelta(seconds=AIRBYTE_CLOUD_REFRESH_TIMEDELTA_SECONDS)
|
|
933
|
-
).timestamp()
|
|
976
|
+
<= (datetime.now() - timedelta(seconds=AIRBYTE_REFRESH_TIMEDELTA_SECONDS)).timestamp()
|
|
934
977
|
)
|
|
935
978
|
|
|
936
979
|
def _get_session(self, include_additional_request_params: bool) -> requests.Session:
|
|
@@ -942,33 +985,21 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
942
985
|
}
|
|
943
986
|
session = requests.Session()
|
|
944
987
|
session.headers.update(headers)
|
|
988
|
+
|
|
989
|
+
if self.username and self.password:
|
|
990
|
+
session.auth = (self.username, self.password)
|
|
991
|
+
|
|
945
992
|
return session
|
|
946
993
|
|
|
947
|
-
def
|
|
994
|
+
def _single_request(
|
|
948
995
|
self,
|
|
949
996
|
method: str,
|
|
950
|
-
|
|
951
|
-
base_url: str,
|
|
997
|
+
url: str,
|
|
952
998
|
data: Optional[Mapping[str, Any]] = None,
|
|
953
999
|
params: Optional[Mapping[str, Any]] = None,
|
|
954
1000
|
include_additional_request_params: bool = True,
|
|
955
1001
|
) -> Mapping[str, Any]:
|
|
956
|
-
"""
|
|
957
|
-
|
|
958
|
-
Args:
|
|
959
|
-
method (str): The http method to use for this request (e.g. "POST", "GET", "PATCH").
|
|
960
|
-
endpoint (str): The Airbyte API endpoint to send this request to.
|
|
961
|
-
base_url (str): The base url to the Airbyte API to use.
|
|
962
|
-
data (Optional[Dict[str, Any]]): JSON-formatted data string to be included in the request.
|
|
963
|
-
params (Optional[Dict[str, Any]]): JSON-formatted query params to be included in the request.
|
|
964
|
-
include_additional_request_params (bool): Whether to include authorization and user-agent headers
|
|
965
|
-
to the request parameters. Defaults to True.
|
|
966
|
-
|
|
967
|
-
Returns:
|
|
968
|
-
Dict[str, Any]: Parsed json data from the response to this request
|
|
969
|
-
"""
|
|
970
|
-
url = f"{base_url}/{endpoint}"
|
|
971
|
-
|
|
1002
|
+
"""Execute a single HTTP request with retry logic."""
|
|
972
1003
|
num_retries = 0
|
|
973
1004
|
while True:
|
|
974
1005
|
try:
|
|
@@ -989,14 +1020,48 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
989
1020
|
num_retries += 1
|
|
990
1021
|
time.sleep(self.request_retry_delay)
|
|
991
1022
|
|
|
992
|
-
|
|
1023
|
+
raise Failure(f"Max retries ({self.request_max_retries}) exceeded with url: {url}.")
|
|
1024
|
+
|
|
1025
|
+
return {}
|
|
993
1026
|
|
|
994
|
-
def
|
|
1027
|
+
def _paginated_request(
|
|
1028
|
+
self,
|
|
1029
|
+
method: str,
|
|
1030
|
+
url: str,
|
|
1031
|
+
params: Mapping[str, Any],
|
|
1032
|
+
data: Optional[Mapping[str, Any]] = None,
|
|
1033
|
+
include_additional_request_params: bool = True,
|
|
1034
|
+
) -> Sequence[Mapping[str, Any]]:
|
|
1035
|
+
"""Execute paginated requests and yield all items."""
|
|
1036
|
+
result_data = []
|
|
1037
|
+
while url != "":
|
|
1038
|
+
response = self._single_request(
|
|
1039
|
+
method=method,
|
|
1040
|
+
url=url,
|
|
1041
|
+
data=data,
|
|
1042
|
+
params=params,
|
|
1043
|
+
include_additional_request_params=include_additional_request_params,
|
|
1044
|
+
)
|
|
1045
|
+
|
|
1046
|
+
# Handle different response structures
|
|
1047
|
+
result_data.extend(response.get("data", []))
|
|
1048
|
+
url = response.get("next", "")
|
|
1049
|
+
params = {}
|
|
1050
|
+
|
|
1051
|
+
return result_data
|
|
1052
|
+
|
|
1053
|
+
def validate_workspace_id(self) -> None:
|
|
1054
|
+
"""Fetches workspace details. This is used to validate that the workspace exists."""
|
|
1055
|
+
self._single_request(
|
|
1056
|
+
method="GET",
|
|
1057
|
+
url=f"{self.rest_api_base_url}/workspaces/{self.workspace_id}",
|
|
1058
|
+
)
|
|
1059
|
+
|
|
1060
|
+
def get_connections(self) -> Sequence[Mapping[str, Any]]:
|
|
995
1061
|
"""Fetches all connections of an Airbyte workspace from the Airbyte REST API."""
|
|
996
|
-
return self.
|
|
1062
|
+
return self._paginated_request(
|
|
997
1063
|
method="GET",
|
|
998
|
-
|
|
999
|
-
base_url=self.rest_api_base_url,
|
|
1064
|
+
url=f"{self.rest_api_base_url}/connections",
|
|
1000
1065
|
params={"workspaceIds": self.workspace_id},
|
|
1001
1066
|
)
|
|
1002
1067
|
|
|
@@ -1007,26 +1072,23 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
1007
1072
|
# Using the Airbyte Configuration API to get the connection details, including streams and their configs.
|
|
1008
1073
|
# https://airbyte-public-api-docs.s3.us-east-2.amazonaws.com/rapidoc-api-docs.html#post-/v1/connections/get
|
|
1009
1074
|
# https://github.com/airbytehq/airbyte-platform/blob/v1.0.0/airbyte-api/server-api/src/main/openapi/config.yaml
|
|
1010
|
-
return self.
|
|
1075
|
+
return self._single_request(
|
|
1011
1076
|
method="POST",
|
|
1012
|
-
|
|
1013
|
-
base_url=self.configuration_api_base_url,
|
|
1077
|
+
url=f"{self.configuration_api_base_url}/connections/get",
|
|
1014
1078
|
data={"connectionId": connection_id},
|
|
1015
1079
|
)
|
|
1016
1080
|
|
|
1017
1081
|
def get_destination_details(self, destination_id: str) -> Mapping[str, Any]:
|
|
1018
1082
|
"""Fetches details about a given destination from the Airbyte REST API."""
|
|
1019
|
-
return self.
|
|
1083
|
+
return self._single_request(
|
|
1020
1084
|
method="GET",
|
|
1021
|
-
|
|
1022
|
-
base_url=self.rest_api_base_url,
|
|
1085
|
+
url=f"{self.rest_api_base_url}/destinations/{destination_id}",
|
|
1023
1086
|
)
|
|
1024
1087
|
|
|
1025
1088
|
def start_sync_job(self, connection_id: str) -> Mapping[str, Any]:
|
|
1026
|
-
return self.
|
|
1089
|
+
return self._single_request(
|
|
1027
1090
|
method="POST",
|
|
1028
|
-
|
|
1029
|
-
base_url=self.rest_api_base_url,
|
|
1091
|
+
url=f"{self.rest_api_base_url}/jobs",
|
|
1030
1092
|
data={
|
|
1031
1093
|
"connectionId": connection_id,
|
|
1032
1094
|
"jobType": "sync",
|
|
@@ -1034,13 +1096,15 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
1034
1096
|
)
|
|
1035
1097
|
|
|
1036
1098
|
def get_job_details(self, job_id: int) -> Mapping[str, Any]:
|
|
1037
|
-
return self.
|
|
1038
|
-
method="GET",
|
|
1099
|
+
return self._single_request(
|
|
1100
|
+
method="GET",
|
|
1101
|
+
url=f"{self.rest_api_base_url}/jobs/{job_id}",
|
|
1039
1102
|
)
|
|
1040
1103
|
|
|
1041
1104
|
def cancel_job(self, job_id: int) -> Mapping[str, Any]:
|
|
1042
|
-
return self.
|
|
1043
|
-
method="DELETE",
|
|
1105
|
+
return self._single_request(
|
|
1106
|
+
method="DELETE",
|
|
1107
|
+
url=f"{self.rest_api_base_url}/jobs/{job_id}",
|
|
1044
1108
|
)
|
|
1045
1109
|
|
|
1046
1110
|
def sync_and_poll(
|
|
@@ -1119,14 +1183,11 @@ class AirbyteCloudClient(DagsterModel):
|
|
|
1119
1183
|
|
|
1120
1184
|
|
|
1121
1185
|
@beta
|
|
1122
|
-
class AirbyteCloudWorkspace(ConfigurableResource):
|
|
1123
|
-
"""This class represents a Airbyte
|
|
1186
|
+
class BaseAirbyteWorkspace(ConfigurableResource):
|
|
1187
|
+
"""This class represents a Airbyte workspace and provides utilities
|
|
1124
1188
|
to interact with Airbyte APIs.
|
|
1125
1189
|
"""
|
|
1126
1190
|
|
|
1127
|
-
workspace_id: str = Field(..., description="The Airbyte Cloud workspace ID")
|
|
1128
|
-
client_id: str = Field(..., description="The Airbyte Cloud client ID.")
|
|
1129
|
-
client_secret: str = Field(..., description="The Airbyte Cloud client secret.")
|
|
1130
1191
|
request_max_retries: int = Field(
|
|
1131
1192
|
default=3,
|
|
1132
1193
|
description=(
|
|
@@ -1142,18 +1203,7 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1142
1203
|
default=15,
|
|
1143
1204
|
description="Time (in seconds) after which the requests to Airbyte are declared timed out.",
|
|
1144
1205
|
)
|
|
1145
|
-
_client:
|
|
1146
|
-
|
|
1147
|
-
@cached_method
|
|
1148
|
-
def get_client(self) -> AirbyteCloudClient:
|
|
1149
|
-
return AirbyteCloudClient(
|
|
1150
|
-
workspace_id=self.workspace_id,
|
|
1151
|
-
client_id=self.client_id,
|
|
1152
|
-
client_secret=self.client_secret,
|
|
1153
|
-
request_max_retries=self.request_max_retries,
|
|
1154
|
-
request_retry_delay=self.request_retry_delay,
|
|
1155
|
-
request_timeout=self.request_timeout,
|
|
1156
|
-
)
|
|
1206
|
+
_client: AirbyteClient = PrivateAttr(default=None) # type: ignore
|
|
1157
1207
|
|
|
1158
1208
|
@cached_method
|
|
1159
1209
|
def fetch_airbyte_workspace_data(
|
|
@@ -1168,7 +1218,10 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1168
1218
|
destinations_by_id = {}
|
|
1169
1219
|
|
|
1170
1220
|
client = self.get_client()
|
|
1171
|
-
|
|
1221
|
+
|
|
1222
|
+
client.validate_workspace_id()
|
|
1223
|
+
|
|
1224
|
+
connections = client.get_connections()
|
|
1172
1225
|
|
|
1173
1226
|
for partial_connection_details in connections:
|
|
1174
1227
|
full_connection_details = client.get_connection_details(
|
|
@@ -1214,14 +1267,14 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1214
1267
|
Loading the asset specs for a given Airbyte workspace:
|
|
1215
1268
|
.. code-block:: python
|
|
1216
1269
|
|
|
1217
|
-
from dagster_airbyte import AirbyteCloudWorkspace
|
|
1270
|
+
from dagster_airbyte import AirbyteWorkspace
|
|
1218
1271
|
|
|
1219
1272
|
import dagster as dg
|
|
1220
1273
|
|
|
1221
|
-
airbyte_workspace = AirbyteCloudWorkspace(
|
|
1222
|
-
workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
|
|
1223
|
-
client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
|
|
1224
|
-
client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
|
|
1274
|
+
airbyte_workspace = AirbyteWorkspace(
|
|
1275
|
+
workspace_id=dg.EnvVar("AIRBYTE_WORKSPACE_ID"),
|
|
1276
|
+
client_id=dg.EnvVar("AIRBYTE_CLIENT_ID"),
|
|
1277
|
+
client_secret=dg.EnvVar("AIRBYTE_CLIENT_SECRET"),
|
|
1225
1278
|
)
|
|
1226
1279
|
|
|
1227
1280
|
airbyte_specs = airbyte_workspace.load_asset_specs()
|
|
@@ -1229,7 +1282,7 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1229
1282
|
"""
|
|
1230
1283
|
dagster_airbyte_translator = dagster_airbyte_translator or DagsterAirbyteTranslator()
|
|
1231
1284
|
|
|
1232
|
-
return load_airbyte_cloud_asset_specs(
|
|
1285
|
+
return load_airbyte_asset_specs(
|
|
1233
1286
|
workspace=self,
|
|
1234
1287
|
dagster_airbyte_translator=dagster_airbyte_translator,
|
|
1235
1288
|
connection_selector_fn=connection_selector_fn,
|
|
@@ -1267,7 +1320,7 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1267
1320
|
yield AssetMaterialization(
|
|
1268
1321
|
asset_key=stream_asset_spec.key,
|
|
1269
1322
|
description=(
|
|
1270
|
-
f"Table generated via Airbyte
|
|
1323
|
+
f"Table generated via Airbyte sync "
|
|
1271
1324
|
f"for connection {connection.name}: {connection_table_name}"
|
|
1272
1325
|
),
|
|
1273
1326
|
metadata=stream_asset_spec.metadata,
|
|
@@ -1276,7 +1329,7 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1276
1329
|
@public
|
|
1277
1330
|
@beta
|
|
1278
1331
|
def sync_and_poll(self, context: AssetExecutionContext):
|
|
1279
|
-
"""Executes a sync and poll process to materialize Airbyte
|
|
1332
|
+
"""Executes a sync and poll process to materialize Airbyte assets.
|
|
1280
1333
|
This method can only be used in the context of an asset execution.
|
|
1281
1334
|
|
|
1282
1335
|
Args:
|
|
@@ -1322,9 +1375,9 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1322
1375
|
context.log.warning(f"Assets were not materialized: {unmaterialized_asset_keys}")
|
|
1323
1376
|
|
|
1324
1377
|
@contextmanager
|
|
1325
|
-
def process_config_and_initialize_cm_cached(self) -> Iterator["AirbyteCloudWorkspace"]:
|
|
1378
|
+
def process_config_and_initialize_cm_cached(self) -> Iterator["AirbyteWorkspace"]:
|
|
1326
1379
|
# Hack to avoid reconstructing initialized copies of this resource, which invalidates
|
|
1327
|
-
# @cached_method caches. This means that multiple calls to load_airbyte_cloud_asset_specs
|
|
1380
|
+
# @cached_method caches. This means that multiple calls to load_airbyte_asset_specs
|
|
1328
1381
|
# will not trigger multiple API calls to fetch the workspace data.
|
|
1329
1382
|
# Bespoke impl since @cached_method doesn't play nice with iterators; it's exhausted after
|
|
1330
1383
|
# the first call.
|
|
@@ -1338,6 +1391,247 @@ class AirbyteCloudWorkspace(ConfigurableResource):
|
|
|
1338
1391
|
|
|
1339
1392
|
|
|
1340
1393
|
@beta
|
|
1394
|
+
class AirbyteWorkspace(BaseAirbyteWorkspace):
|
|
1395
|
+
"""This resource allows users to programatically interface with the Airbyte REST API to launch
|
|
1396
|
+
syncs and monitor their progress for a given Airbyte workspace.
|
|
1397
|
+
|
|
1398
|
+
**Examples:**
|
|
1399
|
+
Using OAuth client credentials:
|
|
1400
|
+
|
|
1401
|
+
.. code-block:: python
|
|
1402
|
+
|
|
1403
|
+
import dagster as dg
|
|
1404
|
+
from dagster_airbyte import AirbyteWorkspace, build_airbyte_assets_definitions
|
|
1405
|
+
|
|
1406
|
+
airbyte_workspace = AirbyteWorkspace(
|
|
1407
|
+
rest_api_base_url=dg.EnvVar("AIRBYTE_REST_API_BASE_URL"),
|
|
1408
|
+
configuration_api_base_url=dg.EnvVar("AIRBYTE_CONFIGURATION_API_BASE_URL"),
|
|
1409
|
+
workspace_id=dg.EnvVar("AIRBYTE_WORKSPACE_ID"),
|
|
1410
|
+
client_id=dg.EnvVar("AIRBYTE_CLIENT_ID"),
|
|
1411
|
+
client_secret=dg.EnvVar("AIRBYTE_CLIENT_SECRET"),
|
|
1412
|
+
)
|
|
1413
|
+
|
|
1414
|
+
all_airbyte_assets = build_airbyte_assets_definitions(workspace=airbyte_workspace)
|
|
1415
|
+
|
|
1416
|
+
defs = dg.Definitions(
|
|
1417
|
+
assets=all_airbyte_assets,
|
|
1418
|
+
resources={"airbyte": airbyte_workspace},
|
|
1419
|
+
)
|
|
1420
|
+
|
|
1421
|
+
Using basic Authentication:
|
|
1422
|
+
|
|
1423
|
+
.. code-block:: python
|
|
1424
|
+
|
|
1425
|
+
import dagster as dg
|
|
1426
|
+
from dagster_airbyte import AirbyteWorkspace, build_airbyte_assets_definitions
|
|
1427
|
+
|
|
1428
|
+
airbyte_workspace = AirbyteWorkspace(
|
|
1429
|
+
rest_api_base_url=dg.EnvVar("AIRBYTE_REST_API_BASE_URL"),
|
|
1430
|
+
configuration_api_base_url=dg.EnvVar("AIRBYTE_CONFIGURATION_API_BASE_URL"),
|
|
1431
|
+
workspace_id=dg.EnvVar("AIRBYTE_WORKSPACE_ID"),
|
|
1432
|
+
username=dg.EnvVar("AIRBYTE_USERNAME"),
|
|
1433
|
+
password=dg.EnvVar("AIRBYTE_PASSWORD"),
|
|
1434
|
+
)
|
|
1435
|
+
|
|
1436
|
+
all_airbyte_assets = build_airbyte_assets_definitions(workspace=airbyte_workspace)
|
|
1437
|
+
|
|
1438
|
+
defs = dg.Definitions(
|
|
1439
|
+
assets=all_airbyte_assets,
|
|
1440
|
+
resources={"airbyte": airbyte_workspace},
|
|
1441
|
+
)
|
|
1442
|
+
|
|
1443
|
+
Using no authentication:
|
|
1444
|
+
|
|
1445
|
+
.. code-block:: python
|
|
1446
|
+
|
|
1447
|
+
import dagster as dg
|
|
1448
|
+
from dagster_airbyte import AirbyteWorkspace, build_airbyte_assets_definitions
|
|
1449
|
+
|
|
1450
|
+
airbyte_workspace = AirbyteWorkspace(
|
|
1451
|
+
rest_api_base_url=dg.EnvVar("AIRBYTE_REST_API_BASE_URL"),
|
|
1452
|
+
configuration_api_base_url=dg.EnvVar("AIRBYTE_CONFIGURATION_API_BASE_URL"),
|
|
1453
|
+
workspace_id=dg.EnvVar("AIRBYTE_WORKSPACE_ID"),
|
|
1454
|
+
)
|
|
1455
|
+
|
|
1456
|
+
all_airbyte_assets = build_airbyte_assets_definitions(workspace=airbyte_workspace)
|
|
1457
|
+
|
|
1458
|
+
defs = dg.Definitions(
|
|
1459
|
+
assets=all_airbyte_assets,
|
|
1460
|
+
resources={"airbyte": airbyte_workspace},
|
|
1461
|
+
)
|
|
1462
|
+
"""
|
|
1463
|
+
|
|
1464
|
+
rest_api_base_url: str = Field(
|
|
1465
|
+
...,
|
|
1466
|
+
description="The base URL for the Airbyte REST API.",
|
|
1467
|
+
examples=[
|
|
1468
|
+
"http://localhost:8000/api/public/v1",
|
|
1469
|
+
"https://my-airbyte-server.com/api/public/v1",
|
|
1470
|
+
"http://airbyte-airbyte-server-svc.airbyte.svc.cluster.local:8001/api/public/v1",
|
|
1471
|
+
],
|
|
1472
|
+
)
|
|
1473
|
+
configuration_api_base_url: str = Field(
|
|
1474
|
+
...,
|
|
1475
|
+
description="The base URL for the Airbyte Configuration API.",
|
|
1476
|
+
examples=[
|
|
1477
|
+
"http://localhost:8000/api/v1",
|
|
1478
|
+
"https://my-airbyte-server.com/api/v1",
|
|
1479
|
+
"http://airbyte-airbyte-server-svc.airbyte.svc.cluster.local:8001/api/v1",
|
|
1480
|
+
],
|
|
1481
|
+
)
|
|
1482
|
+
workspace_id: str = Field(..., description="The Airbyte workspace ID")
|
|
1483
|
+
client_id: Optional[str] = Field(default=None, description="The Airbyte client ID.")
|
|
1484
|
+
client_secret: Optional[str] = Field(default=None, description="The Airbyte client secret.")
|
|
1485
|
+
username: Optional[str] = Field(
|
|
1486
|
+
default=None, description="The Airbyte username for authentication."
|
|
1487
|
+
)
|
|
1488
|
+
password: Optional[str] = Field(
|
|
1489
|
+
default=None, description="The Airbyte password for authentication."
|
|
1490
|
+
)
|
|
1491
|
+
|
|
1492
|
+
@cached_method
|
|
1493
|
+
def get_client(self) -> AirbyteClient:
|
|
1494
|
+
return AirbyteClient(
|
|
1495
|
+
rest_api_base_url=self.rest_api_base_url,
|
|
1496
|
+
configuration_api_base_url=self.configuration_api_base_url,
|
|
1497
|
+
workspace_id=self.workspace_id,
|
|
1498
|
+
client_id=self.client_id,
|
|
1499
|
+
client_secret=self.client_secret,
|
|
1500
|
+
username=self.username,
|
|
1501
|
+
password=self.password,
|
|
1502
|
+
request_max_retries=self.request_max_retries,
|
|
1503
|
+
request_retry_delay=self.request_retry_delay,
|
|
1504
|
+
request_timeout=self.request_timeout,
|
|
1505
|
+
)
|
|
1506
|
+
|
|
1507
|
+
|
|
1508
|
+
@beta
|
|
1509
|
+
class AirbyteCloudWorkspace(BaseAirbyteWorkspace):
|
|
1510
|
+
"""This resource allows users to programatically interface with the Airbyte Cloud REST API to launch
|
|
1511
|
+
syncs and monitor their progress for a given Airbyte Cloud workspace.
|
|
1512
|
+
|
|
1513
|
+
**Examples:**
|
|
1514
|
+
|
|
1515
|
+
.. code-block:: python
|
|
1516
|
+
|
|
1517
|
+
from dagster_airbyte import AirbyteCloudWorkspace, build_airbyte_assets_definitions
|
|
1518
|
+
|
|
1519
|
+
import dagster as dg
|
|
1520
|
+
|
|
1521
|
+
airbyte_workspace = AirbyteCloudWorkspace(
|
|
1522
|
+
workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
|
|
1523
|
+
client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
|
|
1524
|
+
client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
|
|
1525
|
+
)
|
|
1526
|
+
|
|
1527
|
+
all_airbyte_assets = build_airbyte_assets_definitions(workspace=airbyte_workspace)
|
|
1528
|
+
|
|
1529
|
+
defs = dg.Definitions(
|
|
1530
|
+
assets=all_airbyte_assets,
|
|
1531
|
+
resources={"airbyte": airbyte_workspace},
|
|
1532
|
+
)
|
|
1533
|
+
"""
|
|
1534
|
+
|
|
1535
|
+
rest_api_base_url: ClassVar[str] = AIRBYTE_CLOUD_REST_API_BASE_URL
|
|
1536
|
+
configuration_api_base_url: ClassVar[str] = AIRBYTE_CLOUD_CONFIGURATION_API_BASE_URL
|
|
1537
|
+
workspace_id: str = Field(..., description="The Airbyte workspace ID")
|
|
1538
|
+
client_id: str = Field(..., description="The Airbyte client ID.")
|
|
1539
|
+
client_secret: str = Field(..., description="The Airbyte client secret.")
|
|
1540
|
+
|
|
1541
|
+
@cached_method
|
|
1542
|
+
def get_client(self) -> AirbyteClient:
|
|
1543
|
+
return AirbyteClient(
|
|
1544
|
+
rest_api_base_url=self.rest_api_base_url,
|
|
1545
|
+
configuration_api_base_url=self.configuration_api_base_url,
|
|
1546
|
+
workspace_id=self.workspace_id,
|
|
1547
|
+
client_id=self.client_id,
|
|
1548
|
+
client_secret=self.client_secret,
|
|
1549
|
+
request_max_retries=self.request_max_retries,
|
|
1550
|
+
request_retry_delay=self.request_retry_delay,
|
|
1551
|
+
request_timeout=self.request_timeout,
|
|
1552
|
+
)
|
|
1553
|
+
|
|
1554
|
+
|
|
1555
|
+
@public
|
|
1556
|
+
@beta
|
|
1557
|
+
def load_airbyte_asset_specs(
|
|
1558
|
+
workspace: BaseAirbyteWorkspace,
|
|
1559
|
+
dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
|
|
1560
|
+
connection_selector_fn: Optional[Callable[[AirbyteConnection], bool]] = None,
|
|
1561
|
+
) -> Sequence[AssetSpec]:
|
|
1562
|
+
"""Returns a list of AssetSpecs representing the Airbyte content in the workspace.
|
|
1563
|
+
|
|
1564
|
+
Args:
|
|
1565
|
+
workspace (BaseAirbyteWorkspace): The Airbyte workspace to fetch assets from.
|
|
1566
|
+
dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
|
|
1567
|
+
to convert Airbyte content into :py:class:`dagster.AssetSpec`.
|
|
1568
|
+
Defaults to :py:class:`DagsterAirbyteTranslator`.
|
|
1569
|
+
connection_selector_fn (Optional[Callable[[AirbyteConnection], bool]]): A function that allows for filtering
|
|
1570
|
+
which Airbyte connection assets are created for.
|
|
1571
|
+
|
|
1572
|
+
Returns:
|
|
1573
|
+
List[AssetSpec]: The set of assets representing the Airbyte content in the workspace.
|
|
1574
|
+
|
|
1575
|
+
Examples:
|
|
1576
|
+
Loading the asset specs for a given Airbyte workspace:
|
|
1577
|
+
|
|
1578
|
+
.. code-block:: python
|
|
1579
|
+
|
|
1580
|
+
from dagster_airbyte import AirbyteWorkspace, load_airbyte_asset_specs
|
|
1581
|
+
|
|
1582
|
+
import dagster as dg
|
|
1583
|
+
|
|
1584
|
+
airbyte_workspace = AirbyteWorkspace(
|
|
1585
|
+
workspace_id=dg.EnvVar("AIRBYTE_WORKSPACE_ID"),
|
|
1586
|
+
client_id=dg.EnvVar("AIRBYTE_CLIENT_ID"),
|
|
1587
|
+
client_secret=dg.EnvVar("AIRBYTE_CLIENT_SECRET"),
|
|
1588
|
+
)
|
|
1589
|
+
|
|
1590
|
+
airbyte_specs = load_airbyte_asset_specs(airbyte_workspace)
|
|
1591
|
+
dg.Definitions(assets=airbyte_specs)
|
|
1592
|
+
|
|
1593
|
+
Filter connections by name:
|
|
1594
|
+
|
|
1595
|
+
.. code-block:: python
|
|
1596
|
+
|
|
1597
|
+
from dagster_airbyte import AirbyteWorkspace, load_airbyte_asset_specs
|
|
1598
|
+
|
|
1599
|
+
import dagster as dg
|
|
1600
|
+
|
|
1601
|
+
airbyte_workspace = AirbyteWorkspace(
|
|
1602
|
+
workspace_id=dg.EnvVar("AIRBYTE_WORKSPACE_ID"),
|
|
1603
|
+
client_id=dg.EnvVar("AIRBYTE_CLIENT_ID"),
|
|
1604
|
+
client_secret=dg.EnvVar("AIRBYTE_CLIENT_SECRET"),
|
|
1605
|
+
)
|
|
1606
|
+
|
|
1607
|
+
airbyte_specs = load_airbyte_asset_specs(
|
|
1608
|
+
workspace=airbyte_workspace,
|
|
1609
|
+
connection_selector_fn=lambda connection: connection.name in ["connection1", "connection2"]
|
|
1610
|
+
)
|
|
1611
|
+
dg.Definitions(assets=airbyte_specs)
|
|
1612
|
+
"""
|
|
1613
|
+
dagster_airbyte_translator = dagster_airbyte_translator or DagsterAirbyteTranslator()
|
|
1614
|
+
|
|
1615
|
+
with workspace.process_config_and_initialize_cm_cached() as initialized_workspace:
|
|
1616
|
+
return [
|
|
1617
|
+
spec.merge_attributes(
|
|
1618
|
+
metadata={DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY: dagster_airbyte_translator}
|
|
1619
|
+
)
|
|
1620
|
+
for spec in check.is_list(
|
|
1621
|
+
AirbyteWorkspaceDefsLoader(
|
|
1622
|
+
workspace=initialized_workspace,
|
|
1623
|
+
translator=dagster_airbyte_translator,
|
|
1624
|
+
connection_selector_fn=connection_selector_fn,
|
|
1625
|
+
)
|
|
1626
|
+
.build_defs()
|
|
1627
|
+
.assets,
|
|
1628
|
+
AssetSpec,
|
|
1629
|
+
)
|
|
1630
|
+
]
|
|
1631
|
+
|
|
1632
|
+
|
|
1633
|
+
@public
|
|
1634
|
+
@superseded(additional_warn_text="Use load_airbyte_asset_specs instead.")
|
|
1341
1635
|
def load_airbyte_cloud_asset_specs(
|
|
1342
1636
|
workspace: AirbyteCloudWorkspace,
|
|
1343
1637
|
dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
|
|
@@ -1394,29 +1688,16 @@ def load_airbyte_cloud_asset_specs(
|
|
|
1394
1688
|
)
|
|
1395
1689
|
dg.Definitions(assets=airbyte_cloud_specs)
|
|
1396
1690
|
"""
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
metadata={DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY: dagster_airbyte_translator}
|
|
1403
|
-
)
|
|
1404
|
-
for spec in check.is_list(
|
|
1405
|
-
AirbyteCloudWorkspaceDefsLoader(
|
|
1406
|
-
workspace=initialized_workspace,
|
|
1407
|
-
translator=dagster_airbyte_translator,
|
|
1408
|
-
connection_selector_fn=connection_selector_fn,
|
|
1409
|
-
)
|
|
1410
|
-
.build_defs()
|
|
1411
|
-
.assets,
|
|
1412
|
-
AssetSpec,
|
|
1413
|
-
)
|
|
1414
|
-
]
|
|
1691
|
+
return load_airbyte_asset_specs(
|
|
1692
|
+
workspace=workspace,
|
|
1693
|
+
dagster_airbyte_translator=dagster_airbyte_translator,
|
|
1694
|
+
connection_selector_fn=connection_selector_fn,
|
|
1695
|
+
)
|
|
1415
1696
|
|
|
1416
1697
|
|
|
1417
1698
|
@record
|
|
1418
|
-
class AirbyteCloudWorkspaceDefsLoader(StateBackedDefinitionsLoader[AirbyteWorkspaceData]):
|
|
1419
|
-
workspace: AirbyteCloudWorkspace
|
|
1699
|
+
class AirbyteWorkspaceDefsLoader(StateBackedDefinitionsLoader[AirbyteWorkspaceData]):
|
|
1700
|
+
workspace: Union[AirbyteWorkspace, AirbyteCloudWorkspace]
|
|
1420
1701
|
translator: DagsterAirbyteTranslator
|
|
1421
1702
|
connection_selector_fn: Optional[Callable[[AirbyteConnection], bool]]
|
|
1422
1703
|
|
dagster_airbyte/utils.py
CHANGED
|
@@ -13,7 +13,7 @@ from dagster._utils.names import clean_name_lower
|
|
|
13
13
|
from dagster_airbyte.types import AirbyteOutput
|
|
14
14
|
|
|
15
15
|
if TYPE_CHECKING:
|
|
16
|
-
from dagster_airbyte import DagsterAirbyteTranslator
|
|
16
|
+
from dagster_airbyte.translator import DagsterAirbyteTranslator
|
|
17
17
|
|
|
18
18
|
DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY = "dagster-airbyte/dagster_airbyte_translator"
|
|
19
19
|
|
dagster_airbyte/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.27.9"
|
|
1
|
+
__version__ = "0.27.11"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dagster-airbyte
|
|
3
|
-
Version: 0.27.9
|
|
3
|
+
Version: 0.27.11
|
|
4
4
|
Summary: Package for integrating Airbyte with Dagster.
|
|
5
5
|
Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-airbyte
|
|
6
6
|
Author: Dagster Labs
|
|
@@ -15,13 +15,13 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
15
15
|
Classifier: Operating System :: OS Independent
|
|
16
16
|
Requires-Python: >=3.9,<3.14
|
|
17
17
|
License-File: LICENSE
|
|
18
|
-
Requires-Dist: dagster==1.11.9
|
|
18
|
+
Requires-Dist: dagster==1.11.11
|
|
19
19
|
Requires-Dist: requests
|
|
20
20
|
Provides-Extra: test
|
|
21
21
|
Requires-Dist: requests-mock; extra == "test"
|
|
22
22
|
Requires-Dist: flaky; extra == "test"
|
|
23
23
|
Provides-Extra: managed
|
|
24
|
-
Requires-Dist: dagster-managed-elements==0.27.9; extra == "managed"
|
|
24
|
+
Requires-Dist: dagster-managed-elements==0.27.11; extra == "managed"
|
|
25
25
|
Dynamic: author
|
|
26
26
|
Dynamic: author-email
|
|
27
27
|
Dynamic: classifier
|
|
@@ -1,27 +1,27 @@
|
|
|
1
|
-
dagster_airbyte/__init__.py,sha256=
|
|
2
|
-
dagster_airbyte/asset_decorator.py,sha256=
|
|
3
|
-
dagster_airbyte/asset_defs.py,sha256=
|
|
1
|
+
dagster_airbyte/__init__.py,sha256=Bc3BuYTIcz5XU6NbKHqIemxgFSydLsqVLyLcx6K2pVA,2024
|
|
2
|
+
dagster_airbyte/asset_decorator.py,sha256=ziG1U5qleIIR8xbYmoKSA00gLazbrd09FzcgmGdZmkY,4640
|
|
3
|
+
dagster_airbyte/asset_defs.py,sha256=0lGOKSAlTxOYFonDsEI4FzuiVAViV8pr2Zgx3R8fAvQ,51694
|
|
4
4
|
dagster_airbyte/cli.py,sha256=HErteP1MjfHozKKSrznh0yAreKETbXp5NDHzXGsdvvE,425
|
|
5
5
|
dagster_airbyte/ops.py,sha256=oOEczVYpqVRTc1-0osfpR5FZbPjNJDYihgRjk_qtOQA,4229
|
|
6
6
|
dagster_airbyte/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
|
|
7
|
-
dagster_airbyte/resources.py,sha256=
|
|
7
|
+
dagster_airbyte/resources.py,sha256=y5FEHu2TfOrp6XRFnDU18umcEnXBS701FxBIl3Xtc9k,66883
|
|
8
8
|
dagster_airbyte/translator.py,sha256=RY2LhGPACIfyd2zOSH1swdLz4koX6_bvclW-alS3Bic,7547
|
|
9
9
|
dagster_airbyte/types.py,sha256=TYUjI3skjLYeANjesgJ-IAJNu8bAnL1ymsUfz5LsRTE,1565
|
|
10
|
-
dagster_airbyte/utils.py,sha256=
|
|
11
|
-
dagster_airbyte/version.py,sha256=
|
|
10
|
+
dagster_airbyte/utils.py,sha256=wG9119kXi87JgcOjK7iNozr-svZocJBQYoHBMmnXZcE,4092
|
|
11
|
+
dagster_airbyte/version.py,sha256=l83uHuTgD05VeP3nagYShYWw77kirwVHttN5jnj5UUU,24
|
|
12
12
|
dagster_airbyte/components/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
13
|
dagster_airbyte/components/workspace_component/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
-
dagster_airbyte/components/workspace_component/component.py,sha256=
|
|
15
|
-
dagster_airbyte/components/workspace_component/scaffolder.py,sha256=
|
|
14
|
+
dagster_airbyte/components/workspace_component/component.py,sha256=TIl8Ja6BHwpyRmkzkn-o75sH1dOUbbHtTftAb68Ag_c,8445
|
|
15
|
+
dagster_airbyte/components/workspace_component/scaffolder.py,sha256=zk5HDJ6C8zcfUWKslJVyqyBB0LV3JzNAocY2ptT517s,1046
|
|
16
16
|
dagster_airbyte/managed/__init__.py,sha256=6SBtyNOMJ9Cu2UIwFExJHpL_ZVFo3rPMvyIxVOsKvWE,469
|
|
17
17
|
dagster_airbyte/managed/reconciliation.py,sha256=xoVfqPBpNSldkiqOLIPnc7ei8CppHKWtzv8bvxjdqlI,34859
|
|
18
18
|
dagster_airbyte/managed/types.py,sha256=isPfX8L9YwtZAf9Vk4hhxBePLR00AEldsdK2TsM1H2o,14611
|
|
19
19
|
dagster_airbyte/managed/generated/__init__.py,sha256=eYq-yfXEeffuKAVFXY8plD0se1wHjFNVqklpbu9gljw,108
|
|
20
20
|
dagster_airbyte/managed/generated/destinations.py,sha256=x1wmWlXvOJHtfaZva3ErdKuVS--sDvfidSXR5ji9G5w,119692
|
|
21
21
|
dagster_airbyte/managed/generated/sources.py,sha256=y0TPNvcRd8c9mhje-NoXsHeKRPt1nXcpww8mNAtqCps,282685
|
|
22
|
-
dagster_airbyte-0.27.
|
|
23
|
-
dagster_airbyte-0.27.
|
|
24
|
-
dagster_airbyte-0.27.
|
|
25
|
-
dagster_airbyte-0.27.
|
|
26
|
-
dagster_airbyte-0.27.
|
|
27
|
-
dagster_airbyte-0.27.
|
|
22
|
+
dagster_airbyte-0.27.11.dist-info/licenses/LICENSE,sha256=4lsMW-RCvfVD4_F57wrmpe3vX1xwUk_OAKKmV_XT7Z0,11348
|
|
23
|
+
dagster_airbyte-0.27.11.dist-info/METADATA,sha256=rSqLRhd0zoFKOzoy3VafEKCoBuhWAolVAcwJdX8zn5o,1169
|
|
24
|
+
dagster_airbyte-0.27.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
25
|
+
dagster_airbyte-0.27.11.dist-info/entry_points.txt,sha256=096yvfMP-gNsCgDg9vDQtinis5QGpD-e_kHEhcHaML8,120
|
|
26
|
+
dagster_airbyte-0.27.11.dist-info/top_level.txt,sha256=HLwIRQCzqItn88_KbPP8DNTKKQEBUVKk6NCn4PrCtqY,16
|
|
27
|
+
dagster_airbyte-0.27.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|