dagster-airbyte 0.25.4__py3-none-any.whl → 0.25.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dagster-airbyte might be problematic. Click here for more details.
- dagster_airbyte/__init__.py +3 -0
- dagster_airbyte/resources.py +344 -1
- dagster_airbyte/translator.py +190 -0
- dagster_airbyte/utils.py +4 -0
- dagster_airbyte/version.py +1 -1
- {dagster_airbyte-0.25.4.dist-info → dagster_airbyte-0.25.5.dist-info}/METADATA +3 -3
- {dagster_airbyte-0.25.4.dist-info → dagster_airbyte-0.25.5.dist-info}/RECORD +11 -10
- {dagster_airbyte-0.25.4.dist-info → dagster_airbyte-0.25.5.dist-info}/LICENSE +0 -0
- {dagster_airbyte-0.25.4.dist-info → dagster_airbyte-0.25.5.dist-info}/WHEEL +0 -0
- {dagster_airbyte-0.25.4.dist-info → dagster_airbyte-0.25.5.dist-info}/entry_points.txt +0 -0
- {dagster_airbyte-0.25.4.dist-info → dagster_airbyte-0.25.5.dist-info}/top_level.txt +0 -0
dagster_airbyte/__init__.py
CHANGED
|
@@ -21,11 +21,14 @@ from dagster_airbyte.asset_defs import (
|
|
|
21
21
|
from dagster_airbyte.ops import airbyte_sync_op as airbyte_sync_op
|
|
22
22
|
from dagster_airbyte.resources import (
|
|
23
23
|
AirbyteCloudResource as AirbyteCloudResource,
|
|
24
|
+
AirbyteCloudWorkspace as AirbyteCloudWorkspace,
|
|
24
25
|
AirbyteResource as AirbyteResource,
|
|
25
26
|
AirbyteState as AirbyteState,
|
|
26
27
|
airbyte_cloud_resource as airbyte_cloud_resource,
|
|
27
28
|
airbyte_resource as airbyte_resource,
|
|
29
|
+
load_airbyte_cloud_asset_specs as load_airbyte_cloud_asset_specs,
|
|
28
30
|
)
|
|
31
|
+
from dagster_airbyte.translator import DagsterAirbyteTranslator as DagsterAirbyteTranslator
|
|
29
32
|
from dagster_airbyte.types import AirbyteOutput as AirbyteOutput
|
|
30
33
|
from dagster_airbyte.version import __version__ as __version__
|
|
31
34
|
|
dagster_airbyte/resources.py
CHANGED
|
@@ -6,32 +6,52 @@ import time
|
|
|
6
6
|
from abc import abstractmethod
|
|
7
7
|
from contextlib import contextmanager
|
|
8
8
|
from datetime import datetime, timedelta
|
|
9
|
-
from typing import Any, Dict, List, Mapping, Optional, cast
|
|
9
|
+
from typing import Any, Dict, List, Mapping, Optional, Sequence, cast
|
|
10
10
|
|
|
11
11
|
import requests
|
|
12
12
|
from dagster import (
|
|
13
13
|
ConfigurableResource,
|
|
14
|
+
Definitions,
|
|
14
15
|
Failure,
|
|
15
16
|
InitResourceContext,
|
|
16
17
|
_check as check,
|
|
17
18
|
get_dagster_logger,
|
|
18
19
|
resource,
|
|
19
20
|
)
|
|
21
|
+
from dagster._annotations import experimental
|
|
20
22
|
from dagster._config.pythonic_config import infer_schema_from_config_class
|
|
23
|
+
from dagster._core.definitions.asset_spec import AssetSpec
|
|
24
|
+
from dagster._core.definitions.definitions_load_context import StateBackedDefinitionsLoader
|
|
21
25
|
from dagster._core.definitions.resource_definition import dagster_maintained_resource
|
|
26
|
+
from dagster._model import DagsterModel
|
|
27
|
+
from dagster._record import record
|
|
22
28
|
from dagster._utils.cached_method import cached_method
|
|
23
29
|
from dagster._utils.merger import deep_merge_dicts
|
|
24
30
|
from pydantic import Field, PrivateAttr
|
|
25
31
|
from requests.exceptions import RequestException
|
|
26
32
|
|
|
33
|
+
from dagster_airbyte.translator import (
|
|
34
|
+
AirbyteConnection,
|
|
35
|
+
AirbyteDestination,
|
|
36
|
+
AirbyteWorkspaceData,
|
|
37
|
+
DagsterAirbyteTranslator,
|
|
38
|
+
)
|
|
27
39
|
from dagster_airbyte.types import AirbyteOutput
|
|
28
40
|
|
|
41
|
+
# Base URL and version of the Airbyte REST API.
AIRBYTE_REST_API_BASE = "https://api.airbyte.com"
AIRBYTE_REST_API_VERSION = "v1"

# Base URL and version of the Airbyte Configuration API.
AIRBYTE_CONFIGURATION_API_BASE = "https://cloud.airbyte.com/api"
AIRBYTE_CONFIGURATION_API_VERSION = "v1"

DEFAULT_POLL_INTERVAL_SECONDS = 10

# The access token expires every 3 minutes in Airbyte Cloud.
# Refresh is needed after 2.5 minutes to avoid the "token expired" error message.
AIRBYTE_CLOUD_REFRESH_TIMEDELTA_SECONDS = 150

# Prefix of the `defs_key` under which AirbyteCloudWorkspaceDefsLoader stores its state.
AIRBYTE_RECONSTRUCTION_METADATA_KEY_PREFIX = "dagster-airbyte/reconstruction_metadata"
|
|
54
|
+
|
|
35
55
|
|
|
36
56
|
class AirbyteState:
|
|
37
57
|
RUNNING = "running"
|
|
@@ -791,3 +811,326 @@ def airbyte_cloud_resource(context) -> AirbyteCloudResource:
|
|
|
791
811
|
|
|
792
812
|
"""
|
|
793
813
|
return AirbyteCloudResource.from_resource_context(context)
|
|
814
|
+
|
|
815
|
+
|
|
816
|
+
# -------------
|
|
817
|
+
# Resources v2
|
|
818
|
+
# -------------
|
|
819
|
+
|
|
820
|
+
|
|
821
|
+
@experimental
class AirbyteCloudClient(DagsterModel):
    """This class exposes methods on top of the Airbyte APIs for Airbyte Cloud.

    Requests are authenticated with a bearer access token obtained from the
    ``applications/token`` endpoint using ``client_id`` and ``client_secret``. The token is
    kept on the instance and refreshed once it is older than
    ``AIRBYTE_CLOUD_REFRESH_TIMEDELTA_SECONDS``.
    """

    workspace_id: str = Field(..., description="The Airbyte workspace ID")
    client_id: str = Field(..., description="The Airbyte client ID.")
    client_secret: str = Field(..., description="The Airbyte client secret.")
    request_max_retries: int = Field(
        ...,
        description=(
            "The maximum number of times requests to the Airbyte API should be retried "
            "before failing."
        ),
    )
    request_retry_delay: float = Field(
        ...,
        description="Time (in seconds) to wait between each request retry.",
    )
    request_timeout: int = Field(
        ...,
        description="Time (in seconds) after which the requests to Airbyte are declared timed out.",
    )

    # Current bearer token and the POSIX timestamp at which it was obtained.
    _access_token_value: Optional[str] = PrivateAttr(default=None)
    _access_token_timestamp: Optional[float] = PrivateAttr(default=None)

    @property
    @cached_method
    def _log(self) -> logging.Logger:
        """Logger used to report failed requests."""
        return get_dagster_logger()

    @property
    def rest_api_base_url(self) -> str:
        """Versioned base URL of the Airbyte REST API."""
        return f"{AIRBYTE_REST_API_BASE}/{AIRBYTE_REST_API_VERSION}"

    @property
    def configuration_api_base_url(self) -> str:
        """Versioned base URL of the Airbyte Configuration API."""
        return f"{AIRBYTE_CONFIGURATION_API_BASE}/{AIRBYTE_CONFIGURATION_API_VERSION}"

    @property
    def all_additional_request_params(self) -> Mapping[str, Any]:
        """Authorization and user-agent headers merged into a single mapping."""
        return {**self.authorization_request_params, **self.user_agent_request_params}

    @property
    def authorization_request_params(self) -> Mapping[str, Any]:
        """Authorization header carrying the bearer token, refreshing the token if needed."""
        # Make sure the access token is refreshed before using it when calling the API.
        if self._needs_refreshed_access_token():
            self._refresh_access_token()
        return {
            "Authorization": f"Bearer {self._access_token_value}",
        }

    @property
    def user_agent_request_params(self) -> Mapping[str, Any]:
        """User-agent header sent with every authenticated request."""
        return {
            "User-Agent": "dagster",
        }

    def _refresh_access_token(self) -> None:
        """Requests a new access token and records the time at which it was obtained."""
        response = check.not_none(
            self._make_request(
                method="POST",
                endpoint="applications/token",
                base_url=self.rest_api_base_url,
                data={
                    "client_id": self.client_id,
                    "client_secret": self.client_secret,
                },
                # Must not pass the bearer access token when refreshing it.
                include_additional_request_params=False,
            )
        )
        self._access_token_value = str(response["access_token"])
        self._access_token_timestamp = datetime.now().timestamp()

    def _needs_refreshed_access_token(self) -> bool:
        """Returns True when no token is stored or the stored token is past the refresh age."""
        return (
            not self._access_token_value
            or not self._access_token_timestamp
            or self._access_token_timestamp
            <= (
                datetime.now() - timedelta(seconds=AIRBYTE_CLOUD_REFRESH_TIMEDELTA_SECONDS)
            ).timestamp()
        )

    def _get_session(self, include_additional_request_params: bool) -> requests.Session:
        """Builds a new session whose default headers are preconfigured.

        Args:
            include_additional_request_params (bool): Whether to add the authorization and
                user-agent headers on top of the ``accept`` header.

        Returns:
            requests.Session: A new session. The caller is responsible for closing it.
        """
        headers = {"accept": "application/json"}
        if include_additional_request_params:
            headers = {
                **headers,
                **self.all_additional_request_params,
            }
        session = requests.Session()
        session.headers.update(headers)
        return session

    def _make_request(
        self,
        method: str,
        endpoint: str,
        base_url: str,
        data: Optional[Mapping[str, Any]] = None,
        params: Optional[Mapping[str, Any]] = None,
        include_additional_request_params: bool = True,
    ) -> Mapping[str, Any]:
        """Creates and sends a request to the desired Airbyte REST API endpoint.

        Args:
            method (str): The http method to use for this request (e.g. "POST", "GET", "PATCH").
            endpoint (str): The Airbyte API endpoint to send this request to.
            base_url (str): The base url to the Airbyte API to use.
            data (Optional[Dict[str, Any]]): JSON-formatted data string to be included in the request.
            params (Optional[Dict[str, Any]]): JSON-formatted query params to be included in the request.
            include_additional_request_params (bool): Whether to include authorization and user-agent headers
                to the request parameters. Defaults to True.

        Returns:
            Dict[str, Any]: Parsed json data from the response to this request

        Raises:
            Failure: When the request still fails after ``request_max_retries`` retries.
        """
        url = f"{base_url}/{endpoint}"

        last_error: Optional[RequestException] = None
        num_retries = 0
        while True:
            try:
                # A fresh session is built for each attempt so that the headers carry a
                # valid (possibly just refreshed) access token.
                session = self._get_session(
                    include_additional_request_params=include_additional_request_params
                )
                try:
                    response = session.request(
                        method=method, url=url, json=data, params=params, timeout=self.request_timeout
                    )
                    response.raise_for_status()
                    return response.json()
                finally:
                    # Close the per-attempt session so its connection pool is not leaked.
                    session.close()
            except RequestException as e:
                last_error = e
                self._log.error(
                    f"Request to Airbyte API failed for url {url} with method {method} : {e}"
                )
                if num_retries == self.request_max_retries:
                    break
                num_retries += 1
                time.sleep(self.request_retry_delay)

        # Chain the last request error so the root cause stays visible in the traceback.
        raise Failure(
            f"Max retries ({self.request_max_retries}) exceeded with url: {url}."
        ) from last_error

    def get_connections(self) -> Mapping[str, Any]:
        """Fetches all connections of an Airbyte workspace from the Airbyte REST API."""
        # NOTE(review): a single request is issued and its response returned as is; confirm
        # whether this endpoint paginates, in which case later pages are not read here.
        return self._make_request(
            method="GET",
            endpoint="connections",
            base_url=self.rest_api_base_url,
            params={"workspaceIds": self.workspace_id},
        )

    def get_connection_details(self, connection_id: str) -> Mapping[str, Any]:
        """Fetches details about a given connection from the Airbyte Configuration API.
        The Airbyte Configuration API is an internal API and may change in the future.
        """
        # Using the Airbyte Configuration API to get the connection details, including streams and their configs.
        # https://airbyte-public-api-docs.s3.us-east-2.amazonaws.com/rapidoc-api-docs.html#post-/v1/connections/get
        # https://github.com/airbytehq/airbyte-platform/blob/v1.0.0/airbyte-api/server-api/src/main/openapi/config.yaml
        return self._make_request(
            method="POST",
            endpoint="connections/get",
            base_url=self.configuration_api_base_url,
            data={"connectionId": connection_id},
        )

    def get_destination_details(self, destination_id: str) -> Mapping[str, Any]:
        """Fetches details about a given destination from the Airbyte REST API."""
        return self._make_request(
            method="GET",
            endpoint=f"destinations/{destination_id}",
            base_url=self.rest_api_base_url,
        )
|
|
994
|
+
|
|
995
|
+
|
|
996
|
+
@experimental
class AirbyteCloudWorkspace(ConfigurableResource):
    """This class represents an Airbyte Cloud workspace and provides utilities
    to interact with Airbyte APIs.
    """

    workspace_id: str = Field(..., description="The Airbyte Cloud workspace ID")
    client_id: str = Field(..., description="The Airbyte Cloud client ID.")
    client_secret: str = Field(..., description="The Airbyte Cloud client secret.")
    request_max_retries: int = Field(
        default=3,
        description=(
            "The maximum number of times requests to the Airbyte API should be retried "
            "before failing."
        ),
    )
    request_retry_delay: float = Field(
        default=0.25,
        description="Time (in seconds) to wait between each request retry.",
    )
    request_timeout: int = Field(
        default=15,
        description="Time (in seconds) after which the requests to Airbyte are declared timed out.",
    )

    # Defaults to None, hence Optional. Not read by any method of this class.
    _client: Optional[AirbyteCloudClient] = PrivateAttr(default=None)

    @cached_method
    def get_client(self) -> AirbyteCloudClient:
        """Returns an AirbyteCloudClient configured with this workspace's settings."""
        return AirbyteCloudClient(
            workspace_id=self.workspace_id,
            client_id=self.client_id,
            client_secret=self.client_secret,
            request_max_retries=self.request_max_retries,
            request_retry_delay=self.request_retry_delay,
            request_timeout=self.request_timeout,
        )

    def fetch_airbyte_workspace_data(
        self,
    ) -> AirbyteWorkspaceData:
        """Retrieves all Airbyte content from the workspace and returns it as a AirbyteWorkspaceData object.

        Returns:
            AirbyteWorkspaceData: A snapshot of the Airbyte workspace's content.
        """
        connections_by_id: Dict[str, AirbyteConnection] = {}
        destinations_by_id: Dict[str, AirbyteDestination] = {}

        client = self.get_client()
        connections = client.get_connections()["data"]

        for partial_connection_details in connections:
            # The connection listing is partial; fetch the full details for each connection.
            full_connection_details = client.get_connection_details(
                connection_id=partial_connection_details["connectionId"]
            )
            connection = AirbyteConnection.from_connection_details(
                connection_details=full_connection_details
            )
            connections_by_id[connection.id] = connection

            # Several connections may write to the same destination; fetch each one only once.
            if connection.destination_id in destinations_by_id:
                continue

            destination_details = client.get_destination_details(
                destination_id=connection.destination_id
            )
            destination = AirbyteDestination.from_destination_details(
                destination_details=destination_details
            )
            destinations_by_id[destination.id] = destination

        return AirbyteWorkspaceData(
            connections_by_id=connections_by_id,
            destinations_by_id=destinations_by_id,
        )
|
|
1069
|
+
|
|
1070
|
+
|
|
1071
|
+
@experimental
def load_airbyte_cloud_asset_specs(
    workspace: AirbyteCloudWorkspace,
    dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
) -> Sequence[AssetSpec]:
    """Returns a list of AssetSpecs representing the Airbyte content in the workspace.

    Args:
        workspace (AirbyteCloudWorkspace): The Airbyte Cloud workspace to fetch assets from.
        dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
            to convert Airbyte content into :py:class:`dagster.AssetSpec`.
            Defaults to :py:class:`DagsterAirbyteTranslator`.

    Returns:
        List[AssetSpec]: The set of assets representing the Airbyte content in the workspace.

    Examples:
        Loading the asset specs for a given Airbyte Cloud workspace:

        .. code-block:: python

            from dagster_airbyte import AirbyteCloudWorkspace, load_airbyte_cloud_asset_specs

            import dagster as dg

            airbyte_cloud_workspace = AirbyteCloudWorkspace(
                workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
                client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
                client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
            )


            airbyte_cloud_specs = load_airbyte_cloud_asset_specs(airbyte_cloud_workspace)
            defs = dg.Definitions(assets=airbyte_cloud_specs)
    """
    with workspace.process_config_and_initialize_cm() as initialized_workspace:
        # Fall back to the default translator when the caller does not supply one.
        loader = AirbyteCloudWorkspaceDefsLoader(
            workspace=initialized_workspace,
            translator=dagster_airbyte_translator or DagsterAirbyteTranslator(),
        )
        definitions = loader.build_defs()
        return check.is_list(definitions.assets, AssetSpec)
|
|
1116
|
+
|
|
1117
|
+
|
|
1118
|
+
@record
class AirbyteCloudWorkspaceDefsLoader(StateBackedDefinitionsLoader[AirbyteWorkspaceData]):
    """Builds Definitions for an Airbyte Cloud workspace from an AirbyteWorkspaceData state.

    The state type parameter is AirbyteWorkspaceData, matching what ``fetch_state`` returns
    and what ``defs_from_state`` accepts.
    """

    workspace: AirbyteCloudWorkspace
    translator: DagsterAirbyteTranslator

    @property
    def defs_key(self) -> str:
        """Key identifying this loader's state, namespaced by the workspace ID."""
        return f"{AIRBYTE_RECONSTRUCTION_METADATA_KEY_PREFIX}/{self.workspace.workspace_id}"

    def fetch_state(self) -> AirbyteWorkspaceData:
        """Fetches the current workspace content through the workspace resource."""
        return self.workspace.fetch_airbyte_workspace_data()

    def defs_from_state(self, state: AirbyteWorkspaceData) -> Definitions:
        """Translates each connection table described by ``state`` into an AssetSpec."""
        all_asset_specs = [
            self.translator.get_asset_spec(props)
            for props in state.to_airbyte_connection_table_props_data()
        ]

        return Definitions(assets=all_asset_specs)
|
|
dagster_airbyte/translator.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
from typing import Any, List, Mapping, Optional, Sequence
|
|
2
|
+
|
|
3
|
+
from dagster._annotations import experimental
|
|
4
|
+
from dagster._core.definitions.asset_key import AssetKey
|
|
5
|
+
from dagster._core.definitions.asset_spec import AssetSpec
|
|
6
|
+
from dagster._core.definitions.metadata.metadata_set import NamespacedMetadataSet, TableMetadataSet
|
|
7
|
+
from dagster._record import record
|
|
8
|
+
from dagster._serdes.serdes import whitelist_for_serdes
|
|
9
|
+
from dagster._utils.cached_method import cached_method
|
|
10
|
+
|
|
11
|
+
from dagster_airbyte.utils import generate_table_schema, get_airbyte_connection_table_name
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@record
class AirbyteConnectionTableProps:
    """Properties of a single table (stream) synced by an Airbyte connection."""

    table_name: str
    stream_prefix: Optional[str]
    stream_name: str
    json_schema: Mapping[str, Any]
    connection_id: str
    connection_name: str
    database: Optional[str]
    schema: Optional[str]

    @property
    def fully_qualified_table_name(self) -> Optional[str]:
        """Returns ``database.schema.stream_name``, or None if database or schema is unset."""
        # NOTE(review): this uses stream_name rather than table_name, so stream_prefix is
        # not part of the qualified name — confirm this is intended.
        if not (self.database and self.schema):
            return None
        return f"{self.database}.{self.schema}.{self.stream_name}"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@whitelist_for_serdes
@record
class AirbyteConnection:
    """Represents an Airbyte connection, based on data as returned from the API."""

    id: str
    name: str
    stream_prefix: Optional[str]
    streams: Mapping[str, "AirbyteStream"]
    destination_id: str

    @classmethod
    def from_connection_details(
        cls,
        connection_details: Mapping[str, Any],
    ) -> "AirbyteConnection":
        """Builds an AirbyteConnection from the connection details mapping.

        Streams are read from ``syncCatalog.streams`` (empty when absent) and keyed by
        stream name.
        """
        connection_id = connection_details["connectionId"]
        connection_name = connection_details["name"]
        prefix = connection_details.get("prefix")

        streams_by_name = {}
        for stream_details in connection_details.get("syncCatalog", {}).get("streams", []):
            stream_name = stream_details["stream"]["name"]
            streams_by_name[stream_name] = AirbyteStream.from_stream_details(
                stream_details=stream_details
            )

        destination_id = connection_details["destinationId"]

        return cls(
            id=connection_id,
            name=connection_name,
            stream_prefix=prefix,
            streams=streams_by_name,
            destination_id=destination_id,
        )
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@whitelist_for_serdes
@record
class AirbyteDestination:
    """Represents an Airbyte destination, based on data as returned from the API."""

    id: str
    database: Optional[str]
    schema: Optional[str]

    @classmethod
    def from_destination_details(
        cls,
        destination_details: Mapping[str, Any],
    ) -> "AirbyteDestination":
        """Builds an AirbyteDestination from the destination details mapping.

        ``database`` and ``schema`` are taken from the ``configuration`` entry and are
        None when that entry does not define them.
        """
        destination_id = destination_details["destinationId"]
        configuration = destination_details["configuration"]
        return cls(
            id=destination_id,
            database=configuration.get("database"),
            schema=configuration.get("schema"),
        )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@whitelist_for_serdes
@record
class AirbyteStream:
    """Represents an Airbyte stream, based on data as returned from the API.
    A stream in Airbyte corresponds to a table.
    """

    name: str
    selected: bool
    json_schema: Mapping[str, Any]

    @classmethod
    def from_stream_details(
        cls,
        stream_details: Mapping[str, Any],
    ) -> "AirbyteStream":
        """Builds an AirbyteStream from one stream entry of a connection.

        ``selected`` defaults to False and ``json_schema`` to an empty mapping when the
        entry does not provide them.
        """
        stream_name = stream_details["stream"]["name"]
        is_selected = stream_details["config"].get("selected", False)
        schema = stream_details["stream"].get("jsonSchema", {})
        return cls(name=stream_name, selected=is_selected, json_schema=schema)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@whitelist_for_serdes
@record
class AirbyteWorkspaceData:
    """A record representing all content in an Airbyte workspace.
    This applies to both Airbyte OSS and Cloud.
    """

    connections_by_id: Mapping[str, AirbyteConnection]
    destinations_by_id: Mapping[str, AirbyteDestination]

    @cached_method
    def to_airbyte_connection_table_props_data(self) -> Sequence[AirbyteConnectionTableProps]:
        """Method that converts a `AirbyteWorkspaceData` object
        to a collection of `AirbyteConnectionTableProps` objects.

        One entry is produced per selected stream of every connection.
        """
        table_props: List[AirbyteConnectionTableProps] = []

        for connection in self.connections_by_id.values():
            # Looked up once per connection, before any of its streams is inspected.
            destination = self.destinations_by_id[connection.destination_id]

            table_props.extend(
                AirbyteConnectionTableProps(
                    table_name=get_airbyte_connection_table_name(
                        stream_prefix=connection.stream_prefix,
                        stream_name=stream.name,
                    ),
                    stream_prefix=connection.stream_prefix,
                    stream_name=stream.name,
                    json_schema=stream.json_schema,
                    connection_id=connection.id,
                    connection_name=connection.name,
                    database=destination.database,
                    schema=destination.schema,
                )
                for stream in connection.streams.values()
                if stream.selected
            )

        return table_props
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class AirbyteMetadataSet(NamespacedMetadataSet):
    """Metadata entries attached to Airbyte assets under the "dagster-airbyte" namespace."""

    connection_id: str
    connection_name: str
    stream_prefix: Optional[str] = None

    @classmethod
    def namespace(cls) -> str:
        """Returns the namespace of this metadata set."""
        return "dagster-airbyte"
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@experimental
class DagsterAirbyteTranslator:
    """Translator class which converts a `AirbyteConnectionTableProps` object into AssetSpecs.
    Subclass this class to implement custom logic how to translate Airbyte content into asset spec.
    """

    def get_asset_spec(self, props: AirbyteConnectionTableProps) -> AssetSpec:
        """Get the AssetSpec for a table synced by an Airbyte connection."""
        json_schema = props.json_schema

        # Column definitions come from the top-level "properties", else from
        # "items.properties", else there are none.
        table_schema_props = json_schema.get("properties")
        if not table_schema_props:
            table_schema_props = json_schema.get("items", {}).get("properties") or {}
        column_schema = generate_table_schema(table_schema_props)

        table_metadata = TableMetadataSet(
            column_schema=column_schema,
            table_name=props.fully_qualified_table_name,
        )
        airbyte_metadata = AirbyteMetadataSet(
            connection_id=props.connection_id,
            connection_name=props.connection_name,
            stream_prefix=props.stream_prefix,
        )

        return AssetSpec(
            key=AssetKey(props.table_name),
            metadata={**table_metadata, **airbyte_metadata},
            kinds={"airbyte"},
        )
|
dagster_airbyte/utils.py
CHANGED
|
@@ -6,6 +6,10 @@ from dagster._core.definitions.metadata.table import TableColumn, TableSchema
|
|
|
6
6
|
from dagster_airbyte.types import AirbyteOutput
|
|
7
7
|
|
|
8
8
|
|
|
9
|
+
def get_airbyte_connection_table_name(stream_prefix: Optional[str], stream_name: str) -> str:
    """Returns the stream name preceded by the stream prefix, if a non-empty one is given."""
    prefix = stream_prefix or ""
    return f"{prefix}{stream_name}"
|
|
11
|
+
|
|
12
|
+
|
|
9
13
|
def generate_table_schema(stream_schema_props: Mapping[str, Any]) -> TableSchema:
|
|
10
14
|
return TableSchema(
|
|
11
15
|
columns=sorted(
|
dagster_airbyte/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.25.4"
|
|
1
|
+
__version__ = "0.25.5"
|
|
{dagster_airbyte-0.25.4.dist-info → dagster_airbyte-0.25.5.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: dagster-airbyte
|
|
3
|
-
Version: 0.25.4
|
|
3
|
+
Version: 0.25.5
|
|
4
4
|
Summary: Package for integrating Airbyte with Dagster.
|
|
5
5
|
Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-airbyte
|
|
6
6
|
Author: Dagster Labs
|
|
@@ -14,10 +14,10 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
14
14
|
Classifier: Operating System :: OS Independent
|
|
15
15
|
Requires-Python: >=3.9,<3.13
|
|
16
16
|
License-File: LICENSE
|
|
17
|
-
Requires-Dist: dagster ==1.9.4
|
|
17
|
+
Requires-Dist: dagster ==1.9.5
|
|
18
18
|
Requires-Dist: requests
|
|
19
19
|
Provides-Extra: managed
|
|
20
|
-
Requires-Dist: dagster-managed-elements ==0.25.4 ; extra == 'managed'
|
|
20
|
+
Requires-Dist: dagster-managed-elements ==0.25.5 ; extra == 'managed'
|
|
21
21
|
Provides-Extra: test
|
|
22
22
|
Requires-Dist: requests-mock ; extra == 'test'
|
|
23
23
|
Requires-Dist: flaky ; extra == 'test'
|
|
{dagster_airbyte-0.25.4.dist-info → dagster_airbyte-0.25.5.dist-info}/RECORD
CHANGED
|
@@ -1,21 +1,22 @@
|
|
|
1
|
-
dagster_airbyte/__init__.py,sha256=
|
|
1
|
+
dagster_airbyte/__init__.py,sha256=deK1ieUOzZr63mpkWLapn9RLAI1uNnsXFZpNMfm-bgw,1445
|
|
2
2
|
dagster_airbyte/asset_defs.py,sha256=UFzHdNRgFPNU8xqDGcD-ce-J9I82HIj3N2GN5tg8G_Y,46039
|
|
3
3
|
dagster_airbyte/cli.py,sha256=HErteP1MjfHozKKSrznh0yAreKETbXp5NDHzXGsdvvE,425
|
|
4
4
|
dagster_airbyte/ops.py,sha256=pq6mp7vN2wXgo3gJMuWaAcxTmfkZ7d1zWzPyL_auSEY,4208
|
|
5
5
|
dagster_airbyte/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
|
|
6
|
-
dagster_airbyte/resources.py,sha256=
|
|
6
|
+
dagster_airbyte/resources.py,sha256=4rDsm4FCaONEB_WGYDa54uoNzD9nX4d7mBQ1WDmdG8M,42920
|
|
7
|
+
dagster_airbyte/translator.py,sha256=IIpsmPhXePJM4TnVxX_nARAgDOTXX0oCU5iU3hjUzLU,6225
|
|
7
8
|
dagster_airbyte/types.py,sha256=w1DyTcXyuzrG3wfkOPYFtwj7snHcgqf-dC7_pRjiE1Q,1544
|
|
8
|
-
dagster_airbyte/utils.py,sha256=
|
|
9
|
-
dagster_airbyte/version.py,sha256=
|
|
9
|
+
dagster_airbyte/utils.py,sha256=hRUURJiVeximSfFP6pWxb0beh9PsOPdW4obX1pqBVt4,2987
|
|
10
|
+
dagster_airbyte/version.py,sha256=9zCh_8lz8S_e8JlXmwkSbkp_uQhvRgQSEOH1abFGMw8,23
|
|
10
11
|
dagster_airbyte/managed/__init__.py,sha256=6SBtyNOMJ9Cu2UIwFExJHpL_ZVFo3rPMvyIxVOsKvWE,469
|
|
11
12
|
dagster_airbyte/managed/reconciliation.py,sha256=HgrLT-Xs8vWY9SfbdBXuorMf60KCn5Qz7bPITW5MxJo,34862
|
|
12
13
|
dagster_airbyte/managed/types.py,sha256=ja056Wm7_ZFw1XGSNmdxmBy2TcOxbnylJCpRA2ng2TE,14596
|
|
13
14
|
dagster_airbyte/managed/generated/__init__.py,sha256=eYq-yfXEeffuKAVFXY8plD0se1wHjFNVqklpbu9gljw,108
|
|
14
15
|
dagster_airbyte/managed/generated/destinations.py,sha256=x1wmWlXvOJHtfaZva3ErdKuVS--sDvfidSXR5ji9G5w,119692
|
|
15
16
|
dagster_airbyte/managed/generated/sources.py,sha256=wyNoGJiNvW8mjRRs6b-_lWFs0Fgy-MZlRaxiN6bP-4s,282691
|
|
16
|
-
dagster_airbyte-0.25.
|
|
17
|
-
dagster_airbyte-0.25.
|
|
18
|
-
dagster_airbyte-0.25.
|
|
19
|
-
dagster_airbyte-0.25.
|
|
20
|
-
dagster_airbyte-0.25.
|
|
21
|
-
dagster_airbyte-0.25.
|
|
17
|
+
dagster_airbyte-0.25.5.dist-info/LICENSE,sha256=TMatHW4_G9ldRdodEAp-l2Xa2WvsdeOh60E3v1R2jis,11349
|
|
18
|
+
dagster_airbyte-0.25.5.dist-info/METADATA,sha256=pzD-yKSHZbB3APVxyshGfDc83d5S3ZGNv0ct0BQXsA8,915
|
|
19
|
+
dagster_airbyte-0.25.5.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
20
|
+
dagster_airbyte-0.25.5.dist-info/entry_points.txt,sha256=XrbLOz3LpgPV5fdwMmgdP6Rp1AfSG07KeWIddLqh7Lw,61
|
|
21
|
+
dagster_airbyte-0.25.5.dist-info/top_level.txt,sha256=HLwIRQCzqItn88_KbPP8DNTKKQEBUVKk6NCn4PrCtqY,16
|
|
22
|
+
dagster_airbyte-0.25.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|