dagster-airbyte 0.25.4__tar.gz → 0.25.5__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dagster-airbyte might be problematic. Click here for more details.

Files changed (30)
  1. {dagster-airbyte-0.25.4/dagster_airbyte.egg-info → dagster-airbyte-0.25.5}/PKG-INFO +1 -1
  2. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte/__init__.py +3 -0
  3. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte/resources.py +344 -1
  4. dagster-airbyte-0.25.5/dagster_airbyte/translator.py +190 -0
  5. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte/utils.py +4 -0
  6. dagster-airbyte-0.25.5/dagster_airbyte/version.py +1 -0
  7. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5/dagster_airbyte.egg-info}/PKG-INFO +1 -1
  8. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte.egg-info/SOURCES.txt +1 -0
  9. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte.egg-info/requires.txt +2 -2
  10. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/setup.py +2 -2
  11. dagster-airbyte-0.25.4/dagster_airbyte/version.py +0 -1
  12. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/LICENSE +0 -0
  13. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/MANIFEST.in +0 -0
  14. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/README.md +0 -0
  15. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte/asset_defs.py +0 -0
  16. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte/cli.py +0 -0
  17. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte/managed/__init__.py +0 -0
  18. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte/managed/generated/__init__.py +0 -0
  19. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte/managed/generated/destinations.py +0 -0
  20. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte/managed/generated/sources.py +0 -0
  21. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte/managed/reconciliation.py +0 -0
  22. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte/managed/types.py +0 -0
  23. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte/ops.py +0 -0
  24. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte/py.typed +0 -0
  25. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte/types.py +0 -0
  26. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte.egg-info/dependency_links.txt +0 -0
  27. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte.egg-info/entry_points.txt +0 -0
  28. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte.egg-info/not-zip-safe +0 -0
  29. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/dagster_airbyte.egg-info/top_level.txt +0 -0
  30. {dagster-airbyte-0.25.4 → dagster-airbyte-0.25.5}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dagster-airbyte
3
- Version: 0.25.4
3
+ Version: 0.25.5
4
4
  Summary: Package for integrating Airbyte with Dagster.
5
5
  Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-airbyte
6
6
  Author: Dagster Labs
@@ -21,11 +21,14 @@ from dagster_airbyte.asset_defs import (
21
21
  from dagster_airbyte.ops import airbyte_sync_op as airbyte_sync_op
22
22
  from dagster_airbyte.resources import (
23
23
  AirbyteCloudResource as AirbyteCloudResource,
24
+ AirbyteCloudWorkspace as AirbyteCloudWorkspace,
24
25
  AirbyteResource as AirbyteResource,
25
26
  AirbyteState as AirbyteState,
26
27
  airbyte_cloud_resource as airbyte_cloud_resource,
27
28
  airbyte_resource as airbyte_resource,
29
+ load_airbyte_cloud_asset_specs as load_airbyte_cloud_asset_specs,
28
30
  )
31
+ from dagster_airbyte.translator import DagsterAirbyteTranslator as DagsterAirbyteTranslator
29
32
  from dagster_airbyte.types import AirbyteOutput as AirbyteOutput
30
33
  from dagster_airbyte.version import __version__ as __version__
31
34
 
@@ -6,32 +6,52 @@ import time
6
6
  from abc import abstractmethod
7
7
  from contextlib import contextmanager
8
8
  from datetime import datetime, timedelta
9
- from typing import Any, Dict, List, Mapping, Optional, cast
9
+ from typing import Any, Dict, List, Mapping, Optional, Sequence, cast
10
10
 
11
11
  import requests
12
12
  from dagster import (
13
13
  ConfigurableResource,
14
+ Definitions,
14
15
  Failure,
15
16
  InitResourceContext,
16
17
  _check as check,
17
18
  get_dagster_logger,
18
19
  resource,
19
20
  )
21
+ from dagster._annotations import experimental
20
22
  from dagster._config.pythonic_config import infer_schema_from_config_class
23
+ from dagster._core.definitions.asset_spec import AssetSpec
24
+ from dagster._core.definitions.definitions_load_context import StateBackedDefinitionsLoader
21
25
  from dagster._core.definitions.resource_definition import dagster_maintained_resource
26
+ from dagster._model import DagsterModel
27
+ from dagster._record import record
22
28
  from dagster._utils.cached_method import cached_method
23
29
  from dagster._utils.merger import deep_merge_dicts
24
30
  from pydantic import Field, PrivateAttr
25
31
  from requests.exceptions import RequestException
26
32
 
33
+ from dagster_airbyte.translator import (
34
+ AirbyteConnection,
35
+ AirbyteDestination,
36
+ AirbyteWorkspaceData,
37
+ DagsterAirbyteTranslator,
38
+ )
27
39
  from dagster_airbyte.types import AirbyteOutput
28
40
 
41
+ AIRBYTE_REST_API_BASE = "https://api.airbyte.com"
42
+ AIRBYTE_REST_API_VERSION = "v1"
43
+
44
+ AIRBYTE_CONFIGURATION_API_BASE = "https://cloud.airbyte.com/api"
45
+ AIRBYTE_CONFIGURATION_API_VERSION = "v1"
46
+
29
47
  DEFAULT_POLL_INTERVAL_SECONDS = 10
30
48
 
31
49
  # The access token expire every 3 minutes in Airbyte Cloud.
32
50
  # Refresh is needed after 2.5 minutes to avoid the "token expired" error message.
33
51
  AIRBYTE_CLOUD_REFRESH_TIMEDELTA_SECONDS = 150
34
52
 
53
+ AIRBYTE_RECONSTRUCTION_METADATA_KEY_PREFIX = "dagster-airbyte/reconstruction_metadata"
54
+
35
55
 
36
56
  class AirbyteState:
37
57
  RUNNING = "running"
@@ -791,3 +811,326 @@ def airbyte_cloud_resource(context) -> AirbyteCloudResource:
791
811
 
792
812
  """
793
813
  return AirbyteCloudResource.from_resource_context(context)
814
+
815
+
816
+ # -------------
817
+ # Resources v2
818
+ # -------------
819
+
820
+
821
+ @experimental
822
+ class AirbyteCloudClient(DagsterModel):
823
+ """This class exposes methods on top of the Airbyte APIs for Airbyte Cloud."""
824
+
825
+ workspace_id: str = Field(..., description="The Airbyte workspace ID")
826
+ client_id: str = Field(..., description="The Airbyte client ID.")
827
+ client_secret: str = Field(..., description="The Airbyte client secret.")
828
+ request_max_retries: int = Field(
829
+ ...,
830
+ description=(
831
+ "The maximum number of times requests to the Airbyte API should be retried "
832
+ "before failing."
833
+ ),
834
+ )
835
+ request_retry_delay: float = Field(
836
+ ...,
837
+ description="Time (in seconds) to wait between each request retry.",
838
+ )
839
+ request_timeout: int = Field(
840
+ ...,
841
+ description="Time (in seconds) after which the requests to Airbyte are declared timed out.",
842
+ )
843
+
844
+ _access_token_value: Optional[str] = PrivateAttr(default=None)
845
+ _access_token_timestamp: Optional[float] = PrivateAttr(default=None)
846
+
847
+ @property
848
+ @cached_method
849
+ def _log(self) -> logging.Logger:
850
+ return get_dagster_logger()
851
+
852
+ @property
853
+ def rest_api_base_url(self) -> str:
854
+ return f"{AIRBYTE_REST_API_BASE}/{AIRBYTE_REST_API_VERSION}"
855
+
856
+ @property
857
+ def configuration_api_base_url(self) -> str:
858
+ return f"{AIRBYTE_CONFIGURATION_API_BASE}/{AIRBYTE_CONFIGURATION_API_VERSION}"
859
+
860
+ @property
861
+ def all_additional_request_params(self) -> Mapping[str, Any]:
862
+ return {**self.authorization_request_params, **self.user_agent_request_params}
863
+
864
+ @property
865
+ def authorization_request_params(self) -> Mapping[str, Any]:
866
+ # Make sure the access token is refreshed before using it when calling the API.
867
+ if self._needs_refreshed_access_token():
868
+ self._refresh_access_token()
869
+ return {
870
+ "Authorization": f"Bearer {self._access_token_value}",
871
+ }
872
+
873
+ @property
874
+ def user_agent_request_params(self) -> Mapping[str, Any]:
875
+ return {
876
+ "User-Agent": "dagster",
877
+ }
878
+
879
+ def _refresh_access_token(self) -> None:
880
+ response = check.not_none(
881
+ self._make_request(
882
+ method="POST",
883
+ endpoint="applications/token",
884
+ base_url=self.rest_api_base_url,
885
+ data={
886
+ "client_id": self.client_id,
887
+ "client_secret": self.client_secret,
888
+ },
889
+ # Must not pass the bearer access token when refreshing it.
890
+ include_additional_request_params=False,
891
+ )
892
+ )
893
+ self._access_token_value = str(response["access_token"])
894
+ self._access_token_timestamp = datetime.now().timestamp()
895
+
896
+ def _needs_refreshed_access_token(self) -> bool:
897
+ return (
898
+ not self._access_token_value
899
+ or not self._access_token_timestamp
900
+ or self._access_token_timestamp
901
+ <= (
902
+ datetime.now() - timedelta(seconds=AIRBYTE_CLOUD_REFRESH_TIMEDELTA_SECONDS)
903
+ ).timestamp()
904
+ )
905
+
906
+ def _get_session(self, include_additional_request_params: bool) -> requests.Session:
907
+ headers = {"accept": "application/json"}
908
+ if include_additional_request_params:
909
+ headers = {
910
+ **headers,
911
+ **self.all_additional_request_params,
912
+ }
913
+ session = requests.Session()
914
+ session.headers.update(headers)
915
+ return session
916
+
917
+ def _make_request(
918
+ self,
919
+ method: str,
920
+ endpoint: str,
921
+ base_url: str,
922
+ data: Optional[Mapping[str, Any]] = None,
923
+ params: Optional[Mapping[str, Any]] = None,
924
+ include_additional_request_params: bool = True,
925
+ ) -> Mapping[str, Any]:
926
+ """Creates and sends a request to the desired Airbyte REST API endpoint.
927
+
928
+ Args:
929
+ method (str): The http method to use for this request (e.g. "POST", "GET", "PATCH").
930
+ endpoint (str): The Airbyte API endpoint to send this request to.
931
+ base_url (str): The base url to the Airbyte API to use.
932
+ data (Optional[Dict[str, Any]]): JSON-formatted data string to be included in the request.
933
+ params (Optional[Dict[str, Any]]): JSON-formatted query params to be included in the request.
934
+ include_additional_request_params (bool): Whether to include authorization and user-agent headers
935
+ to the request parameters. Defaults to True.
936
+
937
+ Returns:
938
+ Dict[str, Any]: Parsed json data from the response to this request
939
+ """
940
+ url = f"{base_url}/{endpoint}"
941
+
942
+ num_retries = 0
943
+ while True:
944
+ try:
945
+ session = self._get_session(
946
+ include_additional_request_params=include_additional_request_params
947
+ )
948
+ response = session.request(
949
+ method=method, url=url, json=data, params=params, timeout=self.request_timeout
950
+ )
951
+ response.raise_for_status()
952
+ return response.json()
953
+ except RequestException as e:
954
+ self._log.error(
955
+ f"Request to Airbyte API failed for url {url} with method {method} : {e}"
956
+ )
957
+ if num_retries == self.request_max_retries:
958
+ break
959
+ num_retries += 1
960
+ time.sleep(self.request_retry_delay)
961
+
962
+ raise Failure(f"Max retries ({self.request_max_retries}) exceeded with url: {url}.")
963
+
964
+ def get_connections(self) -> Mapping[str, Any]:
965
+ """Fetches all connections of an Airbyte workspace from the Airbyte REST API."""
966
+ return self._make_request(
967
+ method="GET",
968
+ endpoint="connections",
969
+ base_url=self.rest_api_base_url,
970
+ params={"workspaceIds": self.workspace_id},
971
+ )
972
+
973
+ def get_connection_details(self, connection_id) -> Mapping[str, Any]:
974
+ """Fetches details about a given connection from the Airbyte Configuration API.
975
+ The Airbyte Configuration API is an internal and may change in the future.
976
+ """
977
+ # Using the Airbyte Configuration API to get the connection details, including streams and their configs.
978
+ # https://airbyte-public-api-docs.s3.us-east-2.amazonaws.com/rapidoc-api-docs.html#post-/v1/connections/get
979
+ # https://github.com/airbytehq/airbyte-platform/blob/v1.0.0/airbyte-api/server-api/src/main/openapi/config.yaml
980
+ return self._make_request(
981
+ method="POST",
982
+ endpoint="connections/get",
983
+ base_url=self.configuration_api_base_url,
984
+ data={"connectionId": connection_id},
985
+ )
986
+
987
+ def get_destination_details(self, destination_id: str) -> Mapping[str, Any]:
988
+ """Fetches details about a given destination from the Airbyte REST API."""
989
+ return self._make_request(
990
+ method="GET",
991
+ endpoint=f"destinations/{destination_id}",
992
+ base_url=self.rest_api_base_url,
993
+ )
994
+
995
+
996
+ @experimental
997
+ class AirbyteCloudWorkspace(ConfigurableResource):
998
+ """This class represents a Airbyte Cloud workspace and provides utilities
999
+ to interact with Airbyte APIs.
1000
+ """
1001
+
1002
+ workspace_id: str = Field(..., description="The Airbyte Cloud workspace ID")
1003
+ client_id: str = Field(..., description="The Airbyte Cloud client ID.")
1004
+ client_secret: str = Field(..., description="The Airbyte Cloud client secret.")
1005
+ request_max_retries: int = Field(
1006
+ default=3,
1007
+ description=(
1008
+ "The maximum number of times requests to the Airbyte API should be retried "
1009
+ "before failing."
1010
+ ),
1011
+ )
1012
+ request_retry_delay: float = Field(
1013
+ default=0.25,
1014
+ description="Time (in seconds) to wait between each request retry.",
1015
+ )
1016
+ request_timeout: int = Field(
1017
+ default=15,
1018
+ description="Time (in seconds) after which the requests to Airbyte are declared timed out.",
1019
+ )
1020
+
1021
+ _client: AirbyteCloudClient = PrivateAttr(default=None)
1022
+
1023
+ @cached_method
1024
+ def get_client(self) -> AirbyteCloudClient:
1025
+ return AirbyteCloudClient(
1026
+ workspace_id=self.workspace_id,
1027
+ client_id=self.client_id,
1028
+ client_secret=self.client_secret,
1029
+ request_max_retries=self.request_max_retries,
1030
+ request_retry_delay=self.request_retry_delay,
1031
+ request_timeout=self.request_timeout,
1032
+ )
1033
+
1034
+ def fetch_airbyte_workspace_data(
1035
+ self,
1036
+ ) -> AirbyteWorkspaceData:
1037
+ """Retrieves all Airbyte content from the workspace and returns it as a AirbyteWorkspaceData object.
1038
+
1039
+ Returns:
1040
+ AirbyteWorkspaceData: A snapshot of the Airbyte workspace's content.
1041
+ """
1042
+ connections_by_id = {}
1043
+ destinations_by_id = {}
1044
+
1045
+ client = self.get_client()
1046
+ connections = client.get_connections()["data"]
1047
+
1048
+ for partial_connection_details in connections:
1049
+ full_connection_details = client.get_connection_details(
1050
+ connection_id=partial_connection_details["connectionId"]
1051
+ )
1052
+ connection = AirbyteConnection.from_connection_details(
1053
+ connection_details=full_connection_details
1054
+ )
1055
+ connections_by_id[connection.id] = connection
1056
+
1057
+ destination_details = client.get_destination_details(
1058
+ destination_id=connection.destination_id
1059
+ )
1060
+ destination = AirbyteDestination.from_destination_details(
1061
+ destination_details=destination_details
1062
+ )
1063
+ destinations_by_id[destination.id] = destination
1064
+
1065
+ return AirbyteWorkspaceData(
1066
+ connections_by_id=connections_by_id,
1067
+ destinations_by_id=destinations_by_id,
1068
+ )
1069
+
1070
+
1071
+ @experimental
1072
+ def load_airbyte_cloud_asset_specs(
1073
+ workspace: AirbyteCloudWorkspace,
1074
+ dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
1075
+ ) -> Sequence[AssetSpec]:
1076
+ """Returns a list of AssetSpecs representing the Airbyte content in the workspace.
1077
+
1078
+ Args:
1079
+ workspace (AirbyteCloudWorkspace): The Airbyte Cloud workspace to fetch assets from.
1080
+ dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
1081
+ to convert Airbyte content into :py:class:`dagster.AssetSpec`.
1082
+ Defaults to :py:class:`DagsterAirbyteTranslator`.
1083
+
1084
+ Returns:
1085
+ List[AssetSpec]: The set of assets representing the Airbyte content in the workspace.
1086
+
1087
+ Examples:
1088
+ Loading the asset specs for a given Airbyte Cloud workspace:
1089
+
1090
+ .. code-block:: python
1091
+
1092
+ from dagster_airbyte import AirbyteCloudWorkspace, load_airbyte_cloud_asset_specs
1093
+
1094
+ import dagster as dg
1095
+
1096
+ airbyte_cloud_workspace = AirbyteCloudWorkspace(
1097
+ workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
1098
+ client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
1099
+ client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
1100
+ )
1101
+
1102
+
1103
+ airbyte_cloud_specs = load_airbyte_cloud_asset_specs(airbyte_cloud_workspace)
1104
+ defs = dg.Definitions(assets=airbyte_cloud_specs)
1105
+ """
1106
+ with workspace.process_config_and_initialize_cm() as initialized_workspace:
1107
+ return check.is_list(
1108
+ AirbyteCloudWorkspaceDefsLoader(
1109
+ workspace=initialized_workspace,
1110
+ translator=dagster_airbyte_translator or DagsterAirbyteTranslator(),
1111
+ )
1112
+ .build_defs()
1113
+ .assets,
1114
+ AssetSpec,
1115
+ )
1116
+
1117
+
1118
+ @record
1119
+ class AirbyteCloudWorkspaceDefsLoader(StateBackedDefinitionsLoader[Mapping[str, Any]]):
1120
+ workspace: AirbyteCloudWorkspace
1121
+ translator: DagsterAirbyteTranslator
1122
+
1123
+ @property
1124
+ def defs_key(self) -> str:
1125
+ return f"{AIRBYTE_RECONSTRUCTION_METADATA_KEY_PREFIX}/{self.workspace.workspace_id}"
1126
+
1127
+ def fetch_state(self) -> AirbyteWorkspaceData:
1128
+ return self.workspace.fetch_airbyte_workspace_data()
1129
+
1130
+ def defs_from_state(self, state: AirbyteWorkspaceData) -> Definitions:
1131
+ all_asset_specs = [
1132
+ self.translator.get_asset_spec(props)
1133
+ for props in state.to_airbyte_connection_table_props_data()
1134
+ ]
1135
+
1136
+ return Definitions(assets=all_asset_specs)
@@ -0,0 +1,190 @@
1
+ from typing import Any, List, Mapping, Optional, Sequence
2
+
3
+ from dagster._annotations import experimental
4
+ from dagster._core.definitions.asset_key import AssetKey
5
+ from dagster._core.definitions.asset_spec import AssetSpec
6
+ from dagster._core.definitions.metadata.metadata_set import NamespacedMetadataSet, TableMetadataSet
7
+ from dagster._record import record
8
+ from dagster._serdes.serdes import whitelist_for_serdes
9
+ from dagster._utils.cached_method import cached_method
10
+
11
+ from dagster_airbyte.utils import generate_table_schema, get_airbyte_connection_table_name
12
+
13
+
14
+ @record
15
+ class AirbyteConnectionTableProps:
16
+ table_name: str
17
+ stream_prefix: Optional[str]
18
+ stream_name: str
19
+ json_schema: Mapping[str, Any]
20
+ connection_id: str
21
+ connection_name: str
22
+ database: Optional[str]
23
+ schema: Optional[str]
24
+
25
+ @property
26
+ def fully_qualified_table_name(self) -> Optional[str]:
27
+ return (
28
+ f"{self.database}.{self.schema}.{self.stream_name}"
29
+ if self.database and self.schema
30
+ else None
31
+ )
32
+
33
+
34
+ @whitelist_for_serdes
35
+ @record
36
+ class AirbyteConnection:
37
+ """Represents an Airbyte connection, based on data as returned from the API."""
38
+
39
+ id: str
40
+ name: str
41
+ stream_prefix: Optional[str]
42
+ streams: Mapping[str, "AirbyteStream"]
43
+ destination_id: str
44
+
45
+ @classmethod
46
+ def from_connection_details(
47
+ cls,
48
+ connection_details: Mapping[str, Any],
49
+ ) -> "AirbyteConnection":
50
+ return cls(
51
+ id=connection_details["connectionId"],
52
+ name=connection_details["name"],
53
+ stream_prefix=connection_details.get("prefix"),
54
+ streams={
55
+ stream_details["stream"]["name"]: AirbyteStream.from_stream_details(
56
+ stream_details=stream_details
57
+ )
58
+ for stream_details in connection_details.get("syncCatalog", {}).get("streams", [])
59
+ },
60
+ destination_id=connection_details["destinationId"],
61
+ )
62
+
63
+
64
+ @whitelist_for_serdes
65
+ @record
66
+ class AirbyteDestination:
67
+ """Represents an Airbyte destination, based on data as returned from the API."""
68
+
69
+ id: str
70
+ database: Optional[str]
71
+ schema: Optional[str]
72
+
73
+ @classmethod
74
+ def from_destination_details(
75
+ cls,
76
+ destination_details: Mapping[str, Any],
77
+ ) -> "AirbyteDestination":
78
+ return cls(
79
+ id=destination_details["destinationId"],
80
+ database=destination_details["configuration"].get("database"),
81
+ schema=destination_details["configuration"].get("schema"),
82
+ )
83
+
84
+
85
+ @whitelist_for_serdes
86
+ @record
87
+ class AirbyteStream:
88
+ """Represents an Airbyte stream, based on data as returned from the API.
89
+ A stream in Airbyte corresponds to a table.
90
+ """
91
+
92
+ name: str
93
+ selected: bool
94
+ json_schema: Mapping[str, Any]
95
+
96
+ @classmethod
97
+ def from_stream_details(
98
+ cls,
99
+ stream_details: Mapping[str, Any],
100
+ ) -> "AirbyteStream":
101
+ return cls(
102
+ name=stream_details["stream"]["name"],
103
+ selected=stream_details["config"].get("selected", False),
104
+ json_schema=stream_details["stream"].get("jsonSchema", {}),
105
+ )
106
+
107
+
108
+ @whitelist_for_serdes
109
+ @record
110
+ class AirbyteWorkspaceData:
111
+ """A record representing all content in an Airbyte workspace.
112
+ This applies to both Airbyte OSS and Cloud.
113
+ """
114
+
115
+ connections_by_id: Mapping[str, AirbyteConnection]
116
+ destinations_by_id: Mapping[str, AirbyteDestination]
117
+
118
+ @cached_method
119
+ def to_airbyte_connection_table_props_data(self) -> Sequence[AirbyteConnectionTableProps]:
120
+ """Method that converts a `AirbyteWorkspaceData` object
121
+ to a collection of `AirbyteConnectionTableProps` objects.
122
+ """
123
+ data: List[AirbyteConnectionTableProps] = []
124
+
125
+ for connection in self.connections_by_id.values():
126
+ destination = self.destinations_by_id[connection.destination_id]
127
+
128
+ for stream in connection.streams.values():
129
+ if stream.selected:
130
+ data.append(
131
+ AirbyteConnectionTableProps(
132
+ table_name=get_airbyte_connection_table_name(
133
+ stream_prefix=connection.stream_prefix,
134
+ stream_name=stream.name,
135
+ ),
136
+ stream_prefix=connection.stream_prefix,
137
+ stream_name=stream.name,
138
+ json_schema=stream.json_schema,
139
+ connection_id=connection.id,
140
+ connection_name=connection.name,
141
+ database=destination.database,
142
+ schema=destination.schema,
143
+ )
144
+ )
145
+
146
+ return data
147
+
148
+
149
+ class AirbyteMetadataSet(NamespacedMetadataSet):
150
+ connection_id: str
151
+ connection_name: str
152
+ stream_prefix: Optional[str] = None
153
+
154
+ @classmethod
155
+ def namespace(cls) -> str:
156
+ return "dagster-airbyte"
157
+
158
+
159
+ @experimental
160
+ class DagsterAirbyteTranslator:
161
+ """Translator class which converts a `AirbyteConnectionTableProps` object into AssetSpecs.
162
+ Subclass this class to implement custom logic how to translate Airbyte content into asset spec.
163
+ """
164
+
165
+ def get_asset_spec(self, props: AirbyteConnectionTableProps) -> AssetSpec:
166
+ """Get the AssetSpec for a table synced by an Airbyte connection."""
167
+ table_schema_props = (
168
+ props.json_schema.get("properties")
169
+ or props.json_schema.get("items", {}).get("properties")
170
+ or {}
171
+ )
172
+ column_schema = generate_table_schema(table_schema_props)
173
+
174
+ metadata = {
175
+ **TableMetadataSet(
176
+ column_schema=column_schema,
177
+ table_name=props.fully_qualified_table_name,
178
+ ),
179
+ **AirbyteMetadataSet(
180
+ connection_id=props.connection_id,
181
+ connection_name=props.connection_name,
182
+ stream_prefix=props.stream_prefix,
183
+ ),
184
+ }
185
+
186
+ return AssetSpec(
187
+ key=AssetKey(props.table_name),
188
+ metadata=metadata,
189
+ kinds={"airbyte"},
190
+ )
@@ -6,6 +6,10 @@ from dagster._core.definitions.metadata.table import TableColumn, TableSchema
6
6
  from dagster_airbyte.types import AirbyteOutput
7
7
 
8
8
 
9
+ def get_airbyte_connection_table_name(stream_prefix: Optional[str], stream_name: str) -> str:
10
+ return f"{stream_prefix if stream_prefix else ''}{stream_name}"
11
+
12
+
9
13
  def generate_table_schema(stream_schema_props: Mapping[str, Any]) -> TableSchema:
10
14
  return TableSchema(
11
15
  columns=sorted(
@@ -0,0 +1 @@
1
+ __version__ = "0.25.5"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dagster-airbyte
3
- Version: 0.25.4
3
+ Version: 0.25.5
4
4
  Summary: Package for integrating Airbyte with Dagster.
5
5
  Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-airbyte
6
6
  Author: Dagster Labs
@@ -9,6 +9,7 @@ dagster_airbyte/cli.py
9
9
  dagster_airbyte/ops.py
10
10
  dagster_airbyte/py.typed
11
11
  dagster_airbyte/resources.py
12
+ dagster_airbyte/translator.py
12
13
  dagster_airbyte/types.py
13
14
  dagster_airbyte/utils.py
14
15
  dagster_airbyte/version.py
@@ -1,8 +1,8 @@
1
- dagster==1.9.4
1
+ dagster==1.9.5
2
2
  requests
3
3
 
4
4
  [managed]
5
- dagster-managed-elements==0.25.4
5
+ dagster-managed-elements==0.25.5
6
6
 
7
7
  [test]
8
8
  requests-mock
@@ -37,7 +37,7 @@ setup(
37
37
  include_package_data=True,
38
38
  python_requires=">=3.9,<3.13",
39
39
  install_requires=[
40
- "dagster==1.9.4",
40
+ "dagster==1.9.5",
41
41
  "requests",
42
42
  ],
43
43
  zip_safe=False,
@@ -52,7 +52,7 @@ setup(
52
52
  "flaky",
53
53
  ],
54
54
  "managed": [
55
- "dagster-managed-elements==0.25.4",
55
+ "dagster-managed-elements==0.25.5",
56
56
  ],
57
57
  },
58
58
  )
@@ -1 +0,0 @@
1
- __version__ = "0.25.4"