dagster-airbyte 0.23.7__py3-none-any.whl → 0.25.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dagster-airbyte might be problematic. Click here for more details.

@@ -0,0 +1,236 @@
1
+ from collections.abc import Mapping, Sequence
2
+ from enum import Enum
3
+ from typing import Any, Optional
4
+
5
+ from dagster._annotations import deprecated, experimental
6
+ from dagster._core.definitions.asset_key import AssetKey
7
+ from dagster._core.definitions.asset_spec import AssetSpec
8
+ from dagster._core.definitions.metadata.metadata_set import NamespacedMetadataSet, TableMetadataSet
9
+ from dagster._record import record
10
+ from dagster._serdes.serdes import whitelist_for_serdes
11
+ from dagster._utils.cached_method import cached_method
12
+
13
+ from dagster_airbyte.utils import generate_table_schema, get_airbyte_connection_table_name
14
+
15
+
16
class AirbyteJobStatusType(str, Enum):
    """String-valued enumeration of Airbyte job statuses.

    Because the enum mixes in ``str``, each member compares equal to its
    lowercase string value (e.g. ``AirbyteJobStatusType.RUNNING == "running"``).
    """

    RUNNING = "running"
    SUCCEEDED = "succeeded"
    CANCELLED = "cancelled"
    PENDING = "pending"
    FAILED = "failed"
    ERROR = "error"
    INCOMPLETE = "incomplete"
24
+
25
+
26
@deprecated(breaking_version="1.10", additional_warn_text="Use `AirbyteJobStatusType` instead.")
class AirbyteState:
    """Deprecated holder of Airbyte job status constants.

    Every attribute is simply the `AirbyteJobStatusType` member of the same
    name; use `AirbyteJobStatusType` directly in new code.
    """

    RUNNING = AirbyteJobStatusType.RUNNING
    SUCCEEDED = AirbyteJobStatusType.SUCCEEDED
    CANCELLED = AirbyteJobStatusType.CANCELLED
    PENDING = AirbyteJobStatusType.PENDING
    FAILED = AirbyteJobStatusType.FAILED
    ERROR = AirbyteJobStatusType.ERROR
    INCOMPLETE = AirbyteJobStatusType.INCOMPLETE
35
+
36
+
37
@record
class AirbyteConnectionTableProps:
    """Properties describing one table synced by an Airbyte connection."""

    # Name used for the table (and, in the translator, for the asset key).
    table_name: str
    # Optional prefix configured on the connection.
    stream_prefix: Optional[str]
    # Name of the Airbyte stream backing the table.
    stream_name: str
    # JSON schema of the stream.
    json_schema: Mapping[str, Any]
    connection_id: str
    connection_name: str
    # Destination details; any of these may be unknown.
    destination_type: Optional[str]
    database: Optional[str]
    schema: Optional[str]

    @property
    def fully_qualified_table_name(self) -> Optional[str]:
        """Return ``database.schema.stream_name``, or None if database or schema is unset/empty."""
        if not (self.database and self.schema):
            return None
        return f"{self.database}.{self.schema}.{self.stream_name}"
56
+
57
+
58
@whitelist_for_serdes
@record
class AirbyteConnection:
    """Represents an Airbyte connection, based on data as returned from the API."""

    id: str
    name: str
    stream_prefix: Optional[str]
    # Streams of the connection, keyed by stream name.
    streams: Mapping[str, "AirbyteStream"]
    destination_id: str

    @classmethod
    def from_connection_details(
        cls,
        connection_details: Mapping[str, Any],
    ) -> "AirbyteConnection":
        """Build an `AirbyteConnection` from a connection-details mapping.

        Requires the ``connectionId``, ``name`` and ``destinationId`` keys;
        ``prefix`` and ``syncCatalog.streams`` are optional.
        """
        # Required/optional scalars are read in the same order as the
        # constructor arguments, so a missing key surfaces the same KeyError.
        connection_id = connection_details["connectionId"]
        connection_name = connection_details["name"]
        prefix = connection_details.get("prefix")

        streams_by_name = {}
        for entry in connection_details.get("syncCatalog", {}).get("streams", []):
            stream_name = entry["stream"]["name"]
            streams_by_name[stream_name] = AirbyteStream.from_stream_details(
                stream_details=entry
            )

        return cls(
            id=connection_id,
            name=connection_name,
            stream_prefix=prefix,
            streams=streams_by_name,
            destination_id=connection_details["destinationId"],
        )
86
+
87
+
88
@whitelist_for_serdes
@record
class AirbyteDestination:
    """Represents an Airbyte destination, based on data as returned from the API."""

    id: str
    type: str
    database: Optional[str]
    schema: Optional[str]

    @classmethod
    def from_destination_details(
        cls,
        destination_details: Mapping[str, Any],
    ) -> "AirbyteDestination":
        """Build an `AirbyteDestination` from a destination-details mapping.

        Requires the ``destinationId``, ``destinationType`` and
        ``configuration`` keys; ``database`` and ``schema`` are read from the
        configuration and default to None when absent.
        """
        destination_id = destination_details["destinationId"]
        destination_type = destination_details["destinationType"]
        configuration = destination_details["configuration"]
        return cls(
            id=destination_id,
            type=destination_type,
            database=configuration.get("database"),
            schema=configuration.get("schema"),
        )
109
+
110
+
111
@whitelist_for_serdes
@record
class AirbyteStream:
    """Represents an Airbyte stream, based on data as returned from the API.
    A stream in Airbyte corresponds to a table.
    """

    name: str
    selected: bool
    json_schema: Mapping[str, Any]

    @classmethod
    def from_stream_details(
        cls,
        stream_details: Mapping[str, Any],
    ) -> "AirbyteStream":
        """Build an `AirbyteStream` from a stream-details mapping.

        Reads the name and optional ``jsonSchema`` (default ``{}``) from the
        ``stream`` entry and the ``selected`` flag (default False) from the
        ``config`` entry.
        """
        stream_name = stream_details["stream"]["name"]
        is_selected = stream_details["config"].get("selected", False)
        schema = stream_details["stream"].get("jsonSchema", {})
        return cls(name=stream_name, selected=is_selected, json_schema=schema)
132
+
133
+
134
@whitelist_for_serdes
@record
class AirbyteJob:
    """Represents an Airbyte job, based on data as returned from the API."""

    id: int
    status: str

    @classmethod
    def from_job_details(
        cls,
        job_details: Mapping[str, Any],
    ) -> "AirbyteJob":
        """Build an `AirbyteJob` from the ``jobId`` and ``status`` keys of a job-details mapping."""
        job_id = job_details["jobId"]
        job_status = job_details["status"]
        return cls(id=job_id, status=job_status)
151
+
152
+
153
@whitelist_for_serdes
@record
class AirbyteWorkspaceData:
    """A record representing all content in an Airbyte workspace.
    This applies to both Airbyte OSS and Cloud.
    """

    connections_by_id: Mapping[str, AirbyteConnection]
    destinations_by_id: Mapping[str, AirbyteDestination]

    @cached_method
    def to_airbyte_connection_table_props_data(self) -> Sequence[AirbyteConnectionTableProps]:
        """Flatten the workspace into one `AirbyteConnectionTableProps` per selected stream.

        Streams whose ``selected`` flag is falsy are skipped. Raises KeyError
        if a connection references a destination id that is not present in
        ``destinations_by_id``.
        """
        table_props: list[AirbyteConnectionTableProps] = []

        for connection in self.connections_by_id.values():
            # Looked up once per connection, even when no stream is selected.
            destination = self.destinations_by_id[connection.destination_id]

            table_props.extend(
                AirbyteConnectionTableProps(
                    table_name=get_airbyte_connection_table_name(
                        stream_prefix=connection.stream_prefix,
                        stream_name=stream.name,
                    ),
                    stream_prefix=connection.stream_prefix,
                    stream_name=stream.name,
                    json_schema=stream.json_schema,
                    connection_id=connection.id,
                    connection_name=connection.name,
                    destination_type=destination.type,
                    database=destination.database,
                    schema=destination.schema,
                )
                for stream in connection.streams.values()
                if stream.selected
            )

        return table_props
193
+
194
+
195
class AirbyteMetadataSet(NamespacedMetadataSet):
    """Airbyte-specific metadata entries, stored under the ``dagster-airbyte`` namespace."""

    connection_id: str
    connection_name: str
    stream_prefix: Optional[str] = None

    @classmethod
    def namespace(cls) -> str:
        """Return the namespace string for this metadata set."""
        return "dagster-airbyte"
203
+
204
+
205
@experimental
class DagsterAirbyteTranslator:
    """Translator class which converts a `AirbyteConnectionTableProps` object into AssetSpecs.
    Subclass this class to implement custom logic how to translate Airbyte content into asset spec.
    """

    def get_asset_spec(self, props: AirbyteConnectionTableProps) -> AssetSpec:
        """Get the AssetSpec for a table synced by an Airbyte connection."""
        # Column definitions come from the top-level "properties" of the
        # stream schema, falling back to "items.properties", then to nothing.
        column_props = props.json_schema.get("properties")
        if not column_props:
            column_props = props.json_schema.get("items", {}).get("properties")
        if not column_props:
            column_props = {}
        column_schema = generate_table_schema(column_props)

        table_metadata = TableMetadataSet(
            column_schema=column_schema,
            table_name=props.fully_qualified_table_name,
        )
        airbyte_metadata = AirbyteMetadataSet(
            connection_id=props.connection_id,
            connection_name=props.connection_name,
            stream_prefix=props.stream_prefix,
        )

        # Always tagged "airbyte"; the destination type is added when known.
        kinds = {"airbyte"}
        if props.destination_type:
            kinds.add(props.destination_type)

        return AssetSpec(
            key=AssetKey(props.table_name),
            metadata={**table_metadata, **airbyte_metadata},
            kinds=kinds,
        )
dagster_airbyte/types.py CHANGED
@@ -1,4 +1,5 @@
1
- from typing import Any, Mapping, NamedTuple, Optional
1
+ from collections.abc import Mapping
2
+ from typing import Any, NamedTuple, Optional
2
3
 
3
4
  from dagster._core.definitions.metadata.table import TableSchema
4
5
 
@@ -6,10 +7,14 @@ from dagster._core.definitions.metadata.table import TableSchema
6
7
  class AirbyteTableMetadata:
7
8
  def __init__(
8
9
  self,
10
+ raw_table_name: str,
9
11
  schema: TableSchema,
10
12
  normalization_tables: Optional[Mapping[str, "AirbyteTableMetadata"]] = None,
11
13
  ):
12
- """Contains metadata about an Airbyte table, including its schema and any created normalization tables."""
14
+ """Contains metadata about an Airbyte table, including its destination raw table name,
15
+ schema and any created normalization tables.
16
+ """
17
+ self.raw_table_name = raw_table_name
13
18
  self.schema = schema
14
19
  self.normalization_tables = normalization_tables or dict()
15
20
 
dagster_airbyte/utils.py CHANGED
@@ -1,10 +1,31 @@
1
- from typing import Any, Iterator, Mapping, Optional, Sequence
1
+ import re
2
+ from collections.abc import Iterator, Mapping, Sequence
3
+ from typing import TYPE_CHECKING, Any, Optional
2
4
 
3
- from dagster import AssetMaterialization, MetadataValue
5
+ from dagster import (
6
+ AssetMaterialization,
7
+ AssetsDefinition,
8
+ DagsterInvariantViolationError,
9
+ MetadataValue,
10
+ )
4
11
  from dagster._core.definitions.metadata.table import TableColumn, TableSchema
5
12
 
6
13
  from dagster_airbyte.types import AirbyteOutput
7
14
 
15
+ if TYPE_CHECKING:
16
+ from dagster_airbyte import DagsterAirbyteTranslator
17
+
18
+ DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY = "dagster-airbyte/dagster_airbyte_translator"
19
+
20
+
21
def clean_name(name: str) -> str:
    """Cleans an input to be a valid Dagster asset name.

    The input is lowercased and every run of characters outside ``a-z`` and
    ``0-9`` is collapsed into a single underscore.
    """
    lowered = name.lower()
    return re.compile(r"[^a-z0-9]+").sub("_", lowered)
24
+
25
+
26
def get_airbyte_connection_table_name(stream_prefix: Optional[str], stream_name: str) -> str:
    """Return the stream name preceded by the stream prefix, if a non-empty prefix is given."""
    prefix = stream_prefix or ""
    return f"{prefix}{stream_name}"
28
+
8
29
 
9
30
  def generate_table_schema(stream_schema_props: Mapping[str, Any]) -> TableSchema:
10
31
  return TableSchema(
@@ -74,3 +95,18 @@ def generate_materializations(
74
95
  all_stream_stats.get(stream_name, {}),
75
96
  asset_key_prefix=asset_key_prefix,
76
97
  )
98
+
99
+
100
def get_translator_from_airbyte_assets(
    airbyte_assets: AssetsDefinition,
) -> "DagsterAirbyteTranslator":
    """Return the translator stored in the metadata of an Airbyte assets definition.

    The translator is read from the metadata of the first asset key in
    ``airbyte_assets.metadata_by_key``, under
    ``DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY``.

    Raises:
        DagsterInvariantViolationError: If the assets definition carries no
            asset metadata at all, or the first asset's metadata has no
            translator entry.
    """
    metadata_by_key = airbyte_assets.metadata_by_key or {}

    # Iterate the defaulted mapping (not the raw attribute) and supply a
    # default to next(), so an empty/None mapping yields a descriptive error
    # instead of a TypeError or a bare StopIteration.
    first_asset_key = next(iter(metadata_by_key), None)
    if first_asset_key is None:
        raise DagsterInvariantViolationError(
            "Expected to find airbyte translator metadata on the provided assets,"
            " but they have no asset metadata. Did you pass in assets that weren't"
            " generated by @airbyte_assets?"
        )

    first_metadata = metadata_by_key.get(first_asset_key, {})
    dagster_airbyte_translator = first_metadata.get(DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY)
    if dagster_airbyte_translator is None:
        raise DagsterInvariantViolationError(
            f"Expected to find airbyte translator metadata on asset {first_asset_key.to_user_string()},"
            " but did not. Did you pass in assets that weren't generated by @airbyte_assets?"
        )
    return dagster_airbyte_translator
@@ -1 +1 @@
1
- __version__ = "0.23.7"
1
+ __version__ = "0.25.10"
@@ -1,24 +1,24 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dagster-airbyte
3
- Version: 0.23.7
3
+ Version: 0.25.10
4
4
  Summary: Package for integrating Airbyte with Dagster.
5
5
  Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-airbyte
6
6
  Author: Dagster Labs
7
7
  Author-email: hello@dagsterlabs.com
8
8
  License: Apache-2.0
9
- Classifier: Programming Language :: Python :: 3.8
10
9
  Classifier: Programming Language :: Python :: 3.9
11
10
  Classifier: Programming Language :: Python :: 3.10
12
11
  Classifier: Programming Language :: Python :: 3.11
13
12
  Classifier: Programming Language :: Python :: 3.12
14
13
  Classifier: License :: OSI Approved :: Apache Software License
15
14
  Classifier: Operating System :: OS Independent
16
- Requires-Python: >=3.8,<3.13
15
+ Requires-Python: >=3.9,<3.13
17
16
  License-File: LICENSE
18
- Requires-Dist: dagster ==1.7.7
17
+ Requires-Dist: dagster ==1.9.10
19
18
  Requires-Dist: requests
20
19
  Provides-Extra: managed
21
- Requires-Dist: dagster-managed-elements ==0.23.7 ; extra == 'managed'
20
+ Requires-Dist: dagster-managed-elements ==0.25.10 ; extra == 'managed'
22
21
  Provides-Extra: test
23
22
  Requires-Dist: requests-mock ; extra == 'test'
23
+ Requires-Dist: flaky ; extra == 'test'
24
24
 
@@ -0,0 +1,23 @@
1
+ dagster_airbyte/__init__.py,sha256=T3mFUhqESk-F0FISreWfuLzULKYCO67WtGzbxGVUxdY,1719
2
+ dagster_airbyte/asset_decorator.py,sha256=s0wFVFqe8kOGSiwyZx9n96C-_tnYfGuxsf_qefeFjJU,4209
3
+ dagster_airbyte/asset_defs.py,sha256=039JPL5Jy0WP34K3JUO3hvQpbBLVLMOqY0bgyh73VHA,50059
4
+ dagster_airbyte/cli.py,sha256=HErteP1MjfHozKKSrznh0yAreKETbXp5NDHzXGsdvvE,425
5
+ dagster_airbyte/ops.py,sha256=oOEczVYpqVRTc1-0osfpR5FZbPjNJDYihgRjk_qtOQA,4229
6
+ dagster_airbyte/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
7
+ dagster_airbyte/resources.py,sha256=PhRaLAiYtN83vk3sqhzebhw605DYF1ZY6Q87XvG0KRs,53552
8
+ dagster_airbyte/translator.py,sha256=jOdBA5cDb1dRIgJZlqQv_SzqPRzvnm9N-UmA0yK_tsI,7546
9
+ dagster_airbyte/types.py,sha256=Pc3X0BGYo2dabRGUJf0TinSNoW_QHFOEioTyAN0NCyc,1571
10
+ dagster_airbyte/utils.py,sha256=wJtzx5cWq8Bp1f3T4XjLPaxjXZGstHlL4kRYP2c49bs,4157
11
+ dagster_airbyte/version.py,sha256=Nw3Sk1aJKD__b1pX4zBOVEcqxnFwhwsJ1cNd5rN8Cgs,24
12
+ dagster_airbyte/managed/__init__.py,sha256=6SBtyNOMJ9Cu2UIwFExJHpL_ZVFo3rPMvyIxVOsKvWE,469
13
+ dagster_airbyte/managed/reconciliation.py,sha256=K-n2_nPwLQrsZSXSo7a0AcAjglMDsvhRErb5zmxlu7E,34814
14
+ dagster_airbyte/managed/types.py,sha256=isPfX8L9YwtZAf9Vk4hhxBePLR00AEldsdK2TsM1H2o,14611
15
+ dagster_airbyte/managed/generated/__init__.py,sha256=eYq-yfXEeffuKAVFXY8plD0se1wHjFNVqklpbu9gljw,108
16
+ dagster_airbyte/managed/generated/destinations.py,sha256=x1wmWlXvOJHtfaZva3ErdKuVS--sDvfidSXR5ji9G5w,119692
17
+ dagster_airbyte/managed/generated/sources.py,sha256=y0TPNvcRd8c9mhje-NoXsHeKRPt1nXcpww8mNAtqCps,282685
18
+ dagster_airbyte-0.25.10.dist-info/LICENSE,sha256=TMatHW4_G9ldRdodEAp-l2Xa2WvsdeOh60E3v1R2jis,11349
19
+ dagster_airbyte-0.25.10.dist-info/METADATA,sha256=K8P5rqqTVw-_l9_WtHXht4DHtvitORG5g_U2V3O2mso,918
20
+ dagster_airbyte-0.25.10.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
21
+ dagster_airbyte-0.25.10.dist-info/entry_points.txt,sha256=XrbLOz3LpgPV5fdwMmgdP6Rp1AfSG07KeWIddLqh7Lw,61
22
+ dagster_airbyte-0.25.10.dist-info/top_level.txt,sha256=HLwIRQCzqItn88_KbPP8DNTKKQEBUVKk6NCn4PrCtqY,16
23
+ dagster_airbyte-0.25.10.dist-info/RECORD,,
@@ -1,20 +0,0 @@
1
- dagster_airbyte/__init__.py,sha256=KRUcHzOxDEmo9oO7qRwJNrRnxK28PeHTeUemMS4z2Gc,1215
2
- dagster_airbyte/asset_defs.py,sha256=BhC0MSF8U2V-rA_rFUlMrN5WuvihSQAGXls3zlQNfys,48304
3
- dagster_airbyte/cli.py,sha256=HErteP1MjfHozKKSrznh0yAreKETbXp5NDHzXGsdvvE,425
4
- dagster_airbyte/ops.py,sha256=OaD6DA9Iz-T6hroSAQcrruqZWgR8ECxksupg0CQR_DU,4128
5
- dagster_airbyte/resources.py,sha256=ZRRKc1DwKluK6exEKkJ_FWIG5TKyTDOg1zt83TEM3Bg,27040
6
- dagster_airbyte/types.py,sha256=fwqUv_MZCegwHhSELgUqm1H1JVUb-m83CyXFkd0r-ko,1425
7
- dagster_airbyte/utils.py,sha256=cFKCkGFAvwr17KFTeqpVtQRDsNo4zpqw9yr2-1YSJeI,2823
8
- dagster_airbyte/version.py,sha256=cC_HEsX6d06QmpjNLfRezzJvL501PPwiQYXWDM2_2UI,23
9
- dagster_airbyte/managed/__init__.py,sha256=azuKhCI8eUsmEGBL2M2d6qkqQ0vhsV9H5IioP83kbm8,423
10
- dagster_airbyte/managed/reconciliation.py,sha256=HgrLT-Xs8vWY9SfbdBXuorMf60KCn5Qz7bPITW5MxJo,34862
11
- dagster_airbyte/managed/types.py,sha256=em14WkgRz0w_N6mflebHaJQzMA1Iide29u0mTfFTs4Y,14588
12
- dagster_airbyte/managed/generated/__init__.py,sha256=1ChWhXU1uyX_9rClMD6XHzkriKOc0fjIQntsoI3G99g,76
13
- dagster_airbyte/managed/generated/destinations.py,sha256=afBHge9FQENIBVywHcBoUcSyGqrLDjQpv1XgHkks7Vs,119751
14
- dagster_airbyte/managed/generated/sources.py,sha256=dJKeOiYZtq23svje9ohXSZozlInAaEkDJaMiV_Kdfoo,282772
15
- dagster_airbyte-0.23.7.dist-info/LICENSE,sha256=TMatHW4_G9ldRdodEAp-l2Xa2WvsdeOh60E3v1R2jis,11349
16
- dagster_airbyte-0.23.7.dist-info/METADATA,sha256=2XfQZwPtIXXCVn47vwYyJkeB5wVsjRJwqe90v01TCag,926
17
- dagster_airbyte-0.23.7.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
18
- dagster_airbyte-0.23.7.dist-info/entry_points.txt,sha256=XrbLOz3LpgPV5fdwMmgdP6Rp1AfSG07KeWIddLqh7Lw,61
19
- dagster_airbyte-0.23.7.dist-info/top_level.txt,sha256=HLwIRQCzqItn88_KbPP8DNTKKQEBUVKk6NCn4PrCtqY,16
20
- dagster_airbyte-0.23.7.dist-info/RECORD,,