dagster-airbyte 0.24.9__py3-none-any.whl → 0.24.11__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dagster-airbyte might be problematic. Click here for more details.

@@ -40,7 +40,7 @@ from dagster._core.definitions.cacheable_assets import (
40
40
  CacheableAssetsDefinition,
41
41
  )
42
42
  from dagster._core.definitions.events import CoercibleToAssetKey, CoercibleToAssetKeyPrefix
43
- from dagster._core.definitions.metadata import MetadataValue, TableSchemaMetadataValue
43
+ from dagster._core.definitions.metadata.metadata_set import TableMetadataSet
44
44
  from dagster._core.definitions.metadata.table import TableSchema
45
45
  from dagster._core.errors import DagsterInvalidDefinitionError, DagsterInvalidInvocationError
46
46
  from dagster._core.execution.context.init import build_init_resource_context
@@ -62,9 +62,13 @@ def _table_to_output_name_fn(table: str) -> str:
62
62
  def _build_airbyte_asset_defn_metadata(
63
63
  connection_id: str,
64
64
  destination_tables: Sequence[str],
65
+ destination_raw_table_names_by_table: Mapping[str, str],
66
+ destination_database: Optional[str],
67
+ destination_schema: Optional[str],
65
68
  table_to_asset_key_fn: Callable[[str], AssetKey],
66
69
  asset_key_prefix: Optional[Sequence[str]] = None,
67
70
  normalization_tables: Optional[Mapping[str, Set[str]]] = None,
71
+ normalization_raw_table_names_by_table: Optional[Mapping[str, str]] = None,
68
72
  upstream_assets: Optional[Iterable[AssetKey]] = None,
69
73
  group_name: Optional[str] = None,
70
74
  io_manager_key: Optional[str] = None,
@@ -112,6 +116,30 @@ def _build_airbyte_asset_defn_metadata(
112
116
  for table in destination_tables:
113
117
  internal_deps[table] = set(upstream_assets or [])
114
118
 
119
+ relation_identifiers: Dict[str, str] = {}
120
+ for table in destination_tables:
121
+ if destination_database and destination_schema and table:
122
+ # Use the destination raw table name to create the relation identifier
123
+ relation_identifiers[table] = ".".join(
124
+ [
125
+ destination_database,
126
+ destination_schema,
127
+ destination_raw_table_names_by_table[table],
128
+ ]
129
+ )
130
+ if normalization_tables and normalization_raw_table_names_by_table:
131
+ for normalization_table in normalization_tables.get(table, set()):
132
+ relation_identifiers[normalization_table] = ".".join(
133
+ [
134
+ destination_database,
135
+ destination_schema,
136
+ destination_raw_table_names_by_table[table],
137
+ normalization_raw_table_names_by_table[normalization_table],
138
+ ]
139
+ )
140
+
141
+ schema_by_table_name = schema_by_table_name if schema_by_table_name else {}
142
+
115
143
  return AssetsDefinitionCacheableData(
116
144
  keys_by_input_name=(
117
145
  {asset_key.path[-1]: asset_key for asset_key in upstream_assets}
@@ -125,11 +153,14 @@ def _build_airbyte_asset_defn_metadata(
125
153
  can_subset=False,
126
154
  metadata_by_output_name=(
127
155
  {
128
- table: {"table_schema": MetadataValue.table_schema(schema_by_table_name[table])}
156
+ table: {
157
+ **TableMetadataSet(
158
+ column_schema=schema_by_table_name.get(table),
159
+ relation_identifier=relation_identifiers.get(table),
160
+ ),
161
+ }
129
162
  for table in tables
130
163
  }
131
- if schema_by_table_name
132
- else None
133
164
  ),
134
165
  freshness_policies_by_output_name=(
135
166
  {output: freshness_policy for output in outputs} if freshness_policy else None
@@ -167,10 +198,7 @@ def _build_airbyte_assets_from_metadata(
167
198
  k: AssetOut(
168
199
  key=v,
169
200
  metadata=(
170
- {
171
- k: cast(TableSchemaMetadataValue, v)
172
- for k, v in assets_defn_meta.metadata_by_output_name.get(k, {}).items()
173
- }
201
+ assets_defn_meta.metadata_by_output_name.get(k)
174
202
  if assets_defn_meta.metadata_by_output_name
175
203
  else None
176
204
  ),
@@ -225,6 +253,8 @@ def _build_airbyte_assets_from_metadata(
225
253
  def build_airbyte_assets(
226
254
  connection_id: str,
227
255
  destination_tables: Sequence[str],
256
+ destination_database: Optional[str] = None,
257
+ destination_schema: Optional[str] = None,
228
258
  asset_key_prefix: Optional[Sequence[str]] = None,
229
259
  group_name: Optional[str] = None,
230
260
  normalization_tables: Optional[Mapping[str, Set[str]]] = None,
@@ -243,6 +273,8 @@ def build_airbyte_assets(
243
273
  destination_tables (List[str]): The names of the tables that you want to be represented
244
274
  in the Dagster asset graph for this sync. This will generally map to the name of the
245
275
  stream in Airbyte, unless a stream prefix has been specified in Airbyte.
276
+ destination_database (Optional[str]): The name of the destination database.
277
+ destination_schema (Optional[str]): The name of the destination schema.
246
278
  normalization_tables (Optional[Mapping[str, List[str]]]): If you are using Airbyte's
247
279
  normalization feature, you may specify a mapping of destination table to a list of
248
280
  derived tables that will be created by the normalization process.
@@ -269,13 +301,36 @@ def build_airbyte_assets(
269
301
  tables = chain.from_iterable(
270
302
  chain([destination_tables], normalization_tables.values() if normalization_tables else [])
271
303
  )
304
+
305
+ relation_identifiers: Dict[str, str] = {}
306
+ for table in destination_tables:
307
+ if destination_database and destination_schema and table:
308
+ relation_identifiers[table] = ".".join(
309
+ [destination_database, destination_schema, table]
310
+ )
311
+ if normalization_tables:
312
+ for normalization_table in normalization_tables.get(table, set()):
313
+ relation_identifiers[normalization_table] = ".".join(
314
+ [
315
+ destination_database,
316
+ destination_schema,
317
+ table,
318
+ normalization_table,
319
+ ]
320
+ )
321
+
322
+ schema_by_table_name = schema_by_table_name if schema_by_table_name else {}
323
+
272
324
  outputs = {
273
325
  table: AssetOut(
274
326
  key=AssetKey([*asset_key_prefix, table]),
275
327
  metadata=(
276
- {"table_schema": MetadataValue.table_schema(schema_by_table_name[table])}
277
- if schema_by_table_name
278
- else None
328
+ {
329
+ **TableMetadataSet(
330
+ column_schema=schema_by_table_name.get(table),
331
+ relation_identifier=relation_identifiers.get(table),
332
+ ),
333
+ }
279
334
  ),
280
335
  freshness_policy=freshness_policy,
281
336
  auto_materialize_policy=auto_materialize_policy,
@@ -389,7 +444,7 @@ def _get_normalization_tables_for_schema(
389
444
 
390
445
  if "object" in schema_types and len(sub_schema.get("properties", {})) > 0:
391
446
  out[prefix + key] = AirbyteTableMetadata(
392
- schema=generate_table_schema(sub_schema.get("properties", {}))
447
+ raw_table_name=key, schema=generate_table_schema(sub_schema.get("properties", {}))
393
448
  )
394
449
  for k, v in sub_schema["properties"].items():
395
450
  out = merge_dicts(
@@ -398,7 +453,8 @@ def _get_normalization_tables_for_schema(
398
453
  # Array types are also broken into a new table
399
454
  elif "array" in schema_types:
400
455
  out[prefix + key] = AirbyteTableMetadata(
401
- schema=generate_table_schema(sub_schema.get("items", {}).get("properties", {}))
456
+ raw_table_name=key,
457
+ schema=generate_table_schema(sub_schema.get("items", {}).get("properties", {})),
402
458
  )
403
459
  if sub_schema.get("items", {}).get("properties"):
404
460
  for k, v in sub_schema["items"]["properties"].items():
@@ -422,6 +478,7 @@ class AirbyteConnectionMetadata(
422
478
  ("stream_prefix", str),
423
479
  ("has_basic_normalization", bool),
424
480
  ("stream_data", List[Mapping[str, Any]]),
481
+ ("destination", Mapping[str, Any]),
425
482
  ],
426
483
  )
427
484
  ):
@@ -436,7 +493,10 @@ class AirbyteConnectionMetadata(
436
493
 
437
494
  @classmethod
438
495
  def from_api_json(
439
- cls, contents: Mapping[str, Any], operations: Mapping[str, Any]
496
+ cls,
497
+ contents: Mapping[str, Any],
498
+ operations: Mapping[str, Any],
499
+ destination: Mapping[str, Any],
440
500
  ) -> "AirbyteConnectionMetadata":
441
501
  return cls(
442
502
  name=contents["name"],
@@ -446,10 +506,13 @@ class AirbyteConnectionMetadata(
446
506
  for op in operations.get("operations", [])
447
507
  ),
448
508
  stream_data=contents.get("syncCatalog", {}).get("streams", []),
509
+ destination=destination,
449
510
  )
450
511
 
451
512
  @classmethod
452
- def from_config(cls, contents: Mapping[str, Any]) -> "AirbyteConnectionMetadata":
513
+ def from_config(
514
+ cls, contents: Mapping[str, Any], destination: Mapping[str, Any]
515
+ ) -> "AirbyteConnectionMetadata":
453
516
  config_contents = cast(Mapping[str, Any], contents.get("configuration"))
454
517
  check.invariant(
455
518
  config_contents is not None, "Airbyte connection config is missing 'configuration' key"
@@ -463,6 +526,7 @@ class AirbyteConnectionMetadata(
463
526
  for op in config_contents.get("operations", [])
464
527
  ),
465
528
  stream_data=config_contents.get("sync_catalog", {}).get("streams", []),
529
+ destination=destination,
466
530
  )
467
531
 
468
532
  def parse_stream_tables(
@@ -497,6 +561,7 @@ class AirbyteConnectionMetadata(
497
561
  prefixed_norm_table_name = f"{self.stream_prefix}{normalization_table_name}"
498
562
  normalization_tables[prefixed_norm_table_name] = meta
499
563
  tables[prefixed_name] = AirbyteTableMetadata(
564
+ raw_table_name=name,
500
565
  schema=generate_table_schema(schema_props),
501
566
  normalization_tables=normalization_tables,
502
567
  )
@@ -576,14 +641,35 @@ class AirbyteCoreCacheableAssetsDefinition(CacheableAssetsDefinition):
576
641
  )
577
642
  schema_by_table_name = _get_schema_by_table_name(stream_table_metadata)
578
643
 
644
+ destination_database = connection.destination.get("configuration", {}).get("database")
645
+ destination_schema = connection.destination.get("configuration", {}).get("schema")
646
+
579
647
  table_to_asset_key = partial(self._connection_to_asset_key_fn, connection)
648
+
649
+ destination_tables = list(stream_table_metadata.keys())
650
+ destination_raw_table_names_by_table = {
651
+ table: metadata.raw_table_name for table, metadata in stream_table_metadata.items()
652
+ }
653
+ normalization_tables = {
654
+ table: set(metadata.normalization_tables.keys())
655
+ for table, metadata in stream_table_metadata.items()
656
+ }
657
+ normalization_raw_table_names_by_table = {
658
+ normalization_table: metadata.normalization_tables[
659
+ normalization_table
660
+ ].raw_table_name
661
+ for table, metadata in stream_table_metadata.items()
662
+ for normalization_table in normalization_tables[table]
663
+ }
664
+
580
665
  asset_data_for_conn = _build_airbyte_asset_defn_metadata(
581
666
  connection_id=connection_id,
582
- destination_tables=list(stream_table_metadata.keys()),
583
- normalization_tables={
584
- table: set(metadata.normalization_tables.keys())
585
- for table, metadata in stream_table_metadata.items()
586
- },
667
+ destination_tables=destination_tables,
668
+ destination_raw_table_names_by_table=destination_raw_table_names_by_table,
669
+ destination_database=destination_database,
670
+ destination_schema=destination_schema,
671
+ normalization_tables=normalization_tables,
672
+ normalization_raw_table_names_by_table=normalization_raw_table_names_by_table,
587
673
  asset_key_prefix=self._key_prefix,
588
674
  group_name=(
589
675
  self._connection_meta_to_group_fn(connection)
@@ -699,7 +785,21 @@ class AirbyteInstanceCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinit
699
785
  )
700
786
  ),
701
787
  )
702
- connection = AirbyteConnectionMetadata.from_api_json(connection_json, operations_json)
788
+
789
+ destination_id = cast(str, connection_json.get("destinationId"))
790
+ destination_json = cast(
791
+ Dict[str, Any],
792
+ check.not_none(
793
+ self._airbyte_instance.make_request(
794
+ endpoint="/destinations/get",
795
+ data={"destinationId": destination_id},
796
+ )
797
+ ),
798
+ )
799
+
800
+ connection = AirbyteConnectionMetadata.from_api_json(
801
+ connection_json, operations_json, destination_json
802
+ )
703
803
 
704
804
  # Filter out connections that don't match the filter function
705
805
  if self._connection_filter and not self._connection_filter(connection):
@@ -759,7 +859,17 @@ class AirbyteYAMLCacheableAssetsDefinition(AirbyteCoreCacheableAssetsDefinition)
759
859
  for connection_name in connection_directories:
760
860
  connection_dir = os.path.join(connections_dir, connection_name)
761
861
  with open(os.path.join(connection_dir, "configuration.yaml"), encoding="utf-8") as f:
762
- connection = AirbyteConnectionMetadata.from_config(yaml.safe_load(f.read()))
862
+ connection_data = yaml.safe_load(f.read())
863
+
864
+ destination_configuration_path = cast(
865
+ str, connection_data.get("destination_configuration_path")
866
+ )
867
+ with open(
868
+ os.path.join(self._project_dir, destination_configuration_path), encoding="utf-8"
869
+ ) as f:
870
+ destination_data = yaml.safe_load(f.read())
871
+
872
+ connection = AirbyteConnectionMetadata.from_config(connection_data, destination_data)
763
873
 
764
874
  # Filter out connections that don't match the filter function
765
875
  if self._connection_filter and not self._connection_filter(connection):
dagster_airbyte/types.py CHANGED
@@ -6,10 +6,14 @@ from dagster._core.definitions.metadata.table import TableSchema
6
6
  class AirbyteTableMetadata:
7
7
  def __init__(
8
8
  self,
9
+ raw_table_name: str,
9
10
  schema: TableSchema,
10
11
  normalization_tables: Optional[Mapping[str, "AirbyteTableMetadata"]] = None,
11
12
  ):
12
- """Contains metadata about an Airbyte table, including its schema and any created normalization tables."""
13
+ """Contains metadata about an Airbyte table, including its destination raw table name,
14
+ schema and any created normalization tables.
15
+ """
16
+ self.raw_table_name = raw_table_name
13
17
  self.schema = schema
14
18
  self.normalization_tables = normalization_tables or dict()
15
19
 
@@ -1 +1 @@
1
- __version__ = "0.24.9"
1
+ __version__ = "0.24.11"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dagster-airbyte
3
- Version: 0.24.9
3
+ Version: 0.24.11
4
4
  Summary: Package for integrating Airbyte with Dagster.
5
5
  Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-airbyte
6
6
  Author: Dagster Labs
@@ -15,10 +15,10 @@ Classifier: License :: OSI Approved :: Apache Software License
15
15
  Classifier: Operating System :: OS Independent
16
16
  Requires-Python: >=3.8,<3.13
17
17
  License-File: LICENSE
18
- Requires-Dist: dagster ==1.8.9
18
+ Requires-Dist: dagster ==1.8.11
19
19
  Requires-Dist: requests
20
20
  Provides-Extra: managed
21
- Requires-Dist: dagster-managed-elements ==0.24.9 ; extra == 'managed'
21
+ Requires-Dist: dagster-managed-elements ==0.24.11 ; extra == 'managed'
22
22
  Provides-Extra: test
23
23
  Requires-Dist: requests-mock ; extra == 'test'
24
24
 
@@ -1,21 +1,21 @@
1
1
  dagster_airbyte/__init__.py,sha256=eXDY4rzDz0PKzPAaO0zWw6uP8RQY-SgU0EXa7GMdM2k,1305
2
- dagster_airbyte/asset_defs.py,sha256=GAKJWVjrrG6IX2uqlK8zeDueBjbs5irkzig-siXB4XY,48539
2
+ dagster_airbyte/asset_defs.py,sha256=_jG8PpusrnkIKxow8VsQte8sEZa_IbCDTvBpQoBwuww,53307
3
3
  dagster_airbyte/cli.py,sha256=HErteP1MjfHozKKSrznh0yAreKETbXp5NDHzXGsdvvE,425
4
4
  dagster_airbyte/ops.py,sha256=pq6mp7vN2wXgo3gJMuWaAcxTmfkZ7d1zWzPyL_auSEY,4208
5
5
  dagster_airbyte/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
6
6
  dagster_airbyte/resources.py,sha256=FoHI_Eiw2F6tnI1KZMfkgXhThdU6DplHp6INsbjUFsg,29802
7
- dagster_airbyte/types.py,sha256=fwqUv_MZCegwHhSELgUqm1H1JVUb-m83CyXFkd0r-ko,1425
7
+ dagster_airbyte/types.py,sha256=w1DyTcXyuzrG3wfkOPYFtwj7snHcgqf-dC7_pRjiE1Q,1544
8
8
  dagster_airbyte/utils.py,sha256=cFKCkGFAvwr17KFTeqpVtQRDsNo4zpqw9yr2-1YSJeI,2823
9
- dagster_airbyte/version.py,sha256=9weWImOclCBqHuCBLYEnR7Ab-QgQ2rysqid5TzPUNBk,23
9
+ dagster_airbyte/version.py,sha256=XoAmHmUmn_stAwPPPU1_t-UuvmY7DsPg9-UreBts-RU,24
10
10
  dagster_airbyte/managed/__init__.py,sha256=6SBtyNOMJ9Cu2UIwFExJHpL_ZVFo3rPMvyIxVOsKvWE,469
11
11
  dagster_airbyte/managed/reconciliation.py,sha256=HgrLT-Xs8vWY9SfbdBXuorMf60KCn5Qz7bPITW5MxJo,34862
12
12
  dagster_airbyte/managed/types.py,sha256=ja056Wm7_ZFw1XGSNmdxmBy2TcOxbnylJCpRA2ng2TE,14596
13
13
  dagster_airbyte/managed/generated/__init__.py,sha256=eYq-yfXEeffuKAVFXY8plD0se1wHjFNVqklpbu9gljw,108
14
14
  dagster_airbyte/managed/generated/destinations.py,sha256=x1wmWlXvOJHtfaZva3ErdKuVS--sDvfidSXR5ji9G5w,119692
15
15
  dagster_airbyte/managed/generated/sources.py,sha256=wyNoGJiNvW8mjRRs6b-_lWFs0Fgy-MZlRaxiN6bP-4s,282691
16
- dagster_airbyte-0.24.9.dist-info/LICENSE,sha256=TMatHW4_G9ldRdodEAp-l2Xa2WvsdeOh60E3v1R2jis,11349
17
- dagster_airbyte-0.24.9.dist-info/METADATA,sha256=BmTsOM9LIp4wOvJ0CoxZ8JXmWcIiE8-Rz6ZwmUjibJE,926
18
- dagster_airbyte-0.24.9.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
19
- dagster_airbyte-0.24.9.dist-info/entry_points.txt,sha256=XrbLOz3LpgPV5fdwMmgdP6Rp1AfSG07KeWIddLqh7Lw,61
20
- dagster_airbyte-0.24.9.dist-info/top_level.txt,sha256=HLwIRQCzqItn88_KbPP8DNTKKQEBUVKk6NCn4PrCtqY,16
21
- dagster_airbyte-0.24.9.dist-info/RECORD,,
16
+ dagster_airbyte-0.24.11.dist-info/LICENSE,sha256=TMatHW4_G9ldRdodEAp-l2Xa2WvsdeOh60E3v1R2jis,11349
17
+ dagster_airbyte-0.24.11.dist-info/METADATA,sha256=-ggc_Tx6b8G3ZLsUb3ZEum_Um0RdPVBATFvCPorkh44,929
18
+ dagster_airbyte-0.24.11.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
19
+ dagster_airbyte-0.24.11.dist-info/entry_points.txt,sha256=XrbLOz3LpgPV5fdwMmgdP6Rp1AfSG07KeWIddLqh7Lw,61
20
+ dagster_airbyte-0.24.11.dist-info/top_level.txt,sha256=HLwIRQCzqItn88_KbPP8DNTKKQEBUVKk6NCn4PrCtqY,16
21
+ dagster_airbyte-0.24.11.dist-info/RECORD,,