dagster-airbyte 0.25.6__tar.gz → 0.25.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dagster-airbyte might be problematic. Click here for more details.

Files changed (31)
  1. {dagster-airbyte-0.25.6/dagster_airbyte.egg-info → dagster-airbyte-0.25.7}/PKG-INFO +1 -1
  2. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte/__init__.py +8 -2
  3. dagster-airbyte-0.25.7/dagster_airbyte/asset_decorator.py +113 -0
  4. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte/asset_defs.py +127 -9
  5. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte/managed/reconciliation.py +3 -4
  6. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte/resources.py +275 -29
  7. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte/translator.py +44 -3
  8. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte/utils.py +33 -2
  9. dagster-airbyte-0.25.7/dagster_airbyte/version.py +1 -0
  10. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7/dagster_airbyte.egg-info}/PKG-INFO +1 -1
  11. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte.egg-info/SOURCES.txt +1 -0
  12. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte.egg-info/requires.txt +2 -2
  13. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/setup.py +2 -2
  14. dagster-airbyte-0.25.6/dagster_airbyte/version.py +0 -1
  15. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/LICENSE +0 -0
  16. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/MANIFEST.in +0 -0
  17. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/README.md +0 -0
  18. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte/cli.py +0 -0
  19. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte/managed/__init__.py +0 -0
  20. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte/managed/generated/__init__.py +0 -0
  21. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte/managed/generated/destinations.py +0 -0
  22. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte/managed/generated/sources.py +0 -0
  23. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte/managed/types.py +0 -0
  24. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte/ops.py +0 -0
  25. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte/py.typed +0 -0
  26. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte/types.py +0 -0
  27. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte.egg-info/dependency_links.txt +0 -0
  28. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte.egg-info/entry_points.txt +0 -0
  29. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte.egg-info/not-zip-safe +0 -0
  30. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/dagster_airbyte.egg-info/top_level.txt +0 -0
  31. {dagster-airbyte-0.25.6 → dagster-airbyte-0.25.7}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dagster-airbyte
3
- Version: 0.25.6
3
+ Version: 0.25.7
4
4
  Summary: Package for integrating Airbyte with Dagster.
5
5
  Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-airbyte
6
6
  Author: Dagster Labs
@@ -14,8 +14,10 @@ try:
14
14
  except ImportError:
15
15
  pass
16
16
 
17
+ from dagster_airbyte.asset_decorator import airbyte_assets as airbyte_assets
17
18
  from dagster_airbyte.asset_defs import (
18
19
  build_airbyte_assets as build_airbyte_assets,
20
+ build_airbyte_assets_definitions as build_airbyte_assets_definitions,
19
21
  load_assets_from_airbyte_instance as load_assets_from_airbyte_instance,
20
22
  )
21
23
  from dagster_airbyte.ops import airbyte_sync_op as airbyte_sync_op
@@ -23,12 +25,16 @@ from dagster_airbyte.resources import (
23
25
  AirbyteCloudResource as AirbyteCloudResource,
24
26
  AirbyteCloudWorkspace as AirbyteCloudWorkspace,
25
27
  AirbyteResource as AirbyteResource,
26
- AirbyteState as AirbyteState,
27
28
  airbyte_cloud_resource as airbyte_cloud_resource,
28
29
  airbyte_resource as airbyte_resource,
29
30
  load_airbyte_cloud_asset_specs as load_airbyte_cloud_asset_specs,
30
31
  )
31
- from dagster_airbyte.translator import DagsterAirbyteTranslator as DagsterAirbyteTranslator
32
+ from dagster_airbyte.translator import (
33
+ AirbyteConnectionTableProps as AirbyteConnectionTableProps,
34
+ AirbyteJobStatusType as AirbyteJobStatusType,
35
+ AirbyteState as AirbyteState,
36
+ DagsterAirbyteTranslator as DagsterAirbyteTranslator,
37
+ )
32
38
  from dagster_airbyte.types import AirbyteOutput as AirbyteOutput
33
39
  from dagster_airbyte.version import __version__ as __version__
34
40
 
@@ -0,0 +1,113 @@
1
+ from typing import Any, Callable, Optional
2
+
3
+ from dagster import AssetsDefinition, multi_asset
4
+ from dagster._annotations import experimental
5
+
6
+ from dagster_airbyte.resources import AirbyteCloudWorkspace
7
+ from dagster_airbyte.translator import AirbyteMetadataSet, DagsterAirbyteTranslator
8
+
9
+
10
+ @experimental
11
+ def airbyte_assets(
12
+ *,
13
+ connection_id: str,
14
+ workspace: AirbyteCloudWorkspace,
15
+ name: Optional[str] = None,
16
+ group_name: Optional[str] = None,
17
+ dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
18
+ ) -> Callable[[Callable[..., Any]], AssetsDefinition]:
19
+ """Create a definition for how to sync the tables of a given Airbyte connection.
20
+
21
+ Args:
22
+ connection_id (str): The Airbyte Connection ID.
23
+ workspace (AirbyteCloudWorkspace): The Airbyte workspace to fetch assets from.
24
+ name (Optional[str], optional): The name of the op.
25
+ group_name (Optional[str], optional): The name of the asset group.
26
+ dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
27
+ to convert Airbyte content into :py:class:`dagster.AssetSpec`.
28
+ Defaults to :py:class:`DagsterAirbyteTranslator`.
29
+
30
+ Examples:
31
+ Sync the tables of an Airbyte connection:
32
+
33
+ .. code-block:: python
34
+
35
+ from dagster_airbyte import AirbyteCloudWorkspace, airbyte_assets
36
+
37
+ import dagster as dg
38
+
39
+ airbyte_workspace = AirbyteCloudWorkspace(
40
+ workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
41
+ client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
42
+ client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
43
+ )
44
+
45
+
46
+ @airbyte_assets(
47
+ connection_id="airbyte_connection_id",
48
+ workspace=airbyte_workspace,
49
+ )
50
+ def airbyte_connection_assets(context: dg.AssetExecutionContext, airbyte: AirbyteCloudWorkspace):
51
+ yield from airbyte.sync_and_poll(context=context)
52
+
53
+
54
+ defs = dg.Definitions(
55
+ assets=[airbyte_connection_assets],
56
+ resources={"airbyte": airbyte_workspace},
57
+ )
58
+
59
+ Sync the tables of an Airbyte connection with a custom translator:
60
+
61
+ .. code-block:: python
62
+
63
+ from dagster_airbyte import (
64
+ DagsterAirbyteTranslator,
65
+ AirbyteConnectionTableProps,
66
+ AirbyteCloudWorkspace,
67
+ airbyte_assets
68
+ )
69
+
70
+ import dagster as dg
71
+
72
+ class CustomDagsterAirbyteTranslator(DagsterAirbyteTranslator):
73
+ def get_asset_spec(self, props: AirbyteConnectionTableProps) -> dg.AssetSpec:
74
+ default_spec = super().get_asset_spec(props)
75
+ return default_spec.merge_attributes(
76
+ metadata={"custom": "metadata"},
77
+ )
78
+
79
+ airbyte_workspace = AirbyteCloudWorkspace(
80
+ workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
81
+ client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
82
+ client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
83
+ )
84
+
85
+
86
+ @airbyte_assets(
87
+ connection_id="airbyte_connection_id",
88
+ workspace=airbyte_workspace,
89
+ dagster_airbyte_translator=CustomDagsterAirbyteTranslator()
90
+ )
91
+ def airbyte_connection_assets(context: dg.AssetExecutionContext, airbyte: AirbyteCloudWorkspace):
92
+ yield from airbyte.sync_and_poll(context=context)
93
+
94
+
95
+ defs = dg.Definitions(
96
+ assets=[airbyte_connection_assets],
97
+ resources={"airbyte": airbyte_workspace},
98
+ )
99
+ """
100
+ dagster_airbyte_translator = dagster_airbyte_translator or DagsterAirbyteTranslator()
101
+
102
+ return multi_asset(
103
+ name=name,
104
+ group_name=group_name,
105
+ can_subset=True,
106
+ specs=[
107
+ spec
108
+ for spec in workspace.load_asset_specs(
109
+ dagster_airbyte_translator=dagster_airbyte_translator
110
+ )
111
+ if AirbyteMetadataSet.extract(spec.metadata).connection_id == connection_id
112
+ ],
113
+ )
@@ -1,7 +1,6 @@
1
1
  import hashlib
2
2
  import inspect
3
3
  import os
4
- import re
5
4
  from abc import abstractmethod
6
5
  from functools import partial
7
6
  from itertools import chain
@@ -23,6 +22,7 @@ from typing import (
23
22
 
24
23
  import yaml
25
24
  from dagster import (
25
+ AssetExecutionContext,
26
26
  AssetKey,
27
27
  AssetOut,
28
28
  AutoMaterializePolicy,
@@ -33,6 +33,7 @@ from dagster import (
33
33
  SourceAsset,
34
34
  _check as check,
35
35
  )
36
+ from dagster._annotations import experimental
36
37
  from dagster._core.definitions import AssetsDefinition, multi_asset
37
38
  from dagster._core.definitions.cacheable_assets import (
38
39
  AssetsDefinitionCacheableData,
@@ -45,9 +46,17 @@ from dagster._core.errors import DagsterInvalidDefinitionError, DagsterInvalidIn
45
46
  from dagster._core.execution.context.init import build_init_resource_context
46
47
  from dagster._utils.merger import merge_dicts
47
48
 
48
- from dagster_airbyte.resources import AirbyteCloudResource, AirbyteResource, BaseAirbyteResource
49
+ from dagster_airbyte.asset_decorator import airbyte_assets
50
+ from dagster_airbyte.resources import (
51
+ AirbyteCloudResource,
52
+ AirbyteCloudWorkspace,
53
+ AirbyteResource,
54
+ BaseAirbyteResource,
55
+ )
56
+ from dagster_airbyte.translator import AirbyteMetadataSet, DagsterAirbyteTranslator
49
57
  from dagster_airbyte.types import AirbyteTableMetadata
50
58
  from dagster_airbyte.utils import (
59
+ clean_name,
51
60
  generate_materializations,
52
61
  generate_table_schema,
53
62
  is_basic_normalization_operation,
@@ -462,11 +471,6 @@ def _get_normalization_tables_for_schema(
462
471
  return out
463
472
 
464
473
 
465
- def _clean_name(name: str) -> str:
466
- """Cleans an input to be a valid Dagster asset name."""
467
- return re.sub(r"[^a-z0-9]+", "_", name.lower())
468
-
469
-
470
474
  class AirbyteConnectionMetadata(
471
475
  NamedTuple(
472
476
  "_AirbyteConnectionMetadata",
@@ -908,7 +912,7 @@ def load_assets_from_airbyte_instance(
908
912
  workspace_id: Optional[str] = None,
909
913
  key_prefix: Optional[CoercibleToAssetKeyPrefix] = None,
910
914
  create_assets_for_normalization_tables: bool = True,
911
- connection_to_group_fn: Optional[Callable[[str], Optional[str]]] = _clean_name,
915
+ connection_to_group_fn: Optional[Callable[[str], Optional[str]]] = clean_name,
912
916
  connection_meta_to_group_fn: Optional[
913
917
  Callable[[AirbyteConnectionMetadata], Optional[str]]
914
918
  ] = None,
@@ -1013,7 +1017,7 @@ def load_assets_from_airbyte_instance(
1013
1017
  check.invariant(
1014
1018
  not connection_meta_to_group_fn
1015
1019
  or not connection_to_group_fn
1016
- or connection_to_group_fn == _clean_name,
1020
+ or connection_to_group_fn == clean_name,
1017
1021
  "Cannot specify both connection_meta_to_group_fn and connection_to_group_fn",
1018
1022
  )
1019
1023
 
@@ -1032,3 +1036,117 @@ def load_assets_from_airbyte_instance(
1032
1036
  connection_to_freshness_policy_fn=connection_to_freshness_policy_fn,
1033
1037
  connection_to_auto_materialize_policy_fn=connection_to_auto_materialize_policy_fn,
1034
1038
  )
1039
+
1040
+
1041
+ # -----------------------
1042
+ # Reworked assets factory
1043
+ # -----------------------
1044
+
1045
+
1046
+ @experimental
1047
+ def build_airbyte_assets_definitions(
1048
+ *,
1049
+ workspace: AirbyteCloudWorkspace,
1050
+ dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
1051
+ ) -> Sequence[AssetsDefinition]:
1052
+ """The list of AssetsDefinition for all connections in the Airbyte workspace.
1053
+
1054
+ Args:
1055
+ workspace (AirbyteCloudWorkspace): The Airbyte workspace to fetch assets from.
1056
+ dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
1057
+ to convert Airbyte content into :py:class:`dagster.AssetSpec`.
1058
+ Defaults to :py:class:`DagsterAirbyteTranslator`.
1059
+
1060
+ Returns:
1061
+ List[AssetsDefinition]: The list of AssetsDefinition for all connections in the Airbyte workspace.
1062
+
1063
+ Examples:
1064
+ Sync the tables of an Airbyte connection:
1065
+
1066
+ .. code-block:: python
1067
+
1068
+ from dagster_airbyte import AirbyteCloudWorkspace, build_airbyte_assets_definitions
1069
+
1070
+ import dagster as dg
1071
+
1072
+ airbyte_workspace = AirbyteCloudWorkspace(
1073
+ workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
1074
+ client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
1075
+ client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
1076
+ )
1077
+
1078
+
1079
+ airbyte_assets = build_airbyte_assets_definitions(workspace=airbyte_workspace)
1080
+
1081
+ defs = dg.Definitions(
1082
+ assets=airbyte_assets,
1083
+ resources={"airbyte": airbyte_workspace},
1084
+ )
1085
+
1086
+ Sync the tables of an Airbyte connection with a custom translator:
1087
+
1088
+ .. code-block:: python
1089
+
1090
+ from dagster_airbyte import (
1091
+ DagsterAirbyteTranslator,
1092
+ AirbyteConnectionTableProps,
1093
+ AirbyteCloudWorkspace,
1094
+ build_airbyte_assets_definitions
1095
+ )
1096
+
1097
+ import dagster as dg
1098
+
1099
+ class CustomDagsterAirbyteTranslator(DagsterAirbyteTranslator):
1100
+ def get_asset_spec(self, props: AirbyteConnectionTableProps) -> dg.AssetSpec:
1101
+ default_spec = super().get_asset_spec(props)
1102
+ return default_spec.merge_attributes(
1103
+ metadata={"custom": "metadata"},
1104
+ )
1105
+
1106
+ airbyte_workspace = AirbyteCloudWorkspace(
1107
+ workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
1108
+ client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
1109
+ client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
1110
+ )
1111
+
1112
+
1113
+ airbyte_assets = build_airbyte_assets_definitions(
1114
+ workspace=airbyte_workspace,
1115
+ dagster_airbyte_translator=CustomDagsterAirbyteTranslator()
1116
+ )
1117
+
1118
+ defs = dg.Definitions(
1119
+ assets=airbyte_assets,
1120
+ resources={"airbyte": airbyte_workspace},
1121
+ )
1122
+ """
1123
+ dagster_airbyte_translator = dagster_airbyte_translator or DagsterAirbyteTranslator()
1124
+
1125
+ all_asset_specs = workspace.load_asset_specs(
1126
+ dagster_airbyte_translator=dagster_airbyte_translator
1127
+ )
1128
+
1129
+ connections = {
1130
+ (
1131
+ check.not_none(AirbyteMetadataSet.extract(spec.metadata).connection_id),
1132
+ check.not_none(AirbyteMetadataSet.extract(spec.metadata).connection_name),
1133
+ )
1134
+ for spec in all_asset_specs
1135
+ }
1136
+
1137
+ _asset_fns = []
1138
+ for connection_id, connection_name in connections:
1139
+
1140
+ @airbyte_assets(
1141
+ connection_id=connection_id,
1142
+ workspace=workspace,
1143
+ name=clean_name(connection_name),
1144
+ group_name=clean_name(connection_name),
1145
+ dagster_airbyte_translator=dagster_airbyte_translator,
1146
+ )
1147
+ def _asset_fn(context: AssetExecutionContext, airbyte: AirbyteCloudWorkspace):
1148
+ yield from airbyte.sync_and_poll(context=context)
1149
+
1150
+ _asset_fns.append(_asset_fn)
1151
+
1152
+ return _asset_fns
@@ -36,7 +36,6 @@ from dagster_managed_elements.utils import UNSET, diff_dicts
36
36
  from dagster_airbyte.asset_defs import (
37
37
  AirbyteConnectionMetadata,
38
38
  AirbyteInstanceCacheableAssetsDefinition,
39
- _clean_name,
40
39
  )
41
40
  from dagster_airbyte.managed.types import (
42
41
  MANAGED_ELEMENTS_DEPRECATION_MSG,
@@ -50,7 +49,7 @@ from dagster_airbyte.managed.types import (
50
49
  InitializedAirbyteSource,
51
50
  )
52
51
  from dagster_airbyte.resources import AirbyteResource
53
- from dagster_airbyte.utils import is_basic_normalization_operation
52
+ from dagster_airbyte.utils import clean_name, is_basic_normalization_operation
54
53
 
55
54
 
56
55
  def gen_configured_stream_json(
@@ -746,7 +745,7 @@ def load_assets_from_connections(
746
745
  connections: Iterable[AirbyteConnection],
747
746
  key_prefix: Optional[CoercibleToAssetKeyPrefix] = None,
748
747
  create_assets_for_normalization_tables: bool = True,
749
- connection_to_group_fn: Optional[Callable[[str], Optional[str]]] = _clean_name,
748
+ connection_to_group_fn: Optional[Callable[[str], Optional[str]]] = clean_name,
750
749
  connection_meta_to_group_fn: Optional[
751
750
  Callable[[AirbyteConnectionMetadata], Optional[str]]
752
751
  ] = None,
@@ -821,7 +820,7 @@ def load_assets_from_connections(
821
820
  check.invariant(
822
821
  not connection_meta_to_group_fn
823
822
  or not connection_to_group_fn
824
- or connection_to_group_fn == _clean_name,
823
+ or connection_to_group_fn == clean_name,
825
824
  "Cannot specify both connection_meta_to_group_fn and connection_to_group_fn",
826
825
  )
827
826
 
@@ -10,15 +10,18 @@ from typing import Any, Dict, List, Mapping, Optional, Sequence, cast
10
10
 
11
11
  import requests
12
12
  from dagster import (
13
+ AssetExecutionContext,
14
+ AssetMaterialization,
13
15
  ConfigurableResource,
14
16
  Definitions,
15
17
  Failure,
16
18
  InitResourceContext,
19
+ MaterializeResult,
17
20
  _check as check,
18
21
  get_dagster_logger,
19
22
  resource,
20
23
  )
21
- from dagster._annotations import experimental
24
+ from dagster._annotations import experimental, public
22
25
  from dagster._config.pythonic_config import infer_schema_from_config_class
23
26
  from dagster._core.definitions.asset_spec import AssetSpec
24
27
  from dagster._core.definitions.definitions_load_context import StateBackedDefinitionsLoader
@@ -32,11 +35,20 @@ from requests.exceptions import RequestException
32
35
 
33
36
  from dagster_airbyte.translator import (
34
37
  AirbyteConnection,
38
+ AirbyteConnectionTableProps,
35
39
  AirbyteDestination,
40
+ AirbyteJob,
41
+ AirbyteJobStatusType,
42
+ AirbyteMetadataSet,
36
43
  AirbyteWorkspaceData,
37
44
  DagsterAirbyteTranslator,
38
45
  )
39
46
  from dagster_airbyte.types import AirbyteOutput
47
+ from dagster_airbyte.utils import (
48
+ DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY,
49
+ get_airbyte_connection_table_name,
50
+ get_translator_from_airbyte_assets,
51
+ )
40
52
 
41
53
  AIRBYTE_REST_API_BASE = "https://api.airbyte.com"
42
54
  AIRBYTE_REST_API_VERSION = "v1"
@@ -53,16 +65,6 @@ AIRBYTE_CLOUD_REFRESH_TIMEDELTA_SECONDS = 150
53
65
  AIRBYTE_RECONSTRUCTION_METADATA_KEY_PREFIX = "dagster-airbyte/reconstruction_metadata"
54
66
 
55
67
 
56
- class AirbyteState:
57
- RUNNING = "running"
58
- SUCCEEDED = "succeeded"
59
- CANCELLED = "cancelled"
60
- PENDING = "pending"
61
- FAILED = "failed"
62
- ERROR = "error"
63
- INCOMPLETE = "incomplete"
64
-
65
-
66
68
  class AirbyteResourceState:
67
69
  def __init__(self) -> None:
68
70
  self.request_cache: Dict[str, Optional[Mapping[str, object]]] = {}
@@ -252,13 +254,17 @@ class BaseAirbyteResource(ConfigurableResource):
252
254
  job_info = cast(Dict[str, object], job_details.get("job", {}))
253
255
  state = job_info.get("status")
254
256
 
255
- if state in (AirbyteState.RUNNING, AirbyteState.PENDING, AirbyteState.INCOMPLETE):
257
+ if state in (
258
+ AirbyteJobStatusType.RUNNING,
259
+ AirbyteJobStatusType.PENDING,
260
+ AirbyteJobStatusType.INCOMPLETE,
261
+ ):
256
262
  continue
257
- elif state == AirbyteState.SUCCEEDED:
263
+ elif state == AirbyteJobStatusType.SUCCEEDED:
258
264
  break
259
- elif state == AirbyteState.ERROR:
265
+ elif state == AirbyteJobStatusType.ERROR:
260
266
  raise Failure(f"Job failed: {job_id}")
261
- elif state == AirbyteState.CANCELLED:
267
+ elif state == AirbyteJobStatusType.CANCELLED:
262
268
  raise Failure(f"Job was cancelled: {job_id}")
263
269
  else:
264
270
  raise Failure(f"Encountered unexpected state `{state}` for job_id {job_id}")
@@ -266,7 +272,12 @@ class BaseAirbyteResource(ConfigurableResource):
266
272
  # if Airbyte sync has not completed, make sure to cancel it so that it doesn't outlive
267
273
  # the python process
268
274
  if (
269
- state not in (AirbyteState.SUCCEEDED, AirbyteState.ERROR, AirbyteState.CANCELLED)
275
+ state
276
+ not in (
277
+ AirbyteJobStatusType.SUCCEEDED,
278
+ AirbyteJobStatusType.ERROR,
279
+ AirbyteJobStatusType.CANCELLED,
280
+ )
270
281
  and self.cancel_sync_on_run_termination
271
282
  ):
272
283
  self.cancel_job(job_id)
@@ -742,13 +753,17 @@ class AirbyteResource(BaseAirbyteResource):
742
753
  job_info = cast(Dict[str, object], job_details.get("job", {}))
743
754
  state = job_info.get("status")
744
755
 
745
- if state in (AirbyteState.RUNNING, AirbyteState.PENDING, AirbyteState.INCOMPLETE):
756
+ if state in (
757
+ AirbyteJobStatusType.RUNNING,
758
+ AirbyteJobStatusType.PENDING,
759
+ AirbyteJobStatusType.INCOMPLETE,
760
+ ):
746
761
  continue
747
- elif state == AirbyteState.SUCCEEDED:
762
+ elif state == AirbyteJobStatusType.SUCCEEDED:
748
763
  break
749
- elif state == AirbyteState.ERROR:
764
+ elif state == AirbyteJobStatusType.ERROR:
750
765
  raise Failure(f"Job failed: {job_id}")
751
- elif state == AirbyteState.CANCELLED:
766
+ elif state == AirbyteJobStatusType.CANCELLED:
752
767
  raise Failure(f"Job was cancelled: {job_id}")
753
768
  else:
754
769
  raise Failure(f"Encountered unexpected state `{state}` for job_id {job_id}")
@@ -756,7 +771,12 @@ class AirbyteResource(BaseAirbyteResource):
756
771
  # if Airbyte sync has not completed, make sure to cancel it so that it doesn't outlive
757
772
  # the python process
758
773
  if (
759
- state not in (AirbyteState.SUCCEEDED, AirbyteState.ERROR, AirbyteState.CANCELLED)
774
+ state
775
+ not in (
776
+ AirbyteJobStatusType.SUCCEEDED,
777
+ AirbyteJobStatusType.ERROR,
778
+ AirbyteJobStatusType.CANCELLED,
779
+ )
760
780
  and self.cancel_sync_on_run_termination
761
781
  ):
762
782
  self.cancel_job(job_id)
@@ -992,6 +1012,101 @@ class AirbyteCloudClient(DagsterModel):
992
1012
  base_url=self.rest_api_base_url,
993
1013
  )
994
1014
 
1015
+ def start_sync_job(self, connection_id: str) -> Mapping[str, Any]:
1016
+ return self._make_request(
1017
+ method="POST",
1018
+ endpoint="jobs",
1019
+ base_url=self.rest_api_base_url,
1020
+ data={
1021
+ "connectionId": connection_id,
1022
+ "jobType": "sync",
1023
+ },
1024
+ )
1025
+
1026
+ def get_job_details(self, job_id: int) -> Mapping[str, Any]:
1027
+ return self._make_request(
1028
+ method="GET", endpoint=f"jobs/{job_id}", base_url=self.rest_api_base_url
1029
+ )
1030
+
1031
+ def cancel_job(self, job_id: int) -> Mapping[str, Any]:
1032
+ return self._make_request(
1033
+ method="DELETE", endpoint=f"jobs/{job_id}", base_url=self.rest_api_base_url
1034
+ )
1035
+
1036
+ def sync_and_poll(
1037
+ self,
1038
+ connection_id: str,
1039
+ poll_interval: Optional[float] = None,
1040
+ poll_timeout: Optional[float] = None,
1041
+ cancel_on_termination: bool = True,
1042
+ ) -> AirbyteOutput:
1043
+ """Initializes a sync operation for the given connection, and polls until it completes.
1044
+
1045
+ Args:
1046
+ connection_id (str): The Airbyte Connection ID. You can retrieve this value from the
1047
+ "Connection" tab of a given connection in the Airbyte UI.
1048
+ poll_interval (float): The time (in seconds) that will be waited between successive polls.
1049
+ poll_timeout (float): The maximum time that will wait before this operation is timed
1050
+ out. By default, this will never time out.
1051
+ cancel_on_termination (bool): Whether to cancel a sync in Airbyte if the Dagster runner is terminated.
1052
+ This may be useful to disable if using Airbyte sources that cannot be cancelled and
1053
+ resumed easily, or if your Dagster deployment may experience runner interruptions
1054
+ that do not impact your Airbyte deployment.
1055
+
1056
+ Returns:
1057
+ :py:class:`~AirbyteOutput`:
1058
+ Details of the sync job.
1059
+ """
1060
+ connection_details = self.get_connection_details(connection_id)
1061
+ start_job_details = self.start_sync_job(connection_id)
1062
+ job = AirbyteJob.from_job_details(job_details=start_job_details)
1063
+
1064
+ self._log.info(f"Job {job.id} initialized for connection_id={connection_id}.")
1065
+ poll_start = datetime.now()
1066
+ poll_interval = (
1067
+ poll_interval if poll_interval is not None else DEFAULT_POLL_INTERVAL_SECONDS
1068
+ )
1069
+ try:
1070
+ while True:
1071
+ if poll_timeout and datetime.now() > poll_start + timedelta(seconds=poll_timeout):
1072
+ raise Failure(
1073
+ f"Timeout: Airbyte job {job.id} is not ready after the timeout"
1074
+ f" {poll_timeout} seconds"
1075
+ )
1076
+
1077
+ time.sleep(poll_interval)
1078
+ # We return these job details in the AirbyteOutput when the job succeeds
1079
+ poll_job_details = self.get_job_details(job.id)
1080
+ job = AirbyteJob.from_job_details(job_details=poll_job_details)
1081
+ if job.status in (
1082
+ AirbyteJobStatusType.RUNNING,
1083
+ AirbyteJobStatusType.PENDING,
1084
+ AirbyteJobStatusType.INCOMPLETE,
1085
+ ):
1086
+ continue
1087
+ elif job.status == AirbyteJobStatusType.SUCCEEDED:
1088
+ break
1089
+ elif job.status in [AirbyteJobStatusType.ERROR, AirbyteJobStatusType.FAILED]:
1090
+ raise Failure(f"Job failed: {job.id}")
1091
+ elif job.status == AirbyteJobStatusType.CANCELLED:
1092
+ raise Failure(f"Job was cancelled: {job.id}")
1093
+ else:
1094
+ raise Failure(
1095
+ f"Encountered unexpected state `{job.status}` for job_id {job.id}"
1096
+ )
1097
+ finally:
1098
+ # if Airbyte sync has not completed, make sure to cancel it so that it doesn't outlive
1099
+ # the python process
1100
+ if cancel_on_termination and job.status not in (
1101
+ AirbyteJobStatusType.SUCCEEDED,
1102
+ AirbyteJobStatusType.ERROR,
1103
+ AirbyteJobStatusType.CANCELLED,
1104
+ AirbyteJobStatusType.FAILED,
1105
+ ):
1106
+ self.cancel_job(job.id)
1107
+
1108
+ return AirbyteOutput(job_details=poll_job_details, connection_details=connection_details)
1109
+
995
1110
 
996
1111
  @experimental
997
1112
  class AirbyteCloudWorkspace(ConfigurableResource):
@@ -1067,6 +1182,130 @@ class AirbyteCloudWorkspace(ConfigurableResource):
1067
1182
  destinations_by_id=destinations_by_id,
1068
1183
  )
1069
1184
 
1185
+ @cached_method
1186
+ def load_asset_specs(
1187
+ self,
1188
+ dagster_airbyte_translator: Optional[DagsterAirbyteTranslator] = None,
1189
+ ) -> Sequence[AssetSpec]:
1190
+ """Returns a list of AssetSpecs representing the Airbyte content in the workspace.
1191
+
1192
+ Args:
1193
+ dagster_airbyte_translator (Optional[DagsterAirbyteTranslator], optional): The translator to use
1194
+ to convert Airbyte content into :py:class:`dagster.AssetSpec`.
1195
+ Defaults to :py:class:`DagsterAirbyteTranslator`.
1196
+
1197
+ Returns:
1198
+ List[AssetSpec]: The set of assets representing the Airbyte content in the workspace.
1199
+
1200
+ Examples:
1201
+ Loading the asset specs for a given Airbyte workspace:
1202
+ .. code-block:: python
1203
+
1204
+ from dagster_airbyte import AirbyteCloudWorkspace
1205
+
1206
+ import dagster as dg
1207
+
1208
+ airbyte_workspace = AirbyteCloudWorkspace(
1209
+ workspace_id=dg.EnvVar("AIRBYTE_CLOUD_WORKSPACE_ID"),
1210
+ client_id=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_ID"),
1211
+ client_secret=dg.EnvVar("AIRBYTE_CLOUD_CLIENT_SECRET"),
1212
+ )
1213
+
1214
+ airbyte_specs = airbyte_workspace.load_asset_specs()
1215
+ defs = dg.Definitions(assets=airbyte_specs, resources={"airbyte": airbyte_workspace})
1216
+ """
1217
+ dagster_airbyte_translator = dagster_airbyte_translator or DagsterAirbyteTranslator()
1218
+
1219
+ return load_airbyte_cloud_asset_specs(
1220
+ workspace=self, dagster_airbyte_translator=dagster_airbyte_translator
1221
+ )
1222
+
1223
+ def _generate_materialization(
1224
+ self,
1225
+ airbyte_output: AirbyteOutput,
1226
+ dagster_airbyte_translator: DagsterAirbyteTranslator,
1227
+ ):
1228
+ connection = AirbyteConnection.from_connection_details(
1229
+ connection_details=airbyte_output.connection_details
1230
+ )
1231
+
1232
+ for stream in connection.streams.values():
1233
+ if stream.selected:
1234
+ connection_table_name = get_airbyte_connection_table_name(
1235
+ stream_prefix=connection.stream_prefix,
1236
+ stream_name=stream.name,
1237
+ )
1238
+ stream_asset_spec = dagster_airbyte_translator.get_asset_spec(
1239
+ props=AirbyteConnectionTableProps(
1240
+ table_name=connection_table_name,
1241
+ stream_prefix=connection.stream_prefix,
1242
+ stream_name=stream.name,
1243
+ json_schema=stream.json_schema,
1244
+ connection_id=connection.id,
1245
+ connection_name=connection.name,
1246
+ destination_type=None,
1247
+ database=None,
1248
+ schema=None,
1249
+ )
1250
+ )
1251
+
1252
+ yield AssetMaterialization(
1253
+ asset_key=stream_asset_spec.key,
1254
+ description=(
1255
+ f"Table generated via Airbyte Cloud sync "
1256
+ f"for connection {connection.name}: {connection_table_name}"
1257
+ ),
1258
+ metadata=stream_asset_spec.metadata,
1259
+ )
1260
+
1261
+ @public
1262
+ @experimental
1263
+ def sync_and_poll(self, context: AssetExecutionContext):
1264
+ """Executes a sync and poll process to materialize Airbyte Cloud assets.
1265
+ This method can only be used in the context of an asset execution.
1266
+
1267
+ Args:
1268
+ context (AssetExecutionContext): The execution context
1269
+ from within `@airbyte_assets`.
1270
+
1271
+ Returns:
1272
+ Iterator[Union[AssetMaterialization, MaterializeResult]]: An iterator of MaterializeResult
1273
+ or AssetMaterialization.
1274
+ """
1275
+ assets_def = context.assets_def
1276
+ dagster_airbyte_translator = get_translator_from_airbyte_assets(assets_def)
1277
+ connection_id = next(
1278
+ check.not_none(AirbyteMetadataSet.extract(spec.metadata).connection_id)
1279
+ for spec in assets_def.specs
1280
+ )
1281
+
1282
+ client = self.get_client()
1283
+ airbyte_output = client.sync_and_poll(
1284
+ connection_id=connection_id,
1285
+ )
1286
+
1287
+ materialized_asset_keys = set()
1288
+ for materialization in self._generate_materialization(
1289
+ airbyte_output=airbyte_output, dagster_airbyte_translator=dagster_airbyte_translator
1290
+ ):
1291
+ # Scan through all tables actually created, if it was expected then emit a MaterializeResult.
1292
+ # Otherwise, emit a runtime AssetMaterialization.
1293
+ if materialization.asset_key in context.selected_asset_keys:
1294
+ yield MaterializeResult(
1295
+ asset_key=materialization.asset_key, metadata=materialization.metadata
1296
+ )
1297
+ materialized_asset_keys.add(materialization.asset_key)
1298
+ else:
1299
+ context.log.warning(
1300
+ f"An unexpected asset was materialized: {materialization.asset_key}. "
1301
+ f"Yielding a materialization event."
1302
+ )
1303
+ yield materialization
1304
+
1305
+ unmaterialized_asset_keys = context.selected_asset_keys - materialized_asset_keys
1306
+ if unmaterialized_asset_keys:
1307
+ context.log.warning(f"Assets were not materialized: {unmaterialized_asset_keys}")
1308
+
1070
1309
 
1071
1310
  @experimental
1072
1311
  def load_airbyte_cloud_asset_specs(
@@ -1103,16 +1342,23 @@ def load_airbyte_cloud_asset_specs(
1103
1342
  airbyte_cloud_specs = load_airbyte_cloud_asset_specs(airbyte_cloud_workspace)
1104
1343
  defs = dg.Definitions(assets=airbyte_cloud_specs)
1105
1344
  """
1345
+ dagster_airbyte_translator = dagster_airbyte_translator or DagsterAirbyteTranslator()
1346
+
1106
1347
  with workspace.process_config_and_initialize_cm() as initialized_workspace:
1107
- return check.is_list(
1108
- AirbyteCloudWorkspaceDefsLoader(
1109
- workspace=initialized_workspace,
1110
- translator=dagster_airbyte_translator or DagsterAirbyteTranslator(),
1348
+ return [
1349
+ spec.merge_attributes(
1350
+ metadata={DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY: dagster_airbyte_translator}
1111
1351
  )
1112
- .build_defs()
1113
- .assets,
1114
- AssetSpec,
1115
- )
1352
+ for spec in check.is_list(
1353
+ AirbyteCloudWorkspaceDefsLoader(
1354
+ workspace=initialized_workspace,
1355
+ translator=dagster_airbyte_translator,
1356
+ )
1357
+ .build_defs()
1358
+ .assets,
1359
+ AssetSpec,
1360
+ )
1361
+ ]
1116
1362
 
1117
1363
 
1118
1364
  @record
@@ -1,6 +1,7 @@
1
+ from enum import Enum
1
2
  from typing import Any, List, Mapping, Optional, Sequence
2
3
 
3
- from dagster._annotations import experimental
4
+ from dagster._annotations import deprecated, experimental
4
5
  from dagster._core.definitions.asset_key import AssetKey
5
6
  from dagster._core.definitions.asset_spec import AssetSpec
6
7
  from dagster._core.definitions.metadata.metadata_set import NamespacedMetadataSet, TableMetadataSet
@@ -11,6 +12,27 @@ from dagster._utils.cached_method import cached_method
11
12
  from dagster_airbyte.utils import generate_table_schema, get_airbyte_connection_table_name
12
13
 
13
14
 
15
class AirbyteJobStatusType(str, Enum):
    """String-valued enumeration of Airbyte job statuses.

    Members also subclass ``str``, so each one compares equal to its
    lowercase string value (e.g. ``AirbyteJobStatusType.RUNNING == "running"``).
    """

    # Member order is part of the enum's iteration order; keep it stable.
    RUNNING = "running"
    SUCCEEDED = "succeeded"
    CANCELLED = "cancelled"
    PENDING = "pending"
    FAILED = "failed"
    ERROR = "error"
    INCOMPLETE = "incomplete"
23
+
24
+
25
@deprecated(breaking_version="1.10", additional_warn_text="Use `AirbyteJobStatusType` instead.")
class AirbyteState:
    """Deprecated namespace of job-status constants.

    Each attribute is an alias for the ``AirbyteJobStatusType`` member of the
    same name; use ``AirbyteJobStatusType`` directly instead.
    """

    RUNNING = AirbyteJobStatusType.RUNNING
    SUCCEEDED = AirbyteJobStatusType.SUCCEEDED
    CANCELLED = AirbyteJobStatusType.CANCELLED
    PENDING = AirbyteJobStatusType.PENDING
    FAILED = AirbyteJobStatusType.FAILED
    ERROR = AirbyteJobStatusType.ERROR
    INCOMPLETE = AirbyteJobStatusType.INCOMPLETE
34
+
35
+
14
36
  @record
15
37
  class AirbyteConnectionTableProps:
16
38
  table_name: str
@@ -19,7 +41,7 @@ class AirbyteConnectionTableProps:
19
41
  json_schema: Mapping[str, Any]
20
42
  connection_id: str
21
43
  connection_name: str
22
- destination_type: str
44
+ destination_type: Optional[str]
23
45
  database: Optional[str]
24
46
  schema: Optional[str]
25
47
 
@@ -108,6 +130,25 @@ class AirbyteStream:
108
130
  )
109
131
 
110
132
 
133
@whitelist_for_serdes
@record
class AirbyteJob:
    """Record holding the id and status of an Airbyte job, built from API job data."""

    # Taken from the "jobId" entry of the job details.
    id: int
    # Taken from the "status" entry of the job details.
    status: str

    @classmethod
    def from_job_details(cls, job_details: Mapping[str, Any]) -> "AirbyteJob":
        """Build an ``AirbyteJob`` from a job-details mapping.

        Args:
            job_details (Mapping[str, Any]): Job data; the ``"jobId"`` and
                ``"status"`` entries are read by subscript, so both must be present.

        Returns:
            AirbyteJob: A record carrying the job id and status.
        """
        job_id = job_details["jobId"]
        job_status = job_details["status"]
        return cls(id=job_id, status=job_status)
150
+
151
+
111
152
  @whitelist_for_serdes
112
153
  @record
113
154
  class AirbyteWorkspaceData:
@@ -190,5 +231,5 @@ class DagsterAirbyteTranslator:
190
231
  return AssetSpec(
191
232
  key=AssetKey(props.table_name),
192
233
  metadata=metadata,
193
- kinds={"airbyte", props.destination_type},
234
+ kinds={"airbyte", *({props.destination_type} if props.destination_type else set())},
194
235
  )
@@ -1,10 +1,26 @@
1
- from typing import Any, Iterator, Mapping, Optional, Sequence
1
+ import re
2
+ from typing import TYPE_CHECKING, Any, Iterator, Mapping, Optional, Sequence
2
3
 
3
- from dagster import AssetMaterialization, MetadataValue
4
+ from dagster import (
5
+ AssetMaterialization,
6
+ AssetsDefinition,
7
+ DagsterInvariantViolationError,
8
+ MetadataValue,
9
+ )
4
10
  from dagster._core.definitions.metadata.table import TableColumn, TableSchema
5
11
 
6
12
  from dagster_airbyte.types import AirbyteOutput
7
13
 
14
+ if TYPE_CHECKING:
15
+ from dagster_airbyte import DagsterAirbyteTranslator
16
+
17
+ DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY = "dagster-airbyte/dagster_airbyte_translator"
18
+
19
+
20
def clean_name(name: str) -> str:
    """Normalize ``name`` so it can serve as a valid Dagster asset name.

    The input is lowercased, then every run of one or more characters outside
    ``a-z`` / ``0-9`` is collapsed into a single underscore.
    """
    lowered = name.lower()
    return re.sub(r"[^a-z0-9]+", "_", lowered)
23
+
8
24
 
9
25
def get_airbyte_connection_table_name(stream_prefix: Optional[str], stream_name: str) -> str:
    """Return the table name for a stream: the stream name preceded by its prefix, if any.

    A missing or empty ``stream_prefix`` contributes nothing to the result.
    """
    prefix = stream_prefix or ""
    return f"{prefix}{stream_name}"
@@ -78,3 +94,18 @@ def generate_materializations(
78
94
  all_stream_stats.get(stream_name, {}),
79
95
  asset_key_prefix=asset_key_prefix,
80
96
  )
97
+
98
+
99
def get_translator_from_airbyte_assets(
    airbyte_assets: AssetsDefinition,
) -> "DagsterAirbyteTranslator":
    """Return the translator stored in the metadata of an Airbyte assets definition.

    The translator is looked up under ``DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY``
    in the metadata of the first asset key of ``airbyte_assets``.

    Args:
        airbyte_assets (AssetsDefinition): The assets definition to inspect.

    Returns:
        DagsterAirbyteTranslator: The translator found in the first asset's metadata.

    Raises:
        DagsterInvariantViolationError: If the assets definition carries no asset
            metadata at all, or if the first asset's metadata has no translator entry.
    """
    # Normalize a missing mapping once and use the normalized value everywhere below.
    metadata_by_key = airbyte_assets.metadata_by_key or {}

    # A default is passed to next() so an empty mapping does not leak StopIteration.
    first_asset_key = next(iter(metadata_by_key), None)
    if first_asset_key is None:
        raise DagsterInvariantViolationError(
            "Expected to find airbyte translator metadata on the given assets,"
            " but they carry no asset metadata."
            " Did you pass in assets that weren't generated by @airbyte_assets?"
        )

    first_metadata = metadata_by_key.get(first_asset_key, {})
    dagster_airbyte_translator = first_metadata.get(DAGSTER_AIRBYTE_TRANSLATOR_METADATA_KEY)
    if dagster_airbyte_translator is None:
        raise DagsterInvariantViolationError(
            f"Expected to find airbyte translator metadata on asset {first_asset_key.to_user_string()},"
            " but did not. Did you pass in assets that weren't generated by @airbyte_assets?"
        )
    return dagster_airbyte_translator
@@ -0,0 +1 @@
1
+ __version__ = "0.25.7"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dagster-airbyte
3
- Version: 0.25.6
3
+ Version: 0.25.7
4
4
  Summary: Package for integrating Airbyte with Dagster.
5
5
  Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-airbyte
6
6
  Author: Dagster Labs
@@ -4,6 +4,7 @@ README.md
4
4
  setup.cfg
5
5
  setup.py
6
6
  dagster_airbyte/__init__.py
7
+ dagster_airbyte/asset_decorator.py
7
8
  dagster_airbyte/asset_defs.py
8
9
  dagster_airbyte/cli.py
9
10
  dagster_airbyte/ops.py
@@ -1,8 +1,8 @@
1
- dagster==1.9.6
1
+ dagster==1.9.7
2
2
  requests
3
3
 
4
4
  [managed]
5
- dagster-managed-elements==0.25.6
5
+ dagster-managed-elements==0.25.7
6
6
 
7
7
  [test]
8
8
  requests-mock
@@ -37,7 +37,7 @@ setup(
37
37
  include_package_data=True,
38
38
  python_requires=">=3.9,<3.13",
39
39
  install_requires=[
40
- "dagster==1.9.6",
40
+ "dagster==1.9.7",
41
41
  "requests",
42
42
  ],
43
43
  zip_safe=False,
@@ -52,7 +52,7 @@ setup(
52
52
  "flaky",
53
53
  ],
54
54
  "managed": [
55
- "dagster-managed-elements==0.25.6",
55
+ "dagster-managed-elements==0.25.7",
56
56
  ],
57
57
  },
58
58
  )
@@ -1 +0,0 @@
1
- __version__ = "0.25.6"