airbyte-cdk 6.12.0__py3-none-any.whl → 6.12.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -329,6 +329,9 @@ from airbyte_cdk.sources.declarative.partition_routers import (
329
329
  SinglePartitionRouter,
330
330
  SubstreamPartitionRouter,
331
331
  )
332
+ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
333
+ AsyncJobPartitionRouter,
334
+ )
332
335
  from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
333
336
  ParentStreamConfig,
334
337
  )
@@ -2260,22 +2263,28 @@ class ModelToComponentFactory:
2260
2263
  urls_extractor=urls_extractor,
2261
2264
  )
2262
2265
 
2263
- return AsyncRetriever(
2266
+ async_job_partition_router = AsyncJobPartitionRouter(
2264
2267
  job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
2265
2268
  job_repository,
2266
2269
  stream_slices,
2267
- JobTracker(
2268
- 1
2269
- ), # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
2270
+ JobTracker(1),
2271
+ # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
2270
2272
  self._message_repository,
2271
- has_bulk_parent=False, # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
2273
+ has_bulk_parent=False,
2274
+ # FIXME work would need to be done here in order to detect if a stream has a parent stream that is bulk
2272
2275
  ),
2273
- record_selector=record_selector,
2274
2276
  stream_slicer=stream_slicer,
2275
2277
  config=config,
2276
2278
  parameters=model.parameters or {},
2277
2279
  )
2278
2280
 
2281
+ return AsyncRetriever(
2282
+ record_selector=record_selector,
2283
+ stream_slicer=async_job_partition_router,
2284
+ config=config,
2285
+ parameters=model.parameters or {},
2286
+ )
2287
+
2279
2288
  @staticmethod
2280
2289
  def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec:
2281
2290
  return Spec(
@@ -2,10 +2,18 @@
2
2
  # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import AsyncJobPartitionRouter
5
6
  from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import CartesianProductStreamSlicer
6
7
  from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import ListPartitionRouter
7
8
  from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import SinglePartitionRouter
8
9
  from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import SubstreamPartitionRouter
9
10
  from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
10
11
 
11
- __all__ = ["CartesianProductStreamSlicer", "ListPartitionRouter", "SinglePartitionRouter", "SubstreamPartitionRouter", "PartitionRouter"]
12
+ __all__ = [
13
+ "AsyncJobPartitionRouter",
14
+ "CartesianProductStreamSlicer",
15
+ "ListPartitionRouter",
16
+ "SinglePartitionRouter",
17
+ "SubstreamPartitionRouter",
18
+ "PartitionRouter"
19
+ ]
@@ -0,0 +1,65 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+ from dataclasses import InitVar, dataclass, field
4
+ from typing import Any, Callable, Iterable, Mapping, Optional
5
+
6
+ from airbyte_cdk.models import FailureType
7
+ from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
8
+ AsyncJobOrchestrator,
9
+ AsyncPartition,
10
+ )
11
+ from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
12
+ SinglePartitionRouter,
13
+ )
14
+ from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
15
+ from airbyte_cdk.sources.types import Config, StreamSlice
16
+ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
17
+
18
+
19
+ @dataclass
20
+ class AsyncJobPartitionRouter(StreamSlicer):
21
+ """
22
+ Partition router that creates async jobs in a source API, periodically polls for job
23
+ completion, and supplies the completed job URL locations as stream slices so that
24
+ records can be extracted.
25
+ """
26
+
27
+ config: Config
28
+ parameters: InitVar[Mapping[str, Any]]
29
+ job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
30
+ stream_slicer: StreamSlicer = field(
31
+ default_factory=lambda: SinglePartitionRouter(parameters={})
32
+ )
33
+
34
+ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
35
+ self._job_orchestrator_factory = self.job_orchestrator_factory
36
+ self._job_orchestrator: Optional[AsyncJobOrchestrator] = None
37
+ self._parameters = parameters
38
+
39
+ def stream_slices(self) -> Iterable[StreamSlice]:
40
+ slices = self.stream_slicer.stream_slices()
41
+ self._job_orchestrator = self._job_orchestrator_factory(slices)
42
+
43
+ for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
44
+ yield StreamSlice(
45
+ partition=dict(completed_partition.stream_slice.partition)
46
+ | {"partition": completed_partition},
47
+ cursor_slice=completed_partition.stream_slice.cursor_slice,
48
+ )
49
+
50
+ def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
51
+ """
52
+ This method of fetching records extends beyond what a PartitionRouter/StreamSlicer should
53
+ be responsible for. However, this was added in because the JobOrchestrator is required to
54
+ retrieve records. And without defining fetch_records() on this class, we're stuck with either
55
+ passing the JobOrchestrator to the AsyncRetriever or storing it on multiple classes.
56
+ """
57
+
58
+ if not self._job_orchestrator:
59
+ raise AirbyteTracedException(
60
+ message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
61
+ internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
62
+ failure_type=FailureType.system_error,
63
+ )
64
+
65
+ return self._job_orchestrator.fetch_records(partition=partition)
@@ -1,8 +1,8 @@
1
1
  # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
2
 
3
3
 
4
- from dataclasses import InitVar, dataclass, field
5
- from typing import Any, Callable, Iterable, Mapping, Optional
4
+ from dataclasses import InitVar, dataclass
5
+ from typing import Any, Iterable, Mapping, Optional
6
6
 
7
7
  from typing_extensions import deprecated
8
8
 
@@ -12,9 +12,10 @@ from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
12
12
  AsyncPartition,
13
13
  )
14
14
  from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
15
- from airbyte_cdk.sources.declarative.partition_routers import SinglePartitionRouter
15
+ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
16
+ AsyncJobPartitionRouter,
17
+ )
16
18
  from airbyte_cdk.sources.declarative.retrievers import Retriever
17
- from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
18
19
  from airbyte_cdk.sources.source import ExperimentalClassWarning
19
20
  from airbyte_cdk.sources.streams.core import StreamData
20
21
  from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
@@ -29,15 +30,10 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
29
30
  class AsyncRetriever(Retriever):
30
31
  config: Config
31
32
  parameters: InitVar[Mapping[str, Any]]
32
- job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
33
33
  record_selector: RecordSelector
34
- stream_slicer: StreamSlicer = field(
35
- default_factory=lambda: SinglePartitionRouter(parameters={})
36
- )
34
+ stream_slicer: AsyncJobPartitionRouter
37
35
 
38
36
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
39
- self._job_orchestrator_factory = self.job_orchestrator_factory
40
- self.__job_orchestrator: Optional[AsyncJobOrchestrator] = None
41
37
  self._parameters = parameters
42
38
 
43
39
  @property
@@ -54,17 +50,6 @@ class AsyncRetriever(Retriever):
54
50
  """
55
51
  pass
56
52
 
57
- @property
58
- def _job_orchestrator(self) -> AsyncJobOrchestrator:
59
- if not self.__job_orchestrator:
60
- raise AirbyteTracedException(
61
- message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
62
- internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
63
- failure_type=FailureType.system_error,
64
- )
65
-
66
- return self.__job_orchestrator
67
-
68
53
  def _get_stream_state(self) -> StreamState:
69
54
  """
70
55
  Gets the current state of the stream.
@@ -99,15 +84,7 @@ class AsyncRetriever(Retriever):
99
84
  return stream_slice["partition"] # type: ignore # stream_slice["partition"] has been added as an AsyncPartition as part of stream_slices
100
85
 
101
86
  def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
102
- slices = self.stream_slicer.stream_slices()
103
- self.__job_orchestrator = self._job_orchestrator_factory(slices)
104
-
105
- for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
106
- yield StreamSlice(
107
- partition=dict(completed_partition.stream_slice.partition)
108
- | {"partition": completed_partition},
109
- cursor_slice=completed_partition.stream_slice.cursor_slice,
110
- )
87
+ return self.stream_slicer.stream_slices()
111
88
 
112
89
  def read_records(
113
90
  self,
@@ -116,7 +93,7 @@ class AsyncRetriever(Retriever):
116
93
  ) -> Iterable[StreamData]:
117
94
  stream_state: StreamState = self._get_stream_state()
118
95
  partition: AsyncPartition = self._validate_and_get_stream_slice_partition(stream_slice)
119
- records: Iterable[Mapping[str, Any]] = self._job_orchestrator.fetch_records(partition)
96
+ records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(partition)
120
97
 
121
98
  yield from self.record_selector.filter_and_transform(
122
99
  all_data=records,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.12.0
3
+ Version: 6.12.1
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -109,8 +109,9 @@ airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQ
109
109
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
110
110
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
111
111
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
112
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=uGh-zDTkJaSirWaB67cnoJUQKqAs-nxSHJAsQKMAcNE,105716
113
- airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=1NjaZoGAIefvWwj6wx-LOKIXXWS-UnBlZFnuR7y6uYA,745
112
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=i2Z4q_9pYWc40uiHJ3UMqAh1hgoVTTXybxRaWzbwNHE,106031
113
+ airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=974SY1RFwitUCiiDHuFHDGmSNu1D72z3bSTpvlBwAho,911
114
+ airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
114
115
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
115
116
  airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
116
117
  airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
@@ -157,7 +158,7 @@ airbyte_cdk/sources/declarative/resolvers/components_resolver.py,sha256=KPjKc0yb
157
158
  airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py,sha256=dz4iJV9liD_LzY_Mn4XmAStoUll60R3MIGWV4aN3pgg,5223
158
159
  airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=ZA2vrHQKfXNMcH3x1iuyFOTGNzYDhUFT2qcaiOzSK0A,4271
159
160
  airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=FVQpUGVwp2Gibk4gp07VmLKX5AafUlsZWFSrDpUDuJM,443
160
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=WDFnjrXLz3-YEjFhmlMkWAn9AJvnZ0mk9FyC8DAhEYk,4976
161
+ airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=3jgor7a6_s_9KgqHmPk6cWMDZ-6OugFPjCajIkC3Onw,3721
161
162
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
162
163
  airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=N4swGw5mfuTXJ2R7AKX18CHzizsr69pXwt5uSHLPi48,24172
163
164
  airbyte_cdk/sources/declarative/schema/__init__.py,sha256=Io9vninzlEjQ2uFmWklxfwNM0cXfljtzOz5zL1OVyT4,701
@@ -338,8 +339,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
338
339
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
339
340
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
340
341
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
341
- airbyte_cdk-6.12.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
342
- airbyte_cdk-6.12.0.dist-info/METADATA,sha256=3KZ1_QqdXybMOsKxD9zYe1w0QhREO_XsDc0Nm_yPBTo,5988
343
- airbyte_cdk-6.12.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
344
- airbyte_cdk-6.12.0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
345
- airbyte_cdk-6.12.0.dist-info/RECORD,,
342
+ airbyte_cdk-6.12.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
343
+ airbyte_cdk-6.12.1.dist-info/METADATA,sha256=w7op06XAtTqV47wgvbGddUc7pHN6TQB2YvBh8vjQ7FM,5988
344
+ airbyte_cdk-6.12.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
345
+ airbyte_cdk-6.12.1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
346
+ airbyte_cdk-6.12.1.dist-info/RECORD,,