airbyte-cdk 6.11.1.dev4100__py3-none-any.whl → 6.11.2.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3007,7 +3007,6 @@ definitions:
3007
3007
  interpolation_context:
3008
3008
  - config
3009
3009
  - components_values
3010
- - stream_slice
3011
3010
  - stream_template_config
3012
3011
  examples:
3013
3012
  - ["data"]
@@ -3024,13 +3023,10 @@ definitions:
3024
3023
  - config
3025
3024
  - stream_template_config
3026
3025
  - components_values
3027
- - stream_slice
3028
3026
  examples:
3029
3027
  - "{{ components_values['updates'] }}"
3030
3028
  - "{{ components_values['MetaData']['LastUpdatedTime'] }}"
3031
3029
  - "{{ config['segment_id'] }}"
3032
- - "{{ stream_slice['parent_id'] }}"
3033
- - "{{ stream_slice['extra_fields']['name'] }}"
3034
3030
  value_type:
3035
3031
  title: Value Type
3036
3032
  description: The expected data type of the value. If omitted, the type will be inferred from the value provided.
@@ -323,6 +323,9 @@ from airbyte_cdk.sources.declarative.partition_routers import (
323
323
  SinglePartitionRouter,
324
324
  SubstreamPartitionRouter,
325
325
  )
326
+ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
327
+ AsyncJobPartitionRouter,
328
+ )
326
329
  from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
327
330
  ParentStreamConfig,
328
331
  )
@@ -2228,22 +2231,28 @@ class ModelToComponentFactory:
2228
2231
  urls_extractor=urls_extractor,
2229
2232
  )
2230
2233
 
2231
- return AsyncRetriever(
2234
+ async_job_partition_router = AsyncJobPartitionRouter(
2232
2235
  job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
2233
2236
  job_repository,
2234
2237
  stream_slices,
2235
- JobTracker(
2236
- 1
2237
- ), # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
2238
+ JobTracker(1),
2239
+ # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
2238
2240
  self._message_repository,
2239
- has_bulk_parent=False, # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
2241
+ has_bulk_parent=False,
2242
+ # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
2240
2243
  ),
2241
- record_selector=record_selector,
2242
2244
  stream_slicer=stream_slicer,
2243
2245
  config=config,
2244
2246
  parameters=model.parameters or {},
2245
2247
  )
2246
2248
 
2249
+ return AsyncRetriever(
2250
+ record_selector=record_selector,
2251
+ stream_slicer=async_job_partition_router,
2252
+ config=config,
2253
+ parameters=model.parameters or {},
2254
+ )
2255
+
2247
2256
  @staticmethod
2248
2257
  def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec:
2249
2258
  return Spec(
@@ -2353,7 +2362,7 @@ class ModelToComponentFactory:
2353
2362
  config=config,
2354
2363
  name="",
2355
2364
  primary_key=None,
2356
- stream_slicer=stream_slicer if stream_slicer else combined_slicers,
2365
+ stream_slicer=combined_slicers,
2357
2366
  transformations=[],
2358
2367
  )
2359
2368
 
@@ -2,10 +2,18 @@
2
2
  # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import AsyncJobPartitionRouter
5
6
  from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import CartesianProductStreamSlicer
6
7
  from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import ListPartitionRouter
7
8
  from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import SinglePartitionRouter
8
9
  from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import SubstreamPartitionRouter
9
10
  from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
10
11
 
11
- __all__ = ["CartesianProductStreamSlicer", "ListPartitionRouter", "SinglePartitionRouter", "SubstreamPartitionRouter", "PartitionRouter"]
12
+ __all__ = [
13
+ "AsyncJobPartitionRouter",
14
+ "CartesianProductStreamSlicer",
15
+ "ListPartitionRouter",
16
+ "SinglePartitionRouter",
17
+ "SubstreamPartitionRouter",
18
+ "PartitionRouter"
19
+ ]
@@ -0,0 +1,65 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+ from dataclasses import InitVar, dataclass, field
4
+ from typing import Any, Callable, Iterable, Mapping, Optional
5
+
6
+ from airbyte_cdk.models import FailureType
7
+ from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
8
+ AsyncJobOrchestrator,
9
+ AsyncPartition,
10
+ )
11
+ from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
12
+ SinglePartitionRouter,
13
+ )
14
+ from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
15
+ from airbyte_cdk.sources.types import Config, StreamSlice
16
+ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
17
+
18
+
19
+ @dataclass
20
+ class AsyncJobPartitionRouter(StreamSlicer):
21
+ """
22
+ Partition router that creates async jobs in a source API, periodically polls for job
23
+ completion, and supplies the completed job URL locations as stream slices so that
24
+ records can be extracted.
25
+ """
26
+
27
+ config: Config
28
+ parameters: InitVar[Mapping[str, Any]]
29
+ job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
30
+ stream_slicer: StreamSlicer = field(
31
+ default_factory=lambda: SinglePartitionRouter(parameters={})
32
+ )
33
+
34
+ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
35
+ self._job_orchestrator_factory = self.job_orchestrator_factory
36
+ self._job_orchestrator: Optional[AsyncJobOrchestrator] = None
37
+ self._parameters = parameters
38
+
39
+ def stream_slices(self) -> Iterable[StreamSlice]:
40
+ slices = self.stream_slicer.stream_slices()
41
+ self._job_orchestrator = self._job_orchestrator_factory(slices)
42
+
43
+ for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
44
+ yield StreamSlice(
45
+ partition=dict(completed_partition.stream_slice.partition)
46
+ | {"partition": completed_partition},
47
+ cursor_slice=completed_partition.stream_slice.cursor_slice,
48
+ )
49
+
50
+ def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
51
+ """
52
+ This method of fetching records extends beyond what a PartitionRouter/StreamSlicer should
53
+ be responsible for. However, this was added in because the JobOrchestrator is required to
54
+ retrieve records. And without defining fetch_records() on this class, we're stuck with either
55
+ passing the JobOrchestrator to the AsyncRetriever or storing it on multiple classes.
56
+ """
57
+
58
+ if not self._job_orchestrator:
59
+ raise AirbyteTracedException(
60
+ message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
61
+ internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
62
+ failure_type=FailureType.system_error,
63
+ )
64
+
65
+ return self._job_orchestrator.fetch_records(partition=partition)
@@ -88,25 +88,19 @@ class HttpComponentsResolver(ComponentsResolver):
88
88
  """
89
89
  kwargs = {"stream_template_config": stream_template_config}
90
90
 
91
- stream_slices = self.retriever.stream_slices() if self.retriever.stream_slicer else [{}]
91
+ for components_values in self.retriever.read_records({}):
92
+ updated_config = deepcopy(stream_template_config)
93
+ kwargs["components_values"] = components_values # type: ignore[assignment] # component_values will always be of type Mapping[str, Any]
92
94
 
93
- for stream_slice in stream_slices:
94
- for components_values in self.retriever.read_records({}, stream_slice):
95
- updated_config = deepcopy(stream_template_config)
96
- kwargs["components_values"] = components_values # type: ignore[assignment] # component_values will always be of type Mapping[str, Any]
97
- kwargs["stream_slice"] = stream_slice
98
-
99
- for resolved_component in self._resolved_components:
100
- valid_types = (
101
- (resolved_component.value_type,) if resolved_component.value_type else None
102
- )
103
- value = resolved_component.value.eval(
104
- self.config, valid_types=valid_types, **kwargs
105
- )
95
+ for resolved_component in self._resolved_components:
96
+ valid_types = (
97
+ (resolved_component.value_type,) if resolved_component.value_type else None
98
+ )
99
+ value = resolved_component.value.eval(
100
+ self.config, valid_types=valid_types, **kwargs
101
+ )
106
102
 
107
- path = [
108
- path.eval(self.config, **kwargs) for path in resolved_component.field_path
109
- ]
110
- dpath.set(updated_config, path, value)
103
+ path = [path.eval(self.config, **kwargs) for path in resolved_component.field_path]
104
+ dpath.set(updated_config, path, value)
111
105
 
112
- yield updated_config
106
+ yield updated_config
@@ -1,8 +1,8 @@
1
1
  # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
2
 
3
3
 
4
- from dataclasses import InitVar, dataclass, field
5
- from typing import Any, Callable, Iterable, Mapping, Optional
4
+ from dataclasses import InitVar, dataclass
5
+ from typing import Any, Iterable, Mapping, Optional
6
6
 
7
7
  from typing_extensions import deprecated
8
8
 
@@ -12,9 +12,10 @@ from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
12
12
  AsyncPartition,
13
13
  )
14
14
  from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
15
- from airbyte_cdk.sources.declarative.partition_routers import SinglePartitionRouter
15
+ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
16
+ AsyncJobPartitionRouter,
17
+ )
16
18
  from airbyte_cdk.sources.declarative.retrievers import Retriever
17
- from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
18
19
  from airbyte_cdk.sources.source import ExperimentalClassWarning
19
20
  from airbyte_cdk.sources.streams.core import StreamData
20
21
  from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
@@ -29,15 +30,10 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
29
30
  class AsyncRetriever(Retriever):
30
31
  config: Config
31
32
  parameters: InitVar[Mapping[str, Any]]
32
- job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
33
33
  record_selector: RecordSelector
34
- stream_slicer: StreamSlicer = field(
35
- default_factory=lambda: SinglePartitionRouter(parameters={})
36
- )
34
+ stream_slicer: AsyncJobPartitionRouter
37
35
 
38
36
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
39
- self._job_orchestrator_factory = self.job_orchestrator_factory
40
- self.__job_orchestrator: Optional[AsyncJobOrchestrator] = None
41
37
  self._parameters = parameters
42
38
 
43
39
  @property
@@ -54,17 +50,6 @@ class AsyncRetriever(Retriever):
54
50
  """
55
51
  pass
56
52
 
57
- @property
58
- def _job_orchestrator(self) -> AsyncJobOrchestrator:
59
- if not self.__job_orchestrator:
60
- raise AirbyteTracedException(
61
- message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
62
- internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
63
- failure_type=FailureType.system_error,
64
- )
65
-
66
- return self.__job_orchestrator
67
-
68
53
  def _get_stream_state(self) -> StreamState:
69
54
  """
70
55
  Gets the current state of the stream.
@@ -99,15 +84,7 @@ class AsyncRetriever(Retriever):
99
84
  return stream_slice["partition"] # type: ignore # stream_slice["partition"] has been added as an AsyncPartition as part of stream_slices
100
85
 
101
86
  def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
102
- slices = self.stream_slicer.stream_slices()
103
- self.__job_orchestrator = self._job_orchestrator_factory(slices)
104
-
105
- for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
106
- yield StreamSlice(
107
- partition=dict(completed_partition.stream_slice.partition)
108
- | {"partition": completed_partition},
109
- cursor_slice=completed_partition.stream_slice.cursor_slice,
110
- )
87
+ return self.stream_slicer.stream_slices()
111
88
 
112
89
  def read_records(
113
90
  self,
@@ -116,7 +93,7 @@ class AsyncRetriever(Retriever):
116
93
  ) -> Iterable[StreamData]:
117
94
  stream_state: StreamState = self._get_stream_state()
118
95
  partition: AsyncPartition = self._validate_and_get_stream_slice_partition(stream_slice)
119
- records: Iterable[Mapping[str, Any]] = self._job_orchestrator.fetch_records(partition)
96
+ records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(partition)
120
97
 
121
98
  yield from self.record_selector.filter_and_transform(
122
99
  all_data=records,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.11.1.dev4100
3
+ Version: 6.11.2.dev1
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -66,7 +66,7 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=PxP4p268
66
66
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
67
67
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
68
68
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
69
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=JpHSMfwhHFqTh6bv-LQRq2i8AZbnPOqdNvdar62NZxU,128126
69
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=9DYx7lQ2r4rlHcWG-ClA27NZ1mzjHQyh4ZHSe5wErI8,127973
70
70
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
71
71
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
72
72
  airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=hNlhaB5FjNC6IfJyglj5ZJWkYD2nEAukMDmzRz5PC6o,671
@@ -109,8 +109,9 @@ airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQ
109
109
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
110
110
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
111
111
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
112
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=EOsUBIPWAP6Yc10ptIKT7mCJlTK6DfEtmnrmhY9hgoE,104403
113
- airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=1NjaZoGAIefvWwj6wx-LOKIXXWS-UnBlZFnuR7y6uYA,745
112
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=Z0i4Sdrv-AcFveaKaL6MnfOzXWC5zNa6AGm1PPKK-x4,104682
113
+ airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=974SY1RFwitUCiiDHuFHDGmSNu1D72z3bSTpvlBwAho,911
114
+ airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
114
115
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
115
116
  airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
116
117
  airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
@@ -155,9 +156,9 @@ airbyte_cdk/sources/declarative/requesters/requester.py,sha256=iVVpXQ4KEd9OyZNwm
155
156
  airbyte_cdk/sources/declarative/resolvers/__init__.py,sha256=RAwq1VrkC0kAaIkmKkL7so8ZeUzF0MgUQ0tciGkY7v4,1116
156
157
  airbyte_cdk/sources/declarative/resolvers/components_resolver.py,sha256=KPjKc0yb9artL4ZkeqN8RmEykHH6FJgqXD7fCEnh1X0,1936
157
158
  airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py,sha256=dz4iJV9liD_LzY_Mn4XmAStoUll60R3MIGWV4aN3pgg,5223
158
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=X9wvQLwUFAll_rqNJYePb14SnKd20IS7ofrW4YuUSIo,4578
159
+ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=ZA2vrHQKfXNMcH3x1iuyFOTGNzYDhUFT2qcaiOzSK0A,4271
159
160
  airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=FVQpUGVwp2Gibk4gp07VmLKX5AafUlsZWFSrDpUDuJM,443
160
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=WDFnjrXLz3-YEjFhmlMkWAn9AJvnZ0mk9FyC8DAhEYk,4976
161
+ airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=3jgor7a6_s_9KgqHmPk6cWMDZ-6OugFPjCajIkC3Onw,3721
161
162
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
162
163
  airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=N4swGw5mfuTXJ2R7AKX18CHzizsr69pXwt5uSHLPi48,24172
163
164
  airbyte_cdk/sources/declarative/schema/__init__.py,sha256=Io9vninzlEjQ2uFmWklxfwNM0cXfljtzOz5zL1OVyT4,701
@@ -336,8 +337,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
336
337
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
337
338
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
338
339
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
339
- airbyte_cdk-6.11.1.dev4100.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
340
- airbyte_cdk-6.11.1.dev4100.dist-info/METADATA,sha256=nU7_sP4w22VFlutdpffLkTVOKxAKf0It1poerdrx-60,5958
341
- airbyte_cdk-6.11.1.dev4100.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
342
- airbyte_cdk-6.11.1.dev4100.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
343
- airbyte_cdk-6.11.1.dev4100.dist-info/RECORD,,
340
+ airbyte_cdk-6.11.2.dev1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
341
+ airbyte_cdk-6.11.2.dev1.dist-info/METADATA,sha256=WrA1bFbcp_mLbP53VO8SKGP4-aP-a5mALs0l3fNQJaA,5955
342
+ airbyte_cdk-6.11.2.dev1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
343
+ airbyte_cdk-6.11.2.dev1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
344
+ airbyte_cdk-6.11.2.dev1.dist-info/RECORD,,