airbyte-cdk 6.39.2__py3-none-any.whl → 6.39.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +16 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +36 -2
- airbyte_cdk/sources/declarative/declarative_stream.py +8 -1
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +24 -3
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +20 -3
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +135 -78
- airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +30 -0
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.39.3.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.39.3.dist-info}/RECORD +14 -14
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.39.3.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.39.3.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.39.3.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.39.3.dist-info}/entry_points.txt +0 -0
@@ -162,6 +162,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
162
162
|
else:
|
163
163
|
filtered_catalog = catalog
|
164
164
|
|
165
|
+
# There is no need to run read for synchronous streams if there are none.
|
166
|
+
if not filtered_catalog.streams:
|
167
|
+
return
|
168
|
+
|
165
169
|
yield from super().read(logger, config, filtered_catalog, state)
|
166
170
|
|
167
171
|
def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
|
@@ -201,6 +205,18 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
201
205
|
# Some low-code sources use a combination of DeclarativeStream and regular Python streams. We can't inspect
|
202
206
|
# these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
|
203
207
|
# so we need to treat them as synchronous
|
208
|
+
|
209
|
+
if name_to_stream_mapping[declarative_stream.name]["type"] == "StateDelegatingStream":
|
210
|
+
stream_state = self._connector_state_manager.get_stream_state(
|
211
|
+
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
212
|
+
)
|
213
|
+
|
214
|
+
name_to_stream_mapping[declarative_stream.name] = (
|
215
|
+
name_to_stream_mapping[declarative_stream.name]["incremental_stream"]
|
216
|
+
if stream_state
|
217
|
+
else name_to_stream_mapping[declarative_stream.name]["full_refresh_stream"]
|
218
|
+
)
|
219
|
+
|
204
220
|
if isinstance(declarative_stream, DeclarativeStream) and (
|
205
221
|
name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
|
206
222
|
== "SimpleRetriever"
|
@@ -24,7 +24,9 @@ properties:
|
|
24
24
|
streams:
|
25
25
|
type: array
|
26
26
|
items:
|
27
|
-
|
27
|
+
anyOf:
|
28
|
+
- "$ref": "#/definitions/DeclarativeStream"
|
29
|
+
- "$ref": "#/definitions/StateDelegatingStream"
|
28
30
|
dynamic_streams:
|
29
31
|
type: array
|
30
32
|
items:
|
@@ -2881,7 +2883,9 @@ definitions:
|
|
2881
2883
|
stream:
|
2882
2884
|
title: Parent Stream
|
2883
2885
|
description: Reference to the parent stream.
|
2884
|
-
|
2886
|
+
anyOf:
|
2887
|
+
- "$ref": "#/definitions/DeclarativeStream"
|
2888
|
+
- "$ref": "#/definitions/StateDelegatingStream"
|
2885
2889
|
partition_field:
|
2886
2890
|
title: Current Parent Key Value Identifier
|
2887
2891
|
description: While iterating over parent records during a sync, the parent_key value can be referenced by using this field.
|
@@ -3154,6 +3158,36 @@ definitions:
|
|
3154
3158
|
$parameters:
|
3155
3159
|
type: object
|
3156
3160
|
additionalProperties: true
|
3161
|
+
StateDelegatingStream:
|
3162
|
+
description: (This component is experimental. Use at your own risk.) Orchestrate the retriever's usage based on the state value.
|
3163
|
+
type: object
|
3164
|
+
required:
|
3165
|
+
- type
|
3166
|
+
- name
|
3167
|
+
- full_refresh_stream
|
3168
|
+
- incremental_stream
|
3169
|
+
properties:
|
3170
|
+
type:
|
3171
|
+
type: string
|
3172
|
+
enum: [ StateDelegatingStream ]
|
3173
|
+
name:
|
3174
|
+
title: Name
|
3175
|
+
description: The stream name.
|
3176
|
+
type: string
|
3177
|
+
default: ""
|
3178
|
+
example:
|
3179
|
+
- "Users"
|
3180
|
+
full_refresh_stream:
|
3181
|
+
title: Retriever
|
3182
|
+
description: Component used to coordinate how records are extracted across stream slices and request pages when the state is empty or not provided.
|
3183
|
+
"$ref": "#/definitions/DeclarativeStream"
|
3184
|
+
incremental_stream:
|
3185
|
+
title: Retriever
|
3186
|
+
description: Component used to coordinate how records are extracted across stream slices and request pages when the state provided.
|
3187
|
+
"$ref": "#/definitions/DeclarativeStream"
|
3188
|
+
$parameters:
|
3189
|
+
type: object
|
3190
|
+
additionalProperties: true
|
3157
3191
|
SimpleRetriever:
|
3158
3192
|
description: Retrieves records by synchronously sending requests to fetch records. The retriever acts as an orchestrator between the requester, the record selector, the paginator, and the partition router.
|
3159
3193
|
type: object
|
@@ -14,6 +14,7 @@ from airbyte_cdk.sources.declarative.incremental import (
|
|
14
14
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
15
15
|
from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration
|
16
16
|
from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
|
17
|
+
from airbyte_cdk.sources.declarative.retrievers.async_retriever import AsyncRetriever
|
17
18
|
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
18
19
|
from airbyte_cdk.sources.declarative.schema import DefaultSchemaLoader
|
19
20
|
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
@@ -76,11 +77,17 @@ class DeclarativeStream(Stream):
|
|
76
77
|
|
77
78
|
@property
|
78
79
|
def exit_on_rate_limit(self) -> bool:
|
80
|
+
if isinstance(self.retriever, AsyncRetriever):
|
81
|
+
return self.retriever.exit_on_rate_limit
|
82
|
+
|
79
83
|
return self.retriever.requester.exit_on_rate_limit # type: ignore # abstract Retriever class has not requester attribute
|
80
84
|
|
81
85
|
@exit_on_rate_limit.setter
|
82
86
|
def exit_on_rate_limit(self, value: bool) -> None:
|
83
|
-
self.retriever
|
87
|
+
if isinstance(self.retriever, AsyncRetriever):
|
88
|
+
self.retriever.exit_on_rate_limit = value
|
89
|
+
else:
|
90
|
+
self.retriever.requester.exit_on_rate_limit = value # type: ignore[attr-defined]
|
84
91
|
|
85
92
|
@property # type: ignore
|
86
93
|
def name(self) -> str:
|
@@ -30,6 +30,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
30
30
|
DeclarativeStream as DeclarativeStreamModel,
|
31
31
|
)
|
32
32
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
|
33
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
34
|
+
StateDelegatingStream as StateDelegatingStreamModel,
|
35
|
+
)
|
33
36
|
from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
|
34
37
|
get_registered_components_module,
|
35
38
|
)
|
@@ -146,7 +149,9 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
146
149
|
|
147
150
|
source_streams = [
|
148
151
|
self._constructor.create_component(
|
149
|
-
|
152
|
+
StateDelegatingStreamModel
|
153
|
+
if stream_config.get("type") == StateDelegatingStreamModel.__name__
|
154
|
+
else DeclarativeStreamModel,
|
150
155
|
stream_config,
|
151
156
|
config,
|
152
157
|
emit_connector_builder_messages=self._emit_connector_builder_messages,
|
@@ -165,7 +170,15 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
165
170
|
def update_with_cache_parent_configs(parent_configs: list[dict[str, Any]]) -> None:
|
166
171
|
for parent_config in parent_configs:
|
167
172
|
parent_streams.add(parent_config["stream"]["name"])
|
168
|
-
parent_config["stream"]["
|
173
|
+
if parent_config["stream"]["type"] == "StateDelegatingStream":
|
174
|
+
parent_config["stream"]["full_refresh_stream"]["retriever"]["requester"][
|
175
|
+
"use_cache"
|
176
|
+
] = True
|
177
|
+
parent_config["stream"]["incremental_stream"]["retriever"]["requester"][
|
178
|
+
"use_cache"
|
179
|
+
] = True
|
180
|
+
else:
|
181
|
+
parent_config["stream"]["retriever"]["requester"]["use_cache"] = True
|
169
182
|
|
170
183
|
for stream_config in stream_configs:
|
171
184
|
if stream_config.get("incremental_sync", {}).get("parent_stream"):
|
@@ -188,7 +201,15 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
188
201
|
|
189
202
|
for stream_config in stream_configs:
|
190
203
|
if stream_config["name"] in parent_streams:
|
191
|
-
stream_config["
|
204
|
+
if stream_config["type"] == "StateDelegatingStream":
|
205
|
+
stream_config["full_refresh_stream"]["retriever"]["requester"]["use_cache"] = (
|
206
|
+
True
|
207
|
+
)
|
208
|
+
stream_config["incremental_stream"]["retriever"]["requester"]["use_cache"] = (
|
209
|
+
True
|
210
|
+
)
|
211
|
+
else:
|
212
|
+
stream_config["retriever"]["requester"]["use_cache"] = True
|
192
213
|
|
193
214
|
return stream_configs
|
194
215
|
|
@@ -1860,7 +1860,7 @@ class DeclarativeSource1(BaseModel):
|
|
1860
1860
|
|
1861
1861
|
type: Literal["DeclarativeSource"]
|
1862
1862
|
check: Union[CheckStream, CheckDynamicStream]
|
1863
|
-
streams: List[DeclarativeStream]
|
1863
|
+
streams: List[Union[DeclarativeStream, StateDelegatingStream]]
|
1864
1864
|
dynamic_streams: Optional[List[DynamicDeclarativeStream]] = None
|
1865
1865
|
version: str = Field(
|
1866
1866
|
...,
|
@@ -1892,7 +1892,7 @@ class DeclarativeSource2(BaseModel):
|
|
1892
1892
|
|
1893
1893
|
type: Literal["DeclarativeSource"]
|
1894
1894
|
check: Union[CheckStream, CheckDynamicStream]
|
1895
|
-
streams: Optional[List[DeclarativeStream]] = None
|
1895
|
+
streams: Optional[List[Union[DeclarativeStream, StateDelegatingStream]]] = None
|
1896
1896
|
dynamic_streams: List[DynamicDeclarativeStream]
|
1897
1897
|
version: str = Field(
|
1898
1898
|
...,
|
@@ -2211,7 +2211,7 @@ class ParentStreamConfig(BaseModel):
|
|
2211
2211
|
examples=["id", "{{ config['parent_record_id'] }}"],
|
2212
2212
|
title="Parent Key",
|
2213
2213
|
)
|
2214
|
-
stream: DeclarativeStream = Field(
|
2214
|
+
stream: Union[DeclarativeStream, StateDelegatingStream] = Field(
|
2215
2215
|
..., description="Reference to the parent stream.", title="Parent Stream"
|
2216
2216
|
)
|
2217
2217
|
partition_field: str = Field(
|
@@ -2238,6 +2238,22 @@ class ParentStreamConfig(BaseModel):
|
|
2238
2238
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2239
2239
|
|
2240
2240
|
|
2241
|
+
class StateDelegatingStream(BaseModel):
|
2242
|
+
type: Literal["StateDelegatingStream"]
|
2243
|
+
name: str = Field(..., description="The stream name.", example=["Users"], title="Name")
|
2244
|
+
full_refresh_stream: DeclarativeStream = Field(
|
2245
|
+
...,
|
2246
|
+
description="Component used to coordinate how records are extracted across stream slices and request pages when the state is empty or not provided.",
|
2247
|
+
title="Retriever",
|
2248
|
+
)
|
2249
|
+
incremental_stream: DeclarativeStream = Field(
|
2250
|
+
...,
|
2251
|
+
description="Component used to coordinate how records are extracted across stream slices and request pages when the state provided.",
|
2252
|
+
title="Retriever",
|
2253
|
+
)
|
2254
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2255
|
+
|
2256
|
+
|
2241
2257
|
class SimpleRetriever(BaseModel):
|
2242
2258
|
type: Literal["SimpleRetriever"]
|
2243
2259
|
record_selector: RecordSelector = Field(
|
@@ -2423,5 +2439,6 @@ SelectiveAuthenticator.update_forward_refs()
|
|
2423
2439
|
DeclarativeStream.update_forward_refs()
|
2424
2440
|
SessionTokenAuthenticator.update_forward_refs()
|
2425
2441
|
DynamicSchemaLoader.update_forward_refs()
|
2442
|
+
ParentStreamConfig.update_forward_refs()
|
2426
2443
|
SimpleRetriever.update_forward_refs()
|
2427
2444
|
AsyncRetriever.update_forward_refs()
|
@@ -351,6 +351,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
351
351
|
SimpleRetriever as SimpleRetrieverModel,
|
352
352
|
)
|
353
353
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
|
354
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
355
|
+
StateDelegatingStream as StateDelegatingStreamModel,
|
356
|
+
)
|
354
357
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
355
358
|
StreamConfig as StreamConfigModel,
|
356
359
|
)
|
@@ -617,6 +620,7 @@ class ModelToComponentFactory:
|
|
617
620
|
LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator,
|
618
621
|
SelectiveAuthenticatorModel: self.create_selective_authenticator,
|
619
622
|
SimpleRetrieverModel: self.create_simple_retriever,
|
623
|
+
StateDelegatingStreamModel: self.create_state_delegating_stream,
|
620
624
|
SpecModel: self.create_spec,
|
621
625
|
SubstreamPartitionRouterModel: self.create_substream_partition_router,
|
622
626
|
WaitTimeFromHeaderModel: self.create_wait_time_from_header,
|
@@ -1785,8 +1789,13 @@ class ModelToComponentFactory:
|
|
1785
1789
|
|
1786
1790
|
def _build_stream_slicer_from_partition_router(
|
1787
1791
|
self,
|
1788
|
-
model: Union[
|
1792
|
+
model: Union[
|
1793
|
+
AsyncRetrieverModel,
|
1794
|
+
CustomRetrieverModel,
|
1795
|
+
SimpleRetrieverModel,
|
1796
|
+
],
|
1789
1797
|
config: Config,
|
1798
|
+
stream_name: Optional[str] = None,
|
1790
1799
|
) -> Optional[PartitionRouter]:
|
1791
1800
|
if (
|
1792
1801
|
hasattr(model, "partition_router")
|
@@ -1794,95 +1803,65 @@ class ModelToComponentFactory:
|
|
1794
1803
|
and model.partition_router
|
1795
1804
|
):
|
1796
1805
|
stream_slicer_model = model.partition_router
|
1797
|
-
|
1798
1806
|
if isinstance(stream_slicer_model, list):
|
1799
1807
|
return CartesianProductStreamSlicer(
|
1800
1808
|
[
|
1801
|
-
self._create_component_from_model(
|
1809
|
+
self._create_component_from_model(
|
1810
|
+
model=slicer, config=config, stream_name=stream_name or ""
|
1811
|
+
)
|
1802
1812
|
for slicer in stream_slicer_model
|
1803
1813
|
],
|
1804
1814
|
parameters={},
|
1805
1815
|
)
|
1806
1816
|
else:
|
1807
|
-
return self._create_component_from_model(
|
1808
|
-
|
1817
|
+
return self._create_component_from_model( # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router
|
1818
|
+
model=stream_slicer_model, config=config, stream_name=stream_name or ""
|
1819
|
+
)
|
1809
1820
|
return None
|
1810
1821
|
|
1811
|
-
def
|
1822
|
+
def _build_incremental_cursor(
|
1812
1823
|
self,
|
1813
|
-
model:
|
1814
|
-
stream_slicer: Optional[
|
1815
|
-
|
1816
|
-
if hasattr(model, "paginator") and model.paginator and not stream_slicer:
|
1817
|
-
# For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
|
1818
|
-
return ResumableFullRefreshCursor(parameters={})
|
1819
|
-
return None
|
1820
|
-
|
1821
|
-
def _merge_stream_slicers(
|
1822
|
-
self, model: DeclarativeStreamModel, config: Config
|
1824
|
+
model: DeclarativeStreamModel,
|
1825
|
+
stream_slicer: Optional[PartitionRouter],
|
1826
|
+
config: Config,
|
1823
1827
|
) -> Optional[StreamSlicer]:
|
1824
|
-
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
1825
|
-
|
1826
1828
|
if model.incremental_sync and stream_slicer:
|
1827
1829
|
if model.retriever.type == "AsyncRetriever":
|
1828
|
-
|
1829
|
-
|
1830
|
-
raise ValueError(
|
1831
|
-
"AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
|
1832
|
-
)
|
1833
|
-
if stream_slicer:
|
1834
|
-
return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
1835
|
-
state_manager=self._connector_state_manager,
|
1836
|
-
model_type=DatetimeBasedCursorModel,
|
1837
|
-
component_definition=model.incremental_sync.__dict__,
|
1838
|
-
stream_name=model.name or "",
|
1839
|
-
stream_namespace=None,
|
1840
|
-
config=config or {},
|
1841
|
-
stream_state={},
|
1842
|
-
partition_router=stream_slicer,
|
1843
|
-
)
|
1844
|
-
return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
1830
|
+
return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
1831
|
+
state_manager=self._connector_state_manager,
|
1845
1832
|
model_type=DatetimeBasedCursorModel,
|
1846
1833
|
component_definition=model.incremental_sync.__dict__,
|
1847
1834
|
stream_name=model.name or "",
|
1848
1835
|
stream_namespace=None,
|
1849
1836
|
config=config or {},
|
1837
|
+
stream_state={},
|
1838
|
+
partition_router=stream_slicer,
|
1850
1839
|
)
|
1851
1840
|
|
1852
1841
|
incremental_sync_model = model.incremental_sync
|
1853
|
-
|
1842
|
+
cursor_component = self._create_component_from_model(
|
1843
|
+
model=incremental_sync_model, config=config
|
1844
|
+
)
|
1845
|
+
is_global_cursor = (
|
1854
1846
|
hasattr(incremental_sync_model, "global_substream_cursor")
|
1855
1847
|
and incremental_sync_model.global_substream_cursor
|
1856
|
-
)
|
1857
|
-
|
1858
|
-
|
1859
|
-
)
|
1848
|
+
)
|
1849
|
+
|
1850
|
+
if is_global_cursor:
|
1860
1851
|
return GlobalSubstreamCursor(
|
1861
1852
|
stream_cursor=cursor_component, partition_router=stream_slicer
|
1862
1853
|
)
|
1863
|
-
|
1864
|
-
|
1865
|
-
|
1866
|
-
|
1867
|
-
return PerPartitionWithGlobalCursor(
|
1868
|
-
cursor_factory=CursorFactory(
|
1869
|
-
lambda: self._create_component_from_model(
|
1870
|
-
model=incremental_sync_model, config=config
|
1871
|
-
),
|
1854
|
+
return PerPartitionWithGlobalCursor(
|
1855
|
+
cursor_factory=CursorFactory(
|
1856
|
+
lambda: self._create_component_from_model(
|
1857
|
+
model=incremental_sync_model, config=config
|
1872
1858
|
),
|
1873
|
-
|
1874
|
-
|
1875
|
-
|
1859
|
+
),
|
1860
|
+
partition_router=stream_slicer,
|
1861
|
+
stream_cursor=cursor_component,
|
1862
|
+
)
|
1876
1863
|
elif model.incremental_sync:
|
1877
1864
|
if model.retriever.type == "AsyncRetriever":
|
1878
|
-
if model.incremental_sync.type != "DatetimeBasedCursor":
|
1879
|
-
# We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support or unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
|
1880
|
-
raise ValueError(
|
1881
|
-
"AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
|
1882
|
-
)
|
1883
|
-
if model.retriever.partition_router:
|
1884
|
-
# Note that this development is also done in parallel to the per partition development which once merged we could support here by calling `create_concurrent_cursor_from_perpartition_cursor`
|
1885
|
-
raise ValueError("Per partition state is not supported yet for AsyncRetriever")
|
1886
1865
|
return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
1887
1866
|
model_type=DatetimeBasedCursorModel,
|
1888
1867
|
component_definition=model.incremental_sync.__dict__,
|
@@ -1891,13 +1870,21 @@ class ModelToComponentFactory:
|
|
1891
1870
|
config=config or {},
|
1892
1871
|
stream_state_migrations=model.state_migrations,
|
1893
1872
|
)
|
1894
|
-
return (
|
1895
|
-
|
1896
|
-
|
1897
|
-
|
1898
|
-
|
1899
|
-
|
1900
|
-
|
1873
|
+
return self._create_component_from_model(model=model.incremental_sync, config=config) # type: ignore[no-any-return] # Will be created Cursor as stream_slicer_model is model.incremental_sync
|
1874
|
+
return None
|
1875
|
+
|
1876
|
+
def _build_resumable_cursor(
|
1877
|
+
self,
|
1878
|
+
model: Union[
|
1879
|
+
AsyncRetrieverModel,
|
1880
|
+
CustomRetrieverModel,
|
1881
|
+
SimpleRetrieverModel,
|
1882
|
+
],
|
1883
|
+
stream_slicer: Optional[PartitionRouter],
|
1884
|
+
) -> Optional[StreamSlicer]:
|
1885
|
+
if hasattr(model, "paginator") and model.paginator and not stream_slicer:
|
1886
|
+
# For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
|
1887
|
+
return ResumableFullRefreshCursor(parameters={})
|
1901
1888
|
elif stream_slicer:
|
1902
1889
|
# For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor`
|
1903
1890
|
return PerPartitionCursor(
|
@@ -1906,7 +1893,47 @@ class ModelToComponentFactory:
|
|
1906
1893
|
),
|
1907
1894
|
partition_router=stream_slicer,
|
1908
1895
|
)
|
1909
|
-
return
|
1896
|
+
return None
|
1897
|
+
|
1898
|
+
def _merge_stream_slicers(
|
1899
|
+
self, model: DeclarativeStreamModel, config: Config
|
1900
|
+
) -> Optional[StreamSlicer]:
|
1901
|
+
retriever_model = model.retriever
|
1902
|
+
|
1903
|
+
if retriever_model.type == "AsyncRetriever":
|
1904
|
+
is_not_datetime_cursor = (
|
1905
|
+
model.incremental_sync.type != "DatetimeBasedCursor"
|
1906
|
+
if model.incremental_sync
|
1907
|
+
else None
|
1908
|
+
)
|
1909
|
+
is_partition_router = (
|
1910
|
+
bool(retriever_model.partition_router) if model.incremental_sync else None
|
1911
|
+
)
|
1912
|
+
|
1913
|
+
if is_not_datetime_cursor:
|
1914
|
+
# We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the
|
1915
|
+
# support of unordered slices (for example, when we trigger reports for January and February, the report
|
1916
|
+
# in February can be completed first). Once we have support for custom concurrent cursor or have a new
|
1917
|
+
# implementation available in the CDK, we can enable more cursors here.
|
1918
|
+
raise ValueError(
|
1919
|
+
"AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet."
|
1920
|
+
)
|
1921
|
+
|
1922
|
+
if is_partition_router:
|
1923
|
+
# Note that this development is also done in parallel to the per partition development which once merged
|
1924
|
+
# we could support here by calling create_concurrent_cursor_from_perpartition_cursor
|
1925
|
+
raise ValueError("Per partition state is not supported yet for AsyncRetriever.")
|
1926
|
+
|
1927
|
+
stream_slicer = self._build_stream_slicer_from_partition_router(retriever_model, config)
|
1928
|
+
|
1929
|
+
if model.incremental_sync:
|
1930
|
+
return self._build_incremental_cursor(model, stream_slicer, config)
|
1931
|
+
|
1932
|
+
return (
|
1933
|
+
stream_slicer
|
1934
|
+
if self._disable_resumable_full_refresh
|
1935
|
+
else self._build_resumable_cursor(retriever_model, stream_slicer)
|
1936
|
+
)
|
1910
1937
|
|
1911
1938
|
def create_default_error_handler(
|
1912
1939
|
self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
|
@@ -2167,9 +2194,7 @@ class ModelToComponentFactory:
|
|
2167
2194
|
self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
|
2168
2195
|
) -> DynamicSchemaLoader:
|
2169
2196
|
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
2170
|
-
combined_slicers = self.
|
2171
|
-
model.retriever, stream_slicer
|
2172
|
-
)
|
2197
|
+
combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
|
2173
2198
|
|
2174
2199
|
schema_transformations = []
|
2175
2200
|
if model.schema_transformations:
|
@@ -2492,7 +2517,9 @@ class ModelToComponentFactory:
|
|
2492
2517
|
def create_parent_stream_config(
|
2493
2518
|
self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
|
2494
2519
|
) -> ParentStreamConfig:
|
2495
|
-
declarative_stream = self._create_component_from_model(
|
2520
|
+
declarative_stream = self._create_component_from_model(
|
2521
|
+
model.stream, config=config, **kwargs
|
2522
|
+
)
|
2496
2523
|
request_option = (
|
2497
2524
|
self._create_component_from_model(model.request_option, config=config)
|
2498
2525
|
if model.request_option
|
@@ -2733,6 +2760,29 @@ class ModelToComponentFactory:
|
|
2733
2760
|
parameters=model.parameters or {},
|
2734
2761
|
)
|
2735
2762
|
|
2763
|
+
def create_state_delegating_stream(
|
2764
|
+
self,
|
2765
|
+
model: StateDelegatingStreamModel,
|
2766
|
+
config: Config,
|
2767
|
+
has_parent_state: Optional[bool] = None,
|
2768
|
+
**kwargs: Any,
|
2769
|
+
) -> DeclarativeStream:
|
2770
|
+
if (
|
2771
|
+
model.full_refresh_stream.name != model.name
|
2772
|
+
or model.name != model.incremental_stream.name
|
2773
|
+
):
|
2774
|
+
raise ValueError(
|
2775
|
+
f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
|
2776
|
+
)
|
2777
|
+
|
2778
|
+
stream_model = (
|
2779
|
+
model.incremental_stream
|
2780
|
+
if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
|
2781
|
+
else model.full_refresh_stream
|
2782
|
+
)
|
2783
|
+
|
2784
|
+
return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # Will be created DeclarativeStream as stream_model is stream description
|
2785
|
+
|
2736
2786
|
def _create_async_job_status_mapping(
|
2737
2787
|
self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any
|
2738
2788
|
) -> Mapping[str, AsyncJobStatus]:
|
@@ -2964,7 +3014,7 @@ class ModelToComponentFactory:
|
|
2964
3014
|
parent_stream_configs.extend(
|
2965
3015
|
[
|
2966
3016
|
self._create_message_repository_substream_wrapper(
|
2967
|
-
model=parent_stream_config, config=config
|
3017
|
+
model=parent_stream_config, config=config, **kwargs
|
2968
3018
|
)
|
2969
3019
|
for parent_stream_config in model.parent_stream_configs
|
2970
3020
|
]
|
@@ -2977,7 +3027,7 @@ class ModelToComponentFactory:
|
|
2977
3027
|
)
|
2978
3028
|
|
2979
3029
|
def _create_message_repository_substream_wrapper(
|
2980
|
-
self, model: ParentStreamConfigModel, config: Config
|
3030
|
+
self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
|
2981
3031
|
) -> Any:
|
2982
3032
|
substream_factory = ModelToComponentFactory(
|
2983
3033
|
limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
|
@@ -2991,7 +3041,16 @@ class ModelToComponentFactory:
|
|
2991
3041
|
self._evaluate_log_level(self._emit_connector_builder_messages),
|
2992
3042
|
),
|
2993
3043
|
)
|
2994
|
-
|
3044
|
+
|
3045
|
+
# This flag will be used exclusively for StateDelegatingStream when a parent stream is created
|
3046
|
+
has_parent_state = bool(
|
3047
|
+
self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
|
3048
|
+
if model.incremental_dependency
|
3049
|
+
else False
|
3050
|
+
)
|
3051
|
+
return substream_factory._create_component_from_model(
|
3052
|
+
model=model, config=config, has_parent_state=has_parent_state, **kwargs
|
3053
|
+
)
|
2995
3054
|
|
2996
3055
|
@staticmethod
|
2997
3056
|
def create_wait_time_from_header(
|
@@ -3047,9 +3106,7 @@ class ModelToComponentFactory:
|
|
3047
3106
|
self, model: HttpComponentsResolverModel, config: Config
|
3048
3107
|
) -> Any:
|
3049
3108
|
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
3050
|
-
combined_slicers = self.
|
3051
|
-
model.retriever, stream_slicer
|
3052
|
-
)
|
3109
|
+
combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
|
3053
3110
|
|
3054
3111
|
retriever = self._create_component_from_model(
|
3055
3112
|
model=model.retriever,
|
@@ -9,4 +9,9 @@ from airbyte_cdk.sources.declarative.retrievers.simple_retriever import (
|
|
9
9
|
SimpleRetrieverTestReadDecorator,
|
10
10
|
)
|
11
11
|
|
12
|
-
__all__ = [
|
12
|
+
__all__ = [
|
13
|
+
"Retriever",
|
14
|
+
"SimpleRetriever",
|
15
|
+
"SimpleRetrieverTestReadDecorator",
|
16
|
+
"AsyncRetriever",
|
17
|
+
]
|
@@ -36,6 +36,36 @@ class AsyncRetriever(Retriever):
|
|
36
36
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
37
37
|
self._parameters = parameters
|
38
38
|
|
39
|
+
@property
|
40
|
+
def exit_on_rate_limit(self) -> bool:
|
41
|
+
"""
|
42
|
+
Whether to exit on rate limit. This is a property of the job repository
|
43
|
+
and not the stream slicer. The stream slicer is responsible for creating
|
44
|
+
the jobs, but the job repository is responsible for managing the rate
|
45
|
+
limits and other job-related properties.
|
46
|
+
|
47
|
+
Note:
|
48
|
+
- If the `creation_requester` cannot place / create the job - it might be the case of the RateLimits
|
49
|
+
- If the `creation_requester` can place / create the job - it means all other requesters should successfully manage
|
50
|
+
to complete the results.
|
51
|
+
"""
|
52
|
+
job_orchestrator = self.stream_slicer._job_orchestrator
|
53
|
+
if job_orchestrator is None:
|
54
|
+
# Default value when orchestrator is not available
|
55
|
+
return False
|
56
|
+
return job_orchestrator._job_repository.creation_requester.exit_on_rate_limit # type: ignore
|
57
|
+
|
58
|
+
@exit_on_rate_limit.setter
|
59
|
+
def exit_on_rate_limit(self, value: bool) -> None:
|
60
|
+
"""
|
61
|
+
Sets the `exit_on_rate_limit` property of the job repository > creation_requester,
|
62
|
+
meaning that the Job cannot be placed / created if the rate limit is reached.
|
63
|
+
Thus no further work on managing jobs is expected to be done.
|
64
|
+
"""
|
65
|
+
job_orchestrator = self.stream_slicer._job_orchestrator
|
66
|
+
if job_orchestrator is not None:
|
67
|
+
job_orchestrator._job_repository.creation_requester.exit_on_rate_limit = value # type: ignore[attr-defined, assignment]
|
68
|
+
|
39
69
|
@property
|
40
70
|
def state(self) -> StreamState:
|
41
71
|
"""
|
@@ -67,13 +67,13 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
|
|
67
67
|
airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
|
68
68
|
airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
|
69
69
|
airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
|
70
|
-
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
|
70
|
+
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=0I1lOxV7oQEsUxyg7q9EgcW2zvhai4_7-IIDF79WiOU,27569
|
71
71
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
72
72
|
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=_zGNGq31RNy_0QBLt_EcTvgPyhj7urPdx6oA3M5-r3o,3150
|
73
73
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
|
74
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
|
74
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=TW-7fw3OXmp8hZmbvGJiW_5SCU4f6bzJbTIveQkAPZE,149239
|
75
75
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
|
76
|
-
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=
|
76
|
+
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=dCRlddBUSaJmBNBz1pSO1r2rTw8AP5d2_vlmIeGs2gg,10767
|
77
77
|
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
|
78
78
|
airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=Jd7URkDQBoHSDQHQuYUqzeex1HYfLRtGcY_-dVW33pA,7884
|
79
79
|
airbyte_cdk/sources/declarative/decoders/decoder.py,sha256=1PeKwuMK8x9dsA2zqUjSVinEWVSEgYcUS6npiW3aC2c,855
|
@@ -109,18 +109,18 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=CQkH
|
|
109
109
|
airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=9IoeuWam3L6GyN10L6U8xNWXmkt9cnahSDNkez1OmFY,982
|
110
110
|
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=UQeuS4Vpyp4hlOn-R3tRyeBX0e9IoV6jQ6gH-Jz8lY0,7182
|
111
111
|
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=HQKHKnjE17zKoPn27ZpTpugRZZQSaof4GVzUUZaV2eE,5081
|
112
|
-
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=
|
112
|
+
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=bgrVP227hsiPJO6QeZy0v1kmdjrjQM63dlDTaI0pAC8,18300
|
113
113
|
airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
114
114
|
airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
|
115
115
|
airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
|
116
116
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
|
117
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
117
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=FIq1hfhFjMmGB2gnTo7mohxgpwmH-8C4wuiEPKxH_hQ,105591
|
118
118
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
119
119
|
airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=jDw_TttD3_hpfevXOH-0Ws0eRuqt6wvED0BqosGPRjI,5938
|
120
120
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
|
121
121
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
|
122
122
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
|
123
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
123
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=Qe28QMKiAWEWeeybPqseSL5xm-_qrgjMY-lFn4VDtJM,143588
|
124
124
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
|
125
125
|
airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
|
126
126
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
@@ -169,8 +169,8 @@ airbyte_cdk/sources/declarative/resolvers/__init__.py,sha256=NiDcz5qi8HPsfX94MUm
|
|
169
169
|
airbyte_cdk/sources/declarative/resolvers/components_resolver.py,sha256=KPjKc0yb9artL4ZkeqN8RmEykHH6FJgqXD7fCEnh1X0,1936
|
170
170
|
airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py,sha256=dz4iJV9liD_LzY_Mn4XmAStoUll60R3MIGWV4aN3pgg,5223
|
171
171
|
airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=AiojNs8wItJFrENZBFUaDvau3sgwudO6Wkra36upSPo,4639
|
172
|
-
airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=
|
173
|
-
airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=
|
172
|
+
airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=U9Hf9OK1bWdVa3cgs2cJm_-O-wOKuvhmRzP3SckL3rg,475
|
173
|
+
airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=Fxwg53i_9R3kMNFtD3gEwZbdW8xlcXYXA5evEhrKunM,5072
|
174
174
|
airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
|
175
175
|
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=fDhc6dMx75UImh1_TfLm4Le59tsHpqIUZnau7uIJyYw,25043
|
176
176
|
airbyte_cdk/sources/declarative/schema/__init__.py,sha256=xU45UvM5O4c1PSM13UHpCdh5hpW3HXy9vRRGEiAC1rg,795
|
@@ -358,9 +358,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
|
|
358
358
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
359
359
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
360
360
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
361
|
-
airbyte_cdk-6.39.
|
362
|
-
airbyte_cdk-6.39.
|
363
|
-
airbyte_cdk-6.39.
|
364
|
-
airbyte_cdk-6.39.
|
365
|
-
airbyte_cdk-6.39.
|
366
|
-
airbyte_cdk-6.39.
|
361
|
+
airbyte_cdk-6.39.3.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
362
|
+
airbyte_cdk-6.39.3.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
|
363
|
+
airbyte_cdk-6.39.3.dist-info/METADATA,sha256=oToy3NTEXtxefbvn_Nu0CuDIH_EjOpwwZ8hmUrmsZ14,6071
|
364
|
+
airbyte_cdk-6.39.3.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
365
|
+
airbyte_cdk-6.39.3.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
366
|
+
airbyte_cdk-6.39.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|