airbyte-cdk 6.39.2__py3-none-any.whl → 6.40.0.dev0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their respective public registries, and is provided for informational purposes only.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +16 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +66 -2
- airbyte_cdk/sources/declarative/declarative_stream.py +8 -1
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +24 -3
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +44 -3
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +205 -80
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +66 -12
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +0 -1
- airbyte_cdk/sources/declarative/retrievers/__init__.py +8 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +30 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +84 -2
- airbyte_cdk/sources/declarative/transformations/add_fields.py +10 -2
- airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +10 -4
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/RECORD +19 -19
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
CHANGED
@@ -351,6 +351,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
     SimpleRetriever as SimpleRetrieverModel,
 )
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    StateDelegatingStream as StateDelegatingStreamModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     StreamConfig as StreamConfigModel,
 )
@@ -435,6 +438,7 @@ from airbyte_cdk.sources.declarative.resolvers import (
 )
 from airbyte_cdk.sources.declarative.retrievers import (
     AsyncRetriever,
+    LazySimpleRetriever,
     SimpleRetriever,
     SimpleRetrieverTestReadDecorator,
 )
@@ -617,6 +621,7 @@ class ModelToComponentFactory:
             LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator,
             SelectiveAuthenticatorModel: self.create_selective_authenticator,
             SimpleRetrieverModel: self.create_simple_retriever,
+            StateDelegatingStreamModel: self.create_state_delegating_stream,
             SpecModel: self.create_spec,
             SubstreamPartitionRouterModel: self.create_substream_partition_router,
             WaitTimeFromHeaderModel: self.create_wait_time_from_header,
@@ -708,7 +713,11 @@ class ModelToComponentFactory:
             )
             for added_field_definition_model in model.fields
         ]
-        return AddFields(fields=added_field_definitions, parameters=model.parameters or {})
+        return AddFields(
+            fields=added_field_definitions,
+            condition=model.condition or "",
+            parameters=model.parameters or {},
+        )
 
     def create_keys_to_lower_transformation(
         self, model: KeysToLowerModel, config: Config, **kwargs: Any
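Note: the `condition` argument is passed straight through from the manifest model, so an AddFields transformation can now be applied selectively per record. A minimal sketch of a component definition that would exercise it (the field names, record shape, and expressions are illustrative, not taken from this diff):

    # Hypothetical AddFields definition: the field is only added when the
    # interpolated condition evaluates truthy for the record.
    add_fields_definition = {
        "type": "AddFields",
        "fields": [
            {"path": ["is_recent"], "value": "{{ record['updated_at'] >= '2024-01-01' }}"},
        ],
        "condition": "{{ record.get('updated_at') is not none }}",
    }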
@@ -744,6 +753,7 @@ class ModelToComponentFactory:
             delete_origin_value=model.delete_origin_value
             if model.delete_origin_value is not None
             else False,
+            replace_record=model.replace_record if model.replace_record is not None else False,
             parameters=model.parameters or {},
         )
 
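Note: `replace_record` is a new flag on DpathFlattenFields, defaulting to False to preserve the previous merge behavior. A plausible reading of the two modes, with an illustrative record (the semantics are inferred from the flag name, not spelled out in this diff):

    record = {"id": 1, "meta": {"a": 1, "b": 2}}
    # field_path=["meta"], replace_record=False -> {"id": 1, "a": 1, "b": 2}  (flattened fields merged in)
    # field_path=["meta"], replace_record=True  -> {"a": 1, "b": 2}           (extracted object replaces the record)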
@@ -1741,6 +1751,7 @@ class ModelToComponentFactory:
             transformations.append(
                 self._create_component_from_model(model=transformation_model, config=config)
             )
+
         retriever = self._create_component_from_model(
             model=model.retriever,
             config=config,
@@ -1751,6 +1762,7 @@ class ModelToComponentFactory:
             stop_condition_on_cursor=stop_condition_on_cursor,
             client_side_incremental_sync=client_side_incremental_sync,
             transformations=transformations,
+            incremental_sync=model.incremental_sync,
         )
         cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
 
@@ -1785,8 +1797,13 @@ class ModelToComponentFactory:
 
     def _build_stream_slicer_from_partition_router(
         self,
-        model: Union[AsyncRetrieverModel, CustomRetrieverModel, SimpleRetrieverModel],
+        model: Union[
+            AsyncRetrieverModel,
+            CustomRetrieverModel,
+            SimpleRetrieverModel,
+        ],
         config: Config,
+        stream_name: Optional[str] = None,
     ) -> Optional[PartitionRouter]:
         if (
             hasattr(model, "partition_router")
@@ -1794,95 +1811,65 @@ class ModelToComponentFactory:
             and model.partition_router
         ):
             stream_slicer_model = model.partition_router
-
             if isinstance(stream_slicer_model, list):
                 return CartesianProductStreamSlicer(
                     [
-                        self._create_component_from_model(model=slicer, config=config)
+                        self._create_component_from_model(
+                            model=slicer, config=config, stream_name=stream_name or ""
+                        )
                         for slicer in stream_slicer_model
                     ],
                     parameters={},
                 )
             else:
-                return self._create_component_from_model(  # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router
-                    model=stream_slicer_model, config=config
-                )
+                return self._create_component_from_model(  # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router
+                    model=stream_slicer_model, config=config, stream_name=stream_name or ""
+                )
         return None
 
-    def _build_resumable_cursor_from_paginator(
+    def _build_incremental_cursor(
         self,
-        model: Union[AsyncRetrieverModel, CustomRetrieverModel, SimpleRetrieverModel],
-        stream_slicer: Optional[StreamSlicer],
-    ) -> Optional[StreamSlicer]:
-        if hasattr(model, "paginator") and model.paginator and not stream_slicer:
-            # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
-            return ResumableFullRefreshCursor(parameters={})
-        return None
-
-    def _merge_stream_slicers(
-        self, model: DeclarativeStreamModel, config: Config
+        model: DeclarativeStreamModel,
+        stream_slicer: Optional[PartitionRouter],
+        config: Config,
     ) -> Optional[StreamSlicer]:
-        stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
-
         if model.incremental_sync and stream_slicer:
             if model.retriever.type == "AsyncRetriever":
-                if model.incremental_sync.type != "DatetimeBasedCursor":
-                    # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support or unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
-                    raise ValueError(
-                        "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
-                    )
-                if stream_slicer:
-                    return self.create_concurrent_cursor_from_perpartition_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
-                        state_manager=self._connector_state_manager,
-                        model_type=DatetimeBasedCursorModel,
-                        component_definition=model.incremental_sync.__dict__,
-                        stream_name=model.name or "",
-                        stream_namespace=None,
-                        config=config or {},
-                        stream_state={},
-                        partition_router=stream_slicer,
-                    )
-                return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
-                    model_type=DatetimeBasedCursorModel,
-                    component_definition=model.incremental_sync.__dict__,
-                    stream_name=model.name or "",
-                    stream_namespace=None,
-                    config=config or {},
-                )
+                return self.create_concurrent_cursor_from_perpartition_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
+                    state_manager=self._connector_state_manager,
+                    model_type=DatetimeBasedCursorModel,
+                    component_definition=model.incremental_sync.__dict__,
+                    stream_name=model.name or "",
+                    stream_namespace=None,
+                    config=config or {},
+                    stream_state={},
+                    partition_router=stream_slicer,
+                )
 
             incremental_sync_model = model.incremental_sync
-            if (
+            cursor_component = self._create_component_from_model(
+                model=incremental_sync_model, config=config
+            )
+            is_global_cursor = (
                 hasattr(incremental_sync_model, "global_substream_cursor")
                 and incremental_sync_model.global_substream_cursor
-            ):
-                cursor_component = self._create_component_from_model(
-                    model=incremental_sync_model, config=config
-                )
+            )
+
+            if is_global_cursor:
                 return GlobalSubstreamCursor(
                     stream_cursor=cursor_component, partition_router=stream_slicer
                 )
-            else:
-                cursor_component = self._create_component_from_model(
-                    model=incremental_sync_model, config=config
-                )
-                return PerPartitionWithGlobalCursor(
-                    cursor_factory=CursorFactory(
-                        lambda: self._create_component_from_model(
-                            model=incremental_sync_model, config=config
-                        ),
-                    ),
-                    partition_router=stream_slicer,
-                    stream_cursor=cursor_component,
-                )
+            return PerPartitionWithGlobalCursor(
+                cursor_factory=CursorFactory(
+                    lambda: self._create_component_from_model(
+                        model=incremental_sync_model, config=config
+                    ),
+                ),
+                partition_router=stream_slicer,
+                stream_cursor=cursor_component,
+            )
         elif model.incremental_sync:
             if model.retriever.type == "AsyncRetriever":
-                if model.incremental_sync.type != "DatetimeBasedCursor":
-                    # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support or unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
-                    raise ValueError(
-                        "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
-                    )
-                if model.retriever.partition_router:
-                    # Note that this development is also done in parallel to the per partition development which once merged we could support here by calling `create_concurrent_cursor_from_perpartition_cursor`
-                    raise ValueError("Per partition state is not supported yet for AsyncRetriever")
                 return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
                     model_type=DatetimeBasedCursorModel,
                     component_definition=model.incremental_sync.__dict__,
@@ -1891,13 +1878,21 @@ class ModelToComponentFactory:
                     config=config or {},
                     stream_state_migrations=model.state_migrations,
                 )
-            return (
-                self._create_component_from_model(model=model.incremental_sync, config=config)  # type: ignore[no-any-return] # Will be created Cursor as stream_slicer_model is model.incremental_sync
-                if model.incremental_sync
-                else None
-            )
-        elif self._disable_resumable_full_refresh:
-            return stream_slicer
+            return self._create_component_from_model(model=model.incremental_sync, config=config)  # type: ignore[no-any-return] # Will be created Cursor as stream_slicer_model is model.incremental_sync
+        return None
+
+    def _build_resumable_cursor(
+        self,
+        model: Union[
+            AsyncRetrieverModel,
+            CustomRetrieverModel,
+            SimpleRetrieverModel,
+        ],
+        stream_slicer: Optional[PartitionRouter],
+    ) -> Optional[StreamSlicer]:
+        if hasattr(model, "paginator") and model.paginator and not stream_slicer:
+            # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
+            return ResumableFullRefreshCursor(parameters={})
         elif stream_slicer:
             # For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor`
             return PerPartitionCursor(
@@ -1906,7 +1901,49 @@ class ModelToComponentFactory:
                 ),
                 partition_router=stream_slicer,
             )
-        return
+        return None
+
+    def _merge_stream_slicers(
+        self, model: DeclarativeStreamModel, config: Config
+    ) -> Optional[StreamSlicer]:
+        retriever_model = model.retriever
+
+        stream_slicer = self._build_stream_slicer_from_partition_router(
+            retriever_model, config, stream_name=model.name
+        )
+
+        if retriever_model.type == "AsyncRetriever":
+            is_not_datetime_cursor = (
+                model.incremental_sync.type != "DatetimeBasedCursor"
+                if model.incremental_sync
+                else None
+            )
+            is_partition_router = (
+                bool(retriever_model.partition_router) if model.incremental_sync else None
+            )
+
+            if is_not_datetime_cursor:
+                # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the
+                # support or unordered slices (for example, when we trigger reports for January and February, the report
+                # in February can be completed first). Once we have support for custom concurrent cursor or have a new
+                # implementation available in the CDK, we can enable more cursors here.
+                raise ValueError(
+                    "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet."
+                )
+
+            if is_partition_router and not stream_slicer:
+                # Note that this development is also done in parallel to the per partition development which once merged
+                # we could support here by calling create_concurrent_cursor_from_perpartition_cursor
+                raise ValueError("Per partition state is not supported yet for AsyncRetriever.")
+
+        if model.incremental_sync:
+            return self._build_incremental_cursor(model, stream_slicer, config)
+
+        return (
+            stream_slicer
+            if self._disable_resumable_full_refresh
+            else self._build_resumable_cursor(retriever_model, stream_slicer)
+        )
 
     def create_default_error_handler(
         self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
@@ -2167,9 +2204,7 @@ class ModelToComponentFactory:
         self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
     ) -> DynamicSchemaLoader:
         stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
-        combined_slicers = self._build_resumable_cursor_from_paginator(
-            model.retriever, stream_slicer
-        )
+        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
 
         schema_transformations = []
         if model.schema_transformations:
@@ -2492,12 +2527,24 @@ class ModelToComponentFactory:
     def create_parent_stream_config(
         self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
     ) -> ParentStreamConfig:
-        declarative_stream = self._create_component_from_model(model.stream, config=config)
+        declarative_stream = self._create_component_from_model(
+            model.stream, config=config, **kwargs
+        )
         request_option = (
             self._create_component_from_model(model.request_option, config=config)
             if model.request_option
             else None
         )
+
+        if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer):
+            raise ValueError(
+                "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed."
+            )
+
+        model_lazy_read_pointer: List[Union[InterpolatedString, str]] = (
+            [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else []
+        )
+
         return ParentStreamConfig(
             parent_key=model.parent_key,
             request_option=request_option,
@@ -2507,6 +2554,7 @@ class ModelToComponentFactory:
             incremental_dependency=model.incremental_dependency or False,
             parameters=model.parameters or {},
             extra_fields=model.extra_fields,
+            lazy_read_pointer=model_lazy_read_pointer,
         )
 
     @staticmethod
@@ -2566,7 +2614,9 @@ class ModelToComponentFactory:
             else None
         )
 
-        transform_before_filtering = model.transform_before_filtering or False
+        assert model.transform_before_filtering is not None  # for mypy
+
+        transform_before_filtering = model.transform_before_filtering
         if client_side_incremental_sync:
             record_filter = ClientSideIncrementalRecordFilterDecorator(
                 config=config,
@@ -2647,6 +2697,12 @@ class ModelToComponentFactory:
         stop_condition_on_cursor: bool = False,
         client_side_incremental_sync: Optional[Dict[str, Any]] = None,
         transformations: List[RecordTransformation],
+        incremental_sync: Optional[
+            Union[
+                IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
+            ]
+        ] = None,
+        **kwargs: Any,
     ) -> SimpleRetriever:
         decoder = (
             self._create_component_from_model(model=model.decoder, config=config)
@@ -2704,6 +2760,45 @@ class ModelToComponentFactory:
             model.ignore_stream_slicer_parameters_on_paginated_requests or False
         )
 
+        if (
+            model.partition_router
+            and isinstance(model.partition_router, SubstreamPartitionRouterModel)
+            and not bool(self._connector_state_manager.get_stream_state(name, None))
+            and any(
+                parent_stream_config.lazy_read_pointer
+                for parent_stream_config in model.partition_router.parent_stream_configs
+            )
+        ):
+            if incremental_sync:
+                if incremental_sync.type != "DatetimeBasedCursor":
+                    raise ValueError(
+                        f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}."
+                    )
+
+                elif incremental_sync.step or incremental_sync.cursor_granularity:
+                    raise ValueError(
+                        f"Found more that one slice per parent. LazySimpleRetriever only supports single slice read for stream - {name}."
+                    )
+
+            if model.decoder and model.decoder.type != "JsonDecoder":
+                raise ValueError(
+                    f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}."
+                )
+
+            return LazySimpleRetriever(
+                name=name,
+                paginator=paginator,
+                primary_key=primary_key,
+                requester=requester,
+                record_selector=record_selector,
+                stream_slicer=stream_slicer,
+                request_option_provider=request_options_provider,
+                cursor=cursor,
+                config=config,
+                ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
+                parameters=model.parameters or {},
+            )
+
         if self._limit_slices_fetched or self._emit_connector_builder_messages:
             return SimpleRetrieverTestReadDecorator(
                 name=name,
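Note: taken together, the gate above only builds a LazySimpleRetriever when the stream uses a SubstreamPartitionRouter whose parent config declares `lazy_read_pointer`, there is no saved stream state, any cursor is a single-slice DatetimeBasedCursor (no `step` or `cursor_granularity`), and the decoder is JSON. A hypothetical parent stream config that would opt in (names and paths are illustrative):

    parent_stream_config = {
        "type": "ParentStreamConfig",
        "parent_key": "id",
        "partition_field": "parent_id",
        "stream": {"$ref": "#/definitions/streams/parents"},
        # Path inside each parent record where child records are already embedded;
        # "*" wildcards are rejected by create_parent_stream_config above.
        "lazy_read_pointer": ["children"],
    }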
@@ -2733,6 +2828,29 @@ class ModelToComponentFactory:
             parameters=model.parameters or {},
         )
 
+    def create_state_delegating_stream(
+        self,
+        model: StateDelegatingStreamModel,
+        config: Config,
+        has_parent_state: Optional[bool] = None,
+        **kwargs: Any,
+    ) -> DeclarativeStream:
+        if (
+            model.full_refresh_stream.name != model.name
+            or model.name != model.incremental_stream.name
+        ):
+            raise ValueError(
+                f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
+            )
+
+        stream_model = (
+            model.incremental_stream
+            if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
+            else model.full_refresh_stream
+        )
+
+        return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return] # Will be created DeclarativeStream as stream_model is stream description
+
     def _create_async_job_status_mapping(
         self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any
     ) -> Mapping[str, AsyncJobStatus]:
@@ -2964,7 +3082,7 @@ class ModelToComponentFactory:
             parent_stream_configs.extend(
                 [
                     self._create_message_repository_substream_wrapper(
-                        model=parent_stream_config, config=config
+                        model=parent_stream_config, config=config, **kwargs
                     )
                     for parent_stream_config in model.parent_stream_configs
                 ]
@@ -2977,7 +3095,7 @@ class ModelToComponentFactory:
         )
 
     def _create_message_repository_substream_wrapper(
-        self, model: ParentStreamConfigModel, config: Config
+        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
     ) -> Any:
         substream_factory = ModelToComponentFactory(
             limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
@@ -2991,7 +3109,16 @@ class ModelToComponentFactory:
                 self._evaluate_log_level(self._emit_connector_builder_messages),
             ),
         )
-        return substream_factory._create_component_from_model(model=model, config=config)
+
+        # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
+        has_parent_state = bool(
+            self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
+            if model.incremental_dependency
+            else False
+        )
+        return substream_factory._create_component_from_model(
+            model=model, config=config, has_parent_state=has_parent_state, **kwargs
+        )
 
     @staticmethod
     def create_wait_time_from_header(
@@ -3047,9 +3174,7 @@ class ModelToComponentFactory:
         self, model: HttpComponentsResolverModel, config: Config
     ) -> Any:
         stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
-        combined_slicers = self._build_resumable_cursor_from_paginator(
-            model.retriever, stream_slicer
-        )
+        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
 
         retriever = self._create_component_from_model(
             model=model.retriever,
airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py
CHANGED
@@ -1,12 +1,16 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
+
+
 import copy
+import json
 import logging
 from dataclasses import InitVar, dataclass
 from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
 
 import dpath
+import requests
 
 from airbyte_cdk.models import AirbyteMessage
 from airbyte_cdk.models import Type as MessageType
@@ -46,6 +50,7 @@ class ParentStreamConfig:
     )
     request_option: Optional[RequestOption] = None
     incremental_dependency: bool = False
+    lazy_read_pointer: Optional[List[Union[InterpolatedString, str]]] = None
 
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self.parent_key = InterpolatedString.create(self.parent_key, parameters=parameters)
@@ -59,6 +64,17 @@ class ParentStreamConfig:
             for key_path in self.extra_fields
         ]
 
+        self.lazy_read_pointer = (
+            [
+                InterpolatedString.create(path, parameters=parameters)
+                if isinstance(path, str)
+                else path
+                for path in self.lazy_read_pointer
+            ]
+            if self.lazy_read_pointer
+            else None
+        )
+
 
 @dataclass
 class SubstreamPartitionRouter(PartitionRouter):
@@ -196,6 +212,15 @@ class SubstreamPartitionRouter(PartitionRouter):
                 # Add extra fields
                 extracted_extra_fields = self._extract_extra_fields(parent_record, extra_fields)
 
+                if parent_stream_config.lazy_read_pointer:
+                    extracted_extra_fields = {
+                        "child_response": self._extract_child_response(
+                            parent_record,
+                            parent_stream_config.lazy_read_pointer,  # type: ignore[arg-type]  # lazy_read_pointer type handeled in __post_init__ of parent_stream_config
+                        ),
+                        **extracted_extra_fields,
+                    }
+
                 yield StreamSlice(
                     partition={
                         partition_field: partition_value,
@@ -205,6 +230,21 @@ class SubstreamPartitionRouter(PartitionRouter):
                     extra_fields=extracted_extra_fields,
                 )
 
+    def _extract_child_response(
+        self, parent_record: Mapping[str, Any] | AirbyteMessage, pointer: List[InterpolatedString]
+    ) -> requests.Response:
+        """Extract child records from a parent record based on lazy pointers."""
+
+        def _create_response(data: MutableMapping[str, Any]) -> SafeResponse:
+            """Create a SafeResponse with the given data."""
+            response = SafeResponse()
+            response.content = json.dumps(data).encode("utf-8")
+            response.status_code = 200
+            return response
+
+        path = [path.eval(self.config) for path in pointer]
+        return _create_response(dpath.get(parent_record, path, default=[]))  # type: ignore # argunet will be a MutableMapping, given input data structure
+
     def _extract_extra_fields(
         self,
         parent_record: Mapping[str, Any] | AirbyteMessage,
@@ -280,20 +320,15 @@ class SubstreamPartitionRouter(PartitionRouter):
 
         parent_state = stream_state.get("parent_state", {})
 
-        # If `parent_state` doesn't exist and at least one parent stream has an incremental dependency,
-        # copy the child state to parent streams with incremental dependencies.
-        incremental_dependency = any(
-            [parent_config.incremental_dependency for parent_config in self.parent_stream_configs]
-        )
-        if not parent_state and not incremental_dependency:
-            return
-
-        if not parent_state and incremental_dependency:
-            # Migrate child state to parent state format
-            parent_state = self._migrate_child_state_to_parent_state(stream_state)
-
         # Set state for each parent stream with an incremental dependency
         for parent_config in self.parent_stream_configs:
+            if (
+                not parent_state.get(parent_config.stream.name, {})
+                and parent_config.incremental_dependency
+            ):
+                # Migrate child state to parent state format
+                parent_state = self._migrate_child_state_to_parent_state(stream_state)
+
             if parent_config.incremental_dependency:
                 parent_config.stream.state = parent_state.get(parent_config.stream.name, {})
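Note: state migration now happens per parent inside the loop rather than once up front: the child-format state is only copied into a parent's slot when that parent has an incremental dependency and no existing entry under `parent_state`. Roughly, with illustrative state shapes (the exact migrated layout is an assumption based on the helper's name):

    # Child-format state with no entry yet for the dependent parent stream...
    stream_state = {"updated_at": "2024-01-01T00:00:00Z", "parent_state": {}}
    # ...gets migrated so the parent resumes from the child cursor, e.g.
    # parent_state == {"parents": {"updated_at": "2024-01-01T00:00:00Z"}}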
@@ -381,3 +416,22 @@ class SubstreamPartitionRouter(PartitionRouter):
     @property
     def logger(self) -> logging.Logger:
         return logging.getLogger("airbyte.SubstreamPartitionRouter")
+
+
+class SafeResponse(requests.Response):
+    """
+    A subclass of requests.Response that acts as an interface to migrate parsed child records
+    into a response object. This allows seamless interaction with child records as if they
+    were original response, ensuring compatibility with methods that expect requests.Response data type.
+    """
+
+    def __getattr__(self, name: str) -> Any:
+        return getattr(requests.Response, name, None)
+
+    @property
+    def content(self) -> Optional[bytes]:
+        return super().content
+
+    @content.setter
+    def content(self, value: Union[str, bytes]) -> None:
+        self._content = value.encode() if isinstance(value, str) else value
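Note: the point of SafeResponse is that `content` is writable, unlike on a plain requests.Response where it is a read-only property. A quick sketch of the round trip with illustrative data (not taken from this diff):

    import json

    response = SafeResponse()
    # The custom setter stores the bytes in _content, which requests then
    # reads back through .content / .text / .json().
    response.content = json.dumps([{"id": 1}, {"id": 2}]).encode("utf-8")
    response.status_code = 200
    assert response.json() == [{"id": 1}, {"id": 2}]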
airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py
CHANGED
@@ -71,7 +71,6 @@ class CursorPaginationStrategy(PaginationStrategy):
         last_page_token_value: Optional[Any] = None,
     ) -> Optional[Any]:
         decoded_response = next(self.decoder.decode(response))
-
         # The default way that link is presented in requests.Response is a string of various links (last, next, etc). This
         # is not indexable or useful for parsing the cursor, so we replace it with the link dictionary from response.links
         headers: Dict[str, Any] = dict(response.headers)
airbyte_cdk/sources/declarative/retrievers/__init__.py
CHANGED
@@ -5,8 +5,15 @@
 from airbyte_cdk.sources.declarative.retrievers.async_retriever import AsyncRetriever
 from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
 from airbyte_cdk.sources.declarative.retrievers.simple_retriever import (
+    LazySimpleRetriever,
     SimpleRetriever,
     SimpleRetrieverTestReadDecorator,
 )
 
-__all__ = ["Retriever", "SimpleRetriever", "SimpleRetrieverTestReadDecorator", "AsyncRetriever"]
+__all__ = [
+    "Retriever",
+    "SimpleRetriever",
+    "SimpleRetrieverTestReadDecorator",
+    "AsyncRetriever",
+    "LazySimpleRetriever",
+]
airbyte_cdk/sources/declarative/retrievers/async_retriever.py
CHANGED
@@ -36,6 +36,36 @@ class AsyncRetriever(Retriever):
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._parameters = parameters
 
+    @property
+    def exit_on_rate_limit(self) -> bool:
+        """
+        Whether to exit on rate limit. This is a property of the job repository
+        and not the stream slicer. The stream slicer is responsible for creating
+        the jobs, but the job repository is responsible for managing the rate
+        limits and other job-related properties.
+
+        Note:
+         - If the `creation_requester` cannot place / create the job - it might be the case of the RateLimits
+         - If the `creation_requester` can place / create the job - it means all other requesters should successfully manage
+           to complete the results.
+        """
+        job_orchestrator = self.stream_slicer._job_orchestrator
+        if job_orchestrator is None:
+            # Default value when orchestrator is not available
+            return False
+        return job_orchestrator._job_repository.creation_requester.exit_on_rate_limit  # type: ignore
+
+    @exit_on_rate_limit.setter
+    def exit_on_rate_limit(self, value: bool) -> None:
+        """
+        Sets the `exit_on_rate_limit` property of the job repository > creation_requester,
+        meaning that the Job cannot be placed / created if the rate limit is reached.
+        Thus no further work on managing jobs is expected to be done.
+        """
+        job_orchestrator = self.stream_slicer._job_orchestrator
+        if job_orchestrator is not None:
+            job_orchestrator._job_repository.creation_requester.exit_on_rate_limit = value  # type: ignore[attr-defined, assignment]
+
     @property
     def state(self) -> StreamState:
         """