airbyte-cdk 6.39.2__py3-none-any.whl → 6.40.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19)
  1. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +16 -0
  2. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +66 -2
  3. airbyte_cdk/sources/declarative/declarative_stream.py +8 -1
  4. airbyte_cdk/sources/declarative/manifest_declarative_source.py +24 -3
  5. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +44 -3
  6. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +205 -80
  7. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +66 -12
  8. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +0 -1
  9. airbyte_cdk/sources/declarative/retrievers/__init__.py +8 -1
  10. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +30 -0
  11. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +84 -2
  12. airbyte_cdk/sources/declarative/transformations/add_fields.py +10 -2
  13. airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +10 -4
  14. {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/METADATA +1 -1
  15. {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/RECORD +19 -19
  16. {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/LICENSE.txt +0 -0
  17. {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/LICENSE_SHORT +0 -0
  18. {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/WHEEL +0 -0
  19. {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

@@ -351,6 +351,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
     SimpleRetriever as SimpleRetrieverModel,
 )
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    StateDelegatingStream as StateDelegatingStreamModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     StreamConfig as StreamConfigModel,
 )
@@ -435,6 +438,7 @@ from airbyte_cdk.sources.declarative.resolvers import (
 )
 from airbyte_cdk.sources.declarative.retrievers import (
     AsyncRetriever,
+    LazySimpleRetriever,
     SimpleRetriever,
     SimpleRetrieverTestReadDecorator,
 )
@@ -617,6 +621,7 @@ class ModelToComponentFactory:
             LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator,
             SelectiveAuthenticatorModel: self.create_selective_authenticator,
             SimpleRetrieverModel: self.create_simple_retriever,
+            StateDelegatingStreamModel: self.create_state_delegating_stream,
             SpecModel: self.create_spec,
             SubstreamPartitionRouterModel: self.create_substream_partition_router,
             WaitTimeFromHeaderModel: self.create_wait_time_from_header,
@@ -708,7 +713,11 @@ class ModelToComponentFactory:
             )
             for added_field_definition_model in model.fields
         ]
-        return AddFields(fields=added_field_definitions, parameters=model.parameters or {})
+        return AddFields(
+            fields=added_field_definitions,
+            condition=model.condition or "",
+            parameters=model.parameters or {},
+        )

     def create_keys_to_lower_transformation(
         self, model: KeysToLowerModel, config: Config, **kwargs: Any
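Note on the change above: AddFields now receives the model's condition, so a transformation can apply to only the records that match an interpolated boolean expression; an empty condition (the default) keeps the old apply-to-everything behavior. A hedged sketch of such a component, written as the Python dict a manifest parses into (the field names and condition are invented for illustration):

# Hypothetical component definition: add a normalized email only when the
# record actually carries one.
add_fields = {
    "type": "AddFields",
    "condition": "{{ record.get('email') is not none }}",
    "fields": [
        {
            "type": "AddedFieldDefinition",
            "path": ["email_normalized"],
            "value": "{{ record['email'] | lower }}",
        }
    ],
}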
@@ -744,6 +753,7 @@ class ModelToComponentFactory:
             delete_origin_value=model.delete_origin_value
             if model.delete_origin_value is not None
             else False,
+            replace_record=model.replace_record if model.replace_record is not None else False,
             parameters=model.parameters or {},
         )
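DpathFlattenFields likewise gains a replace_record flag (defaulting to False, as above), which makes the flattened sub-object replace the record outright instead of being merged into it. A hedged manifest sketch with an invented field path:

# Hypothetical component definition: promote the object under data.attributes
# to be the whole record rather than merging its keys alongside the original.
flatten = {
    "type": "DpathFlattenFields",
    "field_path": ["data", "attributes"],
    "replace_record": True,  # new in this release; omitting it means False
}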
@@ -1741,6 +1751,7 @@ class ModelToComponentFactory:
                 transformations.append(
                     self._create_component_from_model(model=transformation_model, config=config)
                 )
+
         retriever = self._create_component_from_model(
             model=model.retriever,
             config=config,
@@ -1751,6 +1762,7 @@ class ModelToComponentFactory:
             stop_condition_on_cursor=stop_condition_on_cursor,
             client_side_incremental_sync=client_side_incremental_sync,
             transformations=transformations,
+            incremental_sync=model.incremental_sync,
         )
         cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None

@@ -1785,8 +1797,13 @@ class ModelToComponentFactory:

     def _build_stream_slicer_from_partition_router(
         self,
-        model: Union[AsyncRetrieverModel, CustomRetrieverModel, SimpleRetrieverModel],
+        model: Union[
+            AsyncRetrieverModel,
+            CustomRetrieverModel,
+            SimpleRetrieverModel,
+        ],
         config: Config,
+        stream_name: Optional[str] = None,
     ) -> Optional[PartitionRouter]:
         if (
             hasattr(model, "partition_router")
@@ -1794,95 +1811,65 @@ class ModelToComponentFactory:
             and model.partition_router
         ):
             stream_slicer_model = model.partition_router
-
             if isinstance(stream_slicer_model, list):
                 return CartesianProductStreamSlicer(
                     [
-                        self._create_component_from_model(model=slicer, config=config)
+                        self._create_component_from_model(
+                            model=slicer, config=config, stream_name=stream_name or ""
+                        )
                         for slicer in stream_slicer_model
                     ],
                     parameters={},
                 )
             else:
-                return self._create_component_from_model(model=stream_slicer_model, config=config)  # type: ignore[no-any-return]
-                # Will be created PartitionRouter as stream_slicer_model is model.partition_router
+                return self._create_component_from_model(  # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router
+                    model=stream_slicer_model, config=config, stream_name=stream_name or ""
+                )
         return None

-    def _build_resumable_cursor_from_paginator(
+    def _build_incremental_cursor(
         self,
-        model: Union[AsyncRetrieverModel, CustomRetrieverModel, SimpleRetrieverModel],
-        stream_slicer: Optional[StreamSlicer],
-    ) -> Optional[StreamSlicer]:
-        if hasattr(model, "paginator") and model.paginator and not stream_slicer:
-            # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
-            return ResumableFullRefreshCursor(parameters={})
-        return None
-
-    def _merge_stream_slicers(
-        self, model: DeclarativeStreamModel, config: Config
+        model: DeclarativeStreamModel,
+        stream_slicer: Optional[PartitionRouter],
+        config: Config,
     ) -> Optional[StreamSlicer]:
-        stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
-
         if model.incremental_sync and stream_slicer:
             if model.retriever.type == "AsyncRetriever":
-                if model.incremental_sync.type != "DatetimeBasedCursor":
-                    # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support or unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
-                    raise ValueError(
-                        "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
-                    )
-                if stream_slicer:
-                    return self.create_concurrent_cursor_from_perpartition_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
-                        state_manager=self._connector_state_manager,
-                        model_type=DatetimeBasedCursorModel,
-                        component_definition=model.incremental_sync.__dict__,
-                        stream_name=model.name or "",
-                        stream_namespace=None,
-                        config=config or {},
-                        stream_state={},
-                        partition_router=stream_slicer,
-                    )
-                return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
+                return self.create_concurrent_cursor_from_perpartition_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
+                    state_manager=self._connector_state_manager,
                     model_type=DatetimeBasedCursorModel,
                     component_definition=model.incremental_sync.__dict__,
                     stream_name=model.name or "",
                     stream_namespace=None,
                     config=config or {},
+                    stream_state={},
+                    partition_router=stream_slicer,
                 )

             incremental_sync_model = model.incremental_sync
-            if (
+            cursor_component = self._create_component_from_model(
+                model=incremental_sync_model, config=config
+            )
+            is_global_cursor = (
                 hasattr(incremental_sync_model, "global_substream_cursor")
                 and incremental_sync_model.global_substream_cursor
-            ):
-                cursor_component = self._create_component_from_model(
-                    model=incremental_sync_model, config=config
-                )
+            )
+
+            if is_global_cursor:
                 return GlobalSubstreamCursor(
                     stream_cursor=cursor_component, partition_router=stream_slicer
                 )
-            else:
-                cursor_component = self._create_component_from_model(
-                    model=incremental_sync_model, config=config
-                )
-                return PerPartitionWithGlobalCursor(
-                    cursor_factory=CursorFactory(
-                        lambda: self._create_component_from_model(
-                            model=incremental_sync_model, config=config
-                        ),
+            return PerPartitionWithGlobalCursor(
+                cursor_factory=CursorFactory(
+                    lambda: self._create_component_from_model(
+                        model=incremental_sync_model, config=config
                     ),
-                    partition_router=stream_slicer,
-                    stream_cursor=cursor_component,
-                )
+                ),
+                partition_router=stream_slicer,
+                stream_cursor=cursor_component,
+            )
         elif model.incremental_sync:
             if model.retriever.type == "AsyncRetriever":
-                if model.incremental_sync.type != "DatetimeBasedCursor":
-                    # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support or unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
-                    raise ValueError(
-                        "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
-                    )
-                if model.retriever.partition_router:
-                    # Note that this development is also done in parallel to the per partition development which once merged we could support here by calling `create_concurrent_cursor_from_perpartition_cursor`
-                    raise ValueError("Per partition state is not supported yet for AsyncRetriever")
                 return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
                     model_type=DatetimeBasedCursorModel,
                     component_definition=model.incremental_sync.__dict__,
@@ -1891,13 +1878,21 @@ class ModelToComponentFactory:
                     config=config or {},
                     stream_state_migrations=model.state_migrations,
                 )
-            return (
-                self._create_component_from_model(model=model.incremental_sync, config=config)
-                if model.incremental_sync
-                else None
-            )
-        elif self._disable_resumable_full_refresh:
-            return stream_slicer
+            return self._create_component_from_model(model=model.incremental_sync, config=config)  # type: ignore[no-any-return] # Will be created Cursor as stream_slicer_model is model.incremental_sync
+        return None
+
+    def _build_resumable_cursor(
+        self,
+        model: Union[
+            AsyncRetrieverModel,
+            CustomRetrieverModel,
+            SimpleRetrieverModel,
+        ],
+        stream_slicer: Optional[PartitionRouter],
+    ) -> Optional[StreamSlicer]:
+        if hasattr(model, "paginator") and model.paginator and not stream_slicer:
+            # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
+            return ResumableFullRefreshCursor(parameters={})
         elif stream_slicer:
             # For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor`
             return PerPartitionCursor(
@@ -1906,7 +1901,49 @@ class ModelToComponentFactory:
                 ),
                 partition_router=stream_slicer,
             )
-        return self._build_resumable_cursor_from_paginator(model.retriever, stream_slicer)
+        return None
+
+    def _merge_stream_slicers(
+        self, model: DeclarativeStreamModel, config: Config
+    ) -> Optional[StreamSlicer]:
+        retriever_model = model.retriever
+
+        stream_slicer = self._build_stream_slicer_from_partition_router(
+            retriever_model, config, stream_name=model.name
+        )
+
+        if retriever_model.type == "AsyncRetriever":
+            is_not_datetime_cursor = (
+                model.incremental_sync.type != "DatetimeBasedCursor"
+                if model.incremental_sync
+                else None
+            )
+            is_partition_router = (
+                bool(retriever_model.partition_router) if model.incremental_sync else None
+            )
+
+            if is_not_datetime_cursor:
+                # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the
+                # support of unordered slices (for example, when we trigger reports for January and February, the report
+                # in February can be completed first). Once we have support for custom concurrent cursor or have a new
+                # implementation available in the CDK, we can enable more cursors here.
+                raise ValueError(
+                    "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet."
+                )
+
+            if is_partition_router and not stream_slicer:
+                # Note that this development is also done in parallel to the per partition development which once merged
+                # we could support here by calling create_concurrent_cursor_from_perpartition_cursor
+                raise ValueError("Per partition state is not supported yet for AsyncRetriever.")
+
+        if model.incremental_sync:
+            return self._build_incremental_cursor(model, stream_slicer, config)
+
+        return (
+            stream_slicer
+            if self._disable_resumable_full_refresh
+            else self._build_resumable_cursor(retriever_model, stream_slicer)
+        )

     def create_default_error_handler(
         self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
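To summarize the refactor: the old _merge_stream_slicers body is split into _build_incremental_cursor and _build_resumable_cursor, with _merge_stream_slicers reduced to the dispatch shown above. Paraphrasing the code (not quoting it), the resulting cursor selection is:

- incremental sync + partition router: a concurrent per-partition cursor for AsyncRetriever, otherwise GlobalSubstreamCursor or PerPartitionWithGlobalCursor
- incremental sync only: a concurrent datetime-based cursor for AsyncRetriever, otherwise the declarative cursor component
- partition router only: PerPartitionCursor wrapping ChildPartitionResumableFullRefreshCursor
- neither: ResumableFullRefreshCursor when a paginator exists, else no slicer (or the bare partition router when resumable full refresh is disabled)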
@@ -2167,9 +2204,7 @@ class ModelToComponentFactory:
         self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
     ) -> DynamicSchemaLoader:
         stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
-        combined_slicers = self._build_resumable_cursor_from_paginator(
-            model.retriever, stream_slicer
-        )
+        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)

         schema_transformations = []
         if model.schema_transformations:
@@ -2492,12 +2527,24 @@ class ModelToComponentFactory:
     def create_parent_stream_config(
         self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
     ) -> ParentStreamConfig:
-        declarative_stream = self._create_component_from_model(model.stream, config=config)
+        declarative_stream = self._create_component_from_model(
+            model.stream, config=config, **kwargs
+        )
         request_option = (
             self._create_component_from_model(model.request_option, config=config)
             if model.request_option
             else None
         )
+
+        if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer):
+            raise ValueError(
+                "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed."
+            )
+
+        model_lazy_read_pointer: List[Union[InterpolatedString, str]] = (
+            [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else []
+        )
+
         return ParentStreamConfig(
             parent_key=model.parent_key,
             request_option=request_option,
@@ -2507,6 +2554,7 @@ class ModelToComponentFactory:
             incremental_dependency=model.incremental_dependency or False,
             parameters=model.parameters or {},
             extra_fields=model.extra_fields,
+            lazy_read_pointer=model_lazy_read_pointer,
         )

     @staticmethod
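The lazy_read_pointer plumbed through here is a dpath into each parent record where child records are already embedded, so the child stream can be served from the parent response instead of issuing one request per partition. A hedged sketch of such a parent stream config, written as the parsed Python dict (the stream reference and field names are invented):

# Hypothetical parent stream config: each record of the parent "projects"
# stream embeds that project's tasks under the "tasks" key.
parent_stream_config = {
    "type": "ParentStreamConfig",
    "stream": "#/definitions/streams/projects",
    "parent_key": "id",
    "partition_field": "project_id",
    "lazy_read_pointer": ["tasks"],  # '*' wildcards are rejected, as enforced above
}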
@@ -2566,7 +2614,9 @@ class ModelToComponentFactory:
             else None
         )

-        transform_before_filtering = False
+        assert model.transform_before_filtering is not None  # for mypy
+
+        transform_before_filtering = model.transform_before_filtering
         if client_side_incremental_sync:
             record_filter = ClientSideIncrementalRecordFilterDecorator(
                 config=config,
@@ -2647,6 +2697,12 @@ class ModelToComponentFactory:
         stop_condition_on_cursor: bool = False,
         client_side_incremental_sync: Optional[Dict[str, Any]] = None,
         transformations: List[RecordTransformation],
+        incremental_sync: Optional[
+            Union[
+                IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
+            ]
+        ] = None,
+        **kwargs: Any,
     ) -> SimpleRetriever:
         decoder = (
             self._create_component_from_model(model=model.decoder, config=config)
@@ -2704,6 +2760,45 @@ class ModelToComponentFactory:
             model.ignore_stream_slicer_parameters_on_paginated_requests or False
         )

+        if (
+            model.partition_router
+            and isinstance(model.partition_router, SubstreamPartitionRouterModel)
+            and not bool(self._connector_state_manager.get_stream_state(name, None))
+            and any(
+                parent_stream_config.lazy_read_pointer
+                for parent_stream_config in model.partition_router.parent_stream_configs
+            )
+        ):
+            if incremental_sync:
+                if incremental_sync.type != "DatetimeBasedCursor":
+                    raise ValueError(
+                        f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}."
+                    )
+
+                elif incremental_sync.step or incremental_sync.cursor_granularity:
+                    raise ValueError(
+                        f"Found more that one slice per parent. LazySimpleRetriever only supports single slice read for stream - {name}."
+                    )
+
+            if model.decoder and model.decoder.type != "JsonDecoder":
+                raise ValueError(
+                    f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}."
+                )
+
+            return LazySimpleRetriever(
+                name=name,
+                paginator=paginator,
+                primary_key=primary_key,
+                requester=requester,
+                record_selector=record_selector,
+                stream_slicer=stream_slicer,
+                request_option_provider=request_options_provider,
+                cursor=cursor,
+                config=config,
+                ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
+                parameters=model.parameters or {},
+            )
+
         if self._limit_slices_fetched or self._emit_connector_builder_messages:
             return SimpleRetrieverTestReadDecorator(
                 name=name,
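Taken together, the guard above selects LazySimpleRetriever only when the stream uses a SubstreamPartitionRouter with at least one lazy_read_pointer, no stream state exists yet, and the decoder is a JsonDecoder; any incremental sync must be a DatetimeBasedCursor that produces a single slice. A hedged sketch of a cursor shape that passes the check (values invented):

# Hypothetical incremental_sync definition compatible with LazySimpleRetriever:
# with no "step"/"cursor_granularity", the whole datetime window is one slice.
incremental_sync = {
    "type": "DatetimeBasedCursor",
    "cursor_field": "updated_at",
    "datetime_format": "%Y-%m-%dT%H:%M:%SZ",
    "start_datetime": "{{ config['start_date'] }}",
}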
@@ -2733,6 +2828,29 @@ class ModelToComponentFactory:
             parameters=model.parameters or {},
         )

+    def create_state_delegating_stream(
+        self,
+        model: StateDelegatingStreamModel,
+        config: Config,
+        has_parent_state: Optional[bool] = None,
+        **kwargs: Any,
+    ) -> DeclarativeStream:
+        if (
+            model.full_refresh_stream.name != model.name
+            or model.name != model.incremental_stream.name
+        ):
+            raise ValueError(
+                f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
+            )
+
+        stream_model = (
+            model.incremental_stream
+            if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
+            else model.full_refresh_stream
+        )
+
+        return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return] # Will be created DeclarativeStream as stream_model is stream description
+
     def _create_async_job_status_mapping(
         self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any
     ) -> Mapping[str, AsyncJobStatus]:
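create_state_delegating_stream swaps in one of two stream definitions at build time: the incremental variant once state (or parent state) exists, and the full-refresh variant otherwise. A hedged sketch of the component it consumes (the name and the nested definitions are placeholders):

# Hypothetical component definition. Both nested streams must also be named
# "orders"; otherwise the name check above raises a ValueError.
state_delegating_stream = {
    "type": "StateDelegatingStream",
    "name": "orders",
    "full_refresh_stream": full_refresh_orders,  # a DeclarativeStream definition (placeholder)
    "incremental_stream": incremental_orders,    # same stream with incremental_sync (placeholder)
}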
@@ -2964,7 +3082,7 @@ class ModelToComponentFactory:
             parent_stream_configs.extend(
                 [
                     self._create_message_repository_substream_wrapper(
-                        model=parent_stream_config, config=config
+                        model=parent_stream_config, config=config, **kwargs
                     )
                     for parent_stream_config in model.parent_stream_configs
                 ]
@@ -2977,7 +3095,7 @@ class ModelToComponentFactory:
         )

     def _create_message_repository_substream_wrapper(
-        self, model: ParentStreamConfigModel, config: Config
+        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
     ) -> Any:
         substream_factory = ModelToComponentFactory(
             limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
@@ -2991,7 +3109,16 @@ class ModelToComponentFactory:
                 self._evaluate_log_level(self._emit_connector_builder_messages),
             ),
         )
-        return substream_factory._create_component_from_model(model=model, config=config)
+
+        # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
+        has_parent_state = bool(
+            self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
+            if model.incremental_dependency
+            else False
+        )
+        return substream_factory._create_component_from_model(
+            model=model, config=config, has_parent_state=has_parent_state, **kwargs
+        )

     @staticmethod
     def create_wait_time_from_header(
@@ -3047,9 +3174,7 @@ class ModelToComponentFactory:
         self, model: HttpComponentsResolverModel, config: Config
     ) -> Any:
         stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
-        combined_slicers = self._build_resumable_cursor_from_paginator(
-            model.retriever, stream_slicer
-        )
+        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)

         retriever = self._create_component_from_model(
             model=model.retriever,
airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py

@@ -1,12 +1,16 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
+
+
 import copy
+import json
 import logging
 from dataclasses import InitVar, dataclass
 from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union

 import dpath
+import requests

 from airbyte_cdk.models import AirbyteMessage
 from airbyte_cdk.models import Type as MessageType
@@ -46,6 +50,7 @@ class ParentStreamConfig:
     )
     request_option: Optional[RequestOption] = None
     incremental_dependency: bool = False
+    lazy_read_pointer: Optional[List[Union[InterpolatedString, str]]] = None

     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self.parent_key = InterpolatedString.create(self.parent_key, parameters=parameters)
@@ -59,6 +64,17 @@ class ParentStreamConfig:
             for key_path in self.extra_fields
         ]

+        self.lazy_read_pointer = (
+            [
+                InterpolatedString.create(path, parameters=parameters)
+                if isinstance(path, str)
+                else path
+                for path in self.lazy_read_pointer
+            ]
+            if self.lazy_read_pointer
+            else None
+        )
+

 @dataclass
 class SubstreamPartitionRouter(PartitionRouter):
@@ -196,6 +212,15 @@ class SubstreamPartitionRouter(PartitionRouter):
                 # Add extra fields
                 extracted_extra_fields = self._extract_extra_fields(parent_record, extra_fields)

+                if parent_stream_config.lazy_read_pointer:
+                    extracted_extra_fields = {
+                        "child_response": self._extract_child_response(
+                            parent_record,
+                            parent_stream_config.lazy_read_pointer,  # type: ignore[arg-type]  # lazy_read_pointer type handled in __post_init__ of parent_stream_config
+                        ),
+                        **extracted_extra_fields,
+                    }
+
                 yield StreamSlice(
                     partition={
                         partition_field: partition_value,
@@ -205,6 +230,21 @@ class SubstreamPartitionRouter(PartitionRouter):
                     extra_fields=extracted_extra_fields,
                 )

+    def _extract_child_response(
+        self, parent_record: Mapping[str, Any] | AirbyteMessage, pointer: List[InterpolatedString]
+    ) -> requests.Response:
+        """Extract child records from a parent record based on lazy pointers."""
+
+        def _create_response(data: MutableMapping[str, Any]) -> SafeResponse:
+            """Create a SafeResponse with the given data."""
+            response = SafeResponse()
+            response.content = json.dumps(data).encode("utf-8")
+            response.status_code = 200
+            return response
+
+        path = [path.eval(self.config) for path in pointer]
+        return _create_response(dpath.get(parent_record, path, default=[]))  # type: ignore # argument will be a MutableMapping, given input data structure
+
     def _extract_extra_fields(
         self,
         parent_record: Mapping[str, Any] | AirbyteMessage,
@@ -280,20 +320,15 @@ class SubstreamPartitionRouter(PartitionRouter):

         parent_state = stream_state.get("parent_state", {})

-        # If `parent_state` doesn't exist and at least one parent stream has an incremental dependency,
-        # copy the child state to parent streams with incremental dependencies.
-        incremental_dependency = any(
-            [parent_config.incremental_dependency for parent_config in self.parent_stream_configs]
-        )
-        if not parent_state and not incremental_dependency:
-            return
-
-        if not parent_state and incremental_dependency:
-            # Migrate child state to parent state format
-            parent_state = self._migrate_child_state_to_parent_state(stream_state)
-
         # Set state for each parent stream with an incremental dependency
         for parent_config in self.parent_stream_configs:
+            if (
+                not parent_state.get(parent_config.stream.name, {})
+                and parent_config.incremental_dependency
+            ):
+                # Migrate child state to parent state format
+                parent_state = self._migrate_child_state_to_parent_state(stream_state)
+
             if parent_config.incremental_dependency:
                 parent_config.stream.state = parent_state.get(parent_config.stream.name, {})
@@ -381,3 +416,22 @@ class SubstreamPartitionRouter(PartitionRouter):
     @property
     def logger(self) -> logging.Logger:
         return logging.getLogger("airbyte.SubstreamPartitionRouter")
+
+
+class SafeResponse(requests.Response):
+    """
+    A subclass of requests.Response that acts as an interface to migrate parsed child records
+    into a response object. This allows seamless interaction with child records as if they
+    were the original response, ensuring compatibility with methods that expect the
+    requests.Response data type.
+    """
+
+    def __getattr__(self, name: str) -> Any:
+        return getattr(requests.Response, name, None)
+
+    @property
+    def content(self) -> Optional[bytes]:
+        return super().content
+
+    @content.setter
+    def content(self, value: Union[str, bytes]) -> None:
+        self._content = value.encode() if isinstance(value, str) else value
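SafeResponse lets records already embedded in a parent payload be handed to machinery that expects a requests.Response. A minimal round-trip sketch (illustrative only; the payload is invented):

import json

from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import SafeResponse

# Wrap an embedded child payload in a SafeResponse, then read it back the way
# a record extractor would.
resp = SafeResponse()
resp.content = json.dumps({"tasks": [{"id": 1}, {"id": 2}]})  # str is encoded by the setter
resp.status_code = 200
assert resp.json() == {"tasks": [{"id": 1}, {"id": 2}]}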
airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py

@@ -71,7 +71,6 @@ class CursorPaginationStrategy(PaginationStrategy):
         last_page_token_value: Optional[Any] = None,
     ) -> Optional[Any]:
         decoded_response = next(self.decoder.decode(response))
-
         # The default way that link is presented in requests.Response is a string of various links (last, next, etc). This
         # is not indexable or useful for parsing the cursor, so we replace it with the link dictionary from response.links
         headers: Dict[str, Any] = dict(response.headers)
airbyte_cdk/sources/declarative/retrievers/__init__.py

@@ -5,8 +5,15 @@
 from airbyte_cdk.sources.declarative.retrievers.async_retriever import AsyncRetriever
 from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
 from airbyte_cdk.sources.declarative.retrievers.simple_retriever import (
+    LazySimpleRetriever,
     SimpleRetriever,
     SimpleRetrieverTestReadDecorator,
 )

-__all__ = ["Retriever", "SimpleRetriever", "SimpleRetrieverTestReadDecorator", "AsyncRetriever"]
+__all__ = [
+    "Retriever",
+    "SimpleRetriever",
+    "SimpleRetrieverTestReadDecorator",
+    "AsyncRetriever",
+    "LazySimpleRetriever",
+]
airbyte_cdk/sources/declarative/retrievers/async_retriever.py

@@ -36,6 +36,36 @@ class AsyncRetriever(Retriever):
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._parameters = parameters

+    @property
+    def exit_on_rate_limit(self) -> bool:
+        """
+        Whether to exit on rate limit. This is a property of the job repository
+        and not the stream slicer. The stream slicer is responsible for creating
+        the jobs, but the job repository is responsible for managing the rate
+        limits and other job-related properties.
+
+        Note:
+         - If the `creation_requester` cannot place / create the job - it might be the case of the RateLimits
+         - If the `creation_requester` can place / create the job - it means all other requesters should successfully manage
+           to complete the results.
+        """
+        job_orchestrator = self.stream_slicer._job_orchestrator
+        if job_orchestrator is None:
+            # Default value when orchestrator is not available
+            return False
+        return job_orchestrator._job_repository.creation_requester.exit_on_rate_limit  # type: ignore
+
+    @exit_on_rate_limit.setter
+    def exit_on_rate_limit(self, value: bool) -> None:
+        """
+        Sets the `exit_on_rate_limit` property of the job repository > creation_requester,
+        meaning that the Job cannot be placed / created if the rate limit is reached.
+        Thus no further work on managing jobs is expected to be done.
+        """
+        job_orchestrator = self.stream_slicer._job_orchestrator
+        if job_orchestrator is not None:
+            job_orchestrator._job_repository.creation_requester.exit_on_rate_limit = value  # type: ignore[attr-defined, assignment]
+
     @property
     def state(self) -> StreamState:
         """