airbyte-cdk 6.61.5__py3-none-any.whl → 6.61.6.post3.dev17473738577__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their public registries. It is provided for informational purposes only.
Files changed (38)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +7 -7
  2. airbyte_cdk/connector_builder/main.py +2 -2
  3. airbyte_cdk/connector_builder/test_reader/reader.py +2 -2
  4. airbyte_cdk/legacy/sources/declarative/incremental/per_partition_cursor.py +4 -2
  5. airbyte_cdk/manifest_server/Dockerfile +2 -2
  6. airbyte_cdk/manifest_server/api_models/__init__.py +2 -0
  7. airbyte_cdk/manifest_server/api_models/manifest.py +12 -0
  8. airbyte_cdk/manifest_server/api_models/stream.py +2 -2
  9. airbyte_cdk/manifest_server/command_processor/processor.py +2 -4
  10. airbyte_cdk/manifest_server/command_processor/utils.py +1 -1
  11. airbyte_cdk/manifest_server/helpers/__init__.py +0 -0
  12. airbyte_cdk/manifest_server/helpers/tracing.py +36 -0
  13. airbyte_cdk/manifest_server/routers/manifest.py +38 -2
  14. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +7 -6
  15. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +57 -7
  16. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +4 -2
  17. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +229 -281
  18. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +0 -6
  19. airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -5
  20. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +0 -6
  21. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +1 -23
  22. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +0 -6
  23. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +88 -107
  24. airbyte_cdk/sources/declarative/requesters/request_options/per_partition_request_option_provider.py +95 -0
  25. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +4 -1
  26. airbyte_cdk/sources/declarative/retrievers/retriever.py +5 -0
  27. airbyte_cdk/sources/declarative/yaml_declarative_source.py +1 -1
  28. airbyte_cdk/sources/message/repository.py +20 -0
  29. airbyte_cdk/sources/utils/schema_helpers.py +29 -9
  30. airbyte_cdk/sources/utils/transform.py +25 -13
  31. airbyte_cdk/utils/spec_schema_transformations.py +7 -5
  32. {airbyte_cdk-6.61.5.dist-info → airbyte_cdk-6.61.6.post3.dev17473738577.dist-info}/METADATA +3 -2
  33. {airbyte_cdk-6.61.5.dist-info → airbyte_cdk-6.61.6.post3.dev17473738577.dist-info}/RECORD +38 -35
  34. /airbyte_cdk/manifest_server/{auth.py → helpers/auth.py} +0 -0
  35. {airbyte_cdk-6.61.5.dist-info → airbyte_cdk-6.61.6.post3.dev17473738577.dist-info}/LICENSE.txt +0 -0
  36. {airbyte_cdk-6.61.5.dist-info → airbyte_cdk-6.61.6.post3.dev17473738577.dist-info}/LICENSE_SHORT +0 -0
  37. {airbyte_cdk-6.61.5.dist-info → airbyte_cdk-6.61.6.post3.dev17473738577.dist-info}/WHEEL +0 -0
  38. {airbyte_cdk-6.61.5.dist-info → airbyte_cdk-6.61.6.post3.dev17473738577.dist-info}/entry_points.txt +0 -0
The largest change is in airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py (item 17 above); the hunks below are from that file.

@@ -33,7 +33,15 @@ from requests import Response
 from airbyte_cdk.connector_builder.models import (
     LogMessage as ConnectorBuilderLogMessage,
 )
-from airbyte_cdk.models import FailureType, Level
+from airbyte_cdk.models import (
+    AirbyteStateBlob,
+    AirbyteStateMessage,
+    AirbyteStateType,
+    AirbyteStreamState,
+    FailureType,
+    Level,
+    StreamDescriptor,
+)
 from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
 from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
 from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker
@@ -90,6 +98,7 @@ from airbyte_cdk.sources.declarative.extractors import (
     RecordSelector,
     ResponseToFileExtractor,
 )
+from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
 from airbyte_cdk.sources.declarative.extractors.record_filter import (
     ClientSideIncrementalRecordFilterDecorator,
 )
@@ -98,7 +107,6 @@ from airbyte_cdk.sources.declarative.incremental import (
     ConcurrentPerPartitionCursor,
     CursorFactory,
     DatetimeBasedCursor,
-    DeclarativeCursor,
     GlobalSubstreamCursor,
     PerPartitionWithGlobalCursor,
 )
@@ -500,8 +508,11 @@ from airbyte_cdk.sources.declarative.requesters.request_options import (
     InterpolatedRequestOptionsProvider,
     RequestOptionsProvider,
 )
+from airbyte_cdk.sources.declarative.requesters.request_options.per_partition_request_option_provider import (
+    PerPartitionRequestOptionsProvider,
+)
 from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
-from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
+from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester
 from airbyte_cdk.sources.declarative.resolvers import (
     ComponentMappingDefinition,
     ConfigComponentsResolver,
@@ -583,6 +594,7 @@ from airbyte_cdk.sources.message import (
     MessageRepository,
     NoopMessageRepository,
 )
+from airbyte_cdk.sources.message.repository import StateFilteringMessageRepository
 from airbyte_cdk.sources.streams.call_rate import (
     APIBudget,
     FixedWindowCallRatePolicy,
@@ -630,6 +642,7 @@ SCHEMA_TRANSFORMER_TYPE_MAPPING = {
     SchemaNormalizationModel.None_: TransformConfig.NoTransform,
     SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
 }
+_NO_STREAM_SLICING = SinglePartitionRouter(parameters={})
 
 # Ideally this should use the value defined in ConcurrentDeclarativeSource, but
 # this would be a circular import
@@ -702,7 +715,7 @@ class ModelToComponentFactory:
             CustomValidationStrategyModel: self.create_custom_component,
             CustomConfigTransformationModel: self.create_custom_component,
             DatetimeBasedCursorModel: self.create_datetime_based_cursor,
-            DeclarativeStreamModel: self.create_declarative_stream,
+            DeclarativeStreamModel: self.create_default_stream,
             DefaultErrorHandlerModel: self.create_default_error_handler,
             DefaultPaginatorModel: self.create_default_paginator,
             DpathExtractorModel: self.create_dpath_extractor,
@@ -739,7 +752,7 @@ class ModelToComponentFactory:
            OAuthAuthenticatorModel: self.create_oauth_authenticator,
            OffsetIncrementModel: self.create_offset_increment,
            PageIncrementModel: self.create_page_increment,
-           ParentStreamConfigModel: self.create_parent_stream_config,
+           ParentStreamConfigModel: self.create_parent_stream_config_with_substream_wrapper,
            PredicateValidatorModel: self.create_predicate_validator,
            PropertiesFromEndpointModel: self.create_properties_from_endpoint,
            PropertyChunkingModel: self.create_property_chunking,
@@ -1291,19 +1304,20 @@ class ModelToComponentFactory:
                 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
             )
 
+        model_parameters = datetime_based_cursor_model.parameters or {}
         interpolated_cursor_field = InterpolatedString.create(
             datetime_based_cursor_model.cursor_field,
-            parameters=datetime_based_cursor_model.parameters or {},
+            parameters=model_parameters,
         )
         cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
 
         interpolated_partition_field_start = InterpolatedString.create(
             datetime_based_cursor_model.partition_field_start or "start_time",
-            parameters=datetime_based_cursor_model.parameters or {},
+            parameters=model_parameters,
         )
         interpolated_partition_field_end = InterpolatedString.create(
             datetime_based_cursor_model.partition_field_end or "end_time",
-            parameters=datetime_based_cursor_model.parameters or {},
+            parameters=model_parameters,
         )
 
         slice_boundary_fields = (
@@ -1323,7 +1337,7 @@
         interpolated_lookback_window = (
             InterpolatedString.create(
                 datetime_based_cursor_model.lookback_window,
-                parameters=datetime_based_cursor_model.parameters or {},
+                parameters=model_parameters,
             )
             if datetime_based_cursor_model.lookback_window
             else None
@@ -1409,7 +1423,7 @@
         interpolated_step = (
             InterpolatedString.create(
                 datetime_based_cursor_model.step,
-                parameters=datetime_based_cursor_model.parameters or {},
+                parameters=model_parameters,
             )
             if datetime_based_cursor_model.step
             else None
@@ -1426,7 +1440,7 @@
             # object which we want to keep agnostic of being low-code
             target = InterpolatedString(
                 string=datetime_based_cursor_model.clamping.target,
-                parameters=datetime_based_cursor_model.parameters or {},
+                parameters=model_parameters,
             )
             evaluated_target = target.eval(config=config)
             match evaluated_target:
@@ -1603,6 +1617,10 @@
 
         interpolated_cursor_field = InterpolatedString.create(
             datetime_based_cursor_model.cursor_field,
+            # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases:
+            # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters`
+            # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters`
+            # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory.
             parameters=datetime_based_cursor_model.parameters or {},
         )
         cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
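Note: the FIXME added in the hunk above describes two possible shapes of a ComponentDefinition — one built from model.__dict__ (plain `parameters` key) and one taken straight from the manifest dict (`$parameters` key). A minimal sketch of that duality; `resolve_parameters` is a hypothetical helper, not a CDK function:

from typing import Any, Mapping


def resolve_parameters(component_definition: Mapping[str, Any]) -> Mapping[str, Any]:
    # Hypothetical helper: return the parameters of a component definition
    # whether it came from model.__dict__ ("parameters") or from the raw
    # manifest ("$parameters").
    return component_definition.get("parameters") or component_definition.get("$parameters") or {}


# From a pydantic model: the key is "parameters"
assert resolve_parameters({"cursor_field": "updated_at", "parameters": {"a": 1}}) == {"a": 1}
# From the raw manifest: the key is "$parameters"
assert resolve_parameters({"cursor_field": "updated_at", "$parameters": {"a": 1}}) == {"a": 1}
assert resolve_parameters({}) == {}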
@@ -1634,7 +1652,7 @@
                     stream_namespace=stream_namespace,
                     config=config,
                     message_repository=NoopMessageRepository(),
-                    stream_state_migrations=stream_state_migrations,
+                    # stream_state_migrations=stream_state_migrations, # FIXME is it expected to run migration on per partition state too?
                 )
             )
 
@@ -1730,7 +1748,11 @@
 
             if self._is_component(model_value):
                 model_args[model_field] = self._create_nested_component(
-                    model, model_field, model_value, config
+                    model,
+                    model_field,
+                    model_value,
+                    config,
+                    **kwargs,
                 )
             elif isinstance(model_value, list):
                 vals = []
@@ -1742,7 +1764,15 @@
                     if derived_type:
                         v["type"] = derived_type
                     if self._is_component(v):
-                        vals.append(self._create_nested_component(model, model_field, v, config))
+                        vals.append(
+                            self._create_nested_component(
+                                model,
+                                model_field,
+                                v,
+                                config,
+                                **kwargs,
+                            )
+                        )
                     else:
                         vals.append(v)
                 model_args[model_field] = vals
@@ -1832,7 +1862,7 @@
             return []
 
     def _create_nested_component(
-        self, model: Any, model_field: str, model_value: Any, config: Config
+        self, model: Any, model_field: str, model_value: Any, config: Config, **kwargs: Any
     ) -> Any:
         type_name = model_value.get("type", None)
         if not type_name:
@@ -1857,8 +1887,11 @@
                     for kwarg in constructor_kwargs
                     if kwarg in model_parameters
                 }
+                matching_kwargs = {
+                    kwarg: kwargs[kwarg] for kwarg in constructor_kwargs if kwarg in kwargs
+                }
                 return self._create_component_from_model(
-                    model=parsed_model, config=config, **matching_parameters
+                    model=parsed_model, config=config, **(matching_parameters | matching_kwargs)
                 )
             except TypeError as error:
                 missing_parameters = self._extract_missing_parameters(error)
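Note: the hunk above merges `$parameters`-derived constructor arguments with kwargs propagated down the factory call chain via the PEP 584 dict union. The right operand of `|` wins on key collisions, so an explicitly passed kwarg overrides a `$parameters` value of the same name. Illustrative values only:

# PEP 584 dict union (Python 3.9+): the right operand takes precedence on duplicates.
matching_parameters = {"stream_name": "from_dollar_parameters", "name": "users"}
matching_kwargs = {"stream_name": "from_caller"}

merged = matching_parameters | matching_kwargs
assert merged == {"stream_name": "from_caller", "name": "users"}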
@@ -1942,13 +1975,17 @@
             parameters=model.parameters or {},
         )
 
-    def create_declarative_stream(
+    def create_default_stream(
         self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any
     ) -> Union[DeclarativeStream, AbstractStream]:
         primary_key = model.primary_key.__root__ if model.primary_key else None
 
+        partition_router = self._build_stream_slicer_from_partition_router(
+            model.retriever, config, stream_name=model.name
+        )
+        concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config)
         if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
-            cursor_model = model.incremental_sync
+            cursor_model: DatetimeBasedCursorModel = model.incremental_sync
 
             end_time_option = (
                 self._create_component_from_model(
@@ -1965,17 +2002,29 @@
                 else None
             )
 
-            request_options_provider = DatetimeBasedRequestOptionsProvider(
+            datetime_request_options_provider = DatetimeBasedRequestOptionsProvider(
                 start_time_option=start_time_option,
                 end_time_option=end_time_option,
-                partition_field_start=cursor_model.partition_field_end,
+                partition_field_start=cursor_model.partition_field_start,
                 partition_field_end=cursor_model.partition_field_end,
                 config=config,
                 parameters=model.parameters or {},
             )
+            request_options_provider = (
+                datetime_request_options_provider
+                if not isinstance(concurrent_cursor, ConcurrentPerPartitionCursor)
+                else PerPartitionRequestOptionsProvider(
+                    partition_router, datetime_request_options_provider
+                )
+            )
         elif model.incremental_sync and isinstance(
             model.incremental_sync, IncrementingCountCursorModel
         ):
+            if isinstance(concurrent_cursor, ConcurrentPerPartitionCursor):
+                raise ValueError(
+                    "PerPartition does not support per partition states because switching to global state is time based"
+                )
+
             cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore
 
             start_time_option = (
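Note: when the stream's cursor is a ConcurrentPerPartitionCursor, the datetime-based provider gets wrapped in the new PerPartitionRequestOptionsProvider (file 24 in the list above), so request options are resolved against each partition's slice. The sketch below shows the general decorator shape under assumed, simplified interfaces; it is not the CDK's actual implementation:

from dataclasses import dataclass
from typing import Any, Mapping, Optional


class SimpleOptionsProvider:
    # Simplified stand-in for a request-options provider interface.
    def get_request_params(self, stream_slice: Optional[Mapping[str, Any]] = None) -> dict:
        return {}


class DatetimeOptionsProvider(SimpleOptionsProvider):
    def get_request_params(self, stream_slice: Optional[Mapping[str, Any]] = None) -> dict:
        slice_ = stream_slice or {}
        # Pull the time-window boundaries out of the slice.
        return {"start": slice_.get("start_time"), "end": slice_.get("end_time")}


@dataclass
class PerPartitionOptionsProvider(SimpleOptionsProvider):
    # Sketch: add the partition's key to whatever the wrapped provider produces.
    wrapped: SimpleOptionsProvider

    def get_request_params(self, stream_slice: Optional[Mapping[str, Any]] = None) -> dict:
        params = dict(self.wrapped.get_request_params(stream_slice))
        if stream_slice and "partition" in stream_slice:
            params.update(stream_slice["partition"])  # e.g. {"parent_id": "123"}
        return params


provider = PerPartitionOptionsProvider(DatetimeOptionsProvider())
slice_ = {"start_time": "2024-01-01", "end_time": "2024-01-31", "partition": {"parent_id": "123"}}
assert provider.get_request_params(slice_) == {
    "start": "2024-01-01",
    "end": "2024-01-31",
    "parent_id": "123",
}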
@@ -2013,22 +2062,19 @@
                 model=model.file_uploader, config=config
             )
 
-        # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field
-        # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the
-        # Retriever. This is done in the declarative stream not the retriever to support custom retrievers. The custom create methods in
-        # the factory only support passing arguments to the component constructors, whereas this performs a merge of all slicers into one.
-        combined_slicers = self._merge_stream_slicers(model=model, config=config)
-        partition_router = self._build_stream_slicer_from_partition_router(
-            model.retriever, config, stream_name=model.name
+        stream_slicer: ConcurrentStreamSlicer = (
+            partition_router
+            if isinstance(concurrent_cursor, FinalStateCursor)
+            else concurrent_cursor
         )
-        concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config)
         retriever = self._create_component_from_model(
             model=model.retriever,
             config=config,
             name=model.name,
             primary_key=primary_key,
-            stream_slicer=combined_slicers,
             request_options_provider=request_options_provider,
+            stream_slicer=stream_slicer,
+            partition_router=partition_router,
             stop_condition_cursor=concurrent_cursor
             if self._is_stop_condition_on_cursor(model)
             else None,
@@ -2039,6 +2085,8 @@
             file_uploader=file_uploader,
             incremental_sync=model.incremental_sync,
         )
+        if isinstance(retriever, AsyncRetriever):
+            stream_slicer = retriever.stream_slicer
 
         schema_loader: Union[
             CompositeSchemaLoader,
@@ -2066,89 +2114,27 @@
             options["name"] = model.name
             schema_loader = DefaultSchemaLoader(config=config, parameters=options)
 
-        if (
-            (
-                isinstance(combined_slicers, PartitionRouter)
-                or isinstance(concurrent_cursor, ConcurrentCursor)
-            )
-            and not self._emit_connector_builder_messages
-            and not is_parent
-        ):
-            # We are starting to migrate streams to instantiate directly the DefaultStream instead of instantiating the
-            # DeclarativeStream and assembling the DefaultStream from that. The plan is the following:
-            # * Streams without partition router nor cursors and streams with only partition router. This is the `isinstance(combined_slicers, PartitionRouter)` condition as the first kind with have a SinglePartitionRouter
-            # * Streams without partition router but with cursor. This is the `isinstance(concurrent_cursor, ConcurrentCursor)` condition
-            # * Streams with both partition router and cursor
-            # We specifically exclude parent streams here because SubstreamPartitionRouter has not been updated yet
-            # We specifically exclude Connector Builder stuff for now as Brian is working on this anyway
-
-            stream_name = model.name or ""
-            stream_slicer: ConcurrentStreamSlicer = (
-                concurrent_cursor if concurrent_cursor else SinglePartitionRouter(parameters={})
-            )
-            cursor: Cursor = FinalStateCursor(stream_name, None, self._message_repository)
-            if isinstance(retriever, AsyncRetriever):
-                # The AsyncRetriever only ever worked with a cursor from the concurrent package. Hence, the method
-                # `_build_incremental_cursor` which we would usually think would return only declarative stuff has a
-                # special clause and return a concurrent cursor. This stream slicer is passed to AsyncRetriever when
-                # built because the async retriever has a specific partition router which relies on this stream slicer.
-                # We can't re-use `concurrent_cursor` because it is a different instance than the one passed in
-                # AsyncJobPartitionRouter.
-                stream_slicer = retriever.stream_slicer
-            if isinstance(combined_slicers, Cursor):
-                cursor = combined_slicers
-            elif isinstance(combined_slicers, PartitionRouter):
-                stream_slicer = combined_slicers
-            elif concurrent_cursor:
-                cursor = concurrent_cursor
-
-            # FIXME to be removed once we migrate everything to DefaultStream
-            if isinstance(retriever, SimpleRetriever):
-                # We zero it out here, but since this is a cursor reference, the state is still properly
-                # instantiated for the other components that reference it
-                retriever.cursor = None
-
-            partition_generator = StreamSlicerPartitionGenerator(
+        stream_name = model.name or ""
+        return DefaultStream(
+            partition_generator=StreamSlicerPartitionGenerator(
                 DeclarativePartitionFactory(
                     stream_name,
                     schema_loader,
                     retriever,
                     self._message_repository,
                 ),
-                stream_slicer=stream_slicer,
-            )
-            return DefaultStream(
-                partition_generator=partition_generator,
-                name=stream_name,
-                json_schema=schema_loader.get_json_schema,
-                primary_key=get_primary_key_from_stream(primary_key),
-                cursor_field=cursor.cursor_field.cursor_field_key
-                if hasattr(cursor, "cursor_field")
-                else "",  # FIXME we should have the cursor field has part of the interface of cursor,
-                logger=logging.getLogger(f"airbyte.{stream_name}"),
-                # FIXME this is a breaking change compared to the old implementation which used the source name instead
-                cursor=cursor,
-                supports_file_transfer=hasattr(model, "file_uploader")
-                and bool(model.file_uploader),
-            )
-
-        cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
-        if model.state_migrations:
-            state_transformations = [
-                self._create_component_from_model(state_migration, config, declarative_stream=model)
-                for state_migration in model.state_migrations
-            ]
-        else:
-            state_transformations = []
-        return DeclarativeStream(
-            name=model.name or "",
-            primary_key=primary_key,
-            retriever=retriever,
-            schema_loader=schema_loader,
-            stream_cursor_field=cursor_field or "",
-            state_migrations=state_transformations,
-            config=config,
-            parameters=model.parameters or {},
+                stream_slicer,
+                slice_limit=self._limit_slices_fetched,
+            ),
+            name=stream_name,
+            json_schema=schema_loader.get_json_schema,
+            primary_key=get_primary_key_from_stream(primary_key),
+            cursor_field=concurrent_cursor.cursor_field.cursor_field_key
+            if hasattr(concurrent_cursor, "cursor_field")
+            else "",  # FIXME we should have the cursor field has part of the interface of cursor,
+            logger=logging.getLogger(f"airbyte.{stream_name}"),
+            cursor=concurrent_cursor,
+            supports_file_transfer=hasattr(model, "file_uploader") and bool(model.file_uploader),
        )
 
     def _is_stop_condition_on_cursor(self, model: DeclarativeStreamModel) -> bool:
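Note: after this hunk, create_default_stream always returns a DefaultStream; the fallback path that assembled a legacy DeclarativeStream is deleted. The duck-typed cursor-field lookup is kept from the old code: a FinalStateCursor (full refresh) exposes no cursor_field, so the stream advertises an empty one. Stand-in classes below mirror only that lookup, not the CDK's real types:

from typing import Any


class _FinalStateCursor:
    # Stand-in for a full-refresh cursor with no cursor_field attribute.
    pass


class _ConcurrentCursor:
    # Stand-in for a cursor that knows its cursor field.
    class _CursorField:
        cursor_field_key = "updated_at"

    cursor_field = _CursorField()


def resolve_cursor_field(cursor: Any) -> str:
    # Mirrors the diff's duck-typing: no cursor_field attribute -> empty string.
    return cursor.cursor_field.cursor_field_key if hasattr(cursor, "cursor_field") else ""


assert resolve_cursor_field(_ConcurrentCursor()) == "updated_at"
assert resolve_cursor_field(_FinalStateCursor()) == ""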
@@ -2197,86 +2183,15 @@
             )
         return SinglePartitionRouter(parameters={})
 
-    def _build_incremental_cursor(
-        self,
-        model: DeclarativeStreamModel,
-        stream_slicer: Optional[PartitionRouter],
-        config: Config,
-    ) -> Optional[StreamSlicer]:
-        state_transformations = (
-            [
-                self._create_component_from_model(state_migration, config, declarative_stream=model)
-                for state_migration in model.state_migrations
-            ]
-            if model.state_migrations
-            else []
-        )
-
-        if model.incremental_sync and (
-            stream_slicer and not isinstance(stream_slicer, SinglePartitionRouter)
-        ):
-            if model.retriever.type == "AsyncRetriever":
-                stream_name = model.name or ""
-                stream_namespace = None
-                stream_state = self._connector_state_manager.get_stream_state(
-                    stream_name, stream_namespace
-                )
-
-                return self.create_concurrent_cursor_from_perpartition_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
-                    state_manager=self._connector_state_manager,
-                    model_type=DatetimeBasedCursorModel,
-                    component_definition=model.incremental_sync.__dict__,
-                    stream_name=stream_name,
-                    stream_namespace=stream_namespace,
-                    config=config or {},
-                    stream_state=stream_state,
-                    stream_state_migrations=state_transformations,
-                    partition_router=stream_slicer,
-                )
-
-            incremental_sync_model = model.incremental_sync
-            cursor_component = self._create_component_from_model(
-                model=incremental_sync_model, config=config
-            )
-            is_global_cursor = (
-                hasattr(incremental_sync_model, "global_substream_cursor")
-                and incremental_sync_model.global_substream_cursor
-            )
-
-            if is_global_cursor:
-                return GlobalSubstreamCursor(
-                    stream_cursor=cursor_component, partition_router=stream_slicer
-                )
-            return PerPartitionWithGlobalCursor(
-                cursor_factory=CursorFactory(
-                    lambda: self._create_component_from_model(
-                        model=incremental_sync_model, config=config
-                    ),
-                ),
-                partition_router=stream_slicer,
-                stream_cursor=cursor_component,
-            )
-        elif model.incremental_sync:
-            if model.retriever.type == "AsyncRetriever":
-                return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
-                    model_type=DatetimeBasedCursorModel,
-                    component_definition=model.incremental_sync.__dict__,
-                    stream_name=model.name or "",
-                    stream_namespace=None,
-                    config=config or {},
-                    stream_state_migrations=state_transformations,
-                )
-            return self._create_component_from_model(model=model.incremental_sync, config=config)  # type: ignore[no-any-return] # Will be created Cursor as stream_slicer_model is model.incremental_sync
-        return None
-
     def _build_concurrent_cursor(
         self,
         model: DeclarativeStreamModel,
         stream_slicer: Optional[PartitionRouter],
         config: Config,
-    ) -> Optional[StreamSlicer]:
+    ) -> Cursor:
+        stream_name = model.name or ""
         stream_state = self._connector_state_manager.get_stream_state(
-            stream_name=model.name or "", namespace=None
+            stream_name=stream_name, namespace=None
         )
 
         if model.state_migrations:
@@ -2296,20 +2211,20 @@
                 state_manager=self._connector_state_manager,
                 model_type=DatetimeBasedCursorModel,
                 component_definition=model.incremental_sync.__dict__,
-                stream_name=model.name or "",
+                stream_name=stream_name,
                 stream_namespace=None,
                 config=config or {},
                 stream_state=stream_state,
                 stream_state_migrations=state_transformations,
                 partition_router=stream_slicer,
-                attempt_to_create_cursor_if_not_provided=True,
+                attempt_to_create_cursor_if_not_provided=True,  # FIXME can we remove that now?
             )
         elif model.incremental_sync:
             if type(model.incremental_sync) == IncrementingCountCursorModel:
                 return self.create_concurrent_cursor_from_incrementing_count_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
                     model_type=IncrementingCountCursorModel,
                     component_definition=model.incremental_sync.__dict__,
-                    stream_name=model.name or "",
+                    stream_name=stream_name,
                     stream_namespace=None,
                     config=config or {},
                     stream_state_migrations=state_transformations,
@@ -2318,7 +2233,7 @@
                 return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
                     model_type=type(model.incremental_sync),
                     component_definition=model.incremental_sync.__dict__,
-                    stream_name=model.name or "",
+                    stream_name=stream_name,
                     stream_namespace=None,
                     config=config or {},
                     stream_state_migrations=state_transformations,
@@ -2328,45 +2243,7 @@
             raise ValueError(
                 f"Incremental sync of type {type(model.incremental_sync)} is not supported"
             )
-        return None
-
-    def _merge_stream_slicers(
-        self, model: DeclarativeStreamModel, config: Config
-    ) -> Optional[StreamSlicer]:
-        retriever_model = model.retriever
-
-        stream_slicer = self._build_stream_slicer_from_partition_router(
-            retriever_model, config, stream_name=model.name
-        )
-
-        if retriever_model.type == "AsyncRetriever":
-            is_not_datetime_cursor = (
-                model.incremental_sync.type != "DatetimeBasedCursor"
-                if model.incremental_sync
-                else None
-            )
-            is_partition_router = (
-                bool(retriever_model.partition_router) if model.incremental_sync else None
-            )
-
-            if is_not_datetime_cursor:
-                # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the
-                # support or unordered slices (for example, when we trigger reports for January and February, the report
-                # in February can be completed first). Once we have support for custom concurrent cursor or have a new
-                # implementation available in the CDK, we can enable more cursors here.
-                raise ValueError(
-                    "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet."
-                )
-
-            if is_partition_router and not stream_slicer:
-                # Note that this development is also done in parallel to the per partition development which once merged
-                # we could support here by calling create_concurrent_cursor_from_perpartition_cursor
-                raise ValueError("Per partition state is not supported yet for AsyncRetriever.")
-
-        if model.incremental_sync:
-            return self._build_incremental_cursor(model, stream_slicer, config)
-
-        return stream_slicer
+        return FinalStateCursor(stream_name, None, self._message_repository)
 
     def create_default_error_handler(
         self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
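Note: _build_concurrent_cursor is now total — its return type narrows from Optional[StreamSlicer] to Cursor, and streams without incremental_sync fall through to a FinalStateCursor instead of None, so callers no longer need a None check. A condensed sketch of the branching with stand-in classes (the real method dispatches on the incremental_sync model type):

from dataclasses import dataclass


@dataclass
class Cursor:
    # Stand-in base class for the sketch.
    stream_name: str


class PerPartitionCursor(Cursor): ...
class DatetimeCursor(Cursor): ...
class FinalStateCursor(Cursor): ...


def build_cursor(stream_name: str, has_incremental_sync: bool, has_partition_router: bool) -> Cursor:
    # Condensed version of the branching: every path returns a Cursor.
    if has_incremental_sync and has_partition_router:
        return PerPartitionCursor(stream_name)
    if has_incremental_sync:
        return DatetimeCursor(stream_name)
    # Full refresh: no time-based slicing, only a terminal state message.
    return FinalStateCursor(stream_name)


assert isinstance(build_cursor("users", False, False), FinalStateCursor)
assert isinstance(build_cursor("users", True, True), PerPartitionCursor)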
@@ -2660,7 +2537,9 @@
             config=config,
             name=name,
             primary_key=None,
-            stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config),
+            partition_router=self._build_stream_slicer_from_partition_router(
+                model.retriever, config
+            ),
             transformations=[],
             use_cache=True,
             log_formatter=(
@@ -3010,7 +2889,7 @@
         )
 
     def create_parent_stream_config(
-        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
+        self, model: ParentStreamConfigModel, config: Config, stream_name: str, **kwargs: Any
     ) -> ParentStreamConfig:
         declarative_stream = self._create_component_from_model(
             model.stream,
@@ -3263,7 +3142,6 @@
         *,
         name: str,
         primary_key: Optional[Union[str, List[str], List[List[str]]]],
-        stream_slicer: Optional[StreamSlicer],
         request_options_provider: Optional[RequestOptionsProvider] = None,
         stop_condition_cursor: Optional[Cursor] = None,
         client_side_incremental_sync: Optional[Dict[str, Any]] = None,
@@ -3276,9 +3154,10 @@
         ] = None,
         use_cache: Optional[bool] = None,
         log_formatter: Optional[Callable[[Response], Any]] = None,
+        partition_router: Optional[PartitionRouter] = None,
         **kwargs: Any,
     ) -> SimpleRetriever:
-        def _get_url() -> str:
+        def _get_url(req: Requester) -> str:
             """
             Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever.
             This is needed because the URL is not set until the requester is created.
@@ -3287,12 +3166,12 @@
             _url: str = (
                 model.requester.url
                 if hasattr(model.requester, "url") and model.requester.url is not None
-                else requester.get_url()
+                else req.get_url(stream_state=None, stream_slice=None, next_page_token=None)
             )
             _url_base: str = (
                 model.requester.url_base
                 if hasattr(model.requester, "url_base") and model.requester.url_base is not None
-                else requester.get_url_base()
+                else req.get_url_base(stream_state=None, stream_slice=None, next_page_token=None)
             )
 
             return _url or _url_base
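Note: _get_url previously closed over a `requester` name that is only assigned later in the enclosing create_simple_retriever; the closure worked because Python resolves free variables at call time, but it was fragile. The new version takes the requester as an explicit parameter. A small demonstration of the late-binding behavior being avoided, with generic names:

def make_getter_late_binding() -> int:
    def get_value() -> int:
        # `value` is a free variable: it is looked up when get_value() runs,
        # not when it is defined, so calling too early raises NameError.
        return value

    try:
        get_value()
    except NameError:
        pass  # `value` does not exist yet

    value = 42
    return get_value()


def get_value_explicit(value: int) -> int:
    # Parameter version: the dependency must exist at call time, by construction.
    return value


assert make_getter_late_binding() == 42
assert get_value_explicit(42) == 42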
@@ -3371,36 +3250,18 @@
             config=config,
         )
 
-        # Define cursor only if per partition or common incremental support is needed
-        cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None
-
-        if (
-            not isinstance(stream_slicer, DatetimeBasedCursor)
-            or type(stream_slicer) is not DatetimeBasedCursor
-        ):
-            # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
-            # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
-            # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
-            # request_options_provider
-            request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={})
-        elif not request_options_provider:
+        if not request_options_provider:
             request_options_provider = DefaultRequestOptionsProvider(parameters={})
-
-        stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
-        if self._should_limit_slices_fetched():
-            stream_slicer = cast(
-                StreamSlicer,
-                StreamSlicerTestReadDecorator(
-                    wrapped_slicer=stream_slicer,
-                    maximum_number_of_slices=self._limit_slices_fetched or 5,
-                ),
-            )
+        if isinstance(request_options_provider, DefaultRequestOptionsProvider) and isinstance(
+            partition_router, PartitionRouter
+        ):
+            request_options_provider = partition_router
 
         paginator = (
             self._create_component_from_model(
                 model=model.paginator,
                 config=config,
-                url_base=_get_url(),
+                url_base=_get_url(requester),
                 extractor_model=model.record_selector.extractor,
                 decoder=decoder,
                 cursor_used_for_stop_condition=stop_condition_cursor or None,
@@ -3444,9 +3305,9 @@
                 primary_key=primary_key,
                 requester=requester,
                 record_selector=record_selector,
-                stream_slicer=stream_slicer,
+                stream_slicer=_NO_STREAM_SLICING,
                 request_option_provider=request_options_provider,
-                cursor=cursor,
+                cursor=None,
                 config=config,
                 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
                 parameters=model.parameters or {},
@@ -3458,9 +3319,9 @@
             primary_key=primary_key,
             requester=requester,
             record_selector=record_selector,
-            stream_slicer=stream_slicer,
+            stream_slicer=_NO_STREAM_SLICING,
             request_option_provider=request_options_provider,
-            cursor=cursor,
+            cursor=None,
             config=config,
             ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
             additional_query_properties=query_properties,
@@ -3531,14 +3392,21 @@
                 f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
             )
 
-        stream_model = (
+        stream_model = self._get_state_delegating_stream_model(
+            False if has_parent_state is None else has_parent_state, model
+        )
+
+        return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return] # DeclarativeStream will be created as stream_model is alwyas DeclarativeStreamModel
+
+    def _get_state_delegating_stream_model(
+        self, has_parent_state: bool, model: StateDelegatingStreamModel
+    ) -> DeclarativeStreamModel:
+        return (
             model.incremental_stream
             if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
             else model.full_refresh_stream
         )
 
-        return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return] # Will be created DeclarativeStream as stream_model is stream description
-
     def _create_async_job_status_mapping(
         self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any
     ) -> Mapping[str, AsyncJobStatus]:
@@ -3583,12 +3451,14 @@
         transformations: List[RecordTransformation],
         **kwargs: Any,
     ) -> AsyncRetriever:
-        def _get_download_retriever() -> SimpleRetriever:
+        def _get_download_retriever(
+            requester: Requester, extractor: RecordExtractor, _decoder: Decoder
+        ) -> SimpleRetriever:
             # We create a record selector for the download retriever
             # with no schema normalization and no transformations, neither record filter
             # as all this occurs in the record_selector of the AsyncRetriever
             record_selector = RecordSelector(
-                extractor=download_extractor,
+                extractor=extractor,
                 name=name,
                 record_filter=None,
                 transformations=[],
@@ -3599,7 +3469,7 @@
             paginator = (
                 self._create_component_from_model(
                     model=model.download_paginator,
-                    decoder=decoder,
+                    decoder=_decoder,
                     config=config,
                     url_base="",
                 )
@@ -3608,7 +3478,7 @@
             )
 
             return SimpleRetriever(
-                requester=download_requester,
+                requester=requester,
                 record_selector=record_selector,
                 primary_key=None,
                 name=name,
@@ -3702,7 +3572,9 @@
             config=config,
             name=job_download_components_name,
         )
-        download_retriever = _get_download_retriever()
+        download_retriever = _get_download_retriever(
+            download_requester, download_extractor, download_decoder
+        )
         abort_requester = (
             self._create_component_from_model(
                 model=model.abort_requester,
@@ -3832,7 +3704,7 @@
         if model.parent_stream_configs:
             parent_stream_configs.extend(
                 [
-                    self._create_message_repository_substream_wrapper(
+                    self.create_parent_stream_config_with_substream_wrapper(
                         model=parent_stream_config, config=config, **kwargs
                     )
                     for parent_stream_config in model.parent_stream_configs
@@ -3845,32 +3717,105 @@
             config=config,
         )
 
-    def _create_message_repository_substream_wrapper(
-        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
+    def create_parent_stream_config_with_substream_wrapper(
+        self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any
     ) -> Any:
+        # getting the parent state
+        child_state = self._connector_state_manager.get_stream_state(stream_name, None)
+
+        # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
+        has_parent_state = bool(
+            self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
+            if model.incremental_dependency
+            else False
+        )
+        connector_state_manager = self._instantiate_parent_stream_state_manager(
+            child_state, config, model, has_parent_state
+        )
+
         substream_factory = ModelToComponentFactory(
+            connector_state_manager=connector_state_manager,
             limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
             limit_slices_fetched=self._limit_slices_fetched,
             emit_connector_builder_messages=self._emit_connector_builder_messages,
             disable_retries=self._disable_retries,
             disable_cache=self._disable_cache,
-            message_repository=LogAppenderMessageRepositoryDecorator(
-                {"airbyte_cdk": {"stream": {"is_substream": True}}, "http": {"is_auxiliary": True}},
-                self._message_repository,
-                self._evaluate_log_level(self._emit_connector_builder_messages),
+            message_repository=StateFilteringMessageRepository(
+                LogAppenderMessageRepositoryDecorator(
+                    {
+                        "airbyte_cdk": {"stream": {"is_substream": True}},
+                        "http": {"is_auxiliary": True},
+                    },
+                    self._message_repository,
+                    self._evaluate_log_level(self._emit_connector_builder_messages),
+                ),
             ),
         )
 
-        # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
-        has_parent_state = bool(
-            self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
-            if model.incremental_dependency
-            else False
-        )
-        return substream_factory._create_component_from_model(
-            model=model, config=config, has_parent_state=has_parent_state, **kwargs
+        return substream_factory.create_parent_stream_config(
+            model=model, config=config, stream_name=stream_name, **kwargs
         )
 
+    def _instantiate_parent_stream_state_manager(
+        self,
+        child_state: MutableMapping[str, Any],
+        config: Config,
+        model: ParentStreamConfigModel,
+        has_parent_state: bool,
+    ) -> ConnectorStateManager:
+        """
+        With DefaultStream, the state needs to be provided during __init__ of the cursor as opposed to the
+        `set_initial_state` flow that existed for the declarative cursors. This state is taken from
+        self._connector_state_manager.get_stream_state (`self` being a newly created ModelToComponentFactory to account
+        for the MessageRepository being different). So we need to pass a ConnectorStateManager to the
+        ModelToComponentFactory that has the parent states. This method populates this if there is a child state and if
+        incremental_dependency is set.
+        """
+        if model.incremental_dependency and child_state:
+            parent_stream_name = model.stream.name or ""
+            parent_state = ConcurrentPerPartitionCursor.get_parent_state(
+                child_state, parent_stream_name
+            )
+
+            if not parent_state:
+                # there are two migration cases: state value from child stream or from global state
+                parent_state = ConcurrentPerPartitionCursor.get_global_state(
+                    child_state, parent_stream_name
+                )
+
+            if not parent_state and not isinstance(parent_state, dict):
+                cursor_values = child_state.values()
+                if cursor_values:
+                    incremental_sync_model: Union[
+                        DatetimeBasedCursorModel,
+                        IncrementingCountCursorModel,
+                        CustomIncrementalSyncModel,
+                    ] = (
+                        model.stream.incremental_sync  # type: ignore # if we are there, it is because there is incremental_dependency and therefore there is an incremental_sync on the parent stream
+                        if isinstance(model.stream, DeclarativeStreamModel)
+                        else self._get_state_delegating_stream_model(
+                            has_parent_state, model.stream
+                        ).incremental_sync
+                    )
+                    cursor_field = InterpolatedString.create(
+                        incremental_sync_model.cursor_field,
+                        parameters=incremental_sync_model.parameters or {},
+                    ).eval(config)
+                    parent_state = AirbyteStateMessage(
+                        type=AirbyteStateType.STREAM,
+                        stream=AirbyteStreamState(
+                            stream_descriptor=StreamDescriptor(
+                                name=parent_stream_name, namespace=None
+                            ),
+                            stream_state=AirbyteStateBlob(
+                                {cursor_field: list(cursor_values)[0]}
+                            ),
+                        ),
+                    )
+            return ConnectorStateManager([parent_state] if parent_state else [])
+
+        return ConnectorStateManager([])
+
     @staticmethod
     def create_wait_time_from_header(
         model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
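Note: parent streams now emit through a StateFilteringMessageRepository (added to airbyte_cdk/sources/message/repository.py, file 28 above) so a parent's checkpoint state does not leak into the child's output; the parent's state is instead seeded through a dedicated ConnectorStateManager. The general filtering-decorator shape, sketched with simplified types rather than the CDK's actual classes:

from dataclasses import dataclass, field
from enum import Enum
from typing import List


class MessageType(Enum):
    RECORD = "RECORD"
    STATE = "STATE"
    LOG = "LOG"


@dataclass
class Message:
    type: MessageType
    payload: str = ""


@dataclass
class InMemoryRepository:
    messages: List[Message] = field(default_factory=list)

    def emit_message(self, message: Message) -> None:
        self.messages.append(message)


@dataclass
class StateFilteringRepository:
    # Sketch: forward everything except STATE messages to the decorated repository.
    decorated: InMemoryRepository

    def emit_message(self, message: Message) -> None:
        if message.type is not MessageType.STATE:
            self.decorated.emit_message(message)


inner = InMemoryRepository()
repo = StateFilteringRepository(inner)
repo.emit_message(Message(MessageType.RECORD, "parent record"))
repo.emit_message(Message(MessageType.STATE, "parent checkpoint"))  # dropped
assert [m.type for m in inner.messages] == [MessageType.RECORD]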
@@ -3951,6 +3896,7 @@
 
         return HttpComponentsResolver(
             retriever=retriever,
+            stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config),
             config=config,
             components_mapping=components_mapping,
             parameters=model.parameters or {},
@@ -4176,7 +4122,9 @@
         self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
     ) -> GroupingPartitionRouter:
         underlying_router = self._create_component_from_model(
-            model=model.underlying_partition_router, config=config
+            model=model.underlying_partition_router,
+            config=config,
+            **kwargs,
         )
         if model.group_size < 1:
             raise ValueError(f"Group size must be greater than 0, got {model.group_size}")