airbyte-cdk 6.61.3.post2.dev17299502224__py3-none-any.whl → 6.62.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/legacy/sources/declarative/incremental/per_partition_cursor.py +4 -2
- airbyte_cdk/manifest_server/README.md +17 -3
- airbyte_cdk/manifest_server/openapi.yaml +27 -27
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +2 -2
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +57 -7
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +4 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +196 -269
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +4 -7
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +0 -6
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +1 -23
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +0 -6
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +88 -107
- airbyte_cdk/sources/declarative/requesters/request_options/per_partition_request_option_provider.py +95 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +4 -1
- airbyte_cdk/sources/declarative/retrievers/retriever.py +5 -0
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +3 -3
- airbyte_cdk/sources/message/repository.py +20 -0
- {airbyte_cdk-6.61.3.post2.dev17299502224.dist-info → airbyte_cdk-6.62.0.dev0.dist-info}/METADATA +6 -5
- {airbyte_cdk-6.61.3.post2.dev17299502224.dist-info → airbyte_cdk-6.62.0.dev0.dist-info}/RECORD +24 -23
- {airbyte_cdk-6.61.3.post2.dev17299502224.dist-info → airbyte_cdk-6.62.0.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.61.3.post2.dev17299502224.dist-info → airbyte_cdk-6.62.0.dev0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.61.3.post2.dev17299502224.dist-info → airbyte_cdk-6.62.0.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.61.3.post2.dev17299502224.dist-info → airbyte_cdk-6.62.0.dev0.dist-info}/entry_points.txt +0 -0
@@ -33,7 +33,15 @@ from requests import Response
 from airbyte_cdk.connector_builder.models import (
     LogMessage as ConnectorBuilderLogMessage,
 )
-from airbyte_cdk.models import
+from airbyte_cdk.models import (
+    AirbyteStateBlob,
+    AirbyteStateMessage,
+    AirbyteStateType,
+    AirbyteStreamState,
+    FailureType,
+    Level,
+    StreamDescriptor,
+)
 from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
 from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
 from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker
@@ -90,6 +98,7 @@ from airbyte_cdk.sources.declarative.extractors import (
     RecordSelector,
     ResponseToFileExtractor,
 )
+from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
 from airbyte_cdk.sources.declarative.extractors.record_filter import (
     ClientSideIncrementalRecordFilterDecorator,
 )
@@ -98,7 +107,6 @@ from airbyte_cdk.sources.declarative.incremental import (
     ConcurrentPerPartitionCursor,
     CursorFactory,
     DatetimeBasedCursor,
-    DeclarativeCursor,
     GlobalSubstreamCursor,
     PerPartitionWithGlobalCursor,
 )
@@ -500,8 +508,11 @@ from airbyte_cdk.sources.declarative.requesters.request_options import (
     InterpolatedRequestOptionsProvider,
     RequestOptionsProvider,
 )
+from airbyte_cdk.sources.declarative.requesters.request_options.per_partition_request_option_provider import (
+    PerPartitionRequestOptionsProvider,
+)
 from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
-from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
+from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester
 from airbyte_cdk.sources.declarative.resolvers import (
     ComponentMappingDefinition,
     ConfigComponentsResolver,
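These hunks come from `airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py` (the `+196 -269` entry in the file list above). The new imports pull in `PerPartitionRequestOptionsProvider` (defined in the new +95-line `per_partition_request_option_provider.py`, whose body is not shown in this section) and the `Requester` protocol, both used further down. The sketch below is only an illustration of the general delegation pattern the class name suggests: merge the partition values carried by a stream slice with the cursor-window options produced by an underlying provider. All names and signatures here are assumptions, not the CDK implementation.

```python
# Hypothetical sketch of a per-partition request options provider (not the CDK class).
from dataclasses import dataclass
from typing import Any, Mapping


class WindowOptionsProvider:
    """Stand-in for a datetime-based request options provider."""

    def get_request_params(self, stream_slice: Mapping[str, Any]) -> dict:
        return {"start": stream_slice.get("start_time"), "end": stream_slice.get("end_time")}


@dataclass
class PerPartitionOptionsProvider:
    underlying: WindowOptionsProvider

    def get_request_params(self, stream_slice: Mapping[str, Any]) -> dict:
        # Cursor-window params come from the wrapped provider; the partition
        # identifier (e.g. a parent record id) is layered on top.
        params = self.underlying.get_request_params(stream_slice)
        params.update(stream_slice.get("partition", {}))
        return params


provider = PerPartitionOptionsProvider(WindowOptionsProvider())
print(provider.get_request_params(
    {"partition": {"parent_id": "42"}, "start_time": "2024-01-01", "end_time": "2024-01-31"}
))
# {'start': '2024-01-01', 'end': '2024-01-31', 'parent_id': '42'}
```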
@@ -583,6 +594,7 @@ from airbyte_cdk.sources.message import (
     MessageRepository,
     NoopMessageRepository,
 )
+from airbyte_cdk.sources.message.repository import StateFilteringMessageRepository
 from airbyte_cdk.sources.streams.call_rate import (
     APIBudget,
     FixedWindowCallRatePolicy,
@@ -630,6 +642,7 @@ SCHEMA_TRANSFORMER_TYPE_MAPPING = {
     SchemaNormalizationModel.None_: TransformConfig.NoTransform,
     SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
 }
+_NO_STREAM_SLICING = SinglePartitionRouter(parameters={})

 # Ideally this should use the value defined in ConcurrentDeclarativeSource, but
 # this would be a circular import
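The module-level `_NO_STREAM_SLICING = SinglePartitionRouter(parameters={})` gives the factory one shared "no slicing" value to hand to `SimpleRetriever` (see the `stream_slicer=_NO_STREAM_SLICING` hunks later on). Sharing a single instance is reasonable only because a single-partition router is effectively stateless: it always yields exactly one empty slice. A minimal illustration of that property, with stand-in names rather than the CDK class:

```python
# Stand-in illustration (not the CDK class): a router that always yields one
# empty slice holds no per-stream state, so one instance can safely be reused
# as a module-level sentinel.
from typing import Any, Iterable, Mapping


class SinglePartitionRouterSketch:
    def stream_slices(self) -> Iterable[Mapping[str, Any]]:
        yield {}  # exactly one slice, carrying no partition values


_NO_SLICING = SinglePartitionRouterSketch()

assert list(_NO_SLICING.stream_slices()) == [{}]
assert list(_NO_SLICING.stream_slices()) == [{}]  # safe to iterate repeatedly
```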
@@ -702,7 +715,7 @@ class ModelToComponentFactory:
             CustomValidationStrategyModel: self.create_custom_component,
             CustomConfigTransformationModel: self.create_custom_component,
             DatetimeBasedCursorModel: self.create_datetime_based_cursor,
-            DeclarativeStreamModel: self.
+            DeclarativeStreamModel: self.create_default_stream,
             DefaultErrorHandlerModel: self.create_default_error_handler,
             DefaultPaginatorModel: self.create_default_paginator,
             DpathExtractorModel: self.create_dpath_extractor,
@@ -1291,19 +1304,20 @@ class ModelToComponentFactory:
                 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
             )

+        model_parameters = datetime_based_cursor_model.parameters or {}
         interpolated_cursor_field = InterpolatedString.create(
             datetime_based_cursor_model.cursor_field,
-            parameters=
+            parameters=model_parameters,
         )
         cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

         interpolated_partition_field_start = InterpolatedString.create(
             datetime_based_cursor_model.partition_field_start or "start_time",
-            parameters=
+            parameters=model_parameters,
         )
         interpolated_partition_field_end = InterpolatedString.create(
             datetime_based_cursor_model.partition_field_end or "end_time",
-            parameters=
+            parameters=model_parameters,
         )

         slice_boundary_fields = (
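Hoisting `model_parameters = datetime_based_cursor_model.parameters or {}` and passing it into every `InterpolatedString.create(...)` call makes the cursor's `$parameters` visible to interpolation in all of these places (cursor field, partition fields, and, in the hunks below, the lookback window, step, and clamping target). The snippet below shows why forwarding matters, using plain Jinja2 as an approximation of the CDK's `InterpolatedString` rather than the class itself:

```python
# Why `parameters=` must be forwarded: a manifest value such as
# "{{ parameters['cursor_field'] }}" only resolves when the component's
# $parameters are passed into the render context.
from jinja2 import Template

model_parameters = {"cursor_field": "updated_at"}
template = "{{ parameters['cursor_field'] }}"

assert Template(template).render(parameters=model_parameters) == "updated_at"
assert Template(template).render(parameters={}) == ""  # silently empty if parameters are dropped
```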
@@ -1323,7 +1337,7 @@ class ModelToComponentFactory:
         interpolated_lookback_window = (
             InterpolatedString.create(
                 datetime_based_cursor_model.lookback_window,
-                parameters=
+                parameters=model_parameters,
             )
             if datetime_based_cursor_model.lookback_window
             else None
@@ -1409,7 +1423,7 @@ class ModelToComponentFactory:
         interpolated_step = (
             InterpolatedString.create(
                 datetime_based_cursor_model.step,
-                parameters=
+                parameters=model_parameters,
             )
             if datetime_based_cursor_model.step
             else None
@@ -1426,7 +1440,7 @@ class ModelToComponentFactory:
             # object which we want to keep agnostic of being low-code
             target = InterpolatedString(
                 string=datetime_based_cursor_model.clamping.target,
-                parameters=
+                parameters=model_parameters,
             )
             evaluated_target = target.eval(config=config)
             match evaluated_target:
@@ -1603,6 +1617,10 @@ class ModelToComponentFactory:

         interpolated_cursor_field = InterpolatedString.create(
             datetime_based_cursor_model.cursor_field,
+            # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases:
+            # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters`
+            # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters`
+            # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory.
             parameters=datetime_based_cursor_model.parameters or {},
         )
         cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
@@ -1634,7 +1652,7 @@ class ModelToComponentFactory:
                     stream_namespace=stream_namespace,
                     config=config,
                     message_repository=NoopMessageRepository(),
-                    stream_state_migrations=stream_state_migrations,
+                    # stream_state_migrations=stream_state_migrations,  # FIXME is it expected to run migration on per partition state too?
                 )
             )

@@ -1942,13 +1960,17 @@ class ModelToComponentFactory:
             parameters=model.parameters or {},
         )

-    def
+    def create_default_stream(
         self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any
     ) -> Union[DeclarativeStream, AbstractStream]:
         primary_key = model.primary_key.__root__ if model.primary_key else None

+        partition_router = self._build_stream_slicer_from_partition_router(
+            model.retriever, config, stream_name=model.name
+        )
+        concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config)
         if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
-            cursor_model = model.incremental_sync
+            cursor_model: DatetimeBasedCursorModel = model.incremental_sync

             end_time_option = (
                 self._create_component_from_model(
@@ -1965,17 +1987,29 @@ class ModelToComponentFactory:
                 else None
             )

-
+            datetime_request_options_provider = DatetimeBasedRequestOptionsProvider(
                 start_time_option=start_time_option,
                 end_time_option=end_time_option,
-                partition_field_start=cursor_model.
+                partition_field_start=cursor_model.partition_field_start,
                 partition_field_end=cursor_model.partition_field_end,
                 config=config,
                 parameters=model.parameters or {},
             )
+            request_options_provider = (
+                datetime_request_options_provider
+                if not isinstance(concurrent_cursor, ConcurrentPerPartitionCursor)
+                else PerPartitionRequestOptionsProvider(
+                    partition_router, datetime_request_options_provider
+                )
+            )
         elif model.incremental_sync and isinstance(
             model.incremental_sync, IncrementingCountCursorModel
         ):
+            if isinstance(concurrent_cursor, ConcurrentPerPartitionCursor):
+                raise ValueError(
+                    "PerPartition does not support per partition states because switching to global state is time based"
+                )
+
             cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore

             start_time_option = (
@@ -2013,22 +2047,18 @@ class ModelToComponentFactory:
                 model=model.file_uploader, config=config
             )

-
-
-
-
-        combined_slicers = self._merge_stream_slicers(model=model, config=config)
-        partition_router = self._build_stream_slicer_from_partition_router(
-            model.retriever, config, stream_name=model.name
+        stream_slicer: ConcurrentStreamSlicer = (
+            partition_router
+            if isinstance(concurrent_cursor, FinalStateCursor)
+            else concurrent_cursor
         )
-        concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config)
         retriever = self._create_component_from_model(
             model=model.retriever,
             config=config,
             name=model.name,
             primary_key=primary_key,
-            stream_slicer=combined_slicers,
             request_options_provider=request_options_provider,
+            stream_slicer=stream_slicer,
             stop_condition_cursor=concurrent_cursor
             if self._is_stop_condition_on_cursor(model)
             else None,
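With `_merge_stream_slicers` gone (it is removed further down), the choice of what drives slicing collapses to one expression: slice on the partition router when `_build_concurrent_cursor` returned a `FinalStateCursor` (no incremental sync), otherwise let the concurrent cursor produce the slices. A self-contained toy of that decision, using stand-in classes rather than CDK types:

```python
# Toy version of the slicer selection above; the classes are stand-ins, not CDK types.
class PartitionRouterStub: ...
class FinalStateCursorStub: ...      # returned when the stream has no incremental sync
class ConcurrentCursorStub: ...      # a real (e.g. datetime-based) concurrent cursor


def pick_stream_slicer(partition_router, cursor):
    # Mirrors: partition_router if isinstance(cursor, FinalStateCursor) else cursor
    return partition_router if isinstance(cursor, FinalStateCursorStub) else cursor


router = PartitionRouterStub()
assert pick_stream_slicer(router, FinalStateCursorStub()) is router
incremental = ConcurrentCursorStub()
assert pick_stream_slicer(router, incremental) is incremental
```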
@@ -2039,6 +2069,8 @@ class ModelToComponentFactory:
             file_uploader=file_uploader,
             incremental_sync=model.incremental_sync,
         )
+        if isinstance(retriever, AsyncRetriever):
+            stream_slicer = retriever.stream_slicer

         schema_loader: Union[
             CompositeSchemaLoader,
@@ -2066,89 +2098,27 @@ class ModelToComponentFactory:
             options["name"] = model.name
             schema_loader = DefaultSchemaLoader(config=config, parameters=options)

-
-
-
-                or isinstance(concurrent_cursor, ConcurrentCursor)
-            )
-            and not self._emit_connector_builder_messages
-            and not is_parent
-        ):
-            # We are starting to migrate streams to instantiate directly the DefaultStream instead of instantiating the
-            # DeclarativeStream and assembling the DefaultStream from that. The plan is the following:
-            # * Streams without partition router nor cursors and streams with only partition router. This is the `isinstance(combined_slicers, PartitionRouter)` condition as the first kind with have a SinglePartitionRouter
-            # * Streams without partition router but with cursor. This is the `isinstance(concurrent_cursor, ConcurrentCursor)` condition
-            # * Streams with both partition router and cursor
-            # We specifically exclude parent streams here because SubstreamPartitionRouter has not been updated yet
-            # We specifically exclude Connector Builder stuff for now as Brian is working on this anyway
-
-            stream_name = model.name or ""
-            stream_slicer: ConcurrentStreamSlicer = (
-                concurrent_cursor if concurrent_cursor else SinglePartitionRouter(parameters={})
-            )
-            cursor: Cursor = FinalStateCursor(stream_name, None, self._message_repository)
-            if isinstance(retriever, AsyncRetriever):
-                # The AsyncRetriever only ever worked with a cursor from the concurrent package. Hence, the method
-                # `_build_incremental_cursor` which we would usually think would return only declarative stuff has a
-                # special clause and return a concurrent cursor. This stream slicer is passed to AsyncRetriever when
-                # built because the async retriever has a specific partition router which relies on this stream slicer.
-                # We can't re-use `concurrent_cursor` because it is a different instance than the one passed in
-                # AsyncJobPartitionRouter.
-                stream_slicer = retriever.stream_slicer
-            if isinstance(combined_slicers, Cursor):
-                cursor = combined_slicers
-            elif isinstance(combined_slicers, PartitionRouter):
-                stream_slicer = combined_slicers
-            elif concurrent_cursor:
-                cursor = concurrent_cursor
-
-            # FIXME to be removed once we migrate everything to DefaultStream
-            if isinstance(retriever, SimpleRetriever):
-                # We zero it out here, but since this is a cursor reference, the state is still properly
-                # instantiated for the other components that reference it
-                retriever.cursor = None
-
-            partition_generator = StreamSlicerPartitionGenerator(
+        stream_name = model.name or ""
+        return DefaultStream(
+            partition_generator=StreamSlicerPartitionGenerator(
                 DeclarativePartitionFactory(
                     stream_name,
                     schema_loader,
                     retriever,
                     self._message_repository,
                 ),
-                stream_slicer
-
-
-
-
-
-
-
-
-
-
-
-
-            cursor=cursor,
-            supports_file_transfer=hasattr(model, "file_uploader")
-            and bool(model.file_uploader),
-        )
-
-        cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
-        if model.state_migrations:
-            state_transformations = [
-                self._create_component_from_model(state_migration, config, declarative_stream=model)
-                for state_migration in model.state_migrations
-            ]
-        else:
-            state_transformations = []
-        return DeclarativeStream(
-            name=model.name or "",
-            primary_key=primary_key,
-            retriever=retriever,
-            schema_loader=schema_loader,
-            stream_cursor_field=cursor_field or "",
-            state_migrations=state_transformations,
-            config=config,
-            parameters=model.parameters or {},
+                stream_slicer,
+                slice_limit=self._limit_slices_fetched,
+            ),
+            name=stream_name,
+            json_schema=schema_loader.get_json_schema,
+            primary_key=get_primary_key_from_stream(primary_key),
+            cursor_field=concurrent_cursor.cursor_field.cursor_field_key
+            if hasattr(concurrent_cursor, "cursor_field")
+            else "",  # FIXME we should have the cursor field has part of the interface of cursor,
+            logger=logging.getLogger(f"airbyte.{stream_name}"),
+            cursor=concurrent_cursor,
+            supports_file_transfer=hasattr(model, "file_uploader") and bool(model.file_uploader),
         )

     def _is_stop_condition_on_cursor(self, model: DeclarativeStreamModel) -> bool:
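The factory now returns a `DefaultStream` directly instead of building a `DeclarativeStream` and deriving a concurrent stream from it later. One detail worth noting is the `cursor_field=` argument: it is read via `hasattr(concurrent_cursor, "cursor_field")` because `FinalStateCursor` carries no cursor field, and the inline FIXME notes this should eventually become part of the cursor interface. A small stand-alone illustration of that fallback, with toy classes:

```python
# Stand-alone illustration of the hasattr fallback used for cursor_field above.
# These are toy classes, not the CDK's ConcurrentCursor/FinalStateCursor.
from dataclasses import dataclass


@dataclass
class CursorFieldStub:
    cursor_field_key: str


@dataclass
class ConcurrentCursorStub:
    cursor_field: CursorFieldStub


class FinalStateCursorStub:
    """No cursor_field attribute: the stream is effectively full-refresh."""


def resolve_cursor_field(cursor) -> str:
    return cursor.cursor_field.cursor_field_key if hasattr(cursor, "cursor_field") else ""


assert resolve_cursor_field(ConcurrentCursorStub(CursorFieldStub("updated_at"))) == "updated_at"
assert resolve_cursor_field(FinalStateCursorStub()) == ""
```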
@@ -2197,86 +2167,15 @@ class ModelToComponentFactory:
             )
         return SinglePartitionRouter(parameters={})

-    def _build_incremental_cursor(
-        self,
-        model: DeclarativeStreamModel,
-        stream_slicer: Optional[PartitionRouter],
-        config: Config,
-    ) -> Optional[StreamSlicer]:
-        state_transformations = (
-            [
-                self._create_component_from_model(state_migration, config, declarative_stream=model)
-                for state_migration in model.state_migrations
-            ]
-            if model.state_migrations
-            else []
-        )
-
-        if model.incremental_sync and (
-            stream_slicer and not isinstance(stream_slicer, SinglePartitionRouter)
-        ):
-            if model.retriever.type == "AsyncRetriever":
-                stream_name = model.name or ""
-                stream_namespace = None
-                stream_state = self._connector_state_manager.get_stream_state(
-                    stream_name, stream_namespace
-                )
-
-                return self.create_concurrent_cursor_from_perpartition_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
-                    state_manager=self._connector_state_manager,
-                    model_type=DatetimeBasedCursorModel,
-                    component_definition=model.incremental_sync.__dict__,
-                    stream_name=stream_name,
-                    stream_namespace=stream_namespace,
-                    config=config or {},
-                    stream_state=stream_state,
-                    stream_state_migrations=state_transformations,
-                    partition_router=stream_slicer,
-                )
-
-            incremental_sync_model = model.incremental_sync
-            cursor_component = self._create_component_from_model(
-                model=incremental_sync_model, config=config
-            )
-            is_global_cursor = (
-                hasattr(incremental_sync_model, "global_substream_cursor")
-                and incremental_sync_model.global_substream_cursor
-            )
-
-            if is_global_cursor:
-                return GlobalSubstreamCursor(
-                    stream_cursor=cursor_component, partition_router=stream_slicer
-                )
-            return PerPartitionWithGlobalCursor(
-                cursor_factory=CursorFactory(
-                    lambda: self._create_component_from_model(
-                        model=incremental_sync_model, config=config
-                    ),
-                ),
-                partition_router=stream_slicer,
-                stream_cursor=cursor_component,
-            )
-        elif model.incremental_sync:
-            if model.retriever.type == "AsyncRetriever":
-                return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
-                    model_type=DatetimeBasedCursorModel,
-                    component_definition=model.incremental_sync.__dict__,
-                    stream_name=model.name or "",
-                    stream_namespace=None,
-                    config=config or {},
-                    stream_state_migrations=state_transformations,
-                )
-            return self._create_component_from_model(model=model.incremental_sync, config=config)  # type: ignore[no-any-return] # Will be created Cursor as stream_slicer_model is model.incremental_sync
-        return None
-
     def _build_concurrent_cursor(
         self,
         model: DeclarativeStreamModel,
         stream_slicer: Optional[PartitionRouter],
         config: Config,
-    ) ->
+    ) -> Cursor:
+        stream_name = model.name or ""
         stream_state = self._connector_state_manager.get_stream_state(
-            stream_name=
+            stream_name=stream_name, namespace=None
         )

         if model.state_migrations:
@@ -2296,20 +2195,20 @@ class ModelToComponentFactory:
                 state_manager=self._connector_state_manager,
                 model_type=DatetimeBasedCursorModel,
                 component_definition=model.incremental_sync.__dict__,
-                stream_name=
+                stream_name=stream_name,
                 stream_namespace=None,
                 config=config or {},
                 stream_state=stream_state,
                 stream_state_migrations=state_transformations,
                 partition_router=stream_slicer,
-                attempt_to_create_cursor_if_not_provided=True,
+                attempt_to_create_cursor_if_not_provided=True,  # FIXME can we remove that now?
             )
         elif model.incremental_sync:
             if type(model.incremental_sync) == IncrementingCountCursorModel:
                 return self.create_concurrent_cursor_from_incrementing_count_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
                     model_type=IncrementingCountCursorModel,
                     component_definition=model.incremental_sync.__dict__,
-                    stream_name=
+                    stream_name=stream_name,
                     stream_namespace=None,
                     config=config or {},
                     stream_state_migrations=state_transformations,
@@ -2318,7 +2217,7 @@ class ModelToComponentFactory:
                 return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
                     model_type=type(model.incremental_sync),
                     component_definition=model.incremental_sync.__dict__,
-                    stream_name=
+                    stream_name=stream_name,
                     stream_namespace=None,
                     config=config or {},
                     stream_state_migrations=state_transformations,
@@ -2328,45 +2227,7 @@ class ModelToComponentFactory:
                 raise ValueError(
                     f"Incremental sync of type {type(model.incremental_sync)} is not supported"
                 )
-        return None
-
-    def _merge_stream_slicers(
-        self, model: DeclarativeStreamModel, config: Config
-    ) -> Optional[StreamSlicer]:
-        retriever_model = model.retriever
-
-        stream_slicer = self._build_stream_slicer_from_partition_router(
-            retriever_model, config, stream_name=model.name
-        )
-
-        if retriever_model.type == "AsyncRetriever":
-            is_not_datetime_cursor = (
-                model.incremental_sync.type != "DatetimeBasedCursor"
-                if model.incremental_sync
-                else None
-            )
-            is_partition_router = (
-                bool(retriever_model.partition_router) if model.incremental_sync else None
-            )
-
-            if is_not_datetime_cursor:
-                # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the
-                # support or unordered slices (for example, when we trigger reports for January and February, the report
-                # in February can be completed first). Once we have support for custom concurrent cursor or have a new
-                # implementation available in the CDK, we can enable more cursors here.
-                raise ValueError(
-                    "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet."
-                )
-
-            if is_partition_router and not stream_slicer:
-                # Note that this development is also done in parallel to the per partition development which once merged
-                # we could support here by calling create_concurrent_cursor_from_perpartition_cursor
-                raise ValueError("Per partition state is not supported yet for AsyncRetriever.")
-
-        if model.incremental_sync:
-            return self._build_incremental_cursor(model, stream_slicer, config)
-
-        return stream_slicer
+        return FinalStateCursor(stream_name, None, self._message_repository)

     def create_default_error_handler(
         self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
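`_build_concurrent_cursor` now always returns a `Cursor`: when the model has no incremental sync it falls back to `FinalStateCursor(stream_name, None, self._message_repository)` instead of `None`, and `_build_incremental_cursor`/`_merge_stream_slicers` disappear entirely. That is the null-object pattern: callers no longer have to branch on `Optional[Cursor]`. A compact sketch of the idea, with hypothetical names:

```python
# Null-object sketch (hypothetical names): returning a do-nothing cursor instead
# of None removes Optional handling from every caller.
from typing import Protocol


class Cursor(Protocol):
    def observe(self, record: dict) -> None: ...
    def close(self) -> None: ...


class FinalStateCursorSketch:
    """Does nothing per record; only reports a final 'completed' state."""

    def observe(self, record: dict) -> None:
        pass

    def close(self) -> None:
        print("emit final state: stream completed")


def read_records(records: list[dict], cursor: Cursor) -> None:
    for record in records:  # no `if cursor is not None` checks needed
        cursor.observe(record)
    cursor.close()


read_records([{"id": 1}, {"id": 2}], FinalStateCursorSketch())
```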
@@ -3263,7 +3124,6 @@ class ModelToComponentFactory:
         *,
         name: str,
         primary_key: Optional[Union[str, List[str], List[List[str]]]],
-        stream_slicer: Optional[StreamSlicer],
         request_options_provider: Optional[RequestOptionsProvider] = None,
         stop_condition_cursor: Optional[Cursor] = None,
         client_side_incremental_sync: Optional[Dict[str, Any]] = None,
@@ -3278,7 +3138,7 @@ class ModelToComponentFactory:
         log_formatter: Optional[Callable[[Response], Any]] = None,
         **kwargs: Any,
     ) -> SimpleRetriever:
-        def _get_url() -> str:
+        def _get_url(req: Requester) -> str:
             """
             Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever.
             This is needed because the URL is not set until the requester is created.
@@ -3287,12 +3147,12 @@ class ModelToComponentFactory:
             _url: str = (
                 model.requester.url
                 if hasattr(model.requester, "url") and model.requester.url is not None
-                else
+                else req.get_url(stream_state=None, stream_slice=None, next_page_token=None)
             )
             _url_base: str = (
                 model.requester.url_base
                 if hasattr(model.requester, "url_base") and model.requester.url_base is not None
-                else
+                else req.get_url(stream_state=None, stream_slice=None, next_page_token=None)
             )

             return _url or _url_base
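`_get_url` used to be a zero-argument closure that captured the requester from the enclosing scope; it now takes the `Requester` explicitly and calls `req.get_url(...)` for the fallback. Passing the dependency in keeps the data flow visible at the call site (`_get_url(requester)` in the next hunk) and makes the helper usable before the captured name exists. A minimal before/after sketch with a toy requester, not the CDK interface:

```python
# Toy before/after for the closure-to-parameter change (not the CDK's Requester).
class ToyRequester:
    def get_url(self) -> str:
        return "https://api.example.com/v1"


# Before: the closure silently depends on `requester` existing in the enclosing scope.
def make_get_url_closure(requester: ToyRequester):
    def _get_url() -> str:
        return requester.get_url()
    return _get_url


# After: the dependency is an explicit argument, so the helper is defined once
# and is trivially testable on its own.
def _get_url(req: ToyRequester) -> str:
    return req.get_url()


assert make_get_url_closure(ToyRequester())() == _get_url(ToyRequester())
```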
@@ -3371,36 +3231,14 @@ class ModelToComponentFactory:
             config=config,
         )

-
-        cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None
-
-        if (
-            not isinstance(stream_slicer, DatetimeBasedCursor)
-            or type(stream_slicer) is not DatetimeBasedCursor
-        ):
-            # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
-            # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
-            # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
-            # request_options_provider
-            request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={})
-        elif not request_options_provider:
+        if not request_options_provider:
             request_options_provider = DefaultRequestOptionsProvider(parameters={})

-        stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
-        if self._should_limit_slices_fetched():
-            stream_slicer = cast(
-                StreamSlicer,
-                StreamSlicerTestReadDecorator(
-                    wrapped_slicer=stream_slicer,
-                    maximum_number_of_slices=self._limit_slices_fetched or 5,
-                ),
-            )
-
         paginator = (
             self._create_component_from_model(
                 model=model.paginator,
                 config=config,
-                url_base=_get_url(),
+                url_base=_get_url(requester),
                 extractor_model=model.record_selector.extractor,
                 decoder=decoder,
                 cursor_used_for_stop_condition=stop_condition_cursor or None,
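The test-read slice limiting that used to be applied here by wrapping the slicer in `StreamSlicerTestReadDecorator` is gone from `create_simple_retriever`; in the new flow the limit travels as `slice_limit=self._limit_slices_fetched` on the `StreamSlicerPartitionGenerator` (see the `DefaultStream` hunk above). The underlying idea is just bounded iteration over slices; a minimal sketch with generic names, not the CDK classes:

```python
# Minimal sketch of slice limiting for test reads (generic names, not CDK code).
from itertools import islice
from typing import Any, Iterable, Iterator, Mapping, Optional


def limited_slices(
    slices: Iterable[Mapping[str, Any]], slice_limit: Optional[int]
) -> Iterator[Mapping[str, Any]]:
    # No limit configured: pass everything through; otherwise stop after N slices.
    return iter(slices) if slice_limit is None else islice(slices, slice_limit)


all_slices = [{"start": d} for d in ("2024-01-01", "2024-01-02", "2024-01-03")]
assert len(list(limited_slices(all_slices, None))) == 3
assert len(list(limited_slices(all_slices, slice_limit=2))) == 2
```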
@@ -3444,9 +3282,9 @@ class ModelToComponentFactory:
                 primary_key=primary_key,
                 requester=requester,
                 record_selector=record_selector,
-                stream_slicer=
+                stream_slicer=_NO_STREAM_SLICING,
                 request_option_provider=request_options_provider,
-                cursor=
+                cursor=None,
                 config=config,
                 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
                 parameters=model.parameters or {},
@@ -3458,9 +3296,9 @@ class ModelToComponentFactory:
                 primary_key=primary_key,
                 requester=requester,
                 record_selector=record_selector,
-                stream_slicer=
+                stream_slicer=_NO_STREAM_SLICING,
                 request_option_provider=request_options_provider,
-                cursor=
+                cursor=None,
                 config=config,
                 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
                 additional_query_properties=query_properties,
@@ -3531,14 +3369,21 @@ class ModelToComponentFactory:
                 f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
             )

-        stream_model = (
+        stream_model = self._get_state_delegating_stream_model(
+            False if has_parent_state is None else has_parent_state, model
+        )
+
+        return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return] # DeclarativeStream will be created as stream_model is alwyas DeclarativeStreamModel
+
+    def _get_state_delegating_stream_model(
+        self, has_parent_state: bool, model: StateDelegatingStreamModel
+    ) -> DeclarativeStreamModel:
+        return (
             model.incremental_stream
             if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
             else model.full_refresh_stream
         )

-        return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return] # Will be created DeclarativeStream as stream_model is stream description
-
     def _create_async_job_status_mapping(
         self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any
     ) -> Mapping[str, AsyncJobStatus]:
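Extracting `_get_state_delegating_stream_model` lets the same decision ("use the incremental stream if this stream, or its parent, already has state; otherwise use the full-refresh stream") be reused later when synthesizing parent state in `_instantiate_parent_stream_state_manager`. The selection rule itself is a one-liner; a toy version with stand-in types:

```python
# Toy version of the state-delegating selection rule (stand-in types, not CDK models).
from dataclasses import dataclass
from typing import Optional


@dataclass
class StateDelegatingStreamStub:
    incremental_stream: str
    full_refresh_stream: str


def pick_stream(
    model: StateDelegatingStreamStub, existing_state: Optional[dict], has_parent_state: bool
) -> str:
    # Mirrors: incremental_stream if get_stream_state(...) or has_parent_state else full_refresh_stream
    return model.incremental_stream if existing_state or has_parent_state else model.full_refresh_stream


stub = StateDelegatingStreamStub("incremental", "full_refresh")
assert pick_stream(stub, existing_state=None, has_parent_state=False) == "full_refresh"
assert pick_stream(stub, existing_state={"updated_at": "2024-01-01"}, has_parent_state=False) == "incremental"
assert pick_stream(stub, existing_state=None, has_parent_state=True) == "incremental"
```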
@@ -3583,12 +3428,14 @@ class ModelToComponentFactory:
         transformations: List[RecordTransformation],
         **kwargs: Any,
     ) -> AsyncRetriever:
-        def _get_download_retriever(
+        def _get_download_retriever(
+            requester: Requester, extractor: RecordExtractor, _decoder: Decoder
+        ) -> SimpleRetriever:
             # We create a record selector for the download retriever
             # with no schema normalization and no transformations, neither record filter
             # as all this occurs in the record_selector of the AsyncRetriever
             record_selector = RecordSelector(
-                extractor=
+                extractor=extractor,
                 name=name,
                 record_filter=None,
                 transformations=[],
@@ -3599,7 +3446,7 @@ class ModelToComponentFactory:
             paginator = (
                 self._create_component_from_model(
                     model=model.download_paginator,
-                    decoder=
+                    decoder=_decoder,
                     config=config,
                     url_base="",
                 )
@@ -3608,7 +3455,7 @@ class ModelToComponentFactory:
             )

             return SimpleRetriever(
-                requester=
+                requester=requester,
                 record_selector=record_selector,
                 primary_key=None,
                 name=name,
@@ -3702,7 +3549,9 @@ class ModelToComponentFactory:
             config=config,
             name=job_download_components_name,
         )
-        download_retriever = _get_download_retriever(
+        download_retriever = _get_download_retriever(
+            download_requester, download_extractor, download_decoder
+        )
         abort_requester = (
             self._create_component_from_model(
                 model=model.abort_requester,
@@ -3848,29 +3697,104 @@ class ModelToComponentFactory:
     def _create_message_repository_substream_wrapper(
         self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
     ) -> Any:
+        # getting the parent state
+        child_state = self._connector_state_manager.get_stream_state(
+            kwargs["stream_name"], None
+        )
+
+        # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
+        has_parent_state = bool(
+            self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
+            if model.incremental_dependency
+            else False
+        )
+        connector_state_manager = self._instantiate_parent_stream_state_manager(
+            child_state, config, model, has_parent_state
+        )
+
         substream_factory = ModelToComponentFactory(
+            connector_state_manager=connector_state_manager,
             limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
             limit_slices_fetched=self._limit_slices_fetched,
             emit_connector_builder_messages=self._emit_connector_builder_messages,
             disable_retries=self._disable_retries,
             disable_cache=self._disable_cache,
-            message_repository=
-
-
-
+            message_repository=StateFilteringMessageRepository(
+                LogAppenderMessageRepositoryDecorator(
+                    {
+                        "airbyte_cdk": {"stream": {"is_substream": True}},
+                        "http": {"is_auxiliary": True},
+                    },
+                    self._message_repository,
+                    self._evaluate_log_level(self._emit_connector_builder_messages),
+                ),
             ),
         )

-        # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
-        has_parent_state = bool(
-            self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
-            if model.incremental_dependency
-            else False
-        )
         return substream_factory._create_component_from_model(
             model=model, config=config, has_parent_state=has_parent_state, **kwargs
         )

+    def _instantiate_parent_stream_state_manager(
+        self,
+        child_state: MutableMapping[str, Any],
+        config: Config,
+        model: ParentStreamConfigModel,
+        has_parent_state: bool,
+    ) -> ConnectorStateManager:
+        """
+        With DefaultStream, the state needs to be provided during __init__ of the cursor as opposed to the
+        `set_initial_state` flow that existed for the declarative cursors. This state is taken from
+        self._connector_state_manager.get_stream_state (`self` being a newly created ModelToComponentFactory to account
+        for the MessageRepository being different). So we need to pass a ConnectorStateManager to the
+        ModelToComponentFactory that has the parent states. This method populates this if there is a child state and if
+        incremental_dependency is set.
+        """
+        if model.incremental_dependency and child_state:
+            parent_stream_name = model.stream.name or ""
+            parent_state = ConcurrentPerPartitionCursor.get_parent_state(
+                child_state, parent_stream_name
+            )
+
+            if not parent_state:
+                # there are two migration cases: state value from child stream or from global state
+                parent_state = ConcurrentPerPartitionCursor.get_global_state(
+                    child_state, parent_stream_name
+                )
+
+            if not parent_state and not isinstance(parent_state, dict):
+                cursor_values = child_state.values()
+                if cursor_values:
+                    incremental_sync_model: Union[
+                        DatetimeBasedCursorModel,
+                        IncrementingCountCursorModel,
+                        CustomIncrementalSyncModel,
+                    ] = (
+                        model.stream.incremental_sync  # type: ignore # if we are there, it is because there is incremental_dependency and therefore there is an incremental_sync on the parent stream
+                        if isinstance(model.stream, DeclarativeStreamModel)
+                        else self._get_state_delegating_stream_model(
+                            has_parent_state, model.stream
+                        ).incremental_sync
+                    )
+                    cursor_field = InterpolatedString.create(
+                        incremental_sync_model.cursor_field,
+                        parameters=incremental_sync_model.parameters or {},
+                    ).eval(config)
+                    parent_state = AirbyteStateMessage(
+                        type=AirbyteStateType.STREAM,
+                        stream=AirbyteStreamState(
+                            stream_descriptor=StreamDescriptor(
+                                name=parent_stream_name, namespace=None
+                            ),
+                            stream_state=AirbyteStateBlob(
+                                {cursor_field: list(cursor_values)[0]}
+                            ),
+                        ),
+                    )
+            return ConnectorStateManager([parent_state] if parent_state else [])
+
+        return ConnectorStateManager([])
+
     @staticmethod
     def create_wait_time_from_header(
         model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
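`_instantiate_parent_stream_state_manager` is the reason the `AirbyteStateMessage`/`AirbyteStreamState`/`StreamDescriptor`/`AirbyteStateBlob` imports were added at the top of the file: when a substream has `incremental_dependency` and the child already has state, the factory seeds the parent's `ConnectorStateManager` with a synthesized stream-state message, falling back from per-partition parent state, to global state, to a single cursor value lifted out of the child state. The helper below shows just the message-shaping step in isolation, mirroring the shape built in the diff; treat it as a simplified stand-alone illustration, not the CDK method itself.

```python
# Illustration of synthesizing a parent AirbyteStateMessage from a single cursor
# value, mirroring the shape built in _instantiate_parent_stream_state_manager.
from airbyte_cdk.models import (
    AirbyteStateBlob,
    AirbyteStateMessage,
    AirbyteStateType,
    AirbyteStreamState,
    StreamDescriptor,
)


def synthesize_parent_state(parent_stream_name: str, cursor_field: str, cursor_value: str) -> AirbyteStateMessage:
    return AirbyteStateMessage(
        type=AirbyteStateType.STREAM,
        stream=AirbyteStreamState(
            stream_descriptor=StreamDescriptor(name=parent_stream_name, namespace=None),
            stream_state=AirbyteStateBlob({cursor_field: cursor_value}),
        ),
    )


message = synthesize_parent_state("projects", "updated_at", "2024-01-01T00:00:00Z")
print(message.stream.stream_descriptor.name, message.stream.stream_state)
```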
@@ -3951,6 +3875,7 @@ class ModelToComponentFactory:

         return HttpComponentsResolver(
             retriever=retriever,
+            stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config),
             config=config,
             components_mapping=components_mapping,
             parameters=model.parameters or {},
@@ -4176,7 +4101,9 @@ class ModelToComponentFactory:
         self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
     ) -> GroupingPartitionRouter:
         underlying_router = self._create_component_from_model(
-            model=model.underlying_partition_router,
+            model=model.underlying_partition_router,
+            config=config,
+            **kwargs,
         )
         if model.group_size < 1:
             raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
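The `GroupingPartitionRouter` change makes the call to build the underlying partition router explicitly forward `config` and `**kwargs`, so per-stream context supplied by the caller reaches the inner router. Forwarding keyword arguments through a factory layer is a small but easy-to-miss pattern:

```python
# Small illustration of forwarding **kwargs through a factory layer so the inner
# builder still receives the caller's context (generic example, not CDK code).
def create_underlying_router(model: str, config: dict, **kwargs) -> dict:
    return {"model": model, "config": config, **kwargs}


def create_grouping_router(model: str, config: dict, **kwargs) -> dict:
    # The fix forwards config and kwargs through to the underlying router.
    return {"underlying": create_underlying_router(model, config=config, **kwargs), "group_size": 10}


router = create_grouping_router("ListPartitionRouter", config={"api_key": "key"}, stream_name="projects")
assert router["underlying"]["stream_name"] == "projects"
```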