airbyte-cdk 6.61.6__py3-none-any.whl → 6.62.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +7 -7
- airbyte_cdk/connector_builder/main.py +2 -2
- airbyte_cdk/connector_builder/test_reader/reader.py +2 -2
- airbyte_cdk/legacy/sources/declarative/incremental/per_partition_cursor.py +4 -2
- airbyte_cdk/manifest_server/Dockerfile +2 -2
- airbyte_cdk/manifest_server/README.md +0 -22
- airbyte_cdk/manifest_server/app.py +0 -6
- airbyte_cdk/manifest_server/cli/_common.py +0 -1
- airbyte_cdk/manifest_server/command_processor/processor.py +5 -2
- airbyte_cdk/manifest_server/command_processor/utils.py +1 -1
- airbyte_cdk/manifest_server/routers/manifest.py +1 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +6 -7
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +57 -7
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +4 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +208 -278
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +0 -6
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +0 -6
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +1 -23
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +0 -6
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +88 -107
- airbyte_cdk/sources/declarative/requesters/request_options/per_partition_request_option_provider.py +95 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +4 -1
- airbyte_cdk/sources/declarative/retrievers/retriever.py +5 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +7 -21
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +1 -1
- airbyte_cdk/sources/message/repository.py +20 -0
- airbyte_cdk/sources/utils/schema_helpers.py +9 -29
- airbyte_cdk/sources/utils/transform.py +13 -25
- airbyte_cdk/utils/spec_schema_transformations.py +5 -7
- {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.62.0.dev1.dist-info}/METADATA +2 -4
- {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.62.0.dev1.dist-info}/RECORD +36 -35
- {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.62.0.dev1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.62.0.dev1.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.62.0.dev1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.62.0.dev1.dist-info}/entry_points.txt +0 -0
@@ -33,7 +33,15 @@ from requests import Response
|
|
33
33
|
from airbyte_cdk.connector_builder.models import (
|
34
34
|
LogMessage as ConnectorBuilderLogMessage,
|
35
35
|
)
|
36
|
-
from airbyte_cdk.models import
|
36
|
+
from airbyte_cdk.models import (
|
37
|
+
AirbyteStateBlob,
|
38
|
+
AirbyteStateMessage,
|
39
|
+
AirbyteStateType,
|
40
|
+
AirbyteStreamState,
|
41
|
+
FailureType,
|
42
|
+
Level,
|
43
|
+
StreamDescriptor,
|
44
|
+
)
|
37
45
|
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
38
46
|
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
|
39
47
|
from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker
|
@@ -90,6 +98,7 @@ from airbyte_cdk.sources.declarative.extractors import (
|
|
90
98
|
RecordSelector,
|
91
99
|
ResponseToFileExtractor,
|
92
100
|
)
|
101
|
+
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
|
93
102
|
from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
94
103
|
ClientSideIncrementalRecordFilterDecorator,
|
95
104
|
)
|
@@ -98,7 +107,6 @@ from airbyte_cdk.sources.declarative.incremental import (
|
|
98
107
|
ConcurrentPerPartitionCursor,
|
99
108
|
CursorFactory,
|
100
109
|
DatetimeBasedCursor,
|
101
|
-
DeclarativeCursor,
|
102
110
|
GlobalSubstreamCursor,
|
103
111
|
PerPartitionWithGlobalCursor,
|
104
112
|
)
|
@@ -500,8 +508,11 @@ from airbyte_cdk.sources.declarative.requesters.request_options import (
|
|
500
508
|
InterpolatedRequestOptionsProvider,
|
501
509
|
RequestOptionsProvider,
|
502
510
|
)
|
511
|
+
from airbyte_cdk.sources.declarative.requesters.request_options.per_partition_request_option_provider import (
|
512
|
+
PerPartitionRequestOptionsProvider,
|
513
|
+
)
|
503
514
|
from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
|
504
|
-
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
|
515
|
+
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester
|
505
516
|
from airbyte_cdk.sources.declarative.resolvers import (
|
506
517
|
ComponentMappingDefinition,
|
507
518
|
ConfigComponentsResolver,
|
@@ -583,6 +594,7 @@ from airbyte_cdk.sources.message import (
|
|
583
594
|
MessageRepository,
|
584
595
|
NoopMessageRepository,
|
585
596
|
)
|
597
|
+
from airbyte_cdk.sources.message.repository import StateFilteringMessageRepository
|
586
598
|
from airbyte_cdk.sources.streams.call_rate import (
|
587
599
|
APIBudget,
|
588
600
|
FixedWindowCallRatePolicy,
|
@@ -630,6 +642,7 @@ SCHEMA_TRANSFORMER_TYPE_MAPPING = {
|
|
630
642
|
SchemaNormalizationModel.None_: TransformConfig.NoTransform,
|
631
643
|
SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
|
632
644
|
}
|
645
|
+
_NO_STREAM_SLICING = SinglePartitionRouter(parameters={})
|
633
646
|
|
634
647
|
# Ideally this should use the value defined in ConcurrentDeclarativeSource, but
|
635
648
|
# this would be a circular import
|
@@ -702,7 +715,7 @@ class ModelToComponentFactory:
|
|
702
715
|
CustomValidationStrategyModel: self.create_custom_component,
|
703
716
|
CustomConfigTransformationModel: self.create_custom_component,
|
704
717
|
DatetimeBasedCursorModel: self.create_datetime_based_cursor,
|
705
|
-
DeclarativeStreamModel: self.
|
718
|
+
DeclarativeStreamModel: self.create_default_stream,
|
706
719
|
DefaultErrorHandlerModel: self.create_default_error_handler,
|
707
720
|
DefaultPaginatorModel: self.create_default_paginator,
|
708
721
|
DpathExtractorModel: self.create_dpath_extractor,
|
@@ -739,7 +752,7 @@ class ModelToComponentFactory:
|
|
739
752
|
OAuthAuthenticatorModel: self.create_oauth_authenticator,
|
740
753
|
OffsetIncrementModel: self.create_offset_increment,
|
741
754
|
PageIncrementModel: self.create_page_increment,
|
742
|
-
ParentStreamConfigModel: self.
|
755
|
+
ParentStreamConfigModel: self._create_message_repository_substream_wrapper,
|
743
756
|
PredicateValidatorModel: self.create_predicate_validator,
|
744
757
|
PropertiesFromEndpointModel: self.create_properties_from_endpoint,
|
745
758
|
PropertyChunkingModel: self.create_property_chunking,
|
@@ -1291,19 +1304,20 @@ class ModelToComponentFactory:
|
|
1291
1304
|
f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
|
1292
1305
|
)
|
1293
1306
|
|
1307
|
+
model_parameters = datetime_based_cursor_model.parameters or {}
|
1294
1308
|
interpolated_cursor_field = InterpolatedString.create(
|
1295
1309
|
datetime_based_cursor_model.cursor_field,
|
1296
|
-
parameters=
|
1310
|
+
parameters=model_parameters,
|
1297
1311
|
)
|
1298
1312
|
cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
|
1299
1313
|
|
1300
1314
|
interpolated_partition_field_start = InterpolatedString.create(
|
1301
1315
|
datetime_based_cursor_model.partition_field_start or "start_time",
|
1302
|
-
parameters=
|
1316
|
+
parameters=model_parameters,
|
1303
1317
|
)
|
1304
1318
|
interpolated_partition_field_end = InterpolatedString.create(
|
1305
1319
|
datetime_based_cursor_model.partition_field_end or "end_time",
|
1306
|
-
parameters=
|
1320
|
+
parameters=model_parameters,
|
1307
1321
|
)
|
1308
1322
|
|
1309
1323
|
slice_boundary_fields = (
|
@@ -1323,7 +1337,7 @@ class ModelToComponentFactory:
|
|
1323
1337
|
interpolated_lookback_window = (
|
1324
1338
|
InterpolatedString.create(
|
1325
1339
|
datetime_based_cursor_model.lookback_window,
|
1326
|
-
parameters=
|
1340
|
+
parameters=model_parameters,
|
1327
1341
|
)
|
1328
1342
|
if datetime_based_cursor_model.lookback_window
|
1329
1343
|
else None
|
@@ -1409,7 +1423,7 @@ class ModelToComponentFactory:
|
|
1409
1423
|
interpolated_step = (
|
1410
1424
|
InterpolatedString.create(
|
1411
1425
|
datetime_based_cursor_model.step,
|
1412
|
-
parameters=
|
1426
|
+
parameters=model_parameters,
|
1413
1427
|
)
|
1414
1428
|
if datetime_based_cursor_model.step
|
1415
1429
|
else None
|
@@ -1426,7 +1440,7 @@ class ModelToComponentFactory:
|
|
1426
1440
|
# object which we want to keep agnostic of being low-code
|
1427
1441
|
target = InterpolatedString(
|
1428
1442
|
string=datetime_based_cursor_model.clamping.target,
|
1429
|
-
parameters=
|
1443
|
+
parameters=model_parameters,
|
1430
1444
|
)
|
1431
1445
|
evaluated_target = target.eval(config=config)
|
1432
1446
|
match evaluated_target:
|
@@ -1603,6 +1617,10 @@ class ModelToComponentFactory:
|
|
1603
1617
|
|
1604
1618
|
interpolated_cursor_field = InterpolatedString.create(
|
1605
1619
|
datetime_based_cursor_model.cursor_field,
|
1620
|
+
# FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases:
|
1621
|
+
# * The ComponentDefinition comes from model.__dict__ in which case we have `parameters`
|
1622
|
+
# * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters`
|
1623
|
+
# We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory.
|
1606
1624
|
parameters=datetime_based_cursor_model.parameters or {},
|
1607
1625
|
)
|
1608
1626
|
cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
|
@@ -1634,7 +1652,7 @@ class ModelToComponentFactory:
|
|
1634
1652
|
stream_namespace=stream_namespace,
|
1635
1653
|
config=config,
|
1636
1654
|
message_repository=NoopMessageRepository(),
|
1637
|
-
stream_state_migrations=stream_state_migrations,
|
1655
|
+
# stream_state_migrations=stream_state_migrations, # FIXME is it expected to run migration on per partition state too?
|
1638
1656
|
)
|
1639
1657
|
)
|
1640
1658
|
|
@@ -1730,7 +1748,7 @@ class ModelToComponentFactory:
|
|
1730
1748
|
|
1731
1749
|
if self._is_component(model_value):
|
1732
1750
|
model_args[model_field] = self._create_nested_component(
|
1733
|
-
model, model_field, model_value, config
|
1751
|
+
model, model_field, model_value, config, **kwargs,
|
1734
1752
|
)
|
1735
1753
|
elif isinstance(model_value, list):
|
1736
1754
|
vals = []
|
@@ -1742,7 +1760,7 @@ class ModelToComponentFactory:
|
|
1742
1760
|
if derived_type:
|
1743
1761
|
v["type"] = derived_type
|
1744
1762
|
if self._is_component(v):
|
1745
|
-
vals.append(self._create_nested_component(model, model_field, v, config))
|
1763
|
+
vals.append(self._create_nested_component(model, model_field, v, config, **kwargs,))
|
1746
1764
|
else:
|
1747
1765
|
vals.append(v)
|
1748
1766
|
model_args[model_field] = vals
|
@@ -1832,7 +1850,7 @@ class ModelToComponentFactory:
|
|
1832
1850
|
return []
|
1833
1851
|
|
1834
1852
|
def _create_nested_component(
|
1835
|
-
self, model: Any, model_field: str, model_value: Any, config: Config
|
1853
|
+
self, model: Any, model_field: str, model_value: Any, config: Config, **kwargs: Any
|
1836
1854
|
) -> Any:
|
1837
1855
|
type_name = model_value.get("type", None)
|
1838
1856
|
if not type_name:
|
@@ -1857,8 +1875,11 @@ class ModelToComponentFactory:
|
|
1857
1875
|
for kwarg in constructor_kwargs
|
1858
1876
|
if kwarg in model_parameters
|
1859
1877
|
}
|
1878
|
+
matching_kwargs = {
|
1879
|
+
kwarg: kwargs[kwarg] for kwarg in constructor_kwargs if kwarg in kwargs
|
1880
|
+
}
|
1860
1881
|
return self._create_component_from_model(
|
1861
|
-
model=parsed_model, config=config, **matching_parameters
|
1882
|
+
model=parsed_model, config=config, **(matching_parameters | matching_kwargs)
|
1862
1883
|
)
|
1863
1884
|
except TypeError as error:
|
1864
1885
|
missing_parameters = self._extract_missing_parameters(error)
|
@@ -1942,13 +1963,17 @@ class ModelToComponentFactory:
|
|
1942
1963
|
parameters=model.parameters or {},
|
1943
1964
|
)
|
1944
1965
|
|
1945
|
-
def
|
1966
|
+
def create_default_stream(
|
1946
1967
|
self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any
|
1947
1968
|
) -> Union[DeclarativeStream, AbstractStream]:
|
1948
1969
|
primary_key = model.primary_key.__root__ if model.primary_key else None
|
1949
1970
|
|
1971
|
+
partition_router = self._build_stream_slicer_from_partition_router(
|
1972
|
+
model.retriever, config, stream_name=model.name
|
1973
|
+
)
|
1974
|
+
concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config)
|
1950
1975
|
if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
|
1951
|
-
cursor_model = model.incremental_sync
|
1976
|
+
cursor_model: DatetimeBasedCursorModel = model.incremental_sync
|
1952
1977
|
|
1953
1978
|
end_time_option = (
|
1954
1979
|
self._create_component_from_model(
|
@@ -1965,17 +1990,29 @@ class ModelToComponentFactory:
|
|
1965
1990
|
else None
|
1966
1991
|
)
|
1967
1992
|
|
1968
|
-
|
1993
|
+
datetime_request_options_provider = DatetimeBasedRequestOptionsProvider(
|
1969
1994
|
start_time_option=start_time_option,
|
1970
1995
|
end_time_option=end_time_option,
|
1971
|
-
partition_field_start=cursor_model.
|
1996
|
+
partition_field_start=cursor_model.partition_field_start,
|
1972
1997
|
partition_field_end=cursor_model.partition_field_end,
|
1973
1998
|
config=config,
|
1974
1999
|
parameters=model.parameters or {},
|
1975
2000
|
)
|
2001
|
+
request_options_provider = (
|
2002
|
+
datetime_request_options_provider
|
2003
|
+
if not isinstance(concurrent_cursor, ConcurrentPerPartitionCursor)
|
2004
|
+
else PerPartitionRequestOptionsProvider(
|
2005
|
+
partition_router, datetime_request_options_provider
|
2006
|
+
)
|
2007
|
+
)
|
1976
2008
|
elif model.incremental_sync and isinstance(
|
1977
2009
|
model.incremental_sync, IncrementingCountCursorModel
|
1978
2010
|
):
|
2011
|
+
if isinstance(concurrent_cursor, ConcurrentPerPartitionCursor):
|
2012
|
+
raise ValueError(
|
2013
|
+
"PerPartition does not support per partition states because switching to global state is time based"
|
2014
|
+
)
|
2015
|
+
|
1979
2016
|
cursor_model: IncrementingCountCursorModel = model.incremental_sync # type: ignore
|
1980
2017
|
|
1981
2018
|
start_time_option = (
|
@@ -2013,22 +2050,18 @@ class ModelToComponentFactory:
|
|
2013
2050
|
model=model.file_uploader, config=config
|
2014
2051
|
)
|
2015
2052
|
|
2016
|
-
|
2017
|
-
|
2018
|
-
|
2019
|
-
|
2020
|
-
combined_slicers = self._merge_stream_slicers(model=model, config=config)
|
2021
|
-
partition_router = self._build_stream_slicer_from_partition_router(
|
2022
|
-
model.retriever, config, stream_name=model.name
|
2053
|
+
stream_slicer: ConcurrentStreamSlicer = (
|
2054
|
+
partition_router
|
2055
|
+
if isinstance(concurrent_cursor, FinalStateCursor)
|
2056
|
+
else concurrent_cursor
|
2023
2057
|
)
|
2024
|
-
concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config)
|
2025
2058
|
retriever = self._create_component_from_model(
|
2026
2059
|
model=model.retriever,
|
2027
2060
|
config=config,
|
2028
2061
|
name=model.name,
|
2029
2062
|
primary_key=primary_key,
|
2030
|
-
stream_slicer=combined_slicers,
|
2031
2063
|
request_options_provider=request_options_provider,
|
2064
|
+
stream_slicer=stream_slicer,
|
2032
2065
|
stop_condition_cursor=concurrent_cursor
|
2033
2066
|
if self._is_stop_condition_on_cursor(model)
|
2034
2067
|
else None,
|
@@ -2039,6 +2072,8 @@ class ModelToComponentFactory:
|
|
2039
2072
|
file_uploader=file_uploader,
|
2040
2073
|
incremental_sync=model.incremental_sync,
|
2041
2074
|
)
|
2075
|
+
if isinstance(retriever, AsyncRetriever):
|
2076
|
+
stream_slicer = retriever.stream_slicer
|
2042
2077
|
|
2043
2078
|
schema_loader: Union[
|
2044
2079
|
CompositeSchemaLoader,
|
@@ -2066,89 +2101,27 @@ class ModelToComponentFactory:
|
|
2066
2101
|
options["name"] = model.name
|
2067
2102
|
schema_loader = DefaultSchemaLoader(config=config, parameters=options)
|
2068
2103
|
|
2069
|
-
|
2070
|
-
|
2071
|
-
|
2072
|
-
or isinstance(concurrent_cursor, ConcurrentCursor)
|
2073
|
-
)
|
2074
|
-
and not self._emit_connector_builder_messages
|
2075
|
-
and not is_parent
|
2076
|
-
):
|
2077
|
-
# We are starting to migrate streams to instantiate directly the DefaultStream instead of instantiating the
|
2078
|
-
# DeclarativeStream and assembling the DefaultStream from that. The plan is the following:
|
2079
|
-
# * Streams without partition router nor cursors and streams with only partition router. This is the `isinstance(combined_slicers, PartitionRouter)` condition as the first kind with have a SinglePartitionRouter
|
2080
|
-
# * Streams without partition router but with cursor. This is the `isinstance(concurrent_cursor, ConcurrentCursor)` condition
|
2081
|
-
# * Streams with both partition router and cursor
|
2082
|
-
# We specifically exclude parent streams here because SubstreamPartitionRouter has not been updated yet
|
2083
|
-
# We specifically exclude Connector Builder stuff for now as Brian is working on this anyway
|
2084
|
-
|
2085
|
-
stream_name = model.name or ""
|
2086
|
-
stream_slicer: ConcurrentStreamSlicer = (
|
2087
|
-
concurrent_cursor if concurrent_cursor else SinglePartitionRouter(parameters={})
|
2088
|
-
)
|
2089
|
-
cursor: Cursor = FinalStateCursor(stream_name, None, self._message_repository)
|
2090
|
-
if isinstance(retriever, AsyncRetriever):
|
2091
|
-
# The AsyncRetriever only ever worked with a cursor from the concurrent package. Hence, the method
|
2092
|
-
# `_build_incremental_cursor` which we would usually think would return only declarative stuff has a
|
2093
|
-
# special clause and return a concurrent cursor. This stream slicer is passed to AsyncRetriever when
|
2094
|
-
# built because the async retriever has a specific partition router which relies on this stream slicer.
|
2095
|
-
# We can't re-use `concurrent_cursor` because it is a different instance than the one passed in
|
2096
|
-
# AsyncJobPartitionRouter.
|
2097
|
-
stream_slicer = retriever.stream_slicer
|
2098
|
-
if isinstance(combined_slicers, Cursor):
|
2099
|
-
cursor = combined_slicers
|
2100
|
-
elif isinstance(combined_slicers, PartitionRouter):
|
2101
|
-
stream_slicer = combined_slicers
|
2102
|
-
elif concurrent_cursor:
|
2103
|
-
cursor = concurrent_cursor
|
2104
|
-
|
2105
|
-
# FIXME to be removed once we migrate everything to DefaultStream
|
2106
|
-
if isinstance(retriever, SimpleRetriever):
|
2107
|
-
# We zero it out here, but since this is a cursor reference, the state is still properly
|
2108
|
-
# instantiated for the other components that reference it
|
2109
|
-
retriever.cursor = None
|
2110
|
-
|
2111
|
-
partition_generator = StreamSlicerPartitionGenerator(
|
2104
|
+
stream_name = model.name or ""
|
2105
|
+
return DefaultStream(
|
2106
|
+
partition_generator=StreamSlicerPartitionGenerator(
|
2112
2107
|
DeclarativePartitionFactory(
|
2113
2108
|
stream_name,
|
2114
2109
|
schema_loader,
|
2115
2110
|
retriever,
|
2116
2111
|
self._message_repository,
|
2117
2112
|
),
|
2118
|
-
stream_slicer
|
2119
|
-
|
2120
|
-
|
2121
|
-
|
2122
|
-
|
2123
|
-
|
2124
|
-
|
2125
|
-
|
2126
|
-
|
2127
|
-
|
2128
|
-
|
2129
|
-
|
2130
|
-
cursor=cursor,
|
2131
|
-
supports_file_transfer=hasattr(model, "file_uploader")
|
2132
|
-
and bool(model.file_uploader),
|
2133
|
-
)
|
2134
|
-
|
2135
|
-
cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
|
2136
|
-
if model.state_migrations:
|
2137
|
-
state_transformations = [
|
2138
|
-
self._create_component_from_model(state_migration, config, declarative_stream=model)
|
2139
|
-
for state_migration in model.state_migrations
|
2140
|
-
]
|
2141
|
-
else:
|
2142
|
-
state_transformations = []
|
2143
|
-
return DeclarativeStream(
|
2144
|
-
name=model.name or "",
|
2145
|
-
primary_key=primary_key,
|
2146
|
-
retriever=retriever,
|
2147
|
-
schema_loader=schema_loader,
|
2148
|
-
stream_cursor_field=cursor_field or "",
|
2149
|
-
state_migrations=state_transformations,
|
2150
|
-
config=config,
|
2151
|
-
parameters=model.parameters or {},
|
2113
|
+
stream_slicer,
|
2114
|
+
slice_limit=self._limit_slices_fetched,
|
2115
|
+
),
|
2116
|
+
name=stream_name,
|
2117
|
+
json_schema=schema_loader.get_json_schema,
|
2118
|
+
primary_key=get_primary_key_from_stream(primary_key),
|
2119
|
+
cursor_field=concurrent_cursor.cursor_field.cursor_field_key
|
2120
|
+
if hasattr(concurrent_cursor, "cursor_field")
|
2121
|
+
else "", # FIXME we should have the cursor field has part of the interface of cursor,
|
2122
|
+
logger=logging.getLogger(f"airbyte.{stream_name}"),
|
2123
|
+
cursor=concurrent_cursor,
|
2124
|
+
supports_file_transfer=hasattr(model, "file_uploader") and bool(model.file_uploader),
|
2152
2125
|
)
|
2153
2126
|
|
2154
2127
|
def _is_stop_condition_on_cursor(self, model: DeclarativeStreamModel) -> bool:
|
@@ -2197,86 +2170,15 @@ class ModelToComponentFactory:
|
|
2197
2170
|
)
|
2198
2171
|
return SinglePartitionRouter(parameters={})
|
2199
2172
|
|
2200
|
-
def _build_incremental_cursor(
|
2201
|
-
self,
|
2202
|
-
model: DeclarativeStreamModel,
|
2203
|
-
stream_slicer: Optional[PartitionRouter],
|
2204
|
-
config: Config,
|
2205
|
-
) -> Optional[StreamSlicer]:
|
2206
|
-
state_transformations = (
|
2207
|
-
[
|
2208
|
-
self._create_component_from_model(state_migration, config, declarative_stream=model)
|
2209
|
-
for state_migration in model.state_migrations
|
2210
|
-
]
|
2211
|
-
if model.state_migrations
|
2212
|
-
else []
|
2213
|
-
)
|
2214
|
-
|
2215
|
-
if model.incremental_sync and (
|
2216
|
-
stream_slicer and not isinstance(stream_slicer, SinglePartitionRouter)
|
2217
|
-
):
|
2218
|
-
if model.retriever.type == "AsyncRetriever":
|
2219
|
-
stream_name = model.name or ""
|
2220
|
-
stream_namespace = None
|
2221
|
-
stream_state = self._connector_state_manager.get_stream_state(
|
2222
|
-
stream_name, stream_namespace
|
2223
|
-
)
|
2224
|
-
|
2225
|
-
return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
2226
|
-
state_manager=self._connector_state_manager,
|
2227
|
-
model_type=DatetimeBasedCursorModel,
|
2228
|
-
component_definition=model.incremental_sync.__dict__,
|
2229
|
-
stream_name=stream_name,
|
2230
|
-
stream_namespace=stream_namespace,
|
2231
|
-
config=config or {},
|
2232
|
-
stream_state=stream_state,
|
2233
|
-
stream_state_migrations=state_transformations,
|
2234
|
-
partition_router=stream_slicer,
|
2235
|
-
)
|
2236
|
-
|
2237
|
-
incremental_sync_model = model.incremental_sync
|
2238
|
-
cursor_component = self._create_component_from_model(
|
2239
|
-
model=incremental_sync_model, config=config
|
2240
|
-
)
|
2241
|
-
is_global_cursor = (
|
2242
|
-
hasattr(incremental_sync_model, "global_substream_cursor")
|
2243
|
-
and incremental_sync_model.global_substream_cursor
|
2244
|
-
)
|
2245
|
-
|
2246
|
-
if is_global_cursor:
|
2247
|
-
return GlobalSubstreamCursor(
|
2248
|
-
stream_cursor=cursor_component, partition_router=stream_slicer
|
2249
|
-
)
|
2250
|
-
return PerPartitionWithGlobalCursor(
|
2251
|
-
cursor_factory=CursorFactory(
|
2252
|
-
lambda: self._create_component_from_model(
|
2253
|
-
model=incremental_sync_model, config=config
|
2254
|
-
),
|
2255
|
-
),
|
2256
|
-
partition_router=stream_slicer,
|
2257
|
-
stream_cursor=cursor_component,
|
2258
|
-
)
|
2259
|
-
elif model.incremental_sync:
|
2260
|
-
if model.retriever.type == "AsyncRetriever":
|
2261
|
-
return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
2262
|
-
model_type=DatetimeBasedCursorModel,
|
2263
|
-
component_definition=model.incremental_sync.__dict__,
|
2264
|
-
stream_name=model.name or "",
|
2265
|
-
stream_namespace=None,
|
2266
|
-
config=config or {},
|
2267
|
-
stream_state_migrations=state_transformations,
|
2268
|
-
)
|
2269
|
-
return self._create_component_from_model(model=model.incremental_sync, config=config) # type: ignore[no-any-return] # Will be created Cursor as stream_slicer_model is model.incremental_sync
|
2270
|
-
return None
|
2271
|
-
|
2272
2173
|
def _build_concurrent_cursor(
|
2273
2174
|
self,
|
2274
2175
|
model: DeclarativeStreamModel,
|
2275
2176
|
stream_slicer: Optional[PartitionRouter],
|
2276
2177
|
config: Config,
|
2277
|
-
) ->
|
2178
|
+
) -> Cursor:
|
2179
|
+
stream_name = model.name or ""
|
2278
2180
|
stream_state = self._connector_state_manager.get_stream_state(
|
2279
|
-
stream_name=
|
2181
|
+
stream_name=stream_name, namespace=None
|
2280
2182
|
)
|
2281
2183
|
|
2282
2184
|
if model.state_migrations:
|
@@ -2296,20 +2198,20 @@ class ModelToComponentFactory:
|
|
2296
2198
|
state_manager=self._connector_state_manager,
|
2297
2199
|
model_type=DatetimeBasedCursorModel,
|
2298
2200
|
component_definition=model.incremental_sync.__dict__,
|
2299
|
-
stream_name=
|
2201
|
+
stream_name=stream_name,
|
2300
2202
|
stream_namespace=None,
|
2301
2203
|
config=config or {},
|
2302
2204
|
stream_state=stream_state,
|
2303
2205
|
stream_state_migrations=state_transformations,
|
2304
2206
|
partition_router=stream_slicer,
|
2305
|
-
attempt_to_create_cursor_if_not_provided=True,
|
2207
|
+
attempt_to_create_cursor_if_not_provided=True, # FIXME can we remove that now?
|
2306
2208
|
)
|
2307
2209
|
elif model.incremental_sync:
|
2308
2210
|
if type(model.incremental_sync) == IncrementingCountCursorModel:
|
2309
2211
|
return self.create_concurrent_cursor_from_incrementing_count_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
2310
2212
|
model_type=IncrementingCountCursorModel,
|
2311
2213
|
component_definition=model.incremental_sync.__dict__,
|
2312
|
-
stream_name=
|
2214
|
+
stream_name=stream_name,
|
2313
2215
|
stream_namespace=None,
|
2314
2216
|
config=config or {},
|
2315
2217
|
stream_state_migrations=state_transformations,
|
@@ -2318,7 +2220,7 @@ class ModelToComponentFactory:
|
|
2318
2220
|
return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
2319
2221
|
model_type=type(model.incremental_sync),
|
2320
2222
|
component_definition=model.incremental_sync.__dict__,
|
2321
|
-
stream_name=
|
2223
|
+
stream_name=stream_name,
|
2322
2224
|
stream_namespace=None,
|
2323
2225
|
config=config or {},
|
2324
2226
|
stream_state_migrations=state_transformations,
|
@@ -2328,45 +2230,7 @@ class ModelToComponentFactory:
|
|
2328
2230
|
raise ValueError(
|
2329
2231
|
f"Incremental sync of type {type(model.incremental_sync)} is not supported"
|
2330
2232
|
)
|
2331
|
-
return None
|
2332
|
-
|
2333
|
-
def _merge_stream_slicers(
|
2334
|
-
self, model: DeclarativeStreamModel, config: Config
|
2335
|
-
) -> Optional[StreamSlicer]:
|
2336
|
-
retriever_model = model.retriever
|
2337
|
-
|
2338
|
-
stream_slicer = self._build_stream_slicer_from_partition_router(
|
2339
|
-
retriever_model, config, stream_name=model.name
|
2340
|
-
)
|
2341
|
-
|
2342
|
-
if retriever_model.type == "AsyncRetriever":
|
2343
|
-
is_not_datetime_cursor = (
|
2344
|
-
model.incremental_sync.type != "DatetimeBasedCursor"
|
2345
|
-
if model.incremental_sync
|
2346
|
-
else None
|
2347
|
-
)
|
2348
|
-
is_partition_router = (
|
2349
|
-
bool(retriever_model.partition_router) if model.incremental_sync else None
|
2350
|
-
)
|
2351
|
-
|
2352
|
-
if is_not_datetime_cursor:
|
2353
|
-
# We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the
|
2354
|
-
# support or unordered slices (for example, when we trigger reports for January and February, the report
|
2355
|
-
# in February can be completed first). Once we have support for custom concurrent cursor or have a new
|
2356
|
-
# implementation available in the CDK, we can enable more cursors here.
|
2357
|
-
raise ValueError(
|
2358
|
-
"AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet."
|
2359
|
-
)
|
2360
|
-
|
2361
|
-
if is_partition_router and not stream_slicer:
|
2362
|
-
# Note that this development is also done in parallel to the per partition development which once merged
|
2363
|
-
# we could support here by calling create_concurrent_cursor_from_perpartition_cursor
|
2364
|
-
raise ValueError("Per partition state is not supported yet for AsyncRetriever.")
|
2365
|
-
|
2366
|
-
if model.incremental_sync:
|
2367
|
-
return self._build_incremental_cursor(model, stream_slicer, config)
|
2368
|
-
|
2369
|
-
return stream_slicer
|
2233
|
+
return FinalStateCursor(stream_name, None, self._message_repository)
|
2370
2234
|
|
2371
2235
|
def create_default_error_handler(
|
2372
2236
|
self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
|
@@ -3010,7 +2874,7 @@ class ModelToComponentFactory:
|
|
3010
2874
|
)
|
3011
2875
|
|
3012
2876
|
def create_parent_stream_config(
|
3013
|
-
self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
|
2877
|
+
self, model: ParentStreamConfigModel, config: Config, stream_name: str, **kwargs: Any
|
3014
2878
|
) -> ParentStreamConfig:
|
3015
2879
|
declarative_stream = self._create_component_from_model(
|
3016
2880
|
model.stream,
|
@@ -3263,7 +3127,6 @@ class ModelToComponentFactory:
|
|
3263
3127
|
*,
|
3264
3128
|
name: str,
|
3265
3129
|
primary_key: Optional[Union[str, List[str], List[List[str]]]],
|
3266
|
-
stream_slicer: Optional[StreamSlicer],
|
3267
3130
|
request_options_provider: Optional[RequestOptionsProvider] = None,
|
3268
3131
|
stop_condition_cursor: Optional[Cursor] = None,
|
3269
3132
|
client_side_incremental_sync: Optional[Dict[str, Any]] = None,
|
@@ -3278,7 +3141,7 @@ class ModelToComponentFactory:
|
|
3278
3141
|
log_formatter: Optional[Callable[[Response], Any]] = None,
|
3279
3142
|
**kwargs: Any,
|
3280
3143
|
) -> SimpleRetriever:
|
3281
|
-
def _get_url() -> str:
|
3144
|
+
def _get_url(req: Requester) -> str:
|
3282
3145
|
"""
|
3283
3146
|
Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever.
|
3284
3147
|
This is needed because the URL is not set until the requester is created.
|
@@ -3287,12 +3150,12 @@ class ModelToComponentFactory:
|
|
3287
3150
|
_url: str = (
|
3288
3151
|
model.requester.url
|
3289
3152
|
if hasattr(model.requester, "url") and model.requester.url is not None
|
3290
|
-
else
|
3153
|
+
else req.get_url(stream_state=None, stream_slice=None, next_page_token=None)
|
3291
3154
|
)
|
3292
3155
|
_url_base: str = (
|
3293
3156
|
model.requester.url_base
|
3294
3157
|
if hasattr(model.requester, "url_base") and model.requester.url_base is not None
|
3295
|
-
else
|
3158
|
+
else req.get_url(stream_state=None, stream_slice=None, next_page_token=None)
|
3296
3159
|
)
|
3297
3160
|
|
3298
3161
|
return _url or _url_base
|
@@ -3371,36 +3234,14 @@ class ModelToComponentFactory:
|
|
3371
3234
|
config=config,
|
3372
3235
|
)
|
3373
3236
|
|
3374
|
-
|
3375
|
-
cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None
|
3376
|
-
|
3377
|
-
if (
|
3378
|
-
not isinstance(stream_slicer, DatetimeBasedCursor)
|
3379
|
-
or type(stream_slicer) is not DatetimeBasedCursor
|
3380
|
-
):
|
3381
|
-
# Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
|
3382
|
-
# Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
|
3383
|
-
# their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
|
3384
|
-
# request_options_provider
|
3385
|
-
request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={})
|
3386
|
-
elif not request_options_provider:
|
3237
|
+
if not request_options_provider:
|
3387
3238
|
request_options_provider = DefaultRequestOptionsProvider(parameters={})
|
3388
3239
|
|
3389
|
-
stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
|
3390
|
-
if self._should_limit_slices_fetched():
|
3391
|
-
stream_slicer = cast(
|
3392
|
-
StreamSlicer,
|
3393
|
-
StreamSlicerTestReadDecorator(
|
3394
|
-
wrapped_slicer=stream_slicer,
|
3395
|
-
maximum_number_of_slices=self._limit_slices_fetched or 5,
|
3396
|
-
),
|
3397
|
-
)
|
3398
|
-
|
3399
3240
|
paginator = (
|
3400
3241
|
self._create_component_from_model(
|
3401
3242
|
model=model.paginator,
|
3402
3243
|
config=config,
|
3403
|
-
url_base=_get_url(),
|
3244
|
+
url_base=_get_url(requester),
|
3404
3245
|
extractor_model=model.record_selector.extractor,
|
3405
3246
|
decoder=decoder,
|
3406
3247
|
cursor_used_for_stop_condition=stop_condition_cursor or None,
|
@@ -3444,9 +3285,9 @@ class ModelToComponentFactory:
|
|
3444
3285
|
primary_key=primary_key,
|
3445
3286
|
requester=requester,
|
3446
3287
|
record_selector=record_selector,
|
3447
|
-
stream_slicer=
|
3288
|
+
stream_slicer=_NO_STREAM_SLICING,
|
3448
3289
|
request_option_provider=request_options_provider,
|
3449
|
-
cursor=
|
3290
|
+
cursor=None,
|
3450
3291
|
config=config,
|
3451
3292
|
ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
|
3452
3293
|
parameters=model.parameters or {},
|
@@ -3458,9 +3299,9 @@ class ModelToComponentFactory:
|
|
3458
3299
|
primary_key=primary_key,
|
3459
3300
|
requester=requester,
|
3460
3301
|
record_selector=record_selector,
|
3461
|
-
stream_slicer=
|
3302
|
+
stream_slicer=_NO_STREAM_SLICING,
|
3462
3303
|
request_option_provider=request_options_provider,
|
3463
|
-
cursor=
|
3304
|
+
cursor=None,
|
3464
3305
|
config=config,
|
3465
3306
|
ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
|
3466
3307
|
additional_query_properties=query_properties,
|
@@ -3531,14 +3372,21 @@ class ModelToComponentFactory:
|
|
3531
3372
|
f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
|
3532
3373
|
)
|
3533
3374
|
|
3534
|
-
stream_model = (
|
3375
|
+
stream_model = self._get_state_delegating_stream_model(
|
3376
|
+
False if has_parent_state is None else has_parent_state, model
|
3377
|
+
)
|
3378
|
+
|
3379
|
+
return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # DeclarativeStream will be created as stream_model is alwyas DeclarativeStreamModel
|
3380
|
+
|
3381
|
+
def _get_state_delegating_stream_model(
|
3382
|
+
self, has_parent_state: bool, model: StateDelegatingStreamModel
|
3383
|
+
) -> DeclarativeStreamModel:
|
3384
|
+
return (
|
3535
3385
|
model.incremental_stream
|
3536
3386
|
if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
|
3537
3387
|
else model.full_refresh_stream
|
3538
3388
|
)
|
3539
3389
|
|
3540
|
-
return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # Will be created DeclarativeStream as stream_model is stream description
|
3541
|
-
|
3542
3390
|
def _create_async_job_status_mapping(
|
3543
3391
|
self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any
|
3544
3392
|
) -> Mapping[str, AsyncJobStatus]:
|
@@ -3583,12 +3431,14 @@ class ModelToComponentFactory:
|
|
3583
3431
|
transformations: List[RecordTransformation],
|
3584
3432
|
**kwargs: Any,
|
3585
3433
|
) -> AsyncRetriever:
|
3586
|
-
def _get_download_retriever(
|
3434
|
+
def _get_download_retriever(
|
3435
|
+
requester: Requester, extractor: RecordExtractor, _decoder: Decoder
|
3436
|
+
) -> SimpleRetriever:
|
3587
3437
|
# We create a record selector for the download retriever
|
3588
3438
|
# with no schema normalization and no transformations, neither record filter
|
3589
3439
|
# as all this occurs in the record_selector of the AsyncRetriever
|
3590
3440
|
record_selector = RecordSelector(
|
3591
|
-
extractor=
|
3441
|
+
extractor=extractor,
|
3592
3442
|
name=name,
|
3593
3443
|
record_filter=None,
|
3594
3444
|
transformations=[],
|
@@ -3599,7 +3449,7 @@ class ModelToComponentFactory:
|
|
3599
3449
|
paginator = (
|
3600
3450
|
self._create_component_from_model(
|
3601
3451
|
model=model.download_paginator,
|
3602
|
-
decoder=
|
3452
|
+
decoder=_decoder,
|
3603
3453
|
config=config,
|
3604
3454
|
url_base="",
|
3605
3455
|
)
|
@@ -3608,7 +3458,7 @@ class ModelToComponentFactory:
|
|
3608
3458
|
)
|
3609
3459
|
|
3610
3460
|
return SimpleRetriever(
|
3611
|
-
requester=
|
3461
|
+
requester=requester,
|
3612
3462
|
record_selector=record_selector,
|
3613
3463
|
primary_key=None,
|
3614
3464
|
name=name,
|
@@ -3702,7 +3552,9 @@ class ModelToComponentFactory:
|
|
3702
3552
|
config=config,
|
3703
3553
|
name=job_download_components_name,
|
3704
3554
|
)
|
3705
|
-
download_retriever = _get_download_retriever(
|
3555
|
+
download_retriever = _get_download_retriever(
|
3556
|
+
download_requester, download_extractor, download_decoder
|
3557
|
+
)
|
3706
3558
|
abort_requester = (
|
3707
3559
|
self._create_component_from_model(
|
3708
3560
|
model=model.abort_requester,
|
@@ -3846,31 +3698,106 @@ class ModelToComponentFactory:
|
|
3846
3698
|
)
|
3847
3699
|
|
3848
3700
|
def _create_message_repository_substream_wrapper(
|
3849
|
-
self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
|
3701
|
+
self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any
|
3850
3702
|
) -> Any:
|
3703
|
+
# getting the parent state
|
3704
|
+
child_state = self._connector_state_manager.get_stream_state(
|
3705
|
+
stream_name, None
|
3706
|
+
)
|
3707
|
+
|
3708
|
+
# This flag will be used exclusively for StateDelegatingStream when a parent stream is created
|
3709
|
+
has_parent_state = bool(
|
3710
|
+
self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
|
3711
|
+
if model.incremental_dependency
|
3712
|
+
else False
|
3713
|
+
)
|
3714
|
+
connector_state_manager = self._instantiate_parent_stream_state_manager(
|
3715
|
+
child_state, config, model, has_parent_state
|
3716
|
+
)
|
3717
|
+
|
3851
3718
|
substream_factory = ModelToComponentFactory(
|
3719
|
+
connector_state_manager=connector_state_manager,
|
3852
3720
|
limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
|
3853
3721
|
limit_slices_fetched=self._limit_slices_fetched,
|
3854
3722
|
emit_connector_builder_messages=self._emit_connector_builder_messages,
|
3855
3723
|
disable_retries=self._disable_retries,
|
3856
3724
|
disable_cache=self._disable_cache,
|
3857
|
-
message_repository=
|
3858
|
-
|
3859
|
-
|
3860
|
-
|
3725
|
+
message_repository=StateFilteringMessageRepository(
|
3726
|
+
LogAppenderMessageRepositoryDecorator(
|
3727
|
+
{
|
3728
|
+
"airbyte_cdk": {"stream": {"is_substream": True}},
|
3729
|
+
"http": {"is_auxiliary": True},
|
3730
|
+
},
|
3731
|
+
self._message_repository,
|
3732
|
+
self._evaluate_log_level(self._emit_connector_builder_messages),
|
3733
|
+
),
|
3861
3734
|
),
|
3862
3735
|
)
|
3863
3736
|
|
3864
|
-
|
3865
|
-
|
3866
|
-
self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
|
3867
|
-
if model.incremental_dependency
|
3868
|
-
else False
|
3869
|
-
)
|
3870
|
-
return substream_factory._create_component_from_model(
|
3871
|
-
model=model, config=config, has_parent_state=has_parent_state, **kwargs
|
3737
|
+
return substream_factory.create_parent_stream_config(
|
3738
|
+
model=model, config=config, stream_name=stream_name, **kwargs
|
3872
3739
|
)
|
3873
3740
|
|
3741
|
+
def _instantiate_parent_stream_state_manager(
|
3742
|
+
self,
|
3743
|
+
child_state: MutableMapping[str, Any],
|
3744
|
+
config: Config,
|
3745
|
+
model: ParentStreamConfigModel,
|
3746
|
+
has_parent_state: bool,
|
3747
|
+
) -> ConnectorStateManager:
|
3748
|
+
"""
|
3749
|
+
With DefaultStream, the state needs to be provided during __init__ of the cursor as opposed to the
|
3750
|
+
`set_initial_state` flow that existed for the declarative cursors. This state is taken from
|
3751
|
+
self._connector_state_manager.get_stream_state (`self` being a newly created ModelToComponentFactory to account
|
3752
|
+
for the MessageRepository being different). So we need to pass a ConnectorStateManager to the
|
3753
|
+
ModelToComponentFactory that has the parent states. This method populates this if there is a child state and if
|
3754
|
+
incremental_dependency is set.
|
3755
|
+
"""
|
3756
|
+
if model.incremental_dependency and child_state:
|
3757
|
+
parent_stream_name = model.stream.name or ""
|
3758
|
+
parent_state = ConcurrentPerPartitionCursor.get_parent_state(
|
3759
|
+
child_state, parent_stream_name
|
3760
|
+
)
|
3761
|
+
|
3762
|
+
if not parent_state:
|
3763
|
+
# there are two migration cases: state value from child stream or from global state
|
3764
|
+
parent_state = ConcurrentPerPartitionCursor.get_global_state(
|
3765
|
+
child_state, parent_stream_name
|
3766
|
+
)
|
3767
|
+
|
3768
|
+
if not parent_state and not isinstance(parent_state, dict):
|
3769
|
+
cursor_values = child_state.values()
|
3770
|
+
if cursor_values:
|
3771
|
+
incremental_sync_model: Union[
|
3772
|
+
DatetimeBasedCursorModel,
|
3773
|
+
IncrementingCountCursorModel,
|
3774
|
+
CustomIncrementalSyncModel,
|
3775
|
+
] = (
|
3776
|
+
model.stream.incremental_sync # type: ignore # if we are there, it is because there is incremental_dependency and therefore there is an incremental_sync on the parent stream
|
3777
|
+
if isinstance(model.stream, DeclarativeStreamModel)
|
3778
|
+
else self._get_state_delegating_stream_model(
|
3779
|
+
has_parent_state, model.stream
|
3780
|
+
).incremental_sync
|
3781
|
+
)
|
3782
|
+
cursor_field = InterpolatedString.create(
|
3783
|
+
incremental_sync_model.cursor_field,
|
3784
|
+
parameters=incremental_sync_model.parameters or {},
|
3785
|
+
).eval(config)
|
3786
|
+
parent_state = AirbyteStateMessage(
|
3787
|
+
type=AirbyteStateType.STREAM,
|
3788
|
+
stream=AirbyteStreamState(
|
3789
|
+
stream_descriptor=StreamDescriptor(
|
3790
|
+
name=parent_stream_name, namespace=None
|
3791
|
+
),
|
3792
|
+
stream_state=AirbyteStateBlob(
|
3793
|
+
{cursor_field: list(cursor_values)[0]}
|
3794
|
+
),
|
3795
|
+
),
|
3796
|
+
)
|
3797
|
+
return ConnectorStateManager([parent_state] if parent_state else [])
|
3798
|
+
|
3799
|
+
return ConnectorStateManager([])
|
3800
|
+
|
3874
3801
|
@staticmethod
|
3875
3802
|
def create_wait_time_from_header(
|
3876
3803
|
model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
|
@@ -3951,6 +3878,7 @@ class ModelToComponentFactory:
|
|
3951
3878
|
|
3952
3879
|
return HttpComponentsResolver(
|
3953
3880
|
retriever=retriever,
|
3881
|
+
stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config),
|
3954
3882
|
config=config,
|
3955
3883
|
components_mapping=components_mapping,
|
3956
3884
|
parameters=model.parameters or {},
|
@@ -4176,7 +4104,9 @@ class ModelToComponentFactory:
|
|
4176
4104
|
self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
|
4177
4105
|
) -> GroupingPartitionRouter:
|
4178
4106
|
underlying_router = self._create_component_from_model(
|
4179
|
-
model=model.underlying_partition_router,
|
4107
|
+
model=model.underlying_partition_router,
|
4108
|
+
config=config,
|
4109
|
+
**kwargs,
|
4180
4110
|
)
|
4181
4111
|
if model.group_size < 1:
|
4182
4112
|
raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
|