airbyte-cdk 6.61.6__py3-none-any.whl → 6.61.6.post3.dev17473738577__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/legacy/sources/declarative/incremental/per_partition_cursor.py +4 -2
- airbyte_cdk/manifest_server/api_models/__init__.py +2 -0
- airbyte_cdk/manifest_server/api_models/manifest.py +12 -0
- airbyte_cdk/manifest_server/api_models/stream.py +2 -2
- airbyte_cdk/manifest_server/helpers/__init__.py +0 -0
- airbyte_cdk/manifest_server/helpers/tracing.py +36 -0
- airbyte_cdk/manifest_server/routers/manifest.py +37 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +2 -2
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +57 -7
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +4 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +229 -281
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +0 -6
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +0 -6
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +1 -23
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +0 -6
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +88 -107
- airbyte_cdk/sources/declarative/requesters/request_options/per_partition_request_option_provider.py +95 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +4 -1
- airbyte_cdk/sources/declarative/retrievers/retriever.py +5 -0
- airbyte_cdk/sources/message/repository.py +20 -0
- {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.61.6.post3.dev17473738577.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.61.6.post3.dev17473738577.dist-info}/RECORD +28 -25
- /airbyte_cdk/manifest_server/{auth.py → helpers/auth.py} +0 -0
- {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.61.6.post3.dev17473738577.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.61.6.post3.dev17473738577.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.61.6.post3.dev17473738577.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.61.6.dist-info → airbyte_cdk-6.61.6.post3.dev17473738577.dist-info}/entry_points.txt +0 -0
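The excerpt below covers `airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py`. A comparable diff can be reproduced locally from the two wheels using only the Python standard library; in this sketch the wheel file names are assumptions derived from the version strings above.

# Minimal sketch: compare one module across two downloaded wheels (file names are placeholders).
import difflib
import zipfile

OLD_WHEEL = "airbyte_cdk-6.61.6-py3-none-any.whl"
NEW_WHEEL = "airbyte_cdk-6.61.6.post3.dev17473738577-py3-none-any.whl"
MEMBER = "airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py"

def read_member(wheel_path: str, member: str) -> list[str]:
    # A wheel is a zip archive, so its modules can be read without installing it.
    with zipfile.ZipFile(wheel_path) as wheel:
        return wheel.read(member).decode("utf-8").splitlines(keepends=True)

diff = difflib.unified_diff(
    read_member(OLD_WHEEL, MEMBER),
    read_member(NEW_WHEEL, MEMBER),
    fromfile=f"{OLD_WHEEL}/{MEMBER}",
    tofile=f"{NEW_WHEEL}/{MEMBER}",
)
print("".join(diff))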
--- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
+++ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
@@ -33,7 +33,15 @@ from requests import Response
 from airbyte_cdk.connector_builder.models import (
     LogMessage as ConnectorBuilderLogMessage,
 )
-from airbyte_cdk.models import
+from airbyte_cdk.models import (
+    AirbyteStateBlob,
+    AirbyteStateMessage,
+    AirbyteStateType,
+    AirbyteStreamState,
+    FailureType,
+    Level,
+    StreamDescriptor,
+)
 from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
 from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
 from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker
@@ -90,6 +98,7 @@ from airbyte_cdk.sources.declarative.extractors import (
     RecordSelector,
     ResponseToFileExtractor,
 )
+from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
 from airbyte_cdk.sources.declarative.extractors.record_filter import (
     ClientSideIncrementalRecordFilterDecorator,
 )
@@ -98,7 +107,6 @@ from airbyte_cdk.sources.declarative.incremental import (
     ConcurrentPerPartitionCursor,
     CursorFactory,
     DatetimeBasedCursor,
-    DeclarativeCursor,
     GlobalSubstreamCursor,
     PerPartitionWithGlobalCursor,
 )
@@ -500,8 +508,11 @@ from airbyte_cdk.sources.declarative.requesters.request_options import (
     InterpolatedRequestOptionsProvider,
     RequestOptionsProvider,
 )
+from airbyte_cdk.sources.declarative.requesters.request_options.per_partition_request_option_provider import (
+    PerPartitionRequestOptionsProvider,
+)
 from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
-from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
+from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester
 from airbyte_cdk.sources.declarative.resolvers import (
     ComponentMappingDefinition,
     ConfigComponentsResolver,
@@ -583,6 +594,7 @@ from airbyte_cdk.sources.message import (
     MessageRepository,
     NoopMessageRepository,
 )
+from airbyte_cdk.sources.message.repository import StateFilteringMessageRepository
 from airbyte_cdk.sources.streams.call_rate import (
     APIBudget,
     FixedWindowCallRatePolicy,
@@ -630,6 +642,7 @@ SCHEMA_TRANSFORMER_TYPE_MAPPING = {
     SchemaNormalizationModel.None_: TransformConfig.NoTransform,
     SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
 }
+_NO_STREAM_SLICING = SinglePartitionRouter(parameters={})

 # Ideally this should use the value defined in ConcurrentDeclarativeSource, but
 # this would be a circular import
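The new module-level `_NO_STREAM_SLICING = SinglePartitionRouter(parameters={})` sentinel gives retrievers an explicit "no slicing" default now that slicing and request-option concerns are pulled out of the retriever (see the SimpleRetriever hunks further down). Conceptually, a single-partition router just yields one empty slice so iteration code never needs a None check; a minimal stand-in (not the CDK class) looks like this:

# Illustrative stand-in only; the real SinglePartitionRouter lives in the CDK.
from typing import Any, Dict, Iterable

class NoSlicing:
    def stream_slices(self) -> Iterable[Dict[str, Any]]:
        # One empty slice: the stream is read exactly once, with no partition values.
        yield {}

for partition in NoSlicing().stream_slices():
    print(partition)  # {}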
@@ -702,7 +715,7 @@ class ModelToComponentFactory:
             CustomValidationStrategyModel: self.create_custom_component,
             CustomConfigTransformationModel: self.create_custom_component,
             DatetimeBasedCursorModel: self.create_datetime_based_cursor,
-            DeclarativeStreamModel: self.
+            DeclarativeStreamModel: self.create_default_stream,
             DefaultErrorHandlerModel: self.create_default_error_handler,
             DefaultPaginatorModel: self.create_default_paginator,
             DpathExtractorModel: self.create_dpath_extractor,
@@ -739,7 +752,7 @@ class ModelToComponentFactory:
             OAuthAuthenticatorModel: self.create_oauth_authenticator,
             OffsetIncrementModel: self.create_offset_increment,
             PageIncrementModel: self.create_page_increment,
-            ParentStreamConfigModel: self.
+            ParentStreamConfigModel: self.create_parent_stream_config_with_substream_wrapper,
             PredicateValidatorModel: self.create_predicate_validator,
             PropertiesFromEndpointModel: self.create_properties_from_endpoint,
             PropertyChunkingModel: self.create_property_chunking,
@@ -1291,19 +1304,20 @@ class ModelToComponentFactory:
                 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
             )

+        model_parameters = datetime_based_cursor_model.parameters or {}
         interpolated_cursor_field = InterpolatedString.create(
             datetime_based_cursor_model.cursor_field,
-            parameters=
+            parameters=model_parameters,
         )
         cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

         interpolated_partition_field_start = InterpolatedString.create(
             datetime_based_cursor_model.partition_field_start or "start_time",
-            parameters=
+            parameters=model_parameters,
         )
         interpolated_partition_field_end = InterpolatedString.create(
             datetime_based_cursor_model.partition_field_end or "end_time",
-            parameters=
+            parameters=model_parameters,
         )

         slice_boundary_fields = (
@@ -1323,7 +1337,7 @@ class ModelToComponentFactory:
         interpolated_lookback_window = (
             InterpolatedString.create(
                 datetime_based_cursor_model.lookback_window,
-                parameters=
+                parameters=model_parameters,
             )
             if datetime_based_cursor_model.lookback_window
             else None
@@ -1409,7 +1423,7 @@ class ModelToComponentFactory:
         interpolated_step = (
             InterpolatedString.create(
                 datetime_based_cursor_model.step,
-                parameters=
+                parameters=model_parameters,
             )
             if datetime_based_cursor_model.step
             else None
@@ -1426,7 +1440,7 @@ class ModelToComponentFactory:
             # object which we want to keep agnostic of being low-code
             target = InterpolatedString(
                 string=datetime_based_cursor_model.clamping.target,
-                parameters=
+                parameters=model_parameters,
             )
             evaluated_target = target.eval(config=config)
             match evaluated_target:
@@ -1603,6 +1617,10 @@ class ModelToComponentFactory:

         interpolated_cursor_field = InterpolatedString.create(
             datetime_based_cursor_model.cursor_field,
+            # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases:
+            # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters`
+            # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters`
+            # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory.
             parameters=datetime_based_cursor_model.parameters or {},
         )
         cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
@@ -1634,7 +1652,7 @@ class ModelToComponentFactory:
                 stream_namespace=stream_namespace,
                 config=config,
                 message_repository=NoopMessageRepository(),
-                stream_state_migrations=stream_state_migrations,
+                # stream_state_migrations=stream_state_migrations, # FIXME is it expected to run migration on per partition state too?
             )
         )

@@ -1730,7 +1748,11 @@ class ModelToComponentFactory:

            if self._is_component(model_value):
                model_args[model_field] = self._create_nested_component(
-                    model,
+                    model,
+                    model_field,
+                    model_value,
+                    config,
+                    **kwargs,
                )
            elif isinstance(model_value, list):
                vals = []
@@ -1742,7 +1764,15 @@ class ModelToComponentFactory:
                    if derived_type:
                        v["type"] = derived_type
                    if self._is_component(v):
-                        vals.append(
+                        vals.append(
+                            self._create_nested_component(
+                                model,
+                                model_field,
+                                v,
+                                config,
+                                **kwargs,
+                            )
+                        )
                    else:
                        vals.append(v)
                model_args[model_field] = vals
@@ -1832,7 +1862,7 @@ class ModelToComponentFactory:
            return []

    def _create_nested_component(
-        self, model: Any, model_field: str, model_value: Any, config: Config
+        self, model: Any, model_field: str, model_value: Any, config: Config, **kwargs: Any
    ) -> Any:
        type_name = model_value.get("type", None)
        if not type_name:
@@ -1857,8 +1887,11 @@ class ModelToComponentFactory:
                    for kwarg in constructor_kwargs
                    if kwarg in model_parameters
                }
+                matching_kwargs = {
+                    kwarg: kwargs[kwarg] for kwarg in constructor_kwargs if kwarg in kwargs
+                }
                return self._create_component_from_model(
-                    model=parsed_model, config=config, **matching_parameters
+                    model=parsed_model, config=config, **(matching_parameters | matching_kwargs)
                )
            except TypeError as error:
                missing_parameters = self._extract_missing_parameters(error)
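The merged call above relies on the dict union operator (`|`, Python 3.9+), where keys from the right-hand operand win on collisions, so an explicitly forwarded kwarg overrides a value of the same name coming from the component's `$parameters`:

# Right-hand side wins on duplicate keys, exactly like {**left, **right}.
matching_parameters = {"stream_name": "from_parameters", "config": {"a": 1}}
matching_kwargs = {"stream_name": "from_kwargs"}

merged = matching_parameters | matching_kwargs
print(merged)  # {'stream_name': 'from_kwargs', 'config': {'a': 1}}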
@@ -1942,13 +1975,17 @@ class ModelToComponentFactory:
            parameters=model.parameters or {},
        )

-    def
+    def create_default_stream(
        self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any
    ) -> Union[DeclarativeStream, AbstractStream]:
        primary_key = model.primary_key.__root__ if model.primary_key else None

+        partition_router = self._build_stream_slicer_from_partition_router(
+            model.retriever, config, stream_name=model.name
+        )
+        concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config)
        if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
-            cursor_model = model.incremental_sync
+            cursor_model: DatetimeBasedCursorModel = model.incremental_sync

            end_time_option = (
                self._create_component_from_model(
@@ -1965,17 +2002,29 @@ class ModelToComponentFactory:
                else None
            )

-
+            datetime_request_options_provider = DatetimeBasedRequestOptionsProvider(
                start_time_option=start_time_option,
                end_time_option=end_time_option,
-                partition_field_start=cursor_model.
+                partition_field_start=cursor_model.partition_field_start,
                partition_field_end=cursor_model.partition_field_end,
                config=config,
                parameters=model.parameters or {},
            )
+            request_options_provider = (
+                datetime_request_options_provider
+                if not isinstance(concurrent_cursor, ConcurrentPerPartitionCursor)
+                else PerPartitionRequestOptionsProvider(
+                    partition_router, datetime_request_options_provider
+                )
+            )
        elif model.incremental_sync and isinstance(
            model.incremental_sync, IncrementingCountCursorModel
        ):
+            if isinstance(concurrent_cursor, ConcurrentPerPartitionCursor):
+                raise ValueError(
+                    "PerPartition does not support per partition states because switching to global state is time based"
+                )
+
            cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore

            start_time_option = (
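The `PerPartitionRequestOptionsProvider` wired in above lives in the new `per_partition_request_option_provider.py`, whose body is not shown in this excerpt; per the call site it wraps the partition router together with the datetime-based provider. As a rough mental model only (a hypothetical sketch, not the CDK implementation), such a provider could delegate to both sources and merge their request parameters:

# Hypothetical sketch: combine per-partition options from a partition router
# with the options produced by a wrapped datetime-based provider.
from typing import Any, Mapping, Optional

class PerPartitionOptionsSketch:
    def __init__(self, partition_router: Any, wrapped_provider: Any) -> None:
        self._partition_router = partition_router
        self._wrapped_provider = wrapped_provider

    def get_request_params(
        self, *, stream_slice: Optional[Mapping[str, Any]] = None, **kwargs: Any
    ) -> Mapping[str, Any]:
        # Partition-level options (e.g. a parent id) plus cursor-window options.
        params = dict(self._partition_router.get_request_params(stream_slice=stream_slice, **kwargs))
        params.update(self._wrapped_provider.get_request_params(stream_slice=stream_slice, **kwargs))
        return params

class _FakePartitionRouter:
    def get_request_params(self, *, stream_slice=None, **kwargs):
        return {"account_id": (stream_slice or {}).get("account_id")}

class _FakeDatetimeProvider:
    def get_request_params(self, *, stream_slice=None, **kwargs):
        return {"updated_after": "2024-01-01"}

provider = PerPartitionOptionsSketch(_FakePartitionRouter(), _FakeDatetimeProvider())
print(provider.get_request_params(stream_slice={"account_id": "42"}))
# {'account_id': '42', 'updated_after': '2024-01-01'}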
@@ -2013,22 +2062,19 @@ class ModelToComponentFactory:
                model=model.file_uploader, config=config
            )

-
-
-
-
-        combined_slicers = self._merge_stream_slicers(model=model, config=config)
-        partition_router = self._build_stream_slicer_from_partition_router(
-            model.retriever, config, stream_name=model.name
+        stream_slicer: ConcurrentStreamSlicer = (
+            partition_router
+            if isinstance(concurrent_cursor, FinalStateCursor)
+            else concurrent_cursor
        )
-        concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config)
        retriever = self._create_component_from_model(
            model=model.retriever,
            config=config,
            name=model.name,
            primary_key=primary_key,
-            stream_slicer=combined_slicers,
            request_options_provider=request_options_provider,
+            stream_slicer=stream_slicer,
+            partition_router=partition_router,
            stop_condition_cursor=concurrent_cursor
            if self._is_stop_condition_on_cursor(model)
            else None,
@@ -2039,6 +2085,8 @@ class ModelToComponentFactory:
            file_uploader=file_uploader,
            incremental_sync=model.incremental_sync,
        )
+        if isinstance(retriever, AsyncRetriever):
+            stream_slicer = retriever.stream_slicer

        schema_loader: Union[
            CompositeSchemaLoader,
@@ -2066,89 +2114,27 @@ class ModelToComponentFactory:
            options["name"] = model.name
            schema_loader = DefaultSchemaLoader(config=config, parameters=options)

-
-
-
-                or isinstance(concurrent_cursor, ConcurrentCursor)
-            )
-            and not self._emit_connector_builder_messages
-            and not is_parent
-        ):
-            # We are starting to migrate streams to instantiate directly the DefaultStream instead of instantiating the
-            # DeclarativeStream and assembling the DefaultStream from that. The plan is the following:
-            # * Streams without partition router nor cursors and streams with only partition router. This is the `isinstance(combined_slicers, PartitionRouter)` condition as the first kind with have a SinglePartitionRouter
-            # * Streams without partition router but with cursor. This is the `isinstance(concurrent_cursor, ConcurrentCursor)` condition
-            # * Streams with both partition router and cursor
-            # We specifically exclude parent streams here because SubstreamPartitionRouter has not been updated yet
-            # We specifically exclude Connector Builder stuff for now as Brian is working on this anyway
-
-            stream_name = model.name or ""
-            stream_slicer: ConcurrentStreamSlicer = (
-                concurrent_cursor if concurrent_cursor else SinglePartitionRouter(parameters={})
-            )
-            cursor: Cursor = FinalStateCursor(stream_name, None, self._message_repository)
-            if isinstance(retriever, AsyncRetriever):
-                # The AsyncRetriever only ever worked with a cursor from the concurrent package. Hence, the method
-                # `_build_incremental_cursor` which we would usually think would return only declarative stuff has a
-                # special clause and return a concurrent cursor. This stream slicer is passed to AsyncRetriever when
-                # built because the async retriever has a specific partition router which relies on this stream slicer.
-                # We can't re-use `concurrent_cursor` because it is a different instance than the one passed in
-                # AsyncJobPartitionRouter.
-                stream_slicer = retriever.stream_slicer
-            if isinstance(combined_slicers, Cursor):
-                cursor = combined_slicers
-            elif isinstance(combined_slicers, PartitionRouter):
-                stream_slicer = combined_slicers
-            elif concurrent_cursor:
-                cursor = concurrent_cursor
-
-            # FIXME to be removed once we migrate everything to DefaultStream
-            if isinstance(retriever, SimpleRetriever):
-                # We zero it out here, but since this is a cursor reference, the state is still properly
-                # instantiated for the other components that reference it
-                retriever.cursor = None
-
-            partition_generator = StreamSlicerPartitionGenerator(
+        stream_name = model.name or ""
+        return DefaultStream(
+            partition_generator=StreamSlicerPartitionGenerator(
                DeclarativePartitionFactory(
                    stream_name,
                    schema_loader,
                    retriever,
                    self._message_repository,
                ),
-                stream_slicer
-
-
-
-
-
-
-
-
-
-
-
-                cursor=cursor,
-                supports_file_transfer=hasattr(model, "file_uploader")
-                and bool(model.file_uploader),
-            )
-
-        cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
-        if model.state_migrations:
-            state_transformations = [
-                self._create_component_from_model(state_migration, config, declarative_stream=model)
-                for state_migration in model.state_migrations
-            ]
-        else:
-            state_transformations = []
-        return DeclarativeStream(
-            name=model.name or "",
-            primary_key=primary_key,
-            retriever=retriever,
-            schema_loader=schema_loader,
-            stream_cursor_field=cursor_field or "",
-            state_migrations=state_transformations,
-            config=config,
-            parameters=model.parameters or {},
+                stream_slicer,
+                slice_limit=self._limit_slices_fetched,
+            ),
+            name=stream_name,
+            json_schema=schema_loader.get_json_schema,
+            primary_key=get_primary_key_from_stream(primary_key),
+            cursor_field=concurrent_cursor.cursor_field.cursor_field_key
+            if hasattr(concurrent_cursor, "cursor_field")
+            else "",  # FIXME we should have the cursor field has part of the interface of cursor,
+            logger=logging.getLogger(f"airbyte.{stream_name}"),
+            cursor=concurrent_cursor,
+            supports_file_transfer=hasattr(model, "file_uploader") and bool(model.file_uploader),
        )

    def _is_stop_condition_on_cursor(self, model: DeclarativeStreamModel) -> bool:
@@ -2197,86 +2183,15 @@ class ModelToComponentFactory:
        )
        return SinglePartitionRouter(parameters={})

-    def _build_incremental_cursor(
-        self,
-        model: DeclarativeStreamModel,
-        stream_slicer: Optional[PartitionRouter],
-        config: Config,
-    ) -> Optional[StreamSlicer]:
-        state_transformations = (
-            [
-                self._create_component_from_model(state_migration, config, declarative_stream=model)
-                for state_migration in model.state_migrations
-            ]
-            if model.state_migrations
-            else []
-        )
-
-        if model.incremental_sync and (
-            stream_slicer and not isinstance(stream_slicer, SinglePartitionRouter)
-        ):
-            if model.retriever.type == "AsyncRetriever":
-                stream_name = model.name or ""
-                stream_namespace = None
-                stream_state = self._connector_state_manager.get_stream_state(
-                    stream_name, stream_namespace
-                )
-
-                return self.create_concurrent_cursor_from_perpartition_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
-                    state_manager=self._connector_state_manager,
-                    model_type=DatetimeBasedCursorModel,
-                    component_definition=model.incremental_sync.__dict__,
-                    stream_name=stream_name,
-                    stream_namespace=stream_namespace,
-                    config=config or {},
-                    stream_state=stream_state,
-                    stream_state_migrations=state_transformations,
-                    partition_router=stream_slicer,
-                )
-
-            incremental_sync_model = model.incremental_sync
-            cursor_component = self._create_component_from_model(
-                model=incremental_sync_model, config=config
-            )
-            is_global_cursor = (
-                hasattr(incremental_sync_model, "global_substream_cursor")
-                and incremental_sync_model.global_substream_cursor
-            )
-
-            if is_global_cursor:
-                return GlobalSubstreamCursor(
-                    stream_cursor=cursor_component, partition_router=stream_slicer
-                )
-            return PerPartitionWithGlobalCursor(
-                cursor_factory=CursorFactory(
-                    lambda: self._create_component_from_model(
-                        model=incremental_sync_model, config=config
-                    ),
-                ),
-                partition_router=stream_slicer,
-                stream_cursor=cursor_component,
-            )
-        elif model.incremental_sync:
-            if model.retriever.type == "AsyncRetriever":
-                return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
-                    model_type=DatetimeBasedCursorModel,
-                    component_definition=model.incremental_sync.__dict__,
-                    stream_name=model.name or "",
-                    stream_namespace=None,
-                    config=config or {},
-                    stream_state_migrations=state_transformations,
-                )
-            return self._create_component_from_model(model=model.incremental_sync, config=config)  # type: ignore[no-any-return] # Will be created Cursor as stream_slicer_model is model.incremental_sync
-        return None
-
    def _build_concurrent_cursor(
        self,
        model: DeclarativeStreamModel,
        stream_slicer: Optional[PartitionRouter],
        config: Config,
-    ) ->
+    ) -> Cursor:
+        stream_name = model.name or ""
        stream_state = self._connector_state_manager.get_stream_state(
-            stream_name=
+            stream_name=stream_name, namespace=None
        )

        if model.state_migrations:
@@ -2296,20 +2211,20 @@ class ModelToComponentFactory:
                state_manager=self._connector_state_manager,
                model_type=DatetimeBasedCursorModel,
                component_definition=model.incremental_sync.__dict__,
-                stream_name=
+                stream_name=stream_name,
                stream_namespace=None,
                config=config or {},
                stream_state=stream_state,
                stream_state_migrations=state_transformations,
                partition_router=stream_slicer,
-                attempt_to_create_cursor_if_not_provided=True,
+                attempt_to_create_cursor_if_not_provided=True,  # FIXME can we remove that now?
            )
        elif model.incremental_sync:
            if type(model.incremental_sync) == IncrementingCountCursorModel:
                return self.create_concurrent_cursor_from_incrementing_count_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
                    model_type=IncrementingCountCursorModel,
                    component_definition=model.incremental_sync.__dict__,
-                    stream_name=
+                    stream_name=stream_name,
                    stream_namespace=None,
                    config=config or {},
                    stream_state_migrations=state_transformations,
@@ -2318,7 +2233,7 @@ class ModelToComponentFactory:
                return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
                    model_type=type(model.incremental_sync),
                    component_definition=model.incremental_sync.__dict__,
-                    stream_name=
+                    stream_name=stream_name,
                    stream_namespace=None,
                    config=config or {},
                    stream_state_migrations=state_transformations,
@@ -2328,45 +2243,7 @@ class ModelToComponentFactory:
            raise ValueError(
                f"Incremental sync of type {type(model.incremental_sync)} is not supported"
            )
-        return None
-
-    def _merge_stream_slicers(
-        self, model: DeclarativeStreamModel, config: Config
-    ) -> Optional[StreamSlicer]:
-        retriever_model = model.retriever
-
-        stream_slicer = self._build_stream_slicer_from_partition_router(
-            retriever_model, config, stream_name=model.name
-        )
-
-        if retriever_model.type == "AsyncRetriever":
-            is_not_datetime_cursor = (
-                model.incremental_sync.type != "DatetimeBasedCursor"
-                if model.incremental_sync
-                else None
-            )
-            is_partition_router = (
-                bool(retriever_model.partition_router) if model.incremental_sync else None
-            )
-
-            if is_not_datetime_cursor:
-                # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the
-                # support or unordered slices (for example, when we trigger reports for January and February, the report
-                # in February can be completed first). Once we have support for custom concurrent cursor or have a new
-                # implementation available in the CDK, we can enable more cursors here.
-                raise ValueError(
-                    "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet."
-                )
-
-            if is_partition_router and not stream_slicer:
-                # Note that this development is also done in parallel to the per partition development which once merged
-                # we could support here by calling create_concurrent_cursor_from_perpartition_cursor
-                raise ValueError("Per partition state is not supported yet for AsyncRetriever.")
-
-        if model.incremental_sync:
-            return self._build_incremental_cursor(model, stream_slicer, config)
-
-        return stream_slicer
+        return FinalStateCursor(stream_name, None, self._message_repository)

    def create_default_error_handler(
        self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
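With this change `_build_concurrent_cursor` always returns a `Cursor`, falling back to a `FinalStateCursor` when the stream has no incremental sync, instead of returning `None` and forcing callers to branch. This is the null-object pattern; a generic illustration (not the CDK's `FinalStateCursor`) looks like this:

# Generic null-object illustration: a cursor that tracks nothing, so callers
# can treat full-refresh and incremental streams uniformly.
from typing import Any, Iterable, Mapping

class NoopCursor:
    def observe(self, record: Mapping[str, Any]) -> None:
        pass  # nothing to track for a full-refresh stream

    def close_partition(self) -> None:
        pass  # a real implementation would emit a final state message here

def sync(records: Iterable[Mapping[str, Any]], cursor: Any) -> None:
    for record in records:
        cursor.observe(record)  # no isinstance/None checks needed
    cursor.close_partition()

sync([{"id": 1}, {"id": 2}], NoopCursor())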
@@ -2660,7 +2537,9 @@ class ModelToComponentFactory:
            config=config,
            name=name,
            primary_key=None,
-
+            partition_router=self._build_stream_slicer_from_partition_router(
+                model.retriever, config
+            ),
            transformations=[],
            use_cache=True,
            log_formatter=(
@@ -3010,7 +2889,7 @@ class ModelToComponentFactory:
        )

    def create_parent_stream_config(
-        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
+        self, model: ParentStreamConfigModel, config: Config, stream_name: str, **kwargs: Any
    ) -> ParentStreamConfig:
        declarative_stream = self._create_component_from_model(
            model.stream,
@@ -3263,7 +3142,6 @@ class ModelToComponentFactory:
        *,
        name: str,
        primary_key: Optional[Union[str, List[str], List[List[str]]]],
-        stream_slicer: Optional[StreamSlicer],
        request_options_provider: Optional[RequestOptionsProvider] = None,
        stop_condition_cursor: Optional[Cursor] = None,
        client_side_incremental_sync: Optional[Dict[str, Any]] = None,
@@ -3276,9 +3154,10 @@ class ModelToComponentFactory:
        ] = None,
        use_cache: Optional[bool] = None,
        log_formatter: Optional[Callable[[Response], Any]] = None,
+        partition_router: Optional[PartitionRouter] = None,
        **kwargs: Any,
    ) -> SimpleRetriever:
-        def _get_url() -> str:
+        def _get_url(req: Requester) -> str:
            """
            Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever.
            This is needed because the URL is not set until the requester is created.
@@ -3287,12 +3166,12 @@ class ModelToComponentFactory:
            _url: str = (
                model.requester.url
                if hasattr(model.requester, "url") and model.requester.url is not None
-                else
+                else req.get_url(stream_state=None, stream_slice=None, next_page_token=None)
            )
            _url_base: str = (
                model.requester.url_base
                if hasattr(model.requester, "url_base") and model.requester.url_base is not None
-                else
+                else req.get_url_base(stream_state=None, stream_slice=None, next_page_token=None)
            )

            return _url or _url_base
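Whether or not it was the only motivation here, passing the requester into `_get_url` explicitly (instead of closing over it) removes any dependence on name-binding order in the enclosing scope: Python closures are late-binding, so the free variable is only resolved when the helper is called and a call before the binding exists raises `NameError`.

# Late binding in closures: the free variable is resolved at call time.
def make_getter():
    def get_url():
        return requester.base_url  # 'requester' is looked up when get_url() runs

    try:
        get_url()  # 'requester' is not bound yet
    except NameError as error:
        print(error)

    requester = type("Requester", (), {"base_url": "https://api.example.com"})()
    return get_url

print(make_getter()())  # works once 'requester' is bound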
@@ -3371,36 +3250,18 @@ class ModelToComponentFactory:
            config=config,
        )

-
-        cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None
-
-        if (
-            not isinstance(stream_slicer, DatetimeBasedCursor)
-            or type(stream_slicer) is not DatetimeBasedCursor
-        ):
-            # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
-            # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
-            # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
-            # request_options_provider
-            request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={})
-        elif not request_options_provider:
+        if not request_options_provider:
            request_options_provider = DefaultRequestOptionsProvider(parameters={})
-
-
-
-
-                StreamSlicer,
-                StreamSlicerTestReadDecorator(
-                    wrapped_slicer=stream_slicer,
-                    maximum_number_of_slices=self._limit_slices_fetched or 5,
-                ),
-            )
+        if isinstance(request_options_provider, DefaultRequestOptionsProvider) and isinstance(
+            partition_router, PartitionRouter
+        ):
+            request_options_provider = partition_router

        paginator = (
            self._create_component_from_model(
                model=model.paginator,
                config=config,
-                url_base=_get_url(),
+                url_base=_get_url(requester),
                extractor_model=model.record_selector.extractor,
                decoder=decoder,
                cursor_used_for_stop_condition=stop_condition_cursor or None,
@@ -3444,9 +3305,9 @@ class ModelToComponentFactory:
            primary_key=primary_key,
            requester=requester,
            record_selector=record_selector,
-            stream_slicer=
+            stream_slicer=_NO_STREAM_SLICING,
            request_option_provider=request_options_provider,
-            cursor=
+            cursor=None,
            config=config,
            ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
            parameters=model.parameters or {},
@@ -3458,9 +3319,9 @@ class ModelToComponentFactory:
            primary_key=primary_key,
            requester=requester,
            record_selector=record_selector,
-            stream_slicer=
+            stream_slicer=_NO_STREAM_SLICING,
            request_option_provider=request_options_provider,
-            cursor=
+            cursor=None,
            config=config,
            ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
            additional_query_properties=query_properties,
@@ -3531,14 +3392,21 @@ class ModelToComponentFactory:
                f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
            )

-        stream_model = (
+        stream_model = self._get_state_delegating_stream_model(
+            False if has_parent_state is None else has_parent_state, model
+        )
+
+        return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return] # DeclarativeStream will be created as stream_model is alwyas DeclarativeStreamModel
+
+    def _get_state_delegating_stream_model(
+        self, has_parent_state: bool, model: StateDelegatingStreamModel
+    ) -> DeclarativeStreamModel:
+        return (
            model.incremental_stream
            if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
            else model.full_refresh_stream
        )

-        return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return] # Will be created DeclarativeStream as stream_model is stream description
-
    def _create_async_job_status_mapping(
        self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any
    ) -> Mapping[str, AsyncJobStatus]:
@@ -3583,12 +3451,14 @@ class ModelToComponentFactory:
        transformations: List[RecordTransformation],
        **kwargs: Any,
    ) -> AsyncRetriever:
-        def _get_download_retriever(
+        def _get_download_retriever(
+            requester: Requester, extractor: RecordExtractor, _decoder: Decoder
+        ) -> SimpleRetriever:
            # We create a record selector for the download retriever
            # with no schema normalization and no transformations, neither record filter
            # as all this occurs in the record_selector of the AsyncRetriever
            record_selector = RecordSelector(
-                extractor=
+                extractor=extractor,
                name=name,
                record_filter=None,
                transformations=[],
@@ -3599,7 +3469,7 @@ class ModelToComponentFactory:
            paginator = (
                self._create_component_from_model(
                    model=model.download_paginator,
-                    decoder=
+                    decoder=_decoder,
                    config=config,
                    url_base="",
                )
@@ -3608,7 +3478,7 @@ class ModelToComponentFactory:
            )

            return SimpleRetriever(
-                requester=
+                requester=requester,
                record_selector=record_selector,
                primary_key=None,
                name=name,
@@ -3702,7 +3572,9 @@ class ModelToComponentFactory:
            config=config,
            name=job_download_components_name,
        )
-        download_retriever = _get_download_retriever(
+        download_retriever = _get_download_retriever(
+            download_requester, download_extractor, download_decoder
+        )
        abort_requester = (
            self._create_component_from_model(
                model=model.abort_requester,
@@ -3832,7 +3704,7 @@ class ModelToComponentFactory:
        if model.parent_stream_configs:
            parent_stream_configs.extend(
                [
-                    self.
+                    self.create_parent_stream_config_with_substream_wrapper(
                        model=parent_stream_config, config=config, **kwargs
                    )
                    for parent_stream_config in model.parent_stream_configs
@@ -3845,32 +3717,105 @@ class ModelToComponentFactory:
            config=config,
        )

-    def
-        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
+    def create_parent_stream_config_with_substream_wrapper(
+        self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any
    ) -> Any:
+        # getting the parent state
+        child_state = self._connector_state_manager.get_stream_state(stream_name, None)
+
+        # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
+        has_parent_state = bool(
+            self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
+            if model.incremental_dependency
+            else False
+        )
+        connector_state_manager = self._instantiate_parent_stream_state_manager(
+            child_state, config, model, has_parent_state
+        )
+
        substream_factory = ModelToComponentFactory(
+            connector_state_manager=connector_state_manager,
            limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
            limit_slices_fetched=self._limit_slices_fetched,
            emit_connector_builder_messages=self._emit_connector_builder_messages,
            disable_retries=self._disable_retries,
            disable_cache=self._disable_cache,
-            message_repository=
-
-
-
+            message_repository=StateFilteringMessageRepository(
+                LogAppenderMessageRepositoryDecorator(
+                    {
+                        "airbyte_cdk": {"stream": {"is_substream": True}},
+                        "http": {"is_auxiliary": True},
+                    },
+                    self._message_repository,
+                    self._evaluate_log_level(self._emit_connector_builder_messages),
+                ),
            ),
        )

-
-
-            self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
-            if model.incremental_dependency
-            else False
-        )
-        return substream_factory._create_component_from_model(
-            model=model, config=config, has_parent_state=has_parent_state, **kwargs
+        return substream_factory.create_parent_stream_config(
+            model=model, config=config, stream_name=stream_name, **kwargs
        )

+    def _instantiate_parent_stream_state_manager(
+        self,
+        child_state: MutableMapping[str, Any],
+        config: Config,
+        model: ParentStreamConfigModel,
+        has_parent_state: bool,
+    ) -> ConnectorStateManager:
+        """
+        With DefaultStream, the state needs to be provided during __init__ of the cursor as opposed to the
+        `set_initial_state` flow that existed for the declarative cursors. This state is taken from
+        self._connector_state_manager.get_stream_state (`self` being a newly created ModelToComponentFactory to account
+        for the MessageRepository being different). So we need to pass a ConnectorStateManager to the
+        ModelToComponentFactory that has the parent states. This method populates this if there is a child state and if
+        incremental_dependency is set.
+        """
+        if model.incremental_dependency and child_state:
+            parent_stream_name = model.stream.name or ""
+            parent_state = ConcurrentPerPartitionCursor.get_parent_state(
+                child_state, parent_stream_name
+            )
+
+            if not parent_state:
+                # there are two migration cases: state value from child stream or from global state
+                parent_state = ConcurrentPerPartitionCursor.get_global_state(
+                    child_state, parent_stream_name
+                )
+
+            if not parent_state and not isinstance(parent_state, dict):
+                cursor_values = child_state.values()
+                if cursor_values:
+                    incremental_sync_model: Union[
+                        DatetimeBasedCursorModel,
+                        IncrementingCountCursorModel,
+                        CustomIncrementalSyncModel,
+                    ] = (
+                        model.stream.incremental_sync  # type: ignore # if we are there, it is because there is incremental_dependency and therefore there is an incremental_sync on the parent stream
+                        if isinstance(model.stream, DeclarativeStreamModel)
+                        else self._get_state_delegating_stream_model(
+                            has_parent_state, model.stream
+                        ).incremental_sync
+                    )
+                    cursor_field = InterpolatedString.create(
+                        incremental_sync_model.cursor_field,
+                        parameters=incremental_sync_model.parameters or {},
+                    ).eval(config)
+                    parent_state = AirbyteStateMessage(
+                        type=AirbyteStateType.STREAM,
+                        stream=AirbyteStreamState(
+                            stream_descriptor=StreamDescriptor(
+                                name=parent_stream_name, namespace=None
+                            ),
+                            stream_state=AirbyteStateBlob(
+                                {cursor_field: list(cursor_values)[0]}
+                            ),
+                        ),
+                    )
+            return ConnectorStateManager([parent_state] if parent_state else [])
+
+        return ConnectorStateManager([])
+
    @staticmethod
    def create_wait_time_from_header(
        model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
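The new `_instantiate_parent_stream_state_manager` seeds a fresh `ConnectorStateManager` with whatever parent state can be recovered from the child's state, via `ConcurrentPerPartitionCursor.get_parent_state` / `get_global_state` or, failing both, a synthesized `AirbyteStateMessage`. As a rough illustration only (the per-partition state layout assumed below is not defined by this diff), the lookup amounts to pulling a nested entry out of the child state:

# Illustration with an assumed per-partition state shape; the real extraction is
# done by ConcurrentPerPartitionCursor.get_parent_state / get_global_state.
child_state = {
    "states": [{"partition": {"parent_id": "1"}, "cursor": {"updated_at": "2024-01-01"}}],
    "parent_state": {"parents": {"updated_at": "2024-01-02"}},
}

parent_stream_name = "parents"
parent_state = child_state.get("parent_state", {}).get(parent_stream_name)
print(parent_state)  # {'updated_at': '2024-01-02'} if present, else None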
@@ -3951,6 +3896,7 @@ class ModelToComponentFactory:

        return HttpComponentsResolver(
            retriever=retriever,
+            stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config),
            config=config,
            components_mapping=components_mapping,
            parameters=model.parameters or {},
@@ -4176,7 +4122,9 @@ class ModelToComponentFactory:
        self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
    ) -> GroupingPartitionRouter:
        underlying_router = self._create_component_from_model(
-            model=model.underlying_partition_router,
+            model=model.underlying_partition_router,
+            config=config,
+            **kwargs,
        )
        if model.group_size < 1:
            raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
|