airbyte-cdk 6.45.0.dev4107__py3-none-any.whl → 6.45.0.post20.dev14369762306__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +45 -6
- airbyte_cdk/connector_builder/main.py +5 -2
- airbyte_cdk/models/__init__.py +0 -1
- airbyte_cdk/models/airbyte_protocol.py +3 -1
- airbyte_cdk/models/file_transfer_record_message.py +13 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -1
- airbyte_cdk/sources/declarative/async_job/job.py +6 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +22 -6
- airbyte_cdk/sources/declarative/checks/__init__.py +5 -2
- airbyte_cdk/sources/declarative/checks/check_stream.py +113 -11
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +0 -8
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +210 -50
- airbyte_cdk/sources/declarative/extractors/record_selector.py +1 -6
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +2 -1
- airbyte_cdk/sources/declarative/interpolation/macros.py +10 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +23 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +142 -43
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +16 -4
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +263 -50
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +4 -0
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +150 -0
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +5 -1
- airbyte_cdk/sources/declarative/requesters/query_properties/__init__.py +13 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py +40 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py +69 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py +58 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/__init__.py +10 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py +33 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/merge_strategy.py +19 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +25 -2
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +101 -30
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +1 -1
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -9
- airbyte_cdk/sources/declarative/transformations/add_fields.py +3 -1
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +15 -38
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +15 -8
- airbyte_cdk/sources/file_based/schema_helpers.py +1 -9
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +12 -3
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +31 -16
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +3 -1
- airbyte_cdk/sources/streams/concurrent/default_stream.py +0 -3
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +4 -0
- airbyte_cdk/sources/types.py +2 -11
- airbyte_cdk/sources/utils/record_helper.py +8 -8
- airbyte_cdk/test/declarative/__init__.py +6 -0
- airbyte_cdk/test/declarative/models/__init__.py +7 -0
- airbyte_cdk/test/declarative/models/scenario.py +74 -0
- airbyte_cdk/test/declarative/test_suites/__init__.py +24 -0
- airbyte_cdk/test/declarative/test_suites/connector_base.py +197 -0
- airbyte_cdk/test/declarative/test_suites/declarative_sources.py +47 -0
- airbyte_cdk/test/declarative/test_suites/destination_base.py +12 -0
- airbyte_cdk/test/declarative/test_suites/source_base.py +129 -0
- airbyte_cdk/test/declarative/utils/__init__.py +0 -0
- airbyte_cdk/test/declarative/utils/job_runner.py +128 -0
- airbyte_cdk/test/entrypoint_wrapper.py +4 -0
- airbyte_cdk/test/fixtures/__init__.py +0 -0
- airbyte_cdk/test/fixtures/auto.py +14 -0
- airbyte_cdk/test/fixtures/general.py +15 -0
- airbyte_cdk/test/mock_http/response_builder.py +0 -8
- airbyte_cdk/test/pytest_config/plugin.py +40 -0
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/RECORD +67 -47
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +0 -89
- airbyte_cdk/sources/file_based/file_record_data.py +0 -22
- airbyte_cdk/sources/utils/files_directory.py +0 -15
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post20.dev14369762306.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,5 @@
|
|
1
1
|
#
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
5
|
from __future__ import annotations
|
@@ -54,7 +54,11 @@ from airbyte_cdk.sources.declarative.auth.token_provider import (
|
|
54
54
|
SessionTokenProvider,
|
55
55
|
TokenProvider,
|
56
56
|
)
|
57
|
-
from airbyte_cdk.sources.declarative.checks import
|
57
|
+
from airbyte_cdk.sources.declarative.checks import (
|
58
|
+
CheckDynamicStream,
|
59
|
+
CheckStream,
|
60
|
+
DynamicStreamCheckConfig,
|
61
|
+
)
|
58
62
|
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
|
59
63
|
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
|
60
64
|
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
@@ -102,6 +106,7 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
|
|
102
106
|
)
|
103
107
|
from airbyte_cdk.sources.declarative.models import (
|
104
108
|
CustomStateMigration,
|
109
|
+
GzipDecoder,
|
105
110
|
)
|
106
111
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
107
112
|
AddedFieldDefinition as AddedFieldDefinitionModel,
|
@@ -218,10 +223,10 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
218
223
|
DynamicSchemaLoader as DynamicSchemaLoaderModel,
|
219
224
|
)
|
220
225
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
221
|
-
|
226
|
+
DynamicStreamCheckConfig as DynamicStreamCheckConfigModel,
|
222
227
|
)
|
223
228
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
224
|
-
|
229
|
+
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
225
230
|
)
|
226
231
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
227
232
|
FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
|
@@ -229,6 +234,12 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
229
234
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
230
235
|
FlattenFields as FlattenFieldsModel,
|
231
236
|
)
|
237
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
238
|
+
GroupByKeyMergeStrategy as GroupByKeyMergeStrategyModel,
|
239
|
+
)
|
240
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
241
|
+
GroupingPartitionRouter as GroupingPartitionRouterModel,
|
242
|
+
)
|
232
243
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
233
244
|
GzipDecoder as GzipDecoderModel,
|
234
245
|
)
|
@@ -316,6 +327,18 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
316
327
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
317
328
|
ParentStreamConfig as ParentStreamConfigModel,
|
318
329
|
)
|
330
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
331
|
+
PropertiesFromEndpoint as PropertiesFromEndpointModel,
|
332
|
+
)
|
333
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
334
|
+
PropertyChunking as PropertyChunkingModel,
|
335
|
+
)
|
336
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
337
|
+
PropertyLimitType as PropertyLimitTypeModel,
|
338
|
+
)
|
339
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
340
|
+
QueryProperties as QueryPropertiesModel,
|
341
|
+
)
|
319
342
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
320
343
|
Rate as RateModel,
|
321
344
|
)
|
@@ -387,6 +410,7 @@ from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
|
|
387
410
|
)
|
388
411
|
from airbyte_cdk.sources.declarative.partition_routers import (
|
389
412
|
CartesianProductStreamSlicer,
|
413
|
+
GroupingPartitionRouter,
|
390
414
|
ListPartitionRouter,
|
391
415
|
PartitionRouter,
|
392
416
|
SinglePartitionRouter,
|
@@ -423,6 +447,17 @@ from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
|
|
423
447
|
PageIncrement,
|
424
448
|
StopConditionPaginationStrategyDecorator,
|
425
449
|
)
|
450
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties import (
|
451
|
+
PropertiesFromEndpoint,
|
452
|
+
PropertyChunking,
|
453
|
+
QueryProperties,
|
454
|
+
)
|
455
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import (
|
456
|
+
PropertyLimitType,
|
457
|
+
)
|
458
|
+
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies import (
|
459
|
+
GroupByKey,
|
460
|
+
)
|
426
461
|
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
|
427
462
|
from airbyte_cdk.sources.declarative.requesters.request_options import (
|
428
463
|
DatetimeBasedRequestOptionsProvider,
|
@@ -444,7 +479,6 @@ from airbyte_cdk.sources.declarative.retrievers import (
|
|
444
479
|
SimpleRetriever,
|
445
480
|
SimpleRetrieverTestReadDecorator,
|
446
481
|
)
|
447
|
-
from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
|
448
482
|
from airbyte_cdk.sources.declarative.schema import (
|
449
483
|
ComplexFieldType,
|
450
484
|
DefaultSchemaLoader,
|
@@ -558,6 +592,7 @@ class ModelToComponentFactory:
|
|
558
592
|
BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
|
559
593
|
BearerAuthenticatorModel: self.create_bearer_authenticator,
|
560
594
|
CheckStreamModel: self.create_check_stream,
|
595
|
+
DynamicStreamCheckConfigModel: self.create_dynamic_stream_check_config,
|
561
596
|
CheckDynamicStreamModel: self.create_check_dynamic_stream,
|
562
597
|
CompositeErrorHandlerModel: self.create_composite_error_handler,
|
563
598
|
ConcurrencyLevelModel: self.create_concurrency_level,
|
@@ -587,6 +622,7 @@ class ModelToComponentFactory:
|
|
587
622
|
ResponseToFileExtractorModel: self.create_response_to_file_extractor,
|
588
623
|
ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
|
589
624
|
SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
|
625
|
+
GroupByKeyMergeStrategyModel: self.create_group_by_key,
|
590
626
|
HttpRequesterModel: self.create_http_requester,
|
591
627
|
HttpResponseFilterModel: self.create_http_response_filter,
|
592
628
|
InlineSchemaLoaderModel: self.create_inline_schema_loader,
|
@@ -616,6 +652,9 @@ class ModelToComponentFactory:
|
|
616
652
|
OffsetIncrementModel: self.create_offset_increment,
|
617
653
|
PageIncrementModel: self.create_page_increment,
|
618
654
|
ParentStreamConfigModel: self.create_parent_stream_config,
|
655
|
+
PropertiesFromEndpointModel: self.create_properties_from_endpoint,
|
656
|
+
PropertyChunkingModel: self.create_property_chunking,
|
657
|
+
QueryPropertiesModel: self.create_query_properties,
|
619
658
|
RecordFilterModel: self.create_record_filter,
|
620
659
|
RecordSelectorModel: self.create_record_selector,
|
621
660
|
RemoveFieldsModel: self.create_remove_fields,
|
@@ -636,12 +675,12 @@ class ModelToComponentFactory:
|
|
636
675
|
ComponentMappingDefinitionModel: self.create_components_mapping_definition,
|
637
676
|
ZipfileDecoderModel: self.create_zipfile_decoder,
|
638
677
|
HTTPAPIBudgetModel: self.create_http_api_budget,
|
639
|
-
FileUploaderModel: self.create_file_uploader,
|
640
678
|
FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
|
641
679
|
MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
|
642
680
|
UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
|
643
681
|
RateModel: self.create_rate,
|
644
682
|
HttpRequestRegexMatcherModel: self.create_http_request_matcher,
|
683
|
+
GroupingPartitionRouterModel: self.create_grouping_partition_router,
|
645
684
|
}
|
646
685
|
|
647
686
|
# Needed for the case where we need to perform a second parse on the fields of a custom component
|
@@ -935,8 +974,36 @@ class ModelToComponentFactory:
|
|
935
974
|
)
|
936
975
|
|
937
976
|
@staticmethod
|
938
|
-
def
|
939
|
-
|
977
|
+
def create_dynamic_stream_check_config(
    model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any
) -> DynamicStreamCheckConfig:
    """Build a ``DynamicStreamCheckConfig`` component from its declarative model.

    ``config`` and ``kwargs`` are accepted but not read by this factory.

    :param model: parsed model carrying ``dynamic_stream_name`` and an optional ``stream_count``
    :return: the runtime check configuration for one dynamic stream
    """
    requested_count = model.stream_count
    # A falsy count (None or 0) is passed on as 0.
    stream_count = requested_count if requested_count else 0
    return DynamicStreamCheckConfig(
        dynamic_stream_name=model.dynamic_stream_name,
        stream_count=stream_count,
    )
|
984
|
+
|
985
|
+
def create_check_stream(
    self, model: CheckStreamModel, config: Config, **kwargs: Any
) -> CheckStream:
    """Build a ``CheckStream`` component from its declarative model.

    :param model: parsed model with optional ``stream_names`` and ``dynamic_streams_check_configs``
    :param config: connector configuration forwarded to the nested component factories
    :raises ValueError: when both ``stream_names`` and ``dynamic_streams_check_configs`` are ``None``
    :return: a ``CheckStream`` whose missing lists are replaced by empty lists
    """
    dynamic_config_models = model.dynamic_streams_check_configs
    if dynamic_config_models is None and model.stream_names is None:
        raise ValueError(
            "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream"
        )

    # Each dynamic stream check config is itself a declarative component that must be built.
    dynamic_streams_check_configs = []
    if dynamic_config_models:
        for dynamic_config_model in dynamic_config_models:
            dynamic_streams_check_configs.append(
                self._create_component_from_model(model=dynamic_config_model, config=config)
            )

    return CheckStream(
        stream_names=model.stream_names or [],
        dynamic_streams_check_configs=dynamic_streams_check_configs,
        parameters={},
    )
|
940
1007
|
|
941
1008
|
@staticmethod
|
942
1009
|
def create_check_dynamic_stream(
|
@@ -1359,6 +1426,9 @@ class ModelToComponentFactory:
|
|
1359
1426
|
)
|
1360
1427
|
stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
|
1361
1428
|
|
1429
|
+
# Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state
|
1430
|
+
use_global_cursor = isinstance(partition_router, GroupingPartitionRouter)
|
1431
|
+
|
1362
1432
|
# Return the concurrent cursor and state converter
|
1363
1433
|
return ConcurrentPerPartitionCursor(
|
1364
1434
|
cursor_factory=cursor_factory,
|
@@ -1370,6 +1440,7 @@ class ModelToComponentFactory:
|
|
1370
1440
|
connector_state_manager=state_manager,
|
1371
1441
|
connector_state_converter=connector_state_converter,
|
1372
1442
|
cursor_field=cursor_field,
|
1443
|
+
use_global_cursor=use_global_cursor,
|
1373
1444
|
)
|
1374
1445
|
|
1375
1446
|
@staticmethod
|
@@ -1755,11 +1826,6 @@ class ModelToComponentFactory:
|
|
1755
1826
|
transformations.append(
|
1756
1827
|
self._create_component_from_model(model=transformation_model, config=config)
|
1757
1828
|
)
|
1758
|
-
file_uploader = None
|
1759
|
-
if model.file_uploader:
|
1760
|
-
file_uploader = self._create_component_from_model(
|
1761
|
-
model=model.file_uploader, config=config
|
1762
|
-
)
|
1763
1829
|
|
1764
1830
|
retriever = self._create_component_from_model(
|
1765
1831
|
model=model.retriever,
|
@@ -1771,7 +1837,6 @@ class ModelToComponentFactory:
|
|
1771
1837
|
stop_condition_on_cursor=stop_condition_on_cursor,
|
1772
1838
|
client_side_incremental_sync=client_side_incremental_sync,
|
1773
1839
|
transformations=transformations,
|
1774
|
-
file_uploader=file_uploader,
|
1775
1840
|
incremental_sync=model.incremental_sync,
|
1776
1841
|
)
|
1777
1842
|
cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
|
@@ -2048,8 +2113,8 @@ class ModelToComponentFactory:
|
|
2048
2113
|
parameters=model.parameters or {},
|
2049
2114
|
)
|
2050
2115
|
|
2116
|
+
@staticmethod
|
2051
2117
|
def create_response_to_file_extractor(
|
2052
|
-
self,
|
2053
2118
|
model: ResponseToFileExtractorModel,
|
2054
2119
|
**kwargs: Any,
|
2055
2120
|
) -> ResponseToFileExtractor:
|
@@ -2063,11 +2128,17 @@ class ModelToComponentFactory:
|
|
2063
2128
|
factor=model.factor or 5, parameters=model.parameters or {}, config=config
|
2064
2129
|
)
|
2065
2130
|
|
2131
|
+
@staticmethod
def create_group_by_key(
    model: GroupByKeyMergeStrategyModel, config: Config, **kwargs: Any
) -> GroupByKey:
    """Build a ``GroupByKey`` record merge strategy from its declarative model.

    :param model: parsed model providing ``key`` and optional ``parameters``
    :param config: connector configuration handed to the strategy
    :param kwargs: accepted for signature parity with the other factory methods and ignored;
        ``create_property_chunking`` forwards its own ``**kwargs`` when building the merge
        strategy, so extra keywords presumably reach this factory -- TODO confirm against
        ``_create_component_from_model``
    :return: the configured ``GroupByKey`` strategy
    """
    return GroupByKey(model.key, config=config, parameters=model.parameters or {})
|
2134
|
+
|
2066
2135
|
def create_http_requester(
|
2067
2136
|
self,
|
2068
2137
|
model: HttpRequesterModel,
|
2069
2138
|
config: Config,
|
2070
2139
|
decoder: Decoder = JsonDecoder(parameters={}),
|
2140
|
+
query_properties_key: Optional[str] = None,
|
2141
|
+
use_cache: Optional[bool] = None,
|
2071
2142
|
*,
|
2072
2143
|
name: str,
|
2073
2144
|
) -> HttpRequester:
|
@@ -2100,6 +2171,7 @@ class ModelToComponentFactory:
|
|
2100
2171
|
request_body_json=model.request_body_json,
|
2101
2172
|
request_headers=model.request_headers,
|
2102
2173
|
request_parameters=model.request_parameters,
|
2174
|
+
query_properties_key=query_properties_key,
|
2103
2175
|
config=config,
|
2104
2176
|
parameters=model.parameters or {},
|
2105
2177
|
)
|
@@ -2107,7 +2179,7 @@ class ModelToComponentFactory:
|
|
2107
2179
|
assert model.use_cache is not None # for mypy
|
2108
2180
|
assert model.http_method is not None # for mypy
|
2109
2181
|
|
2110
|
-
|
2182
|
+
should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache
|
2111
2183
|
|
2112
2184
|
return HttpRequester(
|
2113
2185
|
name=name,
|
@@ -2122,7 +2194,7 @@ class ModelToComponentFactory:
|
|
2122
2194
|
disable_retries=self._disable_retries,
|
2123
2195
|
parameters=model.parameters or {},
|
2124
2196
|
message_repository=self._message_repository,
|
2125
|
-
use_cache=
|
2197
|
+
use_cache=should_use_cache,
|
2126
2198
|
decoder=decoder,
|
2127
2199
|
stream_response=decoder.is_stream_response() if decoder else False,
|
2128
2200
|
)
|
@@ -2226,10 +2298,11 @@ class ModelToComponentFactory:
|
|
2226
2298
|
retriever = self._create_component_from_model(
|
2227
2299
|
model=model.retriever,
|
2228
2300
|
config=config,
|
2229
|
-
name="",
|
2301
|
+
name="dynamic_properties",
|
2230
2302
|
primary_key=None,
|
2231
2303
|
stream_slicer=combined_slicers,
|
2232
2304
|
transformations=[],
|
2305
|
+
use_cache=True,
|
2233
2306
|
)
|
2234
2307
|
schema_type_identifier = self._create_component_from_model(
|
2235
2308
|
model.schema_type_identifier, config=config, parameters=model.parameters or {}
|
@@ -2567,6 +2640,79 @@ class ModelToComponentFactory:
|
|
2567
2640
|
lazy_read_pointer=model_lazy_read_pointer,
|
2568
2641
|
)
|
2569
2642
|
|
2643
|
+
def create_properties_from_endpoint(
    self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any
) -> PropertiesFromEndpoint:
    """Build a ``PropertiesFromEndpoint`` component from its declarative model.

    :param model: parsed model providing ``retriever``, ``property_field_path`` and ``parameters``
    :param config: connector configuration forwarded to the retriever and the component
    :return: the component wrapping the retriever that is built here
    """
    # Caching is enabled on the HttpRequester/HttpClient because the properties endpoint is
    # called for every slice being processed, and its response is highly unlikely to differ
    # between calls.
    properties_retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name="dynamic_properties",
        primary_key=None,
        stream_slicer=None,
        transformations=[],
        use_cache=True,
    )
    component_parameters = model.parameters or {}
    return PropertiesFromEndpoint(
        property_field_path=model.property_field_path,
        retriever=properties_retriever,
        config=config,
        parameters=component_parameters,
    )
|
2661
|
+
|
2662
|
+
def create_property_chunking(
|
2663
|
+
self, model: PropertyChunkingModel, config: Config, **kwargs: Any
|
2664
|
+
) -> PropertyChunking:
|
2665
|
+
record_merge_strategy = (
|
2666
|
+
self._create_component_from_model(
|
2667
|
+
model=model.record_merge_strategy, config=config, **kwargs
|
2668
|
+
)
|
2669
|
+
if model.record_merge_strategy
|
2670
|
+
else None
|
2671
|
+
)
|
2672
|
+
|
2673
|
+
property_limit_type: PropertyLimitType
|
2674
|
+
match model.property_limit_type:
|
2675
|
+
case PropertyLimitTypeModel.property_count:
|
2676
|
+
property_limit_type = PropertyLimitType.property_count
|
2677
|
+
case PropertyLimitTypeModel.characters:
|
2678
|
+
property_limit_type = PropertyLimitType.characters
|
2679
|
+
case _:
|
2680
|
+
raise ValueError(f"Invalid PropertyLimitType {property_limit_type}")
|
2681
|
+
|
2682
|
+
return PropertyChunking(
|
2683
|
+
property_limit_type=property_limit_type,
|
2684
|
+
property_limit=model.property_limit,
|
2685
|
+
record_merge_strategy=record_merge_strategy,
|
2686
|
+
config=config,
|
2687
|
+
parameters=model.parameters or {},
|
2688
|
+
)
|
2689
|
+
|
2690
|
+
def create_query_properties(
    self, model: QueryPropertiesModel, config: Config, **kwargs: Any
) -> QueryProperties:
    """Build a ``QueryProperties`` component from its declarative model.

    :param model: parsed model providing ``property_list``, ``always_include_properties``,
        an optional ``property_chunking`` and ``parameters``
    :param config: connector configuration forwarded to nested factories and the component
    :param kwargs: forwarded unchanged to the nested component factories
    :return: the configured ``QueryProperties`` component
    """
    declared_properties = model.property_list
    # A plain list is used as-is; anything else is a component model that must be built.
    property_list = (
        declared_properties
        if isinstance(declared_properties, list)
        else self._create_component_from_model(
            model=declared_properties, config=config, **kwargs
        )
    )

    property_chunking = None
    if model.property_chunking:
        property_chunking = self._create_component_from_model(
            model=model.property_chunking, config=config, **kwargs
        )

    return QueryProperties(
        property_list=property_list,
        always_include_properties=model.always_include_properties,
        property_chunking=property_chunking,
        config=config,
        parameters=model.parameters or {},
    )
|
2715
|
+
|
2570
2716
|
@staticmethod
|
2571
2717
|
def create_record_filter(
|
2572
2718
|
model: RecordFilterModel, config: Config, **kwargs: Any
|
@@ -2613,7 +2759,6 @@ class ModelToComponentFactory:
|
|
2613
2759
|
transformations: List[RecordTransformation] | None = None,
|
2614
2760
|
decoder: Decoder | None = None,
|
2615
2761
|
client_side_incremental_sync: Dict[str, Any] | None = None,
|
2616
|
-
file_uploader: Optional[FileUploader] = None,
|
2617
2762
|
**kwargs: Any,
|
2618
2763
|
) -> RecordSelector:
|
2619
2764
|
extractor = self._create_component_from_model(
|
@@ -2651,7 +2796,6 @@ class ModelToComponentFactory:
|
|
2651
2796
|
config=config,
|
2652
2797
|
record_filter=record_filter,
|
2653
2798
|
transformations=transformations or [],
|
2654
|
-
file_uploader=file_uploader,
|
2655
2799
|
schema_normalization=schema_normalization,
|
2656
2800
|
parameters=model.parameters or {},
|
2657
2801
|
transform_before_filtering=transform_before_filtering,
|
@@ -2709,12 +2853,12 @@ class ModelToComponentFactory:
|
|
2709
2853
|
stop_condition_on_cursor: bool = False,
|
2710
2854
|
client_side_incremental_sync: Optional[Dict[str, Any]] = None,
|
2711
2855
|
transformations: List[RecordTransformation],
|
2712
|
-
file_uploader: Optional[FileUploader] = None,
|
2713
2856
|
incremental_sync: Optional[
|
2714
2857
|
Union[
|
2715
2858
|
IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
|
2716
2859
|
]
|
2717
2860
|
] = None,
|
2861
|
+
use_cache: Optional[bool] = None,
|
2718
2862
|
**kwargs: Any,
|
2719
2863
|
) -> SimpleRetriever:
|
2720
2864
|
decoder = (
|
@@ -2722,9 +2866,6 @@ class ModelToComponentFactory:
|
|
2722
2866
|
if model.decoder
|
2723
2867
|
else JsonDecoder(parameters={})
|
2724
2868
|
)
|
2725
|
-
requester = self._create_component_from_model(
|
2726
|
-
model=model.requester, decoder=decoder, config=config, name=name
|
2727
|
-
)
|
2728
2869
|
record_selector = self._create_component_from_model(
|
2729
2870
|
model=model.record_selector,
|
2730
2871
|
name=name,
|
@@ -2732,7 +2873,57 @@ class ModelToComponentFactory:
|
|
2732
2873
|
decoder=decoder,
|
2733
2874
|
transformations=transformations,
|
2734
2875
|
client_side_incremental_sync=client_side_incremental_sync,
|
2735
|
-
|
2876
|
+
)
|
2877
|
+
|
2878
|
+
query_properties: Optional[QueryProperties] = None
|
2879
|
+
query_properties_key: Optional[str] = None
|
2880
|
+
if (
|
2881
|
+
hasattr(model.requester, "request_parameters")
|
2882
|
+
and model.requester.request_parameters
|
2883
|
+
and isinstance(model.requester.request_parameters, Mapping)
|
2884
|
+
):
|
2885
|
+
query_properties_definitions = []
|
2886
|
+
for key, request_parameter in model.requester.request_parameters.items():
|
2887
|
+
# When translating JSON schema into Pydantic models, enforcing types for arrays containing both
|
2888
|
+
# concrete strings and complex object definitions like QueryProperties would get resolved to Union[str, Any].
|
2889
|
+
# This adds the extra validation that we couldn't get for free in Pydantic model generation
|
2890
|
+
if (
|
2891
|
+
isinstance(request_parameter, Mapping)
|
2892
|
+
and request_parameter.get("type") == "QueryProperties"
|
2893
|
+
):
|
2894
|
+
query_properties_key = key
|
2895
|
+
query_properties_definitions.append(request_parameter)
|
2896
|
+
elif not isinstance(request_parameter, str):
|
2897
|
+
raise ValueError(
|
2898
|
+
f"Each element of request_parameters should be of type str or QueryProperties, but received {request_parameter.get('type')}"
|
2899
|
+
)
|
2900
|
+
|
2901
|
+
if len(query_properties_definitions) > 1:
|
2902
|
+
raise ValueError(
|
2903
|
+
f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages"
|
2904
|
+
)
|
2905
|
+
|
2906
|
+
if len(query_properties_definitions) == 1:
|
2907
|
+
query_properties = self.create_component(
|
2908
|
+
model_type=QueryPropertiesModel,
|
2909
|
+
component_definition=query_properties_definitions[0],
|
2910
|
+
config=config,
|
2911
|
+
)
|
2912
|
+
|
2913
|
+
# Removes QueryProperties components from the interpolated mappings because it will be resolved in
|
2914
|
+
# the provider from the slice directly instead of through jinja interpolation
|
2915
|
+
if isinstance(model.requester.request_parameters, Mapping):
|
2916
|
+
model.requester.request_parameters = self._remove_query_properties(
|
2917
|
+
model.requester.request_parameters
|
2918
|
+
)
|
2919
|
+
|
2920
|
+
requester = self._create_component_from_model(
|
2921
|
+
model=model.requester,
|
2922
|
+
decoder=decoder,
|
2923
|
+
name=name,
|
2924
|
+
query_properties_key=query_properties_key,
|
2925
|
+
use_cache=use_cache,
|
2926
|
+
config=config,
|
2736
2927
|
)
|
2737
2928
|
url_base = (
|
2738
2929
|
model.requester.url_base
|
@@ -2839,9 +3030,21 @@ class ModelToComponentFactory:
|
|
2839
3030
|
cursor=cursor,
|
2840
3031
|
config=config,
|
2841
3032
|
ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
|
3033
|
+
additional_query_properties=query_properties,
|
2842
3034
|
parameters=model.parameters or {},
|
2843
3035
|
)
|
2844
3036
|
|
3037
|
+
@staticmethod
def _remove_query_properties(
    request_parameters: Mapping[str, Union[Any, str]],
) -> Mapping[str, Union[Any, str]]:
    """Return a copy of ``request_parameters`` without ``QueryProperties`` definitions.

    An entry is dropped only when its value is a mapping whose ``"type"`` is
    ``"QueryProperties"``; every other entry is kept. The input mapping is not modified.

    :param request_parameters: request parameter definitions keyed by parameter name
    :return: a new dict holding the remaining entries
    """
    return {
        parameter_field: request_parameter
        for parameter_field, request_parameter in request_parameters.items()
        if not isinstance(request_parameter, Mapping)
        or request_parameter.get("type") != "QueryProperties"
    }
|
3047
|
+
|
2845
3048
|
def create_state_delegating_stream(
|
2846
3049
|
self,
|
2847
3050
|
model: StateDelegatingStreamModel,
|
@@ -3087,8 +3290,11 @@ class ModelToComponentFactory:
|
|
3087
3290
|
stream_slices,
|
3088
3291
|
self._job_tracker,
|
3089
3292
|
self._message_repository,
|
3090
|
-
has_bulk_parent=False,
|
3091
3293
|
# FIXME work would need to be done here in order to detect if a stream has a parent stream that is bulk
|
3294
|
+
has_bulk_parent=False,
|
3295
|
+
# set the `job_max_retry` to 1 for the `Connector Builder` use-case.
|
3296
|
+
# `None` == default retry is set to 3 attempts, under the hood.
|
3297
|
+
job_max_retry=1 if self._emit_connector_builder_messages else None,
|
3092
3298
|
),
|
3093
3299
|
stream_slicer=stream_slicer,
|
3094
3300
|
config=config,
|
@@ -3332,30 +3538,6 @@ class ModelToComponentFactory:
|
|
3332
3538
|
matchers=matchers,
|
3333
3539
|
)
|
3334
3540
|
|
3335
|
-
def create_file_uploader(
|
3336
|
-
self, model: FileUploaderModel, config: Config, **kwargs: Any
|
3337
|
-
) -> FileUploader:
|
3338
|
-
name = "File Uploader"
|
3339
|
-
requester = self._create_component_from_model(
|
3340
|
-
model=model.requester,
|
3341
|
-
config=config,
|
3342
|
-
name=name,
|
3343
|
-
**kwargs,
|
3344
|
-
)
|
3345
|
-
download_target_extractor = self._create_component_from_model(
|
3346
|
-
model=model.download_target_extractor,
|
3347
|
-
config=config,
|
3348
|
-
name=name,
|
3349
|
-
**kwargs,
|
3350
|
-
)
|
3351
|
-
return FileUploader(
|
3352
|
-
requester=requester,
|
3353
|
-
download_target_extractor=download_target_extractor,
|
3354
|
-
config=config,
|
3355
|
-
parameters=model.parameters or {},
|
3356
|
-
filename_extractor=model.filename_extractor if model.filename_extractor else None,
|
3357
|
-
)
|
3358
|
-
|
3359
3541
|
def create_moving_window_call_rate_policy(
|
3360
3542
|
self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
|
3361
3543
|
) -> MovingWindowCallRatePolicy:
|
@@ -3405,3 +3587,34 @@ class ModelToComponentFactory:
|
|
3405
3587
|
self._api_budget = self.create_component(
|
3406
3588
|
model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
|
3407
3589
|
)
|
3590
|
+
|
3591
|
+
def create_grouping_partition_router(
    self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
) -> GroupingPartitionRouter:
    """Build a ``GroupingPartitionRouter`` around the model's underlying partition router.

    :param model: parsed model providing ``underlying_partition_router``, ``group_size``
        and an optional ``deduplicate`` flag
    :param config: connector configuration forwarded to the underlying router and the component
    :raises ValueError: when ``group_size`` is below 1, or when the underlying router
        (substream or list based) defines a request option
    :return: the configured ``GroupingPartitionRouter``; ``deduplicate`` defaults to ``True``
        when the model leaves it unset
    """
    underlying_router = self._create_component_from_model(
        model=model.underlying_partition_router, config=config
    )
    if model.group_size < 1:
        raise ValueError(f"Group size must be greater than 0, got {model.group_size}")

    # Request options in underlying partition routers are not supported for GroupingPartitionRouter
    # because they are specific to individual partitions and cannot be aggregated or handled
    # when grouping, potentially leading to incorrect API calls. Any request customization
    # should be managed at the stream level through the requester's configuration.
    substream_uses_request_option = isinstance(
        underlying_router, SubstreamPartitionRouter
    ) and any(
        parent_stream_config.request_option
        for parent_stream_config in underlying_router.parent_stream_configs
    )
    if substream_uses_request_option:
        raise ValueError("Request options are not supported for GroupingPartitionRouter.")

    if isinstance(underlying_router, ListPartitionRouter) and underlying_router.request_option:
        raise ValueError("Request options are not supported for GroupingPartitionRouter.")

    deduplicate = True if model.deduplicate is None else model.deduplicate
    return GroupingPartitionRouter(
        group_size=model.group_size,
        underlying_partition_router=underlying_router,
        deduplicate=deduplicate,
        config=config,
    )
|
@@ -8,6 +8,9 @@ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_route
|
|
8
8
|
from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import (
|
9
9
|
CartesianProductStreamSlicer,
|
10
10
|
)
|
11
|
+
from airbyte_cdk.sources.declarative.partition_routers.grouping_partition_router import (
|
12
|
+
GroupingPartitionRouter,
|
13
|
+
)
|
11
14
|
from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import (
|
12
15
|
ListPartitionRouter,
|
13
16
|
)
|
@@ -22,6 +25,7 @@ from airbyte_cdk.sources.declarative.partition_routers.substream_partition_route
|
|
22
25
|
__all__ = [
|
23
26
|
"AsyncJobPartitionRouter",
|
24
27
|
"CartesianProductStreamSlicer",
|
28
|
+
"GroupingPartitionRouter",
|
25
29
|
"ListPartitionRouter",
|
26
30
|
"SinglePartitionRouter",
|
27
31
|
"SubstreamPartitionRouter",
|