airbyte-cdk 6.34.1.dev0__py3-none-any.whl → 6.34.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +16 -12
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +591 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +160 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +75 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
- airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
- airbyte_cdk/sources/declarative/auth/oauth.py +6 -1
- airbyte_cdk/sources/declarative/auth/token.py +3 -8
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +30 -79
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +203 -100
- airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
- airbyte_cdk/sources/declarative/decoders/__init__.py +0 -4
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +7 -2
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +12 -58
- airbyte_cdk/sources/declarative/extractors/record_selector.py +12 -3
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +56 -25
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +12 -6
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/jinja.py +13 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +9 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +150 -41
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +234 -84
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +26 -18
- airbyte_cdk/sources/declarative/requesters/http_requester.py +8 -2
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +16 -5
- airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +1 -4
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +0 -3
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -47
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -3
- airbyte_cdk/sources/declarative/transformations/add_fields.py +4 -4
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/file_based_source.py +70 -37
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +107 -12
- airbyte_cdk/sources/file_based/stream/__init__.py +10 -1
- airbyte_cdk/sources/file_based/stream/identities_stream.py +47 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +85 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/call_rate.py +185 -47
- airbyte_cdk/sources/streams/http/http.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +217 -56
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +144 -73
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/test/mock_http/mocker.py +9 -1
- airbyte_cdk/test/mock_http/response.py +6 -3
- airbyte_cdk/utils/datetime_helpers.py +48 -66
- airbyte_cdk/utils/mapping_helpers.py +126 -26
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/RECORD +60 -51
- airbyte_cdk/connector_builder/message_grouper.py +0 -448
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/entry_points.txt +0 -0
@@ -60,10 +60,8 @@ from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
|
|
60
60
|
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
61
61
|
from airbyte_cdk.sources.declarative.decoders import (
|
62
62
|
Decoder,
|
63
|
-
GzipJsonDecoder,
|
64
63
|
IterableDecoder,
|
65
64
|
JsonDecoder,
|
66
|
-
JsonlDecoder,
|
67
65
|
PaginationDecoderDecorator,
|
68
66
|
XmlDecoder,
|
69
67
|
ZipfileDecoder,
|
@@ -103,8 +101,8 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
|
|
103
101
|
LegacyToPerPartitionStateMigration,
|
104
102
|
)
|
105
103
|
from airbyte_cdk.sources.declarative.models import (
|
106
|
-
Clamping,
|
107
104
|
CustomStateMigration,
|
105
|
+
GzipDecoder,
|
108
106
|
)
|
109
107
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
110
108
|
AddedFieldDefinition as AddedFieldDefinitionModel,
|
@@ -142,9 +140,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
142
140
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
143
141
|
CompositeErrorHandler as CompositeErrorHandlerModel,
|
144
142
|
)
|
145
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
146
|
-
CompositeRawDecoder as CompositeRawDecoderModel,
|
147
|
-
)
|
148
143
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
149
144
|
ConcurrencyLevel as ConcurrencyLevelModel,
|
150
145
|
)
|
@@ -155,7 +150,7 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
155
150
|
ConstantBackoffStrategy as ConstantBackoffStrategyModel,
|
156
151
|
)
|
157
152
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
158
|
-
|
153
|
+
CsvDecoder as CsvDecoderModel,
|
159
154
|
)
|
160
155
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
161
156
|
CursorPagination as CursorPaginationModel,
|
@@ -226,14 +221,17 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
226
221
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
227
222
|
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
228
223
|
)
|
224
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
225
|
+
FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
|
226
|
+
)
|
229
227
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
230
228
|
FlattenFields as FlattenFieldsModel,
|
231
229
|
)
|
232
230
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
233
|
-
|
231
|
+
GzipDecoder as GzipDecoderModel,
|
234
232
|
)
|
235
233
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
236
|
-
|
234
|
+
HTTPAPIBudget as HTTPAPIBudgetModel,
|
237
235
|
)
|
238
236
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
239
237
|
HttpComponentsResolver as HttpComponentsResolverModel,
|
@@ -241,6 +239,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
241
239
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
242
240
|
HttpRequester as HttpRequesterModel,
|
243
241
|
)
|
242
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
243
|
+
HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
|
244
|
+
)
|
244
245
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
245
246
|
HttpResponseFilter as HttpResponseFilterModel,
|
246
247
|
)
|
@@ -259,12 +260,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
259
260
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
260
261
|
JsonlDecoder as JsonlDecoderModel,
|
261
262
|
)
|
262
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
263
|
-
JsonLineParser as JsonLineParserModel,
|
264
|
-
)
|
265
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
266
|
-
JsonParser as JsonParserModel,
|
267
|
-
)
|
268
263
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
269
264
|
JwtAuthenticator as JwtAuthenticatorModel,
|
270
265
|
)
|
@@ -295,6 +290,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
295
290
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
296
291
|
MinMaxDatetime as MinMaxDatetimeModel,
|
297
292
|
)
|
293
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
294
|
+
MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel,
|
295
|
+
)
|
298
296
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
299
297
|
NoAuth as NoAuthModel,
|
300
298
|
)
|
@@ -313,6 +311,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
313
311
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
314
312
|
ParentStreamConfig as ParentStreamConfigModel,
|
315
313
|
)
|
314
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
315
|
+
Rate as RateModel,
|
316
|
+
)
|
316
317
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
317
318
|
RecordFilter as RecordFilterModel,
|
318
319
|
)
|
@@ -356,6 +357,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
356
357
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
357
358
|
TypesMap as TypesMapModel,
|
358
359
|
)
|
360
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
361
|
+
UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel,
|
362
|
+
)
|
359
363
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
|
360
364
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
361
365
|
WaitTimeFromHeader as WaitTimeFromHeaderModel,
|
@@ -469,6 +473,15 @@ from airbyte_cdk.sources.message import (
|
|
469
473
|
MessageRepository,
|
470
474
|
NoopMessageRepository,
|
471
475
|
)
|
476
|
+
from airbyte_cdk.sources.streams.call_rate import (
|
477
|
+
APIBudget,
|
478
|
+
FixedWindowCallRatePolicy,
|
479
|
+
HttpAPIBudget,
|
480
|
+
HttpRequestRegexMatcher,
|
481
|
+
MovingWindowCallRatePolicy,
|
482
|
+
Rate,
|
483
|
+
UnlimitedCallRatePolicy,
|
484
|
+
)
|
472
485
|
from airbyte_cdk.sources.streams.concurrent.clamping import (
|
473
486
|
ClampingEndProvider,
|
474
487
|
ClampingStrategy,
|
@@ -520,6 +533,7 @@ class ModelToComponentFactory:
|
|
520
533
|
self._evaluate_log_level(emit_connector_builder_messages)
|
521
534
|
)
|
522
535
|
self._connector_state_manager = connector_state_manager or ConnectorStateManager()
|
536
|
+
self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
|
523
537
|
|
524
538
|
def _init_mappings(self) -> None:
|
525
539
|
self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
|
@@ -531,9 +545,9 @@ class ModelToComponentFactory:
|
|
531
545
|
CheckStreamModel: self.create_check_stream,
|
532
546
|
CheckDynamicStreamModel: self.create_check_dynamic_stream,
|
533
547
|
CompositeErrorHandlerModel: self.create_composite_error_handler,
|
534
|
-
CompositeRawDecoderModel: self.create_composite_raw_decoder,
|
535
548
|
ConcurrencyLevelModel: self.create_concurrency_level,
|
536
549
|
ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
|
550
|
+
CsvDecoderModel: self.create_csv_decoder,
|
537
551
|
CursorPaginationModel: self.create_cursor_pagination,
|
538
552
|
CustomAuthenticatorModel: self.create_custom_component,
|
539
553
|
CustomBackoffStrategyModel: self.create_custom_component,
|
@@ -563,10 +577,7 @@ class ModelToComponentFactory:
|
|
563
577
|
InlineSchemaLoaderModel: self.create_inline_schema_loader,
|
564
578
|
JsonDecoderModel: self.create_json_decoder,
|
565
579
|
JsonlDecoderModel: self.create_jsonl_decoder,
|
566
|
-
|
567
|
-
JsonParserModel: self.create_json_parser,
|
568
|
-
GzipJsonDecoderModel: self.create_gzipjson_decoder,
|
569
|
-
GzipParserModel: self.create_gzip_parser,
|
580
|
+
GzipDecoderModel: self.create_gzip_decoder,
|
570
581
|
KeysToLowerModel: self.create_keys_to_lower_transformation,
|
571
582
|
KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
|
572
583
|
KeysReplaceModel: self.create_keys_replace_transformation,
|
@@ -607,6 +618,12 @@ class ModelToComponentFactory:
|
|
607
618
|
StreamConfigModel: self.create_stream_config,
|
608
619
|
ComponentMappingDefinitionModel: self.create_components_mapping_definition,
|
609
620
|
ZipfileDecoderModel: self.create_zipfile_decoder,
|
621
|
+
HTTPAPIBudgetModel: self.create_http_api_budget,
|
622
|
+
FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
|
623
|
+
MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
|
624
|
+
UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
|
625
|
+
RateModel: self.create_rate,
|
626
|
+
HttpRequestRegexMatcherModel: self.create_http_request_matcher,
|
610
627
|
}
|
611
628
|
|
612
629
|
# Needed for the case where we need to perform a second parse on the fields of a custom component
|
@@ -733,8 +750,8 @@ class ModelToComponentFactory:
|
|
733
750
|
}
|
734
751
|
return names_to_types[value_type]
|
735
752
|
|
736
|
-
@staticmethod
|
737
753
|
def create_api_key_authenticator(
|
754
|
+
self,
|
738
755
|
model: ApiKeyAuthenticatorModel,
|
739
756
|
config: Config,
|
740
757
|
token_provider: Optional[TokenProvider] = None,
|
@@ -756,10 +773,8 @@ class ModelToComponentFactory:
|
|
756
773
|
)
|
757
774
|
|
758
775
|
request_option = (
|
759
|
-
|
760
|
-
inject_into=
|
761
|
-
field_name=model.inject_into.field_name,
|
762
|
-
parameters=model.parameters or {},
|
776
|
+
self._create_component_from_model(
|
777
|
+
model.inject_into, config, parameters=model.parameters or {}
|
763
778
|
)
|
764
779
|
if model.inject_into
|
765
780
|
else RequestOption(
|
@@ -768,6 +783,7 @@ class ModelToComponentFactory:
|
|
768
783
|
parameters=model.parameters or {},
|
769
784
|
)
|
770
785
|
)
|
786
|
+
|
771
787
|
return ApiKeyAuthenticator(
|
772
788
|
token_provider=(
|
773
789
|
token_provider
|
@@ -849,7 +865,7 @@ class ModelToComponentFactory:
|
|
849
865
|
token_provider=token_provider,
|
850
866
|
)
|
851
867
|
else:
|
852
|
-
return
|
868
|
+
return self.create_api_key_authenticator(
|
853
869
|
ApiKeyAuthenticatorModel(
|
854
870
|
type="ApiKeyAuthenticator",
|
855
871
|
api_token="",
|
@@ -935,6 +951,17 @@ class ModelToComponentFactory:
|
|
935
951
|
parameters={},
|
936
952
|
)
|
937
953
|
|
954
|
+
@staticmethod
|
955
|
+
def apply_stream_state_migrations(
|
956
|
+
stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
|
957
|
+
) -> MutableMapping[str, Any]:
|
958
|
+
if stream_state_migrations:
|
959
|
+
for state_migration in stream_state_migrations:
|
960
|
+
if state_migration.should_migrate(stream_state):
|
961
|
+
# The state variable is expected to be mutable but the migrate method returns an immutable mapping.
|
962
|
+
stream_state = dict(state_migration.migrate(stream_state))
|
963
|
+
return stream_state
|
964
|
+
|
938
965
|
def create_concurrent_cursor_from_datetime_based_cursor(
|
939
966
|
self,
|
940
967
|
model_type: Type[BaseModel],
|
@@ -944,6 +971,7 @@ class ModelToComponentFactory:
|
|
944
971
|
config: Config,
|
945
972
|
message_repository: Optional[MessageRepository] = None,
|
946
973
|
runtime_lookback_window: Optional[datetime.timedelta] = None,
|
974
|
+
stream_state_migrations: Optional[List[Any]] = None,
|
947
975
|
**kwargs: Any,
|
948
976
|
) -> ConcurrentCursor:
|
949
977
|
# Per-partition incremental streams can dynamically create child cursors which will pass their current
|
@@ -954,6 +982,7 @@ class ModelToComponentFactory:
|
|
954
982
|
if "stream_state" not in kwargs
|
955
983
|
else kwargs["stream_state"]
|
956
984
|
)
|
985
|
+
stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
|
957
986
|
|
958
987
|
component_type = component_definition.get("type")
|
959
988
|
if component_definition.get("type") != model_type.__name__:
|
@@ -1189,6 +1218,7 @@ class ModelToComponentFactory:
|
|
1189
1218
|
config: Config,
|
1190
1219
|
stream_state: MutableMapping[str, Any],
|
1191
1220
|
partition_router: PartitionRouter,
|
1221
|
+
stream_state_migrations: Optional[List[Any]] = None,
|
1192
1222
|
**kwargs: Any,
|
1193
1223
|
) -> ConcurrentPerPartitionCursor:
|
1194
1224
|
component_type = component_definition.get("type")
|
@@ -1237,8 +1267,10 @@ class ModelToComponentFactory:
|
|
1237
1267
|
stream_namespace=stream_namespace,
|
1238
1268
|
config=config,
|
1239
1269
|
message_repository=NoopMessageRepository(),
|
1270
|
+
stream_state_migrations=stream_state_migrations,
|
1240
1271
|
)
|
1241
1272
|
)
|
1273
|
+
stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
|
1242
1274
|
|
1243
1275
|
# Return the concurrent cursor and state converter
|
1244
1276
|
return ConcurrentPerPartitionCursor(
|
@@ -1489,19 +1521,15 @@ class ModelToComponentFactory:
|
|
1489
1521
|
)
|
1490
1522
|
|
1491
1523
|
end_time_option = (
|
1492
|
-
|
1493
|
-
|
1494
|
-
field_name=model.end_time_option.field_name,
|
1495
|
-
parameters=model.parameters or {},
|
1524
|
+
self._create_component_from_model(
|
1525
|
+
model.end_time_option, config, parameters=model.parameters or {}
|
1496
1526
|
)
|
1497
1527
|
if model.end_time_option
|
1498
1528
|
else None
|
1499
1529
|
)
|
1500
1530
|
start_time_option = (
|
1501
|
-
|
1502
|
-
|
1503
|
-
field_name=model.start_time_option.field_name,
|
1504
|
-
parameters=model.parameters or {},
|
1531
|
+
self._create_component_from_model(
|
1532
|
+
model.start_time_option, config, parameters=model.parameters or {}
|
1505
1533
|
)
|
1506
1534
|
if model.start_time_option
|
1507
1535
|
else None
|
@@ -1572,19 +1600,15 @@ class ModelToComponentFactory:
|
|
1572
1600
|
cursor_model = model.incremental_sync
|
1573
1601
|
|
1574
1602
|
end_time_option = (
|
1575
|
-
|
1576
|
-
|
1577
|
-
field_name=cursor_model.end_time_option.field_name,
|
1578
|
-
parameters=cursor_model.parameters or {},
|
1603
|
+
self._create_component_from_model(
|
1604
|
+
cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
|
1579
1605
|
)
|
1580
1606
|
if cursor_model.end_time_option
|
1581
1607
|
else None
|
1582
1608
|
)
|
1583
1609
|
start_time_option = (
|
1584
|
-
|
1585
|
-
|
1586
|
-
field_name=cursor_model.start_time_option.field_name,
|
1587
|
-
parameters=cursor_model.parameters or {},
|
1610
|
+
self._create_component_from_model(
|
1611
|
+
cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
|
1588
1612
|
)
|
1589
1613
|
if cursor_model.start_time_option
|
1590
1614
|
else None
|
@@ -1656,7 +1680,7 @@ class ModelToComponentFactory:
|
|
1656
1680
|
) -> Optional[PartitionRouter]:
|
1657
1681
|
if (
|
1658
1682
|
hasattr(model, "partition_router")
|
1659
|
-
and isinstance(model, SimpleRetrieverModel)
|
1683
|
+
and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel)
|
1660
1684
|
and model.partition_router
|
1661
1685
|
):
|
1662
1686
|
stream_slicer_model = model.partition_router
|
@@ -1690,6 +1714,31 @@ class ModelToComponentFactory:
|
|
1690
1714
|
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
1691
1715
|
|
1692
1716
|
if model.incremental_sync and stream_slicer:
|
1717
|
+
if model.retriever.type == "AsyncRetriever":
|
1718
|
+
if model.incremental_sync.type != "DatetimeBasedCursor":
|
1719
|
+
# We are currently in a transition to the Concurrent CDK, and AsyncRetriever can only work with the support of unordered slices (for example, when we trigger reports for January and February, the report for February can be completed first). Once we have support for a custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
|
1720
|
+
raise ValueError(
|
1721
|
+
"AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
|
1722
|
+
)
|
1723
|
+
if stream_slicer:
|
1724
|
+
return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
1725
|
+
state_manager=self._connector_state_manager,
|
1726
|
+
model_type=DatetimeBasedCursorModel,
|
1727
|
+
component_definition=model.incremental_sync.__dict__,
|
1728
|
+
stream_name=model.name or "",
|
1729
|
+
stream_namespace=None,
|
1730
|
+
config=config or {},
|
1731
|
+
stream_state={},
|
1732
|
+
partition_router=stream_slicer,
|
1733
|
+
)
|
1734
|
+
return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
1735
|
+
model_type=DatetimeBasedCursorModel,
|
1736
|
+
component_definition=model.incremental_sync.__dict__,
|
1737
|
+
stream_name=model.name or "",
|
1738
|
+
stream_namespace=None,
|
1739
|
+
config=config or {},
|
1740
|
+
)
|
1741
|
+
|
1693
1742
|
incremental_sync_model = model.incremental_sync
|
1694
1743
|
if (
|
1695
1744
|
hasattr(incremental_sync_model, "global_substream_cursor")
|
@@ -1730,6 +1779,7 @@ class ModelToComponentFactory:
|
|
1730
1779
|
stream_name=model.name or "",
|
1731
1780
|
stream_namespace=None,
|
1732
1781
|
config=config or {},
|
1782
|
+
stream_state_migrations=model.state_migrations,
|
1733
1783
|
)
|
1734
1784
|
return (
|
1735
1785
|
self._create_component_from_model(model=model.incremental_sync, config=config)
|
@@ -1886,6 +1936,8 @@ class ModelToComponentFactory:
|
|
1886
1936
|
)
|
1887
1937
|
)
|
1888
1938
|
|
1939
|
+
api_budget = self._api_budget
|
1940
|
+
|
1889
1941
|
request_options_provider = InterpolatedRequestOptionsProvider(
|
1890
1942
|
request_body_data=model.request_body_data,
|
1891
1943
|
request_body_json=model.request_body_json,
|
@@ -1906,6 +1958,7 @@ class ModelToComponentFactory:
|
|
1906
1958
|
path=model.path,
|
1907
1959
|
authenticator=authenticator,
|
1908
1960
|
error_handler=error_handler,
|
1961
|
+
api_budget=api_budget,
|
1909
1962
|
http_method=HttpMethod[model.http_method.value],
|
1910
1963
|
request_options_provider=request_options_provider,
|
1911
1964
|
config=config,
|
@@ -2035,25 +2088,26 @@ class ModelToComponentFactory:
|
|
2035
2088
|
)
|
2036
2089
|
|
2037
2090
|
@staticmethod
|
2038
|
-
def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) ->
|
2091
|
+
def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder:
|
2039
2092
|
return JsonDecoder(parameters={})
|
2040
2093
|
|
2041
2094
|
@staticmethod
|
2042
|
-
def
|
2043
|
-
|
2044
|
-
|
2095
|
+
def create_csv_decoder(model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
|
2096
|
+
return CompositeRawDecoder(
|
2097
|
+
parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
|
2098
|
+
)
|
2045
2099
|
|
2046
2100
|
@staticmethod
|
2047
|
-
def create_jsonl_decoder(
|
2048
|
-
|
2049
|
-
|
2050
|
-
|
2101
|
+
def create_jsonl_decoder(model: JsonlDecoderModel, config: Config, **kwargs: Any) -> Decoder:
|
2102
|
+
return CompositeRawDecoder(
|
2103
|
+
parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
|
2104
|
+
)
|
2051
2105
|
|
2052
2106
|
@staticmethod
|
2053
|
-
def
|
2054
|
-
|
2055
|
-
|
2056
|
-
|
2107
|
+
def create_gzip_decoder(model: GzipDecoderModel, config: Config, **kwargs: Any) -> Decoder:
|
2108
|
+
return CompositeRawDecoder(
|
2109
|
+
parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
|
2110
|
+
)
|
2057
2111
|
|
2058
2112
|
@staticmethod
|
2059
2113
|
def create_iterable_decoder(
|
@@ -2065,33 +2119,30 @@ class ModelToComponentFactory:
|
|
2065
2119
|
def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder:
|
2066
2120
|
return XmlDecoder(parameters={})
|
2067
2121
|
|
2068
|
-
@staticmethod
|
2069
|
-
def create_gzipjson_decoder(
|
2070
|
-
model: GzipJsonDecoderModel, config: Config, **kwargs: Any
|
2071
|
-
) -> GzipJsonDecoder:
|
2072
|
-
return GzipJsonDecoder(parameters={}, encoding=model.encoding)
|
2073
|
-
|
2074
2122
|
def create_zipfile_decoder(
|
2075
2123
|
self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
|
2076
2124
|
) -> ZipfileDecoder:
|
2077
|
-
parser
|
2078
|
-
return ZipfileDecoder(parser=parser)
|
2079
|
-
|
2080
|
-
def create_gzip_parser(
|
2081
|
-
self, model: GzipParserModel, config: Config, **kwargs: Any
|
2082
|
-
) -> GzipParser:
|
2083
|
-
inner_parser = self._create_component_from_model(model=model.inner_parser, config=config)
|
2084
|
-
return GzipParser(inner_parser=inner_parser)
|
2125
|
+
return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config))
|
2085
2126
|
|
2086
2127
|
@staticmethod
|
2087
|
-
def
|
2088
|
-
|
2128
|
+
def _get_parser(model: BaseModel, config: Config) -> Parser:
|
2129
|
+
if isinstance(model, JsonDecoderModel):
|
2130
|
+
# Note that the logic is a bit different from the JsonDecoder as there is some legacy that is maintained to return {} on error cases
|
2131
|
+
return JsonParser()
|
2132
|
+
elif isinstance(model, JsonlDecoderModel):
|
2133
|
+
return JsonLineParser()
|
2134
|
+
elif isinstance(model, CsvDecoderModel):
|
2135
|
+
return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
|
2136
|
+
elif isinstance(model, GzipDecoderModel):
|
2137
|
+
return GzipParser(
|
2138
|
+
inner_parser=ModelToComponentFactory._get_parser(model.decoder, config)
|
2139
|
+
)
|
2140
|
+
elif isinstance(
|
2141
|
+
model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel)
|
2142
|
+
):
|
2143
|
+
raise ValueError(f"Decoder type {model} does not have parser associated to it")
|
2089
2144
|
|
2090
|
-
|
2091
|
-
self, model: CompositeRawDecoderModel, config: Config, **kwargs: Any
|
2092
|
-
) -> CompositeRawDecoder:
|
2093
|
-
parser = self._create_component_from_model(model=model.parser, config=config)
|
2094
|
-
return CompositeRawDecoder(parser=parser)
|
2145
|
+
raise ValueError(f"Unknown decoder type {model}")
|
2095
2146
|
|
2096
2147
|
@staticmethod
|
2097
2148
|
def create_json_file_schema_loader(
|
@@ -2125,16 +2176,11 @@ class ModelToComponentFactory:
|
|
2125
2176
|
additional_jwt_payload=model.additional_jwt_payload,
|
2126
2177
|
)
|
2127
2178
|
|
2128
|
-
@staticmethod
|
2129
2179
|
def create_list_partition_router(
|
2130
|
-
model: ListPartitionRouterModel, config: Config, **kwargs: Any
|
2180
|
+
self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
|
2131
2181
|
) -> ListPartitionRouter:
|
2132
2182
|
request_option = (
|
2133
|
-
|
2134
|
-
inject_into=RequestOptionType(model.request_option.inject_into.value),
|
2135
|
-
field_name=model.request_option.field_name,
|
2136
|
-
parameters=model.parameters or {},
|
2137
|
-
)
|
2183
|
+
self._create_component_from_model(model.request_option, config)
|
2138
2184
|
if model.request_option
|
2139
2185
|
else None
|
2140
2186
|
)
|
@@ -2330,7 +2376,25 @@ class ModelToComponentFactory:
|
|
2330
2376
|
model: RequestOptionModel, config: Config, **kwargs: Any
|
2331
2377
|
) -> RequestOption:
|
2332
2378
|
inject_into = RequestOptionType(model.inject_into.value)
|
2333
|
-
|
2379
|
+
field_path: Optional[List[Union[InterpolatedString, str]]] = (
|
2380
|
+
[
|
2381
|
+
InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
|
2382
|
+
for segment in model.field_path
|
2383
|
+
]
|
2384
|
+
if model.field_path
|
2385
|
+
else None
|
2386
|
+
)
|
2387
|
+
field_name = (
|
2388
|
+
InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
|
2389
|
+
if model.field_name
|
2390
|
+
else None
|
2391
|
+
)
|
2392
|
+
return RequestOption(
|
2393
|
+
field_name=field_name,
|
2394
|
+
field_path=field_path,
|
2395
|
+
inject_into=inject_into,
|
2396
|
+
parameters=kwargs.get("parameters", {}),
|
2397
|
+
)
|
2334
2398
|
|
2335
2399
|
def create_record_selector(
|
2336
2400
|
self,
|
@@ -2351,6 +2415,8 @@ class ModelToComponentFactory:
|
|
2351
2415
|
if model.record_filter
|
2352
2416
|
else None
|
2353
2417
|
)
|
2418
|
+
|
2419
|
+
transform_before_filtering = False
|
2354
2420
|
if client_side_incremental_sync:
|
2355
2421
|
record_filter = ClientSideIncrementalRecordFilterDecorator(
|
2356
2422
|
config=config,
|
@@ -2360,6 +2426,8 @@ class ModelToComponentFactory:
|
|
2360
2426
|
else None,
|
2361
2427
|
**client_side_incremental_sync,
|
2362
2428
|
)
|
2429
|
+
transform_before_filtering = True
|
2430
|
+
|
2363
2431
|
schema_normalization = (
|
2364
2432
|
TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
|
2365
2433
|
if isinstance(model.schema_normalization, SchemaNormalizationModel)
|
@@ -2374,6 +2442,7 @@ class ModelToComponentFactory:
|
|
2374
2442
|
transformations=transformations or [],
|
2375
2443
|
schema_normalization=schema_normalization,
|
2376
2444
|
parameters=model.parameters or {},
|
2445
|
+
transform_before_filtering=transform_before_filtering,
|
2377
2446
|
)
|
2378
2447
|
|
2379
2448
|
@staticmethod
|
@@ -2894,3 +2963,84 @@ class ModelToComponentFactory:
|
|
2894
2963
|
return isinstance(parser.inner_parser, JsonParser)
|
2895
2964
|
else:
|
2896
2965
|
return False
|
2966
|
+
|
2967
|
+
def create_http_api_budget(
|
2968
|
+
self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
|
2969
|
+
) -> HttpAPIBudget:
|
2970
|
+
policies = [
|
2971
|
+
self._create_component_from_model(model=policy, config=config)
|
2972
|
+
for policy in model.policies
|
2973
|
+
]
|
2974
|
+
|
2975
|
+
return HttpAPIBudget(
|
2976
|
+
policies=policies,
|
2977
|
+
ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset",
|
2978
|
+
ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining",
|
2979
|
+
status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429],
|
2980
|
+
)
|
2981
|
+
|
2982
|
+
def create_fixed_window_call_rate_policy(
|
2983
|
+
self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
|
2984
|
+
) -> FixedWindowCallRatePolicy:
|
2985
|
+
matchers = [
|
2986
|
+
self._create_component_from_model(model=matcher, config=config)
|
2987
|
+
for matcher in model.matchers
|
2988
|
+
]
|
2989
|
+
|
2990
|
+
# Set the initial reset timestamp to 10 days from now.
|
2991
|
+
# This value will be updated by the first request.
|
2992
|
+
return FixedWindowCallRatePolicy(
|
2993
|
+
next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10),
|
2994
|
+
period=parse_duration(model.period),
|
2995
|
+
call_limit=model.call_limit,
|
2996
|
+
matchers=matchers,
|
2997
|
+
)
|
2998
|
+
|
2999
|
+
def create_moving_window_call_rate_policy(
|
3000
|
+
self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
|
3001
|
+
) -> MovingWindowCallRatePolicy:
|
3002
|
+
rates = [
|
3003
|
+
self._create_component_from_model(model=rate, config=config) for rate in model.rates
|
3004
|
+
]
|
3005
|
+
matchers = [
|
3006
|
+
self._create_component_from_model(model=matcher, config=config)
|
3007
|
+
for matcher in model.matchers
|
3008
|
+
]
|
3009
|
+
return MovingWindowCallRatePolicy(
|
3010
|
+
rates=rates,
|
3011
|
+
matchers=matchers,
|
3012
|
+
)
|
3013
|
+
|
3014
|
+
def create_unlimited_call_rate_policy(
|
3015
|
+
self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
|
3016
|
+
) -> UnlimitedCallRatePolicy:
|
3017
|
+
matchers = [
|
3018
|
+
self._create_component_from_model(model=matcher, config=config)
|
3019
|
+
for matcher in model.matchers
|
3020
|
+
]
|
3021
|
+
|
3022
|
+
return UnlimitedCallRatePolicy(
|
3023
|
+
matchers=matchers,
|
3024
|
+
)
|
3025
|
+
|
3026
|
+
def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
|
3027
|
+
return Rate(
|
3028
|
+
limit=model.limit,
|
3029
|
+
interval=parse_duration(model.interval),
|
3030
|
+
)
|
3031
|
+
|
3032
|
+
def create_http_request_matcher(
|
3033
|
+
self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
|
3034
|
+
) -> HttpRequestRegexMatcher:
|
3035
|
+
return HttpRequestRegexMatcher(
|
3036
|
+
method=model.method,
|
3037
|
+
url_base=model.url_base,
|
3038
|
+
url_path_pattern=model.url_path_pattern,
|
3039
|
+
params=model.params,
|
3040
|
+
headers=model.headers,
|
3041
|
+
)
|
3042
|
+
|
3043
|
+
def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None:
|
3044
|
+
self._api_budget = self.create_component(
|
3045
|
+
model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
|
3046
|
+
)
|
@@ -4,9 +4,9 @@ from dataclasses import InitVar, dataclass, field
|
|
4
4
|
from typing import Any, Callable, Iterable, Mapping, Optional
|
5
5
|
|
6
6
|
from airbyte_cdk.models import FailureType
|
7
|
+
from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
|
7
8
|
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
|
8
9
|
AsyncJobOrchestrator,
|
9
|
-
AsyncPartition,
|
10
10
|
)
|
11
11
|
from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
|
12
12
|
SinglePartitionRouter,
|
@@ -42,12 +42,12 @@ class AsyncJobPartitionRouter(StreamSlicer):
|
|
42
42
|
|
43
43
|
for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
|
44
44
|
yield StreamSlice(
|
45
|
-
partition=dict(completed_partition.stream_slice.partition)
|
46
|
-
| {"partition": completed_partition},
|
45
|
+
partition=dict(completed_partition.stream_slice.partition),
|
47
46
|
cursor_slice=completed_partition.stream_slice.cursor_slice,
|
47
|
+
extra_fields={"jobs": list(completed_partition.jobs)},
|
48
48
|
)
|
49
49
|
|
50
|
-
def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
|
50
|
+
def fetch_records(self, async_jobs: Iterable[AsyncJob]) -> Iterable[Mapping[str, Any]]:
|
51
51
|
"""
|
52
52
|
This method of fetching records extends beyond what a PartitionRouter/StreamSlicer should
|
53
53
|
be responsible for. However, this was added in because the JobOrchestrator is required to
|
@@ -62,4 +62,4 @@ class AsyncJobPartitionRouter(StreamSlicer):
|
|
62
62
|
failure_type=FailureType.system_error,
|
63
63
|
)
|
64
64
|
|
65
|
-
return self._job_orchestrator.fetch_records(partition=partition)
|
65
|
+
return self._job_orchestrator.fetch_records(async_jobs=async_jobs)
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
from dataclasses import InitVar, dataclass
|
6
|
-
from typing import Any, Iterable, List, Mapping, Optional, Union
|
6
|
+
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
|
7
7
|
|
8
8
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
9
9
|
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
@@ -100,7 +100,9 @@ class ListPartitionRouter(PartitionRouter):
|
|
100
100
|
):
|
101
101
|
slice_value = stream_slice.get(self._cursor_field.eval(self.config))
|
102
102
|
if slice_value:
|
103
|
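-
return {self.request_option.field_name.eval(self.config): slice_value}  # type: ignore # field_name is always casted to an interpolated string
|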
103
|
+
options: MutableMapping[str, Any] = {}
|
104
|
+
self.request_option.inject_into_request(options, slice_value, self.config)
|
105
|
+
return options
|
104
106
|
else:
|
105
107
|
return {}
|
106
108
|
else:
|