airbyte-cdk 6.34.0.dev1__py3-none-any.whl → 6.34.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +12 -16
- airbyte_cdk/connector_builder/message_grouper.py +448 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
- airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
- airbyte_cdk/sources/declarative/auth/oauth.py +1 -6
- airbyte_cdk/sources/declarative/auth/token.py +8 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +19 -30
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +85 -203
- airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
- airbyte_cdk/sources/declarative/decoders/__init__.py +4 -0
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -7
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +58 -12
- airbyte_cdk/sources/declarative/extractors/record_selector.py +3 -12
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +25 -56
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -12
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -9
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +41 -150
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +84 -234
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +18 -26
- airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -8
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -16
- airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -4
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
- airbyte_cdk/sources/file_based/file_based_source.py +37 -70
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +12 -107
- airbyte_cdk/sources/file_based/stream/__init__.py +1 -10
- airbyte_cdk/sources/streams/call_rate.py +47 -185
- airbyte_cdk/sources/streams/http/http.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +56 -217
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +73 -144
- airbyte_cdk/utils/datetime_helpers.py +66 -48
- airbyte_cdk/utils/mapping_helpers.py +26 -126
- {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/RECORD +43 -52
- airbyte_cdk/connector_builder/test_reader/__init__.py +0 -7
- airbyte_cdk/connector_builder/test_reader/helpers.py +0 -591
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +0 -160
- airbyte_cdk/connector_builder/test_reader/reader.py +0 -441
- airbyte_cdk/connector_builder/test_reader/types.py +0 -75
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +0 -81
- airbyte_cdk/sources/file_based/stream/identities_stream.py +0 -47
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +0 -85
- airbyte_cdk/sources/specs/transfer_modes.py +0 -26
- airbyte_cdk/sources/streams/permissions/identities_stream.py +0 -75
- {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/entry_points.txt +0 -0
@@ -60,8 +60,10 @@ from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
|
|
60
60
|
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
61
61
|
from airbyte_cdk.sources.declarative.decoders import (
|
62
62
|
Decoder,
|
63
|
+
GzipJsonDecoder,
|
63
64
|
IterableDecoder,
|
64
65
|
JsonDecoder,
|
66
|
+
JsonlDecoder,
|
65
67
|
PaginationDecoderDecorator,
|
66
68
|
XmlDecoder,
|
67
69
|
ZipfileDecoder,
|
@@ -101,8 +103,8 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
|
|
101
103
|
LegacyToPerPartitionStateMigration,
|
102
104
|
)
|
103
105
|
from airbyte_cdk.sources.declarative.models import (
|
106
|
+
Clamping,
|
104
107
|
CustomStateMigration,
|
105
|
-
GzipDecoder,
|
106
108
|
)
|
107
109
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
108
110
|
AddedFieldDefinition as AddedFieldDefinitionModel,
|
@@ -140,6 +142,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
140
142
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
141
143
|
CompositeErrorHandler as CompositeErrorHandlerModel,
|
142
144
|
)
|
145
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
146
|
+
CompositeRawDecoder as CompositeRawDecoderModel,
|
147
|
+
)
|
143
148
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
144
149
|
ConcurrencyLevel as ConcurrencyLevelModel,
|
145
150
|
)
|
@@ -150,7 +155,7 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
150
155
|
ConstantBackoffStrategy as ConstantBackoffStrategyModel,
|
151
156
|
)
|
152
157
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
153
|
-
|
158
|
+
CsvParser as CsvParserModel,
|
154
159
|
)
|
155
160
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
156
161
|
CursorPagination as CursorPaginationModel,
|
@@ -221,17 +226,14 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
221
226
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
222
227
|
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
223
228
|
)
|
224
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
225
|
-
FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
|
226
|
-
)
|
227
229
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
228
230
|
FlattenFields as FlattenFieldsModel,
|
229
231
|
)
|
230
232
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
231
|
-
|
233
|
+
GzipJsonDecoder as GzipJsonDecoderModel,
|
232
234
|
)
|
233
235
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
234
|
-
|
236
|
+
GzipParser as GzipParserModel,
|
235
237
|
)
|
236
238
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
237
239
|
HttpComponentsResolver as HttpComponentsResolverModel,
|
@@ -239,9 +241,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
239
241
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
240
242
|
HttpRequester as HttpRequesterModel,
|
241
243
|
)
|
242
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
243
|
-
HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
|
244
|
-
)
|
245
244
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
246
245
|
HttpResponseFilter as HttpResponseFilterModel,
|
247
246
|
)
|
@@ -260,6 +259,12 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
260
259
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
261
260
|
JsonlDecoder as JsonlDecoderModel,
|
262
261
|
)
|
262
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
263
|
+
JsonLineParser as JsonLineParserModel,
|
264
|
+
)
|
265
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
266
|
+
JsonParser as JsonParserModel,
|
267
|
+
)
|
263
268
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
264
269
|
JwtAuthenticator as JwtAuthenticatorModel,
|
265
270
|
)
|
@@ -290,9 +295,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
290
295
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
291
296
|
MinMaxDatetime as MinMaxDatetimeModel,
|
292
297
|
)
|
293
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
294
|
-
MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel,
|
295
|
-
)
|
296
298
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
297
299
|
NoAuth as NoAuthModel,
|
298
300
|
)
|
@@ -311,9 +313,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
311
313
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
312
314
|
ParentStreamConfig as ParentStreamConfigModel,
|
313
315
|
)
|
314
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
315
|
-
Rate as RateModel,
|
316
|
-
)
|
317
316
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
318
317
|
RecordFilter as RecordFilterModel,
|
319
318
|
)
|
@@ -357,9 +356,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
357
356
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
358
357
|
TypesMap as TypesMapModel,
|
359
358
|
)
|
360
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
361
|
-
UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel,
|
362
|
-
)
|
363
359
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
|
364
360
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
365
361
|
WaitTimeFromHeader as WaitTimeFromHeaderModel,
|
@@ -473,15 +469,6 @@ from airbyte_cdk.sources.message import (
|
|
473
469
|
MessageRepository,
|
474
470
|
NoopMessageRepository,
|
475
471
|
)
|
476
|
-
from airbyte_cdk.sources.streams.call_rate import (
|
477
|
-
APIBudget,
|
478
|
-
FixedWindowCallRatePolicy,
|
479
|
-
HttpAPIBudget,
|
480
|
-
HttpRequestRegexMatcher,
|
481
|
-
MovingWindowCallRatePolicy,
|
482
|
-
Rate,
|
483
|
-
UnlimitedCallRatePolicy,
|
484
|
-
)
|
485
472
|
from airbyte_cdk.sources.streams.concurrent.clamping import (
|
486
473
|
ClampingEndProvider,
|
487
474
|
ClampingStrategy,
|
@@ -533,7 +520,6 @@ class ModelToComponentFactory:
|
|
533
520
|
self._evaluate_log_level(emit_connector_builder_messages)
|
534
521
|
)
|
535
522
|
self._connector_state_manager = connector_state_manager or ConnectorStateManager()
|
536
|
-
self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
|
537
523
|
|
538
524
|
def _init_mappings(self) -> None:
|
539
525
|
self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
|
@@ -545,9 +531,9 @@ class ModelToComponentFactory:
|
|
545
531
|
CheckStreamModel: self.create_check_stream,
|
546
532
|
CheckDynamicStreamModel: self.create_check_dynamic_stream,
|
547
533
|
CompositeErrorHandlerModel: self.create_composite_error_handler,
|
534
|
+
CompositeRawDecoderModel: self.create_composite_raw_decoder,
|
548
535
|
ConcurrencyLevelModel: self.create_concurrency_level,
|
549
536
|
ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
|
550
|
-
CsvDecoderModel: self.create_csv_decoder,
|
551
537
|
CursorPaginationModel: self.create_cursor_pagination,
|
552
538
|
CustomAuthenticatorModel: self.create_custom_component,
|
553
539
|
CustomBackoffStrategyModel: self.create_custom_component,
|
@@ -577,7 +563,10 @@ class ModelToComponentFactory:
|
|
577
563
|
InlineSchemaLoaderModel: self.create_inline_schema_loader,
|
578
564
|
JsonDecoderModel: self.create_json_decoder,
|
579
565
|
JsonlDecoderModel: self.create_jsonl_decoder,
|
580
|
-
|
566
|
+
JsonLineParserModel: self.create_json_line_parser,
|
567
|
+
JsonParserModel: self.create_json_parser,
|
568
|
+
GzipJsonDecoderModel: self.create_gzipjson_decoder,
|
569
|
+
GzipParserModel: self.create_gzip_parser,
|
581
570
|
KeysToLowerModel: self.create_keys_to_lower_transformation,
|
582
571
|
KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
|
583
572
|
KeysReplaceModel: self.create_keys_replace_transformation,
|
@@ -618,12 +607,6 @@ class ModelToComponentFactory:
|
|
618
607
|
StreamConfigModel: self.create_stream_config,
|
619
608
|
ComponentMappingDefinitionModel: self.create_components_mapping_definition,
|
620
609
|
ZipfileDecoderModel: self.create_zipfile_decoder,
|
621
|
-
HTTPAPIBudgetModel: self.create_http_api_budget,
|
622
|
-
FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
|
623
|
-
MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
|
624
|
-
UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
|
625
|
-
RateModel: self.create_rate,
|
626
|
-
HttpRequestRegexMatcherModel: self.create_http_request_matcher,
|
627
610
|
}
|
628
611
|
|
629
612
|
# Needed for the case where we need to perform a second parse on the fields of a custom component
|
@@ -750,8 +733,8 @@ class ModelToComponentFactory:
|
|
750
733
|
}
|
751
734
|
return names_to_types[value_type]
|
752
735
|
|
736
|
+
@staticmethod
|
753
737
|
def create_api_key_authenticator(
|
754
|
-
self,
|
755
738
|
model: ApiKeyAuthenticatorModel,
|
756
739
|
config: Config,
|
757
740
|
token_provider: Optional[TokenProvider] = None,
|
@@ -773,8 +756,10 @@ class ModelToComponentFactory:
|
|
773
756
|
)
|
774
757
|
|
775
758
|
request_option = (
|
776
|
-
|
777
|
-
model.inject_into,
|
759
|
+
RequestOption(
|
760
|
+
inject_into=RequestOptionType(model.inject_into.inject_into.value),
|
761
|
+
field_name=model.inject_into.field_name,
|
762
|
+
parameters=model.parameters or {},
|
778
763
|
)
|
779
764
|
if model.inject_into
|
780
765
|
else RequestOption(
|
@@ -783,7 +768,6 @@ class ModelToComponentFactory:
|
|
783
768
|
parameters=model.parameters or {},
|
784
769
|
)
|
785
770
|
)
|
786
|
-
|
787
771
|
return ApiKeyAuthenticator(
|
788
772
|
token_provider=(
|
789
773
|
token_provider
|
@@ -865,7 +849,7 @@ class ModelToComponentFactory:
|
|
865
849
|
token_provider=token_provider,
|
866
850
|
)
|
867
851
|
else:
|
868
|
-
return
|
852
|
+
return ModelToComponentFactory.create_api_key_authenticator(
|
869
853
|
ApiKeyAuthenticatorModel(
|
870
854
|
type="ApiKeyAuthenticator",
|
871
855
|
api_token="",
|
@@ -951,17 +935,6 @@ class ModelToComponentFactory:
|
|
951
935
|
parameters={},
|
952
936
|
)
|
953
937
|
|
954
|
-
@staticmethod
|
955
|
-
def apply_stream_state_migrations(
|
956
|
-
stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
|
957
|
-
) -> MutableMapping[str, Any]:
|
958
|
-
if stream_state_migrations:
|
959
|
-
for state_migration in stream_state_migrations:
|
960
|
-
if state_migration.should_migrate(stream_state):
|
961
|
-
# The state variable is expected to be mutable but the migrate method returns an immutable mapping.
|
962
|
-
stream_state = dict(state_migration.migrate(stream_state))
|
963
|
-
return stream_state
|
964
|
-
|
965
938
|
def create_concurrent_cursor_from_datetime_based_cursor(
|
966
939
|
self,
|
967
940
|
model_type: Type[BaseModel],
|
@@ -971,7 +944,6 @@ class ModelToComponentFactory:
|
|
971
944
|
config: Config,
|
972
945
|
message_repository: Optional[MessageRepository] = None,
|
973
946
|
runtime_lookback_window: Optional[datetime.timedelta] = None,
|
974
|
-
stream_state_migrations: Optional[List[Any]] = None,
|
975
947
|
**kwargs: Any,
|
976
948
|
) -> ConcurrentCursor:
|
977
949
|
# Per-partition incremental streams can dynamically create child cursors which will pass their current
|
@@ -982,7 +954,6 @@ class ModelToComponentFactory:
|
|
982
954
|
if "stream_state" not in kwargs
|
983
955
|
else kwargs["stream_state"]
|
984
956
|
)
|
985
|
-
stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
|
986
957
|
|
987
958
|
component_type = component_definition.get("type")
|
988
959
|
if component_definition.get("type") != model_type.__name__:
|
@@ -1218,7 +1189,6 @@ class ModelToComponentFactory:
|
|
1218
1189
|
config: Config,
|
1219
1190
|
stream_state: MutableMapping[str, Any],
|
1220
1191
|
partition_router: PartitionRouter,
|
1221
|
-
stream_state_migrations: Optional[List[Any]] = None,
|
1222
1192
|
**kwargs: Any,
|
1223
1193
|
) -> ConcurrentPerPartitionCursor:
|
1224
1194
|
component_type = component_definition.get("type")
|
@@ -1267,10 +1237,8 @@ class ModelToComponentFactory:
|
|
1267
1237
|
stream_namespace=stream_namespace,
|
1268
1238
|
config=config,
|
1269
1239
|
message_repository=NoopMessageRepository(),
|
1270
|
-
stream_state_migrations=stream_state_migrations,
|
1271
1240
|
)
|
1272
1241
|
)
|
1273
|
-
stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
|
1274
1242
|
|
1275
1243
|
# Return the concurrent cursor and state converter
|
1276
1244
|
return ConcurrentPerPartitionCursor(
|
@@ -1521,15 +1489,19 @@ class ModelToComponentFactory:
|
|
1521
1489
|
)
|
1522
1490
|
|
1523
1491
|
end_time_option = (
|
1524
|
-
|
1525
|
-
model.end_time_option,
|
1492
|
+
RequestOption(
|
1493
|
+
inject_into=RequestOptionType(model.end_time_option.inject_into.value),
|
1494
|
+
field_name=model.end_time_option.field_name,
|
1495
|
+
parameters=model.parameters or {},
|
1526
1496
|
)
|
1527
1497
|
if model.end_time_option
|
1528
1498
|
else None
|
1529
1499
|
)
|
1530
1500
|
start_time_option = (
|
1531
|
-
|
1532
|
-
model.start_time_option,
|
1501
|
+
RequestOption(
|
1502
|
+
inject_into=RequestOptionType(model.start_time_option.inject_into.value),
|
1503
|
+
field_name=model.start_time_option.field_name,
|
1504
|
+
parameters=model.parameters or {},
|
1533
1505
|
)
|
1534
1506
|
if model.start_time_option
|
1535
1507
|
else None
|
@@ -1600,15 +1572,19 @@ class ModelToComponentFactory:
|
|
1600
1572
|
cursor_model = model.incremental_sync
|
1601
1573
|
|
1602
1574
|
end_time_option = (
|
1603
|
-
|
1604
|
-
cursor_model.end_time_option,
|
1575
|
+
RequestOption(
|
1576
|
+
inject_into=RequestOptionType(cursor_model.end_time_option.inject_into.value),
|
1577
|
+
field_name=cursor_model.end_time_option.field_name,
|
1578
|
+
parameters=cursor_model.parameters or {},
|
1605
1579
|
)
|
1606
1580
|
if cursor_model.end_time_option
|
1607
1581
|
else None
|
1608
1582
|
)
|
1609
1583
|
start_time_option = (
|
1610
|
-
|
1611
|
-
cursor_model.start_time_option,
|
1584
|
+
RequestOption(
|
1585
|
+
inject_into=RequestOptionType(cursor_model.start_time_option.inject_into.value),
|
1586
|
+
field_name=cursor_model.start_time_option.field_name,
|
1587
|
+
parameters=cursor_model.parameters or {},
|
1612
1588
|
)
|
1613
1589
|
if cursor_model.start_time_option
|
1614
1590
|
else None
|
@@ -1680,7 +1656,7 @@ class ModelToComponentFactory:
|
|
1680
1656
|
) -> Optional[PartitionRouter]:
|
1681
1657
|
if (
|
1682
1658
|
hasattr(model, "partition_router")
|
1683
|
-
and isinstance(model, SimpleRetrieverModel
|
1659
|
+
and isinstance(model, SimpleRetrieverModel)
|
1684
1660
|
and model.partition_router
|
1685
1661
|
):
|
1686
1662
|
stream_slicer_model = model.partition_router
|
@@ -1714,31 +1690,6 @@ class ModelToComponentFactory:
|
|
1714
1690
|
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
1715
1691
|
|
1716
1692
|
if model.incremental_sync and stream_slicer:
|
1717
|
-
if model.retriever.type == "AsyncRetriever":
|
1718
|
-
if model.incremental_sync.type != "DatetimeBasedCursor":
|
1719
|
-
# We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support or unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
|
1720
|
-
raise ValueError(
|
1721
|
-
"AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
|
1722
|
-
)
|
1723
|
-
if stream_slicer:
|
1724
|
-
return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
1725
|
-
state_manager=self._connector_state_manager,
|
1726
|
-
model_type=DatetimeBasedCursorModel,
|
1727
|
-
component_definition=model.incremental_sync.__dict__,
|
1728
|
-
stream_name=model.name or "",
|
1729
|
-
stream_namespace=None,
|
1730
|
-
config=config or {},
|
1731
|
-
stream_state={},
|
1732
|
-
partition_router=stream_slicer,
|
1733
|
-
)
|
1734
|
-
return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
1735
|
-
model_type=DatetimeBasedCursorModel,
|
1736
|
-
component_definition=model.incremental_sync.__dict__,
|
1737
|
-
stream_name=model.name or "",
|
1738
|
-
stream_namespace=None,
|
1739
|
-
config=config or {},
|
1740
|
-
)
|
1741
|
-
|
1742
1693
|
incremental_sync_model = model.incremental_sync
|
1743
1694
|
if (
|
1744
1695
|
hasattr(incremental_sync_model, "global_substream_cursor")
|
@@ -1779,7 +1730,6 @@ class ModelToComponentFactory:
|
|
1779
1730
|
stream_name=model.name or "",
|
1780
1731
|
stream_namespace=None,
|
1781
1732
|
config=config or {},
|
1782
|
-
stream_state_migrations=model.state_migrations,
|
1783
1733
|
)
|
1784
1734
|
return (
|
1785
1735
|
self._create_component_from_model(model=model.incremental_sync, config=config)
|
@@ -1936,8 +1886,6 @@ class ModelToComponentFactory:
|
|
1936
1886
|
)
|
1937
1887
|
)
|
1938
1888
|
|
1939
|
-
api_budget = self._api_budget
|
1940
|
-
|
1941
1889
|
request_options_provider = InterpolatedRequestOptionsProvider(
|
1942
1890
|
request_body_data=model.request_body_data,
|
1943
1891
|
request_body_json=model.request_body_json,
|
@@ -1958,7 +1906,6 @@ class ModelToComponentFactory:
|
|
1958
1906
|
path=model.path,
|
1959
1907
|
authenticator=authenticator,
|
1960
1908
|
error_handler=error_handler,
|
1961
|
-
api_budget=api_budget,
|
1962
1909
|
http_method=HttpMethod[model.http_method.value],
|
1963
1910
|
request_options_provider=request_options_provider,
|
1964
1911
|
config=config,
|
@@ -2088,26 +2035,25 @@ class ModelToComponentFactory:
|
|
2088
2035
|
)
|
2089
2036
|
|
2090
2037
|
@staticmethod
|
2091
|
-
def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) ->
|
2038
|
+
def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
|
2092
2039
|
return JsonDecoder(parameters={})
|
2093
2040
|
|
2094
2041
|
@staticmethod
|
2095
|
-
def
|
2096
|
-
|
2097
|
-
|
2098
|
-
)
|
2042
|
+
def create_json_parser(model: JsonParserModel, config: Config, **kwargs: Any) -> JsonParser:
|
2043
|
+
encoding = model.encoding if model.encoding else "utf-8"
|
2044
|
+
return JsonParser(encoding=encoding)
|
2099
2045
|
|
2100
2046
|
@staticmethod
|
2101
|
-
def create_jsonl_decoder(
|
2102
|
-
|
2103
|
-
|
2104
|
-
)
|
2047
|
+
def create_jsonl_decoder(
|
2048
|
+
model: JsonlDecoderModel, config: Config, **kwargs: Any
|
2049
|
+
) -> JsonlDecoder:
|
2050
|
+
return JsonlDecoder(parameters={})
|
2105
2051
|
|
2106
2052
|
@staticmethod
|
2107
|
-
def
|
2108
|
-
|
2109
|
-
|
2110
|
-
)
|
2053
|
+
def create_json_line_parser(
|
2054
|
+
model: JsonLineParserModel, config: Config, **kwargs: Any
|
2055
|
+
) -> JsonLineParser:
|
2056
|
+
return JsonLineParser(encoding=model.encoding)
|
2111
2057
|
|
2112
2058
|
@staticmethod
|
2113
2059
|
def create_iterable_decoder(
|
@@ -2119,30 +2065,33 @@ class ModelToComponentFactory:
|
|
2119
2065
|
def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder:
|
2120
2066
|
return XmlDecoder(parameters={})
|
2121
2067
|
|
2068
|
+
@staticmethod
|
2069
|
+
def create_gzipjson_decoder(
|
2070
|
+
model: GzipJsonDecoderModel, config: Config, **kwargs: Any
|
2071
|
+
) -> GzipJsonDecoder:
|
2072
|
+
return GzipJsonDecoder(parameters={}, encoding=model.encoding)
|
2073
|
+
|
2122
2074
|
def create_zipfile_decoder(
|
2123
2075
|
self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
|
2124
2076
|
) -> ZipfileDecoder:
|
2125
|
-
|
2077
|
+
parser = self._create_component_from_model(model=model.parser, config=config)
|
2078
|
+
return ZipfileDecoder(parser=parser)
|
2079
|
+
|
2080
|
+
def create_gzip_parser(
|
2081
|
+
self, model: GzipParserModel, config: Config, **kwargs: Any
|
2082
|
+
) -> GzipParser:
|
2083
|
+
inner_parser = self._create_component_from_model(model=model.inner_parser, config=config)
|
2084
|
+
return GzipParser(inner_parser=inner_parser)
|
2126
2085
|
|
2127
2086
|
@staticmethod
|
2128
|
-
def
|
2129
|
-
|
2130
|
-
# Note that the logic is a bit different from the JsonDecoder as there is some legacy that is maintained to return {} on error cases
|
2131
|
-
return JsonParser()
|
2132
|
-
elif isinstance(model, JsonlDecoderModel):
|
2133
|
-
return JsonLineParser()
|
2134
|
-
elif isinstance(model, CsvDecoderModel):
|
2135
|
-
return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
|
2136
|
-
elif isinstance(model, GzipDecoderModel):
|
2137
|
-
return GzipParser(
|
2138
|
-
inner_parser=ModelToComponentFactory._get_parser(model.decoder, config)
|
2139
|
-
)
|
2140
|
-
elif isinstance(
|
2141
|
-
model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel)
|
2142
|
-
):
|
2143
|
-
raise ValueError(f"Decoder type {model} does not have parser associated to it")
|
2087
|
+
def create_csv_parser(model: CsvParserModel, config: Config, **kwargs: Any) -> CsvParser:
|
2088
|
+
return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
|
2144
2089
|
|
2145
|
-
|
2090
|
+
def create_composite_raw_decoder(
|
2091
|
+
self, model: CompositeRawDecoderModel, config: Config, **kwargs: Any
|
2092
|
+
) -> CompositeRawDecoder:
|
2093
|
+
parser = self._create_component_from_model(model=model.parser, config=config)
|
2094
|
+
return CompositeRawDecoder(parser=parser)
|
2146
2095
|
|
2147
2096
|
@staticmethod
|
2148
2097
|
def create_json_file_schema_loader(
|
@@ -2176,11 +2125,16 @@ class ModelToComponentFactory:
|
|
2176
2125
|
additional_jwt_payload=model.additional_jwt_payload,
|
2177
2126
|
)
|
2178
2127
|
|
2128
|
+
@staticmethod
|
2179
2129
|
def create_list_partition_router(
|
2180
|
-
|
2130
|
+
model: ListPartitionRouterModel, config: Config, **kwargs: Any
|
2181
2131
|
) -> ListPartitionRouter:
|
2182
2132
|
request_option = (
|
2183
|
-
|
2133
|
+
RequestOption(
|
2134
|
+
inject_into=RequestOptionType(model.request_option.inject_into.value),
|
2135
|
+
field_name=model.request_option.field_name,
|
2136
|
+
parameters=model.parameters or {},
|
2137
|
+
)
|
2184
2138
|
if model.request_option
|
2185
2139
|
else None
|
2186
2140
|
)
|
@@ -2376,25 +2330,7 @@ class ModelToComponentFactory:
|
|
2376
2330
|
model: RequestOptionModel, config: Config, **kwargs: Any
|
2377
2331
|
) -> RequestOption:
|
2378
2332
|
inject_into = RequestOptionType(model.inject_into.value)
|
2379
|
-
|
2380
|
-
[
|
2381
|
-
InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
|
2382
|
-
for segment in model.field_path
|
2383
|
-
]
|
2384
|
-
if model.field_path
|
2385
|
-
else None
|
2386
|
-
)
|
2387
|
-
field_name = (
|
2388
|
-
InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
|
2389
|
-
if model.field_name
|
2390
|
-
else None
|
2391
|
-
)
|
2392
|
-
return RequestOption(
|
2393
|
-
field_name=field_name,
|
2394
|
-
field_path=field_path,
|
2395
|
-
inject_into=inject_into,
|
2396
|
-
parameters=kwargs.get("parameters", {}),
|
2397
|
-
)
|
2333
|
+
return RequestOption(field_name=model.field_name, inject_into=inject_into, parameters={})
|
2398
2334
|
|
2399
2335
|
def create_record_selector(
|
2400
2336
|
self,
|
@@ -2415,8 +2351,6 @@ class ModelToComponentFactory:
|
|
2415
2351
|
if model.record_filter
|
2416
2352
|
else None
|
2417
2353
|
)
|
2418
|
-
|
2419
|
-
transform_before_filtering = False
|
2420
2354
|
if client_side_incremental_sync:
|
2421
2355
|
record_filter = ClientSideIncrementalRecordFilterDecorator(
|
2422
2356
|
config=config,
|
@@ -2426,8 +2360,6 @@ class ModelToComponentFactory:
|
|
2426
2360
|
else None,
|
2427
2361
|
**client_side_incremental_sync,
|
2428
2362
|
)
|
2429
|
-
transform_before_filtering = True
|
2430
|
-
|
2431
2363
|
schema_normalization = (
|
2432
2364
|
TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
|
2433
2365
|
if isinstance(model.schema_normalization, SchemaNormalizationModel)
|
@@ -2442,7 +2374,6 @@ class ModelToComponentFactory:
|
|
2442
2374
|
transformations=transformations or [],
|
2443
2375
|
schema_normalization=schema_normalization,
|
2444
2376
|
parameters=model.parameters or {},
|
2445
|
-
transform_before_filtering=transform_before_filtering,
|
2446
2377
|
)
|
2447
2378
|
|
2448
2379
|
@staticmethod
|
@@ -2963,84 +2894,3 @@ class ModelToComponentFactory:
|
|
2963
2894
|
return isinstance(parser.inner_parser, JsonParser)
|
2964
2895
|
else:
|
2965
2896
|
return False
|
2966
|
-
|
2967
|
-
def create_http_api_budget(
|
2968
|
-
self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
|
2969
|
-
) -> HttpAPIBudget:
|
2970
|
-
policies = [
|
2971
|
-
self._create_component_from_model(model=policy, config=config)
|
2972
|
-
for policy in model.policies
|
2973
|
-
]
|
2974
|
-
|
2975
|
-
return HttpAPIBudget(
|
2976
|
-
policies=policies,
|
2977
|
-
ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset",
|
2978
|
-
ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining",
|
2979
|
-
status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429],
|
2980
|
-
)
|
2981
|
-
|
2982
|
-
def create_fixed_window_call_rate_policy(
|
2983
|
-
self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
|
2984
|
-
) -> FixedWindowCallRatePolicy:
|
2985
|
-
matchers = [
|
2986
|
-
self._create_component_from_model(model=matcher, config=config)
|
2987
|
-
for matcher in model.matchers
|
2988
|
-
]
|
2989
|
-
|
2990
|
-
# Set the initial reset timestamp to 10 days from now.
|
2991
|
-
# This value will be updated by the first request.
|
2992
|
-
return FixedWindowCallRatePolicy(
|
2993
|
-
next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10),
|
2994
|
-
period=parse_duration(model.period),
|
2995
|
-
call_limit=model.call_limit,
|
2996
|
-
matchers=matchers,
|
2997
|
-
)
|
2998
|
-
|
2999
|
-
def create_moving_window_call_rate_policy(
|
3000
|
-
self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
|
3001
|
-
) -> MovingWindowCallRatePolicy:
|
3002
|
-
rates = [
|
3003
|
-
self._create_component_from_model(model=rate, config=config) for rate in model.rates
|
3004
|
-
]
|
3005
|
-
matchers = [
|
3006
|
-
self._create_component_from_model(model=matcher, config=config)
|
3007
|
-
for matcher in model.matchers
|
3008
|
-
]
|
3009
|
-
return MovingWindowCallRatePolicy(
|
3010
|
-
rates=rates,
|
3011
|
-
matchers=matchers,
|
3012
|
-
)
|
3013
|
-
|
3014
|
-
def create_unlimited_call_rate_policy(
|
3015
|
-
self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
|
3016
|
-
) -> UnlimitedCallRatePolicy:
|
3017
|
-
matchers = [
|
3018
|
-
self._create_component_from_model(model=matcher, config=config)
|
3019
|
-
for matcher in model.matchers
|
3020
|
-
]
|
3021
|
-
|
3022
|
-
return UnlimitedCallRatePolicy(
|
3023
|
-
matchers=matchers,
|
3024
|
-
)
|
3025
|
-
|
3026
|
-
def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
|
3027
|
-
return Rate(
|
3028
|
-
limit=model.limit,
|
3029
|
-
interval=parse_duration(model.interval),
|
3030
|
-
)
|
3031
|
-
|
3032
|
-
def create_http_request_matcher(
|
3033
|
-
self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
|
3034
|
-
) -> HttpRequestRegexMatcher:
|
3035
|
-
return HttpRequestRegexMatcher(
|
3036
|
-
method=model.method,
|
3037
|
-
url_base=model.url_base,
|
3038
|
-
url_path_pattern=model.url_path_pattern,
|
3039
|
-
params=model.params,
|
3040
|
-
headers=model.headers,
|
3041
|
-
)
|
3042
|
-
|
3043
|
-
def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None:
|
3044
|
-
self._api_budget = self.create_component(
|
3045
|
-
model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
|
3046
|
-
)
|
@@ -4,9 +4,9 @@ from dataclasses import InitVar, dataclass, field
|
|
4
4
|
from typing import Any, Callable, Iterable, Mapping, Optional
|
5
5
|
|
6
6
|
from airbyte_cdk.models import FailureType
|
7
|
-
from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
|
8
7
|
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
|
9
8
|
AsyncJobOrchestrator,
|
9
|
+
AsyncPartition,
|
10
10
|
)
|
11
11
|
from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
|
12
12
|
SinglePartitionRouter,
|
@@ -42,12 +42,12 @@ class AsyncJobPartitionRouter(StreamSlicer):
|
|
42
42
|
|
43
43
|
for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
|
44
44
|
yield StreamSlice(
|
45
|
-
partition=dict(completed_partition.stream_slice.partition)
|
45
|
+
partition=dict(completed_partition.stream_slice.partition)
|
46
|
+
| {"partition": completed_partition},
|
46
47
|
cursor_slice=completed_partition.stream_slice.cursor_slice,
|
47
|
-
extra_fields={"jobs": list(completed_partition.jobs)},
|
48
48
|
)
|
49
49
|
|
50
|
-
def fetch_records(self,
|
50
|
+
def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
|
51
51
|
"""
|
52
52
|
This method of fetching records extends beyond what a PartitionRouter/StreamSlicer should
|
53
53
|
be responsible for. However, this was added in because the JobOrchestrator is required to
|
@@ -62,4 +62,4 @@ class AsyncJobPartitionRouter(StreamSlicer):
|
|
62
62
|
failure_type=FailureType.system_error,
|
63
63
|
)
|
64
64
|
|
65
|
-
return self._job_orchestrator.fetch_records(
|
65
|
+
return self._job_orchestrator.fetch_records(partition=partition)
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
from dataclasses import InitVar, dataclass
|
6
|
-
from typing import Any, Iterable, List, Mapping,
|
6
|
+
from typing import Any, Iterable, List, Mapping, Optional, Union
|
7
7
|
|
8
8
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
9
9
|
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
@@ -100,9 +100,7 @@ class ListPartitionRouter(PartitionRouter):
|
|
100
100
|
):
|
101
101
|
slice_value = stream_slice.get(self._cursor_field.eval(self.config))
|
102
102
|
if slice_value:
|
103
|
-
|
104
|
-
self.request_option.inject_into_request(options, slice_value, self.config)
|
105
|
-
return options
|
103
|
+
return {self.request_option.field_name.eval(self.config): slice_value} # type: ignore # field_name is always casted to InterpolatedString
|
106
104
|
else:
|
107
105
|
return {}
|
108
106
|
else:
|