airbyte-cdk 6.31.2.dev0__py3-none-any.whl → 6.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +9 -3
- airbyte_cdk/connector_builder/connector_builder_handler.py +3 -2
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
- airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
- airbyte_cdk/sources/declarative/auth/oauth.py +89 -23
- airbyte_cdk/sources/declarative/auth/token.py +8 -3
- airbyte_cdk/sources/declarative/auth/token_provider.py +4 -5
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +19 -9
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +134 -43
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +55 -16
- airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
- airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
- airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +400 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -7
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +35 -3
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +20 -7
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +45 -15
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +143 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +343 -64
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +55 -15
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +22 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
- airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -5
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -6
- airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +2 -5
- airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +44 -5
- airbyte_cdk/sources/http_logger.py +1 -1
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +51 -57
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +22 -13
- airbyte_cdk/sources/streams/core.py +6 -6
- airbyte_cdk/sources/streams/http/http.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +231 -62
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +171 -88
- airbyte_cdk/sources/types.py +4 -2
- airbyte_cdk/sources/utils/transform.py +23 -2
- airbyte_cdk/test/utils/manifest_only_fixtures.py +1 -2
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/mapping_helpers.py +27 -86
- airbyte_cdk/utils/slice_hasher.py +8 -1
- airbyte_cdk-6.32.0.dist-info/LICENSE_SHORT +1 -0
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/METADATA +6 -6
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/RECORD +55 -49
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/WHEEL +1 -1
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/entry_points.txt +0 -0
@@ -87,6 +87,8 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
|
87
87
|
)
|
88
88
|
from airbyte_cdk.sources.declarative.incremental import (
|
89
89
|
ChildPartitionResumableFullRefreshCursor,
|
90
|
+
ConcurrentCursorFactory,
|
91
|
+
ConcurrentPerPartitionCursor,
|
90
92
|
CursorFactory,
|
91
93
|
DatetimeBasedCursor,
|
92
94
|
DeclarativeCursor,
|
@@ -101,6 +103,7 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
|
|
101
103
|
LegacyToPerPartitionStateMigration,
|
102
104
|
)
|
103
105
|
from airbyte_cdk.sources.declarative.models import (
|
106
|
+
Clamping,
|
104
107
|
CustomStateMigration,
|
105
108
|
)
|
106
109
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
@@ -130,6 +133,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
130
133
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
131
134
|
CheckStream as CheckStreamModel,
|
132
135
|
)
|
136
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
137
|
+
ComplexFieldType as ComplexFieldTypeModel,
|
138
|
+
)
|
133
139
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
134
140
|
ComponentMappingDefinition as ComponentMappingDefinitionModel,
|
135
141
|
)
|
@@ -363,6 +369,10 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
363
369
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
364
370
|
ZipfileDecoder as ZipfileDecoderModel,
|
365
371
|
)
|
372
|
+
from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
|
373
|
+
COMPONENTS_MODULE_NAME,
|
374
|
+
SDM_COMPONENTS_MODULE_NAME,
|
375
|
+
)
|
366
376
|
from airbyte_cdk.sources.declarative.partition_routers import (
|
367
377
|
CartesianProductStreamSlicer,
|
368
378
|
ListPartitionRouter,
|
@@ -422,6 +432,7 @@ from airbyte_cdk.sources.declarative.retrievers import (
|
|
422
432
|
SimpleRetrieverTestReadDecorator,
|
423
433
|
)
|
424
434
|
from airbyte_cdk.sources.declarative.schema import (
|
435
|
+
ComplexFieldType,
|
425
436
|
DefaultSchemaLoader,
|
426
437
|
DynamicSchemaLoader,
|
427
438
|
InlineSchemaLoader,
|
@@ -456,6 +467,16 @@ from airbyte_cdk.sources.message import (
|
|
456
467
|
InMemoryMessageRepository,
|
457
468
|
LogAppenderMessageRepositoryDecorator,
|
458
469
|
MessageRepository,
|
470
|
+
NoopMessageRepository,
|
471
|
+
)
|
472
|
+
from airbyte_cdk.sources.streams.concurrent.clamping import (
|
473
|
+
ClampingEndProvider,
|
474
|
+
ClampingStrategy,
|
475
|
+
DayClampingStrategy,
|
476
|
+
MonthClampingStrategy,
|
477
|
+
NoClamping,
|
478
|
+
WeekClampingStrategy,
|
479
|
+
Weekday,
|
459
480
|
)
|
460
481
|
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
|
461
482
|
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
|
@@ -486,6 +507,7 @@ class ModelToComponentFactory:
|
|
486
507
|
disable_cache: bool = False,
|
487
508
|
disable_resumable_full_refresh: bool = False,
|
488
509
|
message_repository: Optional[MessageRepository] = None,
|
510
|
+
connector_state_manager: Optional[ConnectorStateManager] = None,
|
489
511
|
):
|
490
512
|
self._init_mappings()
|
491
513
|
self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
|
@@ -497,6 +519,7 @@ class ModelToComponentFactory:
|
|
497
519
|
self._message_repository = message_repository or InMemoryMessageRepository(
|
498
520
|
self._evaluate_log_level(emit_connector_builder_messages)
|
499
521
|
)
|
522
|
+
self._connector_state_manager = connector_state_manager or ConnectorStateManager()
|
500
523
|
|
501
524
|
def _init_mappings(self) -> None:
|
502
525
|
self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
|
@@ -555,6 +578,7 @@ class ModelToComponentFactory:
|
|
555
578
|
DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
|
556
579
|
SchemaTypeIdentifierModel: self.create_schema_type_identifier,
|
557
580
|
TypesMapModel: self.create_types_map,
|
581
|
+
ComplexFieldTypeModel: self.create_complex_field_type,
|
558
582
|
JwtAuthenticatorModel: self.create_jwt_authenticator,
|
559
583
|
LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
|
560
584
|
ListPartitionRouterModel: self.create_list_partition_router,
|
@@ -709,8 +733,8 @@ class ModelToComponentFactory:
|
|
709
733
|
}
|
710
734
|
return names_to_types[value_type]
|
711
735
|
|
736
|
+
@staticmethod
|
712
737
|
def create_api_key_authenticator(
|
713
|
-
self,
|
714
738
|
model: ApiKeyAuthenticatorModel,
|
715
739
|
config: Config,
|
716
740
|
token_provider: Optional[TokenProvider] = None,
|
@@ -732,8 +756,10 @@ class ModelToComponentFactory:
|
|
732
756
|
)
|
733
757
|
|
734
758
|
request_option = (
|
735
|
-
|
736
|
-
model.inject_into,
|
759
|
+
RequestOption(
|
760
|
+
inject_into=RequestOptionType(model.inject_into.inject_into.value),
|
761
|
+
field_name=model.inject_into.field_name,
|
762
|
+
parameters=model.parameters or {},
|
737
763
|
)
|
738
764
|
if model.inject_into
|
739
765
|
else RequestOption(
|
@@ -742,7 +768,6 @@ class ModelToComponentFactory:
|
|
742
768
|
parameters=model.parameters or {},
|
743
769
|
)
|
744
770
|
)
|
745
|
-
|
746
771
|
return ApiKeyAuthenticator(
|
747
772
|
token_provider=(
|
748
773
|
token_provider
|
@@ -824,7 +849,7 @@ class ModelToComponentFactory:
|
|
824
849
|
token_provider=token_provider,
|
825
850
|
)
|
826
851
|
else:
|
827
|
-
return
|
852
|
+
return ModelToComponentFactory.create_api_key_authenticator(
|
828
853
|
ApiKeyAuthenticatorModel(
|
829
854
|
type="ApiKeyAuthenticator",
|
830
855
|
api_token="",
|
@@ -878,7 +903,15 @@ class ModelToComponentFactory:
|
|
878
903
|
def create_check_dynamic_stream(
|
879
904
|
model: CheckDynamicStreamModel, config: Config, **kwargs: Any
|
880
905
|
) -> CheckDynamicStream:
|
881
|
-
|
906
|
+
assert model.use_check_availability is not None # for mypy
|
907
|
+
|
908
|
+
use_check_availability = model.use_check_availability
|
909
|
+
|
910
|
+
return CheckDynamicStream(
|
911
|
+
stream_count=model.stream_count,
|
912
|
+
use_check_availability=use_check_availability,
|
913
|
+
parameters={},
|
914
|
+
)
|
882
915
|
|
883
916
|
def create_composite_error_handler(
|
884
917
|
self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
|
@@ -904,15 +937,24 @@ class ModelToComponentFactory:
|
|
904
937
|
|
905
938
|
def create_concurrent_cursor_from_datetime_based_cursor(
|
906
939
|
self,
|
907
|
-
state_manager: ConnectorStateManager,
|
908
940
|
model_type: Type[BaseModel],
|
909
941
|
component_definition: ComponentDefinition,
|
910
942
|
stream_name: str,
|
911
943
|
stream_namespace: Optional[str],
|
912
944
|
config: Config,
|
913
|
-
|
945
|
+
message_repository: Optional[MessageRepository] = None,
|
946
|
+
runtime_lookback_window: Optional[datetime.timedelta] = None,
|
914
947
|
**kwargs: Any,
|
915
948
|
) -> ConcurrentCursor:
|
949
|
+
# Per-partition incremental streams can dynamically create child cursors which will pass their current
|
950
|
+
# state via the stream_state keyword argument. Incremental syncs without parent streams use the
|
951
|
+
# incoming state and connector_state_manager that is initialized when the component factory is created
|
952
|
+
stream_state = (
|
953
|
+
self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
|
954
|
+
if "stream_state" not in kwargs
|
955
|
+
else kwargs["stream_state"]
|
956
|
+
)
|
957
|
+
|
916
958
|
component_type = component_definition.get("type")
|
917
959
|
if component_definition.get("type") != model_type.__name__:
|
918
960
|
raise ValueError(
|
@@ -972,10 +1014,22 @@ class ModelToComponentFactory:
|
|
972
1014
|
connector_state_converter = CustomFormatConcurrentStreamStateConverter(
|
973
1015
|
datetime_format=datetime_format,
|
974
1016
|
input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
|
975
|
-
is_sequential_state=True,
|
1017
|
+
is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state
|
976
1018
|
cursor_granularity=cursor_granularity,
|
977
1019
|
)
|
978
1020
|
|
1021
|
+
# Adjusts the stream state by applying the runtime lookback window.
|
1022
|
+
# This is used to ensure correct state handling in case of failed partitions.
|
1023
|
+
stream_state_value = stream_state.get(cursor_field.cursor_field_key)
|
1024
|
+
if runtime_lookback_window and stream_state_value:
|
1025
|
+
new_stream_state = (
|
1026
|
+
connector_state_converter.parse_timestamp(stream_state_value)
|
1027
|
+
- runtime_lookback_window
|
1028
|
+
)
|
1029
|
+
stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
|
1030
|
+
new_stream_state
|
1031
|
+
)
|
1032
|
+
|
979
1033
|
start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
|
980
1034
|
if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
|
981
1035
|
start_date_runtime_value = self.create_min_max_datetime(
|
@@ -1042,12 +1096,59 @@ class ModelToComponentFactory:
|
|
1042
1096
|
if evaluated_step:
|
1043
1097
|
step_length = parse_duration(evaluated_step)
|
1044
1098
|
|
1099
|
+
clamping_strategy: ClampingStrategy = NoClamping()
|
1100
|
+
if datetime_based_cursor_model.clamping:
|
1101
|
+
# While it is undesirable to interpolate within the model factory (as opposed to at runtime),
|
1102
|
+
# it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime
|
1103
|
+
# object which we want to keep agnostic of being low-code
|
1104
|
+
target = InterpolatedString(
|
1105
|
+
string=datetime_based_cursor_model.clamping.target,
|
1106
|
+
parameters=datetime_based_cursor_model.parameters or {},
|
1107
|
+
)
|
1108
|
+
evaluated_target = target.eval(config=config)
|
1109
|
+
match evaluated_target:
|
1110
|
+
case "DAY":
|
1111
|
+
clamping_strategy = DayClampingStrategy()
|
1112
|
+
end_date_provider = ClampingEndProvider(
|
1113
|
+
DayClampingStrategy(is_ceiling=False),
|
1114
|
+
end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
1115
|
+
granularity=cursor_granularity or datetime.timedelta(seconds=1),
|
1116
|
+
)
|
1117
|
+
case "WEEK":
|
1118
|
+
if (
|
1119
|
+
not datetime_based_cursor_model.clamping.target_details
|
1120
|
+
or "weekday" not in datetime_based_cursor_model.clamping.target_details
|
1121
|
+
):
|
1122
|
+
raise ValueError(
|
1123
|
+
"Given WEEK clamping, weekday needs to be provided as target_details"
|
1124
|
+
)
|
1125
|
+
weekday = self._assemble_weekday(
|
1126
|
+
datetime_based_cursor_model.clamping.target_details["weekday"]
|
1127
|
+
)
|
1128
|
+
clamping_strategy = WeekClampingStrategy(weekday)
|
1129
|
+
end_date_provider = ClampingEndProvider(
|
1130
|
+
WeekClampingStrategy(weekday, is_ceiling=False),
|
1131
|
+
end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
1132
|
+
granularity=cursor_granularity or datetime.timedelta(days=1),
|
1133
|
+
)
|
1134
|
+
case "MONTH":
|
1135
|
+
clamping_strategy = MonthClampingStrategy()
|
1136
|
+
end_date_provider = ClampingEndProvider(
|
1137
|
+
MonthClampingStrategy(is_ceiling=False),
|
1138
|
+
end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
1139
|
+
granularity=cursor_granularity or datetime.timedelta(days=1),
|
1140
|
+
)
|
1141
|
+
case _:
|
1142
|
+
raise ValueError(
|
1143
|
+
f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
|
1144
|
+
)
|
1145
|
+
|
1045
1146
|
return ConcurrentCursor(
|
1046
1147
|
stream_name=stream_name,
|
1047
1148
|
stream_namespace=stream_namespace,
|
1048
1149
|
stream_state=stream_state,
|
1049
|
-
message_repository=self._message_repository,
|
1050
|
-
connector_state_manager=
|
1150
|
+
message_repository=message_repository or self._message_repository,
|
1151
|
+
connector_state_manager=self._connector_state_manager,
|
1051
1152
|
connector_state_converter=connector_state_converter,
|
1052
1153
|
cursor_field=cursor_field,
|
1053
1154
|
slice_boundary_fields=slice_boundary_fields,
|
@@ -1056,6 +1157,100 @@ class ModelToComponentFactory:
|
|
1056
1157
|
lookback_window=lookback_window,
|
1057
1158
|
slice_range=step_length,
|
1058
1159
|
cursor_granularity=cursor_granularity,
|
1160
|
+
clamping_strategy=clamping_strategy,
|
1161
|
+
)
|
1162
|
+
|
1163
|
+
def _assemble_weekday(self, weekday: str) -> Weekday:
|
1164
|
+
match weekday:
|
1165
|
+
case "MONDAY":
|
1166
|
+
return Weekday.MONDAY
|
1167
|
+
case "TUESDAY":
|
1168
|
+
return Weekday.TUESDAY
|
1169
|
+
case "WEDNESDAY":
|
1170
|
+
return Weekday.WEDNESDAY
|
1171
|
+
case "THURSDAY":
|
1172
|
+
return Weekday.THURSDAY
|
1173
|
+
case "FRIDAY":
|
1174
|
+
return Weekday.FRIDAY
|
1175
|
+
case "SATURDAY":
|
1176
|
+
return Weekday.SATURDAY
|
1177
|
+
case "SUNDAY":
|
1178
|
+
return Weekday.SUNDAY
|
1179
|
+
case _:
|
1180
|
+
raise ValueError(f"Unknown weekday {weekday}")
|
1181
|
+
|
1182
|
+
def create_concurrent_cursor_from_perpartition_cursor(
|
1183
|
+
self,
|
1184
|
+
state_manager: ConnectorStateManager,
|
1185
|
+
model_type: Type[BaseModel],
|
1186
|
+
component_definition: ComponentDefinition,
|
1187
|
+
stream_name: str,
|
1188
|
+
stream_namespace: Optional[str],
|
1189
|
+
config: Config,
|
1190
|
+
stream_state: MutableMapping[str, Any],
|
1191
|
+
partition_router: PartitionRouter,
|
1192
|
+
**kwargs: Any,
|
1193
|
+
) -> ConcurrentPerPartitionCursor:
|
1194
|
+
component_type = component_definition.get("type")
|
1195
|
+
if component_definition.get("type") != model_type.__name__:
|
1196
|
+
raise ValueError(
|
1197
|
+
f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
|
1198
|
+
)
|
1199
|
+
|
1200
|
+
datetime_based_cursor_model = model_type.parse_obj(component_definition)
|
1201
|
+
|
1202
|
+
if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
|
1203
|
+
raise ValueError(
|
1204
|
+
f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
|
1205
|
+
)
|
1206
|
+
|
1207
|
+
interpolated_cursor_field = InterpolatedString.create(
|
1208
|
+
datetime_based_cursor_model.cursor_field,
|
1209
|
+
parameters=datetime_based_cursor_model.parameters or {},
|
1210
|
+
)
|
1211
|
+
cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
|
1212
|
+
|
1213
|
+
datetime_format = datetime_based_cursor_model.datetime_format
|
1214
|
+
|
1215
|
+
cursor_granularity = (
|
1216
|
+
parse_duration(datetime_based_cursor_model.cursor_granularity)
|
1217
|
+
if datetime_based_cursor_model.cursor_granularity
|
1218
|
+
else None
|
1219
|
+
)
|
1220
|
+
|
1221
|
+
connector_state_converter: DateTimeStreamStateConverter
|
1222
|
+
connector_state_converter = CustomFormatConcurrentStreamStateConverter(
|
1223
|
+
datetime_format=datetime_format,
|
1224
|
+
input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
|
1225
|
+
is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state
|
1226
|
+
cursor_granularity=cursor_granularity,
|
1227
|
+
)
|
1228
|
+
|
1229
|
+
# Create the cursor factory
|
1230
|
+
cursor_factory = ConcurrentCursorFactory(
|
1231
|
+
partial(
|
1232
|
+
self.create_concurrent_cursor_from_datetime_based_cursor,
|
1233
|
+
state_manager=state_manager,
|
1234
|
+
model_type=model_type,
|
1235
|
+
component_definition=component_definition,
|
1236
|
+
stream_name=stream_name,
|
1237
|
+
stream_namespace=stream_namespace,
|
1238
|
+
config=config,
|
1239
|
+
message_repository=NoopMessageRepository(),
|
1240
|
+
)
|
1241
|
+
)
|
1242
|
+
|
1243
|
+
# Return the concurrent cursor and state converter
|
1244
|
+
return ConcurrentPerPartitionCursor(
|
1245
|
+
cursor_factory=cursor_factory,
|
1246
|
+
partition_router=partition_router,
|
1247
|
+
stream_name=stream_name,
|
1248
|
+
stream_namespace=stream_namespace,
|
1249
|
+
stream_state=stream_state,
|
1250
|
+
message_repository=self._message_repository, # type: ignore
|
1251
|
+
connector_state_manager=state_manager,
|
1252
|
+
connector_state_converter=connector_state_converter,
|
1253
|
+
cursor_field=cursor_field,
|
1059
1254
|
)
|
1060
1255
|
|
1061
1256
|
@staticmethod
|
@@ -1101,7 +1296,6 @@ class ModelToComponentFactory:
|
|
1101
1296
|
:param config: The custom defined connector config
|
1102
1297
|
:return: The declarative component built from the Pydantic model to be used at runtime
|
1103
1298
|
"""
|
1104
|
-
|
1105
1299
|
custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
|
1106
1300
|
component_fields = get_type_hints(custom_component_class)
|
1107
1301
|
model_args = model.dict()
|
@@ -1155,14 +1349,38 @@ class ModelToComponentFactory:
|
|
1155
1349
|
return custom_component_class(**kwargs)
|
1156
1350
|
|
1157
1351
|
@staticmethod
|
1158
|
-
def _get_class_from_fully_qualified_class_name(
|
1352
|
+
def _get_class_from_fully_qualified_class_name(
|
1353
|
+
full_qualified_class_name: str,
|
1354
|
+
) -> Any:
|
1355
|
+
"""Get a class from its fully qualified name.
|
1356
|
+
|
1357
|
+
If a custom components module is needed, we assume it is already registered - probably
|
1358
|
+
as `source_declarative_manifest.components` or `components`.
|
1359
|
+
|
1360
|
+
Args:
|
1361
|
+
full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName").
|
1362
|
+
|
1363
|
+
Returns:
|
1364
|
+
Any: The class object.
|
1365
|
+
|
1366
|
+
Raises:
|
1367
|
+
ValueError: If the class cannot be loaded.
|
1368
|
+
"""
|
1159
1369
|
split = full_qualified_class_name.split(".")
|
1160
|
-
|
1370
|
+
module_name_full = ".".join(split[:-1])
|
1161
1371
|
class_name = split[-1]
|
1372
|
+
|
1162
1373
|
try:
|
1163
|
-
|
1164
|
-
except
|
1165
|
-
raise ValueError(f"Could not load
|
1374
|
+
module_ref = importlib.import_module(module_name_full)
|
1375
|
+
except ModuleNotFoundError as e:
|
1376
|
+
raise ValueError(f"Could not load module `{module_name_full}`.") from e
|
1377
|
+
|
1378
|
+
try:
|
1379
|
+
return getattr(module_ref, class_name)
|
1380
|
+
except AttributeError as e:
|
1381
|
+
raise ValueError(
|
1382
|
+
f"Could not load class `{class_name}` from module `{module_name_full}`.",
|
1383
|
+
) from e
|
1166
1384
|
|
1167
1385
|
@staticmethod
|
1168
1386
|
def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
|
@@ -1271,15 +1489,19 @@ class ModelToComponentFactory:
|
|
1271
1489
|
)
|
1272
1490
|
|
1273
1491
|
end_time_option = (
|
1274
|
-
|
1275
|
-
model.end_time_option,
|
1492
|
+
RequestOption(
|
1493
|
+
inject_into=RequestOptionType(model.end_time_option.inject_into.value),
|
1494
|
+
field_name=model.end_time_option.field_name,
|
1495
|
+
parameters=model.parameters or {},
|
1276
1496
|
)
|
1277
1497
|
if model.end_time_option
|
1278
1498
|
else None
|
1279
1499
|
)
|
1280
1500
|
start_time_option = (
|
1281
|
-
|
1282
|
-
model.start_time_option,
|
1501
|
+
RequestOption(
|
1502
|
+
inject_into=RequestOptionType(model.start_time_option.inject_into.value),
|
1503
|
+
field_name=model.start_time_option.field_name,
|
1504
|
+
parameters=model.parameters or {},
|
1283
1505
|
)
|
1284
1506
|
if model.start_time_option
|
1285
1507
|
else None
|
@@ -1336,32 +1558,33 @@ class ModelToComponentFactory:
|
|
1336
1558
|
raise ValueError(
|
1337
1559
|
"Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
|
1338
1560
|
)
|
1339
|
-
|
1340
|
-
|
1341
|
-
|
1342
|
-
|
1343
|
-
|
1344
|
-
|
1345
|
-
|
1346
|
-
|
1347
|
-
|
1348
|
-
else None
|
1349
|
-
),
|
1350
|
-
}
|
1561
|
+
cursor = (
|
1562
|
+
combined_slicers
|
1563
|
+
if isinstance(
|
1564
|
+
combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
|
1565
|
+
)
|
1566
|
+
else self._create_component_from_model(model=model.incremental_sync, config=config)
|
1567
|
+
)
|
1568
|
+
|
1569
|
+
client_side_incremental_sync = {"cursor": cursor}
|
1351
1570
|
|
1352
1571
|
if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
|
1353
1572
|
cursor_model = model.incremental_sync
|
1354
1573
|
|
1355
1574
|
end_time_option = (
|
1356
|
-
|
1357
|
-
cursor_model.end_time_option,
|
1575
|
+
RequestOption(
|
1576
|
+
inject_into=RequestOptionType(cursor_model.end_time_option.inject_into.value),
|
1577
|
+
field_name=cursor_model.end_time_option.field_name,
|
1578
|
+
parameters=cursor_model.parameters or {},
|
1358
1579
|
)
|
1359
1580
|
if cursor_model.end_time_option
|
1360
1581
|
else None
|
1361
1582
|
)
|
1362
1583
|
start_time_option = (
|
1363
|
-
|
1364
|
-
cursor_model.start_time_option,
|
1584
|
+
RequestOption(
|
1585
|
+
inject_into=RequestOptionType(cursor_model.start_time_option.inject_into.value),
|
1586
|
+
field_name=cursor_model.start_time_option.field_name,
|
1587
|
+
parameters=cursor_model.parameters or {},
|
1365
1588
|
)
|
1366
1589
|
if cursor_model.start_time_option
|
1367
1590
|
else None
|
@@ -1433,7 +1656,7 @@ class ModelToComponentFactory:
|
|
1433
1656
|
) -> Optional[PartitionRouter]:
|
1434
1657
|
if (
|
1435
1658
|
hasattr(model, "partition_router")
|
1436
|
-
and isinstance(model, SimpleRetrieverModel)
|
1659
|
+
and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel)
|
1437
1660
|
and model.partition_router
|
1438
1661
|
):
|
1439
1662
|
stream_slicer_model = model.partition_router
|
@@ -1467,6 +1690,31 @@ class ModelToComponentFactory:
|
|
1467
1690
|
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
1468
1691
|
|
1469
1692
|
if model.incremental_sync and stream_slicer:
|
1693
|
+
if model.retriever.type == "AsyncRetriever":
|
1694
|
+
if model.incremental_sync.type != "DatetimeBasedCursor":
|
1695
|
+
# We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support or unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
|
1696
|
+
raise ValueError(
|
1697
|
+
"AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
|
1698
|
+
)
|
1699
|
+
if stream_slicer:
|
1700
|
+
return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
1701
|
+
state_manager=self._connector_state_manager,
|
1702
|
+
model_type=DatetimeBasedCursorModel,
|
1703
|
+
component_definition=model.incremental_sync.__dict__,
|
1704
|
+
stream_name=model.name or "",
|
1705
|
+
stream_namespace=None,
|
1706
|
+
config=config or {},
|
1707
|
+
stream_state={},
|
1708
|
+
partition_router=stream_slicer,
|
1709
|
+
)
|
1710
|
+
return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
1711
|
+
model_type=DatetimeBasedCursorModel,
|
1712
|
+
component_definition=model.incremental_sync.__dict__,
|
1713
|
+
stream_name=model.name or "",
|
1714
|
+
stream_namespace=None,
|
1715
|
+
config=config or {},
|
1716
|
+
)
|
1717
|
+
|
1470
1718
|
incremental_sync_model = model.incremental_sync
|
1471
1719
|
if (
|
1472
1720
|
hasattr(incremental_sync_model, "global_substream_cursor")
|
@@ -1492,6 +1740,22 @@ class ModelToComponentFactory:
|
|
1492
1740
|
stream_cursor=cursor_component,
|
1493
1741
|
)
|
1494
1742
|
elif model.incremental_sync:
|
1743
|
+
if model.retriever.type == "AsyncRetriever":
|
1744
|
+
if model.incremental_sync.type != "DatetimeBasedCursor":
|
1745
|
+
# We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support or unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
|
1746
|
+
raise ValueError(
|
1747
|
+
"AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
|
1748
|
+
)
|
1749
|
+
if model.retriever.partition_router:
|
1750
|
+
# Note that this development is also done in parallel to the per partition development which once merged we could support here by calling `create_concurrent_cursor_from_perpartition_cursor`
|
1751
|
+
raise ValueError("Per partition state is not supported yet for AsyncRetriever")
|
1752
|
+
return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
1753
|
+
model_type=DatetimeBasedCursorModel,
|
1754
|
+
component_definition=model.incremental_sync.__dict__,
|
1755
|
+
stream_name=model.name or "",
|
1756
|
+
stream_namespace=None,
|
1757
|
+
config=config or {},
|
1758
|
+
)
|
1495
1759
|
return (
|
1496
1760
|
self._create_component_from_model(model=model.incremental_sync, config=config)
|
1497
1761
|
if model.incremental_sync
|
@@ -1710,10 +1974,26 @@ class ModelToComponentFactory:
|
|
1710
1974
|
) -> InlineSchemaLoader:
|
1711
1975
|
return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
|
1712
1976
|
|
1713
|
-
|
1714
|
-
|
1977
|
+
def create_complex_field_type(
|
1978
|
+
self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
|
1979
|
+
) -> ComplexFieldType:
|
1980
|
+
items = (
|
1981
|
+
self._create_component_from_model(model=model.items, config=config)
|
1982
|
+
if isinstance(model.items, ComplexFieldTypeModel)
|
1983
|
+
else model.items
|
1984
|
+
)
|
1985
|
+
|
1986
|
+
return ComplexFieldType(field_type=model.field_type, items=items)
|
1987
|
+
|
1988
|
+
def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
|
1989
|
+
target_type = (
|
1990
|
+
self._create_component_from_model(model=model.target_type, config=config)
|
1991
|
+
if isinstance(model.target_type, ComplexFieldTypeModel)
|
1992
|
+
else model.target_type
|
1993
|
+
)
|
1994
|
+
|
1715
1995
|
return TypesMap(
|
1716
|
-
target_type=
|
1996
|
+
target_type=target_type,
|
1717
1997
|
current_type=model.current_type,
|
1718
1998
|
condition=model.condition if model.condition is not None else "True",
|
1719
1999
|
)
|
@@ -1870,11 +2150,16 @@ class ModelToComponentFactory:
|
|
1870
2150
|
additional_jwt_payload=model.additional_jwt_payload,
|
1871
2151
|
)
|
1872
2152
|
|
2153
|
+
@staticmethod
|
1873
2154
|
def create_list_partition_router(
|
1874
|
-
|
2155
|
+
model: ListPartitionRouterModel, config: Config, **kwargs: Any
|
1875
2156
|
) -> ListPartitionRouter:
|
1876
2157
|
request_option = (
|
1877
|
-
|
2158
|
+
RequestOption(
|
2159
|
+
inject_into=RequestOptionType(model.request_option.inject_into.value),
|
2160
|
+
field_name=model.request_option.field_name,
|
2161
|
+
parameters=model.parameters or {},
|
2162
|
+
)
|
1878
2163
|
if model.request_option
|
1879
2164
|
else None
|
1880
2165
|
)
|
@@ -1911,6 +2196,12 @@ class ModelToComponentFactory:
|
|
1911
2196
|
def create_oauth_authenticator(
|
1912
2197
|
self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
|
1913
2198
|
) -> DeclarativeOauth2Authenticator:
|
2199
|
+
profile_assertion = (
|
2200
|
+
self._create_component_from_model(model.profile_assertion, config=config)
|
2201
|
+
if model.profile_assertion
|
2202
|
+
else None
|
2203
|
+
)
|
2204
|
+
|
1914
2205
|
if model.refresh_token_updater:
|
1915
2206
|
# ignore type error because fixing it would have a lot of dependencies, revisit later
|
1916
2207
|
return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore
|
@@ -1931,13 +2222,17 @@ class ModelToComponentFactory:
|
|
1931
2222
|
).eval(config),
|
1932
2223
|
client_id=InterpolatedString.create(
|
1933
2224
|
model.client_id, parameters=model.parameters or {}
|
1934
|
-
).eval(config)
|
2225
|
+
).eval(config)
|
2226
|
+
if model.client_id
|
2227
|
+
else model.client_id,
|
1935
2228
|
client_secret_name=InterpolatedString.create(
|
1936
2229
|
model.client_secret_name or "client_secret", parameters=model.parameters or {}
|
1937
2230
|
).eval(config),
|
1938
2231
|
client_secret=InterpolatedString.create(
|
1939
2232
|
model.client_secret, parameters=model.parameters or {}
|
1940
|
-
).eval(config)
|
2233
|
+
).eval(config)
|
2234
|
+
if model.client_secret
|
2235
|
+
else model.client_secret,
|
1941
2236
|
access_token_config_path=model.refresh_token_updater.access_token_config_path,
|
1942
2237
|
refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
|
1943
2238
|
token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
|
@@ -1983,6 +2278,8 @@ class ModelToComponentFactory:
|
|
1983
2278
|
config=config,
|
1984
2279
|
parameters=model.parameters or {},
|
1985
2280
|
message_repository=self._message_repository,
|
2281
|
+
profile_assertion=profile_assertion,
|
2282
|
+
use_profile_assertion=model.use_profile_assertion,
|
1986
2283
|
)
|
1987
2284
|
|
1988
2285
|
def create_offset_increment(
|
@@ -2058,25 +2355,7 @@ class ModelToComponentFactory:
|
|
2058
2355
|
model: RequestOptionModel, config: Config, **kwargs: Any
|
2059
2356
|
) -> RequestOption:
|
2060
2357
|
inject_into = RequestOptionType(model.inject_into.value)
|
2061
|
-
|
2062
|
-
[
|
2063
|
-
InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
|
2064
|
-
for segment in model.field_path
|
2065
|
-
]
|
2066
|
-
if model.field_path
|
2067
|
-
else None
|
2068
|
-
)
|
2069
|
-
field_name = (
|
2070
|
-
InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
|
2071
|
-
if model.field_name
|
2072
|
-
else None
|
2073
|
-
)
|
2074
|
-
return RequestOption(
|
2075
|
-
field_name=field_name,
|
2076
|
-
field_path=field_path,
|
2077
|
-
inject_into=inject_into,
|
2078
|
-
parameters=kwargs.get("parameters", {}),
|
2079
|
-
)
|
2358
|
+
return RequestOption(field_name=model.field_name, inject_into=inject_into, parameters={})
|
2080
2359
|
|
2081
2360
|
def create_record_selector(
|
2082
2361
|
self,
|