airbyte-cdk 6.31.1__py3-none-any.whl → 6.31.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +3 -9
- airbyte_cdk/connector_builder/connector_builder_handler.py +2 -3
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +4 -4
- airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
- airbyte_cdk/sources/declarative/auth/oauth.py +23 -89
- airbyte_cdk/sources/declarative/auth/token.py +3 -8
- airbyte_cdk/sources/declarative/auth/token_provider.py +5 -4
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +9 -19
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +43 -134
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +16 -55
- airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
- airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
- airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +7 -6
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +3 -35
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +7 -15
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +15 -45
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +64 -343
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +15 -55
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -22
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
- airbyte_cdk/sources/declarative/requesters/http_requester.py +5 -1
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +6 -5
- airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +5 -2
- airbyte_cdk/sources/declarative/schema/__init__.py +0 -2
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +5 -44
- airbyte_cdk/sources/http_logger.py +1 -1
- airbyte_cdk/sources/streams/concurrent/cursor.py +57 -51
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +13 -22
- airbyte_cdk/sources/streams/core.py +6 -6
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +62 -231
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +88 -171
- airbyte_cdk/sources/types.py +2 -4
- airbyte_cdk/sources/utils/transform.py +2 -23
- airbyte_cdk/test/utils/manifest_only_fixtures.py +2 -1
- airbyte_cdk/utils/mapping_helpers.py +86 -27
- airbyte_cdk/utils/slice_hasher.py +1 -8
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/METADATA +6 -6
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/RECORD +48 -54
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/WHEEL +1 -1
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -400
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +0 -143
- airbyte_cdk/sources/streams/concurrent/clamping.py +0 -99
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +0 -32
- airbyte_cdk/utils/datetime_helpers.py +0 -499
- airbyte_cdk-6.31.1.dist-info/LICENSE_SHORT +0 -1
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/entry_points.txt +0 -0
@@ -87,8 +87,6 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
|
87
87
|
)
|
88
88
|
from airbyte_cdk.sources.declarative.incremental import (
|
89
89
|
ChildPartitionResumableFullRefreshCursor,
|
90
|
-
ConcurrentCursorFactory,
|
91
|
-
ConcurrentPerPartitionCursor,
|
92
90
|
CursorFactory,
|
93
91
|
DatetimeBasedCursor,
|
94
92
|
DeclarativeCursor,
|
@@ -103,7 +101,6 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
|
|
103
101
|
LegacyToPerPartitionStateMigration,
|
104
102
|
)
|
105
103
|
from airbyte_cdk.sources.declarative.models import (
|
106
|
-
Clamping,
|
107
104
|
CustomStateMigration,
|
108
105
|
)
|
109
106
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
@@ -133,9 +130,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
133
130
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
134
131
|
CheckStream as CheckStreamModel,
|
135
132
|
)
|
136
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
137
|
-
ComplexFieldType as ComplexFieldTypeModel,
|
138
|
-
)
|
139
133
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
140
134
|
ComponentMappingDefinition as ComponentMappingDefinitionModel,
|
141
135
|
)
|
@@ -369,10 +363,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
369
363
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
370
364
|
ZipfileDecoder as ZipfileDecoderModel,
|
371
365
|
)
|
372
|
-
from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
|
373
|
-
COMPONENTS_MODULE_NAME,
|
374
|
-
SDM_COMPONENTS_MODULE_NAME,
|
375
|
-
)
|
376
366
|
from airbyte_cdk.sources.declarative.partition_routers import (
|
377
367
|
CartesianProductStreamSlicer,
|
378
368
|
ListPartitionRouter,
|
@@ -432,7 +422,6 @@ from airbyte_cdk.sources.declarative.retrievers import (
|
|
432
422
|
SimpleRetrieverTestReadDecorator,
|
433
423
|
)
|
434
424
|
from airbyte_cdk.sources.declarative.schema import (
|
435
|
-
ComplexFieldType,
|
436
425
|
DefaultSchemaLoader,
|
437
426
|
DynamicSchemaLoader,
|
438
427
|
InlineSchemaLoader,
|
@@ -467,16 +456,6 @@ from airbyte_cdk.sources.message import (
|
|
467
456
|
InMemoryMessageRepository,
|
468
457
|
LogAppenderMessageRepositoryDecorator,
|
469
458
|
MessageRepository,
|
470
|
-
NoopMessageRepository,
|
471
|
-
)
|
472
|
-
from airbyte_cdk.sources.streams.concurrent.clamping import (
|
473
|
-
ClampingEndProvider,
|
474
|
-
ClampingStrategy,
|
475
|
-
DayClampingStrategy,
|
476
|
-
MonthClampingStrategy,
|
477
|
-
NoClamping,
|
478
|
-
WeekClampingStrategy,
|
479
|
-
Weekday,
|
480
459
|
)
|
481
460
|
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
|
482
461
|
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
|
@@ -507,7 +486,6 @@ class ModelToComponentFactory:
|
|
507
486
|
disable_cache: bool = False,
|
508
487
|
disable_resumable_full_refresh: bool = False,
|
509
488
|
message_repository: Optional[MessageRepository] = None,
|
510
|
-
connector_state_manager: Optional[ConnectorStateManager] = None,
|
511
489
|
):
|
512
490
|
self._init_mappings()
|
513
491
|
self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
|
@@ -519,7 +497,6 @@ class ModelToComponentFactory:
|
|
519
497
|
self._message_repository = message_repository or InMemoryMessageRepository(
|
520
498
|
self._evaluate_log_level(emit_connector_builder_messages)
|
521
499
|
)
|
522
|
-
self._connector_state_manager = connector_state_manager or ConnectorStateManager()
|
523
500
|
|
524
501
|
def _init_mappings(self) -> None:
|
525
502
|
self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
|
@@ -578,7 +555,6 @@ class ModelToComponentFactory:
|
|
578
555
|
DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
|
579
556
|
SchemaTypeIdentifierModel: self.create_schema_type_identifier,
|
580
557
|
TypesMapModel: self.create_types_map,
|
581
|
-
ComplexFieldTypeModel: self.create_complex_field_type,
|
582
558
|
JwtAuthenticatorModel: self.create_jwt_authenticator,
|
583
559
|
LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
|
584
560
|
ListPartitionRouterModel: self.create_list_partition_router,
|
@@ -733,8 +709,8 @@ class ModelToComponentFactory:
|
|
733
709
|
}
|
734
710
|
return names_to_types[value_type]
|
735
711
|
|
736
|
-
@staticmethod
|
737
712
|
def create_api_key_authenticator(
|
713
|
+
self,
|
738
714
|
model: ApiKeyAuthenticatorModel,
|
739
715
|
config: Config,
|
740
716
|
token_provider: Optional[TokenProvider] = None,
|
@@ -756,10 +732,8 @@ class ModelToComponentFactory:
|
|
756
732
|
)
|
757
733
|
|
758
734
|
request_option = (
|
759
|
-
|
760
|
-
inject_into=
|
761
|
-
field_name=model.inject_into.field_name,
|
762
|
-
parameters=model.parameters or {},
|
735
|
+
self._create_component_from_model(
|
736
|
+
model.inject_into, config, parameters=model.parameters or {}
|
763
737
|
)
|
764
738
|
if model.inject_into
|
765
739
|
else RequestOption(
|
@@ -768,6 +742,7 @@ class ModelToComponentFactory:
|
|
768
742
|
parameters=model.parameters or {},
|
769
743
|
)
|
770
744
|
)
|
745
|
+
|
771
746
|
return ApiKeyAuthenticator(
|
772
747
|
token_provider=(
|
773
748
|
token_provider
|
@@ -849,7 +824,7 @@ class ModelToComponentFactory:
|
|
849
824
|
token_provider=token_provider,
|
850
825
|
)
|
851
826
|
else:
|
852
|
-
return
|
827
|
+
return self.create_api_key_authenticator(
|
853
828
|
ApiKeyAuthenticatorModel(
|
854
829
|
type="ApiKeyAuthenticator",
|
855
830
|
api_token="",
|
@@ -903,15 +878,7 @@ class ModelToComponentFactory:
|
|
903
878
|
def create_check_dynamic_stream(
|
904
879
|
model: CheckDynamicStreamModel, config: Config, **kwargs: Any
|
905
880
|
) -> CheckDynamicStream:
|
906
|
-
|
907
|
-
|
908
|
-
use_check_availability = model.use_check_availability
|
909
|
-
|
910
|
-
return CheckDynamicStream(
|
911
|
-
stream_count=model.stream_count,
|
912
|
-
use_check_availability=use_check_availability,
|
913
|
-
parameters={},
|
914
|
-
)
|
881
|
+
return CheckDynamicStream(stream_count=model.stream_count, parameters={})
|
915
882
|
|
916
883
|
def create_composite_error_handler(
|
917
884
|
self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
|
@@ -937,24 +904,15 @@ class ModelToComponentFactory:
|
|
937
904
|
|
938
905
|
def create_concurrent_cursor_from_datetime_based_cursor(
|
939
906
|
self,
|
907
|
+
state_manager: ConnectorStateManager,
|
940
908
|
model_type: Type[BaseModel],
|
941
909
|
component_definition: ComponentDefinition,
|
942
910
|
stream_name: str,
|
943
911
|
stream_namespace: Optional[str],
|
944
912
|
config: Config,
|
945
|
-
|
946
|
-
runtime_lookback_window: Optional[datetime.timedelta] = None,
|
913
|
+
stream_state: MutableMapping[str, Any],
|
947
914
|
**kwargs: Any,
|
948
915
|
) -> ConcurrentCursor:
|
949
|
-
# Per-partition incremental streams can dynamically create child cursors which will pass their current
|
950
|
-
# state via the stream_state keyword argument. Incremental syncs without parent streams use the
|
951
|
-
# incoming state and connector_state_manager that is initialized when the component factory is created
|
952
|
-
stream_state = (
|
953
|
-
self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
|
954
|
-
if "stream_state" not in kwargs
|
955
|
-
else kwargs["stream_state"]
|
956
|
-
)
|
957
|
-
|
958
916
|
component_type = component_definition.get("type")
|
959
917
|
if component_definition.get("type") != model_type.__name__:
|
960
918
|
raise ValueError(
|
@@ -1014,22 +972,10 @@ class ModelToComponentFactory:
|
|
1014
972
|
connector_state_converter = CustomFormatConcurrentStreamStateConverter(
|
1015
973
|
datetime_format=datetime_format,
|
1016
974
|
input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
|
1017
|
-
is_sequential_state=True,
|
975
|
+
is_sequential_state=True,
|
1018
976
|
cursor_granularity=cursor_granularity,
|
1019
977
|
)
|
1020
978
|
|
1021
|
-
# Adjusts the stream state by applying the runtime lookback window.
|
1022
|
-
# This is used to ensure correct state handling in case of failed partitions.
|
1023
|
-
stream_state_value = stream_state.get(cursor_field.cursor_field_key)
|
1024
|
-
if runtime_lookback_window and stream_state_value:
|
1025
|
-
new_stream_state = (
|
1026
|
-
connector_state_converter.parse_timestamp(stream_state_value)
|
1027
|
-
- runtime_lookback_window
|
1028
|
-
)
|
1029
|
-
stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
|
1030
|
-
new_stream_state
|
1031
|
-
)
|
1032
|
-
|
1033
979
|
start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
|
1034
980
|
if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
|
1035
981
|
start_date_runtime_value = self.create_min_max_datetime(
|
@@ -1096,59 +1042,12 @@ class ModelToComponentFactory:
|
|
1096
1042
|
if evaluated_step:
|
1097
1043
|
step_length = parse_duration(evaluated_step)
|
1098
1044
|
|
1099
|
-
clamping_strategy: ClampingStrategy = NoClamping()
|
1100
|
-
if datetime_based_cursor_model.clamping:
|
1101
|
-
# While it is undesirable to interpolate within the model factory (as opposed to at runtime),
|
1102
|
-
# it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime
|
1103
|
-
# object which we want to keep agnostic of being low-code
|
1104
|
-
target = InterpolatedString(
|
1105
|
-
string=datetime_based_cursor_model.clamping.target,
|
1106
|
-
parameters=datetime_based_cursor_model.parameters or {},
|
1107
|
-
)
|
1108
|
-
evaluated_target = target.eval(config=config)
|
1109
|
-
match evaluated_target:
|
1110
|
-
case "DAY":
|
1111
|
-
clamping_strategy = DayClampingStrategy()
|
1112
|
-
end_date_provider = ClampingEndProvider(
|
1113
|
-
DayClampingStrategy(is_ceiling=False),
|
1114
|
-
end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
1115
|
-
granularity=cursor_granularity or datetime.timedelta(seconds=1),
|
1116
|
-
)
|
1117
|
-
case "WEEK":
|
1118
|
-
if (
|
1119
|
-
not datetime_based_cursor_model.clamping.target_details
|
1120
|
-
or "weekday" not in datetime_based_cursor_model.clamping.target_details
|
1121
|
-
):
|
1122
|
-
raise ValueError(
|
1123
|
-
"Given WEEK clamping, weekday needs to be provided as target_details"
|
1124
|
-
)
|
1125
|
-
weekday = self._assemble_weekday(
|
1126
|
-
datetime_based_cursor_model.clamping.target_details["weekday"]
|
1127
|
-
)
|
1128
|
-
clamping_strategy = WeekClampingStrategy(weekday)
|
1129
|
-
end_date_provider = ClampingEndProvider(
|
1130
|
-
WeekClampingStrategy(weekday, is_ceiling=False),
|
1131
|
-
end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
1132
|
-
granularity=cursor_granularity or datetime.timedelta(days=1),
|
1133
|
-
)
|
1134
|
-
case "MONTH":
|
1135
|
-
clamping_strategy = MonthClampingStrategy()
|
1136
|
-
end_date_provider = ClampingEndProvider(
|
1137
|
-
MonthClampingStrategy(is_ceiling=False),
|
1138
|
-
end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
1139
|
-
granularity=cursor_granularity or datetime.timedelta(days=1),
|
1140
|
-
)
|
1141
|
-
case _:
|
1142
|
-
raise ValueError(
|
1143
|
-
f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
|
1144
|
-
)
|
1145
|
-
|
1146
1045
|
return ConcurrentCursor(
|
1147
1046
|
stream_name=stream_name,
|
1148
1047
|
stream_namespace=stream_namespace,
|
1149
1048
|
stream_state=stream_state,
|
1150
|
-
message_repository=
|
1151
|
-
connector_state_manager=
|
1049
|
+
message_repository=self._message_repository,
|
1050
|
+
connector_state_manager=state_manager,
|
1152
1051
|
connector_state_converter=connector_state_converter,
|
1153
1052
|
cursor_field=cursor_field,
|
1154
1053
|
slice_boundary_fields=slice_boundary_fields,
|
@@ -1157,100 +1056,6 @@ class ModelToComponentFactory:
|
|
1157
1056
|
lookback_window=lookback_window,
|
1158
1057
|
slice_range=step_length,
|
1159
1058
|
cursor_granularity=cursor_granularity,
|
1160
|
-
clamping_strategy=clamping_strategy,
|
1161
|
-
)
|
1162
|
-
|
1163
|
-
def _assemble_weekday(self, weekday: str) -> Weekday:
|
1164
|
-
match weekday:
|
1165
|
-
case "MONDAY":
|
1166
|
-
return Weekday.MONDAY
|
1167
|
-
case "TUESDAY":
|
1168
|
-
return Weekday.TUESDAY
|
1169
|
-
case "WEDNESDAY":
|
1170
|
-
return Weekday.WEDNESDAY
|
1171
|
-
case "THURSDAY":
|
1172
|
-
return Weekday.THURSDAY
|
1173
|
-
case "FRIDAY":
|
1174
|
-
return Weekday.FRIDAY
|
1175
|
-
case "SATURDAY":
|
1176
|
-
return Weekday.SATURDAY
|
1177
|
-
case "SUNDAY":
|
1178
|
-
return Weekday.SUNDAY
|
1179
|
-
case _:
|
1180
|
-
raise ValueError(f"Unknown weekday {weekday}")
|
1181
|
-
|
1182
|
-
def create_concurrent_cursor_from_perpartition_cursor(
|
1183
|
-
self,
|
1184
|
-
state_manager: ConnectorStateManager,
|
1185
|
-
model_type: Type[BaseModel],
|
1186
|
-
component_definition: ComponentDefinition,
|
1187
|
-
stream_name: str,
|
1188
|
-
stream_namespace: Optional[str],
|
1189
|
-
config: Config,
|
1190
|
-
stream_state: MutableMapping[str, Any],
|
1191
|
-
partition_router: PartitionRouter,
|
1192
|
-
**kwargs: Any,
|
1193
|
-
) -> ConcurrentPerPartitionCursor:
|
1194
|
-
component_type = component_definition.get("type")
|
1195
|
-
if component_definition.get("type") != model_type.__name__:
|
1196
|
-
raise ValueError(
|
1197
|
-
f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
|
1198
|
-
)
|
1199
|
-
|
1200
|
-
datetime_based_cursor_model = model_type.parse_obj(component_definition)
|
1201
|
-
|
1202
|
-
if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
|
1203
|
-
raise ValueError(
|
1204
|
-
f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
|
1205
|
-
)
|
1206
|
-
|
1207
|
-
interpolated_cursor_field = InterpolatedString.create(
|
1208
|
-
datetime_based_cursor_model.cursor_field,
|
1209
|
-
parameters=datetime_based_cursor_model.parameters or {},
|
1210
|
-
)
|
1211
|
-
cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
|
1212
|
-
|
1213
|
-
datetime_format = datetime_based_cursor_model.datetime_format
|
1214
|
-
|
1215
|
-
cursor_granularity = (
|
1216
|
-
parse_duration(datetime_based_cursor_model.cursor_granularity)
|
1217
|
-
if datetime_based_cursor_model.cursor_granularity
|
1218
|
-
else None
|
1219
|
-
)
|
1220
|
-
|
1221
|
-
connector_state_converter: DateTimeStreamStateConverter
|
1222
|
-
connector_state_converter = CustomFormatConcurrentStreamStateConverter(
|
1223
|
-
datetime_format=datetime_format,
|
1224
|
-
input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
|
1225
|
-
is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state
|
1226
|
-
cursor_granularity=cursor_granularity,
|
1227
|
-
)
|
1228
|
-
|
1229
|
-
# Create the cursor factory
|
1230
|
-
cursor_factory = ConcurrentCursorFactory(
|
1231
|
-
partial(
|
1232
|
-
self.create_concurrent_cursor_from_datetime_based_cursor,
|
1233
|
-
state_manager=state_manager,
|
1234
|
-
model_type=model_type,
|
1235
|
-
component_definition=component_definition,
|
1236
|
-
stream_name=stream_name,
|
1237
|
-
stream_namespace=stream_namespace,
|
1238
|
-
config=config,
|
1239
|
-
message_repository=NoopMessageRepository(),
|
1240
|
-
)
|
1241
|
-
)
|
1242
|
-
|
1243
|
-
# Return the concurrent cursor and state converter
|
1244
|
-
return ConcurrentPerPartitionCursor(
|
1245
|
-
cursor_factory=cursor_factory,
|
1246
|
-
partition_router=partition_router,
|
1247
|
-
stream_name=stream_name,
|
1248
|
-
stream_namespace=stream_namespace,
|
1249
|
-
stream_state=stream_state,
|
1250
|
-
message_repository=self._message_repository, # type: ignore
|
1251
|
-
connector_state_manager=state_manager,
|
1252
|
-
connector_state_converter=connector_state_converter,
|
1253
|
-
cursor_field=cursor_field,
|
1254
1059
|
)
|
1255
1060
|
|
1256
1061
|
@staticmethod
|
@@ -1296,6 +1101,7 @@ class ModelToComponentFactory:
|
|
1296
1101
|
:param config: The custom defined connector config
|
1297
1102
|
:return: The declarative component built from the Pydantic model to be used at runtime
|
1298
1103
|
"""
|
1104
|
+
|
1299
1105
|
custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
|
1300
1106
|
component_fields = get_type_hints(custom_component_class)
|
1301
1107
|
model_args = model.dict()
|
@@ -1349,38 +1155,14 @@ class ModelToComponentFactory:
|
|
1349
1155
|
return custom_component_class(**kwargs)
|
1350
1156
|
|
1351
1157
|
@staticmethod
|
1352
|
-
def _get_class_from_fully_qualified_class_name(
|
1353
|
-
full_qualified_class_name: str,
|
1354
|
-
) -> Any:
|
1355
|
-
"""Get a class from its fully qualified name.
|
1356
|
-
|
1357
|
-
If a custom components module is needed, we assume it is already registered - probably
|
1358
|
-
as `source_declarative_manifest.components` or `components`.
|
1359
|
-
|
1360
|
-
Args:
|
1361
|
-
full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName").
|
1362
|
-
|
1363
|
-
Returns:
|
1364
|
-
Any: The class object.
|
1365
|
-
|
1366
|
-
Raises:
|
1367
|
-
ValueError: If the class cannot be loaded.
|
1368
|
-
"""
|
1158
|
+
def _get_class_from_fully_qualified_class_name(full_qualified_class_name: str) -> Any:
|
1369
1159
|
split = full_qualified_class_name.split(".")
|
1370
|
-
|
1160
|
+
module = ".".join(split[:-1])
|
1371
1161
|
class_name = split[-1]
|
1372
|
-
|
1373
1162
|
try:
|
1374
|
-
|
1375
|
-
except
|
1376
|
-
raise ValueError(f"Could not load
|
1377
|
-
|
1378
|
-
try:
|
1379
|
-
return getattr(module_ref, class_name)
|
1380
|
-
except AttributeError as e:
|
1381
|
-
raise ValueError(
|
1382
|
-
f"Could not load class `{class_name}` from module `{module_name_full}`.",
|
1383
|
-
) from e
|
1163
|
+
return getattr(importlib.import_module(module), class_name)
|
1164
|
+
except AttributeError:
|
1165
|
+
raise ValueError(f"Could not load class {full_qualified_class_name}.")
|
1384
1166
|
|
1385
1167
|
@staticmethod
|
1386
1168
|
def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
|
@@ -1489,19 +1271,15 @@ class ModelToComponentFactory:
|
|
1489
1271
|
)
|
1490
1272
|
|
1491
1273
|
end_time_option = (
|
1492
|
-
|
1493
|
-
|
1494
|
-
field_name=model.end_time_option.field_name,
|
1495
|
-
parameters=model.parameters or {},
|
1274
|
+
self._create_component_from_model(
|
1275
|
+
model.end_time_option, config, parameters=model.parameters or {}
|
1496
1276
|
)
|
1497
1277
|
if model.end_time_option
|
1498
1278
|
else None
|
1499
1279
|
)
|
1500
1280
|
start_time_option = (
|
1501
|
-
|
1502
|
-
|
1503
|
-
field_name=model.start_time_option.field_name,
|
1504
|
-
parameters=model.parameters or {},
|
1281
|
+
self._create_component_from_model(
|
1282
|
+
model.start_time_option, config, parameters=model.parameters or {}
|
1505
1283
|
)
|
1506
1284
|
if model.start_time_option
|
1507
1285
|
else None
|
@@ -1558,33 +1336,32 @@ class ModelToComponentFactory:
|
|
1558
1336
|
raise ValueError(
|
1559
1337
|
"Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
|
1560
1338
|
)
|
1561
|
-
|
1562
|
-
|
1563
|
-
|
1564
|
-
|
1565
|
-
|
1566
|
-
|
1567
|
-
|
1568
|
-
|
1569
|
-
|
1339
|
+
client_side_incremental_sync = {
|
1340
|
+
"date_time_based_cursor": self._create_component_from_model(
|
1341
|
+
model=model.incremental_sync, config=config
|
1342
|
+
),
|
1343
|
+
"substream_cursor": (
|
1344
|
+
combined_slicers
|
1345
|
+
if isinstance(
|
1346
|
+
combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
|
1347
|
+
)
|
1348
|
+
else None
|
1349
|
+
),
|
1350
|
+
}
|
1570
1351
|
|
1571
1352
|
if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
|
1572
1353
|
cursor_model = model.incremental_sync
|
1573
1354
|
|
1574
1355
|
end_time_option = (
|
1575
|
-
|
1576
|
-
|
1577
|
-
field_name=cursor_model.end_time_option.field_name,
|
1578
|
-
parameters=cursor_model.parameters or {},
|
1356
|
+
self._create_component_from_model(
|
1357
|
+
cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
|
1579
1358
|
)
|
1580
1359
|
if cursor_model.end_time_option
|
1581
1360
|
else None
|
1582
1361
|
)
|
1583
1362
|
start_time_option = (
|
1584
|
-
|
1585
|
-
|
1586
|
-
field_name=cursor_model.start_time_option.field_name,
|
1587
|
-
parameters=cursor_model.parameters or {},
|
1363
|
+
self._create_component_from_model(
|
1364
|
+
cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
|
1588
1365
|
)
|
1589
1366
|
if cursor_model.start_time_option
|
1590
1367
|
else None
|
@@ -1656,7 +1433,7 @@ class ModelToComponentFactory:
|
|
1656
1433
|
) -> Optional[PartitionRouter]:
|
1657
1434
|
if (
|
1658
1435
|
hasattr(model, "partition_router")
|
1659
|
-
and isinstance(model, SimpleRetrieverModel
|
1436
|
+
and isinstance(model, SimpleRetrieverModel)
|
1660
1437
|
and model.partition_router
|
1661
1438
|
):
|
1662
1439
|
stream_slicer_model = model.partition_router
|
@@ -1690,31 +1467,6 @@ class ModelToComponentFactory:
|
|
1690
1467
|
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
1691
1468
|
|
1692
1469
|
if model.incremental_sync and stream_slicer:
|
1693
|
-
if model.retriever.type == "AsyncRetriever":
|
1694
|
-
if model.incremental_sync.type != "DatetimeBasedCursor":
|
1695
|
-
# We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support or unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
|
1696
|
-
raise ValueError(
|
1697
|
-
"AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
|
1698
|
-
)
|
1699
|
-
if stream_slicer:
|
1700
|
-
return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
1701
|
-
state_manager=self._connector_state_manager,
|
1702
|
-
model_type=DatetimeBasedCursorModel,
|
1703
|
-
component_definition=model.incremental_sync.__dict__,
|
1704
|
-
stream_name=model.name or "",
|
1705
|
-
stream_namespace=None,
|
1706
|
-
config=config or {},
|
1707
|
-
stream_state={},
|
1708
|
-
partition_router=stream_slicer,
|
1709
|
-
)
|
1710
|
-
return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
1711
|
-
model_type=DatetimeBasedCursorModel,
|
1712
|
-
component_definition=model.incremental_sync.__dict__,
|
1713
|
-
stream_name=model.name or "",
|
1714
|
-
stream_namespace=None,
|
1715
|
-
config=config or {},
|
1716
|
-
)
|
1717
|
-
|
1718
1470
|
incremental_sync_model = model.incremental_sync
|
1719
1471
|
if (
|
1720
1472
|
hasattr(incremental_sync_model, "global_substream_cursor")
|
@@ -1740,22 +1492,6 @@ class ModelToComponentFactory:
|
|
1740
1492
|
stream_cursor=cursor_component,
|
1741
1493
|
)
|
1742
1494
|
elif model.incremental_sync:
|
1743
|
-
if model.retriever.type == "AsyncRetriever":
|
1744
|
-
if model.incremental_sync.type != "DatetimeBasedCursor":
|
1745
|
-
# We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support or unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
|
1746
|
-
raise ValueError(
|
1747
|
-
"AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
|
1748
|
-
)
|
1749
|
-
if model.retriever.partition_router:
|
1750
|
-
# Note that this development is also done in parallel to the per partition development which once merged we could support here by calling `create_concurrent_cursor_from_perpartition_cursor`
|
1751
|
-
raise ValueError("Per partition state is not supported yet for AsyncRetriever")
|
1752
|
-
return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
1753
|
-
model_type=DatetimeBasedCursorModel,
|
1754
|
-
component_definition=model.incremental_sync.__dict__,
|
1755
|
-
stream_name=model.name or "",
|
1756
|
-
stream_namespace=None,
|
1757
|
-
config=config or {},
|
1758
|
-
)
|
1759
1495
|
return (
|
1760
1496
|
self._create_component_from_model(model=model.incremental_sync, config=config)
|
1761
1497
|
if model.incremental_sync
|
@@ -1974,26 +1710,10 @@ class ModelToComponentFactory:
|
|
1974
1710
|
) -> InlineSchemaLoader:
|
1975
1711
|
return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
|
1976
1712
|
|
1977
|
-
|
1978
|
-
|
1979
|
-
) -> ComplexFieldType:
|
1980
|
-
items = (
|
1981
|
-
self._create_component_from_model(model=model.items, config=config)
|
1982
|
-
if isinstance(model.items, ComplexFieldTypeModel)
|
1983
|
-
else model.items
|
1984
|
-
)
|
1985
|
-
|
1986
|
-
return ComplexFieldType(field_type=model.field_type, items=items)
|
1987
|
-
|
1988
|
-
def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
|
1989
|
-
target_type = (
|
1990
|
-
self._create_component_from_model(model=model.target_type, config=config)
|
1991
|
-
if isinstance(model.target_type, ComplexFieldTypeModel)
|
1992
|
-
else model.target_type
|
1993
|
-
)
|
1994
|
-
|
1713
|
+
@staticmethod
|
1714
|
+
def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
|
1995
1715
|
return TypesMap(
|
1996
|
-
target_type=target_type,
|
1716
|
+
target_type=model.target_type,
|
1997
1717
|
current_type=model.current_type,
|
1998
1718
|
condition=model.condition if model.condition is not None else "True",
|
1999
1719
|
)
|
@@ -2150,16 +1870,11 @@ class ModelToComponentFactory:
|
|
2150
1870
|
additional_jwt_payload=model.additional_jwt_payload,
|
2151
1871
|
)
|
2152
1872
|
|
2153
|
-
@staticmethod
|
2154
1873
|
def create_list_partition_router(
|
2155
|
-
model: ListPartitionRouterModel, config: Config, **kwargs: Any
|
1874
|
+
self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
|
2156
1875
|
) -> ListPartitionRouter:
|
2157
1876
|
request_option = (
|
2158
|
-
|
2159
|
-
inject_into=RequestOptionType(model.request_option.inject_into.value),
|
2160
|
-
field_name=model.request_option.field_name,
|
2161
|
-
parameters=model.parameters or {},
|
2162
|
-
)
|
1877
|
+
self._create_component_from_model(model.request_option, config)
|
2163
1878
|
if model.request_option
|
2164
1879
|
else None
|
2165
1880
|
)
|
@@ -2196,12 +1911,6 @@ class ModelToComponentFactory:
|
|
2196
1911
|
def create_oauth_authenticator(
|
2197
1912
|
self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
|
2198
1913
|
) -> DeclarativeOauth2Authenticator:
|
2199
|
-
profile_assertion = (
|
2200
|
-
self._create_component_from_model(model.profile_assertion, config=config)
|
2201
|
-
if model.profile_assertion
|
2202
|
-
else None
|
2203
|
-
)
|
2204
|
-
|
2205
1914
|
if model.refresh_token_updater:
|
2206
1915
|
# ignore type error because fixing it would have a lot of dependencies, revisit later
|
2207
1916
|
return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore
|
@@ -2222,17 +1931,13 @@ class ModelToComponentFactory:
|
|
2222
1931
|
).eval(config),
|
2223
1932
|
client_id=InterpolatedString.create(
|
2224
1933
|
model.client_id, parameters=model.parameters or {}
|
2225
|
-
).eval(config)
|
2226
|
-
if model.client_id
|
2227
|
-
else model.client_id,
|
1934
|
+
).eval(config),
|
2228
1935
|
client_secret_name=InterpolatedString.create(
|
2229
1936
|
model.client_secret_name or "client_secret", parameters=model.parameters or {}
|
2230
1937
|
).eval(config),
|
2231
1938
|
client_secret=InterpolatedString.create(
|
2232
1939
|
model.client_secret, parameters=model.parameters or {}
|
2233
|
-
).eval(config)
|
2234
|
-
if model.client_secret
|
2235
|
-
else model.client_secret,
|
1940
|
+
).eval(config),
|
2236
1941
|
access_token_config_path=model.refresh_token_updater.access_token_config_path,
|
2237
1942
|
refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
|
2238
1943
|
token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
|
@@ -2278,8 +1983,6 @@ class ModelToComponentFactory:
|
|
2278
1983
|
config=config,
|
2279
1984
|
parameters=model.parameters or {},
|
2280
1985
|
message_repository=self._message_repository,
|
2281
|
-
profile_assertion=profile_assertion,
|
2282
|
-
use_profile_assertion=model.use_profile_assertion,
|
2283
1986
|
)
|
2284
1987
|
|
2285
1988
|
def create_offset_increment(
|
@@ -2355,7 +2058,25 @@ class ModelToComponentFactory:
|
|
2355
2058
|
model: RequestOptionModel, config: Config, **kwargs: Any
|
2356
2059
|
) -> RequestOption:
|
2357
2060
|
inject_into = RequestOptionType(model.inject_into.value)
|
2358
|
-
|
2061
|
+
field_path: Optional[List[Union[InterpolatedString, str]]] = (
|
2062
|
+
[
|
2063
|
+
InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
|
2064
|
+
for segment in model.field_path
|
2065
|
+
]
|
2066
|
+
if model.field_path
|
2067
|
+
else None
|
2068
|
+
)
|
2069
|
+
field_name = (
|
2070
|
+
InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
|
2071
|
+
if model.field_name
|
2072
|
+
else None
|
2073
|
+
)
|
2074
|
+
return RequestOption(
|
2075
|
+
field_name=field_name,
|
2076
|
+
field_path=field_path,
|
2077
|
+
inject_into=inject_into,
|
2078
|
+
parameters=kwargs.get("parameters", {}),
|
2079
|
+
)
|
2359
2080
|
|
2360
2081
|
def create_record_selector(
|
2361
2082
|
self,
|