airbyte-cdk 6.31.2.dev0__py3-none-any.whl → 6.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +9 -3
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +3 -2
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
  5. airbyte_cdk/sources/declarative/auth/oauth.py +89 -23
  6. airbyte_cdk/sources/declarative/auth/token.py +8 -3
  7. airbyte_cdk/sources/declarative/auth/token_provider.py +4 -5
  8. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +19 -9
  9. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +134 -43
  10. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +55 -16
  11. airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
  12. airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
  13. airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
  14. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +400 -0
  15. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -7
  16. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
  17. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +35 -3
  18. airbyte_cdk/sources/declarative/manifest_declarative_source.py +20 -7
  19. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +45 -15
  20. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +143 -0
  21. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +343 -64
  22. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  23. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
  24. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +55 -15
  25. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +22 -0
  26. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
  27. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -5
  28. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -6
  29. airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
  30. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
  31. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
  32. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +2 -5
  33. airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
  34. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +44 -5
  35. airbyte_cdk/sources/http_logger.py +1 -1
  36. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  37. airbyte_cdk/sources/streams/concurrent/cursor.py +51 -57
  38. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  39. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +22 -13
  40. airbyte_cdk/sources/streams/core.py +6 -6
  41. airbyte_cdk/sources/streams/http/http.py +1 -2
  42. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +231 -62
  43. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +171 -88
  44. airbyte_cdk/sources/types.py +4 -2
  45. airbyte_cdk/sources/utils/transform.py +23 -2
  46. airbyte_cdk/test/utils/manifest_only_fixtures.py +1 -2
  47. airbyte_cdk/utils/datetime_helpers.py +499 -0
  48. airbyte_cdk/utils/mapping_helpers.py +27 -86
  49. airbyte_cdk/utils/slice_hasher.py +8 -1
  50. airbyte_cdk-6.32.0.dist-info/LICENSE_SHORT +1 -0
  51. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/METADATA +6 -6
  52. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/RECORD +55 -49
  53. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/WHEEL +1 -1
  54. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/LICENSE.txt +0 -0
  55. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/entry_points.txt +0 -0
@@ -87,6 +87,8 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
87
87
  )
88
88
  from airbyte_cdk.sources.declarative.incremental import (
89
89
  ChildPartitionResumableFullRefreshCursor,
90
+ ConcurrentCursorFactory,
91
+ ConcurrentPerPartitionCursor,
90
92
  CursorFactory,
91
93
  DatetimeBasedCursor,
92
94
  DeclarativeCursor,
@@ -101,6 +103,7 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
101
103
  LegacyToPerPartitionStateMigration,
102
104
  )
103
105
  from airbyte_cdk.sources.declarative.models import (
106
+ Clamping,
104
107
  CustomStateMigration,
105
108
  )
106
109
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -130,6 +133,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
130
133
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
131
134
  CheckStream as CheckStreamModel,
132
135
  )
136
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
137
+ ComplexFieldType as ComplexFieldTypeModel,
138
+ )
133
139
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
134
140
  ComponentMappingDefinition as ComponentMappingDefinitionModel,
135
141
  )
@@ -363,6 +369,10 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
363
369
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
364
370
  ZipfileDecoder as ZipfileDecoderModel,
365
371
  )
372
+ from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
373
+ COMPONENTS_MODULE_NAME,
374
+ SDM_COMPONENTS_MODULE_NAME,
375
+ )
366
376
  from airbyte_cdk.sources.declarative.partition_routers import (
367
377
  CartesianProductStreamSlicer,
368
378
  ListPartitionRouter,
@@ -422,6 +432,7 @@ from airbyte_cdk.sources.declarative.retrievers import (
422
432
  SimpleRetrieverTestReadDecorator,
423
433
  )
424
434
  from airbyte_cdk.sources.declarative.schema import (
435
+ ComplexFieldType,
425
436
  DefaultSchemaLoader,
426
437
  DynamicSchemaLoader,
427
438
  InlineSchemaLoader,
@@ -456,6 +467,16 @@ from airbyte_cdk.sources.message import (
456
467
  InMemoryMessageRepository,
457
468
  LogAppenderMessageRepositoryDecorator,
458
469
  MessageRepository,
470
+ NoopMessageRepository,
471
+ )
472
+ from airbyte_cdk.sources.streams.concurrent.clamping import (
473
+ ClampingEndProvider,
474
+ ClampingStrategy,
475
+ DayClampingStrategy,
476
+ MonthClampingStrategy,
477
+ NoClamping,
478
+ WeekClampingStrategy,
479
+ Weekday,
459
480
  )
460
481
  from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
461
482
  from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -486,6 +507,7 @@ class ModelToComponentFactory:
486
507
  disable_cache: bool = False,
487
508
  disable_resumable_full_refresh: bool = False,
488
509
  message_repository: Optional[MessageRepository] = None,
510
+ connector_state_manager: Optional[ConnectorStateManager] = None,
489
511
  ):
490
512
  self._init_mappings()
491
513
  self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
@@ -497,6 +519,7 @@ class ModelToComponentFactory:
497
519
  self._message_repository = message_repository or InMemoryMessageRepository(
498
520
  self._evaluate_log_level(emit_connector_builder_messages)
499
521
  )
522
+ self._connector_state_manager = connector_state_manager or ConnectorStateManager()
500
523
 
501
524
  def _init_mappings(self) -> None:
502
525
  self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
@@ -555,6 +578,7 @@ class ModelToComponentFactory:
555
578
  DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
556
579
  SchemaTypeIdentifierModel: self.create_schema_type_identifier,
557
580
  TypesMapModel: self.create_types_map,
581
+ ComplexFieldTypeModel: self.create_complex_field_type,
558
582
  JwtAuthenticatorModel: self.create_jwt_authenticator,
559
583
  LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
560
584
  ListPartitionRouterModel: self.create_list_partition_router,
@@ -709,8 +733,8 @@ class ModelToComponentFactory:
709
733
  }
710
734
  return names_to_types[value_type]
711
735
 
736
+ @staticmethod
712
737
  def create_api_key_authenticator(
713
- self,
714
738
  model: ApiKeyAuthenticatorModel,
715
739
  config: Config,
716
740
  token_provider: Optional[TokenProvider] = None,
@@ -732,8 +756,10 @@ class ModelToComponentFactory:
732
756
  )
733
757
 
734
758
  request_option = (
735
- self._create_component_from_model(
736
- model.inject_into, config, parameters=model.parameters or {}
759
+ RequestOption(
760
+ inject_into=RequestOptionType(model.inject_into.inject_into.value),
761
+ field_name=model.inject_into.field_name,
762
+ parameters=model.parameters or {},
737
763
  )
738
764
  if model.inject_into
739
765
  else RequestOption(
@@ -742,7 +768,6 @@ class ModelToComponentFactory:
742
768
  parameters=model.parameters or {},
743
769
  )
744
770
  )
745
-
746
771
  return ApiKeyAuthenticator(
747
772
  token_provider=(
748
773
  token_provider
@@ -824,7 +849,7 @@ class ModelToComponentFactory:
824
849
  token_provider=token_provider,
825
850
  )
826
851
  else:
827
- return self.create_api_key_authenticator(
852
+ return ModelToComponentFactory.create_api_key_authenticator(
828
853
  ApiKeyAuthenticatorModel(
829
854
  type="ApiKeyAuthenticator",
830
855
  api_token="",
@@ -878,7 +903,15 @@ class ModelToComponentFactory:
878
903
  def create_check_dynamic_stream(
879
904
  model: CheckDynamicStreamModel, config: Config, **kwargs: Any
880
905
  ) -> CheckDynamicStream:
881
- return CheckDynamicStream(stream_count=model.stream_count, parameters={})
906
+ assert model.use_check_availability is not None # for mypy
907
+
908
+ use_check_availability = model.use_check_availability
909
+
910
+ return CheckDynamicStream(
911
+ stream_count=model.stream_count,
912
+ use_check_availability=use_check_availability,
913
+ parameters={},
914
+ )
882
915
 
883
916
  def create_composite_error_handler(
884
917
  self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
@@ -904,15 +937,24 @@ class ModelToComponentFactory:
904
937
 
905
938
  def create_concurrent_cursor_from_datetime_based_cursor(
906
939
  self,
907
- state_manager: ConnectorStateManager,
908
940
  model_type: Type[BaseModel],
909
941
  component_definition: ComponentDefinition,
910
942
  stream_name: str,
911
943
  stream_namespace: Optional[str],
912
944
  config: Config,
913
- stream_state: MutableMapping[str, Any],
945
+ message_repository: Optional[MessageRepository] = None,
946
+ runtime_lookback_window: Optional[datetime.timedelta] = None,
914
947
  **kwargs: Any,
915
948
  ) -> ConcurrentCursor:
949
+ # Per-partition incremental streams can dynamically create child cursors which will pass their current
950
+ # state via the stream_state keyword argument. Incremental syncs without parent streams use the
951
+ # incoming state and connector_state_manager that is initialized when the component factory is created
952
+ stream_state = (
953
+ self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
954
+ if "stream_state" not in kwargs
955
+ else kwargs["stream_state"]
956
+ )
957
+
916
958
  component_type = component_definition.get("type")
917
959
  if component_definition.get("type") != model_type.__name__:
918
960
  raise ValueError(
@@ -972,10 +1014,22 @@ class ModelToComponentFactory:
972
1014
  connector_state_converter = CustomFormatConcurrentStreamStateConverter(
973
1015
  datetime_format=datetime_format,
974
1016
  input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
975
- is_sequential_state=True,
1017
+ is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state
976
1018
  cursor_granularity=cursor_granularity,
977
1019
  )
978
1020
 
1021
+ # Adjusts the stream state by applying the runtime lookback window.
1022
+ # This is used to ensure correct state handling in case of failed partitions.
1023
+ stream_state_value = stream_state.get(cursor_field.cursor_field_key)
1024
+ if runtime_lookback_window and stream_state_value:
1025
+ new_stream_state = (
1026
+ connector_state_converter.parse_timestamp(stream_state_value)
1027
+ - runtime_lookback_window
1028
+ )
1029
+ stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
1030
+ new_stream_state
1031
+ )
1032
+
979
1033
  start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
980
1034
  if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
981
1035
  start_date_runtime_value = self.create_min_max_datetime(
@@ -1042,12 +1096,59 @@ class ModelToComponentFactory:
1042
1096
  if evaluated_step:
1043
1097
  step_length = parse_duration(evaluated_step)
1044
1098
 
1099
+ clamping_strategy: ClampingStrategy = NoClamping()
1100
+ if datetime_based_cursor_model.clamping:
1101
+ # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
1102
+ # it is still better than shifting the low-code concept of interpolation into the ConcurrentCursor runtime
1103
+ # object which we want to keep agnostic of being low-code
1104
+ target = InterpolatedString(
1105
+ string=datetime_based_cursor_model.clamping.target,
1106
+ parameters=datetime_based_cursor_model.parameters or {},
1107
+ )
1108
+ evaluated_target = target.eval(config=config)
1109
+ match evaluated_target:
1110
+ case "DAY":
1111
+ clamping_strategy = DayClampingStrategy()
1112
+ end_date_provider = ClampingEndProvider(
1113
+ DayClampingStrategy(is_ceiling=False),
1114
+ end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1115
+ granularity=cursor_granularity or datetime.timedelta(seconds=1),
1116
+ )
1117
+ case "WEEK":
1118
+ if (
1119
+ not datetime_based_cursor_model.clamping.target_details
1120
+ or "weekday" not in datetime_based_cursor_model.clamping.target_details
1121
+ ):
1122
+ raise ValueError(
1123
+ "Given WEEK clamping, weekday needs to be provided as target_details"
1124
+ )
1125
+ weekday = self._assemble_weekday(
1126
+ datetime_based_cursor_model.clamping.target_details["weekday"]
1127
+ )
1128
+ clamping_strategy = WeekClampingStrategy(weekday)
1129
+ end_date_provider = ClampingEndProvider(
1130
+ WeekClampingStrategy(weekday, is_ceiling=False),
1131
+ end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1132
+ granularity=cursor_granularity or datetime.timedelta(days=1),
1133
+ )
1134
+ case "MONTH":
1135
+ clamping_strategy = MonthClampingStrategy()
1136
+ end_date_provider = ClampingEndProvider(
1137
+ MonthClampingStrategy(is_ceiling=False),
1138
+ end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1139
+ granularity=cursor_granularity or datetime.timedelta(days=1),
1140
+ )
1141
+ case _:
1142
+ raise ValueError(
1143
+ f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
1144
+ )
1145
+
1045
1146
  return ConcurrentCursor(
1046
1147
  stream_name=stream_name,
1047
1148
  stream_namespace=stream_namespace,
1048
1149
  stream_state=stream_state,
1049
- message_repository=self._message_repository,
1050
- connector_state_manager=state_manager,
1150
+ message_repository=message_repository or self._message_repository,
1151
+ connector_state_manager=self._connector_state_manager,
1051
1152
  connector_state_converter=connector_state_converter,
1052
1153
  cursor_field=cursor_field,
1053
1154
  slice_boundary_fields=slice_boundary_fields,
@@ -1056,6 +1157,100 @@ class ModelToComponentFactory:
1056
1157
  lookback_window=lookback_window,
1057
1158
  slice_range=step_length,
1058
1159
  cursor_granularity=cursor_granularity,
1160
+ clamping_strategy=clamping_strategy,
1161
+ )
1162
+
1163
+ def _assemble_weekday(self, weekday: str) -> Weekday:
1164
+ match weekday:
1165
+ case "MONDAY":
1166
+ return Weekday.MONDAY
1167
+ case "TUESDAY":
1168
+ return Weekday.TUESDAY
1169
+ case "WEDNESDAY":
1170
+ return Weekday.WEDNESDAY
1171
+ case "THURSDAY":
1172
+ return Weekday.THURSDAY
1173
+ case "FRIDAY":
1174
+ return Weekday.FRIDAY
1175
+ case "SATURDAY":
1176
+ return Weekday.SATURDAY
1177
+ case "SUNDAY":
1178
+ return Weekday.SUNDAY
1179
+ case _:
1180
+ raise ValueError(f"Unknown weekday {weekday}")
1181
+
1182
+ def create_concurrent_cursor_from_perpartition_cursor(
1183
+ self,
1184
+ state_manager: ConnectorStateManager,
1185
+ model_type: Type[BaseModel],
1186
+ component_definition: ComponentDefinition,
1187
+ stream_name: str,
1188
+ stream_namespace: Optional[str],
1189
+ config: Config,
1190
+ stream_state: MutableMapping[str, Any],
1191
+ partition_router: PartitionRouter,
1192
+ **kwargs: Any,
1193
+ ) -> ConcurrentPerPartitionCursor:
1194
+ component_type = component_definition.get("type")
1195
+ if component_definition.get("type") != model_type.__name__:
1196
+ raise ValueError(
1197
+ f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1198
+ )
1199
+
1200
+ datetime_based_cursor_model = model_type.parse_obj(component_definition)
1201
+
1202
+ if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
1203
+ raise ValueError(
1204
+ f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
1205
+ )
1206
+
1207
+ interpolated_cursor_field = InterpolatedString.create(
1208
+ datetime_based_cursor_model.cursor_field,
1209
+ parameters=datetime_based_cursor_model.parameters or {},
1210
+ )
1211
+ cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1212
+
1213
+ datetime_format = datetime_based_cursor_model.datetime_format
1214
+
1215
+ cursor_granularity = (
1216
+ parse_duration(datetime_based_cursor_model.cursor_granularity)
1217
+ if datetime_based_cursor_model.cursor_granularity
1218
+ else None
1219
+ )
1220
+
1221
+ connector_state_converter: DateTimeStreamStateConverter
1222
+ connector_state_converter = CustomFormatConcurrentStreamStateConverter(
1223
+ datetime_format=datetime_format,
1224
+ input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
1225
+ is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state
1226
+ cursor_granularity=cursor_granularity,
1227
+ )
1228
+
1229
+ # Create the cursor factory
1230
+ cursor_factory = ConcurrentCursorFactory(
1231
+ partial(
1232
+ self.create_concurrent_cursor_from_datetime_based_cursor,
1233
+ state_manager=state_manager,
1234
+ model_type=model_type,
1235
+ component_definition=component_definition,
1236
+ stream_name=stream_name,
1237
+ stream_namespace=stream_namespace,
1238
+ config=config,
1239
+ message_repository=NoopMessageRepository(),
1240
+ )
1241
+ )
1242
+
1243
+ # Return the per-partition concurrent cursor, wired with the cursor factory and state converter
1244
+ return ConcurrentPerPartitionCursor(
1245
+ cursor_factory=cursor_factory,
1246
+ partition_router=partition_router,
1247
+ stream_name=stream_name,
1248
+ stream_namespace=stream_namespace,
1249
+ stream_state=stream_state,
1250
+ message_repository=self._message_repository, # type: ignore
1251
+ connector_state_manager=state_manager,
1252
+ connector_state_converter=connector_state_converter,
1253
+ cursor_field=cursor_field,
1059
1254
  )
1060
1255
 
1061
1256
  @staticmethod
@@ -1101,7 +1296,6 @@ class ModelToComponentFactory:
1101
1296
  :param config: The custom defined connector config
1102
1297
  :return: The declarative component built from the Pydantic model to be used at runtime
1103
1298
  """
1104
-
1105
1299
  custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
1106
1300
  component_fields = get_type_hints(custom_component_class)
1107
1301
  model_args = model.dict()
@@ -1155,14 +1349,38 @@ class ModelToComponentFactory:
1155
1349
  return custom_component_class(**kwargs)
1156
1350
 
1157
1351
  @staticmethod
1158
- def _get_class_from_fully_qualified_class_name(full_qualified_class_name: str) -> Any:
1352
+ def _get_class_from_fully_qualified_class_name(
1353
+ full_qualified_class_name: str,
1354
+ ) -> Any:
1355
+ """Get a class from its fully qualified name.
1356
+
1357
+ If a custom components module is needed, we assume it is already registered - probably
1358
+ as `source_declarative_manifest.components` or `components`.
1359
+
1360
+ Args:
1361
+ full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName").
1362
+
1363
+ Returns:
1364
+ Any: The class object.
1365
+
1366
+ Raises:
1367
+ ValueError: If the class cannot be loaded.
1368
+ """
1159
1369
  split = full_qualified_class_name.split(".")
1160
- module = ".".join(split[:-1])
1370
+ module_name_full = ".".join(split[:-1])
1161
1371
  class_name = split[-1]
1372
+
1162
1373
  try:
1163
- return getattr(importlib.import_module(module), class_name)
1164
- except AttributeError:
1165
- raise ValueError(f"Could not load class {full_qualified_class_name}.")
1374
+ module_ref = importlib.import_module(module_name_full)
1375
+ except ModuleNotFoundError as e:
1376
+ raise ValueError(f"Could not load module `{module_name_full}`.") from e
1377
+
1378
+ try:
1379
+ return getattr(module_ref, class_name)
1380
+ except AttributeError as e:
1381
+ raise ValueError(
1382
+ f"Could not load class `{class_name}` from module `{module_name_full}`.",
1383
+ ) from e
1166
1384
 
1167
1385
  @staticmethod
1168
1386
  def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
@@ -1271,15 +1489,19 @@ class ModelToComponentFactory:
1271
1489
  )
1272
1490
 
1273
1491
  end_time_option = (
1274
- self._create_component_from_model(
1275
- model.end_time_option, config, parameters=model.parameters or {}
1492
+ RequestOption(
1493
+ inject_into=RequestOptionType(model.end_time_option.inject_into.value),
1494
+ field_name=model.end_time_option.field_name,
1495
+ parameters=model.parameters or {},
1276
1496
  )
1277
1497
  if model.end_time_option
1278
1498
  else None
1279
1499
  )
1280
1500
  start_time_option = (
1281
- self._create_component_from_model(
1282
- model.start_time_option, config, parameters=model.parameters or {}
1501
+ RequestOption(
1502
+ inject_into=RequestOptionType(model.start_time_option.inject_into.value),
1503
+ field_name=model.start_time_option.field_name,
1504
+ parameters=model.parameters or {},
1283
1505
  )
1284
1506
  if model.start_time_option
1285
1507
  else None
@@ -1336,32 +1558,33 @@ class ModelToComponentFactory:
1336
1558
  raise ValueError(
1337
1559
  "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
1338
1560
  )
1339
- client_side_incremental_sync = {
1340
- "date_time_based_cursor": self._create_component_from_model(
1341
- model=model.incremental_sync, config=config
1342
- ),
1343
- "substream_cursor": (
1344
- combined_slicers
1345
- if isinstance(
1346
- combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1347
- )
1348
- else None
1349
- ),
1350
- }
1561
+ cursor = (
1562
+ combined_slicers
1563
+ if isinstance(
1564
+ combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1565
+ )
1566
+ else self._create_component_from_model(model=model.incremental_sync, config=config)
1567
+ )
1568
+
1569
+ client_side_incremental_sync = {"cursor": cursor}
1351
1570
 
1352
1571
  if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
1353
1572
  cursor_model = model.incremental_sync
1354
1573
 
1355
1574
  end_time_option = (
1356
- self._create_component_from_model(
1357
- cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
1575
+ RequestOption(
1576
+ inject_into=RequestOptionType(cursor_model.end_time_option.inject_into.value),
1577
+ field_name=cursor_model.end_time_option.field_name,
1578
+ parameters=cursor_model.parameters or {},
1358
1579
  )
1359
1580
  if cursor_model.end_time_option
1360
1581
  else None
1361
1582
  )
1362
1583
  start_time_option = (
1363
- self._create_component_from_model(
1364
- cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
1584
+ RequestOption(
1585
+ inject_into=RequestOptionType(cursor_model.start_time_option.inject_into.value),
1586
+ field_name=cursor_model.start_time_option.field_name,
1587
+ parameters=cursor_model.parameters or {},
1365
1588
  )
1366
1589
  if cursor_model.start_time_option
1367
1590
  else None
@@ -1433,7 +1656,7 @@ class ModelToComponentFactory:
1433
1656
  ) -> Optional[PartitionRouter]:
1434
1657
  if (
1435
1658
  hasattr(model, "partition_router")
1436
- and isinstance(model, SimpleRetrieverModel)
1659
+ and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel)
1437
1660
  and model.partition_router
1438
1661
  ):
1439
1662
  stream_slicer_model = model.partition_router
@@ -1467,6 +1690,31 @@ class ModelToComponentFactory:
1467
1690
  stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
1468
1691
 
1469
1692
  if model.incremental_sync and stream_slicer:
1693
+ if model.retriever.type == "AsyncRetriever":
1694
+ if model.incremental_sync.type != "DatetimeBasedCursor":
1695
+ # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support of unordered slices (for example, when we trigger reports for January and February, the report for February can be completed first). Once we have support for custom concurrent cursors or have a new implementation available in the CDK, we can enable more cursors here.
1696
+ raise ValueError(
1697
+ "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
1698
+ )
1699
+ if stream_slicer:
1700
+ return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1701
+ state_manager=self._connector_state_manager,
1702
+ model_type=DatetimeBasedCursorModel,
1703
+ component_definition=model.incremental_sync.__dict__,
1704
+ stream_name=model.name or "",
1705
+ stream_namespace=None,
1706
+ config=config or {},
1707
+ stream_state={},
1708
+ partition_router=stream_slicer,
1709
+ )
1710
+ return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1711
+ model_type=DatetimeBasedCursorModel,
1712
+ component_definition=model.incremental_sync.__dict__,
1713
+ stream_name=model.name or "",
1714
+ stream_namespace=None,
1715
+ config=config or {},
1716
+ )
1717
+
1470
1718
  incremental_sync_model = model.incremental_sync
1471
1719
  if (
1472
1720
  hasattr(incremental_sync_model, "global_substream_cursor")
@@ -1492,6 +1740,22 @@ class ModelToComponentFactory:
1492
1740
  stream_cursor=cursor_component,
1493
1741
  )
1494
1742
  elif model.incremental_sync:
1743
+ if model.retriever.type == "AsyncRetriever":
1744
+ if model.incremental_sync.type != "DatetimeBasedCursor":
1745
+ # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support of unordered slices (for example, when we trigger reports for January and February, the report for February can be completed first). Once we have support for custom concurrent cursors or have a new implementation available in the CDK, we can enable more cursors here.
1746
+ raise ValueError(
1747
+ "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
1748
+ )
1749
+ if model.retriever.partition_router:
1750
+ # Note that this work is being done in parallel with the per-partition development; once that is merged, we could support this case here by calling `create_concurrent_cursor_from_perpartition_cursor`
1751
+ raise ValueError("Per partition state is not supported yet for AsyncRetriever")
1752
+ return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1753
+ model_type=DatetimeBasedCursorModel,
1754
+ component_definition=model.incremental_sync.__dict__,
1755
+ stream_name=model.name or "",
1756
+ stream_namespace=None,
1757
+ config=config or {},
1758
+ )
1495
1759
  return (
1496
1760
  self._create_component_from_model(model=model.incremental_sync, config=config)
1497
1761
  if model.incremental_sync
@@ -1710,10 +1974,26 @@ class ModelToComponentFactory:
1710
1974
  ) -> InlineSchemaLoader:
1711
1975
  return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
1712
1976
 
1713
- @staticmethod
1714
- def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
1977
+ def create_complex_field_type(
1978
+ self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
1979
+ ) -> ComplexFieldType:
1980
+ items = (
1981
+ self._create_component_from_model(model=model.items, config=config)
1982
+ if isinstance(model.items, ComplexFieldTypeModel)
1983
+ else model.items
1984
+ )
1985
+
1986
+ return ComplexFieldType(field_type=model.field_type, items=items)
1987
+
1988
+ def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
1989
+ target_type = (
1990
+ self._create_component_from_model(model=model.target_type, config=config)
1991
+ if isinstance(model.target_type, ComplexFieldTypeModel)
1992
+ else model.target_type
1993
+ )
1994
+
1715
1995
  return TypesMap(
1716
- target_type=model.target_type,
1996
+ target_type=target_type,
1717
1997
  current_type=model.current_type,
1718
1998
  condition=model.condition if model.condition is not None else "True",
1719
1999
  )
@@ -1870,11 +2150,16 @@ class ModelToComponentFactory:
1870
2150
  additional_jwt_payload=model.additional_jwt_payload,
1871
2151
  )
1872
2152
 
2153
+ @staticmethod
1873
2154
  def create_list_partition_router(
1874
- self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
2155
+ model: ListPartitionRouterModel, config: Config, **kwargs: Any
1875
2156
  ) -> ListPartitionRouter:
1876
2157
  request_option = (
1877
- self._create_component_from_model(model.request_option, config)
2158
+ RequestOption(
2159
+ inject_into=RequestOptionType(model.request_option.inject_into.value),
2160
+ field_name=model.request_option.field_name,
2161
+ parameters=model.parameters or {},
2162
+ )
1878
2163
  if model.request_option
1879
2164
  else None
1880
2165
  )
@@ -1911,6 +2196,12 @@ class ModelToComponentFactory:
1911
2196
  def create_oauth_authenticator(
1912
2197
  self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
1913
2198
  ) -> DeclarativeOauth2Authenticator:
2199
+ profile_assertion = (
2200
+ self._create_component_from_model(model.profile_assertion, config=config)
2201
+ if model.profile_assertion
2202
+ else None
2203
+ )
2204
+
1914
2205
  if model.refresh_token_updater:
1915
2206
  # ignore type error because fixing it would have a lot of dependencies, revisit later
1916
2207
  return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore
@@ -1931,13 +2222,17 @@ class ModelToComponentFactory:
1931
2222
  ).eval(config),
1932
2223
  client_id=InterpolatedString.create(
1933
2224
  model.client_id, parameters=model.parameters or {}
1934
- ).eval(config),
2225
+ ).eval(config)
2226
+ if model.client_id
2227
+ else model.client_id,
1935
2228
  client_secret_name=InterpolatedString.create(
1936
2229
  model.client_secret_name or "client_secret", parameters=model.parameters or {}
1937
2230
  ).eval(config),
1938
2231
  client_secret=InterpolatedString.create(
1939
2232
  model.client_secret, parameters=model.parameters or {}
1940
- ).eval(config),
2233
+ ).eval(config)
2234
+ if model.client_secret
2235
+ else model.client_secret,
1941
2236
  access_token_config_path=model.refresh_token_updater.access_token_config_path,
1942
2237
  refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
1943
2238
  token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
@@ -1983,6 +2278,8 @@ class ModelToComponentFactory:
1983
2278
  config=config,
1984
2279
  parameters=model.parameters or {},
1985
2280
  message_repository=self._message_repository,
2281
+ profile_assertion=profile_assertion,
2282
+ use_profile_assertion=model.use_profile_assertion,
1986
2283
  )
1987
2284
 
1988
2285
  def create_offset_increment(
@@ -2058,25 +2355,7 @@ class ModelToComponentFactory:
2058
2355
  model: RequestOptionModel, config: Config, **kwargs: Any
2059
2356
  ) -> RequestOption:
2060
2357
  inject_into = RequestOptionType(model.inject_into.value)
2061
- field_path: Optional[List[Union[InterpolatedString, str]]] = (
2062
- [
2063
- InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
2064
- for segment in model.field_path
2065
- ]
2066
- if model.field_path
2067
- else None
2068
- )
2069
- field_name = (
2070
- InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
2071
- if model.field_name
2072
- else None
2073
- )
2074
- return RequestOption(
2075
- field_name=field_name,
2076
- field_path=field_path,
2077
- inject_into=inject_into,
2078
- parameters=kwargs.get("parameters", {}),
2079
- )
2358
+ return RequestOption(field_name=model.field_name, inject_into=inject_into, parameters={})
2080
2359
 
2081
2360
  def create_record_selector(
2082
2361
  self,