airbyte-cdk 6.31.2.dev0__py3-none-any.whl → 6.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +9 -3
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +3 -2
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
  5. airbyte_cdk/sources/declarative/auth/oauth.py +89 -23
  6. airbyte_cdk/sources/declarative/auth/token_provider.py +4 -5
  7. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +19 -9
  8. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +145 -43
  9. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +51 -2
  10. airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
  11. airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
  12. airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
  13. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +400 -0
  14. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
  15. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +35 -3
  16. airbyte_cdk/sources/declarative/manifest_declarative_source.py +20 -7
  17. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +41 -5
  18. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +143 -0
  19. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +313 -30
  20. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  21. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +46 -12
  22. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +22 -0
  23. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
  24. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
  25. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
  26. airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
  27. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +44 -5
  28. airbyte_cdk/sources/http_logger.py +1 -1
  29. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  30. airbyte_cdk/sources/streams/concurrent/cursor.py +51 -57
  31. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  32. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +22 -13
  33. airbyte_cdk/sources/streams/core.py +6 -6
  34. airbyte_cdk/sources/streams/http/http.py +1 -2
  35. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +231 -62
  36. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +171 -88
  37. airbyte_cdk/sources/types.py +4 -2
  38. airbyte_cdk/sources/utils/transform.py +23 -2
  39. airbyte_cdk/test/utils/manifest_only_fixtures.py +1 -2
  40. airbyte_cdk/utils/datetime_helpers.py +499 -0
  41. airbyte_cdk/utils/slice_hasher.py +8 -1
  42. airbyte_cdk-6.33.0.dist-info/LICENSE_SHORT +1 -0
  43. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.33.0.dist-info}/METADATA +6 -6
  44. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.33.0.dist-info}/RECORD +47 -41
  45. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.33.0.dist-info}/WHEEL +1 -1
  46. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.33.0.dist-info}/LICENSE.txt +0 -0
  47. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.33.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,143 @@
1
+ """Contains functions to compile custom code from text."""
2
+
3
+ import hashlib
4
+ import os
5
+ import sys
6
+ from collections.abc import Mapping
7
+ from types import ModuleType
8
+ from typing import Any, cast
9
+
10
+ from typing_extensions import Literal
11
+
12
# Names of the checksum algorithms accepted when validating custom code.
ChecksumType = Literal["md5", "sha256"]
# Maps each supported checksum type name to the `hashlib` constructor that computes it.
# Membership in this mapping is what `validate_python_code` treats as "supported".
CHECKSUM_FUNCTIONS = {
    "md5": hashlib.md5,
    "sha256": hashlib.sha256,
}
# Names under which the custom components module is registered in, and looked up
# from, `sys.modules`.
COMPONENTS_MODULE_NAME = "components"
SDM_COMPONENTS_MODULE_NAME = "source_declarative_manifest.components"
# Not read anywhere in this module; presumably the config key carrying an injected
# manifest - confirm against callers.
INJECTED_MANIFEST = "__injected_declarative_manifest"
# Config key holding the text of a custom `components.py`.
INJECTED_COMPONENTS_PY = "__injected_components_py"
# Config key holding the checksums used to validate that text.
INJECTED_COMPONENTS_PY_CHECKSUMS = "__injected_components_py_checksums"
# Environment variable that must be set to 'true' for custom code to be executed.
ENV_VAR_ALLOW_CUSTOM_CODE = "AIRBYTE_ALLOW_CUSTOM_CODE"
23
+
24
+
25
class AirbyteCodeTamperedError(Exception):
    """Signal that the components module text does not hash to its declared checksum.

    Callers should treat this as fatal: a mismatch can indicate that the code was
    tampered with.
    """
30
+
31
+
32
class AirbyteCustomCodeNotPermittedError(Exception):
    """Signal an attempt to run custom connector code where it is not allowed.

    The exception takes no arguments; its message is fixed and explains how an
    administrator can enable custom code execution.
    """

    def __init__(self) -> None:
        # Assemble the fixed, user-facing message from its sentences.
        message_parts = (
            "Custom connector code is not permitted in this environment. ",
            "If you need to run custom code, please ask your administrator to set the `AIRBYTE_ALLOW_CUSTOM_CODE` ",
            "environment variable to 'true' in your Airbyte environment. ",
            "If you see this message in Airbyte Cloud, your workspace does not allow executing ",
            "custom connector code.",
        )
        super().__init__("".join(message_parts))
43
+
44
+
45
def _hash_text(input_text: str, hash_type: str = "md5") -> str:
    """Compute the hex digest of `input_text` with the selected algorithm.

    Args:
        input_text: Text to hash. It is encoded with the `str.encode()` defaults
            before being hashed.
        hash_type: Key into `CHECKSUM_FUNCTIONS` selecting the algorithm.

    Returns:
        The hexadecimal digest string.

    Raises:
        ValueError: If `input_text` is empty.
        KeyError: If `hash_type` is not a key of `CHECKSUM_FUNCTIONS`.
    """
    if input_text:
        make_hasher = CHECKSUM_FUNCTIONS[hash_type]
        # Passing the data to the constructor is equivalent to a single `update()` call.
        return make_hasher(input_text.encode()).hexdigest()

    raise ValueError("Input text cannot be empty.")
53
+
54
+
55
def custom_code_execution_permitted() -> bool:
    """Report whether this environment allows custom code to be executed.

    Returns:
        `True` when the `AIRBYTE_ALLOW_CUSTOM_CODE` environment variable equals
        'true' (compared case-insensitively); `False` when it is unset or holds
        any other value.
    """
    flag_value = os.environ.get(ENV_VAR_ALLOW_CUSTOM_CODE, "")
    return flag_value.lower() == "true"
61
+
62
+
63
def validate_python_code(
    code_text: str,
    checksums: dict[str, str] | None,
) -> None:
    """Validate the provided Python code text against the provided checksums.

    Every entry in `checksums` is verified; validation stops at the first entry
    that is unsupported or does not match.

    Currently we fail if no checksums are provided, although this may change in the future.

    Args:
        code_text: The Python source text to validate.
        checksums: Mapping of checksum type name (a key of `CHECKSUM_FUNCTIONS`)
            to the expected hex digest of `code_text`.

    Raises:
        ValueError: If `checksums` is missing or empty, if a checksum type is not
            supported, or if `code_text` is empty (raised by `_hash_text`).
        AirbyteCodeTamperedError: If a computed digest differs from the expected one.
    """
    if not checksums:
        raise ValueError(f"A checksum is required to validate the code. Received: {checksums}")

    for checksum_type, checksum in checksums.items():
        if checksum_type not in CHECKSUM_FUNCTIONS:
            # Join the names explicitly: interpolating `CHECKSUM_FUNCTIONS.keys()`
            # would render as "dict_keys([...])" in the user-facing message.
            supported_types = ", ".join(CHECKSUM_FUNCTIONS)
            raise ValueError(
                f"Unsupported checksum type: {checksum_type}. "
                f"Supported checksum types are: {supported_types}"
            )

        if _hash_text(code_text, checksum_type) != checksum:
            raise AirbyteCodeTamperedError(f"{checksum_type} checksum does not match.")
82
+
83
+
84
def get_registered_components_module(
    config: Mapping[str, Any] | None,
) -> ModuleType | None:
    """Resolve the components module to use for the given config.

    When the config carries custom `components.py` text, that text is validated,
    executed and registered in `sys.modules` (so that manifest declarations
    referencing its classes can import it later), and the resulting module is
    returned. This path requires custom code execution to be permitted.

    Otherwise the module is looked up in `sys.modules`, trying
    `source_declarative_manifest.components` first and `components` second.

    Returns:
        The components module, or `None` if no custom code was provided and
        neither module name is present in `sys.modules`.

    Raises:
        AirbyteCustomCodeNotPermittedError: If custom code is provided but its
            execution is not permitted in this environment.
    """
    if config and INJECTED_COMPONENTS_PY in config:
        if not custom_code_execution_permitted():
            raise AirbyteCustomCodeNotPermittedError()

        # Build a fresh module from the injected source text.
        injected_source: str = config[INJECTED_COMPONENTS_PY]
        injected_checksums = config.get(INJECTED_COMPONENTS_PY_CHECKSUMS, None)
        return register_components_module_from_string(
            components_py_text=injected_source,
            checksums=injected_checksums,
        )

    # No injected code: fall back to an already-registered module, preferring
    # `source_declarative_manifest.components` over `components`.
    for candidate_name in (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME):
        if candidate_name in sys.modules:
            return cast(ModuleType, sys.modules.get(candidate_name))

    # Neither module is registered and no injected code was provided.
    return None
118
+
119
+
120
def register_components_module_from_string(
    components_py_text: str,
    checksums: dict[str, Any] | None,
) -> ModuleType:
    """Build the components module from Python source text and register it.

    The text is validated against `checksums` first; if validation raises,
    nothing is executed or registered.

    Args:
        components_py_text: Python source code of the components module.
        checksums: Expected checksums of `components_py_text`, keyed by checksum type.

    Returns:
        The new module, which is also registered in `sys.modules` as both
        `source_declarative_manifest.components` and `components`.
    """
    validate_python_code(code_text=components_py_text, checksums=checksums)

    module = ModuleType(name=COMPONENTS_MODULE_NAME)

    # NOTE(security): this executes arbitrary Python supplied by the caller. The only
    # safeguard applied inside this function is the checksum validation above.
    exec(components_py_text, module.__dict__)

    # Register under both names so the module can be imported as
    # `source_declarative_manifest.components` and/or `components`.
    for registered_name in (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME):
        sys.modules[registered_name] = module

    return module
@@ -87,6 +87,8 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
87
87
  )
88
88
  from airbyte_cdk.sources.declarative.incremental import (
89
89
  ChildPartitionResumableFullRefreshCursor,
90
+ ConcurrentCursorFactory,
91
+ ConcurrentPerPartitionCursor,
90
92
  CursorFactory,
91
93
  DatetimeBasedCursor,
92
94
  DeclarativeCursor,
@@ -101,6 +103,7 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
101
103
  LegacyToPerPartitionStateMigration,
102
104
  )
103
105
  from airbyte_cdk.sources.declarative.models import (
106
+ Clamping,
104
107
  CustomStateMigration,
105
108
  )
106
109
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -130,6 +133,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
130
133
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
131
134
  CheckStream as CheckStreamModel,
132
135
  )
136
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
137
+ ComplexFieldType as ComplexFieldTypeModel,
138
+ )
133
139
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
134
140
  ComponentMappingDefinition as ComponentMappingDefinitionModel,
135
141
  )
@@ -363,6 +369,10 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
363
369
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
364
370
  ZipfileDecoder as ZipfileDecoderModel,
365
371
  )
372
+ from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
373
+ COMPONENTS_MODULE_NAME,
374
+ SDM_COMPONENTS_MODULE_NAME,
375
+ )
366
376
  from airbyte_cdk.sources.declarative.partition_routers import (
367
377
  CartesianProductStreamSlicer,
368
378
  ListPartitionRouter,
@@ -422,6 +432,7 @@ from airbyte_cdk.sources.declarative.retrievers import (
422
432
  SimpleRetrieverTestReadDecorator,
423
433
  )
424
434
  from airbyte_cdk.sources.declarative.schema import (
435
+ ComplexFieldType,
425
436
  DefaultSchemaLoader,
426
437
  DynamicSchemaLoader,
427
438
  InlineSchemaLoader,
@@ -456,6 +467,16 @@ from airbyte_cdk.sources.message import (
456
467
  InMemoryMessageRepository,
457
468
  LogAppenderMessageRepositoryDecorator,
458
469
  MessageRepository,
470
+ NoopMessageRepository,
471
+ )
472
+ from airbyte_cdk.sources.streams.concurrent.clamping import (
473
+ ClampingEndProvider,
474
+ ClampingStrategy,
475
+ DayClampingStrategy,
476
+ MonthClampingStrategy,
477
+ NoClamping,
478
+ WeekClampingStrategy,
479
+ Weekday,
459
480
  )
460
481
  from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
461
482
  from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -486,6 +507,7 @@ class ModelToComponentFactory:
486
507
  disable_cache: bool = False,
487
508
  disable_resumable_full_refresh: bool = False,
488
509
  message_repository: Optional[MessageRepository] = None,
510
+ connector_state_manager: Optional[ConnectorStateManager] = None,
489
511
  ):
490
512
  self._init_mappings()
491
513
  self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
@@ -497,6 +519,7 @@ class ModelToComponentFactory:
497
519
  self._message_repository = message_repository or InMemoryMessageRepository(
498
520
  self._evaluate_log_level(emit_connector_builder_messages)
499
521
  )
522
+ self._connector_state_manager = connector_state_manager or ConnectorStateManager()
500
523
 
501
524
  def _init_mappings(self) -> None:
502
525
  self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
@@ -555,6 +578,7 @@ class ModelToComponentFactory:
555
578
  DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
556
579
  SchemaTypeIdentifierModel: self.create_schema_type_identifier,
557
580
  TypesMapModel: self.create_types_map,
581
+ ComplexFieldTypeModel: self.create_complex_field_type,
558
582
  JwtAuthenticatorModel: self.create_jwt_authenticator,
559
583
  LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
560
584
  ListPartitionRouterModel: self.create_list_partition_router,
@@ -878,7 +902,15 @@ class ModelToComponentFactory:
878
902
  def create_check_dynamic_stream(
879
903
  model: CheckDynamicStreamModel, config: Config, **kwargs: Any
880
904
  ) -> CheckDynamicStream:
881
- return CheckDynamicStream(stream_count=model.stream_count, parameters={})
905
+ assert model.use_check_availability is not None # for mypy
906
+
907
+ use_check_availability = model.use_check_availability
908
+
909
+ return CheckDynamicStream(
910
+ stream_count=model.stream_count,
911
+ use_check_availability=use_check_availability,
912
+ parameters={},
913
+ )
882
914
 
883
915
  def create_composite_error_handler(
884
916
  self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
@@ -904,15 +936,24 @@ class ModelToComponentFactory:
904
936
 
905
937
  def create_concurrent_cursor_from_datetime_based_cursor(
906
938
  self,
907
- state_manager: ConnectorStateManager,
908
939
  model_type: Type[BaseModel],
909
940
  component_definition: ComponentDefinition,
910
941
  stream_name: str,
911
942
  stream_namespace: Optional[str],
912
943
  config: Config,
913
- stream_state: MutableMapping[str, Any],
944
+ message_repository: Optional[MessageRepository] = None,
945
+ runtime_lookback_window: Optional[datetime.timedelta] = None,
914
946
  **kwargs: Any,
915
947
  ) -> ConcurrentCursor:
948
+ # Per-partition incremental streams can dynamically create child cursors which will pass their current
949
+ # state via the stream_state keyword argument. Incremental syncs without parent streams use the
950
+ # incoming state and connector_state_manager that is initialized when the component factory is created
951
+ stream_state = (
952
+ self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
953
+ if "stream_state" not in kwargs
954
+ else kwargs["stream_state"]
955
+ )
956
+
916
957
  component_type = component_definition.get("type")
917
958
  if component_definition.get("type") != model_type.__name__:
918
959
  raise ValueError(
@@ -972,10 +1013,22 @@ class ModelToComponentFactory:
972
1013
  connector_state_converter = CustomFormatConcurrentStreamStateConverter(
973
1014
  datetime_format=datetime_format,
974
1015
  input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
975
- is_sequential_state=True,
1016
+ is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state
976
1017
  cursor_granularity=cursor_granularity,
977
1018
  )
978
1019
 
1020
+ # Adjusts the stream state by applying the runtime lookback window.
1021
+ # This is used to ensure correct state handling in case of failed partitions.
1022
+ stream_state_value = stream_state.get(cursor_field.cursor_field_key)
1023
+ if runtime_lookback_window and stream_state_value:
1024
+ new_stream_state = (
1025
+ connector_state_converter.parse_timestamp(stream_state_value)
1026
+ - runtime_lookback_window
1027
+ )
1028
+ stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
1029
+ new_stream_state
1030
+ )
1031
+
979
1032
  start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
980
1033
  if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
981
1034
  start_date_runtime_value = self.create_min_max_datetime(
@@ -1042,12 +1095,59 @@ class ModelToComponentFactory:
1042
1095
  if evaluated_step:
1043
1096
  step_length = parse_duration(evaluated_step)
1044
1097
 
1098
+ clamping_strategy: ClampingStrategy = NoClamping()
1099
+ if datetime_based_cursor_model.clamping:
1100
+ # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
1101
+ # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime
1102
+ # object which we want to keep agnostic of being low-code
1103
+ target = InterpolatedString(
1104
+ string=datetime_based_cursor_model.clamping.target,
1105
+ parameters=datetime_based_cursor_model.parameters or {},
1106
+ )
1107
+ evaluated_target = target.eval(config=config)
1108
+ match evaluated_target:
1109
+ case "DAY":
1110
+ clamping_strategy = DayClampingStrategy()
1111
+ end_date_provider = ClampingEndProvider(
1112
+ DayClampingStrategy(is_ceiling=False),
1113
+ end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1114
+ granularity=cursor_granularity or datetime.timedelta(seconds=1),
1115
+ )
1116
+ case "WEEK":
1117
+ if (
1118
+ not datetime_based_cursor_model.clamping.target_details
1119
+ or "weekday" not in datetime_based_cursor_model.clamping.target_details
1120
+ ):
1121
+ raise ValueError(
1122
+ "Given WEEK clamping, weekday needs to be provided as target_details"
1123
+ )
1124
+ weekday = self._assemble_weekday(
1125
+ datetime_based_cursor_model.clamping.target_details["weekday"]
1126
+ )
1127
+ clamping_strategy = WeekClampingStrategy(weekday)
1128
+ end_date_provider = ClampingEndProvider(
1129
+ WeekClampingStrategy(weekday, is_ceiling=False),
1130
+ end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1131
+ granularity=cursor_granularity or datetime.timedelta(days=1),
1132
+ )
1133
+ case "MONTH":
1134
+ clamping_strategy = MonthClampingStrategy()
1135
+ end_date_provider = ClampingEndProvider(
1136
+ MonthClampingStrategy(is_ceiling=False),
1137
+ end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1138
+ granularity=cursor_granularity or datetime.timedelta(days=1),
1139
+ )
1140
+ case _:
1141
+ raise ValueError(
1142
+ f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
1143
+ )
1144
+
1045
1145
  return ConcurrentCursor(
1046
1146
  stream_name=stream_name,
1047
1147
  stream_namespace=stream_namespace,
1048
1148
  stream_state=stream_state,
1049
- message_repository=self._message_repository,
1050
- connector_state_manager=state_manager,
1149
+ message_repository=message_repository or self._message_repository,
1150
+ connector_state_manager=self._connector_state_manager,
1051
1151
  connector_state_converter=connector_state_converter,
1052
1152
  cursor_field=cursor_field,
1053
1153
  slice_boundary_fields=slice_boundary_fields,
@@ -1056,6 +1156,100 @@ class ModelToComponentFactory:
1056
1156
  lookback_window=lookback_window,
1057
1157
  slice_range=step_length,
1058
1158
  cursor_granularity=cursor_granularity,
1159
+ clamping_strategy=clamping_strategy,
1160
+ )
1161
+
1162
def _assemble_weekday(self, weekday: str) -> Weekday:
    """Translate an upper-case weekday name into the matching `Weekday` member.

    Matching is exact: only the seven upper-case English day names are accepted.

    Raises:
        ValueError: If `weekday` is not one of the accepted names.
    """
    if weekday == "MONDAY":
        return Weekday.MONDAY
    if weekday == "TUESDAY":
        return Weekday.TUESDAY
    if weekday == "WEDNESDAY":
        return Weekday.WEDNESDAY
    if weekday == "THURSDAY":
        return Weekday.THURSDAY
    if weekday == "FRIDAY":
        return Weekday.FRIDAY
    if weekday == "SATURDAY":
        return Weekday.SATURDAY
    if weekday == "SUNDAY":
        return Weekday.SUNDAY
    raise ValueError(f"Unknown weekday {weekday}")
1180
+
1181
def create_concurrent_cursor_from_perpartition_cursor(
    self,
    state_manager: ConnectorStateManager,
    model_type: Type[BaseModel],
    component_definition: ComponentDefinition,
    stream_name: str,
    stream_namespace: Optional[str],
    config: Config,
    stream_state: MutableMapping[str, Any],
    partition_router: PartitionRouter,
    **kwargs: Any,
) -> ConcurrentPerPartitionCursor:
    """Build a `ConcurrentPerPartitionCursor` from a datetime-based cursor definition.

    The definition is parsed with `model_type` and must yield a
    `DatetimeBasedCursorModel`. The returned cursor is given a factory that creates
    cursors through `create_concurrent_cursor_from_datetime_based_cursor`, bound to
    the same definition, stream name/namespace and config.

    Args:
        state_manager: State manager handed to the returned cursor.
        model_type: Pydantic model class used to parse `component_definition`.
        component_definition: Manifest definition of the cursor; its "type" must
            equal `model_type.__name__`.
        stream_name: Name of the stream the cursor belongs to.
        stream_namespace: Namespace of the stream, if any.
        config: Connector config used to evaluate the interpolated cursor field.
        stream_state: Stream state handed to the returned cursor.
        partition_router: Partition router handed to the returned cursor.
        **kwargs: Accepted but not used by this method.

    Raises:
        ValueError: If the definition's "type" does not match `model_type`, or the
            parsed model is not a `DatetimeBasedCursorModel`.
    """
    component_type = component_definition.get("type")
    if component_definition.get("type") != model_type.__name__:
        raise ValueError(
            f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
        )

    datetime_based_cursor_model = model_type.parse_obj(component_definition)

    if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
        raise ValueError(
            f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
        )

    # The cursor field may be an interpolated string; evaluate it against the config.
    interpolated_cursor_field = InterpolatedString.create(
        datetime_based_cursor_model.cursor_field,
        parameters=datetime_based_cursor_model.parameters or {},
    )
    cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

    datetime_format = datetime_based_cursor_model.datetime_format

    # Granularity is optional in the model; None when it is not set.
    cursor_granularity = (
        parse_duration(datetime_based_cursor_model.cursor_granularity)
        if datetime_based_cursor_model.cursor_granularity
        else None
    )

    connector_state_converter: DateTimeStreamStateConverter
    connector_state_converter = CustomFormatConcurrentStreamStateConverter(
        datetime_format=datetime_format,
        input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
        is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
        cursor_granularity=cursor_granularity,
    )

    # Create the cursor factory. Cursors it produces are given a NoopMessageRepository
    # instead of this factory's message repository.
    # NOTE(review): presumably so that only the per-partition cursor emits messages - confirm.
    # NOTE(review): `state_manager` is forwarded as a keyword here; verify that
    # `create_concurrent_cursor_from_datetime_based_cursor` still consumes it rather
    # than swallowing it through its `**kwargs`.
    cursor_factory = ConcurrentCursorFactory(
        partial(
            self.create_concurrent_cursor_from_datetime_based_cursor,
            state_manager=state_manager,
            model_type=model_type,
            component_definition=component_definition,
            stream_name=stream_name,
            stream_namespace=stream_namespace,
            config=config,
            message_repository=NoopMessageRepository(),
        )
    )

    # Return the per-partition cursor wired with the factory and the state converter
    return ConcurrentPerPartitionCursor(
        cursor_factory=cursor_factory,
        partition_router=partition_router,
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        stream_state=stream_state,
        message_repository=self._message_repository,  # type: ignore
        connector_state_manager=state_manager,
        connector_state_converter=connector_state_converter,
        cursor_field=cursor_field,
    )
1060
1254
 
1061
1255
  @staticmethod
@@ -1101,7 +1295,6 @@ class ModelToComponentFactory:
1101
1295
  :param config: The custom defined connector config
1102
1296
  :return: The declarative component built from the Pydantic model to be used at runtime
1103
1297
  """
1104
-
1105
1298
  custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
1106
1299
  component_fields = get_type_hints(custom_component_class)
1107
1300
  model_args = model.dict()
@@ -1155,14 +1348,38 @@ class ModelToComponentFactory:
1155
1348
  return custom_component_class(**kwargs)
1156
1349
 
1157
1350
  @staticmethod
1158
- def _get_class_from_fully_qualified_class_name(full_qualified_class_name: str) -> Any:
1351
+ def _get_class_from_fully_qualified_class_name(
1352
+ full_qualified_class_name: str,
1353
+ ) -> Any:
1354
+ """Get a class from its fully qualified name.
1355
+
1356
+ If a custom components module is needed, we assume it is already registered - probably
1357
+ as `source_declarative_manifest.components` or `components`.
1358
+
1359
+ Args:
1360
+ full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName").
1361
+
1362
+ Returns:
1363
+ Any: The class object.
1364
+
1365
+ Raises:
1366
+ ValueError: If the class cannot be loaded.
1367
+ """
1159
1368
  split = full_qualified_class_name.split(".")
1160
- module = ".".join(split[:-1])
1369
+ module_name_full = ".".join(split[:-1])
1161
1370
  class_name = split[-1]
1371
+
1372
+ try:
1373
+ module_ref = importlib.import_module(module_name_full)
1374
+ except ModuleNotFoundError as e:
1375
+ raise ValueError(f"Could not load module `{module_name_full}`.") from e
1376
+
1162
1377
  try:
1163
- return getattr(importlib.import_module(module), class_name)
1164
- except AttributeError:
1165
- raise ValueError(f"Could not load class {full_qualified_class_name}.")
1378
+ return getattr(module_ref, class_name)
1379
+ except AttributeError as e:
1380
+ raise ValueError(
1381
+ f"Could not load class `{class_name}` from module `{module_name_full}`.",
1382
+ ) from e
1166
1383
 
1167
1384
  @staticmethod
1168
1385
  def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
@@ -1336,18 +1553,15 @@ class ModelToComponentFactory:
1336
1553
  raise ValueError(
1337
1554
  "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
1338
1555
  )
1339
- client_side_incremental_sync = {
1340
- "date_time_based_cursor": self._create_component_from_model(
1341
- model=model.incremental_sync, config=config
1342
- ),
1343
- "substream_cursor": (
1344
- combined_slicers
1345
- if isinstance(
1346
- combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1347
- )
1348
- else None
1349
- ),
1350
- }
1556
+ cursor = (
1557
+ combined_slicers
1558
+ if isinstance(
1559
+ combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1560
+ )
1561
+ else self._create_component_from_model(model=model.incremental_sync, config=config)
1562
+ )
1563
+
1564
+ client_side_incremental_sync = {"cursor": cursor}
1351
1565
 
1352
1566
  if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
1353
1567
  cursor_model = model.incremental_sync
@@ -1433,7 +1647,7 @@ class ModelToComponentFactory:
1433
1647
  ) -> Optional[PartitionRouter]:
1434
1648
  if (
1435
1649
  hasattr(model, "partition_router")
1436
- and isinstance(model, SimpleRetrieverModel)
1650
+ and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel)
1437
1651
  and model.partition_router
1438
1652
  ):
1439
1653
  stream_slicer_model = model.partition_router
@@ -1467,6 +1681,31 @@ class ModelToComponentFactory:
1467
1681
  stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
1468
1682
 
1469
1683
  if model.incremental_sync and stream_slicer:
1684
+ if model.retriever.type == "AsyncRetriever":
1685
+ if model.incremental_sync.type != "DatetimeBasedCursor":
1686
+ # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support of unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
1687
+ raise ValueError(
1688
+ "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
1689
+ )
1690
+ if stream_slicer:
1691
+ return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1692
+ state_manager=self._connector_state_manager,
1693
+ model_type=DatetimeBasedCursorModel,
1694
+ component_definition=model.incremental_sync.__dict__,
1695
+ stream_name=model.name or "",
1696
+ stream_namespace=None,
1697
+ config=config or {},
1698
+ stream_state={},
1699
+ partition_router=stream_slicer,
1700
+ )
1701
+ return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1702
+ model_type=DatetimeBasedCursorModel,
1703
+ component_definition=model.incremental_sync.__dict__,
1704
+ stream_name=model.name or "",
1705
+ stream_namespace=None,
1706
+ config=config or {},
1707
+ )
1708
+
1470
1709
  incremental_sync_model = model.incremental_sync
1471
1710
  if (
1472
1711
  hasattr(incremental_sync_model, "global_substream_cursor")
@@ -1492,6 +1731,22 @@ class ModelToComponentFactory:
1492
1731
  stream_cursor=cursor_component,
1493
1732
  )
1494
1733
  elif model.incremental_sync:
1734
+ if model.retriever.type == "AsyncRetriever":
1735
+ if model.incremental_sync.type != "DatetimeBasedCursor":
1736
+ # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support of unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
1737
+ raise ValueError(
1738
+ "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
1739
+ )
1740
+ if model.retriever.partition_router:
1741
+ # Note that this development is also done in parallel to the per partition development which once merged we could support here by calling `create_concurrent_cursor_from_perpartition_cursor`
1742
+ raise ValueError("Per partition state is not supported yet for AsyncRetriever")
1743
+ return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1744
+ model_type=DatetimeBasedCursorModel,
1745
+ component_definition=model.incremental_sync.__dict__,
1746
+ stream_name=model.name or "",
1747
+ stream_namespace=None,
1748
+ config=config or {},
1749
+ )
1495
1750
  return (
1496
1751
  self._create_component_from_model(model=model.incremental_sync, config=config)
1497
1752
  if model.incremental_sync
@@ -1710,10 +1965,26 @@ class ModelToComponentFactory:
1710
1965
  ) -> InlineSchemaLoader:
1711
1966
  return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
1712
1967
 
1713
- @staticmethod
1714
- def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
1968
def create_complex_field_type(
    self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
) -> ComplexFieldType:
    """Build a `ComplexFieldType` from its model.

    When the model's `items` is itself a `ComplexFieldTypeModel`, it is first turned
    into a component via `_create_component_from_model`; any other value is passed
    through unchanged.
    """
    items = model.items
    if isinstance(items, ComplexFieldTypeModel):
        # Nested complex type: build it through the factory before wrapping it.
        items = self._create_component_from_model(model=items, config=config)

    return ComplexFieldType(field_type=model.field_type, items=items)
1978
+
1979
+ def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
1980
+ target_type = (
1981
+ self._create_component_from_model(model=model.target_type, config=config)
1982
+ if isinstance(model.target_type, ComplexFieldTypeModel)
1983
+ else model.target_type
1984
+ )
1985
+
1715
1986
  return TypesMap(
1716
- target_type=model.target_type,
1987
+ target_type=target_type,
1717
1988
  current_type=model.current_type,
1718
1989
  condition=model.condition if model.condition is not None else "True",
1719
1990
  )
@@ -1911,6 +2182,12 @@ class ModelToComponentFactory:
1911
2182
  def create_oauth_authenticator(
1912
2183
  self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
1913
2184
  ) -> DeclarativeOauth2Authenticator:
2185
+ profile_assertion = (
2186
+ self._create_component_from_model(model.profile_assertion, config=config)
2187
+ if model.profile_assertion
2188
+ else None
2189
+ )
2190
+
1914
2191
  if model.refresh_token_updater:
1915
2192
  # ignore type error because fixing it would have a lot of dependencies, revisit later
1916
2193
  return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore
@@ -1931,13 +2208,17 @@ class ModelToComponentFactory:
1931
2208
  ).eval(config),
1932
2209
  client_id=InterpolatedString.create(
1933
2210
  model.client_id, parameters=model.parameters or {}
1934
- ).eval(config),
2211
+ ).eval(config)
2212
+ if model.client_id
2213
+ else model.client_id,
1935
2214
  client_secret_name=InterpolatedString.create(
1936
2215
  model.client_secret_name or "client_secret", parameters=model.parameters or {}
1937
2216
  ).eval(config),
1938
2217
  client_secret=InterpolatedString.create(
1939
2218
  model.client_secret, parameters=model.parameters or {}
1940
- ).eval(config),
2219
+ ).eval(config)
2220
+ if model.client_secret
2221
+ else model.client_secret,
1941
2222
  access_token_config_path=model.refresh_token_updater.access_token_config_path,
1942
2223
  refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
1943
2224
  token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
@@ -1983,6 +2264,8 @@ class ModelToComponentFactory:
1983
2264
  config=config,
1984
2265
  parameters=model.parameters or {},
1985
2266
  message_repository=self._message_repository,
2267
+ profile_assertion=profile_assertion,
2268
+ use_profile_assertion=model.use_profile_assertion,
1986
2269
  )
1987
2270
 
1988
2271
  def create_offset_increment(