airbyte-cdk 6.26.0.dev4106__py3-none-any.whl → 6.26.0.dev4108__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +3 -3
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +2 -2
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
  5. airbyte_cdk/sources/declarative/auth/oauth.py +22 -13
  6. airbyte_cdk/sources/declarative/auth/token.py +3 -8
  7. airbyte_cdk/sources/declarative/auth/token_provider.py +4 -5
  8. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +19 -9
  9. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +71 -34
  10. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +33 -4
  11. airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
  12. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +93 -27
  13. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +7 -6
  14. airbyte_cdk/sources/declarative/manifest_declarative_source.py +5 -3
  15. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +22 -5
  16. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +138 -38
  17. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  18. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
  19. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +49 -25
  20. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
  21. airbyte_cdk/sources/declarative/requesters/http_requester.py +5 -1
  22. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +6 -5
  23. airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
  24. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
  25. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
  26. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -1
  27. airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
  28. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +44 -5
  29. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +18 -11
  30. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +51 -0
  31. airbyte_cdk/sources/file_based/file_based_source.py +16 -55
  32. airbyte_cdk/sources/file_based/file_based_stream_reader.py +19 -31
  33. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +7 -7
  34. airbyte_cdk/sources/file_based/stream/identities_stream.py +5 -2
  35. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +22 -13
  36. airbyte_cdk/sources/streams/core.py +6 -6
  37. airbyte_cdk/sources/streams/http/http.py +1 -2
  38. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +231 -62
  39. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +166 -83
  40. airbyte_cdk/sources/types.py +4 -2
  41. airbyte_cdk/sources/utils/transform.py +23 -2
  42. airbyte_cdk/utils/datetime_helpers.py +499 -0
  43. airbyte_cdk/utils/mapping_helpers.py +86 -27
  44. airbyte_cdk/utils/slice_hasher.py +8 -1
  45. airbyte_cdk-6.26.0.dev4108.dist-info/LICENSE_SHORT +1 -0
  46. {airbyte_cdk-6.26.0.dev4106.dist-info → airbyte_cdk-6.26.0.dev4108.dist-info}/METADATA +5 -5
  47. {airbyte_cdk-6.26.0.dev4106.dist-info → airbyte_cdk-6.26.0.dev4108.dist-info}/RECORD +50 -48
  48. {airbyte_cdk-6.26.0.dev4106.dist-info → airbyte_cdk-6.26.0.dev4108.dist-info}/WHEEL +1 -1
  49. airbyte_cdk/sources/file_based/config/permissions.py +0 -34
  50. {airbyte_cdk-6.26.0.dev4106.dist-info → airbyte_cdk-6.26.0.dev4108.dist-info}/LICENSE.txt +0 -0
  51. {airbyte_cdk-6.26.0.dev4106.dist-info → airbyte_cdk-6.26.0.dev4108.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/declarative_stream.py
@@ -138,7 +138,9 @@ class DeclarativeStream(Stream):
         """
         :param: stream_state We knowingly avoid using stream_state as we want cursors to manage their own state.
         """
-        if stream_slice is None or stream_slice == {}:
+        if stream_slice is None or (
+            not isinstance(stream_slice, StreamSlice) and stream_slice == {}
+        ):
             # As the parameter is Optional, many would just call `read_records(sync_mode)` during testing without specifying the field
             # As part of the declarative model without custom components, this should never happen as the CDK would wire up a
             # SinglePartitionRouter that would create this StreamSlice properly
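
Why the extra isinstance check: `StreamSlice` implements the Mapping interface, so an empty `StreamSlice` compares equal to `{}`, and the old guard discarded it along with genuinely missing slices. A minimal sketch of the new behavior (not part of the diff; assumes StreamSlice's usual Mapping semantics):

    from airbyte_cdk.sources.types import StreamSlice

    empty_slice = StreamSlice(partition={}, cursor_slice={})
    assert empty_slice == {}  # Mapping equality: an empty StreamSlice equals {}

    def needs_default_slice(stream_slice) -> bool:
        # Mirrors the updated guard in DeclarativeStream.read_records
        return stream_slice is None or (
            not isinstance(stream_slice, StreamSlice) and stream_slice == {}
        )

    assert not needs_default_slice(empty_slice)  # a real slice, even if empty
    assert needs_default_slice({}) and needs_default_slice(None)  # fall back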
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py
@@ -22,6 +22,9 @@ from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import
 )
 from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, Cursor, CursorField
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
+from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
+    AbstractStreamStateConverter,
+)
 from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
 
 logger = logging.getLogger("airbyte")
@@ -72,6 +75,7 @@ class ConcurrentPerPartitionCursor(Cursor):
         stream_state: Any,
         message_repository: MessageRepository,
         connector_state_manager: ConnectorStateManager,
+        connector_state_converter: AbstractStreamStateConverter,
         cursor_field: CursorField,
     ) -> None:
         self._global_cursor: Optional[StreamState] = {}
@@ -79,6 +83,7 @@ class ConcurrentPerPartitionCursor(Cursor):
         self._stream_namespace = stream_namespace
         self._message_repository = message_repository
         self._connector_state_manager = connector_state_manager
+        self._connector_state_converter = connector_state_converter
         self._cursor_field = cursor_field
 
         self._cursor_factory = cursor_factory
@@ -95,6 +100,7 @@ class ConcurrentPerPartitionCursor(Cursor):
         self._lookback_window: int = 0
         self._parent_state: Optional[StreamState] = None
         self._over_limit: int = 0
+        self._use_global_cursor: bool = False
         self._partition_serializer = PerPartitionKeySerializer()
 
         self._set_initial_state(stream_state)
@@ -105,16 +111,18 @@ class ConcurrentPerPartitionCursor(Cursor):
 
     @property
     def state(self) -> MutableMapping[str, Any]:
-        states = []
-        for partition_tuple, cursor in self._cursor_per_partition.items():
-            if cursor.state:
-                states.append(
-                    {
-                        "partition": self._to_dict(partition_tuple),
-                        "cursor": copy.deepcopy(cursor.state),
-                    }
-                )
-        state: dict[str, Any] = {self._PERPARTITION_STATE_KEY: states}
+        state: dict[str, Any] = {"use_global_cursor": self._use_global_cursor}
+        if not self._use_global_cursor:
+            states = []
+            for partition_tuple, cursor in self._cursor_per_partition.items():
+                if cursor.state:
+                    states.append(
+                        {
+                            "partition": self._to_dict(partition_tuple),
+                            "cursor": copy.deepcopy(cursor.state),
+                        }
+                    )
+            state[self._PERPARTITION_STATE_KEY] = states
 
         if self._global_cursor:
             state[self._GLOBAL_STATE_KEY] = self._global_cursor
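
For reference, the emitted state now leads with a `use_global_cursor` flag and omits the per-partition list once the cursor has fallen back to global tracking. An illustrative, hypothetical payload for a cursor field named `updated_at`, assuming the key constants resolve to `states` and `state`:

    {
        "use_global_cursor": False,
        "states": [
            {
                "partition": {"parent_id": "1"},
                "cursor": {"updated_at": "2024-01-02T00:00:00Z"},
            }
        ],
        "state": {"updated_at": "2024-01-01T00:00:00Z"},  # global fallback cursor
        "lookback_window": 0,
    }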
@@ -147,7 +155,8 @@ class ConcurrentPerPartitionCursor(Cursor):
                 < cursor.state[self.cursor_field.cursor_field_key]
             ):
                 self._new_global_cursor = copy.deepcopy(cursor.state)
-        self._emit_state_message()
+        if not self._use_global_cursor:
+            self._emit_state_message()
 
     def ensure_at_least_one_state_emitted(self) -> None:
         """
@@ -192,7 +201,8 @@ class ConcurrentPerPartitionCursor(Cursor):
                 self._global_cursor,
                 self._lookback_window if self._global_cursor else 0,
             )
-            self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
+            with self._lock:
+                self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
             self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
                 threading.Semaphore(0)
             )
@@ -210,16 +220,42 @@ class ConcurrentPerPartitionCursor(Cursor):
 
     def _ensure_partition_limit(self) -> None:
         """
-        Ensure the maximum number of partitions is not exceeded. If so, the oldest added partition will be dropped.
+        Ensure the maximum number of partitions does not exceed the predefined limit.
+
+        Steps:
+        1. Attempt to remove partitions that are marked as finished in `_finished_partitions`.
+           These partitions are considered processed and safe to delete.
+        2. If the limit is still exceeded and no finished partitions are available for removal,
+           remove the oldest partition unconditionally. We expect failed partitions to be removed.
+
+        Logging:
+        - Logs a warning each time a partition is removed, indicating whether it was finished
+          or removed due to being the oldest.
         """
-        while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
-            self._over_limit += 1
-            oldest_partition = self._cursor_per_partition.popitem(last=False)[
-                0
-            ]  # Remove the oldest partition
-            logger.warning(
-                f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
-            )
+        with self._lock:
+            while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
+                self._over_limit += 1
+                # Try removing finished partitions first
+                for partition_key in list(self._cursor_per_partition.keys()):
+                    if (
+                        partition_key in self._finished_partitions
+                        and self._semaphore_per_partition[partition_key]._value == 0
+                    ):
+                        oldest_partition = self._cursor_per_partition.pop(
+                            partition_key
+                        )  # Remove the oldest partition
+                        logger.warning(
+                            f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._over_limit}."
+                        )
+                        break
+                else:
+                    # If no finished partitions can be removed, fall back to removing the oldest partition
+                    oldest_partition = self._cursor_per_partition.popitem(last=False)[
+                        1
+                    ]  # Remove the oldest partition
+                    logger.warning(
+                        f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
+                    )
 
     def _set_initial_state(self, stream_state: StreamState) -> None:
         """
@@ -264,16 +300,20 @@ class ConcurrentPerPartitionCursor(Cursor):
         if not stream_state:
             return
 
-        if self._PERPARTITION_STATE_KEY not in stream_state:
+        if (
+            self._PERPARTITION_STATE_KEY not in stream_state
+            and self._GLOBAL_STATE_KEY not in stream_state
+        ):
             # We assume that `stream_state` is in a global format that can be applied to all partitions.
             # Example: {"global_state_format_key": "global_state_format_value"}
-            self._global_cursor = deepcopy(stream_state)
-            self._new_global_cursor = deepcopy(stream_state)
+            self._set_global_state(stream_state)
 
         else:
+            self._use_global_cursor = stream_state.get("use_global_cursor", False)
+
             self._lookback_window = int(stream_state.get("lookback_window", 0))
 
-            for state in stream_state[self._PERPARTITION_STATE_KEY]:
+            for state in stream_state.get(self._PERPARTITION_STATE_KEY, []):
                 self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
                     self._create_cursor(state["cursor"])
                 )
@@ -283,8 +323,7 @@ class ConcurrentPerPartitionCursor(Cursor):
 
             # set default state for missing partitions if it is per partition with fallback to global
             if self._GLOBAL_STATE_KEY in stream_state:
-                self._global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
-                self._new_global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
+                self._set_global_state(stream_state[self._GLOBAL_STATE_KEY])
 
         # Set initial parent state
         if stream_state.get("parent_state"):
@@ -293,7 +332,31 @@ class ConcurrentPerPartitionCursor(Cursor):
         # Set parent state for partition routers based on parent streams
         self._partition_router.set_initial_state(stream_state)
 
+    def _set_global_state(self, stream_state: Mapping[str, Any]) -> None:
+        """
+        Initializes the global cursor state from the provided stream state.
+
+        If the cursor field key is present in the stream state, its value is parsed,
+        formatted, and stored as the global cursor. This ensures consistency in state
+        representation across partitions.
+        """
+        if self.cursor_field.cursor_field_key in stream_state:
+            global_state_value = stream_state[self.cursor_field.cursor_field_key]
+            final_format_global_state_value = self._connector_state_converter.output_format(
+                self._connector_state_converter.parse_value(global_state_value)
+            )
+
+            fixed_global_state = {
+                self.cursor_field.cursor_field_key: final_format_global_state_value
+            }
+
+            self._global_cursor = deepcopy(fixed_global_state)
+            self._new_global_cursor = deepcopy(fixed_global_state)
+
     def observe(self, record: Record) -> None:
+        if not self._use_global_cursor and self.limit_reached():
+            self._use_global_cursor = True
+
         if not record.associated_slice:
             raise ValueError(
                 "Invalid state as stream slices that are emitted should refer to an existing cursor"
@@ -332,3 +395,6 @@ class ConcurrentPerPartitionCursor(Cursor):
             )
         cursor = self._cursor_per_partition[partition_key]
         return cursor
+
+    def limit_reached(self) -> bool:
+        return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py
@@ -365,14 +365,15 @@ class DatetimeBasedCursor(DeclarativeCursor):
         options: MutableMapping[str, Any] = {}
         if not stream_slice:
             return options
+
         if self.start_time_option and self.start_time_option.inject_into == option_type:
-            options[self.start_time_option.field_name.eval(config=self.config)] = stream_slice.get(  # type: ignore # field_name is always casted to an interpolated string
-                self._partition_field_start.eval(self.config)
-            )
+            start_time_value = stream_slice.get(self._partition_field_start.eval(self.config))
+            self.start_time_option.inject_into_request(options, start_time_value, self.config)
+
         if self.end_time_option and self.end_time_option.inject_into == option_type:
-            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(  # type: ignore [union-attr]
-                self._partition_field_end.eval(self.config)
-            )
+            end_time_value = stream_slice.get(self._partition_field_end.eval(self.config))
+            self.end_time_option.inject_into_request(options, end_time_value, self.config)
+
         return options
 
     def should_be_synced(self, record: Record) -> bool:
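
Both branches now delegate to `RequestOption.inject_into_request`, which handles the new `field_path` option as well as plain `field_name` injection (the call signature is taken from the diff above; the option definition and values here are hypothetical):

    from airbyte_cdk.sources.declarative.requesters.request_option import (
        RequestOption,
        RequestOptionType,
    )

    start_option = RequestOption(
        field_path=["filter", "updated_at", "gte"],
        inject_into=RequestOptionType.body_json,
        parameters={},
    )

    options: dict = {}
    start_option.inject_into_request(options, "2024-01-01T00:00:00Z", config)
    # expected: options == {"filter": {"updated_at": {"gte": "2024-01-01T00:00:00Z"}}}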
airbyte_cdk/sources/declarative/manifest_declarative_source.py
@@ -26,9 +26,6 @@ from airbyte_cdk.models import (
 from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
 from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
 from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    CheckStream as CheckStreamModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     DeclarativeStream as DeclarativeStreamModel,
 )
@@ -368,6 +365,11 @@ class ManifestDeclarativeSource(DeclarativeSource):
             # Ensure that each stream is created with a unique name
             name = dynamic_stream.get("name")
 
+            if not isinstance(name, str):
+                raise ValueError(
+                    f"Expected stream name {name} to be a string, got {type(name)}."
+                )
+
             if name in seen_dynamic_streams:
                 error_message = f"Dynamic streams list contains a duplicate name: {name}. Please contact Airbyte Support."
                 failure_type = FailureType.system_error

airbyte_cdk/sources/declarative/models/declarative_component_schema.py
@@ -59,6 +59,11 @@ class CheckDynamicStream(BaseModel):
         description="Numbers of the streams to try reading from when running a check operation.",
         title="Stream Count",
     )
+    use_check_availability: Optional[bool] = Field(
+        True,
+        description="Enables stream check availability. This field is automatically set by the CDK.",
+        title="Use Check Availability",
+    )
 
 
 class ConcurrencyLevel(BaseModel):
@@ -736,8 +741,13 @@ class HttpResponseFilter(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
+class ComplexFieldType(BaseModel):
+    field_type: str
+    items: Optional[Union[str, ComplexFieldType]] = None
+
+
 class TypesMap(BaseModel):
-    target_type: Union[str, List[str]]
+    target_type: Union[str, List[str], ComplexFieldType]
     current_type: Union[str, List[str]]
     condition: Optional[str] = None
@@ -1190,11 +1200,17 @@ class InjectInto(Enum):
 
 class RequestOption(BaseModel):
     type: Literal["RequestOption"]
-    field_name: str = Field(
-        ...,
-        description="Configures which key should be used in the location that the descriptor is being injected into",
+    field_name: Optional[str] = Field(
+        None,
+        description="Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.",
         examples=["segment_id"],
-        title="Request Option",
+        title="Field Name",
+    )
+    field_path: Optional[List[str]] = Field(
+        None,
+        description="Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)",
+        examples=[["data", "viewer", "id"]],
+        title="Field Path",
     )
     inject_into: InjectInto = Field(
         ...,
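
With `field_name` now optional, a `RequestOption` can instead carry a `field_path` that addresses a nested location in a JSON body, as the `examples` hint at. A hedged model-level sketch (values illustrative; the `body_json` member name is assumed from the enum's string values):

    option = RequestOption(
        type="RequestOption",
        field_path=["data", "viewer", "id"],
        inject_into=InjectInto.body_json,
    )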
@@ -2260,6 +2276,7 @@ class DynamicDeclarativeStream(BaseModel):
     )
 
 
+ComplexFieldType.update_forward_refs()
 CompositeErrorHandler.update_forward_refs()
 DeclarativeSource1.update_forward_refs()
 DeclarativeSource2.update_forward_refs()

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
@@ -133,6 +133,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     CheckStream as CheckStreamModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    ComplexFieldType as ComplexFieldTypeModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     ComponentMappingDefinition as ComponentMappingDefinitionModel,
 )
@@ -429,6 +432,7 @@ from airbyte_cdk.sources.declarative.retrievers import (
     SimpleRetrieverTestReadDecorator,
 )
 from airbyte_cdk.sources.declarative.schema import (
+    ComplexFieldType,
     DefaultSchemaLoader,
     DynamicSchemaLoader,
     InlineSchemaLoader,
@@ -503,6 +507,7 @@ class ModelToComponentFactory:
         disable_cache: bool = False,
         disable_resumable_full_refresh: bool = False,
         message_repository: Optional[MessageRepository] = None,
+        connector_state_manager: Optional[ConnectorStateManager] = None,
     ):
         self._init_mappings()
         self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
@@ -514,6 +519,7 @@ class ModelToComponentFactory:
         self._message_repository = message_repository or InMemoryMessageRepository(
             self._evaluate_log_level(emit_connector_builder_messages)
         )
+        self._connector_state_manager = connector_state_manager or ConnectorStateManager()
 
     def _init_mappings(self) -> None:
         self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
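
Seeding the factory with a `ConnectorStateManager` is what later lets `create_concurrent_cursor_from_datetime_based_cursor` resolve stream state itself instead of callers threading `state_manager`/`stream_state` through every call. A hedged construction sketch (the `state` argument holds the connector's incoming state messages; variable names are illustrative):

    from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager

    state_manager = ConnectorStateManager(state=incoming_state_messages)
    factory = ModelToComponentFactory(connector_state_manager=state_manager)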
@@ -572,6 +578,7 @@ class ModelToComponentFactory:
             DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
             SchemaTypeIdentifierModel: self.create_schema_type_identifier,
             TypesMapModel: self.create_types_map,
+            ComplexFieldTypeModel: self.create_complex_field_type,
             JwtAuthenticatorModel: self.create_jwt_authenticator,
             LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
             ListPartitionRouterModel: self.create_list_partition_router,
@@ -726,8 +733,8 @@ class ModelToComponentFactory:
         }
         return names_to_types[value_type]
 
-    @staticmethod
     def create_api_key_authenticator(
+        self,
         model: ApiKeyAuthenticatorModel,
         config: Config,
         token_provider: Optional[TokenProvider] = None,
@@ -749,10 +756,8 @@
         )
 
         request_option = (
-            RequestOption(
-                inject_into=RequestOptionType(model.inject_into.inject_into.value),
-                field_name=model.inject_into.field_name,
-                parameters=model.parameters or {},
+            self._create_component_from_model(
+                model.inject_into, config, parameters=model.parameters or {}
             )
             if model.inject_into
             else RequestOption(
@@ -761,6 +766,7 @@
                 parameters=model.parameters or {},
             )
         )
+
         return ApiKeyAuthenticator(
             token_provider=(
                 token_provider
@@ -842,7 +848,7 @@
                 token_provider=token_provider,
             )
         else:
-            return ModelToComponentFactory.create_api_key_authenticator(
+            return self.create_api_key_authenticator(
                 ApiKeyAuthenticatorModel(
                     type="ApiKeyAuthenticator",
                     api_token="",
@@ -896,7 +902,15 @@
     def create_check_dynamic_stream(
         model: CheckDynamicStreamModel, config: Config, **kwargs: Any
     ) -> CheckDynamicStream:
-        return CheckDynamicStream(stream_count=model.stream_count, parameters={})
+        assert model.use_check_availability is not None  # for mypy
+
+        use_check_availability = model.use_check_availability
+
+        return CheckDynamicStream(
+            stream_count=model.stream_count,
+            use_check_availability=use_check_availability,
+            parameters={},
+        )
 
     def create_composite_error_handler(
         self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
@@ -922,17 +936,24 @@
 
     def create_concurrent_cursor_from_datetime_based_cursor(
         self,
-        state_manager: ConnectorStateManager,
         model_type: Type[BaseModel],
         component_definition: ComponentDefinition,
         stream_name: str,
         stream_namespace: Optional[str],
         config: Config,
-        stream_state: MutableMapping[str, Any],
         message_repository: Optional[MessageRepository] = None,
         runtime_lookback_window: Optional[datetime.timedelta] = None,
         **kwargs: Any,
     ) -> ConcurrentCursor:
+        # Per-partition incremental streams can dynamically create child cursors which will pass their current
+        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
+        # incoming state and connector_state_manager that is initialized when the component factory is created
+        stream_state = (
+            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
+            if "stream_state" not in kwargs
+            else kwargs["stream_state"]
+        )
+
         component_type = component_definition.get("type")
         if component_definition.get("type") != model_type.__name__:
             raise ValueError(
@@ -1126,7 +1147,7 @@
             stream_namespace=stream_namespace,
             stream_state=stream_state,
             message_repository=message_repository or self._message_repository,
-            connector_state_manager=state_manager,
+            connector_state_manager=self._connector_state_manager,
             connector_state_converter=connector_state_converter,
             cursor_field=cursor_field,
             slice_boundary_fields=slice_boundary_fields,
@@ -1188,6 +1209,22 @@
         )
         cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
 
+        datetime_format = datetime_based_cursor_model.datetime_format
+
+        cursor_granularity = (
+            parse_duration(datetime_based_cursor_model.cursor_granularity)
+            if datetime_based_cursor_model.cursor_granularity
+            else None
+        )
+
+        connector_state_converter: DateTimeStreamStateConverter
+        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
+            datetime_format=datetime_format,
+            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
+            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
+            cursor_granularity=cursor_granularity,
+        )
+
         # Create the cursor factory
         cursor_factory = ConcurrentCursorFactory(
             partial(
@@ -1211,6 +1248,7 @@
             stream_state=stream_state,
             message_repository=self._message_repository,  # type: ignore
             connector_state_manager=state_manager,
+            connector_state_converter=connector_state_converter,
             cursor_field=cursor_field,
         )
 
@@ -1450,19 +1488,15 @@
         )
 
         end_time_option = (
-            RequestOption(
-                inject_into=RequestOptionType(model.end_time_option.inject_into.value),
-                field_name=model.end_time_option.field_name,
-                parameters=model.parameters or {},
+            self._create_component_from_model(
+                model.end_time_option, config, parameters=model.parameters or {}
             )
             if model.end_time_option
             else None
         )
         start_time_option = (
-            RequestOption(
-                inject_into=RequestOptionType(model.start_time_option.inject_into.value),
-                field_name=model.start_time_option.field_name,
-                parameters=model.parameters or {},
+            self._create_component_from_model(
+                model.start_time_option, config, parameters=model.parameters or {}
             )
             if model.start_time_option
             else None
@@ -1533,19 +1567,15 @@
         cursor_model = model.incremental_sync
 
         end_time_option = (
-            RequestOption(
-                inject_into=RequestOptionType(cursor_model.end_time_option.inject_into.value),
-                field_name=cursor_model.end_time_option.field_name,
-                parameters=cursor_model.parameters or {},
+            self._create_component_from_model(
+                cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
             )
             if cursor_model.end_time_option
             else None
         )
         start_time_option = (
-            RequestOption(
-                inject_into=RequestOptionType(cursor_model.start_time_option.inject_into.value),
-                field_name=cursor_model.start_time_option.field_name,
-                parameters=cursor_model.parameters or {},
+            self._create_component_from_model(
+                cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
            )
             if cursor_model.start_time_option
             else None
@@ -1617,7 +1647,7 @@
     ) -> Optional[PartitionRouter]:
         if (
             hasattr(model, "partition_router")
-            and isinstance(model, SimpleRetrieverModel)
+            and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel)
             and model.partition_router
         ):
             stream_slicer_model = model.partition_router
@@ -1651,6 +1681,31 @@
         stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
 
         if model.incremental_sync and stream_slicer:
+            if model.retriever.type == "AsyncRetriever":
+                if model.incremental_sync.type != "DatetimeBasedCursor":
+                    # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support or unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
+                    raise ValueError(
+                        "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
+                    )
+                if stream_slicer:
+                    return self.create_concurrent_cursor_from_perpartition_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
+                        state_manager=self._connector_state_manager,
+                        model_type=DatetimeBasedCursorModel,
+                        component_definition=model.incremental_sync.__dict__,
+                        stream_name=model.name or "",
+                        stream_namespace=None,
+                        config=config or {},
+                        stream_state={},
+                        partition_router=stream_slicer,
+                    )
+                return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
+                    model_type=DatetimeBasedCursorModel,
+                    component_definition=model.incremental_sync.__dict__,
+                    stream_name=model.name or "",
+                    stream_namespace=None,
+                    config=config or {},
+                )
+
             incremental_sync_model = model.incremental_sync
             if (
                 hasattr(incremental_sync_model, "global_substream_cursor")
@@ -1676,6 +1731,22 @@
                 stream_cursor=cursor_component,
             )
         elif model.incremental_sync:
+            if model.retriever.type == "AsyncRetriever":
+                if model.incremental_sync.type != "DatetimeBasedCursor":
+                    # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support or unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
+                    raise ValueError(
+                        "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
+                    )
+                if model.retriever.partition_router:
+                    # Note that this development is also done in parallel to the per partition development which once merged we could support here by calling `create_concurrent_cursor_from_perpartition_cursor`
+                    raise ValueError("Per partition state is not supported yet for AsyncRetriever")
+                return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
+                    model_type=DatetimeBasedCursorModel,
+                    component_definition=model.incremental_sync.__dict__,
+                    stream_name=model.name or "",
+                    stream_namespace=None,
+                    config=config or {},
+                )
             return (
                 self._create_component_from_model(model=model.incremental_sync, config=config)
                 if model.incremental_sync
@@ -1894,10 +1965,26 @@
     ) -> InlineSchemaLoader:
         return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
 
-    @staticmethod
-    def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
+    def create_complex_field_type(
+        self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
+    ) -> ComplexFieldType:
+        items = (
+            self._create_component_from_model(model=model.items, config=config)
+            if isinstance(model.items, ComplexFieldTypeModel)
+            else model.items
+        )
+
+        return ComplexFieldType(field_type=model.field_type, items=items)
+
+    def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
+        target_type = (
+            self._create_component_from_model(model=model.target_type, config=config)
+            if isinstance(model.target_type, ComplexFieldTypeModel)
+            else model.target_type
+        )
+
         return TypesMap(
-            target_type=model.target_type,
+            target_type=target_type,
             current_type=model.current_type,
             condition=model.condition if model.condition is not None else "True",
         )
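
Because `create_complex_field_type` re-enters `_create_component_from_model` for nested `items`, arbitrarily deep types resolve recursively. A hedged example building an array-of-arrays-of-strings (usage is hypothetical):

    nested_model = ComplexFieldTypeModel(
        field_type="array",
        items=ComplexFieldTypeModel(field_type="array", items="string"),
    )
    complex_type = factory.create_complex_field_type(nested_model, config={})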
@@ -2054,16 +2141,11 @@
             additional_jwt_payload=model.additional_jwt_payload,
         )
 
-    @staticmethod
     def create_list_partition_router(
-        model: ListPartitionRouterModel, config: Config, **kwargs: Any
+        self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
     ) -> ListPartitionRouter:
         request_option = (
-            RequestOption(
-                inject_into=RequestOptionType(model.request_option.inject_into.value),
-                field_name=model.request_option.field_name,
-                parameters=model.parameters or {},
-            )
+            self._create_component_from_model(model.request_option, config)
             if model.request_option
             else None
         )
@@ -2259,7 +2341,25 @@
         model: RequestOptionModel, config: Config, **kwargs: Any
     ) -> RequestOption:
         inject_into = RequestOptionType(model.inject_into.value)
-        return RequestOption(field_name=model.field_name, inject_into=inject_into, parameters={})
+        field_path: Optional[List[Union[InterpolatedString, str]]] = (
+            [
+                InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
+                for segment in model.field_path
+            ]
+            if model.field_path
+            else None
+        )
+        field_name = (
+            InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
+            if model.field_name
+            else None
+        )
+        return RequestOption(
+            field_name=field_name,
+            field_path=field_path,
+            inject_into=inject_into,
+            parameters=kwargs.get("parameters", {}),
+        )
 
     def create_record_selector(
         self,
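
End to end, a manifest `RequestOption` with a `field_path` now becomes interpolable per segment and lands as a nested structure when injected. A hedged sketch of the behavior this enables, reusing `inject_into_request` from the DatetimeBasedCursor hunk above (`model`, `config`, and the value are illustrative; assume `model.field_path == ["data", "viewer", "id"]`):

    option = factory.create_request_option(model, config, parameters={})
    body: dict = {}
    option.inject_into_request(body, "12345", config)
    # expected: body == {"data": {"viewer": {"id": "12345"}}}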