airbyte-cdk 6.36.0.dev0__py3-none-any.whl → 6.37.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. airbyte_cdk/connector_builder/models.py +16 -14
  2. airbyte_cdk/connector_builder/test_reader/helpers.py +120 -22
  3. airbyte_cdk/connector_builder/test_reader/message_grouper.py +16 -3
  4. airbyte_cdk/connector_builder/test_reader/types.py +9 -1
  5. airbyte_cdk/entrypoint.py +7 -7
  6. airbyte_cdk/sources/declarative/auth/token_provider.py +1 -0
  7. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +15 -75
  8. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +15 -16
  9. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +13 -2
  10. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -1
  11. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +6 -2
  12. airbyte_cdk/sources/declarative/interpolation/__init__.py +1 -1
  13. airbyte_cdk/sources/declarative/interpolation/filters.py +2 -1
  14. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +1 -1
  15. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +1 -1
  16. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +1 -1
  17. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +1 -1
  18. airbyte_cdk/sources/declarative/interpolation/interpolation.py +2 -1
  19. airbyte_cdk/sources/declarative/interpolation/jinja.py +14 -1
  20. airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
  21. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1 -1
  22. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +52 -30
  23. airbyte_cdk/sources/declarative/requesters/http_requester.py +0 -1
  24. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +1 -4
  25. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +0 -3
  26. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -47
  27. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +0 -2
  28. airbyte_cdk/sources/declarative/transformations/add_fields.py +4 -4
  29. airbyte_cdk/sources/http_logger.py +3 -0
  30. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +1 -0
  31. {airbyte_cdk-6.36.0.dev0.dist-info → airbyte_cdk-6.37.0.dev0.dist-info}/METADATA +1 -1
  32. {airbyte_cdk-6.36.0.dev0.dist-info → airbyte_cdk-6.37.0.dev0.dist-info}/RECORD +36 -36
  33. {airbyte_cdk-6.36.0.dev0.dist-info → airbyte_cdk-6.37.0.dev0.dist-info}/LICENSE.txt +0 -0
  34. {airbyte_cdk-6.36.0.dev0.dist-info → airbyte_cdk-6.37.0.dev0.dist-info}/LICENSE_SHORT +0 -0
  35. {airbyte_cdk-6.36.0.dev0.dist-info → airbyte_cdk-6.37.0.dev0.dist-info}/WHEEL +0 -0
  36. {airbyte_cdk-6.36.0.dev0.dist-info → airbyte_cdk-6.37.0.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/connector_builder/models.py CHANGED
@@ -21,20 +21,6 @@ class HttpRequest:
      body: Optional[str] = None
  
  
- @dataclass
- class StreamReadPages:
-     records: List[object]
-     request: Optional[HttpRequest] = None
-     response: Optional[HttpResponse] = None
-
-
- @dataclass
- class StreamReadSlices:
-     pages: List[StreamReadPages]
-     slice_descriptor: Optional[Dict[str, Any]]
-     state: Optional[List[Dict[str, Any]]] = None
-
-
  @dataclass
  class LogMessage:
      message: str
@@ -46,11 +32,27 @@ class LogMessage:
  @dataclass
  class AuxiliaryRequest:
      title: str
+     type: str
      description: str
      request: HttpRequest
      response: HttpResponse
  
  
+ @dataclass
+ class StreamReadPages:
+     records: List[object]
+     request: Optional[HttpRequest] = None
+     response: Optional[HttpResponse] = None
+
+
+ @dataclass
+ class StreamReadSlices:
+     pages: List[StreamReadPages]
+     slice_descriptor: Optional[Dict[str, Any]]
+     state: Optional[List[Dict[str, Any]]] = None
+     auxiliary_requests: Optional[List[AuxiliaryRequest]] = None
+
+
  @dataclass
  class StreamRead(object):
      logs: List[LogMessage]
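
The page and slice models now sit below `AuxiliaryRequest` so the new `auxiliary_requests` field can reference it. A minimal construction sketch of the updated models; field values are illustrative, and the `HttpRequest`/`HttpResponse` keyword arguments are assumed from this module's dataclasses:

```python
from airbyte_cdk.connector_builder.models import (
    AuxiliaryRequest, HttpRequest, HttpResponse, StreamReadPages, StreamReadSlices,
)

# Illustrative only: a slice carrying the async-job call made while fetching its pages.
slice_ = StreamReadSlices(
    pages=[StreamReadPages(records=[{"id": 1}])],
    slice_descriptor={"start_time": "2024-01-01"},
    state=[],
    auxiliary_requests=[
        AuxiliaryRequest(
            title="Create export job",
            type="ASYNC_CREATE",  # the new field added in this release
            description="Starts the asynchronous export",
            request=HttpRequest(url="https://api.example.com/jobs", headers=None, http_method="POST"),
            response=HttpResponse(status=202),
        )
    ],
)
```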
airbyte_cdk/connector_builder/test_reader/helpers.py CHANGED
@@ -28,7 +28,7 @@ from airbyte_cdk.utils.schema_inferrer import (
      SchemaInferrer,
  )
  
- from .types import LOG_MESSAGES_OUTPUT_TYPE
+ from .types import ASYNC_AUXILIARY_REQUEST_TYPES, LOG_MESSAGES_OUTPUT_TYPE
  
  # -------
  # Parsers
@@ -226,7 +226,8 @@ def should_close_page(
          at_least_one_page_in_group
          and is_log_message(message)
          and (
-             is_page_http_request(json_message) or message.log.message.startswith("slice:")  # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
+             is_page_http_request(json_message)
+             or message.log.message.startswith(SliceLogger.SLICE_LOG_PREFIX)  # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
          )
      )
  
@@ -330,6 +331,10 @@ def is_auxiliary_http_request(message: Optional[Dict[str, Any]]) -> bool:
      return is_http_log(message) and message.get("http", {}).get("is_auxiliary", False)
  
  
+ def is_async_auxiliary_request(message: AuxiliaryRequest) -> bool:
+     return message.type in ASYNC_AUXILIARY_REQUEST_TYPES
+
+
  def is_log_message(message: AirbyteMessage) -> bool:
      """
      Determines whether the provided message is of type LOG.
@@ -413,6 +418,7 @@ def handle_current_slice(
      current_slice_pages: List[StreamReadPages],
      current_slice_descriptor: Optional[Dict[str, Any]] = None,
      latest_state_message: Optional[Dict[str, Any]] = None,
+     auxiliary_requests: Optional[List[AuxiliaryRequest]] = None,
  ) -> StreamReadSlices:
      """
      Handles the current slice by packaging its pages, descriptor, and state into a StreamReadSlices instance.
@@ -421,6 +427,7 @@ def handle_current_slice(
          current_slice_pages (List[StreamReadPages]): The pages to be included in the slice.
          current_slice_descriptor (Optional[Dict[str, Any]]): Descriptor for the current slice, optional.
          latest_state_message (Optional[Dict[str, Any]]): The latest state message, optional.
+         auxiliary_requests (Optional[List[AuxiliaryRequest]]): The auxiliary requests to include, optional.
  
      Returns:
          StreamReadSlices: An object containing the current slice's pages, descriptor, and state.
@@ -429,6 +436,7 @@ def handle_current_slice(
          pages=current_slice_pages,
          slice_descriptor=current_slice_descriptor,
          state=[latest_state_message] if latest_state_message else [],
+         auxiliary_requests=auxiliary_requests if auxiliary_requests else [],
      )
  
  
@@ -486,29 +494,24 @@ def handle_auxiliary_request(json_message: Dict[str, JsonType]) -> AuxiliaryRequ
      Raises:
          ValueError: If any of the "airbyte_cdk", "stream", or "http" fields is not a dictionary.
      """
-     airbyte_cdk = json_message.get("airbyte_cdk", {})
-
-     if not isinstance(airbyte_cdk, dict):
-         raise ValueError(
-             f"Expected airbyte_cdk to be a dict, got {airbyte_cdk} of type {type(airbyte_cdk)}"
-         )
-
-     stream = airbyte_cdk.get("stream", {})
  
-     if not isinstance(stream, dict):
-         raise ValueError(f"Expected stream to be a dict, got {stream} of type {type(stream)}")
+     airbyte_cdk = get_airbyte_cdk_from_message(json_message)
+     stream = get_stream_from_airbyte_cdk(airbyte_cdk)
+     title_prefix = get_auxiliary_request_title_prefix(stream)
+     http = get_http_property_from_message(json_message)
+     request_type = get_auxiliary_request_type(stream, http)
  
-     title_prefix = "Parent stream: " if stream.get("is_substream", False) else ""
-     http = json_message.get("http", {})
-
-     if not isinstance(http, dict):
-         raise ValueError(f"Expected http to be a dict, got {http} of type {type(http)}")
+     title = title_prefix + str(http.get("title", None))
+     description = str(http.get("description", None))
+     request = create_request_from_log_message(json_message)
+     response = create_response_from_log_message(json_message)
  
      return AuxiliaryRequest(
-         title=title_prefix + str(http.get("title", None)),
-         description=str(http.get("description", None)),
-         request=create_request_from_log_message(json_message),
-         response=create_response_from_log_message(json_message),
+         title=title,
+         type=request_type,
+         description=description,
+         request=request,
+         response=response,
      )
  
  
@@ -558,7 +561,8 @@ def handle_log_message(
          at_least_one_page_in_group,
          current_page_request,
          current_page_response,
-         auxiliary_request or log_message,
+         auxiliary_request,
+         log_message,
      )
  
  
@@ -589,3 +593,97 @@ def handle_record_message(
          datetime_format_inferrer.accumulate(message.record)  # type: ignore
  
      return records_count
+
+
+ # -------
+ # Reusable Getters
+ # -------
+
+
+ def get_airbyte_cdk_from_message(json_message: Dict[str, JsonType]) -> dict:  # type: ignore
+     """
+     Retrieves the "airbyte_cdk" dictionary from the provided JSON message.
+
+     This function validates that the extracted "airbyte_cdk" is of type dict,
+     raising a ValueError if the validation fails.
+
+     Parameters:
+         json_message (Dict[str, JsonType]): A dictionary representing the JSON message.
+
+     Returns:
+         dict: The "airbyte_cdk" dictionary extracted from the JSON message.
+
+     Raises:
+         ValueError: If the "airbyte_cdk" field is not a dictionary.
+     """
+     airbyte_cdk = json_message.get("airbyte_cdk", {})
+
+     if not isinstance(airbyte_cdk, dict):
+         raise ValueError(
+             f"Expected airbyte_cdk to be a dict, got {airbyte_cdk} of type {type(airbyte_cdk)}"
+         )
+
+     return airbyte_cdk
+
+
+ def get_stream_from_airbyte_cdk(airbyte_cdk: dict) -> dict:  # type: ignore
+     """
+     Retrieves the "stream" dictionary from the provided "airbyte_cdk" dictionary.
+
+     This function ensures that the extracted "stream" is of type dict,
+     raising a ValueError if the validation fails.
+
+     Parameters:
+         airbyte_cdk (dict): The dictionary representing the Airbyte CDK data.
+
+     Returns:
+         dict: The "stream" dictionary extracted from the Airbyte CDK data.
+
+     Raises:
+         ValueError: If the "stream" field is not a dictionary.
+     """
+
+     stream = airbyte_cdk.get("stream", {})
+
+     if not isinstance(stream, dict):
+         raise ValueError(f"Expected stream to be a dict, got {stream} of type {type(stream)}")
+
+     return stream
+
+
+ def get_auxiliary_request_title_prefix(stream: dict) -> str:  # type: ignore
+     """
+     Generates a title prefix based on the stream type.
+     """
+     return "Parent stream: " if stream.get("is_substream", False) else ""
+
+
+ def get_http_property_from_message(json_message: Dict[str, JsonType]) -> dict:  # type: ignore
+     """
+     Retrieves the "http" dictionary from the provided JSON message.
+
+     This function validates that the extracted "http" is of type dict,
+     raising a ValueError if the validation fails.
+
+     Parameters:
+         json_message (Dict[str, JsonType]): A dictionary representing the JSON message.
+
+     Returns:
+         dict: The "http" dictionary extracted from the JSON message.
+
+     Raises:
+         ValueError: If the "http" field is not a dictionary.
+     """
+     http = json_message.get("http", {})
+
+     if not isinstance(http, dict):
+         raise ValueError(f"Expected http to be a dict, got {http} of type {type(http)}")
+
+     return http
+
+
+ def get_auxiliary_request_type(stream: dict, http: dict) -> str:  # type: ignore
+     """
+     Determines the type of the auxiliary request based on the stream and HTTP properties.
+     """
+     return "PARENT_STREAM" if stream.get("is_substream", False) else str(http.get("type", None))
airbyte_cdk/connector_builder/test_reader/message_grouper.py CHANGED
@@ -6,6 +6,7 @@
  from typing import Any, Dict, Iterator, List, Mapping, Optional
  
  from airbyte_cdk.connector_builder.models import (
+     AuxiliaryRequest,
      HttpRequest,
      HttpResponse,
      StreamReadPages,
@@ -24,6 +25,7 @@ from .helpers import (
      handle_current_slice,
      handle_log_message,
      handle_record_message,
+     is_async_auxiliary_request,
      is_config_update_message,
      is_log_message,
      is_record_message,
@@ -89,6 +91,7 @@ def get_message_groups(
      current_page_request: Optional[HttpRequest] = None
      current_page_response: Optional[HttpResponse] = None
      latest_state_message: Optional[Dict[str, Any]] = None
+     slice_auxiliary_requests: List[AuxiliaryRequest] = []
  
      while records_count < limit and (message := next(messages, None)):
          json_message = airbyte_message_to_json(message)
@@ -106,6 +109,7 @@ def get_message_groups(
                  current_slice_pages,
                  current_slice_descriptor,
                  latest_state_message,
+                 slice_auxiliary_requests,
              )
              current_slice_descriptor = parse_slice_description(message.log.message)  # type: ignore
              current_slice_pages = []
@@ -118,7 +122,8 @@ def get_message_groups(
                  at_least_one_page_in_group,
                  current_page_request,
                  current_page_response,
-                 log_or_auxiliary_request,
+                 auxiliary_request,
+                 log_message,
              ) = handle_log_message(
                  message,
                  json_message,
@@ -126,8 +131,15 @@ def get_message_groups(
                  current_page_request,
                  current_page_response,
              )
-             if log_or_auxiliary_request:
-                 yield log_or_auxiliary_request
+
+             if auxiliary_request:
+                 if is_async_auxiliary_request(auxiliary_request):
+                     slice_auxiliary_requests.append(auxiliary_request)
+                 else:
+                     yield auxiliary_request
+
+             if log_message:
+                 yield log_message
          elif is_trace_with_error(message):
              if message.trace is not None:
                  yield message.trace
@@ -157,4 +169,5 @@ def get_message_groups(
          current_slice_pages,
          current_slice_descriptor,
          latest_state_message,
+         slice_auxiliary_requests,
      )
airbyte_cdk/connector_builder/test_reader/types.py CHANGED
@@ -71,5 +71,13 @@ LOG_MESSAGES_OUTPUT_TYPE = tuple[
      bool,
      HttpRequest | None,
      HttpResponse | None,
-     AuxiliaryRequest | AirbyteLogMessage | None,
+     AuxiliaryRequest | None,
+     AirbyteLogMessage | None,
+ ]
+
+ ASYNC_AUXILIARY_REQUEST_TYPES = [
+     "ASYNC_CREATE",
+     "ASYNC_POLL",
+     "ASYNC_ABORT",
+     "ASYNC_DELETE",
  ]
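
With the tuple split and this constant in place, the message grouper can distinguish async-job traffic from other auxiliary requests. A minimal sketch of the check, using `is_async_auxiliary_request` from the helpers above; the field values are illustrative:

```python
from airbyte_cdk.connector_builder.models import AuxiliaryRequest, HttpRequest, HttpResponse
from airbyte_cdk.connector_builder.test_reader.helpers import is_async_auxiliary_request

aux = AuxiliaryRequest(
    title="Poll export job",
    type="ASYNC_POLL",  # one of ASYNC_AUXILIARY_REQUEST_TYPES
    description="Checks the job status",
    request=HttpRequest(url="https://api.example.com/jobs/1", headers=None, http_method="GET"),
    response=HttpResponse(status=200),
)

# Async-job calls are buffered onto StreamReadSlices.auxiliary_requests;
# anything else (e.g. type="AUTH") is yielded to the top-level read output.
assert is_async_auxiliary_request(aux)
```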
airbyte_cdk/entrypoint.py CHANGED
@@ -37,8 +37,8 @@ from airbyte_cdk.sources import Source
  from airbyte_cdk.sources.connector_state_manager import HashableStreamDescriptor
  from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec_or_exit, split_config
  
- # from airbyte_cdk.utils import PrintBuffer, is_cloud_environment, message_utils  # add PrintBuffer back once fixed
- from airbyte_cdk.utils import is_cloud_environment, message_utils
+ from airbyte_cdk.utils import PrintBuffer, is_cloud_environment, message_utils  # add PrintBuffer back once fixed
+ # from airbyte_cdk.utils import is_cloud_environment, message_utils
  from airbyte_cdk.utils.airbyte_secrets_utils import get_secrets, update_secrets
  from airbyte_cdk.utils.constants import ENV_REQUEST_CACHE_PATH
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
@@ -337,11 +337,11 @@ def launch(source: Source, args: List[str]) -> None:
      parsed_args = source_entrypoint.parse_args(args)
      # temporarily removes the PrintBuffer because we're seeing weird print behavior for concurrent syncs
      # Refer to: https://github.com/airbytehq/oncall/issues/6235
-     # with PrintBuffer():
-     for message in source_entrypoint.run(parsed_args):
-         # simply printing is creating issues for concurrent CDK as Python uses different two instructions to print: one for the message and
-         # the other for the break line. Adding `\n` to the message ensure that both are printed at the same time
-         print(f"{message}\n", end="", flush=True)
+     with PrintBuffer():
+         for message in source_entrypoint.run(parsed_args):
+             # simply printing is creating issues for concurrent CDK as Python uses different two instructions to print: one for the message and
+             # the other for the break line. Adding `\n` to the message ensure that both are printed at the same time
+             print(f"{message}\n", end="", flush=True)
  
  
  def _init_internal_request_filter() -> None:
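
This change re-enables buffered output under `PrintBuffer`. A minimal sketch of the pattern, assuming only that `PrintBuffer` is the context manager imported above that batches stdout writes:

```python
from airbyte_cdk.utils import PrintBuffer

with PrintBuffer():
    for message in ["msg-1", "msg-2"]:  # stand-ins for serialized AirbyteMessages
        # Keeping the newline inside the formatted string emits the message and
        # its line break in a single write, so concurrent threads cannot
        # interleave one message with another thread's newline.
        print(f"{message}\n", end="", flush=True)
```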
airbyte_cdk/sources/declarative/auth/token_provider.py CHANGED
@@ -58,6 +58,7 @@ class SessionTokenProvider(TokenProvider):
              "Obtains session token",
              None,
              is_auxiliary=True,
+             type="AUTH",
          ),
      )
      if response is None:
airbyte_cdk/sources/declarative/concurrent_declarative_source.py CHANGED
@@ -24,7 +24,6 @@ from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import Da
  from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
      PerPartitionWithGlobalCursor,
  )
- from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
  from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
      ConcurrencyLevel as ConcurrencyLevelModel,
@@ -36,17 +35,16 @@ from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
      ModelToComponentFactory,
  )
  from airbyte_cdk.sources.declarative.partition_routers import AsyncJobPartitionRouter
- from airbyte_cdk.sources.declarative.requesters import HttpRequester
  from airbyte_cdk.sources.declarative.retrievers import AsyncRetriever, Retriever, SimpleRetriever
  from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
      DeclarativePartitionFactory,
      StreamSlicerPartitionGenerator,
  )
- from airbyte_cdk.sources.declarative.transformations.add_fields import AddFields
  from airbyte_cdk.sources.declarative.types import ConnectionDefinition
  from airbyte_cdk.sources.source import TState
  from airbyte_cdk.sources.streams import Stream
  from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
+ from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
      AlwaysAvailableAvailabilityStrategy,
  )
@@ -121,6 +119,12 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
              message_repository=self.message_repository,
          )
  
+     # TODO: Remove this. This property is necessary to safely migrate Stripe during the transition state.
+     @property
+     def is_partially_declarative(self) -> bool:
+         """This flag used to avoid unexpected AbstractStreamFacade processing as concurrent streams."""
+         return False
+
      def read(
          self,
          logger: logging.Logger,
@@ -321,9 +325,6 @@
                  incremental_sync_component_definition
                  and incremental_sync_component_definition.get("type", "")
                  == DatetimeBasedCursorModel.__name__
-                 and self._stream_supports_concurrent_partition_processing(
-                     declarative_stream=declarative_stream
-                 )
                  and hasattr(declarative_stream.retriever, "stream_slicer")
                  and isinstance(
                      declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
@@ -375,6 +376,14 @@
                  )
              else:
                  synchronous_streams.append(declarative_stream)
+         # TODO: Remove this. This check is necessary to safely migrate Stripe during the transition state.
+         # Condition below needs to ensure that concurrent support is not lost for sources that already support
+         # it before migration, but now are only partially migrated to declarative implementation (e.g., Stripe).
+         elif (
+             isinstance(declarative_stream, AbstractStreamFacade)
+             and self.is_partially_declarative
+         ):
+             concurrent_streams.append(declarative_stream.get_underlying_stream())
          else:
              synchronous_streams.append(declarative_stream)
  
@@ -390,9 +399,6 @@
              and bool(incremental_sync_component_definition)
              and incremental_sync_component_definition.get("type", "")
              == DatetimeBasedCursorModel.__name__
-             and self._stream_supports_concurrent_partition_processing(
-                 declarative_stream=declarative_stream
-             )
              and hasattr(declarative_stream.retriever, "stream_slicer")
              and (
                  isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
@@ -400,72 +406,6 @@
              )
          )
  
-     def _stream_supports_concurrent_partition_processing(
-         self, declarative_stream: DeclarativeStream
-     ) -> bool:
-         """
-         Many connectors make use of stream_state during interpolation on a per-partition basis under the assumption that
-         state is updated sequentially. Because the concurrent CDK engine processes different partitions in parallel,
-         stream_state is no longer a thread-safe interpolation context. It would be a race condition because a cursor's
-         stream_state can be updated in any order depending on which stream partition's finish first.
-
-         We should start to move away from depending on the value of stream_state for low-code components that operate
-         per-partition, but we need to gate this otherwise some connectors will be blocked from publishing. See the
-         cdk-migrations.md for the full list of connectors.
-         """
-
-         if isinstance(declarative_stream.retriever, SimpleRetriever) and isinstance(
-             declarative_stream.retriever.requester, HttpRequester
-         ):
-             http_requester = declarative_stream.retriever.requester
-             if "stream_state" in http_requester._path.string:
-                 self.logger.warning(
-                     f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the HttpRequester which is not thread-safe. Defaulting to synchronous processing"
-                 )
-                 return False
-
-             request_options_provider = http_requester._request_options_provider
-             if request_options_provider.request_options_contain_stream_state():
-                 self.logger.warning(
-                     f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the HttpRequester which is not thread-safe. Defaulting to synchronous processing"
-                 )
-                 return False
-
-         record_selector = declarative_stream.retriever.record_selector
-         if isinstance(record_selector, RecordSelector):
-             if (
-                 record_selector.record_filter
-                 and not isinstance(
-                     record_selector.record_filter, ClientSideIncrementalRecordFilterDecorator
-                 )
-                 and "stream_state" in record_selector.record_filter.condition
-             ):
-                 self.logger.warning(
-                     f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the RecordFilter which is not thread-safe. Defaulting to synchronous processing"
-                 )
-                 return False
-
-             for add_fields in [
-                 transformation
-                 for transformation in record_selector.transformations
-                 if isinstance(transformation, AddFields)
-             ]:
-                 for field in add_fields.fields:
-                     if isinstance(field.value, str) and "stream_state" in field.value:
-                         self.logger.warning(
-                             f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the AddFields which is not thread-safe. Defaulting to synchronous processing"
-                         )
-                         return False
-                     if (
-                         isinstance(field.value, InterpolatedString)
-                         and "stream_state" in field.value.string
-                     ):
-                         self.logger.warning(
-                             f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the AddFields which is not thread-safe. Defaulting to synchronous processing"
-                         )
-                         return False
-         return True
-
      @staticmethod
      def _get_retriever(
          declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
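
A partially migrated source opts in by overriding the new property. A hypothetical subclass; per the TODO, this hook exists only for the migration window:

```python
from airbyte_cdk.sources.declarative.concurrent_declarative_source import ConcurrentDeclarativeSource

# Hypothetical source that mixes declarative streams with legacy Python streams
# wrapped in AbstractStreamFacade.
class SourcePartiallyMigrated(ConcurrentDeclarativeSource):
    @property
    def is_partially_declarative(self) -> bool:
        # Route facade-wrapped streams back into the concurrent path.
        return True
```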
airbyte_cdk/sources/declarative/declarative_component_schema.yaml CHANGED
@@ -82,7 +82,6 @@ definitions:
          - stream_interval
          - stream_partition
          - stream_slice
-         - stream_state
        examples:
          - "{{ record['updates'] }}"
          - "{{ record['MetaData']['LastUpdatedTime'] }}"
@@ -1491,7 +1490,11 @@ definitions:
      limit:
        title: Limit
        description: The maximum number of calls allowed within the interval.
-       type: integer
+       anyOf:
+         - type: integer
+         - type: string
+       interpolation_context:
+         - config
      interval:
        title: Interval
        description: The time interval for the rate limit.
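
Since `limit` may now be a string interpolated against `config`, a manifest can defer the rate limit to user configuration, e.g. `limit: "{{ config['api_budget_limit'] }}"`. A sketch of how such a value could resolve, using the CDK's `InterpolatedString`; the config key is illustrative:

```python
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString

config = {"api_budget_limit": 100}  # illustrative user config
limit = InterpolatedString.create(
    "{{ config['api_budget_limit'] }}", parameters={}
).eval(config)
print(int(limit))  # 100
```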
@@ -1776,7 +1779,6 @@ definitions:
          - stream_interval
          - stream_partition
          - stream_slice
-         - stream_state
        examples:
          - "/products"
          - "/quotes/{{ stream_partition['id'] }}/quote_line_groups"
@@ -1826,7 +1828,6 @@ definitions:
          - stream_interval
          - stream_partition
          - stream_slice
-         - stream_state
        examples:
          - |
            [{"clause": {"type": "timestamp", "operator": 10, "parameters":
@@ -1844,7 +1845,6 @@ definitions:
          - stream_interval
          - stream_partition
          - stream_slice
-         - stream_state
        examples:
          - sort_order: "ASC"
            sort_field: "CREATED_AT"
@@ -1865,7 +1865,6 @@ definitions:
          - stream_interval
          - stream_partition
          - stream_slice
-         - stream_state
        examples:
          - Output-Format: JSON
          - Version: "{{ config['version'] }}"
@@ -1882,7 +1881,6 @@ definitions:
          - stream_interval
          - stream_partition
          - stream_slice
-         - stream_state
        examples:
          - unit: "day"
          - query: 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"'
@@ -2237,7 +2235,6 @@ definitions:
        interpolation_context:
          - config
          - record
-         - stream_state
          - stream_slice
      new:
        type: string
@@ -2251,7 +2248,6 @@ definitions:
        interpolation_context:
          - config
          - record
-         - stream_state
          - stream_slice
      $parameters:
        type: object
@@ -2901,7 +2897,6 @@ definitions:
          - stream_interval
          - stream_partition
          - stream_slice
-         - stream_state
        examples:
          - "{{ record['created_at'] >= stream_interval['start_time'] }}"
          - "{{ record.status in ['active', 'expired'] }}"
@@ -3689,12 +3684,6 @@ interpolation:
      - title: stream_slice
        description: This variable is deprecated. Use stream_interval or stream_partition instead.
        type: object
-     - title: stream_state
-       description: The current state of the stream. The object's keys are defined by the incremental sync's cursor_field the and partition router's values.
-       type: object
-       examples:
-         - created_at: "2020-01-01 00:00:00.000+00:00"
-         - updated_at: "2020-01-02 00:00:00.000+00:00"
    macros:
      - title: now_utc
        description: Returns the current date and time in the UTC timezone.
@@ -3759,6 +3748,16 @@ interpolation:
        - "{{ format_datetime(config['start_time'], '%Y-%m-%d') }}"
        - "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ') }}"
        - "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ', '%a, %d %b %Y %H:%M:%S %z') }}"
+     - title: str_to_datetime
+       description: Converts a string to a datetime object with UTC timezone.
+       arguments:
+         s: The string to convert.
+       return_type: datetime.datetime
+       examples:
+         - "{{ str_to_datetime('2022-01-14') }}"
+         - "{{ str_to_datetime('2022-01-01 13:45:30') }}"
+         - "{{ str_to_datetime('2022-01-01T13:45:30+00:00') }}"
+         - "{{ str_to_datetime('2022-01-01T13:45:30.123456Z') }}"
    filters:
      - title: hash
        description: Convert the specified value to a hashed string.
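
The `macros.py` side of `str_to_datetime` is not shown in this excerpt; a plausible sketch matching the documented behavior (parse the string, defaulting to UTC when no timezone is present), not necessarily the actual implementation:

```python
import datetime

from dateutil import parser

def str_to_datetime(s: str) -> datetime.datetime:
    """Parse a datetime string and normalize it to an aware UTC datetime."""
    parsed = parser.parse(s)  # accepts date-only strings, space or 'T' separators, offsets, 'Z'
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=datetime.timezone.utc)
    return parsed.astimezone(datetime.timezone.utc)

print(str_to_datetime("2022-01-01T13:45:30.123456Z"))
```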
airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py CHANGED
@@ -107,6 +107,16 @@ class CsvParser(Parser):
      encoding: Optional[str] = "utf-8"
      delimiter: Optional[str] = ","
  
+     def _get_delimiter(self) -> Optional[str]:
+         """
+         Get delimiter from the configuration. Check for the escape character and decode it.
+         """
+         if self.delimiter is not None:
+             if self.delimiter.startswith("\\"):
+                 self.delimiter = self.delimiter.encode("utf-8").decode("unicode_escape")
+
+         return self.delimiter
+
      def parse(
          self,
          data: BufferedIOBase,
@@ -115,8 +125,9 @@ class CsvParser(Parser):
          Parse CSV data from decompressed bytes.
          """
          text_data = TextIOWrapper(data, encoding=self.encoding)  # type: ignore
-         reader = csv.DictReader(text_data, delimiter=self.delimiter or ",")
-         yield from reader
+         reader = csv.DictReader(text_data, delimiter=self._get_delimiter() or ",")
+         for row in reader:
+             yield row
  
  
  @dataclass
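
The escape handling matters because a tab delimiter typically reaches the parser from a JSON or YAML config as the two characters backslash and `t`. A self-contained demo of the decoding step used in `_get_delimiter`:

```python
raw = "\\t"  # what a user-configured tab delimiter looks like after JSON parsing
decoded = raw.encode("utf-8").decode("unicode_escape")
assert decoded == "\t" and len(decoded) == 1  # now a single real tab character
```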
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py CHANGED
@@ -251,7 +251,6 @@ class ConcurrentPerPartitionCursor(Cursor):
          self._message_repository.emit_message(state_message)
  
      def stream_slices(self) -> Iterable[StreamSlice]:
-         print("stream_slices")
          if self._timer.is_running():
              raise RuntimeError("stream_slices has been executed more than once.")