airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.5.5__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (198)
  1. airbyte_cdk/__init__.py +17 -2
  2. airbyte_cdk/config_observation.py +10 -3
  3. airbyte_cdk/connector.py +19 -9
  4. airbyte_cdk/connector_builder/connector_builder_handler.py +28 -8
  5. airbyte_cdk/connector_builder/main.py +26 -6
  6. airbyte_cdk/connector_builder/message_grouper.py +95 -25
  7. airbyte_cdk/destinations/destination.py +47 -14
  8. airbyte_cdk/destinations/vector_db_based/config.py +36 -14
  9. airbyte_cdk/destinations/vector_db_based/document_processor.py +49 -11
  10. airbyte_cdk/destinations/vector_db_based/embedder.py +52 -11
  11. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  12. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  13. airbyte_cdk/destinations/vector_db_based/writer.py +15 -4
  14. airbyte_cdk/entrypoint.py +82 -26
  15. airbyte_cdk/exception_handler.py +13 -3
  16. airbyte_cdk/logger.py +10 -2
  17. airbyte_cdk/models/airbyte_protocol.py +11 -5
  18. airbyte_cdk/models/airbyte_protocol_serializers.py +9 -3
  19. airbyte_cdk/models/well_known_types.py +1 -1
  20. airbyte_cdk/sources/abstract_source.py +63 -17
  21. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +47 -14
  22. airbyte_cdk/sources/concurrent_source/concurrent_source.py +25 -7
  23. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +27 -6
  24. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +9 -3
  25. airbyte_cdk/sources/connector_state_manager.py +32 -10
  26. airbyte_cdk/sources/declarative/async_job/job.py +3 -1
  27. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +68 -14
  28. airbyte_cdk/sources/declarative/async_job/job_tracker.py +24 -6
  29. airbyte_cdk/sources/declarative/async_job/repository.py +3 -1
  30. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  31. airbyte_cdk/sources/declarative/auth/jwt.py +27 -7
  32. airbyte_cdk/sources/declarative/auth/oauth.py +35 -11
  33. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +3 -1
  34. airbyte_cdk/sources/declarative/auth/token.py +25 -8
  35. airbyte_cdk/sources/declarative/checks/check_stream.py +12 -4
  36. airbyte_cdk/sources/declarative/checks/connection_checker.py +3 -1
  37. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +11 -3
  38. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +106 -50
  39. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +20 -6
  40. airbyte_cdk/sources/declarative/declarative_source.py +3 -1
  41. airbyte_cdk/sources/declarative/declarative_stream.py +27 -6
  42. airbyte_cdk/sources/declarative/decoders/decoder.py +3 -1
  43. airbyte_cdk/sources/declarative/decoders/json_decoder.py +3 -1
  44. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +3 -1
  45. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +6 -2
  46. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +6 -2
  47. airbyte_cdk/sources/declarative/extractors/record_filter.py +24 -7
  48. airbyte_cdk/sources/declarative/extractors/record_selector.py +10 -3
  49. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +15 -5
  50. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +96 -31
  51. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +22 -8
  52. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +46 -15
  53. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +19 -5
  54. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +3 -1
  55. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +20 -2
  56. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +5 -1
  57. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +10 -3
  58. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +6 -2
  59. airbyte_cdk/sources/declarative/interpolation/interpolation.py +7 -1
  60. airbyte_cdk/sources/declarative/interpolation/jinja.py +6 -2
  61. airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
  62. airbyte_cdk/sources/declarative/manifest_declarative_source.py +106 -24
  63. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +7 -2
  64. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +656 -678
  65. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +13 -4
  66. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +9 -2
  67. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +782 -232
  68. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +29 -7
  69. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +25 -7
  70. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +54 -15
  71. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +6 -2
  72. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +3 -1
  73. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +17 -5
  74. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +15 -5
  75. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +3 -1
  76. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +18 -8
  77. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +16 -7
  78. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +51 -14
  79. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +29 -8
  80. airbyte_cdk/sources/declarative/requesters/http_requester.py +58 -16
  81. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +49 -14
  82. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +3 -1
  83. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +3 -1
  84. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +17 -5
  85. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +24 -7
  86. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +9 -3
  87. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +3 -1
  88. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +6 -2
  89. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +19 -6
  90. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +3 -1
  91. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +21 -7
  92. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +18 -6
  93. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +27 -8
  94. airbyte_cdk/sources/declarative/requesters/requester.py +3 -1
  95. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -5
  96. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +105 -24
  97. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +3 -1
  98. airbyte_cdk/sources/declarative/spec/spec.py +8 -2
  99. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +3 -1
  100. airbyte_cdk/sources/declarative/transformations/add_fields.py +12 -3
  101. airbyte_cdk/sources/declarative/transformations/remove_fields.py +6 -2
  102. airbyte_cdk/sources/declarative/types.py +8 -1
  103. airbyte_cdk/sources/declarative/yaml_declarative_source.py +3 -1
  104. airbyte_cdk/sources/embedded/base_integration.py +14 -4
  105. airbyte_cdk/sources/embedded/catalog.py +16 -4
  106. airbyte_cdk/sources/embedded/runner.py +19 -3
  107. airbyte_cdk/sources/embedded/tools.py +3 -1
  108. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
  109. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +27 -7
  110. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +12 -6
  111. airbyte_cdk/sources/file_based/config/csv_format.py +21 -9
  112. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +6 -2
  113. airbyte_cdk/sources/file_based/config/unstructured_format.py +10 -3
  114. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  115. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  116. airbyte_cdk/sources/file_based/exceptions.py +13 -15
  117. airbyte_cdk/sources/file_based/file_based_source.py +82 -24
  118. airbyte_cdk/sources/file_based/file_based_stream_reader.py +16 -5
  119. airbyte_cdk/sources/file_based/file_types/avro_parser.py +58 -17
  120. airbyte_cdk/sources/file_based/file_types/csv_parser.py +89 -26
  121. airbyte_cdk/sources/file_based/file_types/excel_parser.py +25 -7
  122. airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
  123. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  124. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +20 -6
  125. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +57 -16
  126. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +64 -15
  127. airbyte_cdk/sources/file_based/schema_helpers.py +33 -10
  128. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  129. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  130. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +33 -10
  131. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +47 -11
  132. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +13 -22
  133. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +53 -17
  134. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +17 -5
  135. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  136. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +26 -9
  137. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +67 -21
  138. airbyte_cdk/sources/http_logger.py +5 -1
  139. airbyte_cdk/sources/message/repository.py +18 -4
  140. airbyte_cdk/sources/source.py +17 -7
  141. airbyte_cdk/sources/streams/availability_strategy.py +9 -3
  142. airbyte_cdk/sources/streams/call_rate.py +63 -19
  143. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +31 -7
  144. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +6 -2
  145. airbyte_cdk/sources/streams/concurrent/adapters.py +77 -22
  146. airbyte_cdk/sources/streams/concurrent/cursor.py +56 -20
  147. airbyte_cdk/sources/streams/concurrent/default_stream.py +9 -2
  148. airbyte_cdk/sources/streams/concurrent/helpers.py +6 -2
  149. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +9 -2
  150. airbyte_cdk/sources/streams/concurrent/partition_reader.py +4 -1
  151. airbyte_cdk/sources/streams/concurrent/partitions/record.py +10 -2
  152. airbyte_cdk/sources/streams/concurrent/partitions/types.py +6 -2
  153. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +25 -10
  154. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +32 -16
  155. airbyte_cdk/sources/streams/core.py +77 -22
  156. airbyte_cdk/sources/streams/http/availability_strategy.py +3 -1
  157. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +4 -1
  158. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +3 -1
  159. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +16 -5
  160. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +9 -3
  161. airbyte_cdk/sources/streams/http/exceptions.py +2 -2
  162. airbyte_cdk/sources/streams/http/http.py +133 -33
  163. airbyte_cdk/sources/streams/http/http_client.py +91 -29
  164. airbyte_cdk/sources/streams/http/rate_limiting.py +23 -7
  165. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +19 -6
  166. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +38 -11
  167. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  168. airbyte_cdk/sources/types.py +5 -1
  169. airbyte_cdk/sources/utils/record_helper.py +12 -3
  170. airbyte_cdk/sources/utils/schema_helpers.py +9 -3
  171. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  172. airbyte_cdk/sources/utils/transform.py +24 -9
  173. airbyte_cdk/sql/exceptions.py +19 -6
  174. airbyte_cdk/sql/secrets.py +3 -1
  175. airbyte_cdk/sql/shared/catalog_providers.py +13 -4
  176. airbyte_cdk/sql/shared/sql_processor.py +44 -14
  177. airbyte_cdk/test/catalog_builder.py +19 -8
  178. airbyte_cdk/test/entrypoint_wrapper.py +27 -8
  179. airbyte_cdk/test/mock_http/mocker.py +41 -11
  180. airbyte_cdk/test/mock_http/request.py +9 -3
  181. airbyte_cdk/test/mock_http/response.py +3 -1
  182. airbyte_cdk/test/mock_http/response_builder.py +29 -7
  183. airbyte_cdk/test/state_builder.py +10 -2
  184. airbyte_cdk/test/utils/data.py +6 -2
  185. airbyte_cdk/test/utils/http_mocking.py +3 -1
  186. airbyte_cdk/utils/airbyte_secrets_utils.py +3 -1
  187. airbyte_cdk/utils/analytics_message.py +10 -2
  188. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  189. airbyte_cdk/utils/mapping_helpers.py +3 -1
  190. airbyte_cdk/utils/message_utils.py +11 -4
  191. airbyte_cdk/utils/print_buffer.py +6 -1
  192. airbyte_cdk/utils/schema_inferrer.py +30 -9
  193. airbyte_cdk/utils/spec_schema_transformations.py +3 -1
  194. airbyte_cdk/utils/traced_exception.py +35 -9
  195. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/METADATA +7 -6
  196. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/RECORD +198 -198
  197. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/LICENSE.txt +0 -0
  198. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/WHEEL +0 -0

airbyte_cdk/sources/declarative/extractors/record_selector.py

@@ -61,7 +61,9 @@ class RecordSelector(HttpSelector):
         :return: List of Records selected from the response
         """
         all_data: Iterable[Mapping[str, Any]] = self.extractor.extract_records(response)
-        yield from self.filter_and_transform(all_data, stream_state, records_schema, stream_slice, next_page_token)
+        yield from self.filter_and_transform(
+            all_data, stream_state, records_schema, stream_slice, next_page_token
+        )

     def filter_and_transform(
         self,
@@ -106,7 +108,10 @@ class RecordSelector(HttpSelector):
     ) -> Iterable[Mapping[str, Any]]:
         if self.record_filter:
             yield from self.record_filter.filter_records(
-                records, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
+                records,
+                stream_state=stream_state,
+                stream_slice=stream_slice,
+                next_page_token=next_page_token,
             )
         else:
             yield from records
@@ -119,5 +124,7 @@ class RecordSelector(HttpSelector):
     ) -> Iterable[Mapping[str, Any]]:
         for record in records:
             for transformation in self.transformations:
-                transformation.transform(record, config=self.config, stream_state=stream_state, stream_slice=stream_slice)  # type: ignore # record has type Mapping[str, Any], but Dict[str, Any] expected
+                transformation.transform(
+                    record, config=self.config, stream_state=stream_state, stream_slice=stream_slice
+                )  # type: ignore # record has type Mapping[str, Any], but Dict[str, Any] expected
             yield record
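
Editor's note: the hunks above are pure reformatting of RecordSelector, whose job is a three-stage pipeline: extract records from the response, filter them, then apply transformations. A minimal sketch of that shape follows; the predicate and the uppercasing transform are hypothetical stand-ins, not CDK classes.

# Minimal sketch of the extract -> filter -> transform pipeline that
# RecordSelector.select_records implements. The "active" filter and the
# name-uppercasing transform are illustrative stand-ins.
from typing import Any, Dict, Iterable

def select_records(raw: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
    for record in raw:                                    # extract step (already parsed here)
        if record.get("active"):                          # record_filter step
            record["name"] = str(record["name"]).upper()  # transformation step, in place
            yield record                                  # records are yielded lazily

print(list(select_records([{"name": "a", "active": True}, {"name": "b", "active": False}])))
# -> [{'name': 'A', 'active': True}]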

airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py

@@ -68,7 +68,9 @@ class ResponseToFileExtractor(RecordExtractor):

         res = b.replace(b"\x00", b"")
         if len(res) < len(b):
-            self.logger.warning("Filter 'null' bytes from string, size reduced %d -> %d chars", len(b), len(res))
+            self.logger.warning(
+                "Filter 'null' bytes from string, size reduced %d -> %d chars", len(b), len(res)
+            )
         return res

     def _save_to_file(self, response: requests.Response) -> Tuple[str, str]:
@@ -106,9 +108,13 @@ class ResponseToFileExtractor(RecordExtractor):
         if os.path.isfile(tmp_file):
             return tmp_file, response_encoding
         else:
-            raise ValueError(f"The IO/Error occured while verifying binary data. Tmp file {tmp_file} doesn't exist.")
+            raise ValueError(
+                f"The IO/Error occured while verifying binary data. Tmp file {tmp_file} doesn't exist."
+            )

-    def _read_with_chunks(self, path: str, file_encoding: str, chunk_size: int = 100) -> Iterable[Mapping[str, Any]]:
+    def _read_with_chunks(
+        self, path: str, file_encoding: str, chunk_size: int = 100
+    ) -> Iterable[Mapping[str, Any]]:
         """
         Reads data from a file in chunks and yields each row as a dictionary.

@@ -126,7 +132,9 @@ class ResponseToFileExtractor(RecordExtractor):

         try:
             with open(path, "r", encoding=file_encoding) as data:
-                chunks = pd.read_csv(data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object)
+                chunks = pd.read_csv(
+                    data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object
+                )
                 for chunk in chunks:
                     chunk = chunk.replace({nan: None}).to_dict(orient="records")
                     for row in chunk:
@@ -140,7 +148,9 @@ class ResponseToFileExtractor(RecordExtractor):
         # remove binary tmp file, after data is read
         os.remove(path)

-    def extract_records(self, response: Optional[requests.Response] = None) -> Iterable[Mapping[str, Any]]:
+    def extract_records(
+        self, response: Optional[requests.Response] = None
+    ) -> Iterable[Mapping[str, Any]]:
         """
         Extracts records from the given response by:
         1) Saving the result to a tmp file
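
Editor's note: for context, _read_with_chunks streams a CSV through pandas in fixed-size chunks so the temporary file never has to fit in memory, normalizing NaN to None on the way out. A standalone sketch of the same pattern, assuming the path is whatever _save_to_file returned:

# Standalone sketch of the chunked read performed by _read_with_chunks:
# pandas yields DataFrames of chunk_size rows; each is converted to plain
# dict rows with NaN normalized to None, as in the code above.
from typing import Any, Iterable, Mapping

import pandas as pd
from numpy import nan

def read_with_chunks(path: str, file_encoding: str, chunk_size: int = 100) -> Iterable[Mapping[str, Any]]:
    with open(path, "r", encoding=file_encoding) as data:
        chunks = pd.read_csv(data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object)
        for chunk in chunks:
            # each chunk is a DataFrame; yield plain dict rows
            yield from chunk.replace({nan: None}).to_dict(orient="records")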

airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py

@@ -13,7 +13,10 @@ from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDate
 from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
 from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation
-from airbyte_cdk.sources.declarative.requesters.request_option import RequestOption, RequestOptionType
+from airbyte_cdk.sources.declarative.requesters.request_option import (
+    RequestOption,
+    RequestOptionType,
+)
 from airbyte_cdk.sources.message import MessageRepository
 from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
 from isodate import Duration, duration_isoformat, parse_duration
@@ -72,27 +75,41 @@ class DatetimeBasedCursor(DeclarativeCursor):
     cursor_datetime_formats: List[str] = field(default_factory=lambda: [])

     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
-        if (self.step and not self.cursor_granularity) or (not self.step and self.cursor_granularity):
+        if (self.step and not self.cursor_granularity) or (
+            not self.step and self.cursor_granularity
+        ):
             raise ValueError(
                 f"If step is defined, cursor_granularity should be as well and vice-versa. "
                 f"Right now, step is `{self.step}` and cursor_granularity is `{self.cursor_granularity}`"
             )
         self._start_datetime = MinMaxDatetime.create(self.start_datetime, parameters)
-        self._end_datetime = None if not self.end_datetime else MinMaxDatetime.create(self.end_datetime, parameters)
+        self._end_datetime = (
+            None if not self.end_datetime else MinMaxDatetime.create(self.end_datetime, parameters)
+        )

         self._timezone = datetime.timezone.utc
         self._interpolation = JinjaInterpolation()

         self._step = (
-            self._parse_timedelta(InterpolatedString.create(self.step, parameters=parameters).eval(self.config))
+            self._parse_timedelta(
+                InterpolatedString.create(self.step, parameters=parameters).eval(self.config)
+            )
             if self.step
             else datetime.timedelta.max
         )
         self._cursor_granularity = self._parse_timedelta(self.cursor_granularity)
         self.cursor_field = InterpolatedString.create(self.cursor_field, parameters=parameters)
-        self._lookback_window = InterpolatedString.create(self.lookback_window, parameters=parameters) if self.lookback_window else None
-        self._partition_field_start = InterpolatedString.create(self.partition_field_start or "start_time", parameters=parameters)
-        self._partition_field_end = InterpolatedString.create(self.partition_field_end or "end_time", parameters=parameters)
+        self._lookback_window = (
+            InterpolatedString.create(self.lookback_window, parameters=parameters)
+            if self.lookback_window
+            else None
+        )
+        self._partition_field_start = InterpolatedString.create(
+            self.partition_field_start or "start_time", parameters=parameters
+        )
+        self._partition_field_end = InterpolatedString.create(
+            self.partition_field_end or "end_time", parameters=parameters
+        )
         self._parser = DatetimeParser()

         # If datetime format is not specified then start/end datetime should inherit it from the stream slicer
@@ -114,7 +131,9 @@ class DatetimeBasedCursor(DeclarativeCursor):

         :param stream_state: The state of the stream as returned by get_stream_state
         """
-        self._cursor = stream_state.get(self.cursor_field.eval(self.config)) if stream_state else None  # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
+        self._cursor = (
+            stream_state.get(self.cursor_field.eval(self.config)) if stream_state else None
+        )  # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__

     def observe(self, stream_slice: StreamSlice, record: Record) -> None:
         """
@@ -131,28 +150,38 @@ class DatetimeBasedCursor(DeclarativeCursor):

         start_field = self._partition_field_start.eval(self.config)
         end_field = self._partition_field_end.eval(self.config)
-        is_highest_observed_cursor_value = not self._highest_observed_cursor_field_value or self.parse_date(
-            record_cursor_value
-        ) > self.parse_date(self._highest_observed_cursor_field_value)
+        is_highest_observed_cursor_value = (
+            not self._highest_observed_cursor_field_value
+            or self.parse_date(record_cursor_value)
+            > self.parse_date(self._highest_observed_cursor_field_value)
+        )
         if (
-            self._is_within_daterange_boundaries(record, stream_slice.get(start_field), stream_slice.get(end_field))  # type: ignore # we know that stream_slices for these cursors will use a string representing an unparsed date
+            self._is_within_daterange_boundaries(
+                record, stream_slice.get(start_field), stream_slice.get(end_field)
+            )  # type: ignore # we know that stream_slices for these cursors will use a string representing an unparsed date
             and is_highest_observed_cursor_value
         ):
             self._highest_observed_cursor_field_value = record_cursor_value

     def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
         if stream_slice.partition:
-            raise ValueError(f"Stream slice {stream_slice} should not have a partition. Got {stream_slice.partition}.")
+            raise ValueError(
+                f"Stream slice {stream_slice} should not have a partition. Got {stream_slice.partition}."
+            )
         cursor_value_str_by_cursor_value_datetime = dict(
             map(
                 # we need to ensure the cursor value is preserved as is in the state else the CATs might complain of something like
                 # 2023-01-04T17:30:19.000Z' <= '2023-01-04T17:30:19.000000Z'
                 lambda datetime_str: (self.parse_date(datetime_str), datetime_str),  # type: ignore # because of the filter on the next line, this will only be called with a str
-                filter(lambda item: item, [self._cursor, self._highest_observed_cursor_field_value]),
+                filter(
+                    lambda item: item, [self._cursor, self._highest_observed_cursor_field_value]
+                ),
             )
         )
         self._cursor = (
-            cursor_value_str_by_cursor_value_datetime[max(cursor_value_str_by_cursor_value_datetime.keys())]
+            cursor_value_str_by_cursor_value_datetime[
+                max(cursor_value_str_by_cursor_value_datetime.keys())
+            ]
             if cursor_value_str_by_cursor_value_datetime
             else None
         )
@@ -175,11 +204,19 @@ class DatetimeBasedCursor(DeclarativeCursor):
         # through each slice and does not belong to a specific slice. We just return stream state as it is.
         return self.get_stream_state()

-    def _calculate_earliest_possible_value(self, end_datetime: datetime.datetime) -> datetime.datetime:
-        lookback_delta = self._parse_timedelta(self._lookback_window.eval(self.config) if self._lookback_window else "P0D")
-        earliest_possible_start_datetime = min(self._start_datetime.get_datetime(self.config), end_datetime)
+    def _calculate_earliest_possible_value(
+        self, end_datetime: datetime.datetime
+    ) -> datetime.datetime:
+        lookback_delta = self._parse_timedelta(
+            self._lookback_window.eval(self.config) if self._lookback_window else "P0D"
+        )
+        earliest_possible_start_datetime = min(
+            self._start_datetime.get_datetime(self.config), end_datetime
+        )
         try:
-            cursor_datetime = self._calculate_cursor_datetime_from_state(self.get_stream_state()) - lookback_delta
+            cursor_datetime = (
+                self._calculate_cursor_datetime_from_state(self.get_stream_state()) - lookback_delta
+            )
         except OverflowError:
             # cursor_datetime defers to the minimum date if it does not exist in the state. Trying to subtract
             # a timedelta from the minimum datetime results in an OverflowError
@@ -200,7 +237,9 @@ class DatetimeBasedCursor(DeclarativeCursor):
             return now
         return min(self._end_datetime.get_datetime(self.config), now)

-    def _calculate_cursor_datetime_from_state(self, stream_state: Mapping[str, Any]) -> datetime.datetime:
+    def _calculate_cursor_datetime_from_state(
+        self, stream_state: Mapping[str, Any]
+    ) -> datetime.datetime:
         if self.cursor_field.eval(self.config, stream_state=stream_state) in stream_state:  # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
             return self.parse_date(stream_state[self.cursor_field.eval(self.config)])  # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
         return datetime.datetime.min.replace(tzinfo=datetime.timezone.utc)
@@ -209,7 +248,10 @@ class DatetimeBasedCursor(DeclarativeCursor):
         return self._parser.format(dt, self.datetime_format)

     def _partition_daterange(
-        self, start: datetime.datetime, end: datetime.datetime, step: Union[datetime.timedelta, Duration]
+        self,
+        start: datetime.datetime,
+        end: datetime.datetime,
+        step: Union[datetime.timedelta, Duration],
     ) -> List[StreamSlice]:
         start_field = self._partition_field_start.eval(self.config)
         end_field = self._partition_field_end.eval(self.config)
@@ -220,7 +262,11 @@ class DatetimeBasedCursor(DeclarativeCursor):
             end_date = self._get_date(next_start - self._cursor_granularity, end, min)
             dates.append(
                 StreamSlice(
-                    partition={}, cursor_slice={start_field: self._format_datetime(start), end_field: self._format_datetime(end_date)}
+                    partition={},
+                    cursor_slice={
+                        start_field: self._format_datetime(start),
+                        end_field: self._format_datetime(end_date),
+                    },
                 )
             )
             start = next_start
@@ -231,7 +277,9 @@ class DatetimeBasedCursor(DeclarativeCursor):
             return start < end
         return start <= end

-    def _evaluate_next_start_date_safely(self, start: datetime.datetime, step: datetime.timedelta) -> datetime.datetime:
+    def _evaluate_next_start_date_safely(
+        self, start: datetime.datetime, step: datetime.timedelta
+    ) -> datetime.datetime:
         """
         Given that we set the default step at datetime.timedelta.max, we will generate an OverflowError when evaluating the next start_date
         This method assumes that users would never enter a step that would generate an overflow. Given that would be the case, the code
@@ -308,7 +356,9 @@ class DatetimeBasedCursor(DeclarativeCursor):
         # Never update kwargs
         return {}

-    def _get_request_options(self, option_type: RequestOptionType, stream_slice: Optional[StreamSlice]) -> Mapping[str, Any]:
+    def _get_request_options(
+        self, option_type: RequestOptionType, stream_slice: Optional[StreamSlice]
+    ) -> Mapping[str, Any]:
         options: MutableMapping[str, Any] = {}
         if not stream_slice:
             return options
@@ -317,7 +367,9 @@ class DatetimeBasedCursor(DeclarativeCursor):
                 self._partition_field_start.eval(self.config)
             )
         if self.end_time_option and self.end_time_option.inject_into == option_type:
-            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(self._partition_field_end.eval(self.config))  # type: ignore # field_name is always casted to an interpolated string
+            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(
+                self._partition_field_end.eval(self.config)
+            )  # type: ignore # field_name is always casted to an interpolated string
         return options

     def should_be_synced(self, record: Record) -> bool:
@@ -330,11 +382,18 @@ class DatetimeBasedCursor(DeclarativeCursor):
             )
             return True
         latest_possible_cursor_value = self.select_best_end_datetime()
-        earliest_possible_cursor_value = self._calculate_earliest_possible_value(latest_possible_cursor_value)
-        return self._is_within_daterange_boundaries(record, earliest_possible_cursor_value, latest_possible_cursor_value)
+        earliest_possible_cursor_value = self._calculate_earliest_possible_value(
+            latest_possible_cursor_value
+        )
+        return self._is_within_daterange_boundaries(
+            record, earliest_possible_cursor_value, latest_possible_cursor_value
+        )

     def _is_within_daterange_boundaries(
-        self, record: Record, start_datetime_boundary: Union[datetime.datetime, str], end_datetime_boundary: Union[datetime.datetime, str]
+        self,
+        record: Record,
+        start_datetime_boundary: Union[datetime.datetime, str],
+        end_datetime_boundary: Union[datetime.datetime, str],
     ) -> bool:
         cursor_field = self.cursor_field.eval(self.config)  # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
         record_cursor_value = record.get(cursor_field)
@@ -348,7 +407,9 @@ class DatetimeBasedCursor(DeclarativeCursor):
             start_datetime_boundary = self.parse_date(start_datetime_boundary)
         if isinstance(end_datetime_boundary, str):
             end_datetime_boundary = self.parse_date(end_datetime_boundary)
-        return start_datetime_boundary <= self.parse_date(record_cursor_value) <= end_datetime_boundary
+        return (
+            start_datetime_boundary <= self.parse_date(record_cursor_value) <= end_datetime_boundary
+        )

     def _send_log(self, level: Level, message: str) -> None:
         if self.message_repository:
@@ -378,8 +439,12 @@ class DatetimeBasedCursor(DeclarativeCursor):
         :param lookback_window_in_seconds: The lookback duration in seconds to potentially update to.
         """
         runtime_lookback_window = duration_isoformat(timedelta(seconds=lookback_window_in_seconds))
-        config_lookback = parse_duration(self._lookback_window.eval(self.config) if self._lookback_window else "P0D")
+        config_lookback = parse_duration(
+            self._lookback_window.eval(self.config) if self._lookback_window else "P0D"
+        )

         # Check if the new runtime lookback window is greater than the current config lookback
         if parse_duration(runtime_lookback_window) > config_lookback:
-            self._lookback_window = InterpolatedString.create(runtime_lookback_window, parameters={})
+            self._lookback_window = InterpolatedString.create(
+                runtime_lookback_window, parameters={}
+            )
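
Editor's note: all DatetimeBasedCursor hunks are formatting-only. The core behavior they touch, _partition_daterange, slices a date interval into step-sized windows whose ends are pulled back by one cursor_granularity so adjacent slices do not overlap. A simplified, self-contained sketch of that loop:

# Simplified sketch of the slicing loop in _partition_daterange above: each
# window ends one cursor_granularity before the next window starts.
import datetime

def partition_daterange(start, end, step, granularity):
    slices = []
    while start <= end:
        end_date = min(start + step - granularity, end)
        slices.append({"start_time": start.isoformat(), "end_time": end_date.isoformat()})
        start = start + step
    return slices

day, second = datetime.timedelta(days=1), datetime.timedelta(seconds=1)
for s in partition_daterange(datetime.datetime(2024, 1, 1), datetime.datetime(2024, 1, 3), day, second):
    print(s)
# {'start_time': '2024-01-01T00:00:00', 'end_time': '2024-01-01T23:59:59'}
# {'start_time': '2024-01-02T00:00:00', 'end_time': '2024-01-02T23:59:59'}
# {'start_time': '2024-01-03T00:00:00', 'end_time': '2024-01-03T00:00:00'}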

airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py

@@ -84,7 +84,9 @@ class GlobalSubstreamCursor(DeclarativeCursor):
         self._partition_router = partition_router
         self._timer = Timer()
         self._lock = threading.Lock()
-        self._slice_semaphore = threading.Semaphore(0)  # Start with 0, indicating no slices being tracked
+        self._slice_semaphore = threading.Semaphore(
+            0
+        )  # Start with 0, indicating no slices being tracked
         self._all_slices_yielded = False
         self._lookback_window: Optional[int] = None
         self._current_partition: Optional[Mapping[str, Any]] = None
@@ -116,7 +118,9 @@ class GlobalSubstreamCursor(DeclarativeCursor):
         )

         self.start_slices_generation()
-        for slice, last, state in iterate_with_last_flag_and_state(slice_generator, self._partition_router.get_stream_state):
+        for slice, last, state in iterate_with_last_flag_and_state(
+            slice_generator, self._partition_router.get_stream_state
+        ):
             self._parent_state = state
             self.register_slice(last)
             yield slice
@@ -124,7 +128,8 @@ class GlobalSubstreamCursor(DeclarativeCursor):

     def generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
         slice_generator = (
-            StreamSlice(partition=partition, cursor_slice=cursor_slice) for cursor_slice in self._stream_cursor.stream_slices()
+            StreamSlice(partition=partition, cursor_slice=cursor_slice)
+            for cursor_slice in self._stream_cursor.stream_slices()
         )

         yield from slice_generator
@@ -199,10 +204,14 @@ class GlobalSubstreamCursor(DeclarativeCursor):
         if hasattr(self._stream_cursor, "set_runtime_lookback_window"):
             self._stream_cursor.set_runtime_lookback_window(lookback_window)
         else:
-            raise ValueError("The cursor class for Global Substream Cursor does not have a set_runtime_lookback_window method")
+            raise ValueError(
+                "The cursor class for Global Substream Cursor does not have a set_runtime_lookback_window method"
+            )

     def observe(self, stream_slice: StreamSlice, record: Record) -> None:
-        self._stream_cursor.observe(StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), record)
+        self._stream_cursor.observe(
+            StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), record
+        )

     def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
         """
@@ -220,7 +229,9 @@ class GlobalSubstreamCursor(DeclarativeCursor):
             self._slice_semaphore.acquire()
             if self._all_slices_yielded and self._slice_semaphore._value == 0:
                 self._lookback_window = self._timer.finish()
-            self._stream_cursor.close_slice(StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), *args)
+            self._stream_cursor.close_slice(
+                StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), *args
+            )

     def get_stream_state(self) -> StreamState:
         state: dict[str, Any] = {"state": self._stream_cursor.get_stream_state()}
@@ -322,12 +333,15 @@ class GlobalSubstreamCursor(DeclarativeCursor):

     def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
         return self._stream_cursor.is_greater_than_or_equal(
-            self._convert_record_to_cursor_record(first), self._convert_record_to_cursor_record(second)
+            self._convert_record_to_cursor_record(first),
+            self._convert_record_to_cursor_record(second),
         )

     @staticmethod
     def _convert_record_to_cursor_record(record: Record) -> Record:
         return Record(
             record.data,
-            StreamSlice(partition={}, cursor_slice=record.associated_slice.cursor_slice) if record.associated_slice else None,
+            StreamSlice(partition={}, cursor_slice=record.associated_slice.cursor_slice)
+            if record.associated_slice
+            else None,
         )
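
Editor's note: GlobalSubstreamCursor keeps a single global state across all partitions, so it must know when the last slice has both been yielded and been closed before it finalizes that state. The semaphore bookkeeping visible above reduces to the following sketch (the peek at the private _value attribute mirrors the CDK code shown in the hunk):

# Reduced sketch of the semaphore accounting in GlobalSubstreamCursor:
# register_slice releases once per yielded slice, close_slice acquires once
# per finished slice, and the state is final only when both sides balance
# after the last slice was yielded.
import threading

class SliceTracker:
    def __init__(self) -> None:
        self._slice_semaphore = threading.Semaphore(0)  # 0 = no slices tracked yet
        self._all_slices_yielded = False

    def register_slice(self, last: bool) -> None:
        self._slice_semaphore.release()  # one more slice in flight
        if last:
            self._all_slices_yielded = True

    def close_slice(self) -> bool:
        self._slice_semaphore.acquire()  # one slice finished
        # done only when the generator is exhausted and no slice is in flight
        return self._all_slices_yielded and self._slice_semaphore._value == 0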

airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py

@@ -8,7 +8,9 @@ from typing import Any, Callable, Iterable, Mapping, Optional, Union

 from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
 from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
-from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import PerPartitionKeySerializer
+from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
+    PerPartitionKeySerializer,
+)
 from airbyte_cdk.sources.types import Record, StreamSlice, StreamState

 logger = logging.getLogger("airbyte")
@@ -67,12 +69,18 @@ class PerPartitionCursor(DeclarativeCursor):

         cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
         if not cursor:
-            partition_state = self._state_to_migrate_from if self._state_to_migrate_from else self._NO_CURSOR_STATE
+            partition_state = (
+                self._state_to_migrate_from
+                if self._state_to_migrate_from
+                else self._NO_CURSOR_STATE
+            )
             cursor = self._create_cursor(partition_state)
             self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor

         for cursor_slice in cursor.stream_slices():
-            yield StreamSlice(partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields)
+            yield StreamSlice(
+                partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
+            )

     def _ensure_partition_limit(self) -> None:
         """
@@ -80,7 +88,9 @@ class PerPartitionCursor(DeclarativeCursor):
         """
         while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
             self._over_limit += 1
-            oldest_partition = self._cursor_per_partition.popitem(last=False)[0]  # Remove the oldest partition
+            oldest_partition = self._cursor_per_partition.popitem(last=False)[
+                0
+            ]  # Remove the oldest partition
             logger.warning(
                 f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
             )
@@ -128,7 +138,9 @@ class PerPartitionCursor(DeclarativeCursor):

         else:
             for state in stream_state["states"]:
-                self._cursor_per_partition[self._to_partition_key(state["partition"])] = self._create_cursor(state["cursor"])
+                self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
+                    self._create_cursor(state["cursor"])
+                )

             # set default state for missing partitions if it is per partition with fallback to global
             if "state" in stream_state:
@@ -214,7 +226,9 @@ class PerPartitionCursor(DeclarativeCursor):
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
                 next_page_token=next_page_token,
-            ) | self._cursor_per_partition[self._to_partition_key(stream_slice.partition)].get_request_params(
+            ) | self._cursor_per_partition[
+                self._to_partition_key(stream_slice.partition)
+            ].get_request_params(
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice),
                 next_page_token=next_page_token,
@@ -234,7 +248,9 @@ class PerPartitionCursor(DeclarativeCursor):
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
                 next_page_token=next_page_token,
-            ) | self._cursor_per_partition[self._to_partition_key(stream_slice.partition)].get_request_headers(
+            ) | self._cursor_per_partition[
+                self._to_partition_key(stream_slice.partition)
+            ].get_request_headers(
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice),
                 next_page_token=next_page_token,
@@ -254,7 +270,9 @@ class PerPartitionCursor(DeclarativeCursor):
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
                 next_page_token=next_page_token,
-            ) | self._cursor_per_partition[self._to_partition_key(stream_slice.partition)].get_request_body_data(
+            ) | self._cursor_per_partition[
+                self._to_partition_key(stream_slice.partition)
+            ].get_request_body_data(
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice),
                 next_page_token=next_page_token,
@@ -274,7 +292,9 @@ class PerPartitionCursor(DeclarativeCursor):
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
                 next_page_token=next_page_token,
-            ) | self._cursor_per_partition[self._to_partition_key(stream_slice.partition)].get_request_body_json(
+            ) | self._cursor_per_partition[
+                self._to_partition_key(stream_slice.partition)
+            ].get_request_body_json(
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice),
                 next_page_token=next_page_token,
@@ -283,32 +303,43 @@ class PerPartitionCursor(DeclarativeCursor):
         raise ValueError("A partition needs to be provided in order to get request body json")

     def should_be_synced(self, record: Record) -> bool:
-        return self._get_cursor(record).should_be_synced(self._convert_record_to_cursor_record(record))
+        return self._get_cursor(record).should_be_synced(
+            self._convert_record_to_cursor_record(record)
+        )

     def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
         if not first.associated_slice or not second.associated_slice:
-            raise ValueError(f"Both records should have an associated slice but got {first.associated_slice} and {second.associated_slice}")
+            raise ValueError(
+                f"Both records should have an associated slice but got {first.associated_slice} and {second.associated_slice}"
+            )
         if first.associated_slice.partition != second.associated_slice.partition:
             raise ValueError(
                 f"To compare records, partition should be the same but got {first.associated_slice.partition} and {second.associated_slice.partition}"
             )

         return self._get_cursor(first).is_greater_than_or_equal(
-            self._convert_record_to_cursor_record(first), self._convert_record_to_cursor_record(second)
+            self._convert_record_to_cursor_record(first),
+            self._convert_record_to_cursor_record(second),
        )

     @staticmethod
     def _convert_record_to_cursor_record(record: Record) -> Record:
         return Record(
             record.data,
-            StreamSlice(partition={}, cursor_slice=record.associated_slice.cursor_slice) if record.associated_slice else None,
+            StreamSlice(partition={}, cursor_slice=record.associated_slice.cursor_slice)
+            if record.associated_slice
+            else None,
         )

     def _get_cursor(self, record: Record) -> DeclarativeCursor:
         if not record.associated_slice:
-            raise ValueError("Invalid state as stream slices that are emitted should refer to an existing cursor")
+            raise ValueError(
+                "Invalid state as stream slices that are emitted should refer to an existing cursor"
+            )
         partition_key = self._to_partition_key(record.associated_slice.partition)
         if partition_key not in self._cursor_per_partition:
-            raise ValueError("Invalid state as stream slices that are emitted should refer to an existing cursor")
+            raise ValueError(
+                "Invalid state as stream slices that are emitted should refer to an existing cursor"
+            )
         cursor = self._cursor_per_partition[partition_key]
         return cursor
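
Editor's note: PerPartitionCursor keeps one child cursor per partition and, as the popitem(last=False) hunk above shows, evicts the oldest entry once the partition limit is reached. The bookkeeping reduces to an OrderedDict keyed by a stable serialization of the partition; a sketch with a small illustrative limit (the CDK's DEFAULT_MAX_PARTITIONS_NUMBER is larger):

# Sketch of the per-partition bookkeeping: cursors are stored in insertion
# order under a stable JSON key, and the oldest partition is dropped once
# the limit is hit. MAX_PARTITIONS here is illustrative, not the CDK value.
import json
from collections import OrderedDict

MAX_PARTITIONS = 3

def to_partition_key(partition: dict) -> str:
    return json.dumps(partition, sort_keys=True)  # stable key for dict partitions

cursors: "OrderedDict[str, dict]" = OrderedDict()
for partition in ({"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}):
    while len(cursors) > MAX_PARTITIONS - 1:
        oldest_partition = cursors.popitem(last=False)[0]  # remove the oldest partition
        print(f"dropping {oldest_partition}")
    cursors[to_partition_key(partition)] = {"cursor": {}}

print(list(cursors))  # the three most recent partitions remain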

airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py

@@ -5,8 +5,14 @@ from typing import Any, Iterable, Mapping, MutableMapping, Optional, Union

 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
 from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
-from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import GlobalSubstreamCursor, iterate_with_last_flag_and_state
-from airbyte_cdk.sources.declarative.incremental.per_partition_cursor import CursorFactory, PerPartitionCursor
+from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
+    GlobalSubstreamCursor,
+    iterate_with_last_flag_and_state,
+)
+from airbyte_cdk.sources.declarative.incremental.per_partition_cursor import (
+    CursorFactory,
+    PerPartitionCursor,
+)
 from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
 from airbyte_cdk.sources.types import Record, StreamSlice, StreamState

@@ -60,7 +66,12 @@ class PerPartitionWithGlobalCursor(DeclarativeCursor):
     Suitable for streams where the number of partitions may vary significantly, requiring dynamic switching between per-partition and global state management to ensure data consistency and efficient synchronization.
     """

-    def __init__(self, cursor_factory: CursorFactory, partition_router: PartitionRouter, stream_cursor: DatetimeBasedCursor):
+    def __init__(
+        self,
+        cursor_factory: CursorFactory,
+        partition_router: PartitionRouter,
+        stream_cursor: DatetimeBasedCursor,
+    ):
         self._partition_router = partition_router
         self._per_partition_cursor = PerPartitionCursor(cursor_factory, partition_router)
         self._global_cursor = GlobalSubstreamCursor(stream_cursor, partition_router)
@@ -82,7 +93,8 @@ class PerPartitionWithGlobalCursor(DeclarativeCursor):
             # Generate slices for the current cursor and handle the last slice using the flag
             self._parent_state = parent_state
             for slice, is_last_slice, _ in iterate_with_last_flag_and_state(
-                self._get_active_cursor().generate_slices_from_partition(partition=partition), lambda: None
+                self._get_active_cursor().generate_slices_from_partition(partition=partition),
+                lambda: None,
             ):
                 self._global_cursor.register_slice(is_last_slice and is_last_partition)
                 yield slice
@@ -182,7 +194,9 @@ class PerPartitionWithGlobalCursor(DeclarativeCursor):
         )

     def should_be_synced(self, record: Record) -> bool:
-        return self._global_cursor.should_be_synced(record) or self._per_partition_cursor.should_be_synced(record)
+        return self._global_cursor.should_be_synced(
+            record
+        ) or self._per_partition_cursor.should_be_synced(record)

     def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
         return self._global_cursor.is_greater_than_or_equal(first, second)
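
Editor's note: PerPartitionWithGlobalCursor runs a per-partition cursor and a global cursor side by side and, per its docstring, switches to the global one when there are too many partitions to track individually. A hypothetical sketch of that switching logic; the class and method names are illustrative, not the CDK internals:

# Hypothetical sketch of the per-partition/global fallback. A record is
# synced if either view of the state considers it new enough, matching the
# boolean OR in should_be_synced above.
class DualCursor:
    def __init__(self, per_partition_cursor, global_cursor, partition_limit: int) -> None:
        self._per_partition = per_partition_cursor
        self._global = global_cursor
        self._limit = partition_limit
        self._use_global = False

    def observe_partition_count(self, count: int) -> None:
        if count > self._limit:
            self._use_global = True  # too many partitions to track one by one

    def _get_active_cursor(self):
        return self._global if self._use_global else self._per_partition

    def should_be_synced(self, record) -> bool:
        return self._global.should_be_synced(record) or self._per_partition.should_be_synced(record)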

airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py

@@ -30,7 +30,9 @@ class ResumableFullRefreshCursor(DeclarativeCursor):
     def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
         # The ResumableFullRefreshCursor doesn't support nested streams yet so receiving a partition is unexpected
         if stream_slice.partition:
-            raise ValueError(f"Stream slice {stream_slice} should not have a partition. Got {stream_slice.partition}.")
+            raise ValueError(
+                f"Stream slice {stream_slice} should not have a partition. Got {stream_slice.partition}."
+            )
         self._cursor = stream_slice.cursor_slice

     def should_be_synced(self, record: Record) -> bool:

airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py

@@ -8,7 +8,21 @@ from typing import Any, Final, List, Mapping
 from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation
 from airbyte_cdk.sources.types import Config

-FALSE_VALUES: Final[List[Any]] = ["False", "false", "{}", "[]", "()", "", "0", "0.0", {}, False, [], (), set()]
+FALSE_VALUES: Final[List[Any]] = [
+    "False",
+    "false",
+    "{}",
+    "[]",
+    "()",
+    "",
+    "0",
+    "0.0",
+    {},
+    False,
+    [],
+    (),
+    set(),
+]


 @dataclass
@@ -40,7 +54,11 @@ class InterpolatedBoolean:
             return self.condition
         else:
             evaluated = self._interpolation.eval(
-                self.condition, config, self._default, parameters=self._parameters, **additional_parameters
+                self.condition,
+                config,
+                self._default,
+                parameters=self._parameters,
+                **additional_parameters,
             )
             if evaluated in FALSE_VALUES:
                 return False
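
Editor's note: the FALSE_VALUES list above enumerates the only values an interpolated condition may evaluate to and still count as false; everything else is truthy. A quick demonstration of those semantics, simplified from InterpolatedBoolean.eval (the real method also handles defaults and interpolation):

# Demonstration of the FALSE_VALUES membership test above. The check is
# `in`, i.e. equality against each listed value, not Python truthiness.
FALSE_VALUES = ["False", "false", "{}", "[]", "()", "", "0", "0.0", {}, False, [], (), set()]

def eval_boolean(evaluated) -> bool:
    return evaluated not in FALSE_VALUES

print(eval_boolean("false"))  # False: the string matches the list
print(eval_boolean(0))        # False: 0 == False, so membership matches
print(eval_boolean("yes"))    # True: anything not listed is truthy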