airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. airbyte_cdk/__init__.py +17 -2
  2. airbyte_cdk/config_observation.py +10 -3
  3. airbyte_cdk/connector.py +19 -9
  4. airbyte_cdk/connector_builder/connector_builder_handler.py +28 -8
  5. airbyte_cdk/connector_builder/main.py +26 -6
  6. airbyte_cdk/connector_builder/message_grouper.py +95 -25
  7. airbyte_cdk/destinations/destination.py +47 -14
  8. airbyte_cdk/destinations/vector_db_based/config.py +36 -14
  9. airbyte_cdk/destinations/vector_db_based/document_processor.py +49 -11
  10. airbyte_cdk/destinations/vector_db_based/embedder.py +52 -11
  11. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  12. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  13. airbyte_cdk/destinations/vector_db_based/writer.py +15 -4
  14. airbyte_cdk/entrypoint.py +82 -26
  15. airbyte_cdk/exception_handler.py +13 -3
  16. airbyte_cdk/logger.py +10 -2
  17. airbyte_cdk/models/airbyte_protocol.py +11 -5
  18. airbyte_cdk/models/airbyte_protocol_serializers.py +9 -3
  19. airbyte_cdk/models/well_known_types.py +1 -1
  20. airbyte_cdk/sources/abstract_source.py +63 -17
  21. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +47 -14
  22. airbyte_cdk/sources/concurrent_source/concurrent_source.py +25 -7
  23. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +27 -6
  24. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +9 -3
  25. airbyte_cdk/sources/connector_state_manager.py +32 -10
  26. airbyte_cdk/sources/declarative/async_job/job.py +3 -1
  27. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +68 -14
  28. airbyte_cdk/sources/declarative/async_job/job_tracker.py +24 -6
  29. airbyte_cdk/sources/declarative/async_job/repository.py +3 -1
  30. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  31. airbyte_cdk/sources/declarative/auth/jwt.py +27 -7
  32. airbyte_cdk/sources/declarative/auth/oauth.py +35 -11
  33. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +3 -1
  34. airbyte_cdk/sources/declarative/auth/token.py +25 -8
  35. airbyte_cdk/sources/declarative/checks/check_stream.py +12 -4
  36. airbyte_cdk/sources/declarative/checks/connection_checker.py +3 -1
  37. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +11 -3
  38. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +106 -50
  39. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +20 -6
  40. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +43 -0
  41. airbyte_cdk/sources/declarative/declarative_source.py +3 -1
  42. airbyte_cdk/sources/declarative/declarative_stream.py +27 -6
  43. airbyte_cdk/sources/declarative/decoders/__init__.py +2 -2
  44. airbyte_cdk/sources/declarative/decoders/decoder.py +3 -1
  45. airbyte_cdk/sources/declarative/decoders/json_decoder.py +48 -13
  46. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +3 -1
  47. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +6 -2
  48. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +6 -2
  49. airbyte_cdk/sources/declarative/extractors/record_filter.py +24 -7
  50. airbyte_cdk/sources/declarative/extractors/record_selector.py +10 -3
  51. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +15 -5
  52. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +96 -31
  53. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +22 -8
  54. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +46 -15
  55. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +19 -5
  56. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +3 -1
  57. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +20 -2
  58. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +5 -1
  59. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +10 -3
  60. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +6 -2
  61. airbyte_cdk/sources/declarative/interpolation/interpolation.py +7 -1
  62. airbyte_cdk/sources/declarative/interpolation/jinja.py +6 -2
  63. airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
  64. airbyte_cdk/sources/declarative/manifest_declarative_source.py +106 -24
  65. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +14 -5
  66. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +697 -678
  67. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +13 -4
  68. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +9 -2
  69. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +802 -232
  70. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +29 -7
  71. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +25 -7
  72. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +54 -15
  73. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +6 -2
  74. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +3 -1
  75. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +17 -5
  76. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +15 -5
  77. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +3 -1
  78. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +18 -8
  79. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +16 -7
  80. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +51 -14
  81. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +29 -8
  82. airbyte_cdk/sources/declarative/requesters/http_requester.py +58 -16
  83. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +49 -14
  84. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +3 -1
  85. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +3 -1
  86. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +17 -5
  87. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +24 -7
  88. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +9 -3
  89. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +3 -1
  90. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +6 -2
  91. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +19 -6
  92. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +3 -1
  93. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +21 -7
  94. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +18 -6
  95. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +27 -8
  96. airbyte_cdk/sources/declarative/requesters/requester.py +3 -1
  97. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -5
  98. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +105 -24
  99. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +3 -1
  100. airbyte_cdk/sources/declarative/spec/spec.py +8 -2
  101. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +3 -1
  102. airbyte_cdk/sources/declarative/transformations/add_fields.py +12 -3
  103. airbyte_cdk/sources/declarative/transformations/remove_fields.py +6 -2
  104. airbyte_cdk/sources/declarative/types.py +8 -1
  105. airbyte_cdk/sources/declarative/yaml_declarative_source.py +3 -1
  106. airbyte_cdk/sources/embedded/base_integration.py +14 -4
  107. airbyte_cdk/sources/embedded/catalog.py +16 -4
  108. airbyte_cdk/sources/embedded/runner.py +19 -3
  109. airbyte_cdk/sources/embedded/tools.py +3 -1
  110. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
  111. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +27 -7
  112. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +12 -6
  113. airbyte_cdk/sources/file_based/config/csv_format.py +21 -9
  114. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +6 -2
  115. airbyte_cdk/sources/file_based/config/unstructured_format.py +10 -3
  116. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  117. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  118. airbyte_cdk/sources/file_based/exceptions.py +13 -15
  119. airbyte_cdk/sources/file_based/file_based_source.py +82 -24
  120. airbyte_cdk/sources/file_based/file_based_stream_reader.py +16 -5
  121. airbyte_cdk/sources/file_based/file_types/avro_parser.py +58 -17
  122. airbyte_cdk/sources/file_based/file_types/csv_parser.py +89 -26
  123. airbyte_cdk/sources/file_based/file_types/excel_parser.py +25 -7
  124. airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
  125. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  126. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +20 -6
  127. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +57 -16
  128. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +64 -15
  129. airbyte_cdk/sources/file_based/schema_helpers.py +33 -10
  130. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  131. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  132. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +33 -10
  133. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +47 -11
  134. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +13 -22
  135. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +53 -17
  136. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +17 -5
  137. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  138. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +26 -9
  139. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +67 -21
  140. airbyte_cdk/sources/http_logger.py +5 -1
  141. airbyte_cdk/sources/message/repository.py +18 -4
  142. airbyte_cdk/sources/source.py +17 -7
  143. airbyte_cdk/sources/streams/availability_strategy.py +9 -3
  144. airbyte_cdk/sources/streams/call_rate.py +63 -19
  145. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +31 -7
  146. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +6 -2
  147. airbyte_cdk/sources/streams/concurrent/adapters.py +77 -22
  148. airbyte_cdk/sources/streams/concurrent/cursor.py +56 -20
  149. airbyte_cdk/sources/streams/concurrent/default_stream.py +9 -2
  150. airbyte_cdk/sources/streams/concurrent/helpers.py +6 -2
  151. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +9 -2
  152. airbyte_cdk/sources/streams/concurrent/partition_reader.py +4 -1
  153. airbyte_cdk/sources/streams/concurrent/partitions/record.py +10 -2
  154. airbyte_cdk/sources/streams/concurrent/partitions/types.py +6 -2
  155. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +25 -10
  156. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +32 -16
  157. airbyte_cdk/sources/streams/core.py +77 -22
  158. airbyte_cdk/sources/streams/http/availability_strategy.py +3 -1
  159. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +4 -1
  160. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +3 -1
  161. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +16 -5
  162. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +9 -3
  163. airbyte_cdk/sources/streams/http/exceptions.py +2 -2
  164. airbyte_cdk/sources/streams/http/http.py +133 -33
  165. airbyte_cdk/sources/streams/http/http_client.py +91 -29
  166. airbyte_cdk/sources/streams/http/rate_limiting.py +23 -7
  167. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +19 -6
  168. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +38 -11
  169. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  170. airbyte_cdk/sources/types.py +5 -1
  171. airbyte_cdk/sources/utils/record_helper.py +12 -3
  172. airbyte_cdk/sources/utils/schema_helpers.py +9 -3
  173. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  174. airbyte_cdk/sources/utils/transform.py +24 -9
  175. airbyte_cdk/sql/exceptions.py +19 -6
  176. airbyte_cdk/sql/secrets.py +3 -1
  177. airbyte_cdk/sql/shared/catalog_providers.py +13 -4
  178. airbyte_cdk/sql/shared/sql_processor.py +44 -14
  179. airbyte_cdk/test/catalog_builder.py +19 -8
  180. airbyte_cdk/test/entrypoint_wrapper.py +27 -8
  181. airbyte_cdk/test/mock_http/mocker.py +41 -11
  182. airbyte_cdk/test/mock_http/request.py +9 -3
  183. airbyte_cdk/test/mock_http/response.py +3 -1
  184. airbyte_cdk/test/mock_http/response_builder.py +29 -7
  185. airbyte_cdk/test/state_builder.py +10 -2
  186. airbyte_cdk/test/utils/data.py +6 -2
  187. airbyte_cdk/test/utils/http_mocking.py +3 -1
  188. airbyte_cdk/utils/airbyte_secrets_utils.py +3 -1
  189. airbyte_cdk/utils/analytics_message.py +10 -2
  190. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  191. airbyte_cdk/utils/mapping_helpers.py +3 -1
  192. airbyte_cdk/utils/message_utils.py +11 -4
  193. airbyte_cdk/utils/print_buffer.py +6 -1
  194. airbyte_cdk/utils/schema_inferrer.py +30 -9
  195. airbyte_cdk/utils/spec_schema_transformations.py +3 -1
  196. airbyte_cdk/utils/traced_exception.py +35 -9
  197. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/METADATA +8 -7
  198. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/RECORD +200 -200
  199. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/LICENSE.txt +0 -0
  200. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/WHEEL +0 -0
@@ -11,7 +11,9 @@ from airbyte_cdk.sources.message import MessageRepository
11
11
  from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
12
12
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
13
13
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
14
- from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import AbstractStreamStateConverter
14
+ from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
15
+ AbstractStreamStateConverter,
16
+ )
15
17
 
16
18
 
17
19
  def _extract_value(mapping: Mapping[str, Any], path: List[str]) -> Any:
@@ -62,8 +64,7 @@ class CursorField:
62
64
  class Cursor(ABC):
63
65
  @property
64
66
  @abstractmethod
65
- def state(self) -> MutableMapping[str, Any]:
66
- ...
67
+ def state(self) -> MutableMapping[str, Any]: ...
67
68
 
68
69
  @abstractmethod
69
70
  def observe(self, record: Record) -> None:
@@ -128,8 +129,12 @@ class FinalStateCursor(Cursor):
128
129
  Used primarily for full refresh syncs that do not have a valid cursor value to emit at the end of a sync
129
130
  """
130
131
 
131
- self._connector_state_manager.update_state_for_stream(self._stream_name, self._stream_namespace, self.state)
132
- state_message = self._connector_state_manager.create_state_message(self._stream_name, self._stream_namespace)
132
+ self._connector_state_manager.update_state_for_stream(
133
+ self._stream_name, self._stream_namespace, self.state
134
+ )
135
+ state_message = self._connector_state_manager.create_state_message(
136
+ self._stream_name, self._stream_namespace
137
+ )
133
138
  self._message_repository.emit_message(state_message)
134
139
 
135
140
 
@@ -182,13 +187,22 @@ class ConcurrentCursor(Cursor):
182
187
  def slice_boundary_fields(self) -> Optional[Tuple[str, str]]:
183
188
  return self._slice_boundary_fields
184
189
 
185
- def _get_concurrent_state(self, state: MutableMapping[str, Any]) -> Tuple[CursorValueType, MutableMapping[str, Any]]:
190
+ def _get_concurrent_state(
191
+ self, state: MutableMapping[str, Any]
192
+ ) -> Tuple[CursorValueType, MutableMapping[str, Any]]:
186
193
  if self._connector_state_converter.is_state_message_compatible(state):
187
- return self._start or self._connector_state_converter.zero_value, self._connector_state_converter.deserialize(state)
188
- return self._connector_state_converter.convert_from_sequential_state(self._cursor_field, state, self._start)
194
+ return (
195
+ self._start or self._connector_state_converter.zero_value,
196
+ self._connector_state_converter.deserialize(state),
197
+ )
198
+ return self._connector_state_converter.convert_from_sequential_state(
199
+ self._cursor_field, state, self._start
200
+ )
189
201
 
190
202
  def observe(self, record: Record) -> None:
191
- most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(record.partition)
203
+ most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(
204
+ record.partition
205
+ )
192
206
  cursor_value = self._extract_cursor_value(record)
193
207
 
194
208
  if most_recent_cursor_value is None or most_recent_cursor_value < cursor_value:
@@ -200,7 +214,9 @@ class ConcurrentCursor(Cursor):
200
214
  def close_partition(self, partition: Partition) -> None:
201
215
  slice_count_before = len(self.state.get("slices", []))
202
216
  self._add_slice_to_state(partition)
203
- if slice_count_before < len(self.state["slices"]): # only emit if at least one slice has been processed
217
+ if slice_count_before < len(
218
+ self.state["slices"]
219
+ ): # only emit if at least one slice has been processed
204
220
  self._merge_partitions()
205
221
  self._emit_state_message()
206
222
  self._has_closed_at_least_one_slice = True
@@ -253,9 +269,13 @@ class ConcurrentCursor(Cursor):
253
269
  self._connector_state_manager.update_state_for_stream(
254
270
  self._stream_name,
255
271
  self._stream_namespace,
256
- self._connector_state_converter.convert_to_state_message(self._cursor_field, self.state),
272
+ self._connector_state_converter.convert_to_state_message(
273
+ self._cursor_field, self.state
274
+ ),
275
+ )
276
+ state_message = self._connector_state_manager.create_state_message(
277
+ self._stream_name, self._stream_namespace
257
278
  )
258
- state_message = self._connector_state_manager.create_state_message(self._stream_name, self._stream_namespace)
259
279
  self._message_repository.emit_message(state_message)
260
280
 
261
281
  def _merge_partitions(self) -> None:
@@ -268,7 +288,9 @@ class ConcurrentCursor(Cursor):
268
288
  raise KeyError(f"Could not find key `{key}` in empty slice")
269
289
  return self._connector_state_converter.parse_value(_slice[key]) # type: ignore # we expect the devs to specify a key that would return a CursorValueType
270
290
  except KeyError as exception:
271
- raise KeyError(f"Partition is expected to have key `{key}` but could not be found") from exception
291
+ raise KeyError(
292
+ f"Partition is expected to have key `{key}` but could not be found"
293
+ ) from exception
272
294
 
273
295
  def ensure_at_least_one_state_emitted(self) -> None:
274
296
  """
@@ -300,7 +322,9 @@ class ConcurrentCursor(Cursor):
300
322
 
301
323
  if len(self.state["slices"]) == 1:
302
324
  yield from self._split_per_slice_range(
303
- self._calculate_lower_boundary_of_last_slice(self.state["slices"][0][self._connector_state_converter.END_KEY]),
325
+ self._calculate_lower_boundary_of_last_slice(
326
+ self.state["slices"][0][self._connector_state_converter.END_KEY]
327
+ ),
304
328
  self._end_provider(),
305
329
  True,
306
330
  )
@@ -308,7 +332,8 @@ class ConcurrentCursor(Cursor):
308
332
  for i in range(len(self.state["slices"]) - 1):
309
333
  if self._cursor_granularity:
310
334
  yield from self._split_per_slice_range(
311
- self.state["slices"][i][self._connector_state_converter.END_KEY] + self._cursor_granularity,
335
+ self.state["slices"][i][self._connector_state_converter.END_KEY]
336
+ + self._cursor_granularity,
312
337
  self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
313
338
  False,
314
339
  )
@@ -319,7 +344,9 @@ class ConcurrentCursor(Cursor):
319
344
  False,
320
345
  )
321
346
  yield from self._split_per_slice_range(
322
- self._calculate_lower_boundary_of_last_slice(self.state["slices"][-1][self._connector_state_converter.END_KEY]),
347
+ self._calculate_lower_boundary_of_last_slice(
348
+ self.state["slices"][-1][self._connector_state_converter.END_KEY]
349
+ ),
323
350
  self._end_provider(),
324
351
  True,
325
352
  )
@@ -327,9 +354,14 @@ class ConcurrentCursor(Cursor):
327
354
  raise ValueError("Expected at least one slice")
328
355
 
329
356
  def _is_start_before_first_slice(self) -> bool:
330
- return self._start is not None and self._start < self.state["slices"][0][self._connector_state_converter.START_KEY]
357
+ return (
358
+ self._start is not None
359
+ and self._start < self.state["slices"][0][self._connector_state_converter.START_KEY]
360
+ )
331
361
 
332
- def _calculate_lower_boundary_of_last_slice(self, lower_boundary: CursorValueType) -> CursorValueType:
362
+ def _calculate_lower_boundary_of_last_slice(
363
+ self, lower_boundary: CursorValueType
364
+ ) -> CursorValueType:
333
365
  if self._lookback_window:
334
366
  return lower_boundary - self._lookback_window
335
367
  return lower_boundary
@@ -353,9 +385,13 @@ class ConcurrentCursor(Cursor):
353
385
  stop_processing = False
354
386
  current_lower_boundary = lower
355
387
  while not stop_processing:
356
- current_upper_boundary = min(self._evaluate_upper_safely(current_lower_boundary, self._slice_range), upper)
388
+ current_upper_boundary = min(
389
+ self._evaluate_upper_safely(current_lower_boundary, self._slice_range), upper
390
+ )
357
391
  has_reached_upper_boundary = current_upper_boundary >= upper
358
- if self._cursor_granularity and (not upper_is_end or not has_reached_upper_boundary):
392
+ if self._cursor_granularity and (
393
+ not upper_is_end or not has_reached_upper_boundary
394
+ ):
359
395
  yield current_lower_boundary, current_upper_boundary - self._cursor_granularity
360
396
  else:
361
397
  yield current_lower_boundary, current_upper_boundary
@@ -8,7 +8,10 @@ from typing import Any, Iterable, List, Mapping, Optional
8
8
 
9
9
  from airbyte_cdk.models import AirbyteStream, SyncMode
10
10
  from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
11
- from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability
11
+ from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
12
+ AbstractAvailabilityStrategy,
13
+ StreamAvailability,
14
+ )
12
15
  from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
13
16
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
14
17
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
@@ -60,7 +63,11 @@ class DefaultStream(AbstractStream):
60
63
  return self._json_schema
61
64
 
62
65
  def as_airbyte_stream(self) -> AirbyteStream:
63
- stream = AirbyteStream(name=self.name, json_schema=dict(self._json_schema), supported_sync_modes=[SyncMode.full_refresh])
66
+ stream = AirbyteStream(
67
+ name=self.name,
68
+ json_schema=dict(self._json_schema),
69
+ supported_sync_modes=[SyncMode.full_refresh],
70
+ )
64
71
 
65
72
  if self._namespace:
66
73
  stream.namespace = self._namespace
@@ -5,7 +5,9 @@ from typing import List, Optional, Union
5
5
  from airbyte_cdk.sources.streams import Stream
6
6
 
7
7
 
8
- def get_primary_key_from_stream(stream_primary_key: Optional[Union[str, List[str], List[List[str]]]]) -> List[str]:
8
+ def get_primary_key_from_stream(
9
+ stream_primary_key: Optional[Union[str, List[str], List[List[str]]]],
10
+ ) -> List[str]:
9
11
  if stream_primary_key is None:
10
12
  return []
11
13
  elif isinstance(stream_primary_key, str):
@@ -22,7 +24,9 @@ def get_primary_key_from_stream(stream_primary_key: Optional[Union[str, List[str
22
24
  def get_cursor_field_from_stream(stream: Stream) -> Optional[str]:
23
25
  if isinstance(stream.cursor_field, list):
24
26
  if len(stream.cursor_field) > 1:
25
- raise ValueError(f"Nested cursor fields are not supported. Got {stream.cursor_field} for {stream.name}")
27
+ raise ValueError(
28
+ f"Nested cursor fields are not supported. Got {stream.cursor_field} for {stream.name}"
29
+ )
26
30
  elif len(stream.cursor_field) == 0:
27
31
  return None
28
32
  else:
@@ -4,7 +4,9 @@
4
4
  import time
5
5
  from queue import Queue
6
6
 
7
- from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
7
+ from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import (
8
+ PartitionGenerationCompletedSentinel,
9
+ )
8
10
  from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException
9
11
  from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
10
12
  from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
@@ -16,7 +18,12 @@ class PartitionEnqueuer:
16
18
  Generates partitions from a partition generator and puts them in a queue.
17
19
  """
18
20
 
19
- def __init__(self, queue: Queue[QueueItem], thread_pool_manager: ThreadPoolManager, sleep_time_in_seconds: float = 0.1) -> None:
21
+ def __init__(
22
+ self,
23
+ queue: Queue[QueueItem],
24
+ thread_pool_manager: ThreadPoolManager,
25
+ sleep_time_in_seconds: float = 0.1,
26
+ ) -> None:
20
27
  """
21
28
  :param queue: The queue to put the partitions in.
22
29
  :param throttler: The throttler to use to throttle the partition generation.
@@ -5,7 +5,10 @@ from queue import Queue
5
5
 
6
6
  from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException
7
7
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
8
- from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCompleteSentinel, QueueItem
8
+ from airbyte_cdk.sources.streams.concurrent.partitions.types import (
9
+ PartitionCompleteSentinel,
10
+ QueueItem,
11
+ )
9
12
 
10
13
 
11
14
  class PartitionReader:
@@ -13,7 +13,12 @@ class Record:
13
13
  Represents a record read from a stream.
14
14
  """
15
15
 
16
- def __init__(self, data: Mapping[str, Any], partition: "Partition", is_file_transfer_message: bool = False):
16
+ def __init__(
17
+ self,
18
+ data: Mapping[str, Any],
19
+ partition: "Partition",
20
+ is_file_transfer_message: bool = False,
21
+ ):
17
22
  self.data = data
18
23
  self.partition = partition
19
24
  self.is_file_transfer_message = is_file_transfer_message
@@ -21,7 +26,10 @@ class Record:
21
26
  def __eq__(self, other: Any) -> bool:
22
27
  if not isinstance(other, Record):
23
28
  return False
24
- return self.data == other.data and self.partition.stream_name() == other.partition.stream_name()
29
+ return (
30
+ self.data == other.data
31
+ and self.partition.stream_name() == other.partition.stream_name()
32
+ )
25
33
 
26
34
  def __repr__(self) -> str:
27
35
  return f"Record(data={self.data}, stream_name={self.partition.stream_name()})"
@@ -4,7 +4,9 @@
4
4
 
5
5
  from typing import Any, Union
6
6
 
7
- from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
7
+ from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import (
8
+ PartitionGenerationCompletedSentinel,
9
+ )
8
10
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
9
11
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
10
12
 
@@ -31,4 +33,6 @@ class PartitionCompleteSentinel:
31
33
  """
32
34
  Typedef representing the items that can be added to the ThreadBasedConcurrentStream
33
35
  """
34
- QueueItem = Union[Record, Partition, PartitionCompleteSentinel, PartitionGenerationCompletedSentinel, Exception]
36
+ QueueItem = Union[
37
+ Record, Partition, PartitionCompleteSentinel, PartitionGenerationCompletedSentinel, Exception
38
+ ]
@@ -30,7 +30,9 @@ class AbstractStreamStateConverter(ABC):
30
30
  def __init__(self, is_sequential_state: bool = True):
31
31
  self._is_sequential_state = is_sequential_state
32
32
 
33
- def convert_to_state_message(self, cursor_field: "CursorField", stream_state: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
33
+ def convert_to_state_message(
34
+ self, cursor_field: "CursorField", stream_state: MutableMapping[str, Any]
35
+ ) -> MutableMapping[str, Any]:
34
36
  """
35
37
  Convert the state message from the concurrency-compatible format to the stream's original format.
36
38
 
@@ -41,7 +43,9 @@ class AbstractStreamStateConverter(ABC):
41
43
  legacy_state = stream_state.get("legacy", {})
42
44
  latest_complete_time = self._get_latest_complete_time(stream_state.get("slices", []))
43
45
  if latest_complete_time is not None:
44
- legacy_state.update({cursor_field.cursor_field_key: self._to_state_message(latest_complete_time)})
46
+ legacy_state.update(
47
+ {cursor_field.cursor_field_key: self._to_state_message(latest_complete_time)}
48
+ )
45
49
  return legacy_state or {}
46
50
  else:
47
51
  return self.serialize(stream_state, ConcurrencyCompatibleStateType.date_range)
@@ -51,7 +55,9 @@ class AbstractStreamStateConverter(ABC):
51
55
  Get the latest time before which all records have been processed.
52
56
  """
53
57
  if not slices:
54
- raise RuntimeError("Expected at least one slice but there were none. This is unexpected; please contact Support.")
58
+ raise RuntimeError(
59
+ "Expected at least one slice but there were none. This is unexpected; please contact Support."
60
+ )
55
61
  merged_intervals = self.merge_intervals(slices)
56
62
  first_interval = merged_intervals[0]
57
63
 
@@ -66,7 +72,9 @@ class AbstractStreamStateConverter(ABC):
66
72
  stream_slice[self.END_KEY] = self._from_state_message(stream_slice[self.END_KEY])
67
73
  return state
68
74
 
69
- def serialize(self, state: MutableMapping[str, Any], state_type: ConcurrencyCompatibleStateType) -> MutableMapping[str, Any]:
75
+ def serialize(
76
+ self, state: MutableMapping[str, Any], state_type: ConcurrencyCompatibleStateType
77
+ ) -> MutableMapping[str, Any]:
70
78
  """
71
79
  Perform any transformations needed for compatibility with the converter.
72
80
  """
@@ -77,13 +85,17 @@ class AbstractStreamStateConverter(ABC):
77
85
  self.END_KEY: self._to_state_message(stream_slice[self.END_KEY]),
78
86
  }
79
87
  if stream_slice.get(self.MOST_RECENT_RECORD_KEY):
80
- serialized_slice[self.MOST_RECENT_RECORD_KEY] = self._to_state_message(stream_slice[self.MOST_RECENT_RECORD_KEY])
88
+ serialized_slice[self.MOST_RECENT_RECORD_KEY] = self._to_state_message(
89
+ stream_slice[self.MOST_RECENT_RECORD_KEY]
90
+ )
81
91
  serialized_slices.append(serialized_slice)
82
92
  return {"slices": serialized_slices, "state_type": state_type.value}
83
93
 
84
94
  @staticmethod
85
95
  def is_state_message_compatible(state: MutableMapping[str, Any]) -> bool:
86
- return bool(state) and state.get("state_type") in [t.value for t in ConcurrencyCompatibleStateType]
96
+ return bool(state) and state.get("state_type") in [
97
+ t.value for t in ConcurrencyCompatibleStateType
98
+ ]
87
99
 
88
100
  @abstractmethod
89
101
  def convert_from_sequential_state(
@@ -112,7 +124,9 @@ class AbstractStreamStateConverter(ABC):
112
124
  """
113
125
  ...
114
126
 
115
- def merge_intervals(self, intervals: List[MutableMapping[str, Any]]) -> List[MutableMapping[str, Any]]:
127
+ def merge_intervals(
128
+ self, intervals: List[MutableMapping[str, Any]]
129
+ ) -> List[MutableMapping[str, Any]]:
116
130
  """
117
131
  Compute and return a list of merged intervals.
118
132
 
@@ -122,7 +136,9 @@ class AbstractStreamStateConverter(ABC):
122
136
  if not intervals:
123
137
  return []
124
138
 
125
- sorted_intervals = sorted(intervals, key=lambda interval: (interval[self.START_KEY], interval[self.END_KEY]))
139
+ sorted_intervals = sorted(
140
+ intervals, key=lambda interval: (interval[self.START_KEY], interval[self.END_KEY])
141
+ )
126
142
  merged_intervals = [sorted_intervals[0]]
127
143
 
128
144
  for current_interval in sorted_intervals[1:]:
@@ -155,5 +171,4 @@ class AbstractStreamStateConverter(ABC):
155
171
 
156
172
  @property
157
173
  @abstractmethod
158
- def zero_value(self) -> Any:
159
- ...
174
+ def zero_value(self) -> Any: ...
@@ -28,8 +28,7 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
28
28
 
29
29
  @property
30
30
  @abstractmethod
31
- def _zero_value(self) -> Any:
32
- ...
31
+ def _zero_value(self) -> Any: ...
33
32
 
34
33
  @property
35
34
  def zero_value(self) -> datetime:
@@ -40,16 +39,13 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
40
39
  return lambda: datetime.now(timezone.utc)
41
40
 
42
41
  @abstractmethod
43
- def increment(self, timestamp: datetime) -> datetime:
44
- ...
42
+ def increment(self, timestamp: datetime) -> datetime: ...
45
43
 
46
44
  @abstractmethod
47
- def parse_timestamp(self, timestamp: Any) -> datetime:
48
- ...
45
+ def parse_timestamp(self, timestamp: Any) -> datetime: ...
49
46
 
50
47
  @abstractmethod
51
- def output_format(self, timestamp: datetime) -> Any:
52
- ...
48
+ def output_format(self, timestamp: datetime) -> Any: ...
53
49
 
54
50
  def parse_value(self, value: Any) -> Any:
55
51
  """
@@ -61,7 +57,10 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
61
57
  return bool(self.increment(end_time) >= start_time)
62
58
 
63
59
  def convert_from_sequential_state(
64
- self, cursor_field: CursorField, stream_state: MutableMapping[str, Any], start: Optional[datetime]
60
+ self,
61
+ cursor_field: CursorField,
62
+ stream_state: MutableMapping[str, Any],
63
+ start: Optional[datetime],
65
64
  ) -> Tuple[datetime, MutableMapping[str, Any]]:
66
65
  """
67
66
  Convert the state message to the format required by the ConcurrentCursor.
@@ -82,7 +81,9 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
82
81
  # Create a slice to represent the records synced during prior syncs.
83
82
  # The start and end are the same to avoid confusion as to whether the records for this slice
84
83
  # were actually synced
85
- slices = [{self.START_KEY: start if start is not None else sync_start, self.END_KEY: sync_start}]
84
+ slices = [
85
+ {self.START_KEY: start if start is not None else sync_start, self.END_KEY: sync_start}
86
+ ]
86
87
 
87
88
  return sync_start, {
88
89
  "state_type": ConcurrencyCompatibleStateType.date_range.value,
@@ -90,10 +91,17 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
90
91
  "legacy": stream_state,
91
92
  }
92
93
 
93
- def _get_sync_start(self, cursor_field: CursorField, stream_state: MutableMapping[str, Any], start: Optional[datetime]) -> datetime:
94
+ def _get_sync_start(
95
+ self,
96
+ cursor_field: CursorField,
97
+ stream_state: MutableMapping[str, Any],
98
+ start: Optional[datetime],
99
+ ) -> datetime:
94
100
  sync_start = start if start is not None else self.zero_value
95
101
  prev_sync_low_water_mark = (
96
- self.parse_timestamp(stream_state[cursor_field.cursor_field_key]) if cursor_field.cursor_field_key in stream_state else None
102
+ self.parse_timestamp(stream_state[cursor_field.cursor_field_key])
103
+ if cursor_field.cursor_field_key in stream_state
104
+ else None
97
105
  )
98
106
  if prev_sync_low_water_mark and prev_sync_low_water_mark >= sync_start:
99
107
  return prev_sync_low_water_mark
@@ -126,7 +134,9 @@ class EpochValueConcurrentStreamStateConverter(DateTimeStreamStateConverter):
126
134
  def parse_timestamp(self, timestamp: int) -> datetime:
127
135
  dt_object = pendulum.from_timestamp(timestamp)
128
136
  if not isinstance(dt_object, DateTime):
129
- raise ValueError(f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})")
137
+ raise ValueError(
138
+ f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
139
+ )
130
140
  return dt_object # type: ignore # we are manually type checking because pendulum.parse may return different types
131
141
 
132
142
 
@@ -146,7 +156,9 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
146
156
 
147
157
  _zero_value = "0001-01-01T00:00:00.000Z"
148
158
 
149
- def __init__(self, is_sequential_state: bool = True, cursor_granularity: Optional[timedelta] = None):
159
+ def __init__(
160
+ self, is_sequential_state: bool = True, cursor_granularity: Optional[timedelta] = None
161
+ ):
150
162
  super().__init__(is_sequential_state=is_sequential_state)
151
163
  self._cursor_granularity = cursor_granularity or timedelta(milliseconds=1)
152
164
 
@@ -159,7 +171,9 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
159
171
  def parse_timestamp(self, timestamp: str) -> datetime:
160
172
  dt_object = pendulum.parse(timestamp)
161
173
  if not isinstance(dt_object, DateTime):
162
- raise ValueError(f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})")
174
+ raise ValueError(
175
+ f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
176
+ )
163
177
  return dt_object # type: ignore # we are manually type checking because pendulum.parse may return different types
164
178
 
165
179
 
@@ -176,7 +190,9 @@ class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateC
176
190
  is_sequential_state: bool = True,
177
191
  cursor_granularity: Optional[timedelta] = None,
178
192
  ):
179
- super().__init__(is_sequential_state=is_sequential_state, cursor_granularity=cursor_granularity)
193
+ super().__init__(
194
+ is_sequential_state=is_sequential_state, cursor_granularity=cursor_granularity
195
+ )
180
196
  self._datetime_format = datetime_format
181
197
  self._input_datetime_formats = input_datetime_formats if input_datetime_formats else []
182
198
  self._input_datetime_formats += [self._datetime_format]