airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. airbyte_cdk/__init__.py +17 -2
  2. airbyte_cdk/config_observation.py +10 -3
  3. airbyte_cdk/connector.py +19 -9
  4. airbyte_cdk/connector_builder/connector_builder_handler.py +28 -8
  5. airbyte_cdk/connector_builder/main.py +26 -6
  6. airbyte_cdk/connector_builder/message_grouper.py +95 -25
  7. airbyte_cdk/destinations/destination.py +47 -14
  8. airbyte_cdk/destinations/vector_db_based/config.py +36 -14
  9. airbyte_cdk/destinations/vector_db_based/document_processor.py +49 -11
  10. airbyte_cdk/destinations/vector_db_based/embedder.py +52 -11
  11. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  12. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  13. airbyte_cdk/destinations/vector_db_based/writer.py +15 -4
  14. airbyte_cdk/entrypoint.py +82 -26
  15. airbyte_cdk/exception_handler.py +13 -3
  16. airbyte_cdk/logger.py +10 -2
  17. airbyte_cdk/models/airbyte_protocol.py +11 -5
  18. airbyte_cdk/models/airbyte_protocol_serializers.py +9 -3
  19. airbyte_cdk/models/well_known_types.py +1 -1
  20. airbyte_cdk/sources/abstract_source.py +63 -17
  21. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +47 -14
  22. airbyte_cdk/sources/concurrent_source/concurrent_source.py +25 -7
  23. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +27 -6
  24. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +9 -3
  25. airbyte_cdk/sources/connector_state_manager.py +32 -10
  26. airbyte_cdk/sources/declarative/async_job/job.py +3 -1
  27. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +68 -14
  28. airbyte_cdk/sources/declarative/async_job/job_tracker.py +24 -6
  29. airbyte_cdk/sources/declarative/async_job/repository.py +3 -1
  30. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  31. airbyte_cdk/sources/declarative/auth/jwt.py +27 -7
  32. airbyte_cdk/sources/declarative/auth/oauth.py +35 -11
  33. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +3 -1
  34. airbyte_cdk/sources/declarative/auth/token.py +25 -8
  35. airbyte_cdk/sources/declarative/checks/check_stream.py +12 -4
  36. airbyte_cdk/sources/declarative/checks/connection_checker.py +3 -1
  37. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +11 -3
  38. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +106 -50
  39. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +20 -6
  40. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +43 -0
  41. airbyte_cdk/sources/declarative/declarative_source.py +3 -1
  42. airbyte_cdk/sources/declarative/declarative_stream.py +27 -6
  43. airbyte_cdk/sources/declarative/decoders/__init__.py +2 -2
  44. airbyte_cdk/sources/declarative/decoders/decoder.py +3 -1
  45. airbyte_cdk/sources/declarative/decoders/json_decoder.py +48 -13
  46. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +3 -1
  47. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +6 -2
  48. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +6 -2
  49. airbyte_cdk/sources/declarative/extractors/record_filter.py +24 -7
  50. airbyte_cdk/sources/declarative/extractors/record_selector.py +10 -3
  51. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +15 -5
  52. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +96 -31
  53. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +22 -8
  54. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +46 -15
  55. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +19 -5
  56. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +3 -1
  57. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +20 -2
  58. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +5 -1
  59. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +10 -3
  60. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +6 -2
  61. airbyte_cdk/sources/declarative/interpolation/interpolation.py +7 -1
  62. airbyte_cdk/sources/declarative/interpolation/jinja.py +6 -2
  63. airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
  64. airbyte_cdk/sources/declarative/manifest_declarative_source.py +106 -24
  65. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +14 -5
  66. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +697 -678
  67. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +13 -4
  68. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +9 -2
  69. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +802 -232
  70. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +29 -7
  71. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +25 -7
  72. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +54 -15
  73. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +6 -2
  74. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +3 -1
  75. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +17 -5
  76. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +15 -5
  77. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +3 -1
  78. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +18 -8
  79. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +16 -7
  80. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +51 -14
  81. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +29 -8
  82. airbyte_cdk/sources/declarative/requesters/http_requester.py +58 -16
  83. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +49 -14
  84. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +3 -1
  85. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +3 -1
  86. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +17 -5
  87. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +24 -7
  88. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +9 -3
  89. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +3 -1
  90. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +6 -2
  91. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +19 -6
  92. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +3 -1
  93. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +21 -7
  94. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +18 -6
  95. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +27 -8
  96. airbyte_cdk/sources/declarative/requesters/requester.py +3 -1
  97. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -5
  98. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +105 -24
  99. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +3 -1
  100. airbyte_cdk/sources/declarative/spec/spec.py +8 -2
  101. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +3 -1
  102. airbyte_cdk/sources/declarative/transformations/add_fields.py +12 -3
  103. airbyte_cdk/sources/declarative/transformations/remove_fields.py +6 -2
  104. airbyte_cdk/sources/declarative/types.py +8 -1
  105. airbyte_cdk/sources/declarative/yaml_declarative_source.py +3 -1
  106. airbyte_cdk/sources/embedded/base_integration.py +14 -4
  107. airbyte_cdk/sources/embedded/catalog.py +16 -4
  108. airbyte_cdk/sources/embedded/runner.py +19 -3
  109. airbyte_cdk/sources/embedded/tools.py +3 -1
  110. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
  111. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +27 -7
  112. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +12 -6
  113. airbyte_cdk/sources/file_based/config/csv_format.py +21 -9
  114. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +6 -2
  115. airbyte_cdk/sources/file_based/config/unstructured_format.py +10 -3
  116. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  117. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  118. airbyte_cdk/sources/file_based/exceptions.py +13 -15
  119. airbyte_cdk/sources/file_based/file_based_source.py +82 -24
  120. airbyte_cdk/sources/file_based/file_based_stream_reader.py +16 -5
  121. airbyte_cdk/sources/file_based/file_types/avro_parser.py +58 -17
  122. airbyte_cdk/sources/file_based/file_types/csv_parser.py +89 -26
  123. airbyte_cdk/sources/file_based/file_types/excel_parser.py +25 -7
  124. airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
  125. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  126. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +20 -6
  127. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +57 -16
  128. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +64 -15
  129. airbyte_cdk/sources/file_based/schema_helpers.py +33 -10
  130. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  131. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  132. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +33 -10
  133. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +47 -11
  134. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +13 -22
  135. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +53 -17
  136. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +17 -5
  137. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  138. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +26 -9
  139. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +67 -21
  140. airbyte_cdk/sources/http_logger.py +5 -1
  141. airbyte_cdk/sources/message/repository.py +18 -4
  142. airbyte_cdk/sources/source.py +17 -7
  143. airbyte_cdk/sources/streams/availability_strategy.py +9 -3
  144. airbyte_cdk/sources/streams/call_rate.py +63 -19
  145. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +31 -7
  146. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +6 -2
  147. airbyte_cdk/sources/streams/concurrent/adapters.py +77 -22
  148. airbyte_cdk/sources/streams/concurrent/cursor.py +56 -20
  149. airbyte_cdk/sources/streams/concurrent/default_stream.py +9 -2
  150. airbyte_cdk/sources/streams/concurrent/helpers.py +6 -2
  151. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +9 -2
  152. airbyte_cdk/sources/streams/concurrent/partition_reader.py +4 -1
  153. airbyte_cdk/sources/streams/concurrent/partitions/record.py +10 -2
  154. airbyte_cdk/sources/streams/concurrent/partitions/types.py +6 -2
  155. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +25 -10
  156. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +32 -16
  157. airbyte_cdk/sources/streams/core.py +77 -22
  158. airbyte_cdk/sources/streams/http/availability_strategy.py +3 -1
  159. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +4 -1
  160. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +3 -1
  161. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +16 -5
  162. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +9 -3
  163. airbyte_cdk/sources/streams/http/exceptions.py +2 -2
  164. airbyte_cdk/sources/streams/http/http.py +133 -33
  165. airbyte_cdk/sources/streams/http/http_client.py +91 -29
  166. airbyte_cdk/sources/streams/http/rate_limiting.py +23 -7
  167. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +19 -6
  168. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +38 -11
  169. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  170. airbyte_cdk/sources/types.py +5 -1
  171. airbyte_cdk/sources/utils/record_helper.py +12 -3
  172. airbyte_cdk/sources/utils/schema_helpers.py +9 -3
  173. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  174. airbyte_cdk/sources/utils/transform.py +24 -9
  175. airbyte_cdk/sql/exceptions.py +19 -6
  176. airbyte_cdk/sql/secrets.py +3 -1
  177. airbyte_cdk/sql/shared/catalog_providers.py +13 -4
  178. airbyte_cdk/sql/shared/sql_processor.py +44 -14
  179. airbyte_cdk/test/catalog_builder.py +19 -8
  180. airbyte_cdk/test/entrypoint_wrapper.py +27 -8
  181. airbyte_cdk/test/mock_http/mocker.py +41 -11
  182. airbyte_cdk/test/mock_http/request.py +9 -3
  183. airbyte_cdk/test/mock_http/response.py +3 -1
  184. airbyte_cdk/test/mock_http/response_builder.py +29 -7
  185. airbyte_cdk/test/state_builder.py +10 -2
  186. airbyte_cdk/test/utils/data.py +6 -2
  187. airbyte_cdk/test/utils/http_mocking.py +3 -1
  188. airbyte_cdk/utils/airbyte_secrets_utils.py +3 -1
  189. airbyte_cdk/utils/analytics_message.py +10 -2
  190. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  191. airbyte_cdk/utils/mapping_helpers.py +3 -1
  192. airbyte_cdk/utils/message_utils.py +11 -4
  193. airbyte_cdk/utils/print_buffer.py +6 -1
  194. airbyte_cdk/utils/schema_inferrer.py +30 -9
  195. airbyte_cdk/utils/spec_schema_transformations.py +3 -1
  196. airbyte_cdk/utils/traced_exception.py +35 -9
  197. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/METADATA +8 -7
  198. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/RECORD +200 -200
  199. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/LICENSE.txt +0 -0
  200. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/WHEEL +0 -0
@@ -11,7 +11,13 @@ from functools import cached_property, lru_cache
11
11
  from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Union
12
12
 
13
13
  import airbyte_cdk.sources.utils.casing as casing
14
- from airbyte_cdk.models import AirbyteMessage, AirbyteStream, ConfiguredAirbyteStream, DestinationSyncMode, SyncMode
14
+ from airbyte_cdk.models import (
15
+ AirbyteMessage,
16
+ AirbyteStream,
17
+ ConfiguredAirbyteStream,
18
+ DestinationSyncMode,
19
+ SyncMode,
20
+ )
15
21
  from airbyte_cdk.models import Type as MessageType
16
22
  from airbyte_cdk.sources.streams.checkpoint import (
17
23
  CheckpointMode,
@@ -84,7 +90,10 @@ class CheckpointMixin(ABC):
84
90
  """State setter, accept state serialized by state getter."""
85
91
 
86
92
 
87
- @deprecated(version="0.87.0", reason="Deprecated in favor of the CheckpointMixin which offers similar functionality")
93
+ @deprecated(
94
+ version="0.87.0",
95
+ reason="Deprecated in favor of the CheckpointMixin which offers similar functionality",
96
+ )
88
97
  class IncrementalMixin(CheckpointMixin, ABC):
89
98
  """Mixin to make stream incremental.
90
99
 
@@ -192,9 +201,14 @@ class Stream(ABC):
192
201
  for record_data_or_message in records:
193
202
  yield record_data_or_message
194
203
  if isinstance(record_data_or_message, Mapping) or (
195
- hasattr(record_data_or_message, "type") and record_data_or_message.type == MessageType.RECORD
204
+ hasattr(record_data_or_message, "type")
205
+ and record_data_or_message.type == MessageType.RECORD
196
206
  ):
197
- record_data = record_data_or_message if isinstance(record_data_or_message, Mapping) else record_data_or_message.record
207
+ record_data = (
208
+ record_data_or_message
209
+ if isinstance(record_data_or_message, Mapping)
210
+ else record_data_or_message.record
211
+ )
198
212
 
199
213
  # Thanks I hate it. RFR fundamentally doesn't fit with the concept of the legacy Stream.get_updated_state()
200
214
  # method because RFR streams rely on pagination as a cursor. Stream.get_updated_state() was designed to make
@@ -206,14 +220,23 @@ class Stream(ABC):
206
220
  if self.cursor_field:
207
221
  # Some connectors have streams that implement get_updated_state(), but do not define a cursor_field. This
208
222
  # should be fixed on the stream implementation, but we should also protect against this in the CDK as well
209
- stream_state_tracker = self.get_updated_state(stream_state_tracker, record_data)
223
+ stream_state_tracker = self.get_updated_state(
224
+ stream_state_tracker, record_data
225
+ )
210
226
  self._observe_state(checkpoint_reader, stream_state_tracker)
211
227
  record_counter += 1
212
228
 
213
229
  checkpoint_interval = self.state_checkpoint_interval
214
230
  checkpoint = checkpoint_reader.get_checkpoint()
215
- if should_checkpoint and checkpoint_interval and record_counter % checkpoint_interval == 0 and checkpoint is not None:
216
- airbyte_state_message = self._checkpoint_state(checkpoint, state_manager=state_manager)
231
+ if (
232
+ should_checkpoint
233
+ and checkpoint_interval
234
+ and record_counter % checkpoint_interval == 0
235
+ and checkpoint is not None
236
+ ):
237
+ airbyte_state_message = self._checkpoint_state(
238
+ checkpoint, state_manager=state_manager
239
+ )
217
240
  yield airbyte_state_message
218
241
 
219
242
  if internal_config.is_limit_reached(record_counter):
@@ -221,7 +244,9 @@ class Stream(ABC):
221
244
  self._observe_state(checkpoint_reader)
222
245
  checkpoint_state = checkpoint_reader.get_checkpoint()
223
246
  if should_checkpoint and checkpoint_state is not None:
224
- airbyte_state_message = self._checkpoint_state(checkpoint_state, state_manager=state_manager)
247
+ airbyte_state_message = self._checkpoint_state(
248
+ checkpoint_state, state_manager=state_manager
249
+ )
225
250
  yield airbyte_state_message
226
251
 
227
252
  next_slice = checkpoint_reader.next()
@@ -252,7 +277,9 @@ class Stream(ABC):
252
277
  configured_stream=configured_stream,
253
278
  logger=self.logger,
254
279
  slice_logger=DebugSliceLogger(),
255
- stream_state=dict(state) if state else {}, # read() expects MutableMapping instead of Mapping which is used more often
280
+ stream_state=dict(state)
281
+ if state
282
+ else {}, # read() expects MutableMapping instead of Mapping which is used more often
256
283
  state_manager=None,
257
284
  internal_config=InternalConfig(),
258
285
  )
@@ -378,7 +405,11 @@ class Stream(ABC):
378
405
  """
379
406
 
380
407
  def stream_slices(
381
- self, *, sync_mode: SyncMode, cursor_field: Optional[List[str]] = None, stream_state: Optional[Mapping[str, Any]] = None
408
+ self,
409
+ *,
410
+ sync_mode: SyncMode,
411
+ cursor_field: Optional[List[str]] = None,
412
+ stream_state: Optional[Mapping[str, Any]] = None,
382
413
  ) -> Iterable[Optional[Mapping[str, Any]]]:
383
414
  """
384
415
  Override to define the slices for this stream. See the stream slicing section of the docs for more information.
@@ -449,12 +480,16 @@ class Stream(ABC):
449
480
  mappings_or_slices = [{}]
450
481
 
451
482
  slices_iterable_copy, iterable_for_detecting_format = itertools.tee(mappings_or_slices, 2)
452
- stream_classification = self._classify_stream(mappings_or_slices=iterable_for_detecting_format)
483
+ stream_classification = self._classify_stream(
484
+ mappings_or_slices=iterable_for_detecting_format
485
+ )
453
486
 
454
487
  # Streams that override has_multiple_slices are explicitly indicating that they will iterate over
455
488
  # multiple partitions. Inspecting slices to automatically apply the correct cursor is only needed as
456
489
  # a backup. So if this value was already assigned to True by the stream, we don't need to reassign it
457
- self.has_multiple_slices = self.has_multiple_slices or stream_classification.has_multiple_slices
490
+ self.has_multiple_slices = (
491
+ self.has_multiple_slices or stream_classification.has_multiple_slices
492
+ )
458
493
 
459
494
  cursor = self.get_cursor()
460
495
  if cursor:
@@ -463,7 +498,9 @@ class Stream(ABC):
463
498
  checkpoint_mode = self._checkpoint_mode
464
499
 
465
500
  if cursor and stream_classification.is_legacy_format:
466
- return LegacyCursorBasedCheckpointReader(stream_slices=slices_iterable_copy, cursor=cursor, read_state_from_cursor=True)
501
+ return LegacyCursorBasedCheckpointReader(
502
+ stream_slices=slices_iterable_copy, cursor=cursor, read_state_from_cursor=True
503
+ )
467
504
  elif cursor:
468
505
  return CursorBasedCheckpointReader(
469
506
  stream_slices=slices_iterable_copy,
@@ -475,7 +512,9 @@ class Stream(ABC):
475
512
  # not iterate over a static set of slices.
476
513
  return ResumableFullRefreshCheckpointReader(stream_state=stream_state)
477
514
  elif checkpoint_mode == CheckpointMode.INCREMENTAL:
478
- return IncrementalCheckpointReader(stream_slices=slices_iterable_copy, stream_state=stream_state)
515
+ return IncrementalCheckpointReader(
516
+ stream_slices=slices_iterable_copy, stream_state=stream_state
517
+ )
479
518
  else:
480
519
  return FullRefreshCheckpointReader(stream_slices=slices_iterable_copy)
481
520
 
@@ -489,7 +528,9 @@ class Stream(ABC):
489
528
  return CheckpointMode.FULL_REFRESH
490
529
 
491
530
  @staticmethod
492
- def _classify_stream(mappings_or_slices: Iterator[Optional[Union[Mapping[str, Any], StreamSlice]]]) -> StreamClassification:
531
+ def _classify_stream(
532
+ mappings_or_slices: Iterator[Optional[Union[Mapping[str, Any], StreamSlice]]],
533
+ ) -> StreamClassification:
493
534
  """
494
535
  This is a bit of a crazy solution, but also the only way we can detect certain attributes about the stream since Python
495
536
  streams do not follow consistent implementation patterns. We care about the following two attributes:
@@ -506,7 +547,9 @@ class Stream(ABC):
506
547
  raise ValueError("A stream should always have at least one slice")
507
548
  try:
508
549
  next_slice = next(mappings_or_slices)
509
- if isinstance(next_slice, StreamSlice) and next_slice == StreamSlice(partition={}, cursor_slice={}):
550
+ if isinstance(next_slice, StreamSlice) and next_slice == StreamSlice(
551
+ partition={}, cursor_slice={}
552
+ ):
510
553
  is_legacy_format = False
511
554
  slice_has_value = False
512
555
  elif next_slice == {}:
@@ -526,7 +569,9 @@ class Stream(ABC):
526
569
  if slice_has_value:
527
570
  # If the first slice contained a partition value from the result of stream_slices(), this is a substream that might
528
571
  # have multiple parent records to iterate over
529
- return StreamClassification(is_legacy_format=is_legacy_format, has_multiple_slices=slice_has_value)
572
+ return StreamClassification(
573
+ is_legacy_format=is_legacy_format, has_multiple_slices=slice_has_value
574
+ )
530
575
 
531
576
  try:
532
577
  # If stream_slices() returns multiple slices, this is also a substream that can potentially generate empty slices
@@ -534,7 +579,9 @@ class Stream(ABC):
534
579
  return StreamClassification(is_legacy_format=is_legacy_format, has_multiple_slices=True)
535
580
  except StopIteration:
536
581
  # If the result of stream_slices() only returns a single empty stream slice, then we know this is a regular stream
537
- return StreamClassification(is_legacy_format=is_legacy_format, has_multiple_slices=False)
582
+ return StreamClassification(
583
+ is_legacy_format=is_legacy_format, has_multiple_slices=False
584
+ )
538
585
 
539
586
  def log_stream_sync_configuration(self) -> None:
540
587
  """
@@ -549,7 +596,9 @@ class Stream(ABC):
549
596
  )
550
597
 
551
598
  @staticmethod
552
- def _wrapped_primary_key(keys: Optional[Union[str, List[str], List[List[str]]]]) -> Optional[List[List[str]]]:
599
+ def _wrapped_primary_key(
600
+ keys: Optional[Union[str, List[str], List[List[str]]]],
601
+ ) -> Optional[List[List[str]]]:
553
602
  """
554
603
  :return: wrap the primary_key property in a list of list of strings required by the Airbyte Stream object.
555
604
  """
@@ -571,7 +620,9 @@ class Stream(ABC):
571
620
  else:
572
621
  raise ValueError(f"Element must be either list or str. Got: {type(keys)}")
573
622
 
574
- def _observe_state(self, checkpoint_reader: CheckpointReader, stream_state: Optional[Mapping[str, Any]] = None) -> None:
623
+ def _observe_state(
624
+ self, checkpoint_reader: CheckpointReader, stream_state: Optional[Mapping[str, Any]] = None
625
+ ) -> None:
575
626
  """
576
627
  Convenience method that attempts to read the Stream's state using the recommended way of connector's managing their
577
628
  own state via state setter/getter. But if we get back an AttributeError, then the legacy Stream.get_updated_state()
@@ -617,7 +668,9 @@ class Stream(ABC):
617
668
  def configured_json_schema(self, json_schema: Dict[str, Any]) -> None:
618
669
  self._configured_json_schema = self._filter_schema_invalid_properties(json_schema)
619
670
 
620
- def _filter_schema_invalid_properties(self, configured_catalog_json_schema: Dict[str, Any]) -> Dict[str, Any]:
671
+ def _filter_schema_invalid_properties(
672
+ self, configured_catalog_json_schema: Dict[str, Any]
673
+ ) -> Dict[str, Any]:
621
674
  """
622
675
  Filters the properties in json_schema that are not present in the stream schema.
623
676
  Configured Schemas can have very old fields, so we need to housekeeping ourselves.
@@ -639,6 +692,8 @@ class Stream(ABC):
639
692
  valid_configured_schema_properties = {}
640
693
 
641
694
  for configured_schema_property in valid_configured_schema_properties_keys:
642
- valid_configured_schema_properties[configured_schema_property] = stream_schema_properties[configured_schema_property]
695
+ valid_configured_schema_properties[configured_schema_property] = (
696
+ stream_schema_properties[configured_schema_property]
697
+ )
643
698
 
644
699
  return {**configured_catalog_json_schema, "properties": valid_configured_schema_properties}
@@ -15,7 +15,9 @@ if typing.TYPE_CHECKING:
15
15
 
16
16
 
17
17
  class HttpAvailabilityStrategy(AvailabilityStrategy):
18
- def check_availability(self, stream: Stream, logger: logging.Logger, source: Optional["Source"] = None) -> Tuple[bool, Optional[str]]:
18
+ def check_availability(
19
+ self, stream: Stream, logger: logging.Logger, source: Optional["Source"] = None
20
+ ) -> Tuple[bool, Optional[str]]:
19
21
  """
20
22
  Check stream availability by attempting to read the first record of the
21
23
  stream.
@@ -5,7 +5,10 @@
5
5
  from typing import Mapping, Type, Union
6
6
 
7
7
  from airbyte_cdk.models import FailureType
8
- from airbyte_cdk.sources.streams.http.error_handlers.response_models import ErrorResolution, ResponseAction
8
+ from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
9
+ ErrorResolution,
10
+ ResponseAction,
11
+ )
9
12
  from requests.exceptions import InvalidSchema, InvalidURL, RequestException
10
13
 
11
14
  DEFAULT_ERROR_MAPPING: Mapping[Union[int, str, Type[Exception]], ErrorResolution] = {
@@ -30,7 +30,9 @@ class ErrorHandler(ABC):
30
30
  pass
31
31
 
32
32
  @abstractmethod
33
- def interpret_response(self, response: Optional[Union[requests.Response, Exception]]) -> ErrorResolution:
33
+ def interpret_response(
34
+ self, response: Optional[Union[requests.Response, Exception]]
35
+ ) -> ErrorResolution:
34
36
  """
35
37
  Interpret the response or exception and return the corresponding response action, failure type, and error message.
36
38
 
@@ -8,9 +8,14 @@ from typing import Mapping, Optional, Union
8
8
 
9
9
  import requests
10
10
  from airbyte_cdk.models import FailureType
11
- from airbyte_cdk.sources.streams.http.error_handlers.default_error_mapping import DEFAULT_ERROR_MAPPING
11
+ from airbyte_cdk.sources.streams.http.error_handlers.default_error_mapping import (
12
+ DEFAULT_ERROR_MAPPING,
13
+ )
12
14
  from airbyte_cdk.sources.streams.http.error_handlers.error_handler import ErrorHandler
13
- from airbyte_cdk.sources.streams.http.error_handlers.response_models import ErrorResolution, ResponseAction
15
+ from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
16
+ ErrorResolution,
17
+ ResponseAction,
18
+ )
14
19
 
15
20
 
16
21
  class HttpStatusErrorHandler(ErrorHandler):
@@ -39,7 +44,9 @@ class HttpStatusErrorHandler(ErrorHandler):
39
44
  def max_time(self) -> Optional[int]:
40
45
  return self._max_time
41
46
 
42
- def interpret_response(self, response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> ErrorResolution:
47
+ def interpret_response(
48
+ self, response_or_exception: Optional[Union[requests.Response, Exception]] = None
49
+ ) -> ErrorResolution:
43
50
  """
44
51
  Interpret the response and return the corresponding response action, failure type, and error message.
45
52
 
@@ -48,12 +55,16 @@ class HttpStatusErrorHandler(ErrorHandler):
48
55
  """
49
56
 
50
57
  if isinstance(response_or_exception, Exception):
51
- mapped_error: Optional[ErrorResolution] = self._error_mapping.get(response_or_exception.__class__)
58
+ mapped_error: Optional[ErrorResolution] = self._error_mapping.get(
59
+ response_or_exception.__class__
60
+ )
52
61
 
53
62
  if mapped_error is not None:
54
63
  return mapped_error
55
64
  else:
56
- self._logger.error(f"Unexpected exception in error handler: {response_or_exception}")
65
+ self._logger.error(
66
+ f"Unexpected exception in error handler: {response_or_exception}"
67
+ )
57
68
  return ErrorResolution(
58
69
  response_action=ResponseAction.RETRY,
59
70
  failure_type=FailureType.system_error,
@@ -33,13 +33,17 @@ def _format_response_error_message(response: requests.Response) -> str:
33
33
  try:
34
34
  response.raise_for_status()
35
35
  except HTTPError as exception:
36
- return filter_secrets(f"Response was not ok: `{str(exception)}`. Response content is: {response.text}")
36
+ return filter_secrets(
37
+ f"Response was not ok: `{str(exception)}`. Response content is: {response.text}"
38
+ )
37
39
  # We purposefully do not add the response.content because the response is "ok" so there might be sensitive information in the payload.
38
40
  # Feel free the
39
41
  return f"Unexpected response with HTTP status {response.status_code}"
40
42
 
41
43
 
42
- def create_fallback_error_resolution(response_or_exception: Optional[Union[requests.Response, Exception]]) -> ErrorResolution:
44
+ def create_fallback_error_resolution(
45
+ response_or_exception: Optional[Union[requests.Response, Exception]],
46
+ ) -> ErrorResolution:
43
47
  if response_or_exception is None:
44
48
  # We do not expect this case to happen but if it does, it would be good to understand the cause and improve the error message
45
49
  error_message = "Error handler did not receive a valid response or exception. This is unexpected please contact Airbyte Support"
@@ -55,4 +59,6 @@ def create_fallback_error_resolution(response_or_exception: Optional[Union[reque
55
59
  )
56
60
 
57
61
 
58
- SUCCESS_RESOLUTION = ErrorResolution(response_action=ResponseAction.SUCCESS, failure_type=None, error_message=None)
62
+ SUCCESS_RESOLUTION = ErrorResolution(
63
+ response_action=ResponseAction.SUCCESS, failure_type=None, error_message=None
64
+ )
@@ -15,10 +15,10 @@ class BaseBackoffException(requests.exceptions.HTTPError):
15
15
  response: Optional[Union[requests.Response, Exception]],
16
16
  error_message: str = "",
17
17
  ):
18
-
19
18
  if isinstance(response, requests.Response):
20
19
  error_message = (
21
- error_message or f"Request URL: {request.url}, Response Code: {response.status_code}, Response Text: {response.text}"
20
+ error_message
21
+ or f"Request URL: {request.url}, Response Code: {response.status_code}, Response Text: {response.text}"
22
22
  )
23
23
  super().__init__(error_message, request=request, response=response)
24
24
  else: