airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. airbyte_cdk/__init__.py +17 -2
  2. airbyte_cdk/config_observation.py +10 -3
  3. airbyte_cdk/connector.py +19 -9
  4. airbyte_cdk/connector_builder/connector_builder_handler.py +28 -8
  5. airbyte_cdk/connector_builder/main.py +26 -6
  6. airbyte_cdk/connector_builder/message_grouper.py +95 -25
  7. airbyte_cdk/destinations/destination.py +47 -14
  8. airbyte_cdk/destinations/vector_db_based/config.py +36 -14
  9. airbyte_cdk/destinations/vector_db_based/document_processor.py +49 -11
  10. airbyte_cdk/destinations/vector_db_based/embedder.py +52 -11
  11. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  12. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  13. airbyte_cdk/destinations/vector_db_based/writer.py +15 -4
  14. airbyte_cdk/entrypoint.py +82 -26
  15. airbyte_cdk/exception_handler.py +13 -3
  16. airbyte_cdk/logger.py +10 -2
  17. airbyte_cdk/models/airbyte_protocol.py +11 -5
  18. airbyte_cdk/models/airbyte_protocol_serializers.py +9 -3
  19. airbyte_cdk/models/well_known_types.py +1 -1
  20. airbyte_cdk/sources/abstract_source.py +63 -17
  21. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +47 -14
  22. airbyte_cdk/sources/concurrent_source/concurrent_source.py +25 -7
  23. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +27 -6
  24. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +9 -3
  25. airbyte_cdk/sources/connector_state_manager.py +32 -10
  26. airbyte_cdk/sources/declarative/async_job/job.py +3 -1
  27. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +68 -14
  28. airbyte_cdk/sources/declarative/async_job/job_tracker.py +24 -6
  29. airbyte_cdk/sources/declarative/async_job/repository.py +3 -1
  30. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  31. airbyte_cdk/sources/declarative/auth/jwt.py +27 -7
  32. airbyte_cdk/sources/declarative/auth/oauth.py +35 -11
  33. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +3 -1
  34. airbyte_cdk/sources/declarative/auth/token.py +25 -8
  35. airbyte_cdk/sources/declarative/checks/check_stream.py +12 -4
  36. airbyte_cdk/sources/declarative/checks/connection_checker.py +3 -1
  37. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +11 -3
  38. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +106 -50
  39. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +20 -6
  40. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +43 -0
  41. airbyte_cdk/sources/declarative/declarative_source.py +3 -1
  42. airbyte_cdk/sources/declarative/declarative_stream.py +27 -6
  43. airbyte_cdk/sources/declarative/decoders/__init__.py +2 -2
  44. airbyte_cdk/sources/declarative/decoders/decoder.py +3 -1
  45. airbyte_cdk/sources/declarative/decoders/json_decoder.py +48 -13
  46. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +3 -1
  47. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +6 -2
  48. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +6 -2
  49. airbyte_cdk/sources/declarative/extractors/record_filter.py +24 -7
  50. airbyte_cdk/sources/declarative/extractors/record_selector.py +10 -3
  51. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +15 -5
  52. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +96 -31
  53. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +22 -8
  54. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +46 -15
  55. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +19 -5
  56. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +3 -1
  57. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +20 -2
  58. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +5 -1
  59. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +10 -3
  60. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +6 -2
  61. airbyte_cdk/sources/declarative/interpolation/interpolation.py +7 -1
  62. airbyte_cdk/sources/declarative/interpolation/jinja.py +6 -2
  63. airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
  64. airbyte_cdk/sources/declarative/manifest_declarative_source.py +106 -24
  65. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +14 -5
  66. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +697 -678
  67. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +13 -4
  68. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +9 -2
  69. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +802 -232
  70. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +29 -7
  71. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +25 -7
  72. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +54 -15
  73. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +6 -2
  74. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +3 -1
  75. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +17 -5
  76. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +15 -5
  77. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +3 -1
  78. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +18 -8
  79. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +16 -7
  80. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +51 -14
  81. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +29 -8
  82. airbyte_cdk/sources/declarative/requesters/http_requester.py +58 -16
  83. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +49 -14
  84. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +3 -1
  85. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +3 -1
  86. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +17 -5
  87. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +24 -7
  88. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +9 -3
  89. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +3 -1
  90. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +6 -2
  91. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +19 -6
  92. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +3 -1
  93. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +21 -7
  94. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +18 -6
  95. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +27 -8
  96. airbyte_cdk/sources/declarative/requesters/requester.py +3 -1
  97. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -5
  98. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +105 -24
  99. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +3 -1
  100. airbyte_cdk/sources/declarative/spec/spec.py +8 -2
  101. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +3 -1
  102. airbyte_cdk/sources/declarative/transformations/add_fields.py +12 -3
  103. airbyte_cdk/sources/declarative/transformations/remove_fields.py +6 -2
  104. airbyte_cdk/sources/declarative/types.py +8 -1
  105. airbyte_cdk/sources/declarative/yaml_declarative_source.py +3 -1
  106. airbyte_cdk/sources/embedded/base_integration.py +14 -4
  107. airbyte_cdk/sources/embedded/catalog.py +16 -4
  108. airbyte_cdk/sources/embedded/runner.py +19 -3
  109. airbyte_cdk/sources/embedded/tools.py +3 -1
  110. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
  111. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +27 -7
  112. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +12 -6
  113. airbyte_cdk/sources/file_based/config/csv_format.py +21 -9
  114. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +6 -2
  115. airbyte_cdk/sources/file_based/config/unstructured_format.py +10 -3
  116. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  117. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  118. airbyte_cdk/sources/file_based/exceptions.py +13 -15
  119. airbyte_cdk/sources/file_based/file_based_source.py +82 -24
  120. airbyte_cdk/sources/file_based/file_based_stream_reader.py +16 -5
  121. airbyte_cdk/sources/file_based/file_types/avro_parser.py +58 -17
  122. airbyte_cdk/sources/file_based/file_types/csv_parser.py +89 -26
  123. airbyte_cdk/sources/file_based/file_types/excel_parser.py +25 -7
  124. airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
  125. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  126. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +20 -6
  127. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +57 -16
  128. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +64 -15
  129. airbyte_cdk/sources/file_based/schema_helpers.py +33 -10
  130. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  131. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  132. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +33 -10
  133. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +47 -11
  134. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +13 -22
  135. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +53 -17
  136. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +17 -5
  137. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  138. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +26 -9
  139. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +67 -21
  140. airbyte_cdk/sources/http_logger.py +5 -1
  141. airbyte_cdk/sources/message/repository.py +18 -4
  142. airbyte_cdk/sources/source.py +17 -7
  143. airbyte_cdk/sources/streams/availability_strategy.py +9 -3
  144. airbyte_cdk/sources/streams/call_rate.py +63 -19
  145. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +31 -7
  146. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +6 -2
  147. airbyte_cdk/sources/streams/concurrent/adapters.py +77 -22
  148. airbyte_cdk/sources/streams/concurrent/cursor.py +56 -20
  149. airbyte_cdk/sources/streams/concurrent/default_stream.py +9 -2
  150. airbyte_cdk/sources/streams/concurrent/helpers.py +6 -2
  151. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +9 -2
  152. airbyte_cdk/sources/streams/concurrent/partition_reader.py +4 -1
  153. airbyte_cdk/sources/streams/concurrent/partitions/record.py +10 -2
  154. airbyte_cdk/sources/streams/concurrent/partitions/types.py +6 -2
  155. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +25 -10
  156. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +32 -16
  157. airbyte_cdk/sources/streams/core.py +77 -22
  158. airbyte_cdk/sources/streams/http/availability_strategy.py +3 -1
  159. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +4 -1
  160. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +3 -1
  161. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +16 -5
  162. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +9 -3
  163. airbyte_cdk/sources/streams/http/exceptions.py +2 -2
  164. airbyte_cdk/sources/streams/http/http.py +133 -33
  165. airbyte_cdk/sources/streams/http/http_client.py +91 -29
  166. airbyte_cdk/sources/streams/http/rate_limiting.py +23 -7
  167. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +19 -6
  168. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +38 -11
  169. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  170. airbyte_cdk/sources/types.py +5 -1
  171. airbyte_cdk/sources/utils/record_helper.py +12 -3
  172. airbyte_cdk/sources/utils/schema_helpers.py +9 -3
  173. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  174. airbyte_cdk/sources/utils/transform.py +24 -9
  175. airbyte_cdk/sql/exceptions.py +19 -6
  176. airbyte_cdk/sql/secrets.py +3 -1
  177. airbyte_cdk/sql/shared/catalog_providers.py +13 -4
  178. airbyte_cdk/sql/shared/sql_processor.py +44 -14
  179. airbyte_cdk/test/catalog_builder.py +19 -8
  180. airbyte_cdk/test/entrypoint_wrapper.py +27 -8
  181. airbyte_cdk/test/mock_http/mocker.py +41 -11
  182. airbyte_cdk/test/mock_http/request.py +9 -3
  183. airbyte_cdk/test/mock_http/response.py +3 -1
  184. airbyte_cdk/test/mock_http/response_builder.py +29 -7
  185. airbyte_cdk/test/state_builder.py +10 -2
  186. airbyte_cdk/test/utils/data.py +6 -2
  187. airbyte_cdk/test/utils/http_mocking.py +3 -1
  188. airbyte_cdk/utils/airbyte_secrets_utils.py +3 -1
  189. airbyte_cdk/utils/analytics_message.py +10 -2
  190. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  191. airbyte_cdk/utils/mapping_helpers.py +3 -1
  192. airbyte_cdk/utils/message_utils.py +11 -4
  193. airbyte_cdk/utils/print_buffer.py +6 -1
  194. airbyte_cdk/utils/schema_inferrer.py +30 -9
  195. airbyte_cdk/utils/spec_schema_transformations.py +3 -1
  196. airbyte_cdk/utils/traced_exception.py +35 -9
  197. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/METADATA +8 -7
  198. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/RECORD +200 -200
  199. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/LICENSE.txt +0 -0
  200. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/WHEEL +0 -0
@@ -27,15 +27,19 @@ class ExperimentalClassWarning(DeprecationWarning):
27
27
 
28
28
  class BaseSource(BaseConnector[TConfig], ABC, Generic[TConfig, TState, TCatalog]):
29
29
  @abstractmethod
30
- def read_state(self, state_path: str) -> TState:
31
- ...
30
+ def read_state(self, state_path: str) -> TState: ...
32
31
 
33
32
  @abstractmethod
34
- def read_catalog(self, catalog_path: str) -> TCatalog:
35
- ...
33
+ def read_catalog(self, catalog_path: str) -> TCatalog: ...
36
34
 
37
35
  @abstractmethod
38
- def read(self, logger: logging.Logger, config: TConfig, catalog: TCatalog, state: Optional[TState] = None) -> Iterable[AirbyteMessage]:
36
+ def read(
37
+ self,
38
+ logger: logging.Logger,
39
+ config: TConfig,
40
+ catalog: TCatalog,
41
+ state: Optional[TState] = None,
42
+ ) -> Iterable[AirbyteMessage]:
39
43
  """
40
44
  Returns a generator of the AirbyteMessages generated by reading the source with the given configuration, catalog, and state.
41
45
  """
@@ -69,8 +73,14 @@ class Source(
69
73
  if state_obj:
70
74
  for state in state_obj: # type: ignore # `isinstance(state_obj, List)` ensures that this is a list
71
75
  parsed_message = AirbyteStateMessageSerializer.load(state)
72
- if not parsed_message.stream and not parsed_message.data and not parsed_message.global_:
73
- raise ValueError("AirbyteStateMessage should contain either a stream, global, or state field")
76
+ if (
77
+ not parsed_message.stream
78
+ and not parsed_message.data
79
+ and not parsed_message.global_
80
+ ):
81
+ raise ValueError(
82
+ "AirbyteStateMessage should contain either a stream, global, or state field"
83
+ )
74
84
  parsed_state_messages.append(parsed_message)
75
85
  return parsed_state_messages
76
86
 
@@ -20,7 +20,9 @@ class AvailabilityStrategy(ABC):
20
20
  """
21
21
 
22
22
  @abstractmethod
23
- def check_availability(self, stream: Stream, logger: logging.Logger, source: Optional["Source"] = None) -> Tuple[bool, Optional[str]]:
23
+ def check_availability(
24
+ self, stream: Stream, logger: logging.Logger, source: Optional["Source"] = None
25
+ ) -> Tuple[bool, Optional[str]]:
24
26
  """
25
27
  Checks stream availability.
26
28
 
@@ -52,7 +54,9 @@ class AvailabilityStrategy(ABC):
52
54
  return next(slices)
53
55
 
54
56
  @staticmethod
55
- def get_first_record_for_slice(stream: Stream, stream_slice: Optional[Mapping[str, Any]]) -> StreamData:
57
+ def get_first_record_for_slice(
58
+ stream: Stream, stream_slice: Optional[Mapping[str, Any]]
59
+ ) -> StreamData:
56
60
  """
57
61
  Gets the first record for a stream_slice of a stream.
58
62
 
@@ -70,7 +74,9 @@ class AvailabilityStrategy(ABC):
70
74
 
71
75
  # We wrap the return output of read_records() because some implementations return types that are iterable,
72
76
  # but not iterators such as lists or tuples
73
- records_for_slice = iter(stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice))
77
+ records_for_slice = iter(
78
+ stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice)
79
+ )
74
80
 
75
81
  return next(records_for_slice)
76
82
  finally:
@@ -76,7 +76,9 @@ class AbstractCallRatePolicy(abc.ABC):
76
76
  """
77
77
 
78
78
  @abc.abstractmethod
79
- def update(self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime]) -> None:
79
+ def update(
80
+ self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime]
81
+ ) -> None:
80
82
  """Update call rate counting with current values
81
83
 
82
84
  :param available_calls:
@@ -202,12 +204,20 @@ class UnlimitedCallRatePolicy(BaseCallRatePolicy):
202
204
  def try_acquire(self, request: Any, weight: int) -> None:
203
205
  """Do nothing"""
204
206
 
205
- def update(self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime]) -> None:
207
+ def update(
208
+ self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime]
209
+ ) -> None:
206
210
  """Do nothing"""
207
211
 
208
212
 
209
213
  class FixedWindowCallRatePolicy(BaseCallRatePolicy):
210
- def __init__(self, next_reset_ts: datetime.datetime, period: timedelta, call_limit: int, matchers: list[RequestMatcher]):
214
+ def __init__(
215
+ self,
216
+ next_reset_ts: datetime.datetime,
217
+ period: timedelta,
218
+ call_limit: int,
219
+ matchers: list[RequestMatcher],
220
+ ):
211
221
  """A policy that allows {call_limit} calls within a {period} time interval
212
222
 
213
223
  :param next_reset_ts: next call rate reset time point
@@ -235,7 +245,8 @@ class FixedWindowCallRatePolicy(BaseCallRatePolicy):
235
245
  if self._calls_num + weight > self._call_limit:
236
246
  reset_in = self._next_reset_ts - datetime.datetime.now()
237
247
  error_message = (
238
- f"reached maximum number of allowed calls {self._call_limit} " f"per {self._offset} interval, next reset in {reset_in}."
248
+ f"reached maximum number of allowed calls {self._call_limit} "
249
+ f"per {self._offset} interval, next reset in {reset_in}."
239
250
  )
240
251
  raise CallRateLimitHit(
241
252
  error=error_message,
@@ -247,7 +258,9 @@ class FixedWindowCallRatePolicy(BaseCallRatePolicy):
247
258
 
248
259
  self._calls_num += weight
249
260
 
250
- def update(self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime]) -> None:
261
+ def update(
262
+ self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime]
263
+ ) -> None:
251
264
  """Update call rate counters, by default, only reacts to decreasing updates of available_calls and changes to call_reset_ts.
252
265
  We ignore updates with available_calls > current_available_calls to support call rate limits that are lower than API limits.
253
266
 
@@ -260,12 +273,18 @@ class FixedWindowCallRatePolicy(BaseCallRatePolicy):
260
273
 
261
274
  if available_calls is not None and current_available_calls > available_calls:
262
275
  logger.debug(
263
- "got rate limit update from api, adjusting available calls from %s to %s", current_available_calls, available_calls
276
+ "got rate limit update from api, adjusting available calls from %s to %s",
277
+ current_available_calls,
278
+ available_calls,
264
279
  )
265
280
  self._calls_num = self._call_limit - available_calls
266
281
 
267
282
  if call_reset_ts is not None and call_reset_ts != self._next_reset_ts:
268
- logger.debug("got rate limit update from api, adjusting reset time from %s to %s", self._next_reset_ts, call_reset_ts)
283
+ logger.debug(
284
+ "got rate limit update from api, adjusting reset time from %s to %s",
285
+ self._next_reset_ts,
286
+ call_reset_ts,
287
+ )
269
288
  self._next_reset_ts = call_reset_ts
270
289
 
271
290
  def _update_current_window(self) -> None:
@@ -292,7 +311,10 @@ class MovingWindowCallRatePolicy(BaseCallRatePolicy):
292
311
  """
293
312
  if not rates:
294
313
  raise ValueError("The list of rates can not be empty")
295
- pyrate_rates = [PyRateRate(limit=rate.limit, interval=int(rate.interval.total_seconds() * 1000)) for rate in rates]
314
+ pyrate_rates = [
315
+ PyRateRate(limit=rate.limit, interval=int(rate.interval.total_seconds() * 1000))
316
+ for rate in rates
317
+ ]
296
318
  self._bucket = InMemoryBucket(pyrate_rates)
297
319
  # Limiter will create the background task that clears old requests in the bucket
298
320
  self._limiter = Limiter(self._bucket)
@@ -320,14 +342,18 @@ class MovingWindowCallRatePolicy(BaseCallRatePolicy):
320
342
  time_to_wait=timedelta(milliseconds=time_to_wait),
321
343
  )
322
344
 
323
- def update(self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime]) -> None:
345
+ def update(
346
+ self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime]
347
+ ) -> None:
324
348
  """Adjust call bucket to reflect the state of the API server
325
349
 
326
350
  :param available_calls:
327
351
  :param call_reset_ts:
328
352
  :return:
329
353
  """
330
- if available_calls is not None and call_reset_ts is None: # we do our best to sync buckets with API
354
+ if (
355
+ available_calls is not None and call_reset_ts is None
356
+ ): # we do our best to sync buckets with API
331
357
  if available_calls == 0:
332
358
  with self._limiter.lock:
333
359
  items_to_add = self._bucket.count() < self._bucket.rates[0].limit
@@ -350,7 +376,9 @@ class AbstractAPIBudget(abc.ABC):
350
376
  """
351
377
 
352
378
  @abc.abstractmethod
353
- def acquire_call(self, request: Any, block: bool = True, timeout: Optional[float] = None) -> None:
379
+ def acquire_call(
380
+ self, request: Any, block: bool = True, timeout: Optional[float] = None
381
+ ) -> None:
354
382
  """Try to get a call from budget, will block by default
355
383
 
356
384
  :param request:
@@ -375,7 +403,9 @@ class AbstractAPIBudget(abc.ABC):
375
403
  class APIBudget(AbstractAPIBudget):
376
404
  """Default APIBudget implementation"""
377
405
 
378
- def __init__(self, policies: list[AbstractCallRatePolicy], maximum_attempts_to_acquire: int = 100000) -> None:
406
+ def __init__(
407
+ self, policies: list[AbstractCallRatePolicy], maximum_attempts_to_acquire: int = 100000
408
+ ) -> None:
379
409
  """Constructor
380
410
 
381
411
  :param policies: list of policies in this budget
@@ -392,7 +422,9 @@ class APIBudget(AbstractAPIBudget):
392
422
  return policy
393
423
  return None
394
424
 
395
- def acquire_call(self, request: Any, block: bool = True, timeout: Optional[float] = None) -> None:
425
+ def acquire_call(
426
+ self, request: Any, block: bool = True, timeout: Optional[float] = None
427
+ ) -> None:
396
428
  """Try to get a call from budget, will block by default.
397
429
  Matchers will be called sequentially in the same order they were added.
398
430
  The first matcher that returns True will
@@ -417,7 +449,9 @@ class APIBudget(AbstractAPIBudget):
417
449
  """
418
450
  pass
419
451
 
420
- def _do_acquire(self, request: Any, policy: AbstractCallRatePolicy, block: bool, timeout: Optional[float]) -> None:
452
+ def _do_acquire(
453
+ self, request: Any, policy: AbstractCallRatePolicy, block: bool, timeout: Optional[float]
454
+ ) -> None:
421
455
  """Internal method to try to acquire a call credit
422
456
 
423
457
  :param request:
@@ -439,14 +473,20 @@ class APIBudget(AbstractAPIBudget):
439
473
  else:
440
474
  time_to_wait = exc.time_to_wait
441
475
 
442
- time_to_wait = max(timedelta(0), time_to_wait) # sometimes we get negative duration
443
- logger.info("reached call limit %s. going to sleep for %s", exc.rate, time_to_wait)
476
+ time_to_wait = max(
477
+ timedelta(0), time_to_wait
478
+ ) # sometimes we get negative duration
479
+ logger.info(
480
+ "reached call limit %s. going to sleep for %s", exc.rate, time_to_wait
481
+ )
444
482
  time.sleep(time_to_wait.total_seconds())
445
483
  else:
446
484
  raise
447
485
 
448
486
  if last_exception:
449
- logger.info("we used all %s attempts to acquire and failed", self._maximum_attempts_to_acquire)
487
+ logger.info(
488
+ "we used all %s attempts to acquire and failed", self._maximum_attempts_to_acquire
489
+ )
450
490
  raise last_exception
451
491
 
452
492
 
@@ -481,9 +521,13 @@ class HttpAPIBudget(APIBudget):
481
521
  reset_ts = self.get_reset_ts_from_response(response)
482
522
  policy.update(available_calls=available_calls, call_reset_ts=reset_ts)
483
523
 
484
- def get_reset_ts_from_response(self, response: requests.Response) -> Optional[datetime.datetime]:
524
+ def get_reset_ts_from_response(
525
+ self, response: requests.Response
526
+ ) -> Optional[datetime.datetime]:
485
527
  if response.headers.get(self._ratelimit_reset_header):
486
- return datetime.datetime.fromtimestamp(int(response.headers[self._ratelimit_reset_header]))
528
+ return datetime.datetime.fromtimestamp(
529
+ int(response.headers[self._ratelimit_reset_header])
530
+ )
487
531
  return None
488
532
 
489
533
  def get_calls_left_from_response(self, response: requests.Response) -> Optional[int]:
@@ -53,7 +53,9 @@ class IncrementalCheckpointReader(CheckpointReader):
53
53
  before syncing data.
54
54
  """
55
55
 
56
- def __init__(self, stream_state: Mapping[str, Any], stream_slices: Iterable[Optional[Mapping[str, Any]]]):
56
+ def __init__(
57
+ self, stream_state: Mapping[str, Any], stream_slices: Iterable[Optional[Mapping[str, Any]]]
58
+ ):
57
59
  self._state: Optional[Mapping[str, Any]] = stream_state
58
60
  self._stream_slices = iter(stream_slices)
59
61
  self._has_slices = False
@@ -87,7 +89,12 @@ class CursorBasedCheckpointReader(CheckpointReader):
87
89
  that belongs to the Concurrent CDK.
88
90
  """
89
91
 
90
- def __init__(self, cursor: Cursor, stream_slices: Iterable[Optional[Mapping[str, Any]]], read_state_from_cursor: bool = False):
92
+ def __init__(
93
+ self,
94
+ cursor: Cursor,
95
+ stream_slices: Iterable[Optional[Mapping[str, Any]]],
96
+ read_state_from_cursor: bool = False,
97
+ ):
91
98
  self._cursor = cursor
92
99
  self._stream_slices = iter(stream_slices)
93
100
  # read_state_from_cursor is used to delineate that partitions should determine when to stop syncing dynamically according
@@ -153,7 +160,11 @@ class CursorBasedCheckpointReader(CheckpointReader):
153
160
  next_slice = self.read_and_convert_slice()
154
161
  state_for_slice = self._cursor.select_state(next_slice)
155
162
  has_more = state_for_slice == FULL_REFRESH_COMPLETE_STATE
156
- return StreamSlice(cursor_slice=state_for_slice or {}, partition=next_slice.partition, extra_fields=next_slice.extra_fields)
163
+ return StreamSlice(
164
+ cursor_slice=state_for_slice or {},
165
+ partition=next_slice.partition,
166
+ extra_fields=next_slice.extra_fields,
167
+ )
157
168
  else:
158
169
  state_for_slice = self._cursor.select_state(self.current_slice)
159
170
  if state_for_slice == FULL_REFRESH_COMPLETE_STATE:
@@ -173,7 +184,9 @@ class CursorBasedCheckpointReader(CheckpointReader):
173
184
  )
174
185
  # The reader continues to process the current partition if it's state is still in progress
175
186
  return StreamSlice(
176
- cursor_slice=state_for_slice or {}, partition=self.current_slice.partition, extra_fields=self.current_slice.extra_fields
187
+ cursor_slice=state_for_slice or {},
188
+ partition=self.current_slice.partition,
189
+ extra_fields=self.current_slice.extra_fields,
177
190
  )
178
191
  else:
179
192
  # Unlike RFR cursors that iterate dynamically according to how stream state is updated, most cursors operate
@@ -218,8 +231,17 @@ class LegacyCursorBasedCheckpointReader(CursorBasedCheckpointReader):
218
231
  }
219
232
  """
220
233
 
221
- def __init__(self, cursor: Cursor, stream_slices: Iterable[Optional[Mapping[str, Any]]], read_state_from_cursor: bool = False):
222
- super().__init__(cursor=cursor, stream_slices=stream_slices, read_state_from_cursor=read_state_from_cursor)
234
+ def __init__(
235
+ self,
236
+ cursor: Cursor,
237
+ stream_slices: Iterable[Optional[Mapping[str, Any]]],
238
+ read_state_from_cursor: bool = False,
239
+ ):
240
+ super().__init__(
241
+ cursor=cursor,
242
+ stream_slices=stream_slices,
243
+ read_state_from_cursor=read_state_from_cursor,
244
+ )
223
245
 
224
246
  def next(self) -> Optional[Mapping[str, Any]]:
225
247
  try:
@@ -228,7 +250,9 @@ class LegacyCursorBasedCheckpointReader(CursorBasedCheckpointReader):
228
250
  if "partition" in dict(self.current_slice):
229
251
  raise ValueError("Stream is configured to use invalid stream slice key 'partition'")
230
252
  elif "cursor_slice" in dict(self.current_slice):
231
- raise ValueError("Stream is configured to use invalid stream slice key 'cursor_slice'")
253
+ raise ValueError(
254
+ "Stream is configured to use invalid stream slice key 'cursor_slice'"
255
+ )
232
256
 
233
257
  # We convert StreamSlice to a regular mapping because legacy connectors operate on the basic Mapping object. We
234
258
  # also duplicate all fields at the top level for backwards compatibility for existing Python sources
@@ -5,7 +5,9 @@ from typing import Any, Mapping, MutableMapping, Optional
5
5
 
6
6
  from airbyte_cdk.models import FailureType
7
7
  from airbyte_cdk.sources.streams.checkpoint import Cursor
8
- from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import PerPartitionKeySerializer
8
+ from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
9
+ PerPartitionKeySerializer,
10
+ )
9
11
  from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
10
12
  from airbyte_cdk.utils import AirbyteTracedException
11
13
 
@@ -97,7 +99,9 @@ class SubstreamResumableFullRefreshCursor(Cursor):
97
99
  if not stream_slice:
98
100
  raise ValueError("A partition needs to be provided in order to extract a state")
99
101
 
100
- return self._per_partition_state.get(self._to_partition_key(stream_slice.partition), {}).get("cursor")
102
+ return self._per_partition_state.get(
103
+ self._to_partition_key(stream_slice.partition), {}
104
+ ).get("cursor")
101
105
 
102
106
  def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
103
107
  return self._partition_serializer.to_partition_key(partition)
@@ -6,9 +6,17 @@ import copy
6
6
  import json
7
7
  import logging
8
8
  from functools import lru_cache
9
- from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
10
-
11
- from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, AirbyteStream, ConfiguredAirbyteStream, Level, SyncMode, Type
9
+ from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
10
+
11
+ from airbyte_cdk.models import (
12
+ AirbyteLogMessage,
13
+ AirbyteMessage,
14
+ AirbyteStream,
15
+ ConfiguredAirbyteStream,
16
+ Level,
17
+ SyncMode,
18
+ Type,
19
+ )
12
20
  from airbyte_cdk.sources import AbstractSource, Source
13
21
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
14
22
  from airbyte_cdk.sources.message import MessageRepository
@@ -16,15 +24,23 @@ from airbyte_cdk.sources.source import ExperimentalClassWarning
16
24
  from airbyte_cdk.sources.streams import Stream
17
25
  from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
18
26
  from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade
19
- from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, AlwaysAvailableAvailabilityStrategy
27
+ from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
28
+ AbstractAvailabilityStrategy,
29
+ AlwaysAvailableAvailabilityStrategy,
30
+ )
20
31
  from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, FinalStateCursor
21
32
  from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
22
33
  from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
23
- from airbyte_cdk.sources.streams.concurrent.helpers import get_cursor_field_from_stream, get_primary_key_from_stream
34
+ from airbyte_cdk.sources.streams.concurrent.helpers import (
35
+ get_cursor_field_from_stream,
36
+ get_primary_key_from_stream,
37
+ )
24
38
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
25
39
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
26
40
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
27
- from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import DateTimeStreamStateConverter
41
+ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
42
+ DateTimeStreamStateConverter,
43
+ )
28
44
  from airbyte_cdk.sources.streams.core import StreamData
29
45
  from airbyte_cdk.sources.types import StreamSlice
30
46
  from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
@@ -75,7 +91,9 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
75
91
  partition_generator=StreamPartitionGenerator(
76
92
  stream,
77
93
  message_repository,
78
- SyncMode.full_refresh if isinstance(cursor, FinalStateCursor) else SyncMode.incremental,
94
+ SyncMode.full_refresh
95
+ if isinstance(cursor, FinalStateCursor)
96
+ else SyncMode.incremental,
79
97
  [cursor_field] if cursor_field is not None else None,
80
98
  state,
81
99
  cursor,
@@ -97,14 +115,23 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
97
115
 
98
116
  @property
99
117
  def state(self) -> MutableMapping[str, Any]:
100
- raise NotImplementedError("This should not be called as part of the Concurrent CDK code. Please report the problem to Airbyte")
118
+ raise NotImplementedError(
119
+ "This should not be called as part of the Concurrent CDK code. Please report the problem to Airbyte"
120
+ )
101
121
 
102
122
  @state.setter
103
123
  def state(self, value: Mapping[str, Any]) -> None:
104
124
  if "state" in dir(self._legacy_stream):
105
125
  self._legacy_stream.state = value # type: ignore # validating `state` is attribute of stream using `if` above
106
126
 
107
- def __init__(self, stream: DefaultStream, legacy_stream: Stream, cursor: Cursor, slice_logger: SliceLogger, logger: logging.Logger):
127
+ def __init__(
128
+ self,
129
+ stream: DefaultStream,
130
+ legacy_stream: Stream,
131
+ cursor: Cursor,
132
+ slice_logger: SliceLogger,
133
+ logger: logging.Logger,
134
+ ):
108
135
  """
109
136
  :param stream: The underlying AbstractStream
110
137
  """
@@ -141,7 +168,10 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
141
168
  # This shouldn't happen if the ConcurrentCursor was used
142
169
  state = "unknown; no state attribute was available on the cursor"
143
170
  yield AirbyteMessage(
144
- type=Type.LOG, log=AirbyteLogMessage(level=Level.ERROR, message=f"Cursor State at time of exception: {state}")
171
+ type=Type.LOG,
172
+ log=AirbyteLogMessage(
173
+ level=Level.ERROR, message=f"Cursor State at time of exception: {state}"
174
+ ),
145
175
  )
146
176
  raise exc
147
177
 
@@ -180,7 +210,9 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
180
210
  def supports_incremental(self) -> bool:
181
211
  return self._legacy_stream.supports_incremental
182
212
 
183
- def check_availability(self, logger: logging.Logger, source: Optional["Source"] = None) -> Tuple[bool, Optional[str]]:
213
+ def check_availability(
214
+ self, logger: logging.Logger, source: Optional["Source"] = None
215
+ ) -> Tuple[bool, Optional[str]]:
184
216
  """
185
217
  Verifies the stream is available. Delegates to the underlying AbstractStream and ignores the parameters
186
218
  :param logger: (ignored)
@@ -264,7 +296,9 @@ class StreamPartition(Partition):
264
296
  ):
265
297
  if isinstance(record_data, Mapping):
266
298
  data_to_return = dict(record_data)
267
- self._stream.transformer.transform(data_to_return, self._stream.get_json_schema())
299
+ self._stream.transformer.transform(
300
+ data_to_return, self._stream.get_json_schema()
301
+ )
268
302
  yield Record(data_to_return, self)
269
303
  else:
270
304
  self._message_repository.emit_message(record_data)
@@ -329,9 +363,17 @@ class StreamPartitionGenerator(PartitionGenerator):
329
363
  self._cursor = cursor
330
364
 
331
365
  def generate(self) -> Iterable[Partition]:
332
- for s in self._stream.stream_slices(sync_mode=self._sync_mode, cursor_field=self._cursor_field, stream_state=self._state):
366
+ for s in self._stream.stream_slices(
367
+ sync_mode=self._sync_mode, cursor_field=self._cursor_field, stream_state=self._state
368
+ ):
333
369
  yield StreamPartition(
334
- self._stream, copy.deepcopy(s), self.message_repository, self._sync_mode, self._cursor_field, self._state, self._cursor
370
+ self._stream,
371
+ copy.deepcopy(s),
372
+ self.message_repository,
373
+ self._sync_mode,
374
+ self._cursor_field,
375
+ self._state,
376
+ self._cursor,
335
377
  )
336
378
 
337
379
 
@@ -348,7 +390,7 @@ class CursorPartitionGenerator(PartitionGenerator):
348
390
 
349
391
  def __init__(
350
392
  self,
351
- stream_factory: Callable[[], Stream],
393
+ stream: Stream,
352
394
  message_repository: MessageRepository,
353
395
  cursor: Cursor,
354
396
  connector_state_converter: DateTimeStreamStateConverter,
@@ -358,12 +400,12 @@ class CursorPartitionGenerator(PartitionGenerator):
358
400
  """
359
401
  Initialize the CursorPartitionGenerator with a stream, sync mode, and cursor.
360
402
 
361
- :param stream_factory: The stream factory that created the stream to delegate to for partition generation.
403
+ :param stream: The stream to delegate to for partition generation.
362
404
  :param message_repository: The message repository to use to emit non-record messages.
363
405
  :param sync_mode: The synchronization mode.
364
406
  :param cursor: A Cursor object that maintains the state and the cursor field.
365
407
  """
366
- self._stream_factory = stream_factory
408
+ self._stream = stream
367
409
  self.message_repository = message_repository
368
410
  self._sync_mode = SyncMode.full_refresh
369
411
  self._cursor = cursor
@@ -382,8 +424,16 @@ class CursorPartitionGenerator(PartitionGenerator):
382
424
  :return: An iterable of StreamPartition objects.
383
425
  """
384
426
 
385
- start_boundary = self._slice_boundary_fields[self._START_BOUNDARY] if self._slice_boundary_fields else "start"
386
- end_boundary = self._slice_boundary_fields[self._END_BOUNDARY] if self._slice_boundary_fields else "end"
427
+ start_boundary = (
428
+ self._slice_boundary_fields[self._START_BOUNDARY]
429
+ if self._slice_boundary_fields
430
+ else "start"
431
+ )
432
+ end_boundary = (
433
+ self._slice_boundary_fields[self._END_BOUNDARY]
434
+ if self._slice_boundary_fields
435
+ else "end"
436
+ )
387
437
 
388
438
  for slice_start, slice_end in self._cursor.generate_slices():
389
439
  stream_slice = StreamSlice(
@@ -395,7 +445,7 @@ class CursorPartitionGenerator(PartitionGenerator):
395
445
  )
396
446
 
397
447
  yield StreamPartition(
398
- self._stream_factory(),
448
+ self._stream,
399
449
  copy.deepcopy(stream_slice),
400
450
  self.message_repository,
401
451
  self._sync_mode,
@@ -405,12 +455,17 @@ class CursorPartitionGenerator(PartitionGenerator):
405
455
  )
406
456
 
407
457
 
408
- @deprecated("Availability strategy has been soft deprecated. Do not use. Class is subject to removal", category=ExperimentalClassWarning)
458
+ @deprecated(
459
+ "Availability strategy has been soft deprecated. Do not use. Class is subject to removal",
460
+ category=ExperimentalClassWarning,
461
+ )
409
462
  class AvailabilityStrategyFacade(AvailabilityStrategy):
410
463
  def __init__(self, abstract_availability_strategy: AbstractAvailabilityStrategy):
411
464
  self._abstract_availability_strategy = abstract_availability_strategy
412
465
 
413
- def check_availability(self, stream: Stream, logger: logging.Logger, source: Optional["Source"] = None) -> Tuple[bool, Optional[str]]:
466
+ def check_availability(
467
+ self, stream: Stream, logger: logging.Logger, source: Optional["Source"] = None
468
+ ) -> Tuple[bool, Optional[str]]:
414
469
  """
415
470
  Checks stream availability.
416
471