airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to the supported public registries. It is provided for informational purposes only.
Files changed (200)
  1. airbyte_cdk/__init__.py +17 -2
  2. airbyte_cdk/config_observation.py +10 -3
  3. airbyte_cdk/connector.py +19 -9
  4. airbyte_cdk/connector_builder/connector_builder_handler.py +28 -8
  5. airbyte_cdk/connector_builder/main.py +26 -6
  6. airbyte_cdk/connector_builder/message_grouper.py +95 -25
  7. airbyte_cdk/destinations/destination.py +47 -14
  8. airbyte_cdk/destinations/vector_db_based/config.py +36 -14
  9. airbyte_cdk/destinations/vector_db_based/document_processor.py +49 -11
  10. airbyte_cdk/destinations/vector_db_based/embedder.py +52 -11
  11. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  12. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  13. airbyte_cdk/destinations/vector_db_based/writer.py +15 -4
  14. airbyte_cdk/entrypoint.py +82 -26
  15. airbyte_cdk/exception_handler.py +13 -3
  16. airbyte_cdk/logger.py +10 -2
  17. airbyte_cdk/models/airbyte_protocol.py +11 -5
  18. airbyte_cdk/models/airbyte_protocol_serializers.py +9 -3
  19. airbyte_cdk/models/well_known_types.py +1 -1
  20. airbyte_cdk/sources/abstract_source.py +63 -17
  21. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +47 -14
  22. airbyte_cdk/sources/concurrent_source/concurrent_source.py +25 -7
  23. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +27 -6
  24. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +9 -3
  25. airbyte_cdk/sources/connector_state_manager.py +32 -10
  26. airbyte_cdk/sources/declarative/async_job/job.py +3 -1
  27. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +68 -14
  28. airbyte_cdk/sources/declarative/async_job/job_tracker.py +24 -6
  29. airbyte_cdk/sources/declarative/async_job/repository.py +3 -1
  30. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  31. airbyte_cdk/sources/declarative/auth/jwt.py +27 -7
  32. airbyte_cdk/sources/declarative/auth/oauth.py +35 -11
  33. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +3 -1
  34. airbyte_cdk/sources/declarative/auth/token.py +25 -8
  35. airbyte_cdk/sources/declarative/checks/check_stream.py +12 -4
  36. airbyte_cdk/sources/declarative/checks/connection_checker.py +3 -1
  37. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +11 -3
  38. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +106 -50
  39. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +20 -6
  40. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +43 -0
  41. airbyte_cdk/sources/declarative/declarative_source.py +3 -1
  42. airbyte_cdk/sources/declarative/declarative_stream.py +27 -6
  43. airbyte_cdk/sources/declarative/decoders/__init__.py +2 -2
  44. airbyte_cdk/sources/declarative/decoders/decoder.py +3 -1
  45. airbyte_cdk/sources/declarative/decoders/json_decoder.py +48 -13
  46. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +3 -1
  47. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +6 -2
  48. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +6 -2
  49. airbyte_cdk/sources/declarative/extractors/record_filter.py +24 -7
  50. airbyte_cdk/sources/declarative/extractors/record_selector.py +10 -3
  51. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +15 -5
  52. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +96 -31
  53. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +22 -8
  54. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +46 -15
  55. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +19 -5
  56. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +3 -1
  57. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +20 -2
  58. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +5 -1
  59. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +10 -3
  60. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +6 -2
  61. airbyte_cdk/sources/declarative/interpolation/interpolation.py +7 -1
  62. airbyte_cdk/sources/declarative/interpolation/jinja.py +6 -2
  63. airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
  64. airbyte_cdk/sources/declarative/manifest_declarative_source.py +106 -24
  65. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +14 -5
  66. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +697 -678
  67. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +13 -4
  68. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +9 -2
  69. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +802 -232
  70. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +29 -7
  71. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +25 -7
  72. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +54 -15
  73. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +6 -2
  74. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +3 -1
  75. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +17 -5
  76. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +15 -5
  77. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +3 -1
  78. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +18 -8
  79. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +16 -7
  80. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +51 -14
  81. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +29 -8
  82. airbyte_cdk/sources/declarative/requesters/http_requester.py +58 -16
  83. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +49 -14
  84. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +3 -1
  85. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +3 -1
  86. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +17 -5
  87. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +24 -7
  88. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +9 -3
  89. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +3 -1
  90. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +6 -2
  91. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +19 -6
  92. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +3 -1
  93. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +21 -7
  94. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +18 -6
  95. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +27 -8
  96. airbyte_cdk/sources/declarative/requesters/requester.py +3 -1
  97. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -5
  98. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +105 -24
  99. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +3 -1
  100. airbyte_cdk/sources/declarative/spec/spec.py +8 -2
  101. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +3 -1
  102. airbyte_cdk/sources/declarative/transformations/add_fields.py +12 -3
  103. airbyte_cdk/sources/declarative/transformations/remove_fields.py +6 -2
  104. airbyte_cdk/sources/declarative/types.py +8 -1
  105. airbyte_cdk/sources/declarative/yaml_declarative_source.py +3 -1
  106. airbyte_cdk/sources/embedded/base_integration.py +14 -4
  107. airbyte_cdk/sources/embedded/catalog.py +16 -4
  108. airbyte_cdk/sources/embedded/runner.py +19 -3
  109. airbyte_cdk/sources/embedded/tools.py +3 -1
  110. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
  111. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +27 -7
  112. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +12 -6
  113. airbyte_cdk/sources/file_based/config/csv_format.py +21 -9
  114. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +6 -2
  115. airbyte_cdk/sources/file_based/config/unstructured_format.py +10 -3
  116. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  117. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  118. airbyte_cdk/sources/file_based/exceptions.py +13 -15
  119. airbyte_cdk/sources/file_based/file_based_source.py +82 -24
  120. airbyte_cdk/sources/file_based/file_based_stream_reader.py +16 -5
  121. airbyte_cdk/sources/file_based/file_types/avro_parser.py +58 -17
  122. airbyte_cdk/sources/file_based/file_types/csv_parser.py +89 -26
  123. airbyte_cdk/sources/file_based/file_types/excel_parser.py +25 -7
  124. airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
  125. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  126. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +20 -6
  127. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +57 -16
  128. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +64 -15
  129. airbyte_cdk/sources/file_based/schema_helpers.py +33 -10
  130. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  131. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  132. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +33 -10
  133. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +47 -11
  134. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +13 -22
  135. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +53 -17
  136. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +17 -5
  137. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  138. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +26 -9
  139. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +67 -21
  140. airbyte_cdk/sources/http_logger.py +5 -1
  141. airbyte_cdk/sources/message/repository.py +18 -4
  142. airbyte_cdk/sources/source.py +17 -7
  143. airbyte_cdk/sources/streams/availability_strategy.py +9 -3
  144. airbyte_cdk/sources/streams/call_rate.py +63 -19
  145. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +31 -7
  146. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +6 -2
  147. airbyte_cdk/sources/streams/concurrent/adapters.py +77 -22
  148. airbyte_cdk/sources/streams/concurrent/cursor.py +56 -20
  149. airbyte_cdk/sources/streams/concurrent/default_stream.py +9 -2
  150. airbyte_cdk/sources/streams/concurrent/helpers.py +6 -2
  151. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +9 -2
  152. airbyte_cdk/sources/streams/concurrent/partition_reader.py +4 -1
  153. airbyte_cdk/sources/streams/concurrent/partitions/record.py +10 -2
  154. airbyte_cdk/sources/streams/concurrent/partitions/types.py +6 -2
  155. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +25 -10
  156. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +32 -16
  157. airbyte_cdk/sources/streams/core.py +77 -22
  158. airbyte_cdk/sources/streams/http/availability_strategy.py +3 -1
  159. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +4 -1
  160. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +3 -1
  161. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +16 -5
  162. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +9 -3
  163. airbyte_cdk/sources/streams/http/exceptions.py +2 -2
  164. airbyte_cdk/sources/streams/http/http.py +133 -33
  165. airbyte_cdk/sources/streams/http/http_client.py +91 -29
  166. airbyte_cdk/sources/streams/http/rate_limiting.py +23 -7
  167. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +19 -6
  168. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +38 -11
  169. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  170. airbyte_cdk/sources/types.py +5 -1
  171. airbyte_cdk/sources/utils/record_helper.py +12 -3
  172. airbyte_cdk/sources/utils/schema_helpers.py +9 -3
  173. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  174. airbyte_cdk/sources/utils/transform.py +24 -9
  175. airbyte_cdk/sql/exceptions.py +19 -6
  176. airbyte_cdk/sql/secrets.py +3 -1
  177. airbyte_cdk/sql/shared/catalog_providers.py +13 -4
  178. airbyte_cdk/sql/shared/sql_processor.py +44 -14
  179. airbyte_cdk/test/catalog_builder.py +19 -8
  180. airbyte_cdk/test/entrypoint_wrapper.py +27 -8
  181. airbyte_cdk/test/mock_http/mocker.py +41 -11
  182. airbyte_cdk/test/mock_http/request.py +9 -3
  183. airbyte_cdk/test/mock_http/response.py +3 -1
  184. airbyte_cdk/test/mock_http/response_builder.py +29 -7
  185. airbyte_cdk/test/state_builder.py +10 -2
  186. airbyte_cdk/test/utils/data.py +6 -2
  187. airbyte_cdk/test/utils/http_mocking.py +3 -1
  188. airbyte_cdk/utils/airbyte_secrets_utils.py +3 -1
  189. airbyte_cdk/utils/analytics_message.py +10 -2
  190. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  191. airbyte_cdk/utils/mapping_helpers.py +3 -1
  192. airbyte_cdk/utils/message_utils.py +11 -4
  193. airbyte_cdk/utils/print_buffer.py +6 -1
  194. airbyte_cdk/utils/schema_inferrer.py +30 -9
  195. airbyte_cdk/utils/spec_schema_transformations.py +3 -1
  196. airbyte_cdk/utils/traced_exception.py +35 -9
  197. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/METADATA +8 -7
  198. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/RECORD +200 -200
  199. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/LICENSE.txt +0 -0
  200. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/WHEEL +0 -0
airbyte_cdk/exception_handler.py CHANGED
@@ -11,7 +11,9 @@ from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException


-def assemble_uncaught_exception(exception_type: type[BaseException], exception_value: BaseException) -> AirbyteTracedException:
+def assemble_uncaught_exception(
+    exception_type: type[BaseException], exception_value: BaseException
+) -> AirbyteTracedException:
     if issubclass(exception_type, AirbyteTracedException):
         return exception_value  # type: ignore  # validated as part of the previous line
     return AirbyteTracedException.from_exception(exception_value)
@@ -23,7 +25,11 @@ def init_uncaught_exception_handler(logger: logging.Logger) -> None:
     printed to the console without having secrets removed.
     """

-    def hook_fn(exception_type: type[BaseException], exception_value: BaseException, traceback_: Optional[TracebackType]) -> Any:
+    def hook_fn(
+        exception_type: type[BaseException],
+        exception_value: BaseException,
+        traceback_: Optional[TracebackType],
+    ) -> Any:
         # For developer ergonomics, we want to see the stack trace in the logs when we do a ctrl-c
         if issubclass(exception_type, KeyboardInterrupt):
             sys.__excepthook__(exception_type, exception_value, traceback_)
@@ -41,6 +47,10 @@ def init_uncaught_exception_handler(logger: logging.Logger) -> None:

 def generate_failed_streams_error_message(stream_failures: Mapping[str, List[Exception]]) -> str:
     failures = "\n".join(
-        [f"{stream}: {filter_secrets(exception.__repr__())}" for stream, exceptions in stream_failures.items() for exception in exceptions]
+        [
+            f"{stream}: {filter_secrets(exception.__repr__())}"
+            for stream, exceptions in stream_failures.items()
+            for exception in exceptions
+        ]
     )
     return f"During the sync, the following streams did not sync successfully: {failures}"
airbyte_cdk/logger.py CHANGED
@@ -7,7 +7,13 @@ import logging
 import logging.config
 from typing import Any, Callable, Mapping, Optional, Tuple

-from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, AirbyteMessageSerializer, Level, Type
+from airbyte_cdk.models import (
+    AirbyteLogMessage,
+    AirbyteMessage,
+    AirbyteMessageSerializer,
+    Level,
+    Type,
+)
 from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
 from orjson import orjson

@@ -68,7 +74,9 @@ class AirbyteLogFormatter(logging.Formatter):
         else:
             message = super().format(record)
         message = filter_secrets(message)
-        log_message = AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=airbyte_level, message=message))
+        log_message = AirbyteMessage(
+            type=Type.LOG, log=AirbyteLogMessage(level=airbyte_level, message=message)
+        )
         return orjson.dumps(AirbyteMessageSerializer.dump(log_message)).decode()  # type: ignore[no-any-return] # orjson.dumps(message).decode() always returns string

     @staticmethod
airbyte_cdk/models/airbyte_protocol.py CHANGED
@@ -6,9 +6,11 @@ from dataclasses import InitVar, dataclass
 from typing import Annotated, Any, Dict, List, Mapping, Optional, Union

 from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage
-from airbyte_protocol_dataclasses.models import *
+from airbyte_protocol_dataclasses.models import *  # noqa: F403 # Allow '*'
 from serpyco_rs.metadata import Alias

+# ruff: noqa: F405 # ignore fuzzy import issues with 'import *'
+

 @dataclass
 class AirbyteStateBlob:
@@ -42,7 +44,11 @@ class AirbyteStateBlob:
             setattr(self, key, value)

     def __eq__(self, other: object) -> bool:
-        return False if not isinstance(other, AirbyteStateBlob) else bool(self.__dict__ == other.__dict__)
+        return (
+            False
+            if not isinstance(other, AirbyteStateBlob)
+            else bool(self.__dict__ == other.__dict__)
+        )


 # The following dataclasses have been redeclared to include the new version of AirbyteStateBlob
@@ -62,9 +68,9 @@ class AirbyteGlobalState:
 class AirbyteStateMessage:
     type: Optional[AirbyteStateType] = None  # type: ignore [name-defined]
     stream: Optional[AirbyteStreamState] = None
-    global_: Annotated[
-        AirbyteGlobalState | None, Alias("global")
-    ] = None  # "global" is a reserved keyword in python ⇒ Alias is used for (de-)serialization
+    global_: Annotated[AirbyteGlobalState | None, Alias("global")] = (
+        None  # "global" is a reserved keyword in python ⇒ Alias is used for (de-)serialization
+    )
     data: Optional[Dict[str, Any]] = None
     sourceStats: Optional[AirbyteStateStats] = None  # type: ignore [name-defined]
     destinationStats: Optional[AirbyteStateStats] = None  # type: ignore [name-defined]
airbyte_cdk/models/airbyte_protocol_serializers.py CHANGED
@@ -30,9 +30,15 @@ def custom_type_resolver(t: type) -> CustomType[AirbyteStateBlob, Dict[str, Any]
     return AirbyteStateBlobType() if t is AirbyteStateBlob else None


-AirbyteStreamStateSerializer = Serializer(AirbyteStreamState, omit_none=True, custom_type_resolver=custom_type_resolver)
-AirbyteStateMessageSerializer = Serializer(AirbyteStateMessage, omit_none=True, custom_type_resolver=custom_type_resolver)
-AirbyteMessageSerializer = Serializer(AirbyteMessage, omit_none=True, custom_type_resolver=custom_type_resolver)
+AirbyteStreamStateSerializer = Serializer(
+    AirbyteStreamState, omit_none=True, custom_type_resolver=custom_type_resolver
+)
+AirbyteStateMessageSerializer = Serializer(
+    AirbyteStateMessage, omit_none=True, custom_type_resolver=custom_type_resolver
+)
+AirbyteMessageSerializer = Serializer(
+    AirbyteMessage, omit_none=True, custom_type_resolver=custom_type_resolver
+)
 ConfiguredAirbyteCatalogSerializer = Serializer(ConfiguredAirbyteCatalog, omit_none=True)
 ConfiguredAirbyteStreamSerializer = Serializer(ConfiguredAirbyteStream, omit_none=True)
 ConnectorSpecificationSerializer = Serializer(ConnectorSpecification, omit_none=True)
airbyte_cdk/models/well_known_types.py CHANGED
@@ -2,4 +2,4 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #

-from airbyte_protocol_dataclasses.models.well_known_types import *
+from airbyte_protocol_dataclasses.models.well_known_types import *  # noqa: F403 # Allow '*'
airbyte_cdk/sources/abstract_source.py CHANGED
@@ -4,7 +4,18 @@

 import logging
 from abc import ABC, abstractmethod
-from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Tuple, Union
+from typing import (
+    Any,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Mapping,
+    MutableMapping,
+    Optional,
+    Tuple,
+    Union,
+)

 from airbyte_cdk.exception_handler import generate_failed_streams_error_message
 from airbyte_cdk.models import (
@@ -30,7 +41,9 @@ from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_messa
 from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, split_config
 from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
 from airbyte_cdk.utils.event_timing import create_timer
-from airbyte_cdk.utils.stream_status_utils import as_airbyte_message as stream_status_as_airbyte_message
+from airbyte_cdk.utils.stream_status_utils import (
+    as_airbyte_message as stream_status_as_airbyte_message,
+)
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException

 _default_message_repository = InMemoryMessageRepository()
@@ -43,7 +56,9 @@ class AbstractSource(Source, ABC):
     """

     @abstractmethod
-    def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
+    def check_connection(
+        self, logger: logging.Logger, config: Mapping[str, Any]
+    ) -> Tuple[bool, Optional[Any]]:
         """
         :param logger: source logger
         :param config: The user-provided configuration as specified by the source's spec.
@@ -109,7 +124,9 @@
             # Used direct reference to `stream_instance` instead of `is_stream_exist` to avoid mypy type checking errors
             if not stream_instance:
                 if not self.raise_exception_on_missing_stream:
-                    yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
+                    yield stream_status_as_airbyte_message(
+                        configured_stream.stream, AirbyteStreamStatus.INCOMPLETE
+                    )
                     continue

                 error_message = (
@@ -129,7 +146,9 @@

                 timer.start_event(f"Syncing stream {configured_stream.stream.name}")
                 logger.info(f"Marking stream {configured_stream.stream.name} as STARTED")
-                yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.STARTED)
+                yield stream_status_as_airbyte_message(
+                    configured_stream.stream, AirbyteStreamStatus.STARTED
+                )
                 yield from self._read_stream(
                     logger=logger,
                     stream_instance=stream_instance,
@@ -138,13 +157,19 @@
                     internal_config=internal_config,
                 )
                 logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
-                yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.COMPLETE)
+                yield stream_status_as_airbyte_message(
+                    configured_stream.stream, AirbyteStreamStatus.COMPLETE
+                )

             except Exception as e:
                 yield from self._emit_queued_messages()
-                logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
+                logger.exception(
+                    f"Encountered an exception while reading stream {configured_stream.stream.name}"
+                )
                 logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
-                yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
+                yield stream_status_as_airbyte_message(
+                    configured_stream.stream, AirbyteStreamStatus.INCOMPLETE
+                )

                 stream_descriptor = StreamDescriptor(name=configured_stream.stream.name)

@@ -152,10 +177,14 @@
                     traced_exception = e
                     info_message = f"Stopping sync on error from stream {configured_stream.stream.name} because {self.name} does not support continuing syncs on error."
                 else:
-                    traced_exception = self._serialize_exception(stream_descriptor, e, stream_instance=stream_instance)
+                    traced_exception = self._serialize_exception(
+                        stream_descriptor, e, stream_instance=stream_instance
+                    )
                     info_message = f"{self.name} does not support continuing syncs on error from stream {configured_stream.stream.name}"

-                yield traced_exception.as_sanitized_airbyte_message(stream_descriptor=stream_descriptor)
+                yield traced_exception.as_sanitized_airbyte_message(
+                    stream_descriptor=stream_descriptor
+                )
                 stream_name_to_exception[stream_instance.name] = traced_exception  # type: ignore # use configured_stream if stream_instance is None
                 if self.stop_sync_on_stream_failure:
                     logger.info(info_message)
@@ -169,12 +198,16 @@
         logger.info(timer.report())

         if len(stream_name_to_exception) > 0:
-            error_message = generate_failed_streams_error_message({key: [value] for key, value in stream_name_to_exception.items()})  # type: ignore # for some reason, mypy can't figure out the types for key and value
+            error_message = generate_failed_streams_error_message(
+                {key: [value] for key, value in stream_name_to_exception.items()}
+            )  # type: ignore # for some reason, mypy can't figure out the types for key and value
             logger.info(error_message)
             # We still raise at least one exception when a stream raises an exception because the platform currently relies
             # on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error
             # type because this combined error isn't actionable, but rather the previously emitted individual errors.
-            raise AirbyteTracedException(message=error_message, failure_type=FailureType.config_error)
+            raise AirbyteTracedException(
+                message=error_message, failure_type=FailureType.config_error
+            )
         logger.info(f"Finished syncing {self.name}")

     @staticmethod
@@ -183,7 +216,9 @@
     ) -> AirbyteTracedException:
         display_message = stream_instance.get_error_display_message(e) if stream_instance else None
         if display_message:
-            return AirbyteTracedException.from_exception(e, message=display_message, stream_descriptor=stream_descriptor)
+            return AirbyteTracedException.from_exception(
+                e, message=display_message, stream_descriptor=stream_descriptor
+            )
         return AirbyteTracedException.from_exception(e, stream_descriptor=stream_descriptor)

     @property
@@ -199,7 +234,9 @@
         internal_config: InternalConfig,
     ) -> Iterator[AirbyteMessage]:
         if internal_config.page_size and isinstance(stream_instance, HttpStream):
-            logger.info(f"Setting page size for {stream_instance.name} to {internal_config.page_size}")
+            logger.info(
+                f"Setting page size for {stream_instance.name} to {internal_config.page_size}"
+            )
             stream_instance.page_size = internal_config.page_size
         logger.debug(
             f"Syncing configured stream: {configured_stream.stream.name}",
@@ -243,7 +280,9 @@
                     if record_counter == 1:
                         logger.info(f"Marking stream {stream_name} as RUNNING")
                         # If we just read the first record of the stream, emit the transition to the RUNNING state
-                        yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.RUNNING)
+                        yield stream_status_as_airbyte_message(
+                            configured_stream.stream, AirbyteStreamStatus.RUNNING
+                        )
                 yield from self._emit_queued_messages()
                 yield record

@@ -254,7 +293,9 @@
             yield from self.message_repository.consume_queue()
         return

-    def _get_message(self, record_data_or_message: Union[StreamData, AirbyteMessage], stream: Stream) -> AirbyteMessage:
+    def _get_message(
+        self, record_data_or_message: Union[StreamData, AirbyteMessage], stream: Stream
+    ) -> AirbyteMessage:
         """
         Converts the input to an AirbyteMessage if it is a StreamData. Returns the input as is if it is already an AirbyteMessage
         """
@@ -262,7 +303,12 @@
             case AirbyteMessage():
                 return record_data_or_message
             case _:
-                return stream_data_to_airbyte_message(stream.name, record_data_or_message, stream.transformer, stream.get_json_schema())
+                return stream_data_to_airbyte_message(
+                    stream.name,
+                    record_data_or_message,
+                    stream.transformer,
+                    stream.get_json_schema(),
+                )

     @property
     def message_repository(self) -> Union[None, MessageRepository]:
airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py CHANGED
@@ -7,7 +7,9 @@ from typing import Dict, Iterable, List, Optional, Set
 from airbyte_cdk.exception_handler import generate_failed_streams_error_message
 from airbyte_cdk.models import AirbyteMessage, AirbyteStreamStatus, FailureType, StreamDescriptor
 from airbyte_cdk.models import Type as MessageType
-from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
+from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import (
+    PartitionGenerationCompletedSentinel,
+)
 from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException
 from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
 from airbyte_cdk.sources.message import MessageRepository
@@ -20,7 +22,9 @@ from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCom
 from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
 from airbyte_cdk.sources.utils.slice_logger import SliceLogger
 from airbyte_cdk.utils import AirbyteTracedException
-from airbyte_cdk.utils.stream_status_utils import as_airbyte_message as stream_status_as_airbyte_message
+from airbyte_cdk.utils.stream_status_utils import (
+    as_airbyte_message as stream_status_as_airbyte_message,
+)


 class ConcurrentReadProcessor:
@@ -61,7 +65,9 @@
         self._streams_done: Set[str] = set()
         self._exceptions_per_stream_name: dict[str, List[Exception]] = {}

-    def on_partition_generation_completed(self, sentinel: PartitionGenerationCompletedSentinel) -> Iterable[AirbyteMessage]:
+    def on_partition_generation_completed(
+        self, sentinel: PartitionGenerationCompletedSentinel
+    ) -> Iterable[AirbyteMessage]:
         """
         This method is called when a partition generation is completed.
         1. Remove the stream from the list of streams currently generating partitions
@@ -72,7 +78,10 @@
         self._streams_currently_generating_partitions.remove(sentinel.stream.name)
         # It is possible for the stream to already be done if no partitions were generated
         # If the partition generation process was completed and there are no partitions left to process, the stream is done
-        if self._is_stream_done(stream_name) or len(self._streams_to_running_partitions[stream_name]) == 0:
+        if (
+            self._is_stream_done(stream_name)
+            or len(self._streams_to_running_partitions[stream_name]) == 0
+        ):
             yield from self._on_stream_is_done(stream_name)
         if self._stream_instances_to_start_partition_generation:
             yield self.start_next_partition_generator()  # type:ignore # None may be yielded
@@ -87,10 +96,14 @@
         stream_name = partition.stream_name()
         self._streams_to_running_partitions[stream_name].add(partition)
         if self._slice_logger.should_log_slice_message(self._logger):
-            self._message_repository.emit_message(self._slice_logger.create_slice_log_message(partition.to_slice()))
+            self._message_repository.emit_message(
+                self._slice_logger.create_slice_log_message(partition.to_slice())
+            )
         self._thread_pool_manager.submit(self._partition_reader.process_partition, partition)

-    def on_partition_complete_sentinel(self, sentinel: PartitionCompleteSentinel) -> Iterable[AirbyteMessage]:
+    def on_partition_complete_sentinel(
+        self, sentinel: PartitionCompleteSentinel
+    ) -> Iterable[AirbyteMessage]:
         """
         This method is called when a partition is completed.
         1. Close the partition
@@ -112,7 +125,10 @@
         if partition in partitions_running:
             partitions_running.remove(partition)
             # If all partitions were generated and this was the last one, the stream is done
-            if partition.stream_name() not in self._streams_currently_generating_partitions and len(partitions_running) == 0:
+            if (
+                partition.stream_name() not in self._streams_currently_generating_partitions
+                and len(partitions_running) == 0
+            ):
                 yield from self._on_stream_is_done(partition.stream_name())
         yield from self._message_repository.consume_queue()

@@ -139,7 +155,9 @@
         if message.type == MessageType.RECORD:
             if self._record_counter[stream.name] == 0:
                 self._logger.info(f"Marking stream {stream.name} as RUNNING")
-                yield stream_status_as_airbyte_message(stream.as_airbyte_stream(), AirbyteStreamStatus.RUNNING)
+                yield stream_status_as_airbyte_message(
+                    stream.as_airbyte_stream(), AirbyteStreamStatus.RUNNING
+                )
             self._record_counter[stream.name] += 1
             stream.cursor.observe(record)
         yield message
@@ -152,13 +170,17 @@
         2. Raise the exception
         """
         self._flag_exception(exception.stream_name, exception.exception)
-        self._logger.exception(f"Exception while syncing stream {exception.stream_name}", exc_info=exception.exception)
+        self._logger.exception(
+            f"Exception while syncing stream {exception.stream_name}", exc_info=exception.exception
+        )

         stream_descriptor = StreamDescriptor(name=exception.stream_name)
         if isinstance(exception.exception, AirbyteTracedException):
             yield exception.exception.as_airbyte_message(stream_descriptor=stream_descriptor)
         else:
-            yield AirbyteTracedException.from_exception(exception, stream_descriptor=stream_descriptor).as_airbyte_message()
+            yield AirbyteTracedException.from_exception(
+                exception, stream_descriptor=stream_descriptor
+            ).as_airbyte_message()

     def _flag_exception(self, stream_name: str, exception: Exception) -> None:
         self._exceptions_per_stream_name.setdefault(stream_name, []).append(exception)
@@ -192,7 +214,12 @@
         2. There are no more streams to read from
         3. All partitions for all streams are closed
         """
-        is_done = all([self._is_stream_done(stream_name) for stream_name in self._stream_name_to_instance.keys()])
+        is_done = all(
+            [
+                self._is_stream_done(stream_name)
+                for stream_name in self._stream_name_to_instance.keys()
+            ]
+        )
         if is_done and self._exceptions_per_stream_name:
             error_message = generate_failed_streams_error_message(self._exceptions_per_stream_name)
             self._logger.info(error_message)
@@ -200,7 +227,9 @@
             # on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error
             # type because this combined error isn't actionable, but rather the previously emitted individual errors.
             raise AirbyteTracedException(
-                message=error_message, internal_message="Concurrent read failure", failure_type=FailureType.config_error
+                message=error_message,
+                internal_message="Concurrent read failure",
+                failure_type=FailureType.config_error,
             )
         return is_done

@@ -208,7 +237,9 @@
         return stream_name in self._streams_done

     def _on_stream_is_done(self, stream_name: str) -> Iterable[AirbyteMessage]:
-        self._logger.info(f"Read {self._record_counter[stream_name]} records from {stream_name} stream")
+        self._logger.info(
+            f"Read {self._record_counter[stream_name]} records from {stream_name} stream"
+        )
         self._logger.info(f"Marking stream {stream_name} as STOPPED")
         stream = self._stream_name_to_instance[stream_name]
         stream.cursor.ensure_at_least_one_state_emitted()
@@ -216,6 +247,8 @@
         self._logger.info(f"Finished syncing {stream.name}")
         self._streams_done.add(stream_name)
         stream_status = (
-            AirbyteStreamStatus.INCOMPLETE if self._exceptions_per_stream_name.get(stream_name, []) else AirbyteStreamStatus.COMPLETE
+            AirbyteStreamStatus.INCOMPLETE
+            if self._exceptions_per_stream_name.get(stream_name, [])
+            else AirbyteStreamStatus.COMPLETE
         )
         yield stream_status_as_airbyte_message(stream.as_airbyte_stream(), stream_status)
airbyte_cdk/sources/concurrent_source/concurrent_source.py CHANGED
@@ -8,7 +8,9 @@ from typing import Iterable, Iterator, List

 from airbyte_cdk.models import AirbyteMessage
 from airbyte_cdk.sources.concurrent_source.concurrent_read_processor import ConcurrentReadProcessor
-from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
+from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import (
+    PartitionGenerationCompletedSentinel,
+)
 from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException
 from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
 from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
@@ -17,7 +19,10 @@ from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionE
 from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
-from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCompleteSentinel, QueueItem
+from airbyte_cdk.sources.streams.concurrent.partitions.types import (
+    PartitionCompleteSentinel,
+    QueueItem,
+)
 from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger


@@ -41,14 +46,25 @@
         timeout_seconds: int = DEFAULT_TIMEOUT_SECONDS,
     ) -> "ConcurrentSource":
         is_single_threaded = initial_number_of_partitions_to_generate == 1 and num_workers == 1
-        too_many_generator = not is_single_threaded and initial_number_of_partitions_to_generate >= num_workers
-        assert not too_many_generator, "It is required to have more workers than threads generating partitions"
+        too_many_generator = (
+            not is_single_threaded and initial_number_of_partitions_to_generate >= num_workers
+        )
+        assert (
+            not too_many_generator
+        ), "It is required to have more workers than threads generating partitions"
         threadpool = ThreadPoolManager(
-            concurrent.futures.ThreadPoolExecutor(max_workers=num_workers, thread_name_prefix="workerpool"),
+            concurrent.futures.ThreadPoolExecutor(
+                max_workers=num_workers, thread_name_prefix="workerpool"
+            ),
             logger,
         )
         return ConcurrentSource(
-            threadpool, logger, slice_logger, message_repository, initial_number_of_partitions_to_generate, timeout_seconds
+            threadpool,
+            logger,
+            slice_logger,
+            message_repository,
+            initial_number_of_partitions_to_generate,
+            timeout_seconds,
         )

     def __init__(
@@ -107,7 +123,9 @@
         self._threadpool.check_for_errors_and_shutdown()
         self._logger.info("Finished syncing")

-    def _submit_initial_partition_generators(self, concurrent_stream_processor: ConcurrentReadProcessor) -> Iterable[AirbyteMessage]:
+    def _submit_initial_partition_generators(
+        self, concurrent_stream_processor: ConcurrentReadProcessor
+    ) -> Iterable[AirbyteMessage]:
         for _ in range(self._initial_number_partitions_to_generate):
             status_message = concurrent_stream_processor.start_next_partition_generator()
             if status_message:
airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py CHANGED
@@ -15,8 +15,17 @@ from airbyte_cdk.sources.streams import Stream
 from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
 from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade
 from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
-from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, Cursor, CursorField, CursorValueType, FinalStateCursor, GapType
-from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import AbstractStreamStateConverter
+from airbyte_cdk.sources.streams.concurrent.cursor import (
+    ConcurrentCursor,
+    Cursor,
+    CursorField,
+    CursorValueType,
+    FinalStateCursor,
+    GapType,
+)
+from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
+    AbstractStreamStateConverter,
+)

 DEFAULT_LOOKBACK_SECONDS = 0

@@ -43,14 +52,20 @@
         abstract_streams = self._select_abstract_streams(config, catalog)
         concurrent_stream_names = {stream.name for stream in abstract_streams}
         configured_catalog_for_regular_streams = ConfiguredAirbyteCatalog(
-            streams=[stream for stream in catalog.streams if stream.stream.name not in concurrent_stream_names]
+            streams=[
+                stream
+                for stream in catalog.streams
+                if stream.stream.name not in concurrent_stream_names
+            ]
         )
         if abstract_streams:
             yield from self._concurrent_source.read(abstract_streams)
         if configured_catalog_for_regular_streams.streams:
             yield from super().read(logger, config, configured_catalog_for_regular_streams, state)

-    def _select_abstract_streams(self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog) -> List[AbstractStream]:
+    def _select_abstract_streams(
+        self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog
+    ) -> List[AbstractStream]:
         """
         Selects streams that can be processed concurrently and returns their abstract representations.
         """
@@ -67,7 +82,11 @@
         return abstract_streams

     def convert_to_concurrent_stream(
-        self, logger: logging.Logger, stream: Stream, state_manager: ConnectorStateManager, cursor: Optional[Cursor] = None
+        self,
+        logger: logging.Logger,
+        stream: Stream,
+        state_manager: ConnectorStateManager,
+        cursor: Optional[Cursor] = None,
     ) -> Stream:
         """
         Prepares a stream for concurrent processing by initializing or assigning a cursor,
@@ -106,7 +125,9 @@

         if cursor_field_name:
             if not isinstance(cursor_field_name, str):
-                raise ValueError(f"Cursor field type must be a string, but received {type(cursor_field_name).__name__}.")
+                raise ValueError(
+                    f"Cursor field type must be a string, but received {type(cursor_field_name).__name__}."
+                )

         return ConcurrentCursor(
             stream.name,
airbyte_cdk/sources/concurrent_source/thread_pool_manager.py CHANGED
@@ -37,7 +37,9 @@ class ThreadPoolManager:
     def prune_to_validate_has_reached_futures_limit(self) -> bool:
         self._prune_futures(self._futures)
         if len(self._futures) > self._logging_threshold:
-            self._logger.warning(f"ThreadPoolManager: The list of futures is getting bigger than expected ({len(self._futures)})")
+            self._logger.warning(
+                f"ThreadPoolManager: The list of futures is getting bigger than expected ({len(self._futures)})"
+            )
         return len(self._futures) >= self._max_concurrent_tasks

     def submit(self, function: Callable[..., Any], *args: Any) -> None:
@@ -92,14 +94,18 @@
             )
             self._stop_and_raise_exception(self._most_recently_seen_exception)

-        exceptions_from_futures = [f for f in [future.exception() for future in self._futures] if f is not None]
+        exceptions_from_futures = [
+            f for f in [future.exception() for future in self._futures] if f is not None
+        ]
         if exceptions_from_futures:
             exception = RuntimeError(f"Failed reading with errors: {exceptions_from_futures}")
             self._stop_and_raise_exception(exception)
         else:
             futures_not_done = [f for f in self._futures if not f.done()]
             if futures_not_done:
-                exception = RuntimeError(f"Failed reading with futures not done: {futures_not_done}")
+                exception = RuntimeError(
+                    f"Failed reading with futures not done: {futures_not_done}"
+                )
                 self._stop_and_raise_exception(exception)
             else:
                 self._shutdown()