airbyte-cdk 0.72.1__py3-none-any.whl → 6.13.1.dev4106__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (517) hide show
  1. airbyte_cdk/__init__.py +355 -6
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +29 -10
  7. airbyte_cdk/connector.py +24 -24
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
  10. airbyte_cdk/connector_builder/main.py +45 -13
  11. airbyte_cdk/connector_builder/message_grouper.py +189 -50
  12. airbyte_cdk/connector_builder/models.py +3 -2
  13. airbyte_cdk/destinations/__init__.py +4 -3
  14. airbyte_cdk/destinations/destination.py +54 -20
  15. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  16. airbyte_cdk/destinations/vector_db_based/config.py +40 -17
  17. airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
  18. airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
  19. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  20. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  21. airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
  22. airbyte_cdk/entrypoint.py +153 -44
  23. airbyte_cdk/exception_handler.py +21 -3
  24. airbyte_cdk/logger.py +30 -44
  25. airbyte_cdk/models/__init__.py +13 -2
  26. airbyte_cdk/models/airbyte_protocol.py +86 -1
  27. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  28. airbyte_cdk/models/file_transfer_record_message.py +13 -0
  29. airbyte_cdk/models/well_known_types.py +1 -1
  30. airbyte_cdk/sources/__init__.py +5 -1
  31. airbyte_cdk/sources/abstract_source.py +125 -79
  32. airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
  33. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
  34. airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
  35. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
  36. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  37. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
  38. airbyte_cdk/sources/config.py +3 -2
  39. airbyte_cdk/sources/connector_state_manager.py +49 -83
  40. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  41. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
  42. airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
  43. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  44. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  45. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  46. airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
  47. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  48. airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
  49. airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
  50. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
  51. airbyte_cdk/sources/declarative/auth/token.py +28 -10
  52. airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
  53. airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
  54. airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
  55. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  56. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  57. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +421 -0
  58. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
  59. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
  60. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1185 -85
  61. airbyte_cdk/sources/declarative/declarative_source.py +5 -2
  62. airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
  63. airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
  64. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
  65. airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
  66. airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
  67. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  68. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  69. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  70. airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
  71. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
  72. airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
  73. airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
  74. airbyte_cdk/sources/declarative/extractors/record_filter.py +65 -8
  75. airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
  76. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
  77. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  78. airbyte_cdk/sources/declarative/incremental/__init__.py +25 -3
  79. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
  80. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  81. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
  82. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +159 -74
  83. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  84. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  85. airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
  86. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
  87. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
  88. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
  89. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
  90. airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
  91. airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
  92. airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
  93. airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
  94. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  95. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  96. airbyte_cdk/sources/declarative/models/__init__.py +1 -1
  97. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1319 -603
  98. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
  99. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
  100. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
  101. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1695 -225
  102. airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
  103. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  104. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  105. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
  106. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  107. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
  108. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
  109. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
  110. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
  111. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
  112. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
  113. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
  114. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
  115. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
  116. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
  117. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
  118. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
  119. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  120. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
  121. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
  122. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
  123. airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
  124. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
  125. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
  126. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
  127. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
  128. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
  129. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
  130. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
  131. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
  132. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
  133. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
  134. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
  135. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
  136. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  137. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
  138. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
  139. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
  140. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
  141. airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
  142. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  143. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  144. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  145. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  146. airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
  147. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
  148. airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
  149. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +228 -72
  150. airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
  151. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
  152. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
  153. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
  154. airbyte_cdk/sources/declarative/spec/spec.py +12 -5
  155. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
  156. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
  157. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
  158. airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
  159. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  160. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  161. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  162. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  163. airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
  164. airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
  165. airbyte_cdk/sources/declarative/types.py +19 -110
  166. airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
  167. airbyte_cdk/sources/embedded/base_integration.py +16 -5
  168. airbyte_cdk/sources/embedded/catalog.py +16 -4
  169. airbyte_cdk/sources/embedded/runner.py +19 -3
  170. airbyte_cdk/sources/embedded/tools.py +5 -2
  171. airbyte_cdk/sources/file_based/README.md +152 -0
  172. airbyte_cdk/sources/file_based/__init__.py +24 -0
  173. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
  174. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
  175. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
  176. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +58 -10
  177. airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
  178. airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
  179. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  180. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
  181. airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
  182. airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
  183. airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
  184. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
  185. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  186. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  187. airbyte_cdk/sources/file_based/exceptions.py +52 -15
  188. airbyte_cdk/sources/file_based/file_based_source.py +163 -33
  189. airbyte_cdk/sources/file_based/file_based_stream_reader.py +83 -5
  190. airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
  191. airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
  192. airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
  193. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  194. airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
  195. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  196. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
  197. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
  198. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +145 -41
  199. airbyte_cdk/sources/file_based/remote_file.py +1 -1
  200. airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
  201. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
  202. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  203. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  204. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
  205. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
  206. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
  207. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
  208. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
  209. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
  210. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  211. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
  212. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +175 -45
  213. airbyte_cdk/sources/http_logger.py +8 -3
  214. airbyte_cdk/sources/message/__init__.py +7 -1
  215. airbyte_cdk/sources/message/repository.py +18 -4
  216. airbyte_cdk/sources/source.py +42 -38
  217. airbyte_cdk/sources/streams/__init__.py +2 -2
  218. airbyte_cdk/sources/streams/availability_strategy.py +54 -3
  219. airbyte_cdk/sources/streams/call_rate.py +64 -21
  220. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  221. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  222. airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
  223. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  224. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  225. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  226. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  227. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
  228. airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
  229. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
  230. airbyte_cdk/sources/streams/concurrent/cursor.py +298 -42
  231. airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
  232. airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
  233. airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
  234. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
  235. airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
  236. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
  237. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  238. airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
  239. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
  240. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
  241. airbyte_cdk/sources/streams/core.py +412 -87
  242. airbyte_cdk/sources/streams/http/__init__.py +2 -1
  243. airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
  244. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  245. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  246. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  247. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  248. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  249. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  250. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  251. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  252. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  253. airbyte_cdk/sources/streams/http/exceptions.py +27 -7
  254. airbyte_cdk/sources/streams/http/http.py +369 -246
  255. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  256. airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
  257. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
  258. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
  259. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
  260. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  261. airbyte_cdk/sources/types.py +154 -0
  262. airbyte_cdk/sources/utils/record_helper.py +36 -21
  263. airbyte_cdk/sources/utils/schema_helpers.py +13 -6
  264. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  265. airbyte_cdk/sources/utils/transform.py +54 -20
  266. airbyte_cdk/sql/_util/hashing.py +34 -0
  267. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  268. airbyte_cdk/sql/constants.py +32 -0
  269. airbyte_cdk/sql/exceptions.py +235 -0
  270. airbyte_cdk/sql/secrets.py +123 -0
  271. airbyte_cdk/sql/shared/__init__.py +15 -0
  272. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  273. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  274. airbyte_cdk/sql/types.py +160 -0
  275. airbyte_cdk/test/catalog_builder.py +70 -18
  276. airbyte_cdk/test/entrypoint_wrapper.py +117 -42
  277. airbyte_cdk/test/mock_http/__init__.py +1 -1
  278. airbyte_cdk/test/mock_http/matcher.py +6 -0
  279. airbyte_cdk/test/mock_http/mocker.py +57 -10
  280. airbyte_cdk/test/mock_http/request.py +19 -3
  281. airbyte_cdk/test/mock_http/response.py +3 -1
  282. airbyte_cdk/test/mock_http/response_builder.py +32 -16
  283. airbyte_cdk/test/state_builder.py +18 -10
  284. airbyte_cdk/test/utils/__init__.py +1 -0
  285. airbyte_cdk/test/utils/data.py +24 -0
  286. airbyte_cdk/test/utils/http_mocking.py +16 -0
  287. airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
  288. airbyte_cdk/test/utils/reading.py +26 -0
  289. airbyte_cdk/utils/__init__.py +2 -1
  290. airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
  291. airbyte_cdk/utils/analytics_message.py +10 -2
  292. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  293. airbyte_cdk/utils/event_timing.py +10 -10
  294. airbyte_cdk/utils/mapping_helpers.py +3 -1
  295. airbyte_cdk/utils/message_utils.py +20 -11
  296. airbyte_cdk/utils/print_buffer.py +75 -0
  297. airbyte_cdk/utils/schema_inferrer.py +198 -28
  298. airbyte_cdk/utils/slice_hasher.py +30 -0
  299. airbyte_cdk/utils/spec_schema_transformations.py +6 -3
  300. airbyte_cdk/utils/stream_status_utils.py +8 -1
  301. airbyte_cdk/utils/traced_exception.py +61 -21
  302. airbyte_cdk-6.13.1.dev4106.dist-info/METADATA +109 -0
  303. airbyte_cdk-6.13.1.dev4106.dist-info/RECORD +349 -0
  304. {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.13.1.dev4106.dist-info}/WHEEL +1 -2
  305. airbyte_cdk-6.13.1.dev4106.dist-info/entry_points.txt +3 -0
  306. airbyte_cdk/sources/declarative/create_partial.py +0 -92
  307. airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
  308. airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
  309. airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
  310. airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
  311. airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
  312. airbyte_cdk/sources/deprecated/base_source.py +0 -94
  313. airbyte_cdk/sources/deprecated/client.py +0 -99
  314. airbyte_cdk/sources/singer/__init__.py +0 -8
  315. airbyte_cdk/sources/singer/singer_helpers.py +0 -304
  316. airbyte_cdk/sources/singer/source.py +0 -186
  317. airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
  318. airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
  319. airbyte_cdk/sources/streams/http/auth/core.py +0 -29
  320. airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
  321. airbyte_cdk/sources/streams/http/auth/token.py +0 -47
  322. airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
  323. airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
  324. airbyte_cdk/sources/utils/schema_models.py +0 -84
  325. airbyte_cdk-0.72.1.dist-info/METADATA +0 -243
  326. airbyte_cdk-0.72.1.dist-info/RECORD +0 -466
  327. airbyte_cdk-0.72.1.dist-info/top_level.txt +0 -3
  328. source_declarative_manifest/main.py +0 -29
  329. unit_tests/connector_builder/__init__.py +0 -3
  330. unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
  331. unit_tests/connector_builder/test_message_grouper.py +0 -713
  332. unit_tests/connector_builder/utils.py +0 -27
  333. unit_tests/destinations/test_destination.py +0 -243
  334. unit_tests/singer/test_singer_helpers.py +0 -56
  335. unit_tests/singer/test_singer_source.py +0 -112
  336. unit_tests/sources/__init__.py +0 -0
  337. unit_tests/sources/concurrent_source/__init__.py +0 -3
  338. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
  339. unit_tests/sources/declarative/__init__.py +0 -3
  340. unit_tests/sources/declarative/auth/__init__.py +0 -3
  341. unit_tests/sources/declarative/auth/test_oauth.py +0 -331
  342. unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
  343. unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
  344. unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
  345. unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
  346. unit_tests/sources/declarative/checks/__init__.py +0 -3
  347. unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
  348. unit_tests/sources/declarative/decoders/__init__.py +0 -0
  349. unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
  350. unit_tests/sources/declarative/external_component.py +0 -13
  351. unit_tests/sources/declarative/extractors/__init__.py +0 -3
  352. unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
  353. unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
  354. unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
  355. unit_tests/sources/declarative/incremental/__init__.py +0 -0
  356. unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
  357. unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
  358. unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
  359. unit_tests/sources/declarative/interpolation/__init__.py +0 -3
  360. unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
  361. unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
  362. unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
  363. unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
  364. unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
  365. unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
  366. unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
  367. unit_tests/sources/declarative/parsers/__init__.py +0 -3
  368. unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
  369. unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
  370. unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1847
  371. unit_tests/sources/declarative/parsers/testing_components.py +0 -36
  372. unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
  373. unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
  374. unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
  375. unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
  376. unit_tests/sources/declarative/requesters/__init__.py +0 -3
  377. unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
  378. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
  379. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
  380. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
  381. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
  382. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
  383. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
  384. unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
  385. unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
  386. unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
  387. unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
  388. unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
  389. unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
  390. unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
  391. unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
  392. unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
  393. unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
  394. unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
  395. unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
  396. unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
  397. unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
  398. unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
  399. unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
  400. unit_tests/sources/declarative/retrievers/__init__.py +0 -3
  401. unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
  402. unit_tests/sources/declarative/schema/__init__.py +0 -6
  403. unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
  404. unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
  405. unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
  406. unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
  407. unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
  408. unit_tests/sources/declarative/states/__init__.py +0 -3
  409. unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
  410. unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
  411. unit_tests/sources/declarative/test_create_partial.py +0 -83
  412. unit_tests/sources/declarative/test_declarative_stream.py +0 -103
  413. unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
  414. unit_tests/sources/declarative/test_types.py +0 -39
  415. unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
  416. unit_tests/sources/file_based/__init__.py +0 -0
  417. unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
  418. unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
  419. unit_tests/sources/file_based/config/__init__.py +0 -0
  420. unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
  421. unit_tests/sources/file_based/config/test_csv_format.py +0 -34
  422. unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
  423. unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
  424. unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
  425. unit_tests/sources/file_based/file_types/__init__.py +0 -0
  426. unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
  427. unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
  428. unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
  429. unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
  430. unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
  431. unit_tests/sources/file_based/helpers.py +0 -70
  432. unit_tests/sources/file_based/in_memory_files_source.py +0 -211
  433. unit_tests/sources/file_based/scenarios/__init__.py +0 -0
  434. unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
  435. unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
  436. unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
  437. unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
  438. unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
  439. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
  440. unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
  441. unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
  442. unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
  443. unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
  444. unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
  445. unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
  446. unit_tests/sources/file_based/stream/__init__.py +0 -0
  447. unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
  448. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
  449. unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
  450. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
  451. unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
  452. unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
  453. unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
  454. unit_tests/sources/file_based/test_scenarios.py +0 -253
  455. unit_tests/sources/file_based/test_schema_helpers.py +0 -346
  456. unit_tests/sources/fixtures/__init__.py +0 -3
  457. unit_tests/sources/fixtures/source_test_fixture.py +0 -153
  458. unit_tests/sources/message/__init__.py +0 -0
  459. unit_tests/sources/message/test_repository.py +0 -153
  460. unit_tests/sources/streams/__init__.py +0 -0
  461. unit_tests/sources/streams/concurrent/__init__.py +0 -3
  462. unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
  463. unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
  464. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
  465. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
  466. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
  467. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
  468. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
  469. unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
  470. unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
  471. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
  472. unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
  473. unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
  474. unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
  475. unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
  476. unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
  477. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
  478. unit_tests/sources/streams/http/__init__.py +0 -0
  479. unit_tests/sources/streams/http/auth/__init__.py +0 -0
  480. unit_tests/sources/streams/http/auth/test_auth.py +0 -173
  481. unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
  482. unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
  483. unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
  484. unit_tests/sources/streams/http/test_http.py +0 -635
  485. unit_tests/sources/streams/test_availability_strategy.py +0 -70
  486. unit_tests/sources/streams/test_call_rate.py +0 -300
  487. unit_tests/sources/streams/test_stream_read.py +0 -405
  488. unit_tests/sources/streams/test_streams_core.py +0 -184
  489. unit_tests/sources/test_abstract_source.py +0 -1442
  490. unit_tests/sources/test_concurrent_source.py +0 -112
  491. unit_tests/sources/test_config.py +0 -92
  492. unit_tests/sources/test_connector_state_manager.py +0 -482
  493. unit_tests/sources/test_http_logger.py +0 -252
  494. unit_tests/sources/test_integration_source.py +0 -86
  495. unit_tests/sources/test_source.py +0 -684
  496. unit_tests/sources/test_source_read.py +0 -460
  497. unit_tests/test/__init__.py +0 -0
  498. unit_tests/test/mock_http/__init__.py +0 -0
  499. unit_tests/test/mock_http/test_matcher.py +0 -53
  500. unit_tests/test/mock_http/test_mocker.py +0 -214
  501. unit_tests/test/mock_http/test_request.py +0 -117
  502. unit_tests/test/mock_http/test_response_builder.py +0 -177
  503. unit_tests/test/test_entrypoint_wrapper.py +0 -240
  504. unit_tests/utils/__init__.py +0 -0
  505. unit_tests/utils/test_datetime_format_inferrer.py +0 -60
  506. unit_tests/utils/test_mapping_helpers.py +0 -54
  507. unit_tests/utils/test_message_utils.py +0 -91
  508. unit_tests/utils/test_rate_limiting.py +0 -26
  509. unit_tests/utils/test_schema_inferrer.py +0 -202
  510. unit_tests/utils/test_secret_utils.py +0 -135
  511. unit_tests/utils/test_stream_status_utils.py +0 -61
  512. unit_tests/utils/test_traced_exception.py +0 -107
  513. /airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
  514. {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
  515. {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
  516. {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
  517. {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.13.1.dev4106.dist-info}/LICENSE.txt +0 -0
@@ -0,0 +1,44 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+ from typing import Any, Dict
3
+
4
+ from serpyco_rs import CustomType, Serializer
5
+
6
+ from .airbyte_protocol import ( # type: ignore[attr-defined] # all classes are imported to airbyte_protocol via *
7
+ AirbyteMessage,
8
+ AirbyteStateBlob,
9
+ AirbyteStateMessage,
10
+ AirbyteStreamState,
11
+ ConfiguredAirbyteCatalog,
12
+ ConfiguredAirbyteStream,
13
+ ConnectorSpecification,
14
+ )
15
+
16
+
17
+ class AirbyteStateBlobType(CustomType[AirbyteStateBlob, Dict[str, Any]]):
18
+ def serialize(self, value: AirbyteStateBlob) -> Dict[str, Any]:
19
+ # cant use orjson.dumps() directly because private attributes are excluded, e.g. "__ab_full_refresh_sync_complete"
20
+ return {k: v for k, v in value.__dict__.items()}
21
+
22
+ def deserialize(self, value: Dict[str, Any]) -> AirbyteStateBlob:
23
+ return AirbyteStateBlob(value)
24
+
25
+ def get_json_schema(self) -> Dict[str, Any]:
26
+ return {"type": "object"}
27
+
28
+
29
+ def custom_type_resolver(t: type) -> CustomType[AirbyteStateBlob, Dict[str, Any]] | None:
30
+ return AirbyteStateBlobType() if t is AirbyteStateBlob else None
31
+
32
+
33
+ AirbyteStreamStateSerializer = Serializer(
34
+ AirbyteStreamState, omit_none=True, custom_type_resolver=custom_type_resolver
35
+ )
36
+ AirbyteStateMessageSerializer = Serializer(
37
+ AirbyteStateMessage, omit_none=True, custom_type_resolver=custom_type_resolver
38
+ )
39
+ AirbyteMessageSerializer = Serializer(
40
+ AirbyteMessage, omit_none=True, custom_type_resolver=custom_type_resolver
41
+ )
42
+ ConfiguredAirbyteCatalogSerializer = Serializer(ConfiguredAirbyteCatalog, omit_none=True)
43
+ ConfiguredAirbyteStreamSerializer = Serializer(ConfiguredAirbyteStream, omit_none=True)
44
+ ConnectorSpecificationSerializer = Serializer(ConnectorSpecification, omit_none=True)
@@ -0,0 +1,13 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Dict, Optional
5
+
6
+
7
+ @dataclass
8
+ class AirbyteFileTransferRecordMessage:
9
+ stream: str
10
+ file: Dict[str, Any]
11
+ emitted_at: int
12
+ namespace: Optional[str] = None
13
+ data: Optional[Dict[str, Any]] = None
@@ -2,4 +2,4 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from airbyte_protocol.models.well_known_types import *
5
+ from airbyte_protocol_dataclasses.models.well_known_types import * # noqa: F403 # Allow '*'
@@ -19,4 +19,8 @@ from .source import Source
19
19
  # this will not be thread-safe.
20
20
  dpath.options.ALLOW_EMPTY_STRING_KEYS = True
21
21
 
22
- __all__ = ["AbstractSource", "BaseConfig", "Source"]
22
+ __all__ = [
23
+ "AbstractSource",
24
+ "BaseConfig",
25
+ "Source",
26
+ ]
@@ -4,8 +4,20 @@
4
4
 
5
5
  import logging
6
6
  from abc import ABC, abstractmethod
7
- from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Tuple, Union
7
+ from typing import (
8
+ Any,
9
+ Dict,
10
+ Iterable,
11
+ Iterator,
12
+ List,
13
+ Mapping,
14
+ MutableMapping,
15
+ Optional,
16
+ Tuple,
17
+ Union,
18
+ )
8
19
 
20
+ from airbyte_cdk.exception_handler import generate_failed_streams_error_message
9
21
  from airbyte_cdk.models import (
10
22
  AirbyteCatalog,
11
23
  AirbyteConnectionStatus,
@@ -17,21 +29,21 @@ from airbyte_cdk.models import (
17
29
  FailureType,
18
30
  Status,
19
31
  StreamDescriptor,
20
- SyncMode,
21
32
  )
22
33
  from airbyte_cdk.models import Type as MessageType
23
34
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
24
35
  from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
25
36
  from airbyte_cdk.sources.source import Source
26
- from airbyte_cdk.sources.streams import FULL_REFRESH_SENTINEL_STATE_KEY, Stream
37
+ from airbyte_cdk.sources.streams import Stream
27
38
  from airbyte_cdk.sources.streams.core import StreamData
28
39
  from airbyte_cdk.sources.streams.http.http import HttpStream
29
40
  from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
30
41
  from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, split_config
31
42
  from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
32
- from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
33
43
  from airbyte_cdk.utils.event_timing import create_timer
34
- from airbyte_cdk.utils.stream_status_utils import as_airbyte_message as stream_status_as_airbyte_message
44
+ from airbyte_cdk.utils.stream_status_utils import (
45
+ as_airbyte_message as stream_status_as_airbyte_message,
46
+ )
35
47
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
36
48
 
37
49
  _default_message_repository = InMemoryMessageRepository()
@@ -44,7 +56,9 @@ class AbstractSource(Source, ABC):
44
56
  """
45
57
 
46
58
  @abstractmethod
47
- def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
59
+ def check_connection(
60
+ self, logger: logging.Logger, config: Mapping[str, Any]
61
+ ) -> Tuple[bool, Optional[Any]]:
48
62
  """
49
63
  :param logger: source logger
50
64
  :param config: The user-provided configuration as specified by the source's spec.
@@ -68,11 +82,6 @@ class AbstractSource(Source, ABC):
68
82
  _stream_to_instance_map: Dict[str, Stream] = {}
69
83
  _slice_logger: SliceLogger = DebugSliceLogger()
70
84
 
71
- @property
72
- def name(self) -> str:
73
- """Source name"""
74
- return self.__class__.__name__
75
-
76
85
  def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
77
86
  """Implements the Discover operation from the Airbyte Specification.
78
87
  See https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/#discover.
@@ -94,7 +103,7 @@ class AbstractSource(Source, ABC):
94
103
  logger: logging.Logger,
95
104
  config: Mapping[str, Any],
96
105
  catalog: ConfiguredAirbyteCatalog,
97
- state: Optional[Union[List[AirbyteStateMessage], MutableMapping[str, Any]]] = None,
106
+ state: Optional[List[AirbyteStateMessage]] = None,
98
107
  ) -> Iterator[AirbyteMessage]:
99
108
  """Implements the Read operation from the Airbyte Specification. See https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/."""
100
109
  logger.info(f"Starting syncing {self.name}")
@@ -102,7 +111,7 @@ class AbstractSource(Source, ABC):
102
111
  # TODO assert all streams exist in the connector
103
112
  # get the streams once in case the connector needs to make any queries to generate them
104
113
  stream_instances = {s.name: s for s in self.streams(config)}
105
- state_manager = ConnectorStateManager(stream_instance_map={s.stream.name: s.stream for s in catalog.streams}, state=state)
114
+ state_manager = ConnectorStateManager(state=state)
106
115
  self._stream_to_instance_map = stream_instances
107
116
 
108
117
  stream_name_to_exception: MutableMapping[str, AirbyteTracedException] = {}
@@ -110,22 +119,36 @@ class AbstractSource(Source, ABC):
110
119
  with create_timer(self.name) as timer:
111
120
  for configured_stream in catalog.streams:
112
121
  stream_instance = stream_instances.get(configured_stream.stream.name)
113
- if not stream_instance:
114
- if not self.raise_exception_on_missing_stream:
115
- continue
116
- raise KeyError(
117
- f"The stream {configured_stream.stream.name} no longer exists in the configuration. "
118
- f"Refresh the schema in replication settings and remove this stream from future sync attempts."
119
- )
120
-
122
+ is_stream_exist = bool(stream_instance)
121
123
  try:
124
+ # Used direct reference to `stream_instance` instead of `is_stream_exist` to avoid mypy type checking errors
125
+ if not stream_instance:
126
+ if not self.raise_exception_on_missing_stream:
127
+ yield stream_status_as_airbyte_message(
128
+ configured_stream.stream, AirbyteStreamStatus.INCOMPLETE
129
+ )
130
+ continue
131
+
132
+ error_message = (
133
+ f"The stream '{configured_stream.stream.name}' in your connection configuration was not found in the source. "
134
+ f"Refresh the schema in your replication settings and remove this stream from future sync attempts."
135
+ )
136
+
137
+ # Use configured_stream as stream_instance to support references in error handling.
138
+ stream_instance = configured_stream.stream
139
+
140
+ raise AirbyteTracedException(
141
+ message="A stream listed in your configuration was not found in the source. Please check the logs for more "
142
+ "details.",
143
+ internal_message=error_message,
144
+ failure_type=FailureType.config_error,
145
+ )
146
+
122
147
  timer.start_event(f"Syncing stream {configured_stream.stream.name}")
123
- stream_is_available, reason = stream_instance.check_availability(logger, self)
124
- if not stream_is_available:
125
- logger.warning(f"Skipped syncing stream '{stream_instance.name}' because it was unavailable. {reason}")
126
- continue
127
148
  logger.info(f"Marking stream {configured_stream.stream.name} as STARTED")
128
- yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.STARTED)
149
+ yield stream_status_as_airbyte_message(
150
+ configured_stream.stream, AirbyteStreamStatus.STARTED
151
+ )
129
152
  yield from self._read_stream(
130
153
  logger=logger,
131
154
  stream_instance=stream_instance,
@@ -134,52 +157,73 @@ class AbstractSource(Source, ABC):
134
157
  internal_config=internal_config,
135
158
  )
136
159
  logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
137
- yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.COMPLETE)
138
- except AirbyteTracedException as e:
139
- logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
140
- logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
141
- yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
142
- yield e.as_sanitized_airbyte_message(stream_descriptor=StreamDescriptor(name=configured_stream.stream.name))
143
- stream_name_to_exception[stream_instance.name] = e
144
- if self.stop_sync_on_stream_failure:
145
- logger.info(
146
- f"Stopping sync on error from stream {configured_stream.stream.name} because {self.name} does not support continuing syncs on error."
147
- )
148
- break
160
+ yield stream_status_as_airbyte_message(
161
+ configured_stream.stream, AirbyteStreamStatus.COMPLETE
162
+ )
163
+
149
164
  except Exception as e:
150
165
  yield from self._emit_queued_messages()
151
- logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
166
+ logger.exception(
167
+ f"Encountered an exception while reading stream {configured_stream.stream.name}"
168
+ )
152
169
  logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
153
- yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
154
- display_message = stream_instance.get_error_display_message(e)
155
- if display_message:
156
- traced_exception = AirbyteTracedException.from_exception(e, message=display_message)
170
+ yield stream_status_as_airbyte_message(
171
+ configured_stream.stream, AirbyteStreamStatus.INCOMPLETE
172
+ )
173
+
174
+ stream_descriptor = StreamDescriptor(name=configured_stream.stream.name)
175
+
176
+ if isinstance(e, AirbyteTracedException):
177
+ traced_exception = e
178
+ info_message = f"Stopping sync on error from stream {configured_stream.stream.name} because {self.name} does not support continuing syncs on error."
157
179
  else:
158
- traced_exception = AirbyteTracedException.from_exception(e)
180
+ traced_exception = self._serialize_exception(
181
+ stream_descriptor, e, stream_instance=stream_instance
182
+ )
183
+ info_message = f"{self.name} does not support continuing syncs on error from stream {configured_stream.stream.name}"
184
+
159
185
  yield traced_exception.as_sanitized_airbyte_message(
160
- stream_descriptor=StreamDescriptor(name=configured_stream.stream.name)
186
+ stream_descriptor=stream_descriptor
161
187
  )
162
- stream_name_to_exception[stream_instance.name] = traced_exception
188
+ stream_name_to_exception[stream_instance.name] = traced_exception # type: ignore # use configured_stream if stream_instance is None
163
189
  if self.stop_sync_on_stream_failure:
164
- logger.info(f"{self.name} does not support continuing syncs on error from stream {configured_stream.stream.name}")
190
+ logger.info(info_message)
165
191
  break
166
192
  finally:
167
- timer.finish_event()
168
- logger.info(f"Finished syncing {configured_stream.stream.name}")
169
- logger.info(timer.report())
193
+ # Finish read event only if the stream instance exists;
194
+ # otherwise, there's no need as it never started
195
+ if is_stream_exist:
196
+ timer.finish_event()
197
+ logger.info(f"Finished syncing {configured_stream.stream.name}")
198
+ logger.info(timer.report())
170
199
 
171
200
  if len(stream_name_to_exception) > 0:
172
- error_message = self._generate_failed_streams_error_message(stream_name_to_exception)
201
+ error_message = generate_failed_streams_error_message(
202
+ {key: [value] for key, value in stream_name_to_exception.items()}
203
+ )
173
204
  logger.info(error_message)
174
205
  # We still raise at least one exception when a stream raises an exception because the platform currently relies
175
206
  # on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error
176
207
  # type because this combined error isn't actionable, but rather the previously emitted individual errors.
177
- raise AirbyteTracedException(message=error_message, failure_type=FailureType.config_error)
208
+ raise AirbyteTracedException(
209
+ message=error_message, failure_type=FailureType.config_error
210
+ )
178
211
  logger.info(f"Finished syncing {self.name}")
179
212
 
213
+ @staticmethod
214
+ def _serialize_exception(
215
+ stream_descriptor: StreamDescriptor, e: Exception, stream_instance: Optional[Stream] = None
216
+ ) -> AirbyteTracedException:
217
+ display_message = stream_instance.get_error_display_message(e) if stream_instance else None
218
+ if display_message:
219
+ return AirbyteTracedException.from_exception(
220
+ e, message=display_message, stream_descriptor=stream_descriptor
221
+ )
222
+ return AirbyteTracedException.from_exception(e, stream_descriptor=stream_descriptor)
223
+
180
224
  @property
181
225
  def raise_exception_on_missing_stream(self) -> bool:
182
- return True
226
+ return False
183
227
 
184
228
  def _read_stream(
185
229
  self,
@@ -190,7 +234,9 @@ class AbstractSource(Source, ABC):
190
234
  internal_config: InternalConfig,
191
235
  ) -> Iterator[AirbyteMessage]:
192
236
  if internal_config.page_size and isinstance(stream_instance, HttpStream):
193
- logger.info(f"Setting page size for {stream_instance.name} to {internal_config.page_size}")
237
+ logger.info(
238
+ f"Setting page size for {stream_instance.name} to {internal_config.page_size}"
239
+ )
194
240
  stream_instance.page_size = internal_config.page_size
195
241
  logger.debug(
196
242
  f"Syncing configured stream: {configured_stream.stream.name}",
@@ -203,15 +249,16 @@ class AbstractSource(Source, ABC):
203
249
  stream_instance.log_stream_sync_configuration()
204
250
 
205
251
  stream_name = configured_stream.stream.name
206
- # The platform always passes stream state regardless of sync mode. We shouldn't need to consider this case within the
207
- # connector, but right now we need to prevent accidental usage of the previous stream state
208
- stream_state = (
209
- state_manager.get_stream_state(stream_name, stream_instance.namespace)
210
- if configured_stream.sync_mode == SyncMode.incremental
211
- else {}
212
- )
252
+ stream_state = state_manager.get_stream_state(stream_name, stream_instance.namespace)
213
253
 
214
- if stream_state and "state" in dir(stream_instance) and not self._stream_state_is_full_refresh(stream_state):
254
+ # This is a hack. Existing full refresh streams that are converted into resumable full refresh need to discard
255
+ # the state because the terminal state for a full refresh sync is not compatible with substream resumable full
256
+ # refresh state. This is only required when running live traffic regression testing since the platform normally
257
+ # handles whether to pass state
258
+ if stream_state == {"__ab_no_cursor_state_message": True}:
259
+ stream_state = {}
260
+
261
+ if "state" in dir(stream_instance):
215
262
  stream_instance.state = stream_state # type: ignore # we check that state in the dir(stream_instance)
216
263
  logger.info(f"Setting state of {self.name} stream to {stream_state}")
217
264
 
@@ -233,7 +280,9 @@ class AbstractSource(Source, ABC):
233
280
  if record_counter == 1:
234
281
  logger.info(f"Marking stream {stream_name} as RUNNING")
235
282
  # If we just read the first record of the stream, emit the transition to the RUNNING state
236
- yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.RUNNING)
283
+ yield stream_status_as_airbyte_message(
284
+ configured_stream.stream, AirbyteStreamStatus.RUNNING
285
+ )
237
286
  yield from self._emit_queued_messages()
238
287
  yield record
239
288
 
@@ -244,14 +293,22 @@ class AbstractSource(Source, ABC):
244
293
  yield from self.message_repository.consume_queue()
245
294
  return
246
295
 
247
- def _get_message(self, record_data_or_message: Union[StreamData, AirbyteMessage], stream: Stream) -> AirbyteMessage:
296
+ def _get_message(
297
+ self, record_data_or_message: Union[StreamData, AirbyteMessage], stream: Stream
298
+ ) -> AirbyteMessage:
248
299
  """
249
300
  Converts the input to an AirbyteMessage if it is a StreamData. Returns the input as is if it is already an AirbyteMessage
250
301
  """
251
- if isinstance(record_data_or_message, AirbyteMessage):
252
- return record_data_or_message
253
- else:
254
- return stream_data_to_airbyte_message(stream.name, record_data_or_message, stream.transformer, stream.get_json_schema())
302
+ match record_data_or_message:
303
+ case AirbyteMessage():
304
+ return record_data_or_message
305
+ case _:
306
+ return stream_data_to_airbyte_message(
307
+ stream.name,
308
+ record_data_or_message,
309
+ stream.transformer,
310
+ stream.get_json_schema(),
311
+ )
255
312
 
256
313
  @property
257
314
  def message_repository(self) -> Union[None, MessageRepository]:
@@ -267,14 +324,3 @@ class AbstractSource(Source, ABC):
267
324
  on the first error seen and emit a single error trace message for that stream.
268
325
  """
269
326
  return False
270
-
271
- @staticmethod
272
- def _generate_failed_streams_error_message(stream_failures: Mapping[str, AirbyteTracedException]) -> str:
273
- failures = ", ".join([f"{stream}: {filter_secrets(exception.__repr__())}" for stream, exception in stream_failures.items()])
274
- return f"During the sync, the following streams did not sync successfully: {failures}"
275
-
276
- @staticmethod
277
- def _stream_state_is_full_refresh(stream_state: Mapping[str, Any]) -> bool:
278
- # For full refresh syncs that don't have a suitable cursor value, we emit a state that contains a sentinel key.
279
- # This key is never used by a connector and is needed during a read to skip assigning the incoming state.
280
- return FULL_REFRESH_SENTINEL_STATE_KEY in stream_state
@@ -1,3 +1,8 @@
1
- #
2
1
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
- #
2
+ """The concurrent source model replaces the legacy Source model.
3
+
4
+ The concurrent source model is a new way to build sources in the Airbyte CDK. It is designed to
5
+ be more ergonomic and performant than the legacy Source model.
6
+
7
+ To implement a source using the concurrent source model, check out the submodules in this package.
8
+ """
@@ -4,20 +4,27 @@
4
4
  import logging
5
5
  from typing import Dict, Iterable, List, Optional, Set
6
6
 
7
- from airbyte_cdk.models import AirbyteMessage, AirbyteStreamStatus
7
+ from airbyte_cdk.exception_handler import generate_failed_streams_error_message
8
+ from airbyte_cdk.models import AirbyteMessage, AirbyteStreamStatus, FailureType, StreamDescriptor
8
9
  from airbyte_cdk.models import Type as MessageType
9
- from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
10
+ from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import (
11
+ PartitionGenerationCompletedSentinel,
12
+ )
13
+ from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException
10
14
  from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
11
15
  from airbyte_cdk.sources.message import MessageRepository
12
16
  from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
13
17
  from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
14
18
  from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
15
19
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
16
- from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
17
20
  from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCompleteSentinel
21
+ from airbyte_cdk.sources.types import Record
18
22
  from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
19
23
  from airbyte_cdk.sources.utils.slice_logger import SliceLogger
20
- from airbyte_cdk.utils.stream_status_utils import as_airbyte_message as stream_status_as_airbyte_message
24
+ from airbyte_cdk.utils import AirbyteTracedException
25
+ from airbyte_cdk.utils.stream_status_utils import (
26
+ as_airbyte_message as stream_status_as_airbyte_message,
27
+ )
21
28
 
22
29
 
23
30
  class ConcurrentReadProcessor:
@@ -56,8 +63,11 @@ class ConcurrentReadProcessor:
56
63
  self._message_repository = message_repository
57
64
  self._partition_reader = partition_reader
58
65
  self._streams_done: Set[str] = set()
66
+ self._exceptions_per_stream_name: dict[str, List[Exception]] = {}
59
67
 
60
- def on_partition_generation_completed(self, sentinel: PartitionGenerationCompletedSentinel) -> Iterable[AirbyteMessage]:
68
+ def on_partition_generation_completed(
69
+ self, sentinel: PartitionGenerationCompletedSentinel
70
+ ) -> Iterable[AirbyteMessage]:
61
71
  """
62
72
  This method is called when a partition generation is completed.
63
73
  1. Remove the stream from the list of streams currently generating partitions
@@ -68,10 +78,13 @@ class ConcurrentReadProcessor:
68
78
  self._streams_currently_generating_partitions.remove(sentinel.stream.name)
69
79
  # It is possible for the stream to already be done if no partitions were generated
70
80
  # If the partition generation process was completed and there are no partitions left to process, the stream is done
71
- if self._is_stream_done(stream_name) or len(self._streams_to_running_partitions[stream_name]) == 0:
81
+ if (
82
+ self._is_stream_done(stream_name)
83
+ or len(self._streams_to_running_partitions[stream_name]) == 0
84
+ ):
72
85
  yield from self._on_stream_is_done(stream_name)
73
86
  if self._stream_instances_to_start_partition_generation:
74
- yield self.start_next_partition_generator()
87
+ yield self.start_next_partition_generator() # type:ignore # None may be yielded
75
88
 
76
89
  def on_partition(self, partition: Partition) -> None:
77
90
  """
@@ -83,10 +96,14 @@ class ConcurrentReadProcessor:
83
96
  stream_name = partition.stream_name()
84
97
  self._streams_to_running_partitions[stream_name].add(partition)
85
98
  if self._slice_logger.should_log_slice_message(self._logger):
86
- self._message_repository.emit_message(self._slice_logger.create_slice_log_message(partition.to_slice()))
99
+ self._message_repository.emit_message(
100
+ self._slice_logger.create_slice_log_message(partition.to_slice())
101
+ )
87
102
  self._thread_pool_manager.submit(self._partition_reader.process_partition, partition)
88
103
 
89
- def on_partition_complete_sentinel(self, sentinel: PartitionCompleteSentinel) -> Iterable[AirbyteMessage]:
104
+ def on_partition_complete_sentinel(
105
+ self, sentinel: PartitionCompleteSentinel
106
+ ) -> Iterable[AirbyteMessage]:
90
107
  """
91
108
  This method is called when a partition is completed.
92
109
  1. Close the partition
@@ -94,14 +111,27 @@ class ConcurrentReadProcessor:
94
111
  3. Emit messages that were added to the message repository
95
112
  """
96
113
  partition = sentinel.partition
97
- partition.close()
98
- partitions_running = self._streams_to_running_partitions[partition.stream_name()]
99
- if partition in partitions_running:
100
- partitions_running.remove(partition)
101
- # If all partitions were generated and this was the last one, the stream is done
102
- if partition.stream_name() not in self._streams_currently_generating_partitions and len(partitions_running) == 0:
103
- yield from self._on_stream_is_done(partition.stream_name())
104
- yield from self._message_repository.consume_queue()
114
+
115
+ try:
116
+ if sentinel.is_successful:
117
+ stream = self._stream_name_to_instance[partition.stream_name()]
118
+ stream.cursor.close_partition(partition)
119
+ except Exception as exception:
120
+ self._flag_exception(partition.stream_name(), exception)
121
+ yield AirbyteTracedException.from_exception(
122
+ exception, stream_descriptor=StreamDescriptor(name=partition.stream_name())
123
+ ).as_sanitized_airbyte_message()
124
+ finally:
125
+ partitions_running = self._streams_to_running_partitions[partition.stream_name()]
126
+ if partition in partitions_running:
127
+ partitions_running.remove(partition)
128
+ # If all partitions were generated and this was the last one, the stream is done
129
+ if (
130
+ partition.stream_name() not in self._streams_currently_generating_partitions
131
+ and len(partitions_running) == 0
132
+ ):
133
+ yield from self._on_stream_is_done(partition.stream_name())
134
+ yield from self._message_repository.consume_queue()
105
135
 
106
136
  def on_record(self, record: Record) -> Iterable[AirbyteMessage]:
107
137
  """
@@ -109,31 +139,52 @@ class ConcurrentReadProcessor:
109
139
  1. Convert the record to an AirbyteMessage
110
140
  2. If this is the first record for the stream, mark the stream as RUNNING
111
141
  3. Increment the record counter for the stream
112
- 4. Emit the message
113
- 5. Emit messages that were added to the message repository
142
+ 4. Ensures the cursor knows the record has been successfully emitted
143
+ 5. Emit the message
144
+ 6. Emit messages that were added to the message repository
114
145
  """
115
146
  # Do not pass a transformer or a schema
116
147
  # AbstractStreams are expected to return data as they are expected.
117
148
  # Any transformation on the data should be done before reaching this point
118
- message = stream_data_to_airbyte_message(record.stream_name, record.data)
149
+ message = stream_data_to_airbyte_message(
150
+ stream_name=record.stream_name,
151
+ data_or_message=record.data,
152
+ is_file_transfer_message=record.is_file_transfer_message,
153
+ )
119
154
  stream = self._stream_name_to_instance[record.stream_name]
120
155
 
121
156
  if message.type == MessageType.RECORD:
122
157
  if self._record_counter[stream.name] == 0:
123
158
  self._logger.info(f"Marking stream {stream.name} as RUNNING")
124
- yield stream_status_as_airbyte_message(stream.as_airbyte_stream(), AirbyteStreamStatus.RUNNING)
159
+ yield stream_status_as_airbyte_message(
160
+ stream.as_airbyte_stream(), AirbyteStreamStatus.RUNNING
161
+ )
125
162
  self._record_counter[stream.name] += 1
163
+ stream.cursor.observe(record)
126
164
  yield message
127
165
  yield from self._message_repository.consume_queue()
128
166
 
129
- def on_exception(self, exception: Exception) -> Iterable[AirbyteMessage]:
167
+ def on_exception(self, exception: StreamThreadException) -> Iterable[AirbyteMessage]:
130
168
  """
131
169
  This method is called when an exception is raised.
132
170
  1. Stop all running streams
133
171
  2. Raise the exception
134
172
  """
135
- yield from self._stop_streams()
136
- raise exception
173
+ self._flag_exception(exception.stream_name, exception.exception)
174
+ self._logger.exception(
175
+ f"Exception while syncing stream {exception.stream_name}", exc_info=exception.exception
176
+ )
177
+
178
+ stream_descriptor = StreamDescriptor(name=exception.stream_name)
179
+ if isinstance(exception.exception, AirbyteTracedException):
180
+ yield exception.exception.as_airbyte_message(stream_descriptor=stream_descriptor)
181
+ else:
182
+ yield AirbyteTracedException.from_exception(
183
+ exception, stream_descriptor=stream_descriptor
184
+ ).as_airbyte_message()
185
+
186
+ def _flag_exception(self, stream_name: str, exception: Exception) -> None:
187
+ self._exceptions_per_stream_name.setdefault(stream_name, []).append(exception)
137
188
 
138
189
  def start_next_partition_generator(self) -> Optional[AirbyteMessage]:
139
190
  """
@@ -164,26 +215,41 @@ class ConcurrentReadProcessor:
164
215
  2. There are no more streams to read from
165
216
  3. All partitions for all streams are closed
166
217
  """
167
- return all([self._is_stream_done(stream_name) for stream_name in self._stream_name_to_instance.keys()])
218
+ is_done = all(
219
+ [
220
+ self._is_stream_done(stream_name)
221
+ for stream_name in self._stream_name_to_instance.keys()
222
+ ]
223
+ )
224
+ if is_done and self._exceptions_per_stream_name:
225
+ error_message = generate_failed_streams_error_message(self._exceptions_per_stream_name)
226
+ self._logger.info(error_message)
227
+ # We still raise at least one exception when a stream raises an exception because the platform currently relies
228
+ # on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error
229
+ # type because this combined error isn't actionable, but rather the previously emitted individual errors.
230
+ raise AirbyteTracedException(
231
+ message=error_message,
232
+ internal_message="Concurrent read failure",
233
+ failure_type=FailureType.config_error,
234
+ )
235
+ return is_done
168
236
 
169
237
  def _is_stream_done(self, stream_name: str) -> bool:
170
238
  return stream_name in self._streams_done
171
239
 
172
240
  def _on_stream_is_done(self, stream_name: str) -> Iterable[AirbyteMessage]:
173
- self._logger.info(f"Read {self._record_counter[stream_name]} records from {stream_name} stream")
241
+ self._logger.info(
242
+ f"Read {self._record_counter[stream_name]} records from {stream_name} stream"
243
+ )
174
244
  self._logger.info(f"Marking stream {stream_name} as STOPPED")
175
245
  stream = self._stream_name_to_instance[stream_name]
176
246
  stream.cursor.ensure_at_least_one_state_emitted()
177
247
  yield from self._message_repository.consume_queue()
178
248
  self._logger.info(f"Finished syncing {stream.name}")
179
249
  self._streams_done.add(stream_name)
180
- yield stream_status_as_airbyte_message(stream.as_airbyte_stream(), AirbyteStreamStatus.COMPLETE)
181
-
182
- def _stop_streams(self) -> Iterable[AirbyteMessage]:
183
- self._thread_pool_manager.shutdown()
184
- for stream_name in self._streams_to_running_partitions.keys():
185
- stream = self._stream_name_to_instance[stream_name]
186
- if not self._is_stream_done(stream_name):
187
- self._logger.info(f"Marking stream {stream.name} as STOPPED")
188
- self._logger.info(f"Finished syncing {stream.name}")
189
- yield stream_status_as_airbyte_message(stream.as_airbyte_stream(), AirbyteStreamStatus.INCOMPLETE)
250
+ stream_status = (
251
+ AirbyteStreamStatus.INCOMPLETE
252
+ if self._exceptions_per_stream_name.get(stream_name, [])
253
+ else AirbyteStreamStatus.COMPLETE
254
+ )
255
+ yield stream_status_as_airbyte_message(stream.as_airbyte_stream(), stream_status)