airbyte-cdk 0.72.0__py3-none-any.whl → 6.17.1.dev0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (518) hide show
  1. airbyte_cdk/__init__.py +355 -6
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +29 -10
  7. airbyte_cdk/connector.py +24 -24
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
  10. airbyte_cdk/connector_builder/main.py +45 -13
  11. airbyte_cdk/connector_builder/message_grouper.py +189 -50
  12. airbyte_cdk/connector_builder/models.py +3 -2
  13. airbyte_cdk/destinations/__init__.py +4 -3
  14. airbyte_cdk/destinations/destination.py +54 -20
  15. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  16. airbyte_cdk/destinations/vector_db_based/config.py +40 -17
  17. airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
  18. airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
  19. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  20. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  21. airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
  22. airbyte_cdk/entrypoint.py +153 -44
  23. airbyte_cdk/exception_handler.py +21 -3
  24. airbyte_cdk/logger.py +30 -44
  25. airbyte_cdk/models/__init__.py +13 -2
  26. airbyte_cdk/models/airbyte_protocol.py +86 -1
  27. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  28. airbyte_cdk/models/file_transfer_record_message.py +13 -0
  29. airbyte_cdk/models/well_known_types.py +1 -1
  30. airbyte_cdk/sources/__init__.py +5 -1
  31. airbyte_cdk/sources/abstract_source.py +125 -79
  32. airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
  33. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
  34. airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
  35. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
  36. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  37. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
  38. airbyte_cdk/sources/config.py +3 -2
  39. airbyte_cdk/sources/connector_state_manager.py +49 -83
  40. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  41. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
  42. airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
  43. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  44. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  45. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  46. airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
  47. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  48. airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
  49. airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
  50. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
  51. airbyte_cdk/sources/declarative/auth/token.py +28 -10
  52. airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
  53. airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
  54. airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
  55. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  56. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  57. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +490 -0
  58. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
  59. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
  60. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1213 -88
  61. airbyte_cdk/sources/declarative/declarative_source.py +5 -2
  62. airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
  63. airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
  64. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
  65. airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
  66. airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
  67. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  68. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  69. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  70. airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
  71. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
  72. airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
  73. airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
  74. airbyte_cdk/sources/declarative/extractors/record_filter.py +63 -8
  75. airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
  76. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
  77. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  78. airbyte_cdk/sources/declarative/incremental/__init__.py +31 -3
  79. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +346 -0
  80. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
  81. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  82. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
  83. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +173 -74
  84. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  85. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  86. airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
  87. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
  88. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
  89. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
  90. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
  91. airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
  92. airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
  93. airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
  94. airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
  95. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  96. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  97. airbyte_cdk/sources/declarative/models/__init__.py +1 -1
  98. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1329 -595
  99. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
  100. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
  101. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
  102. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1763 -226
  103. airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
  104. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  105. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  106. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
  107. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  108. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
  109. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
  110. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
  111. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
  112. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
  113. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
  114. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
  115. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
  116. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
  117. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
  118. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
  119. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
  120. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  121. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
  122. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
  123. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
  124. airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
  125. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
  126. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
  127. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
  128. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
  129. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
  130. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
  131. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
  132. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
  133. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
  134. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
  135. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
  136. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
  137. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  138. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
  139. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
  140. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
  141. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
  142. airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
  143. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  144. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  145. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  146. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  147. airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
  148. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
  149. airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
  150. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +229 -73
  151. airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
  152. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
  153. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
  154. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
  155. airbyte_cdk/sources/declarative/spec/spec.py +12 -5
  156. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
  157. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
  158. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
  159. airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
  160. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  161. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  162. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  163. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  164. airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
  165. airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
  166. airbyte_cdk/sources/declarative/types.py +19 -110
  167. airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
  168. airbyte_cdk/sources/embedded/base_integration.py +16 -5
  169. airbyte_cdk/sources/embedded/catalog.py +16 -4
  170. airbyte_cdk/sources/embedded/runner.py +19 -3
  171. airbyte_cdk/sources/embedded/tools.py +5 -2
  172. airbyte_cdk/sources/file_based/README.md +152 -0
  173. airbyte_cdk/sources/file_based/__init__.py +24 -0
  174. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
  175. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
  176. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
  177. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +47 -10
  178. airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
  179. airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
  180. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  181. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
  182. airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
  183. airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
  184. airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
  185. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
  186. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  187. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  188. airbyte_cdk/sources/file_based/exceptions.py +18 -15
  189. airbyte_cdk/sources/file_based/file_based_source.py +140 -33
  190. airbyte_cdk/sources/file_based/file_based_stream_reader.py +69 -5
  191. airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
  192. airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
  193. airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
  194. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  195. airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
  196. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  197. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
  198. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
  199. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +141 -41
  200. airbyte_cdk/sources/file_based/remote_file.py +1 -1
  201. airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
  202. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
  203. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  204. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  205. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
  206. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
  207. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
  208. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
  209. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
  210. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
  211. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  212. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
  213. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +147 -45
  214. airbyte_cdk/sources/http_logger.py +8 -3
  215. airbyte_cdk/sources/message/__init__.py +7 -1
  216. airbyte_cdk/sources/message/repository.py +18 -4
  217. airbyte_cdk/sources/source.py +42 -38
  218. airbyte_cdk/sources/streams/__init__.py +2 -2
  219. airbyte_cdk/sources/streams/availability_strategy.py +54 -3
  220. airbyte_cdk/sources/streams/call_rate.py +64 -21
  221. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  222. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  223. airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
  224. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  225. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  226. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  227. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  228. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
  229. airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
  230. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
  231. airbyte_cdk/sources/streams/concurrent/cursor.py +298 -42
  232. airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
  233. airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
  234. airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
  235. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
  236. airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
  237. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
  238. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  239. airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
  240. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
  241. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
  242. airbyte_cdk/sources/streams/core.py +412 -87
  243. airbyte_cdk/sources/streams/http/__init__.py +2 -1
  244. airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
  245. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  246. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  247. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  248. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  249. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  250. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  251. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  252. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  253. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  254. airbyte_cdk/sources/streams/http/exceptions.py +27 -7
  255. airbyte_cdk/sources/streams/http/http.py +369 -246
  256. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  257. airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
  258. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
  259. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
  260. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
  261. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  262. airbyte_cdk/sources/types.py +154 -0
  263. airbyte_cdk/sources/utils/record_helper.py +36 -21
  264. airbyte_cdk/sources/utils/schema_helpers.py +13 -6
  265. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  266. airbyte_cdk/sources/utils/transform.py +54 -20
  267. airbyte_cdk/sql/_util/hashing.py +34 -0
  268. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  269. airbyte_cdk/sql/constants.py +32 -0
  270. airbyte_cdk/sql/exceptions.py +235 -0
  271. airbyte_cdk/sql/secrets.py +123 -0
  272. airbyte_cdk/sql/shared/__init__.py +15 -0
  273. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  274. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  275. airbyte_cdk/sql/types.py +160 -0
  276. airbyte_cdk/test/catalog_builder.py +70 -18
  277. airbyte_cdk/test/entrypoint_wrapper.py +117 -42
  278. airbyte_cdk/test/mock_http/__init__.py +1 -1
  279. airbyte_cdk/test/mock_http/matcher.py +6 -0
  280. airbyte_cdk/test/mock_http/mocker.py +57 -10
  281. airbyte_cdk/test/mock_http/request.py +19 -3
  282. airbyte_cdk/test/mock_http/response.py +3 -1
  283. airbyte_cdk/test/mock_http/response_builder.py +32 -16
  284. airbyte_cdk/test/state_builder.py +18 -10
  285. airbyte_cdk/test/utils/__init__.py +1 -0
  286. airbyte_cdk/test/utils/data.py +24 -0
  287. airbyte_cdk/test/utils/http_mocking.py +16 -0
  288. airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
  289. airbyte_cdk/test/utils/reading.py +26 -0
  290. airbyte_cdk/utils/__init__.py +2 -1
  291. airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
  292. airbyte_cdk/utils/analytics_message.py +10 -2
  293. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  294. airbyte_cdk/utils/event_timing.py +10 -10
  295. airbyte_cdk/utils/mapping_helpers.py +3 -1
  296. airbyte_cdk/utils/message_utils.py +20 -11
  297. airbyte_cdk/utils/print_buffer.py +75 -0
  298. airbyte_cdk/utils/schema_inferrer.py +198 -28
  299. airbyte_cdk/utils/slice_hasher.py +30 -0
  300. airbyte_cdk/utils/spec_schema_transformations.py +6 -3
  301. airbyte_cdk/utils/stream_status_utils.py +8 -1
  302. airbyte_cdk/utils/traced_exception.py +61 -21
  303. airbyte_cdk-6.17.1.dev0.dist-info/METADATA +109 -0
  304. airbyte_cdk-6.17.1.dev0.dist-info/RECORD +350 -0
  305. {airbyte_cdk-0.72.0.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/WHEEL +1 -2
  306. airbyte_cdk-6.17.1.dev0.dist-info/entry_points.txt +3 -0
  307. airbyte_cdk/sources/declarative/create_partial.py +0 -92
  308. airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
  309. airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
  310. airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
  311. airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
  312. airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
  313. airbyte_cdk/sources/deprecated/base_source.py +0 -94
  314. airbyte_cdk/sources/deprecated/client.py +0 -99
  315. airbyte_cdk/sources/singer/__init__.py +0 -8
  316. airbyte_cdk/sources/singer/singer_helpers.py +0 -304
  317. airbyte_cdk/sources/singer/source.py +0 -186
  318. airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
  319. airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
  320. airbyte_cdk/sources/streams/http/auth/core.py +0 -29
  321. airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
  322. airbyte_cdk/sources/streams/http/auth/token.py +0 -47
  323. airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
  324. airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
  325. airbyte_cdk/sources/utils/schema_models.py +0 -84
  326. airbyte_cdk-0.72.0.dist-info/METADATA +0 -243
  327. airbyte_cdk-0.72.0.dist-info/RECORD +0 -466
  328. airbyte_cdk-0.72.0.dist-info/top_level.txt +0 -3
  329. source_declarative_manifest/main.py +0 -29
  330. unit_tests/connector_builder/__init__.py +0 -3
  331. unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
  332. unit_tests/connector_builder/test_message_grouper.py +0 -713
  333. unit_tests/connector_builder/utils.py +0 -27
  334. unit_tests/destinations/test_destination.py +0 -243
  335. unit_tests/singer/test_singer_helpers.py +0 -56
  336. unit_tests/singer/test_singer_source.py +0 -112
  337. unit_tests/sources/__init__.py +0 -0
  338. unit_tests/sources/concurrent_source/__init__.py +0 -3
  339. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
  340. unit_tests/sources/declarative/__init__.py +0 -3
  341. unit_tests/sources/declarative/auth/__init__.py +0 -3
  342. unit_tests/sources/declarative/auth/test_oauth.py +0 -331
  343. unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
  344. unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
  345. unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
  346. unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
  347. unit_tests/sources/declarative/checks/__init__.py +0 -3
  348. unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
  349. unit_tests/sources/declarative/decoders/__init__.py +0 -0
  350. unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
  351. unit_tests/sources/declarative/external_component.py +0 -13
  352. unit_tests/sources/declarative/extractors/__init__.py +0 -3
  353. unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
  354. unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
  355. unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
  356. unit_tests/sources/declarative/incremental/__init__.py +0 -0
  357. unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
  358. unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
  359. unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
  360. unit_tests/sources/declarative/interpolation/__init__.py +0 -3
  361. unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
  362. unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
  363. unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
  364. unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
  365. unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
  366. unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
  367. unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
  368. unit_tests/sources/declarative/parsers/__init__.py +0 -3
  369. unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
  370. unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
  371. unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1841
  372. unit_tests/sources/declarative/parsers/testing_components.py +0 -36
  373. unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
  374. unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
  375. unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
  376. unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
  377. unit_tests/sources/declarative/requesters/__init__.py +0 -3
  378. unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
  379. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
  380. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
  381. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
  382. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
  383. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
  384. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
  385. unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
  386. unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
  387. unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
  388. unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
  389. unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
  390. unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
  391. unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
  392. unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
  393. unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
  394. unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
  395. unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
  396. unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
  397. unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
  398. unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
  399. unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
  400. unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
  401. unit_tests/sources/declarative/retrievers/__init__.py +0 -3
  402. unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
  403. unit_tests/sources/declarative/schema/__init__.py +0 -6
  404. unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
  405. unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
  406. unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
  407. unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
  408. unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
  409. unit_tests/sources/declarative/states/__init__.py +0 -3
  410. unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
  411. unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
  412. unit_tests/sources/declarative/test_create_partial.py +0 -83
  413. unit_tests/sources/declarative/test_declarative_stream.py +0 -103
  414. unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
  415. unit_tests/sources/declarative/test_types.py +0 -39
  416. unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
  417. unit_tests/sources/file_based/__init__.py +0 -0
  418. unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
  419. unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
  420. unit_tests/sources/file_based/config/__init__.py +0 -0
  421. unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
  422. unit_tests/sources/file_based/config/test_csv_format.py +0 -34
  423. unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
  424. unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
  425. unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
  426. unit_tests/sources/file_based/file_types/__init__.py +0 -0
  427. unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
  428. unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
  429. unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
  430. unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
  431. unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
  432. unit_tests/sources/file_based/helpers.py +0 -70
  433. unit_tests/sources/file_based/in_memory_files_source.py +0 -211
  434. unit_tests/sources/file_based/scenarios/__init__.py +0 -0
  435. unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
  436. unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
  437. unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
  438. unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
  439. unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
  440. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
  441. unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
  442. unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
  443. unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
  444. unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
  445. unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
  446. unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
  447. unit_tests/sources/file_based/stream/__init__.py +0 -0
  448. unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
  449. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
  450. unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
  451. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
  452. unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
  453. unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
  454. unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
  455. unit_tests/sources/file_based/test_scenarios.py +0 -253
  456. unit_tests/sources/file_based/test_schema_helpers.py +0 -346
  457. unit_tests/sources/fixtures/__init__.py +0 -3
  458. unit_tests/sources/fixtures/source_test_fixture.py +0 -153
  459. unit_tests/sources/message/__init__.py +0 -0
  460. unit_tests/sources/message/test_repository.py +0 -153
  461. unit_tests/sources/streams/__init__.py +0 -0
  462. unit_tests/sources/streams/concurrent/__init__.py +0 -3
  463. unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
  464. unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
  465. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
  466. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
  467. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
  468. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
  469. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
  470. unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
  471. unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
  472. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
  473. unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
  474. unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
  475. unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
  476. unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
  477. unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
  478. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
  479. unit_tests/sources/streams/http/__init__.py +0 -0
  480. unit_tests/sources/streams/http/auth/__init__.py +0 -0
  481. unit_tests/sources/streams/http/auth/test_auth.py +0 -173
  482. unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
  483. unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
  484. unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
  485. unit_tests/sources/streams/http/test_http.py +0 -635
  486. unit_tests/sources/streams/test_availability_strategy.py +0 -70
  487. unit_tests/sources/streams/test_call_rate.py +0 -300
  488. unit_tests/sources/streams/test_stream_read.py +0 -405
  489. unit_tests/sources/streams/test_streams_core.py +0 -184
  490. unit_tests/sources/test_abstract_source.py +0 -1442
  491. unit_tests/sources/test_concurrent_source.py +0 -112
  492. unit_tests/sources/test_config.py +0 -92
  493. unit_tests/sources/test_connector_state_manager.py +0 -482
  494. unit_tests/sources/test_http_logger.py +0 -252
  495. unit_tests/sources/test_integration_source.py +0 -86
  496. unit_tests/sources/test_source.py +0 -684
  497. unit_tests/sources/test_source_read.py +0 -460
  498. unit_tests/test/__init__.py +0 -0
  499. unit_tests/test/mock_http/__init__.py +0 -0
  500. unit_tests/test/mock_http/test_matcher.py +0 -53
  501. unit_tests/test/mock_http/test_mocker.py +0 -214
  502. unit_tests/test/mock_http/test_request.py +0 -117
  503. unit_tests/test/mock_http/test_response_builder.py +0 -177
  504. unit_tests/test/test_entrypoint_wrapper.py +0 -240
  505. unit_tests/utils/__init__.py +0 -0
  506. unit_tests/utils/test_datetime_format_inferrer.py +0 -60
  507. unit_tests/utils/test_mapping_helpers.py +0 -54
  508. unit_tests/utils/test_message_utils.py +0 -91
  509. unit_tests/utils/test_rate_limiting.py +0 -26
  510. unit_tests/utils/test_schema_inferrer.py +0 -202
  511. unit_tests/utils/test_secret_utils.py +0 -135
  512. unit_tests/utils/test_stream_status_utils.py +0 -61
  513. unit_tests/utils/test_traced_exception.py +0 -107
  514. /airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
  515. {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
  516. {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
  517. {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
  518. {airbyte_cdk-0.72.0.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/LICENSE.txt +0 -0
@@ -0,0 +1,44 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+ from typing import Any, Dict
3
+
4
+ from serpyco_rs import CustomType, Serializer
5
+
6
+ from .airbyte_protocol import ( # type: ignore[attr-defined] # all classes are imported to airbyte_protocol via *
7
+ AirbyteMessage,
8
+ AirbyteStateBlob,
9
+ AirbyteStateMessage,
10
+ AirbyteStreamState,
11
+ ConfiguredAirbyteCatalog,
12
+ ConfiguredAirbyteStream,
13
+ ConnectorSpecification,
14
+ )
15
+
16
+
17
+ class AirbyteStateBlobType(CustomType[AirbyteStateBlob, Dict[str, Any]]):
18
+ def serialize(self, value: AirbyteStateBlob) -> Dict[str, Any]:
19
+ # cant use orjson.dumps() directly because private attributes are excluded, e.g. "__ab_full_refresh_sync_complete"
20
+ return {k: v for k, v in value.__dict__.items()}
21
+
22
+ def deserialize(self, value: Dict[str, Any]) -> AirbyteStateBlob:
23
+ return AirbyteStateBlob(value)
24
+
25
+ def get_json_schema(self) -> Dict[str, Any]:
26
+ return {"type": "object"}
27
+
28
+
29
+ def custom_type_resolver(t: type) -> CustomType[AirbyteStateBlob, Dict[str, Any]] | None:
30
+ return AirbyteStateBlobType() if t is AirbyteStateBlob else None
31
+
32
+
33
+ AirbyteStreamStateSerializer = Serializer(
34
+ AirbyteStreamState, omit_none=True, custom_type_resolver=custom_type_resolver
35
+ )
36
+ AirbyteStateMessageSerializer = Serializer(
37
+ AirbyteStateMessage, omit_none=True, custom_type_resolver=custom_type_resolver
38
+ )
39
+ AirbyteMessageSerializer = Serializer(
40
+ AirbyteMessage, omit_none=True, custom_type_resolver=custom_type_resolver
41
+ )
42
+ ConfiguredAirbyteCatalogSerializer = Serializer(ConfiguredAirbyteCatalog, omit_none=True)
43
+ ConfiguredAirbyteStreamSerializer = Serializer(ConfiguredAirbyteStream, omit_none=True)
44
+ ConnectorSpecificationSerializer = Serializer(ConnectorSpecification, omit_none=True)
@@ -0,0 +1,13 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Dict, Optional
5
+
6
+
7
+ @dataclass
8
+ class AirbyteFileTransferRecordMessage:
9
+ stream: str
10
+ file: Dict[str, Any]
11
+ emitted_at: int
12
+ namespace: Optional[str] = None
13
+ data: Optional[Dict[str, Any]] = None
@@ -2,4 +2,4 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from airbyte_protocol.models.well_known_types import *
5
+ from airbyte_protocol_dataclasses.models.well_known_types import * # noqa: F403 # Allow '*'
@@ -19,4 +19,8 @@ from .source import Source
19
19
  # this will not be thread-safe.
20
20
  dpath.options.ALLOW_EMPTY_STRING_KEYS = True
21
21
 
22
- __all__ = ["AbstractSource", "BaseConfig", "Source"]
22
+ __all__ = [
23
+ "AbstractSource",
24
+ "BaseConfig",
25
+ "Source",
26
+ ]
@@ -4,8 +4,20 @@
4
4
 
5
5
  import logging
6
6
  from abc import ABC, abstractmethod
7
- from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Tuple, Union
7
+ from typing import (
8
+ Any,
9
+ Dict,
10
+ Iterable,
11
+ Iterator,
12
+ List,
13
+ Mapping,
14
+ MutableMapping,
15
+ Optional,
16
+ Tuple,
17
+ Union,
18
+ )
8
19
 
20
+ from airbyte_cdk.exception_handler import generate_failed_streams_error_message
9
21
  from airbyte_cdk.models import (
10
22
  AirbyteCatalog,
11
23
  AirbyteConnectionStatus,
@@ -17,21 +29,21 @@ from airbyte_cdk.models import (
17
29
  FailureType,
18
30
  Status,
19
31
  StreamDescriptor,
20
- SyncMode,
21
32
  )
22
33
  from airbyte_cdk.models import Type as MessageType
23
34
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
24
35
  from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
25
36
  from airbyte_cdk.sources.source import Source
26
- from airbyte_cdk.sources.streams import FULL_REFRESH_SENTINEL_STATE_KEY, Stream
37
+ from airbyte_cdk.sources.streams import Stream
27
38
  from airbyte_cdk.sources.streams.core import StreamData
28
39
  from airbyte_cdk.sources.streams.http.http import HttpStream
29
40
  from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
30
41
  from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, split_config
31
42
  from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
32
- from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
33
43
  from airbyte_cdk.utils.event_timing import create_timer
34
- from airbyte_cdk.utils.stream_status_utils import as_airbyte_message as stream_status_as_airbyte_message
44
+ from airbyte_cdk.utils.stream_status_utils import (
45
+ as_airbyte_message as stream_status_as_airbyte_message,
46
+ )
35
47
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
36
48
 
37
49
  _default_message_repository = InMemoryMessageRepository()
@@ -44,7 +56,9 @@ class AbstractSource(Source, ABC):
44
56
  """
45
57
 
46
58
  @abstractmethod
47
- def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
59
+ def check_connection(
60
+ self, logger: logging.Logger, config: Mapping[str, Any]
61
+ ) -> Tuple[bool, Optional[Any]]:
48
62
  """
49
63
  :param logger: source logger
50
64
  :param config: The user-provided configuration as specified by the source's spec.
@@ -68,11 +82,6 @@ class AbstractSource(Source, ABC):
68
82
  _stream_to_instance_map: Dict[str, Stream] = {}
69
83
  _slice_logger: SliceLogger = DebugSliceLogger()
70
84
 
71
- @property
72
- def name(self) -> str:
73
- """Source name"""
74
- return self.__class__.__name__
75
-
76
85
  def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
77
86
  """Implements the Discover operation from the Airbyte Specification.
78
87
  See https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/#discover.
@@ -94,7 +103,7 @@ class AbstractSource(Source, ABC):
94
103
  logger: logging.Logger,
95
104
  config: Mapping[str, Any],
96
105
  catalog: ConfiguredAirbyteCatalog,
97
- state: Optional[Union[List[AirbyteStateMessage], MutableMapping[str, Any]]] = None,
106
+ state: Optional[List[AirbyteStateMessage]] = None,
98
107
  ) -> Iterator[AirbyteMessage]:
99
108
  """Implements the Read operation from the Airbyte Specification. See https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/."""
100
109
  logger.info(f"Starting syncing {self.name}")
@@ -102,7 +111,7 @@ class AbstractSource(Source, ABC):
102
111
  # TODO assert all streams exist in the connector
103
112
  # get the streams once in case the connector needs to make any queries to generate them
104
113
  stream_instances = {s.name: s for s in self.streams(config)}
105
- state_manager = ConnectorStateManager(stream_instance_map={s.stream.name: s.stream for s in catalog.streams}, state=state)
114
+ state_manager = ConnectorStateManager(state=state)
106
115
  self._stream_to_instance_map = stream_instances
107
116
 
108
117
  stream_name_to_exception: MutableMapping[str, AirbyteTracedException] = {}
@@ -110,22 +119,36 @@ class AbstractSource(Source, ABC):
110
119
  with create_timer(self.name) as timer:
111
120
  for configured_stream in catalog.streams:
112
121
  stream_instance = stream_instances.get(configured_stream.stream.name)
113
- if not stream_instance:
114
- if not self.raise_exception_on_missing_stream:
115
- continue
116
- raise KeyError(
117
- f"The stream {configured_stream.stream.name} no longer exists in the configuration. "
118
- f"Refresh the schema in replication settings and remove this stream from future sync attempts."
119
- )
120
-
122
+ is_stream_exist = bool(stream_instance)
121
123
  try:
124
+ # Used direct reference to `stream_instance` instead of `is_stream_exist` to avoid mypy type checking errors
125
+ if not stream_instance:
126
+ if not self.raise_exception_on_missing_stream:
127
+ yield stream_status_as_airbyte_message(
128
+ configured_stream.stream, AirbyteStreamStatus.INCOMPLETE
129
+ )
130
+ continue
131
+
132
+ error_message = (
133
+ f"The stream '{configured_stream.stream.name}' in your connection configuration was not found in the source. "
134
+ f"Refresh the schema in your replication settings and remove this stream from future sync attempts."
135
+ )
136
+
137
+ # Use configured_stream as stream_instance to support references in error handling.
138
+ stream_instance = configured_stream.stream
139
+
140
+ raise AirbyteTracedException(
141
+ message="A stream listed in your configuration was not found in the source. Please check the logs for more "
142
+ "details.",
143
+ internal_message=error_message,
144
+ failure_type=FailureType.config_error,
145
+ )
146
+
122
147
  timer.start_event(f"Syncing stream {configured_stream.stream.name}")
123
- stream_is_available, reason = stream_instance.check_availability(logger, self)
124
- if not stream_is_available:
125
- logger.warning(f"Skipped syncing stream '{stream_instance.name}' because it was unavailable. {reason}")
126
- continue
127
148
  logger.info(f"Marking stream {configured_stream.stream.name} as STARTED")
128
- yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.STARTED)
149
+ yield stream_status_as_airbyte_message(
150
+ configured_stream.stream, AirbyteStreamStatus.STARTED
151
+ )
129
152
  yield from self._read_stream(
130
153
  logger=logger,
131
154
  stream_instance=stream_instance,
@@ -134,52 +157,73 @@ class AbstractSource(Source, ABC):
134
157
  internal_config=internal_config,
135
158
  )
136
159
  logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
137
- yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.COMPLETE)
138
- except AirbyteTracedException as e:
139
- logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
140
- logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
141
- yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
142
- yield e.as_sanitized_airbyte_message(stream_descriptor=StreamDescriptor(name=configured_stream.stream.name))
143
- stream_name_to_exception[stream_instance.name] = e
144
- if self.stop_sync_on_stream_failure:
145
- logger.info(
146
- f"Stopping sync on error from stream {configured_stream.stream.name} because {self.name} does not support continuing syncs on error."
147
- )
148
- break
160
+ yield stream_status_as_airbyte_message(
161
+ configured_stream.stream, AirbyteStreamStatus.COMPLETE
162
+ )
163
+
149
164
  except Exception as e:
150
165
  yield from self._emit_queued_messages()
151
- logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
166
+ logger.exception(
167
+ f"Encountered an exception while reading stream {configured_stream.stream.name}"
168
+ )
152
169
  logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
153
- yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
154
- display_message = stream_instance.get_error_display_message(e)
155
- if display_message:
156
- traced_exception = AirbyteTracedException.from_exception(e, message=display_message)
170
+ yield stream_status_as_airbyte_message(
171
+ configured_stream.stream, AirbyteStreamStatus.INCOMPLETE
172
+ )
173
+
174
+ stream_descriptor = StreamDescriptor(name=configured_stream.stream.name)
175
+
176
+ if isinstance(e, AirbyteTracedException):
177
+ traced_exception = e
178
+ info_message = f"Stopping sync on error from stream {configured_stream.stream.name} because {self.name} does not support continuing syncs on error."
157
179
  else:
158
- traced_exception = AirbyteTracedException.from_exception(e)
180
+ traced_exception = self._serialize_exception(
181
+ stream_descriptor, e, stream_instance=stream_instance
182
+ )
183
+ info_message = f"{self.name} does not support continuing syncs on error from stream {configured_stream.stream.name}"
184
+
159
185
  yield traced_exception.as_sanitized_airbyte_message(
160
- stream_descriptor=StreamDescriptor(name=configured_stream.stream.name)
186
+ stream_descriptor=stream_descriptor
161
187
  )
162
- stream_name_to_exception[stream_instance.name] = traced_exception
188
+ stream_name_to_exception[stream_instance.name] = traced_exception # type: ignore # use configured_stream if stream_instance is None
163
189
  if self.stop_sync_on_stream_failure:
164
- logger.info(f"{self.name} does not support continuing syncs on error from stream {configured_stream.stream.name}")
190
+ logger.info(info_message)
165
191
  break
166
192
  finally:
167
- timer.finish_event()
168
- logger.info(f"Finished syncing {configured_stream.stream.name}")
169
- logger.info(timer.report())
193
+ # Finish read event only if the stream instance exists;
194
+ # otherwise, there's no need as it never started
195
+ if is_stream_exist:
196
+ timer.finish_event()
197
+ logger.info(f"Finished syncing {configured_stream.stream.name}")
198
+ logger.info(timer.report())
170
199
 
171
200
  if len(stream_name_to_exception) > 0:
172
- error_message = self._generate_failed_streams_error_message(stream_name_to_exception)
201
+ error_message = generate_failed_streams_error_message(
202
+ {key: [value] for key, value in stream_name_to_exception.items()}
203
+ )
173
204
  logger.info(error_message)
174
205
  # We still raise at least one exception when a stream raises an exception because the platform currently relies
175
206
  # on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error
176
207
  # type because this combined error isn't actionable, but rather the previously emitted individual errors.
177
- raise AirbyteTracedException(message=error_message, failure_type=FailureType.config_error)
208
+ raise AirbyteTracedException(
209
+ message=error_message, failure_type=FailureType.config_error
210
+ )
178
211
  logger.info(f"Finished syncing {self.name}")
179
212
 
213
+ @staticmethod
214
+ def _serialize_exception(
215
+ stream_descriptor: StreamDescriptor, e: Exception, stream_instance: Optional[Stream] = None
216
+ ) -> AirbyteTracedException:
217
+ display_message = stream_instance.get_error_display_message(e) if stream_instance else None
218
+ if display_message:
219
+ return AirbyteTracedException.from_exception(
220
+ e, message=display_message, stream_descriptor=stream_descriptor
221
+ )
222
+ return AirbyteTracedException.from_exception(e, stream_descriptor=stream_descriptor)
223
+
180
224
  @property
181
225
  def raise_exception_on_missing_stream(self) -> bool:
182
- return True
226
+ return False
183
227
 
184
228
  def _read_stream(
185
229
  self,
@@ -190,7 +234,9 @@ class AbstractSource(Source, ABC):
190
234
  internal_config: InternalConfig,
191
235
  ) -> Iterator[AirbyteMessage]:
192
236
  if internal_config.page_size and isinstance(stream_instance, HttpStream):
193
- logger.info(f"Setting page size for {stream_instance.name} to {internal_config.page_size}")
237
+ logger.info(
238
+ f"Setting page size for {stream_instance.name} to {internal_config.page_size}"
239
+ )
194
240
  stream_instance.page_size = internal_config.page_size
195
241
  logger.debug(
196
242
  f"Syncing configured stream: {configured_stream.stream.name}",
@@ -203,15 +249,16 @@ class AbstractSource(Source, ABC):
203
249
  stream_instance.log_stream_sync_configuration()
204
250
 
205
251
  stream_name = configured_stream.stream.name
206
- # The platform always passes stream state regardless of sync mode. We shouldn't need to consider this case within the
207
- # connector, but right now we need to prevent accidental usage of the previous stream state
208
- stream_state = (
209
- state_manager.get_stream_state(stream_name, stream_instance.namespace)
210
- if configured_stream.sync_mode == SyncMode.incremental
211
- else {}
212
- )
252
+ stream_state = state_manager.get_stream_state(stream_name, stream_instance.namespace)
213
253
 
214
- if stream_state and "state" in dir(stream_instance) and not self._stream_state_is_full_refresh(stream_state):
254
+ # This is a hack. Existing full refresh streams that are converted into resumable full refresh need to discard
255
+ # the state because the terminal state for a full refresh sync is not compatible with substream resumable full
256
+ # refresh state. This is only required when running live traffic regression testing since the platform normally
257
+ # handles whether to pass state
258
+ if stream_state == {"__ab_no_cursor_state_message": True}:
259
+ stream_state = {}
260
+
261
+ if "state" in dir(stream_instance):
215
262
  stream_instance.state = stream_state # type: ignore # we check that state in the dir(stream_instance)
216
263
  logger.info(f"Setting state of {self.name} stream to {stream_state}")
217
264
 
@@ -233,7 +280,9 @@ class AbstractSource(Source, ABC):
233
280
  if record_counter == 1:
234
281
  logger.info(f"Marking stream {stream_name} as RUNNING")
235
282
  # If we just read the first record of the stream, emit the transition to the RUNNING state
236
- yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.RUNNING)
283
+ yield stream_status_as_airbyte_message(
284
+ configured_stream.stream, AirbyteStreamStatus.RUNNING
285
+ )
237
286
  yield from self._emit_queued_messages()
238
287
  yield record
239
288
 
@@ -244,14 +293,22 @@ class AbstractSource(Source, ABC):
244
293
  yield from self.message_repository.consume_queue()
245
294
  return
246
295
 
247
- def _get_message(self, record_data_or_message: Union[StreamData, AirbyteMessage], stream: Stream) -> AirbyteMessage:
296
+ def _get_message(
297
+ self, record_data_or_message: Union[StreamData, AirbyteMessage], stream: Stream
298
+ ) -> AirbyteMessage:
248
299
  """
249
300
  Converts the input to an AirbyteMessage if it is a StreamData. Returns the input as is if it is already an AirbyteMessage
250
301
  """
251
- if isinstance(record_data_or_message, AirbyteMessage):
252
- return record_data_or_message
253
- else:
254
- return stream_data_to_airbyte_message(stream.name, record_data_or_message, stream.transformer, stream.get_json_schema())
302
+ match record_data_or_message:
303
+ case AirbyteMessage():
304
+ return record_data_or_message
305
+ case _:
306
+ return stream_data_to_airbyte_message(
307
+ stream.name,
308
+ record_data_or_message,
309
+ stream.transformer,
310
+ stream.get_json_schema(),
311
+ )
255
312
 
256
313
  @property
257
314
  def message_repository(self) -> Union[None, MessageRepository]:
@@ -267,14 +324,3 @@ class AbstractSource(Source, ABC):
267
324
  on the first error seen and emit a single error trace message for that stream.
268
325
  """
269
326
  return False
270
-
271
- @staticmethod
272
- def _generate_failed_streams_error_message(stream_failures: Mapping[str, AirbyteTracedException]) -> str:
273
- failures = ", ".join([f"{stream}: {filter_secrets(exception.__repr__())}" for stream, exception in stream_failures.items()])
274
- return f"During the sync, the following streams did not sync successfully: {failures}"
275
-
276
- @staticmethod
277
- def _stream_state_is_full_refresh(stream_state: Mapping[str, Any]) -> bool:
278
- # For full refresh syncs that don't have a suitable cursor value, we emit a state that contains a sentinel key.
279
- # This key is never used by a connector and is needed during a read to skip assigning the incoming state.
280
- return FULL_REFRESH_SENTINEL_STATE_KEY in stream_state
@@ -1,3 +1,8 @@
1
- #
2
1
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
- #
2
+ """The concurrent source model replaces the legacy Source model.
3
+
4
+ The concurrent source model is a new way to build sources in the Airbyte CDK. It is designed to
5
+ be more ergonomic and performant than the legacy Source model.
6
+
7
+ To implement a source using the concurrent source model, check out the submodules in this package.
8
+ """
@@ -4,20 +4,27 @@
4
4
  import logging
5
5
  from typing import Dict, Iterable, List, Optional, Set
6
6
 
7
- from airbyte_cdk.models import AirbyteMessage, AirbyteStreamStatus
7
+ from airbyte_cdk.exception_handler import generate_failed_streams_error_message
8
+ from airbyte_cdk.models import AirbyteMessage, AirbyteStreamStatus, FailureType, StreamDescriptor
8
9
  from airbyte_cdk.models import Type as MessageType
9
- from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
10
+ from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import (
11
+ PartitionGenerationCompletedSentinel,
12
+ )
13
+ from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException
10
14
  from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
11
15
  from airbyte_cdk.sources.message import MessageRepository
12
16
  from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
13
17
  from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
14
18
  from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
15
19
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
16
- from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
17
20
  from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCompleteSentinel
21
+ from airbyte_cdk.sources.types import Record
18
22
  from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
19
23
  from airbyte_cdk.sources.utils.slice_logger import SliceLogger
20
- from airbyte_cdk.utils.stream_status_utils import as_airbyte_message as stream_status_as_airbyte_message
24
+ from airbyte_cdk.utils import AirbyteTracedException
25
+ from airbyte_cdk.utils.stream_status_utils import (
26
+ as_airbyte_message as stream_status_as_airbyte_message,
27
+ )
21
28
 
22
29
 
23
30
  class ConcurrentReadProcessor:
@@ -56,8 +63,11 @@ class ConcurrentReadProcessor:
56
63
  self._message_repository = message_repository
57
64
  self._partition_reader = partition_reader
58
65
  self._streams_done: Set[str] = set()
66
+ self._exceptions_per_stream_name: dict[str, List[Exception]] = {}
59
67
 
60
- def on_partition_generation_completed(self, sentinel: PartitionGenerationCompletedSentinel) -> Iterable[AirbyteMessage]:
68
+ def on_partition_generation_completed(
69
+ self, sentinel: PartitionGenerationCompletedSentinel
70
+ ) -> Iterable[AirbyteMessage]:
61
71
  """
62
72
  This method is called when a partition generation is completed.
63
73
  1. Remove the stream from the list of streams currently generating partitions
@@ -68,10 +78,13 @@ class ConcurrentReadProcessor:
68
78
  self._streams_currently_generating_partitions.remove(sentinel.stream.name)
69
79
  # It is possible for the stream to already be done if no partitions were generated
70
80
  # If the partition generation process was completed and there are no partitions left to process, the stream is done
71
- if self._is_stream_done(stream_name) or len(self._streams_to_running_partitions[stream_name]) == 0:
81
+ if (
82
+ self._is_stream_done(stream_name)
83
+ or len(self._streams_to_running_partitions[stream_name]) == 0
84
+ ):
72
85
  yield from self._on_stream_is_done(stream_name)
73
86
  if self._stream_instances_to_start_partition_generation:
74
- yield self.start_next_partition_generator()
87
+ yield self.start_next_partition_generator() # type:ignore # None may be yielded
75
88
 
76
89
  def on_partition(self, partition: Partition) -> None:
77
90
  """
@@ -83,10 +96,14 @@ class ConcurrentReadProcessor:
83
96
  stream_name = partition.stream_name()
84
97
  self._streams_to_running_partitions[stream_name].add(partition)
85
98
  if self._slice_logger.should_log_slice_message(self._logger):
86
- self._message_repository.emit_message(self._slice_logger.create_slice_log_message(partition.to_slice()))
99
+ self._message_repository.emit_message(
100
+ self._slice_logger.create_slice_log_message(partition.to_slice())
101
+ )
87
102
  self._thread_pool_manager.submit(self._partition_reader.process_partition, partition)
88
103
 
89
- def on_partition_complete_sentinel(self, sentinel: PartitionCompleteSentinel) -> Iterable[AirbyteMessage]:
104
+ def on_partition_complete_sentinel(
105
+ self, sentinel: PartitionCompleteSentinel
106
+ ) -> Iterable[AirbyteMessage]:
90
107
  """
91
108
  This method is called when a partition is completed.
92
109
  1. Close the partition
@@ -94,14 +111,27 @@ class ConcurrentReadProcessor:
94
111
  3. Emit messages that were added to the message repository
95
112
  """
96
113
  partition = sentinel.partition
97
- partition.close()
98
- partitions_running = self._streams_to_running_partitions[partition.stream_name()]
99
- if partition in partitions_running:
100
- partitions_running.remove(partition)
101
- # If all partitions were generated and this was the last one, the stream is done
102
- if partition.stream_name() not in self._streams_currently_generating_partitions and len(partitions_running) == 0:
103
- yield from self._on_stream_is_done(partition.stream_name())
104
- yield from self._message_repository.consume_queue()
114
+
115
+ try:
116
+ if sentinel.is_successful:
117
+ stream = self._stream_name_to_instance[partition.stream_name()]
118
+ stream.cursor.close_partition(partition)
119
+ except Exception as exception:
120
+ self._flag_exception(partition.stream_name(), exception)
121
+ yield AirbyteTracedException.from_exception(
122
+ exception, stream_descriptor=StreamDescriptor(name=partition.stream_name())
123
+ ).as_sanitized_airbyte_message()
124
+ finally:
125
+ partitions_running = self._streams_to_running_partitions[partition.stream_name()]
126
+ if partition in partitions_running:
127
+ partitions_running.remove(partition)
128
+ # If all partitions were generated and this was the last one, the stream is done
129
+ if (
130
+ partition.stream_name() not in self._streams_currently_generating_partitions
131
+ and len(partitions_running) == 0
132
+ ):
133
+ yield from self._on_stream_is_done(partition.stream_name())
134
+ yield from self._message_repository.consume_queue()
105
135
 
106
136
  def on_record(self, record: Record) -> Iterable[AirbyteMessage]:
107
137
  """
@@ -109,31 +139,52 @@ class ConcurrentReadProcessor:
109
139
  1. Convert the record to an AirbyteMessage
110
140
  2. If this is the first record for the stream, mark the stream as RUNNING
111
141
  3. Increment the record counter for the stream
112
- 4. Emit the message
113
- 5. Emit messages that were added to the message repository
142
+ 4. Ensures the cursor knows the record has been successfully emitted
143
+ 5. Emit the message
144
+ 6. Emit messages that were added to the message repository
114
145
  """
115
146
  # Do not pass a transformer or a schema
116
147
  # AbstractStreams are expected to return data as they are expected.
117
148
  # Any transformation on the data should be done before reaching this point
118
- message = stream_data_to_airbyte_message(record.stream_name, record.data)
149
+ message = stream_data_to_airbyte_message(
150
+ stream_name=record.stream_name,
151
+ data_or_message=record.data,
152
+ is_file_transfer_message=record.is_file_transfer_message,
153
+ )
119
154
  stream = self._stream_name_to_instance[record.stream_name]
120
155
 
121
156
  if message.type == MessageType.RECORD:
122
157
  if self._record_counter[stream.name] == 0:
123
158
  self._logger.info(f"Marking stream {stream.name} as RUNNING")
124
- yield stream_status_as_airbyte_message(stream.as_airbyte_stream(), AirbyteStreamStatus.RUNNING)
159
+ yield stream_status_as_airbyte_message(
160
+ stream.as_airbyte_stream(), AirbyteStreamStatus.RUNNING
161
+ )
125
162
  self._record_counter[stream.name] += 1
163
+ stream.cursor.observe(record)
126
164
  yield message
127
165
  yield from self._message_repository.consume_queue()
128
166
 
129
- def on_exception(self, exception: Exception) -> Iterable[AirbyteMessage]:
167
+ def on_exception(self, exception: StreamThreadException) -> Iterable[AirbyteMessage]:
130
168
  """
131
169
  This method is called when an exception is raised.
132
170
  1. Stop all running streams
133
171
  2. Raise the exception
134
172
  """
135
- yield from self._stop_streams()
136
- raise exception
173
+ self._flag_exception(exception.stream_name, exception.exception)
174
+ self._logger.exception(
175
+ f"Exception while syncing stream {exception.stream_name}", exc_info=exception.exception
176
+ )
177
+
178
+ stream_descriptor = StreamDescriptor(name=exception.stream_name)
179
+ if isinstance(exception.exception, AirbyteTracedException):
180
+ yield exception.exception.as_airbyte_message(stream_descriptor=stream_descriptor)
181
+ else:
182
+ yield AirbyteTracedException.from_exception(
183
+ exception, stream_descriptor=stream_descriptor
184
+ ).as_airbyte_message()
185
+
186
+ def _flag_exception(self, stream_name: str, exception: Exception) -> None:
187
+ self._exceptions_per_stream_name.setdefault(stream_name, []).append(exception)
137
188
 
138
189
  def start_next_partition_generator(self) -> Optional[AirbyteMessage]:
139
190
  """
@@ -164,26 +215,41 @@ class ConcurrentReadProcessor:
164
215
  2. There are no more streams to read from
165
216
  3. All partitions for all streams are closed
166
217
  """
167
- return all([self._is_stream_done(stream_name) for stream_name in self._stream_name_to_instance.keys()])
218
+ is_done = all(
219
+ [
220
+ self._is_stream_done(stream_name)
221
+ for stream_name in self._stream_name_to_instance.keys()
222
+ ]
223
+ )
224
+ if is_done and self._exceptions_per_stream_name:
225
+ error_message = generate_failed_streams_error_message(self._exceptions_per_stream_name)
226
+ self._logger.info(error_message)
227
+ # We still raise at least one exception when a stream raises an exception because the platform currently relies
228
+ # on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error
229
+ # type because this combined error isn't actionable, but rather the previously emitted individual errors.
230
+ raise AirbyteTracedException(
231
+ message=error_message,
232
+ internal_message="Concurrent read failure",
233
+ failure_type=FailureType.config_error,
234
+ )
235
+ return is_done
168
236
 
169
237
  def _is_stream_done(self, stream_name: str) -> bool:
170
238
  return stream_name in self._streams_done
171
239
 
172
240
  def _on_stream_is_done(self, stream_name: str) -> Iterable[AirbyteMessage]:
173
- self._logger.info(f"Read {self._record_counter[stream_name]} records from {stream_name} stream")
241
+ self._logger.info(
242
+ f"Read {self._record_counter[stream_name]} records from {stream_name} stream"
243
+ )
174
244
  self._logger.info(f"Marking stream {stream_name} as STOPPED")
175
245
  stream = self._stream_name_to_instance[stream_name]
176
246
  stream.cursor.ensure_at_least_one_state_emitted()
177
247
  yield from self._message_repository.consume_queue()
178
248
  self._logger.info(f"Finished syncing {stream.name}")
179
249
  self._streams_done.add(stream_name)
180
- yield stream_status_as_airbyte_message(stream.as_airbyte_stream(), AirbyteStreamStatus.COMPLETE)
181
-
182
- def _stop_streams(self) -> Iterable[AirbyteMessage]:
183
- self._thread_pool_manager.shutdown()
184
- for stream_name in self._streams_to_running_partitions.keys():
185
- stream = self._stream_name_to_instance[stream_name]
186
- if not self._is_stream_done(stream_name):
187
- self._logger.info(f"Marking stream {stream.name} as STOPPED")
188
- self._logger.info(f"Finished syncing {stream.name}")
189
- yield stream_status_as_airbyte_message(stream.as_airbyte_stream(), AirbyteStreamStatus.INCOMPLETE)
250
+ stream_status = (
251
+ AirbyteStreamStatus.INCOMPLETE
252
+ if self._exceptions_per_stream_name.get(stream_name, [])
253
+ else AirbyteStreamStatus.COMPLETE
254
+ )
255
+ yield stream_status_as_airbyte_message(stream.as_airbyte_stream(), stream_status)