airbyte-cdk 0.72.1__py3-none-any.whl → 6.17.1.dev0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (518) hide show
  1. airbyte_cdk/__init__.py +355 -6
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +29 -10
  7. airbyte_cdk/connector.py +24 -24
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
  10. airbyte_cdk/connector_builder/main.py +45 -13
  11. airbyte_cdk/connector_builder/message_grouper.py +189 -50
  12. airbyte_cdk/connector_builder/models.py +3 -2
  13. airbyte_cdk/destinations/__init__.py +4 -3
  14. airbyte_cdk/destinations/destination.py +54 -20
  15. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  16. airbyte_cdk/destinations/vector_db_based/config.py +40 -17
  17. airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
  18. airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
  19. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  20. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  21. airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
  22. airbyte_cdk/entrypoint.py +153 -44
  23. airbyte_cdk/exception_handler.py +21 -3
  24. airbyte_cdk/logger.py +30 -44
  25. airbyte_cdk/models/__init__.py +13 -2
  26. airbyte_cdk/models/airbyte_protocol.py +86 -1
  27. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  28. airbyte_cdk/models/file_transfer_record_message.py +13 -0
  29. airbyte_cdk/models/well_known_types.py +1 -1
  30. airbyte_cdk/sources/__init__.py +5 -1
  31. airbyte_cdk/sources/abstract_source.py +125 -79
  32. airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
  33. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
  34. airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
  35. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
  36. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  37. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
  38. airbyte_cdk/sources/config.py +3 -2
  39. airbyte_cdk/sources/connector_state_manager.py +49 -83
  40. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  41. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
  42. airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
  43. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  44. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  45. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  46. airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
  47. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  48. airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
  49. airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
  50. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
  51. airbyte_cdk/sources/declarative/auth/token.py +28 -10
  52. airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
  53. airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
  54. airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
  55. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  56. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  57. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +490 -0
  58. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
  59. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
  60. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1185 -85
  61. airbyte_cdk/sources/declarative/declarative_source.py +5 -2
  62. airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
  63. airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
  64. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
  65. airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
  66. airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
  67. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  68. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  69. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  70. airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
  71. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
  72. airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
  73. airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
  74. airbyte_cdk/sources/declarative/extractors/record_filter.py +63 -8
  75. airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
  76. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
  77. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  78. airbyte_cdk/sources/declarative/incremental/__init__.py +31 -3
  79. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +346 -0
  80. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
  81. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  82. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
  83. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +173 -74
  84. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  85. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  86. airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
  87. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
  88. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
  89. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
  90. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
  91. airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
  92. airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
  93. airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
  94. airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
  95. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  96. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  97. airbyte_cdk/sources/declarative/models/__init__.py +1 -1
  98. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1319 -603
  99. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
  100. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
  101. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
  102. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1759 -225
  103. airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
  104. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  105. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  106. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
  107. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  108. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
  109. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
  110. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
  111. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
  112. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
  113. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
  114. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
  115. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
  116. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
  117. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
  118. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
  119. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
  120. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  121. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
  122. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
  123. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
  124. airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
  125. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
  126. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
  127. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
  128. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
  129. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
  130. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
  131. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
  132. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
  133. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
  134. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
  135. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
  136. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
  137. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  138. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
  139. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
  140. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
  141. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
  142. airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
  143. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  144. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  145. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  146. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  147. airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
  148. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
  149. airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
  150. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +229 -73
  151. airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
  152. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
  153. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
  154. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
  155. airbyte_cdk/sources/declarative/spec/spec.py +12 -5
  156. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
  157. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
  158. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
  159. airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
  160. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  161. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  162. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  163. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  164. airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
  165. airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
  166. airbyte_cdk/sources/declarative/types.py +19 -110
  167. airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
  168. airbyte_cdk/sources/embedded/base_integration.py +16 -5
  169. airbyte_cdk/sources/embedded/catalog.py +16 -4
  170. airbyte_cdk/sources/embedded/runner.py +19 -3
  171. airbyte_cdk/sources/embedded/tools.py +5 -2
  172. airbyte_cdk/sources/file_based/README.md +152 -0
  173. airbyte_cdk/sources/file_based/__init__.py +24 -0
  174. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
  175. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
  176. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
  177. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +47 -10
  178. airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
  179. airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
  180. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  181. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
  182. airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
  183. airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
  184. airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
  185. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
  186. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  187. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  188. airbyte_cdk/sources/file_based/exceptions.py +18 -15
  189. airbyte_cdk/sources/file_based/file_based_source.py +140 -33
  190. airbyte_cdk/sources/file_based/file_based_stream_reader.py +69 -5
  191. airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
  192. airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
  193. airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
  194. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  195. airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
  196. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  197. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
  198. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
  199. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +141 -41
  200. airbyte_cdk/sources/file_based/remote_file.py +1 -1
  201. airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
  202. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
  203. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  204. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  205. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
  206. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
  207. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
  208. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
  209. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
  210. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
  211. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  212. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
  213. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +147 -45
  214. airbyte_cdk/sources/http_logger.py +8 -3
  215. airbyte_cdk/sources/message/__init__.py +7 -1
  216. airbyte_cdk/sources/message/repository.py +18 -4
  217. airbyte_cdk/sources/source.py +42 -38
  218. airbyte_cdk/sources/streams/__init__.py +2 -2
  219. airbyte_cdk/sources/streams/availability_strategy.py +54 -3
  220. airbyte_cdk/sources/streams/call_rate.py +64 -21
  221. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  222. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  223. airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
  224. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  225. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  226. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  227. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  228. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
  229. airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
  230. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
  231. airbyte_cdk/sources/streams/concurrent/cursor.py +298 -42
  232. airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
  233. airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
  234. airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
  235. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
  236. airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
  237. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
  238. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  239. airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
  240. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
  241. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
  242. airbyte_cdk/sources/streams/core.py +412 -87
  243. airbyte_cdk/sources/streams/http/__init__.py +2 -1
  244. airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
  245. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  246. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  247. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  248. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  249. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  250. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  251. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  252. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  253. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  254. airbyte_cdk/sources/streams/http/exceptions.py +27 -7
  255. airbyte_cdk/sources/streams/http/http.py +369 -246
  256. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  257. airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
  258. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
  259. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
  260. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
  261. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  262. airbyte_cdk/sources/types.py +154 -0
  263. airbyte_cdk/sources/utils/record_helper.py +36 -21
  264. airbyte_cdk/sources/utils/schema_helpers.py +13 -6
  265. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  266. airbyte_cdk/sources/utils/transform.py +54 -20
  267. airbyte_cdk/sql/_util/hashing.py +34 -0
  268. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  269. airbyte_cdk/sql/constants.py +32 -0
  270. airbyte_cdk/sql/exceptions.py +235 -0
  271. airbyte_cdk/sql/secrets.py +123 -0
  272. airbyte_cdk/sql/shared/__init__.py +15 -0
  273. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  274. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  275. airbyte_cdk/sql/types.py +160 -0
  276. airbyte_cdk/test/catalog_builder.py +70 -18
  277. airbyte_cdk/test/entrypoint_wrapper.py +117 -42
  278. airbyte_cdk/test/mock_http/__init__.py +1 -1
  279. airbyte_cdk/test/mock_http/matcher.py +6 -0
  280. airbyte_cdk/test/mock_http/mocker.py +57 -10
  281. airbyte_cdk/test/mock_http/request.py +19 -3
  282. airbyte_cdk/test/mock_http/response.py +3 -1
  283. airbyte_cdk/test/mock_http/response_builder.py +32 -16
  284. airbyte_cdk/test/state_builder.py +18 -10
  285. airbyte_cdk/test/utils/__init__.py +1 -0
  286. airbyte_cdk/test/utils/data.py +24 -0
  287. airbyte_cdk/test/utils/http_mocking.py +16 -0
  288. airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
  289. airbyte_cdk/test/utils/reading.py +26 -0
  290. airbyte_cdk/utils/__init__.py +2 -1
  291. airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
  292. airbyte_cdk/utils/analytics_message.py +10 -2
  293. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  294. airbyte_cdk/utils/event_timing.py +10 -10
  295. airbyte_cdk/utils/mapping_helpers.py +3 -1
  296. airbyte_cdk/utils/message_utils.py +20 -11
  297. airbyte_cdk/utils/print_buffer.py +75 -0
  298. airbyte_cdk/utils/schema_inferrer.py +198 -28
  299. airbyte_cdk/utils/slice_hasher.py +30 -0
  300. airbyte_cdk/utils/spec_schema_transformations.py +6 -3
  301. airbyte_cdk/utils/stream_status_utils.py +8 -1
  302. airbyte_cdk/utils/traced_exception.py +61 -21
  303. airbyte_cdk-6.17.1.dev0.dist-info/METADATA +109 -0
  304. airbyte_cdk-6.17.1.dev0.dist-info/RECORD +350 -0
  305. {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/WHEEL +1 -2
  306. airbyte_cdk-6.17.1.dev0.dist-info/entry_points.txt +3 -0
  307. airbyte_cdk/sources/declarative/create_partial.py +0 -92
  308. airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
  309. airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
  310. airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
  311. airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
  312. airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
  313. airbyte_cdk/sources/deprecated/base_source.py +0 -94
  314. airbyte_cdk/sources/deprecated/client.py +0 -99
  315. airbyte_cdk/sources/singer/__init__.py +0 -8
  316. airbyte_cdk/sources/singer/singer_helpers.py +0 -304
  317. airbyte_cdk/sources/singer/source.py +0 -186
  318. airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
  319. airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
  320. airbyte_cdk/sources/streams/http/auth/core.py +0 -29
  321. airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
  322. airbyte_cdk/sources/streams/http/auth/token.py +0 -47
  323. airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
  324. airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
  325. airbyte_cdk/sources/utils/schema_models.py +0 -84
  326. airbyte_cdk-0.72.1.dist-info/METADATA +0 -243
  327. airbyte_cdk-0.72.1.dist-info/RECORD +0 -466
  328. airbyte_cdk-0.72.1.dist-info/top_level.txt +0 -3
  329. source_declarative_manifest/main.py +0 -29
  330. unit_tests/connector_builder/__init__.py +0 -3
  331. unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
  332. unit_tests/connector_builder/test_message_grouper.py +0 -713
  333. unit_tests/connector_builder/utils.py +0 -27
  334. unit_tests/destinations/test_destination.py +0 -243
  335. unit_tests/singer/test_singer_helpers.py +0 -56
  336. unit_tests/singer/test_singer_source.py +0 -112
  337. unit_tests/sources/__init__.py +0 -0
  338. unit_tests/sources/concurrent_source/__init__.py +0 -3
  339. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
  340. unit_tests/sources/declarative/__init__.py +0 -3
  341. unit_tests/sources/declarative/auth/__init__.py +0 -3
  342. unit_tests/sources/declarative/auth/test_oauth.py +0 -331
  343. unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
  344. unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
  345. unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
  346. unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
  347. unit_tests/sources/declarative/checks/__init__.py +0 -3
  348. unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
  349. unit_tests/sources/declarative/decoders/__init__.py +0 -0
  350. unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
  351. unit_tests/sources/declarative/external_component.py +0 -13
  352. unit_tests/sources/declarative/extractors/__init__.py +0 -3
  353. unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
  354. unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
  355. unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
  356. unit_tests/sources/declarative/incremental/__init__.py +0 -0
  357. unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
  358. unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
  359. unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
  360. unit_tests/sources/declarative/interpolation/__init__.py +0 -3
  361. unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
  362. unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
  363. unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
  364. unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
  365. unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
  366. unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
  367. unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
  368. unit_tests/sources/declarative/parsers/__init__.py +0 -3
  369. unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
  370. unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
  371. unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1847
  372. unit_tests/sources/declarative/parsers/testing_components.py +0 -36
  373. unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
  374. unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
  375. unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
  376. unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
  377. unit_tests/sources/declarative/requesters/__init__.py +0 -3
  378. unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
  379. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
  380. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
  381. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
  382. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
  383. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
  384. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
  385. unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
  386. unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
  387. unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
  388. unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
  389. unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
  390. unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
  391. unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
  392. unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
  393. unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
  394. unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
  395. unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
  396. unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
  397. unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
  398. unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
  399. unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
  400. unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
  401. unit_tests/sources/declarative/retrievers/__init__.py +0 -3
  402. unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
  403. unit_tests/sources/declarative/schema/__init__.py +0 -6
  404. unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
  405. unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
  406. unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
  407. unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
  408. unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
  409. unit_tests/sources/declarative/states/__init__.py +0 -3
  410. unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
  411. unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
  412. unit_tests/sources/declarative/test_create_partial.py +0 -83
  413. unit_tests/sources/declarative/test_declarative_stream.py +0 -103
  414. unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
  415. unit_tests/sources/declarative/test_types.py +0 -39
  416. unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
  417. unit_tests/sources/file_based/__init__.py +0 -0
  418. unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
  419. unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
  420. unit_tests/sources/file_based/config/__init__.py +0 -0
  421. unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
  422. unit_tests/sources/file_based/config/test_csv_format.py +0 -34
  423. unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
  424. unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
  425. unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
  426. unit_tests/sources/file_based/file_types/__init__.py +0 -0
  427. unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
  428. unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
  429. unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
  430. unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
  431. unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
  432. unit_tests/sources/file_based/helpers.py +0 -70
  433. unit_tests/sources/file_based/in_memory_files_source.py +0 -211
  434. unit_tests/sources/file_based/scenarios/__init__.py +0 -0
  435. unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
  436. unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
  437. unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
  438. unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
  439. unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
  440. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
  441. unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
  442. unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
  443. unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
  444. unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
  445. unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
  446. unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
  447. unit_tests/sources/file_based/stream/__init__.py +0 -0
  448. unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
  449. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
  450. unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
  451. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
  452. unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
  453. unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
  454. unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
  455. unit_tests/sources/file_based/test_scenarios.py +0 -253
  456. unit_tests/sources/file_based/test_schema_helpers.py +0 -346
  457. unit_tests/sources/fixtures/__init__.py +0 -3
  458. unit_tests/sources/fixtures/source_test_fixture.py +0 -153
  459. unit_tests/sources/message/__init__.py +0 -0
  460. unit_tests/sources/message/test_repository.py +0 -153
  461. unit_tests/sources/streams/__init__.py +0 -0
  462. unit_tests/sources/streams/concurrent/__init__.py +0 -3
  463. unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
  464. unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
  465. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
  466. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
  467. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
  468. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
  469. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
  470. unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
  471. unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
  472. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
  473. unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
  474. unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
  475. unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
  476. unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
  477. unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
  478. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
  479. unit_tests/sources/streams/http/__init__.py +0 -0
  480. unit_tests/sources/streams/http/auth/__init__.py +0 -0
  481. unit_tests/sources/streams/http/auth/test_auth.py +0 -173
  482. unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
  483. unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
  484. unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
  485. unit_tests/sources/streams/http/test_http.py +0 -635
  486. unit_tests/sources/streams/test_availability_strategy.py +0 -70
  487. unit_tests/sources/streams/test_call_rate.py +0 -300
  488. unit_tests/sources/streams/test_stream_read.py +0 -405
  489. unit_tests/sources/streams/test_streams_core.py +0 -184
  490. unit_tests/sources/test_abstract_source.py +0 -1442
  491. unit_tests/sources/test_concurrent_source.py +0 -112
  492. unit_tests/sources/test_config.py +0 -92
  493. unit_tests/sources/test_connector_state_manager.py +0 -482
  494. unit_tests/sources/test_http_logger.py +0 -252
  495. unit_tests/sources/test_integration_source.py +0 -86
  496. unit_tests/sources/test_source.py +0 -684
  497. unit_tests/sources/test_source_read.py +0 -460
  498. unit_tests/test/__init__.py +0 -0
  499. unit_tests/test/mock_http/__init__.py +0 -0
  500. unit_tests/test/mock_http/test_matcher.py +0 -53
  501. unit_tests/test/mock_http/test_mocker.py +0 -214
  502. unit_tests/test/mock_http/test_request.py +0 -117
  503. unit_tests/test/mock_http/test_response_builder.py +0 -177
  504. unit_tests/test/test_entrypoint_wrapper.py +0 -240
  505. unit_tests/utils/__init__.py +0 -0
  506. unit_tests/utils/test_datetime_format_inferrer.py +0 -60
  507. unit_tests/utils/test_mapping_helpers.py +0 -54
  508. unit_tests/utils/test_message_utils.py +0 -91
  509. unit_tests/utils/test_rate_limiting.py +0 -26
  510. unit_tests/utils/test_schema_inferrer.py +0 -202
  511. unit_tests/utils/test_secret_utils.py +0 -135
  512. unit_tests/utils/test_stream_status_utils.py +0 -61
  513. unit_tests/utils/test_traced_exception.py +0 -107
  514. /airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
  515. {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
  516. {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
  517. {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
  518. {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/LICENSE.txt +0 -0
@@ -8,15 +8,21 @@ from typing import Iterable, Iterator, List
8
8
 
9
9
  from airbyte_cdk.models import AirbyteMessage
10
10
  from airbyte_cdk.sources.concurrent_source.concurrent_read_processor import ConcurrentReadProcessor
11
- from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
11
+ from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import (
12
+ PartitionGenerationCompletedSentinel,
13
+ )
14
+ from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException
12
15
  from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
13
16
  from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
14
17
  from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
15
18
  from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
16
19
  from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
17
20
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
18
- from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
19
- from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCompleteSentinel, QueueItem
21
+ from airbyte_cdk.sources.streams.concurrent.partitions.types import (
22
+ PartitionCompleteSentinel,
23
+ QueueItem,
24
+ )
25
+ from airbyte_cdk.sources.types import Record
20
26
  from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
21
27
 
22
28
 
@@ -40,14 +46,25 @@ class ConcurrentSource:
40
46
  timeout_seconds: int = DEFAULT_TIMEOUT_SECONDS,
41
47
  ) -> "ConcurrentSource":
42
48
  is_single_threaded = initial_number_of_partitions_to_generate == 1 and num_workers == 1
43
- too_many_generator = not is_single_threaded and initial_number_of_partitions_to_generate >= num_workers
44
- assert not too_many_generator, "It is required to have more workers than threads generating partitions"
49
+ too_many_generator = (
50
+ not is_single_threaded and initial_number_of_partitions_to_generate >= num_workers
51
+ )
52
+ assert (
53
+ not too_many_generator
54
+ ), "It is required to have more workers than threads generating partitions"
45
55
  threadpool = ThreadPoolManager(
46
- concurrent.futures.ThreadPoolExecutor(max_workers=num_workers, thread_name_prefix="workerpool"),
56
+ concurrent.futures.ThreadPoolExecutor(
57
+ max_workers=num_workers, thread_name_prefix="workerpool"
58
+ ),
47
59
  logger,
48
60
  )
49
61
  return ConcurrentSource(
50
- threadpool, logger, slice_logger, message_repository, initial_number_of_partitions_to_generate, timeout_seconds
62
+ threadpool,
63
+ logger,
64
+ slice_logger,
65
+ message_repository,
66
+ initial_number_of_partitions_to_generate,
67
+ timeout_seconds,
51
68
  )
52
69
 
53
70
  def __init__(
@@ -79,11 +96,6 @@ class ConcurrentSource:
79
96
  streams: List[AbstractStream],
80
97
  ) -> Iterator[AirbyteMessage]:
81
98
  self._logger.info("Starting syncing")
82
- stream_instances_to_read_from = self._get_streams_to_read_from(streams)
83
-
84
- # Return early if there are no streams to read from
85
- if not stream_instances_to_read_from:
86
- return
87
99
 
88
100
  # We set a maxsize for the main thread to process record items when the queue size grows. This assumes that there are fewer
89
101
  # threads generating partitions than the max number of workers. If it weren't the case, we could have threads only generating
@@ -91,7 +103,7 @@ class ConcurrentSource:
91
103
  # information and might even need to be configurable depending on the source
92
104
  queue: Queue[QueueItem] = Queue(maxsize=10_000)
93
105
  concurrent_stream_processor = ConcurrentReadProcessor(
94
- stream_instances_to_read_from,
106
+ streams,
95
107
  PartitionEnqueuer(queue, self._threadpool),
96
108
  self._threadpool,
97
109
  self._logger,
@@ -111,7 +123,9 @@ class ConcurrentSource:
111
123
  self._threadpool.check_for_errors_and_shutdown()
112
124
  self._logger.info("Finished syncing")
113
125
 
114
- def _submit_initial_partition_generators(self, concurrent_stream_processor: ConcurrentReadProcessor) -> Iterable[AirbyteMessage]:
126
+ def _submit_initial_partition_generators(
127
+ self, concurrent_stream_processor: ConcurrentReadProcessor
128
+ ) -> Iterable[AirbyteMessage]:
115
129
  for _ in range(self._initial_number_partitions_to_generate):
116
130
  status_message = concurrent_stream_processor.start_next_partition_generator()
117
131
  if status_message:
@@ -123,11 +137,6 @@ class ConcurrentSource:
123
137
  concurrent_stream_processor: ConcurrentReadProcessor,
124
138
  ) -> Iterable[AirbyteMessage]:
125
139
  while airbyte_message_or_record_or_exception := queue.get():
126
- try:
127
- self._threadpool.shutdown_if_exception()
128
- except Exception as exception:
129
- concurrent_stream_processor.on_exception(exception)
130
-
131
140
  yield from self._handle_item(
132
141
  airbyte_message_or_record_or_exception,
133
142
  concurrent_stream_processor,
@@ -142,7 +151,7 @@ class ConcurrentSource:
142
151
  concurrent_stream_processor: ConcurrentReadProcessor,
143
152
  ) -> Iterable[AirbyteMessage]:
144
153
  # handle queue item and call the appropriate handler depending on the type of the queue item
145
- if isinstance(queue_item, Exception):
154
+ if isinstance(queue_item, StreamThreadException):
146
155
  yield from concurrent_stream_processor.on_exception(queue_item)
147
156
  elif isinstance(queue_item, PartitionGenerationCompletedSentinel):
148
157
  yield from concurrent_stream_processor.on_partition_generation_completed(queue_item)
@@ -154,19 +163,3 @@ class ConcurrentSource:
154
163
  yield from concurrent_stream_processor.on_record(queue_item)
155
164
  else:
156
165
  raise ValueError(f"Unknown queue item type: {type(queue_item)}")
157
-
158
- def _get_streams_to_read_from(self, streams: List[AbstractStream]) -> List[AbstractStream]:
159
- """
160
- Iterate over the configured streams and return a list of streams to read from.
161
- If a stream is not configured, it will be skipped.
162
- If a stream is configured but does not exist in the source and self.raise_exception_on_missing_stream is True, an exception will be raised
163
- If a stream is not available, it will be skipped
164
- """
165
- stream_instances_to_read_from = []
166
- for stream in streams:
167
- stream_availability = stream.check_availability()
168
- if not stream_availability.is_available():
169
- self._logger.warning(f"Skipped syncing stream '{stream.name}' because it was unavailable. {stream_availability.message()}")
170
- continue
171
- stream_instances_to_read_from.append(stream)
172
- return stream_instances_to_read_from
@@ -1,16 +1,33 @@
1
1
  #
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
+
4
5
  import logging
5
6
  from abc import ABC
6
- from typing import Any, Iterator, List, Mapping, MutableMapping, Optional, Union
7
+ from datetime import timedelta
8
+ from typing import Any, Callable, Iterator, List, Mapping, MutableMapping, Optional, Tuple
7
9
 
8
10
  from airbyte_cdk.models import AirbyteMessage, AirbyteStateMessage, ConfiguredAirbyteCatalog
9
11
  from airbyte_cdk.sources import AbstractSource
10
12
  from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
13
+ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
11
14
  from airbyte_cdk.sources.streams import Stream
12
15
  from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
13
16
  from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade
17
+ from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
18
+ from airbyte_cdk.sources.streams.concurrent.cursor import (
19
+ ConcurrentCursor,
20
+ Cursor,
21
+ CursorField,
22
+ CursorValueType,
23
+ FinalStateCursor,
24
+ GapType,
25
+ )
26
+ from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
27
+ AbstractStreamStateConverter,
28
+ )
29
+
30
+ DEFAULT_LOOKBACK_SECONDS = 0
14
31
 
15
32
 
16
33
  class ConcurrentSourceAdapter(AbstractSource, ABC):
@@ -30,19 +47,25 @@ class ConcurrentSourceAdapter(AbstractSource, ABC):
30
47
  logger: logging.Logger,
31
48
  config: Mapping[str, Any],
32
49
  catalog: ConfiguredAirbyteCatalog,
33
- state: Optional[Union[List[AirbyteStateMessage], MutableMapping[str, Any]]] = None,
50
+ state: Optional[List[AirbyteStateMessage]] = None,
34
51
  ) -> Iterator[AirbyteMessage]:
35
52
  abstract_streams = self._select_abstract_streams(config, catalog)
36
53
  concurrent_stream_names = {stream.name for stream in abstract_streams}
37
54
  configured_catalog_for_regular_streams = ConfiguredAirbyteCatalog(
38
- streams=[stream for stream in catalog.streams if stream.stream.name not in concurrent_stream_names]
55
+ streams=[
56
+ stream
57
+ for stream in catalog.streams
58
+ if stream.stream.name not in concurrent_stream_names
59
+ ]
39
60
  )
40
61
  if abstract_streams:
41
62
  yield from self._concurrent_source.read(abstract_streams)
42
63
  if configured_catalog_for_regular_streams.streams:
43
64
  yield from super().read(logger, config, configured_catalog_for_regular_streams, state)
44
65
 
45
- def _select_abstract_streams(self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog) -> List[AbstractStream]:
66
+ def _select_abstract_streams(
67
+ self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog
68
+ ) -> List[AbstractStream]:
46
69
  """
47
70
  Selects streams that can be processed concurrently and returns their abstract representations.
48
71
  """
@@ -52,12 +75,73 @@ class ConcurrentSourceAdapter(AbstractSource, ABC):
52
75
  for configured_stream in configured_catalog.streams:
53
76
  stream_instance = stream_name_to_instance.get(configured_stream.stream.name)
54
77
  if not stream_instance:
55
- if not self.raise_exception_on_missing_stream:
56
- continue
57
- raise KeyError(
58
- f"The stream {configured_stream.stream.name} no longer exists in the configuration. "
59
- f"Refresh the schema in replication settings and remove this stream from future sync attempts."
60
- )
78
+ continue
79
+
61
80
  if isinstance(stream_instance, AbstractStreamFacade):
62
81
  abstract_streams.append(stream_instance.get_underlying_stream())
63
82
  return abstract_streams
83
+
84
+ def convert_to_concurrent_stream(
85
+ self,
86
+ logger: logging.Logger,
87
+ stream: Stream,
88
+ state_manager: ConnectorStateManager,
89
+ cursor: Optional[Cursor] = None,
90
+ ) -> Stream:
91
+ """
92
+ Prepares a stream for concurrent processing by initializing or assigning a cursor,
93
+ managing the stream's state, and returning an updated Stream instance.
94
+ """
95
+ state: MutableMapping[str, Any] = {}
96
+
97
+ if cursor:
98
+ state = state_manager.get_stream_state(stream.name, stream.namespace)
99
+
100
+ stream.cursor = cursor # type: ignore[assignment] # cursor is of type ConcurrentCursor, which inherits from Cursor
101
+ if hasattr(stream, "parent"):
102
+ stream.parent.cursor = cursor
103
+ else:
104
+ cursor = FinalStateCursor(
105
+ stream_name=stream.name,
106
+ stream_namespace=stream.namespace,
107
+ message_repository=self.message_repository, # type: ignore[arg-type] # _default_message_repository will be returned in the worst case
108
+ )
109
+ return StreamFacade.create_from_stream(stream, self, logger, state, cursor)
110
+
111
+ def initialize_cursor(
112
+ self,
113
+ stream: Stream,
114
+ state_manager: ConnectorStateManager,
115
+ converter: AbstractStreamStateConverter,
116
+ slice_boundary_fields: Optional[Tuple[str, str]],
117
+ start: Optional[CursorValueType],
118
+ end_provider: Callable[[], CursorValueType],
119
+ lookback_window: Optional[GapType] = None,
120
+ slice_range: Optional[GapType] = None,
121
+ ) -> Optional[ConcurrentCursor]:
122
+ lookback_window = lookback_window or timedelta(seconds=DEFAULT_LOOKBACK_SECONDS)
123
+
124
+ cursor_field_name = stream.cursor_field
125
+
126
+ if cursor_field_name:
127
+ if not isinstance(cursor_field_name, str):
128
+ raise ValueError(
129
+ f"Cursor field type must be a string, but received {type(cursor_field_name).__name__}."
130
+ )
131
+
132
+ return ConcurrentCursor(
133
+ stream.name,
134
+ stream.namespace,
135
+ state_manager.get_stream_state(stream.name, stream.namespace),
136
+ self.message_repository, # type: ignore[arg-type] # _default_message_repository will be returned in the worst case
137
+ state_manager,
138
+ converter,
139
+ CursorField(cursor_field_name),
140
+ slice_boundary_fields,
141
+ start,
142
+ end_provider,
143
+ lookback_window,
144
+ slice_range,
145
+ )
146
+
147
+ return None
@@ -0,0 +1,25 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+ from typing import Any
4
+
5
+
6
+ class StreamThreadException(Exception):
7
+ def __init__(self, exception: Exception, stream_name: str):
8
+ self._exception = exception
9
+ self._stream_name = stream_name
10
+
11
+ @property
12
+ def stream_name(self) -> str:
13
+ return self._stream_name
14
+
15
+ @property
16
+ def exception(self) -> Exception:
17
+ return self._exception
18
+
19
+ def __str__(self) -> str:
20
+ return f"Exception while syncing stream {self._stream_name}: {self._exception}"
21
+
22
+ def __eq__(self, other: Any) -> bool:
23
+ if isinstance(other, StreamThreadException):
24
+ return self._exception == other._exception and self._stream_name == other._stream_name
25
+ return False
@@ -37,7 +37,9 @@ class ThreadPoolManager:
37
37
  def prune_to_validate_has_reached_futures_limit(self) -> bool:
38
38
  self._prune_futures(self._futures)
39
39
  if len(self._futures) > self._logging_threshold:
40
- self._logger.warning(f"ThreadPoolManager: The list of futures is getting bigger than expected ({len(self._futures)})")
40
+ self._logger.warning(
41
+ f"ThreadPoolManager: The list of futures is getting bigger than expected ({len(self._futures)})"
42
+ )
41
43
  return len(self._futures) >= self._max_concurrent_tasks
42
44
 
43
45
  def submit(self, function: Callable[..., Any], *args: Any) -> None:
@@ -71,39 +73,43 @@ class ThreadPoolManager:
71
73
  )
72
74
  futures.pop(index)
73
75
 
74
- def shutdown(self) -> None:
76
+ def _shutdown(self) -> None:
77
+ # Without a way to stop the threads that have already started, this will not stop the Python application. We are fine today with
78
+ # this imperfect approach because we only do this in case of `self._most_recently_seen_exception` which we don't expect to happen
75
79
  self._threadpool.shutdown(wait=False, cancel_futures=True)
76
80
 
77
81
  def is_done(self) -> bool:
78
82
  return all([f.done() for f in self._futures])
79
83
 
80
- def shutdown_if_exception(self) -> None:
81
- """
82
- This method will raise if there is an exception so that the caller can use it.
83
- """
84
- if self._most_recently_seen_exception:
85
- self._stop_and_raise_exception(self._most_recently_seen_exception)
86
-
87
84
  def check_for_errors_and_shutdown(self) -> None:
88
85
  """
89
86
  Check if any of the futures have an exception, and raise it if so. If all futures are done, shutdown the threadpool.
90
87
  If the futures are not done, raise an exception.
91
88
  :return:
92
89
  """
93
- self.shutdown_if_exception()
90
+ if self._most_recently_seen_exception:
91
+ self._logger.exception(
92
+ "An unknown exception has occurred while reading concurrently",
93
+ exc_info=self._most_recently_seen_exception,
94
+ )
95
+ self._stop_and_raise_exception(self._most_recently_seen_exception)
94
96
 
95
- exceptions_from_futures = [f for f in [future.exception() for future in self._futures] if f is not None]
97
+ exceptions_from_futures = [
98
+ f for f in [future.exception() for future in self._futures] if f is not None
99
+ ]
96
100
  if exceptions_from_futures:
97
101
  exception = RuntimeError(f"Failed reading with errors: {exceptions_from_futures}")
98
102
  self._stop_and_raise_exception(exception)
99
103
  else:
100
104
  futures_not_done = [f for f in self._futures if not f.done()]
101
105
  if futures_not_done:
102
- exception = RuntimeError(f"Failed reading with futures not done: {futures_not_done}")
106
+ exception = RuntimeError(
107
+ f"Failed reading with futures not done: {futures_not_done}"
108
+ )
103
109
  self._stop_and_raise_exception(exception)
104
110
  else:
105
- self.shutdown()
111
+ self._shutdown()
106
112
 
107
113
  def _stop_and_raise_exception(self, exception: BaseException) -> None:
108
- self.shutdown()
114
+ self._shutdown()
109
115
  raise exception
@@ -4,8 +4,9 @@
4
4
 
5
5
  from typing import Any, Dict
6
6
 
7
+ from pydantic.v1 import BaseModel
8
+
7
9
  from airbyte_cdk.sources.utils.schema_helpers import expand_refs, rename_key
8
- from pydantic import BaseModel
9
10
 
10
11
 
11
12
  class BaseConfig(BaseModel):
@@ -17,7 +18,7 @@ class BaseConfig(BaseModel):
17
18
  """
18
19
 
19
20
  @classmethod
20
- def schema(cls, *args, **kwargs) -> Dict[str, Any]:
21
+ def schema(cls, *args: Any, **kwargs: Any) -> Dict[str, Any]:
21
22
  """We're overriding the schema classmethod to enable some post-processing"""
22
23
  schema = super().schema(*args, **kwargs)
23
24
  rename_key(schema, old_key="anyOf", new_key="oneOf") # UI supports only oneOf
@@ -3,45 +3,40 @@
3
3
  #
4
4
 
5
5
  import copy
6
- from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union
6
+ from dataclasses import dataclass
7
+ from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union, cast
7
8
 
8
9
  from airbyte_cdk.models import (
9
10
  AirbyteMessage,
10
11
  AirbyteStateBlob,
11
12
  AirbyteStateMessage,
12
13
  AirbyteStateType,
13
- AirbyteStream,
14
14
  AirbyteStreamState,
15
15
  StreamDescriptor,
16
16
  )
17
17
  from airbyte_cdk.models import Type as MessageType
18
- from airbyte_cdk.sources.streams import Stream
19
- from pydantic import Extra
18
+ from airbyte_cdk.models.airbyte_protocol import AirbyteGlobalState, AirbyteStateBlob
20
19
 
21
20
 
22
- class HashableStreamDescriptor(StreamDescriptor):
21
+ @dataclass(frozen=True)
22
+ class HashableStreamDescriptor:
23
23
  """
24
24
  Helper class that overrides the existing StreamDescriptor class that is auto generated from the Airbyte Protocol and
25
25
  freezes its fields so that it can be used as a hash key. This is only marked public because we use it outside for unit tests.
26
26
  """
27
27
 
28
- class Config:
29
- extra = Extra.allow
30
- frozen = True
28
+ name: str
29
+ namespace: Optional[str] = None
31
30
 
32
31
 
33
32
  class ConnectorStateManager:
34
33
  """
35
- ConnectorStateManager consolidates the various forms of a stream's incoming state message (STREAM / GLOBAL / LEGACY) under a common
34
+ ConnectorStateManager consolidates the various forms of a stream's incoming state message (STREAM / GLOBAL) under a common
36
35
  interface. It also provides methods to extract and update state
37
36
  """
38
37
 
39
- def __init__(
40
- self,
41
- stream_instance_map: Mapping[str, Union[Stream, AirbyteStream]],
42
- state: Optional[Union[List[AirbyteStateMessage], MutableMapping[str, Any]]] = None,
43
- ):
44
- shared_state, per_stream_states = self._extract_from_state_message(state, stream_instance_map)
38
+ def __init__(self, state: Optional[List[AirbyteStateMessage]] = None):
39
+ shared_state, per_stream_states = self._extract_from_state_message(state)
45
40
 
46
41
  # We explicitly throw an error if we receive a GLOBAL state message that contains a shared_state because API sources are
47
42
  # designed to checkpoint state independently of one another. API sources should never be emitting a state message where
@@ -55,19 +50,25 @@ class ConnectorStateManager:
55
50
  )
56
51
  self.per_stream_states = per_stream_states
57
52
 
58
- def get_stream_state(self, stream_name: str, namespace: Optional[str]) -> MutableMapping[str, Any]:
53
+ def get_stream_state(
54
+ self, stream_name: str, namespace: Optional[str]
55
+ ) -> MutableMapping[str, Any]:
59
56
  """
60
57
  Retrieves the state of a given stream based on its descriptor (name + namespace).
61
58
  :param stream_name: Name of the stream being fetched
62
59
  :param namespace: Namespace of the stream being fetched
63
60
  :return: The per-stream state for a stream
64
61
  """
65
- stream_state = self.per_stream_states.get(HashableStreamDescriptor(name=stream_name, namespace=namespace))
62
+ stream_state: AirbyteStateBlob | None = self.per_stream_states.get(
63
+ HashableStreamDescriptor(name=stream_name, namespace=namespace)
64
+ )
66
65
  if stream_state:
67
- return stream_state.dict() # type: ignore # mypy thinks dict() returns any, but it returns a dict
66
+ return copy.deepcopy({k: v for k, v in stream_state.__dict__.items()})
68
67
  return {}
69
68
 
70
- def update_state_for_stream(self, stream_name: str, namespace: Optional[str], value: Mapping[str, Any]) -> None:
69
+ def update_state_for_stream(
70
+ self, stream_name: str, namespace: Optional[str], value: Mapping[str, Any]
71
+ ) -> None:
71
72
  """
72
73
  Overwrites the state blob of a specific stream based on the provided stream name and optional namespace
73
74
  :param stream_name: The name of the stream whose state is being updated
@@ -75,11 +76,11 @@ class ConnectorStateManager:
75
76
  :param value: A stream state mapping that is being updated for a stream
76
77
  """
77
78
  stream_descriptor = HashableStreamDescriptor(name=stream_name, namespace=namespace)
78
- self.per_stream_states[stream_descriptor] = AirbyteStateBlob.parse_obj(value)
79
+ self.per_stream_states[stream_descriptor] = AirbyteStateBlob(value)
79
80
 
80
81
  def create_state_message(self, stream_name: str, namespace: Optional[str]) -> AirbyteMessage:
81
82
  """
82
- Generates an AirbyteMessage using the current per-stream state of a specified stream in either the per-stream or legacy format
83
+ Generates an AirbyteMessage using the current per-stream state of a specified stream
83
84
  :param stream_name: The name of the stream for the message that is being created
84
85
  :param namespace: The namespace of the stream for the message that is being created
85
86
  :return: The Airbyte state message to be emitted by the connector during a sync
@@ -92,7 +93,8 @@ class ConnectorStateManager:
92
93
  state=AirbyteStateMessage(
93
94
  type=AirbyteStateType.STREAM,
94
95
  stream=AirbyteStreamState(
95
- stream_descriptor=StreamDescriptor(name=stream_name, namespace=namespace), stream_state=stream_state
96
+ stream_descriptor=StreamDescriptor(name=stream_name, namespace=namespace),
97
+ stream_state=stream_state,
96
98
  ),
97
99
  ),
98
100
  )
@@ -100,86 +102,48 @@ class ConnectorStateManager:
100
102
  @classmethod
101
103
  def _extract_from_state_message(
102
104
  cls,
103
- state: Optional[Union[List[AirbyteStateMessage], MutableMapping[str, Any]]],
104
- stream_instance_map: Mapping[str, Union[Stream, AirbyteStream]],
105
- ) -> Tuple[Optional[AirbyteStateBlob], MutableMapping[HashableStreamDescriptor, Optional[AirbyteStateBlob]]]:
105
+ state: Optional[List[AirbyteStateMessage]],
106
+ ) -> Tuple[
107
+ Optional[AirbyteStateBlob],
108
+ MutableMapping[HashableStreamDescriptor, Optional[AirbyteStateBlob]],
109
+ ]:
106
110
  """
107
- Takes an incoming list of state messages or the legacy state format and extracts state attributes according to type
108
- which can then be assigned to the new state manager being instantiated
111
+ Takes an incoming list of state messages or a global state message and extracts state attributes according to
112
+ type which can then be assigned to the new state manager being instantiated
109
113
  :param state: The incoming state input
110
114
  :return: A tuple of shared state and per stream state assembled from the incoming state list
111
115
  """
112
116
  if state is None:
113
117
  return None, {}
114
118
 
115
- is_legacy = cls._is_legacy_dict_state(state)
116
- is_migrated_legacy = cls._is_migrated_legacy_state(state)
117
119
  is_global = cls._is_global_state(state)
118
- is_per_stream = cls._is_per_stream_state(state)
119
-
120
- # Incoming pure legacy object format
121
- if is_legacy:
122
- streams = cls._create_descriptor_to_stream_state_mapping(state, stream_instance_map) # type: ignore # We verified state is a dict in _is_legacy_dict_state
123
- return None, streams
124
-
125
- # When processing incoming state in source.read_state(), legacy state gets deserialized into List[AirbyteStateMessage]
126
- # which can be translated into independent per-stream state values
127
- if is_migrated_legacy:
128
- streams = cls._create_descriptor_to_stream_state_mapping(state[0].data, stream_instance_map) # type: ignore # We verified that state is a list in _is_migrated_legacy_state
129
- return None, streams
130
120
 
131
121
  if is_global:
132
- global_state = state[0].global_ # type: ignore # We verified state is a list in _is_global_state
133
- shared_state = copy.deepcopy(global_state.shared_state, {})
122
+ # We already validate that this is a global state message, not None:
123
+ global_state = cast(AirbyteGlobalState, state[0].global_)
124
+ # global_state has shared_state, also not None:
125
+ shared_state: AirbyteStateBlob = cast(
126
+ AirbyteStateBlob, copy.deepcopy(global_state.shared_state, {})
127
+ )
134
128
  streams = {
135
129
  HashableStreamDescriptor(
136
- name=per_stream_state.stream_descriptor.name, namespace=per_stream_state.stream_descriptor.namespace
130
+ name=per_stream_state.stream_descriptor.name,
131
+ namespace=per_stream_state.stream_descriptor.namespace,
137
132
  ): per_stream_state.stream_state
138
- for per_stream_state in global_state.stream_states
133
+ for per_stream_state in global_state.stream_states # type: ignore[union-attr] # global_state has shared_state
139
134
  }
140
135
  return shared_state, streams
141
-
142
- if is_per_stream:
136
+ else:
143
137
  streams = {
144
138
  HashableStreamDescriptor(
145
- name=per_stream_state.stream.stream_descriptor.name, namespace=per_stream_state.stream.stream_descriptor.namespace
146
- ): per_stream_state.stream.stream_state
139
+ name=per_stream_state.stream.stream_descriptor.name, # type: ignore[union-attr] # stream has stream_descriptor
140
+ namespace=per_stream_state.stream.stream_descriptor.namespace, # type: ignore[union-attr] # stream has stream_descriptor
141
+ ): per_stream_state.stream.stream_state # type: ignore[union-attr] # stream has stream_state
147
142
  for per_stream_state in state
148
- if per_stream_state.type == AirbyteStateType.STREAM and hasattr(per_stream_state, "stream") # type: ignore # state is always a list of AirbyteStateMessage if is_per_stream is True
143
+ if per_stream_state.type == AirbyteStateType.STREAM
144
+ and hasattr(per_stream_state, "stream") # type: ignore # state is always a list of AirbyteStateMessage if is_per_stream is True
149
145
  }
150
146
  return None, streams
151
- else:
152
- raise ValueError("Input state should come in the form of list of Airbyte state messages or a mapping of states")
153
-
154
- @staticmethod
155
- def _create_descriptor_to_stream_state_mapping(
156
- state: MutableMapping[str, Any], stream_to_instance_map: Mapping[str, Union[Stream, AirbyteStream]]
157
- ) -> MutableMapping[HashableStreamDescriptor, Optional[AirbyteStateBlob]]:
158
- """
159
- Takes incoming state received in the legacy format and transforms it into a mapping of StreamDescriptor to AirbyteStreamState
160
- :param state: A mapping object representing the complete state of all streams in the legacy format
161
- :param stream_to_instance_map: A mapping of stream name to stream instance used to retrieve a stream's namespace
162
- :return: The mapping of all of a sync's streams to the corresponding stream state
163
- """
164
- streams = {}
165
- for stream_name, state_value in state.items():
166
- namespace = stream_to_instance_map[stream_name].namespace if stream_name in stream_to_instance_map else None
167
- stream_descriptor = HashableStreamDescriptor(name=stream_name, namespace=namespace)
168
- streams[stream_descriptor] = AirbyteStateBlob.parse_obj(state_value or {})
169
- return streams
170
-
171
- @staticmethod
172
- def _is_legacy_dict_state(state: Union[List[AirbyteStateMessage], MutableMapping[str, Any]]) -> bool:
173
- return isinstance(state, dict)
174
-
175
- @staticmethod
176
- def _is_migrated_legacy_state(state: Union[List[AirbyteStateMessage], MutableMapping[str, Any]]) -> bool:
177
- return (
178
- isinstance(state, List)
179
- and len(state) == 1
180
- and isinstance(state[0], AirbyteStateMessage)
181
- and state[0].type == AirbyteStateType.LEGACY
182
- )
183
147
 
184
148
  @staticmethod
185
149
  def _is_global_state(state: Union[List[AirbyteStateMessage], MutableMapping[str, Any]]) -> bool:
@@ -191,5 +155,7 @@ class ConnectorStateManager:
191
155
  )
192
156
 
193
157
  @staticmethod
194
- def _is_per_stream_state(state: Union[List[AirbyteStateMessage], MutableMapping[str, Any]]) -> bool:
158
+ def _is_per_stream_state(
159
+ state: Union[List[AirbyteStateMessage], MutableMapping[str, Any]],
160
+ ) -> bool:
195
161
  return isinstance(state, List)
@@ -0,0 +1,52 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+
4
+ from datetime import timedelta
5
+ from typing import Optional
6
+
7
+ from airbyte_cdk.sources.declarative.async_job.timer import Timer
8
+ from airbyte_cdk.sources.types import StreamSlice
9
+
10
+ from .status import AsyncJobStatus
11
+
12
+
13
class AsyncJob:
    """
    Description of an API job.

    Note that the timer will only stop once `update_status` is called so the job might be completed on the API side but until we query for
    it and call `AsyncJob.update_status`, `AsyncJob.status` will not reflect the actual API side status.
    """

    def __init__(
        self, api_job_id: str, job_parameters: StreamSlice, timeout: Optional[timedelta] = None
    ) -> None:
        """
        Creates a job in the RUNNING status and immediately starts its timeout timer

        :param api_job_id: The identifier of the job, returned as-is by `api_job_id()`
        :param job_parameters: The stream slice associated with the job, returned as-is by `job_parameters()`
        :param timeout: How long the timer runs before the job is reported as timed out. Defaults to 60 minutes when not provided
        """
        self._api_job_id = api_job_id
        self._job_parameters = job_parameters
        self._status = AsyncJobStatus.RUNNING

        # Compare against None rather than relying on truthiness: `timedelta(0)` is falsy, so a
        # truthiness check would silently replace an explicit zero timeout with the default.
        if timeout is None:
            timeout = timedelta(minutes=60)
        self._timer = Timer(timeout)
        self._timer.start()

    def api_job_id(self) -> str:
        """
        :return: The job identifier provided at construction
        """
        return self._api_job_id

    def status(self) -> AsyncJobStatus:
        """
        :return: TIMED_OUT if the timer reports a timeout, otherwise the status last set through `update_status`
        """
        # The timer takes precedence over the stored status.
        if self._timer.has_timed_out():
            return AsyncJobStatus.TIMED_OUT
        return self._status

    def job_parameters(self) -> StreamSlice:
        """
        :return: The stream slice provided at construction
        """
        return self._job_parameters

    def update_status(self, status: AsyncJobStatus) -> None:
        """
        Records the new status of the job and keeps the timeout timer in sync with it

        :param status: The status to store for the job
        """
        if self._status != AsyncJobStatus.RUNNING and status == AsyncJobStatus.RUNNING:
            # The job is going back to RUNNING from another status: start the timer again.
            self._timer.start()
        elif status.is_terminal():
            # Terminal statuses stop the timer.
            self._timer.stop()

        self._status = status

    def __repr__(self) -> str:
        return f"AsyncJob(api_job_id={self.api_job_id()}, job_parameters={self.job_parameters()}, status={self.status()})"