airbyte-cdk 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (368) hide show
  1. airbyte_cdk/__init__.py +358 -0
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +104 -0
  7. airbyte_cdk/connector.py +123 -0
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/__init__.py +3 -0
  10. airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
  11. airbyte_cdk/connector_builder/main.py +107 -0
  12. airbyte_cdk/connector_builder/models.py +73 -0
  13. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  14. airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
  15. airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
  16. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  17. airbyte_cdk/connector_builder/test_reader/types.py +83 -0
  18. airbyte_cdk/destinations/__init__.py +8 -0
  19. airbyte_cdk/destinations/destination.py +154 -0
  20. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  21. airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
  22. airbyte_cdk/destinations/vector_db_based/config.py +298 -0
  23. airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
  24. airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
  25. airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
  26. airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
  27. airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
  28. airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
  29. airbyte_cdk/entrypoint.py +414 -0
  30. airbyte_cdk/exception_handler.py +56 -0
  31. airbyte_cdk/logger.py +109 -0
  32. airbyte_cdk/models/__init__.py +72 -0
  33. airbyte_cdk/models/airbyte_protocol.py +88 -0
  34. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  35. airbyte_cdk/models/well_known_types.py +5 -0
  36. airbyte_cdk/py.typed +0 -0
  37. airbyte_cdk/sources/__init__.py +26 -0
  38. airbyte_cdk/sources/abstract_source.py +326 -0
  39. airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
  40. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
  41. airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
  42. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
  43. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
  44. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  45. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
  46. airbyte_cdk/sources/config.py +27 -0
  47. airbyte_cdk/sources/connector_state_manager.py +161 -0
  48. airbyte_cdk/sources/declarative/__init__.py +3 -0
  49. airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
  50. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  51. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
  52. airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
  53. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  54. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  55. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  56. airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
  57. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
  58. airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
  59. airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
  60. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
  61. airbyte_cdk/sources/declarative/auth/token.py +267 -0
  62. airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
  63. airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
  64. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
  65. airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
  66. airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
  67. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  68. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  69. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
  70. airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
  71. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
  72. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
  73. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
  74. airbyte_cdk/sources/declarative/declarative_source.py +36 -0
  75. airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
  76. airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
  77. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
  78. airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
  79. airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
  80. airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
  81. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  82. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  83. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  84. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
  85. airbyte_cdk/sources/declarative/exceptions.py +9 -0
  86. airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
  87. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
  88. airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
  89. airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
  90. airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
  91. airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
  92. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
  93. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  94. airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
  95. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
  96. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
  97. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  98. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
  99. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
  100. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  101. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  102. airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
  103. airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
  104. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
  105. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
  106. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
  107. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
  108. airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
  109. airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
  110. airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
  111. airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
  112. airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
  113. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  114. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  115. airbyte_cdk/sources/declarative/models/__init__.py +2 -0
  116. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
  117. airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
  118. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
  119. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
  120. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
  121. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
  122. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
  123. airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
  124. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  125. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  126. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
  127. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  128. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
  129. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
  130. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  131. airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
  132. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
  133. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
  134. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
  135. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
  136. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
  137. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
  138. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
  139. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
  140. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
  141. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
  142. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  143. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
  144. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
  145. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
  146. airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
  147. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
  148. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
  149. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
  150. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
  151. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
  152. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
  153. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
  154. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
  155. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
  156. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
  157. airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
  158. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
  159. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
  160. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  161. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
  162. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
  163. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
  164. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
  165. airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
  166. airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
  167. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  168. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  169. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  170. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  171. airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
  172. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
  173. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  174. airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
  175. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
  176. airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
  177. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
  178. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
  179. airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
  180. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
  181. airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
  182. airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
  183. airbyte_cdk/sources/declarative/spec/spec.py +48 -0
  184. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
  185. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
  186. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
  187. airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
  188. airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
  189. airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
  190. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  191. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  192. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  193. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  194. airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
  195. airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
  196. airbyte_cdk/sources/declarative/types.py +25 -0
  197. airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
  198. airbyte_cdk/sources/file_based/README.md +152 -0
  199. airbyte_cdk/sources/file_based/__init__.py +24 -0
  200. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
  201. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
  202. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
  203. airbyte_cdk/sources/file_based/config/__init__.py +0 -0
  204. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
  205. airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
  206. airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
  207. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  208. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
  209. airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
  210. airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
  211. airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
  212. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  213. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
  214. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
  215. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
  216. airbyte_cdk/sources/file_based/exceptions.py +159 -0
  217. airbyte_cdk/sources/file_based/file_based_source.py +466 -0
  218. airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
  219. airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
  220. airbyte_cdk/sources/file_based/file_record_data.py +22 -0
  221. airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
  222. airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
  223. airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
  224. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  225. airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
  226. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
  227. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
  228. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
  229. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
  230. airbyte_cdk/sources/file_based/remote_file.py +18 -0
  231. airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
  232. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
  233. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
  234. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
  235. airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
  236. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
  237. airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
  238. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
  239. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
  240. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
  241. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
  242. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
  243. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
  244. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
  245. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
  246. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
  247. airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
  248. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
  249. airbyte_cdk/sources/file_based/types.py +10 -0
  250. airbyte_cdk/sources/http_config.py +10 -0
  251. airbyte_cdk/sources/http_logger.py +55 -0
  252. airbyte_cdk/sources/message/__init__.py +19 -0
  253. airbyte_cdk/sources/message/repository.py +137 -0
  254. airbyte_cdk/sources/source.py +95 -0
  255. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  256. airbyte_cdk/sources/streams/__init__.py +8 -0
  257. airbyte_cdk/sources/streams/availability_strategy.py +84 -0
  258. airbyte_cdk/sources/streams/call_rate.py +704 -0
  259. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  260. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  261. airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
  262. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  263. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  264. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  265. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  266. airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
  267. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
  268. airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
  269. airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
  270. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
  271. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  272. airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
  273. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  274. airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
  275. airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
  276. airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
  277. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
  278. airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
  279. airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
  280. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
  281. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
  282. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  283. airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
  284. airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
  285. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
  286. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
  287. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  288. airbyte_cdk/sources/streams/core.py +703 -0
  289. airbyte_cdk/sources/streams/http/__init__.py +10 -0
  290. airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
  291. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  292. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  293. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  294. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  295. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  296. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  297. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  298. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  299. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  300. airbyte_cdk/sources/streams/http/exceptions.py +61 -0
  301. airbyte_cdk/sources/streams/http/http.py +673 -0
  302. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  303. airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
  304. airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
  305. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
  306. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
  307. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
  308. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
  309. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  310. airbyte_cdk/sources/streams/utils/__init__.py +3 -0
  311. airbyte_cdk/sources/types.py +169 -0
  312. airbyte_cdk/sources/utils/__init__.py +7 -0
  313. airbyte_cdk/sources/utils/casing.py +12 -0
  314. airbyte_cdk/sources/utils/files_directory.py +15 -0
  315. airbyte_cdk/sources/utils/record_helper.py +53 -0
  316. airbyte_cdk/sources/utils/schema_helpers.py +230 -0
  317. airbyte_cdk/sources/utils/slice_logger.py +57 -0
  318. airbyte_cdk/sources/utils/transform.py +277 -0
  319. airbyte_cdk/sources/utils/types.py +7 -0
  320. airbyte_cdk/sql/__init__.py +0 -0
  321. airbyte_cdk/sql/_util/__init__.py +0 -0
  322. airbyte_cdk/sql/_util/hashing.py +34 -0
  323. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  324. airbyte_cdk/sql/constants.py +32 -0
  325. airbyte_cdk/sql/exceptions.py +235 -0
  326. airbyte_cdk/sql/secrets.py +123 -0
  327. airbyte_cdk/sql/shared/__init__.py +15 -0
  328. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  329. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  330. airbyte_cdk/sql/types.py +160 -0
  331. airbyte_cdk/test/__init__.py +7 -0
  332. airbyte_cdk/test/catalog_builder.py +81 -0
  333. airbyte_cdk/test/entrypoint_wrapper.py +250 -0
  334. airbyte_cdk/test/mock_http/__init__.py +6 -0
  335. airbyte_cdk/test/mock_http/matcher.py +41 -0
  336. airbyte_cdk/test/mock_http/mocker.py +185 -0
  337. airbyte_cdk/test/mock_http/request.py +103 -0
  338. airbyte_cdk/test/mock_http/response.py +28 -0
  339. airbyte_cdk/test/mock_http/response_builder.py +237 -0
  340. airbyte_cdk/test/state_builder.py +33 -0
  341. airbyte_cdk/test/utils/__init__.py +1 -0
  342. airbyte_cdk/test/utils/data.py +24 -0
  343. airbyte_cdk/test/utils/http_mocking.py +16 -0
  344. airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
  345. airbyte_cdk/test/utils/reading.py +26 -0
  346. airbyte_cdk/utils/__init__.py +10 -0
  347. airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
  348. airbyte_cdk/utils/analytics_message.py +25 -0
  349. airbyte_cdk/utils/constants.py +5 -0
  350. airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
  351. airbyte_cdk/utils/datetime_helpers.py +499 -0
  352. airbyte_cdk/utils/event_timing.py +85 -0
  353. airbyte_cdk/utils/is_cloud_environment.py +18 -0
  354. airbyte_cdk/utils/mapping_helpers.py +162 -0
  355. airbyte_cdk/utils/message_utils.py +26 -0
  356. airbyte_cdk/utils/oneof_option_config.py +33 -0
  357. airbyte_cdk/utils/print_buffer.py +75 -0
  358. airbyte_cdk/utils/schema_inferrer.py +270 -0
  359. airbyte_cdk/utils/slice_hasher.py +37 -0
  360. airbyte_cdk/utils/spec_schema_transformations.py +26 -0
  361. airbyte_cdk/utils/stream_status_utils.py +43 -0
  362. airbyte_cdk/utils/traced_exception.py +145 -0
  363. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
  364. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
  365. airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
  366. airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
  367. airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
  368. airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,102 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from functools import lru_cache
6
+ from logging import Logger
7
+ from typing import Any, Iterable, List, Mapping, Optional
8
+
9
+ from airbyte_cdk.models import AirbyteStream, SyncMode
10
+ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
11
+ from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
12
+ AbstractAvailabilityStrategy,
13
+ StreamAvailability,
14
+ )
15
+ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
16
+ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
17
+ from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
18
+
19
+
20
class DefaultStream(AbstractStream):
    """
    AbstractStream implementation backed entirely by the values handed to its constructor.

    Partition generation is delegated to the supplied PartitionGenerator and availability checks to the
    supplied AbstractAvailabilityStrategy; every other accessor returns what was stored at construction.
    """

    def __init__(
        self,
        partition_generator: PartitionGenerator,
        name: str,
        json_schema: Mapping[str, Any],
        availability_strategy: AbstractAvailabilityStrategy,
        primary_key: List[str],
        cursor_field: Optional[str],
        logger: Logger,
        cursor: Cursor,
        namespace: Optional[str] = None,
        supports_file_transfer: bool = False,
    ) -> None:
        """
        :param partition_generator: Object whose `generate()` yields the partitions of this stream.
        :param name: Value returned by the `name` property and used in the AirbyteStream.
        :param json_schema: Schema returned by `get_json_schema` and copied into the AirbyteStream.
        :param availability_strategy: Strategy that `check_availability` delegates to.
        :param primary_key: Flat list of primary key field names.
        :param cursor_field: Name of the cursor field, or None when the stream has no cursor field.
        :param logger: Logger passed to the availability strategy and used for debug logging.
        :param cursor: Value returned by the `cursor` property.
        :param namespace: Optional namespace reported on the AirbyteStream.
        :param supports_file_transfer: Reported as `is_file_based` on the AirbyteStream.
        """
        self._stream_partition_generator = partition_generator
        self._name = name
        self._json_schema = json_schema
        self._availability_strategy = availability_strategy
        self._primary_key = primary_key
        self._cursor_field = cursor_field
        self._logger = logger
        self._cursor = cursor
        self._namespace = namespace
        self._supports_file_transfer = supports_file_transfer

    def generate_partitions(self) -> Iterable[Partition]:
        """Yield every partition produced by the underlying partition generator."""
        yield from self._stream_partition_generator.generate()

    @property
    def name(self) -> str:
        return self._name

    @property
    def namespace(self) -> Optional[str]:
        return self._namespace

    def check_availability(self) -> StreamAvailability:
        """Return the result of the availability strategy, which is given this stream's logger."""
        return self._availability_strategy.check_availability(self._logger)

    @property
    def cursor_field(self) -> Optional[str]:
        return self._cursor_field

    def get_json_schema(self) -> Mapping[str, Any]:
        """Return the JSON schema supplied at construction time."""
        # Deliberately not wrapped in lru_cache: this is a plain attribute read, and a cache on an
        # instance method keys on `self`, which would keep every stream instance alive for the
        # lifetime of the cache and require instances to be hashable.
        return self._json_schema

    def as_airbyte_stream(self) -> AirbyteStream:
        """
        Build the AirbyteStream describing this stream.

        The stream always supports full refresh. When a cursor field is set it is additionally marked
        as resumable, incremental and source-defined-cursor. Each primary key field is wrapped in its
        own single-element list.
        """
        stream = AirbyteStream(
            name=self.name,
            json_schema=dict(self._json_schema),
            supported_sync_modes=[SyncMode.full_refresh],
            is_resumable=False,
            is_file_based=self._supports_file_transfer,
        )

        if self._namespace:
            stream.namespace = self._namespace

        if self._cursor_field:
            stream.source_defined_cursor = True
            stream.is_resumable = True
            stream.supported_sync_modes.append(SyncMode.incremental)
            stream.default_cursor_field = [self._cursor_field]

        # An empty (or None) primary key leaves source_defined_primary_key untouched.
        if self._primary_key:
            stream.source_defined_primary_key = [[key] for key in self._primary_key]

        return stream

    def log_stream_sync_configuration(self) -> None:
        """Emit a debug log entry carrying the primary key and cursor field of this stream."""
        self._logger.debug(
            f"Syncing stream instance: {self.name}",
            extra={
                "primary_key": self._primary_key,
                "cursor_field": self.cursor_field,
            },
        )

    @property
    def cursor(self) -> Cursor:
        return self._cursor
@@ -0,0 +1,18 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from typing import Any
6
+
7
+
8
class ExceptionWithDisplayMessage(Exception):
    """
    Exception that can be used to display a custom message to the user.

    The message is stored on `display_message` and is also forwarded to `Exception` so that it is
    present in `args`. Copying and pickling rebuild an exception by calling its class with `args`,
    so leaving `args` empty would make those operations fail on the required `display_message`.
    """

    def __init__(self, display_message: str, **kwargs: Any):
        """
        :param display_message: The message to show to the user.
        :param kwargs: Extra keyword arguments forwarded unchanged to the next `__init__` in the MRO.
        """
        super().__init__(display_message, **kwargs)
        self.display_message = display_message

    def __str__(self) -> str:
        return f'ExceptionWithDisplayMessage: "{self.display_message}"'
@@ -0,0 +1,42 @@
1
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
+
3
+ from typing import List, Optional, Union
4
+
5
+ from airbyte_cdk.sources.streams import Stream
6
+
7
+
8
def get_primary_key_from_stream(
    stream_primary_key: Optional[Union[str, List[str], List[List[str]]]],
) -> List[str]:
    """
    Normalize a primary key definition into a flat list of field names.

    :param stream_primary_key: None, a single field name, a list of field names, or a list of
        single-element lists of field names.
    :return: An empty list for None, a one-element list for a string, the list itself when it only
        holds strings, or the unwrapped names when every entry is a single-element list.
    :raises ValueError: When the list mixes shapes or holds lists whose length is not one, or when
        the value is neither None, a string nor a list.
    """
    if stream_primary_key is None:
        return []
    if isinstance(stream_primary_key, str):
        return [stream_primary_key]
    if not isinstance(stream_primary_key, list):
        raise ValueError(f"Invalid type for primary key: {stream_primary_key}")

    # A list made only of strings (including the empty list) is already in the target shape.
    if all(isinstance(entry, str) for entry in stream_primary_key):
        return stream_primary_key  # type: ignore # We verified all items in the list are strings

    every_entry_is_single_item_list = all(
        isinstance(entry, list) and len(entry) == 1 for entry in stream_primary_key
    )
    if not every_entry_is_single_item_list:
        raise ValueError(f"Nested primary keys are not supported. Found {stream_primary_key}")

    return [entry[0] for entry in stream_primary_key]
29
+
30
+
31
def get_cursor_field_from_stream(stream: Stream) -> Optional[str]:
    """
    Reduce a stream's cursor field to a single field name.

    :param stream: Object exposing `cursor_field` and `name`.
    :return: The `cursor_field` itself when it is not a list; otherwise the only element of the
        list, or None when the list is empty.
    :raises ValueError: When `cursor_field` is a list with more than one element.
    """
    cursor_field = stream.cursor_field
    if not isinstance(cursor_field, list):
        return cursor_field

    if len(cursor_field) > 1:
        raise ValueError(
            f"Nested cursor fields are not supported. Got {cursor_field} for {stream.name}"
        )

    return cursor_field[0] if cursor_field else None
@@ -0,0 +1,64 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+ import time
5
+ from queue import Queue
6
+
7
+ from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import (
8
+ PartitionGenerationCompletedSentinel,
9
+ )
10
+ from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException
11
+ from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
12
+ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
13
+ from airbyte_cdk.sources.streams.concurrent.partitions.types import QueueItem
14
+
15
+
16
class PartitionEnqueuer:
    """
    Reads the partitions generated by a stream and puts them in a queue.
    """

    def __init__(
        self,
        queue: Queue[QueueItem],
        thread_pool_manager: ThreadPoolManager,
        sleep_time_in_seconds: float = 0.1,
    ) -> None:
        """
        :param queue: The queue to put the partitions in.
        :param thread_pool_manager: The manager polled before each partition is enqueued to know if the futures limit is reached.
        :param sleep_time_in_seconds: The time to sleep between two polls while the futures limit is reached.
        """
        self._sleep_time_in_seconds = sleep_time_in_seconds
        self._thread_pool_manager = thread_pool_manager
        self._queue = queue

    def generate_partitions(self, stream: AbstractStream) -> None:
        """
        Generate the partitions of a stream and put them in the queue.
        Once every partition has been added to the queue, a sentinel is added to indicate that the generation is complete.

        If an exception is encountered, it is caught and put in the queue, followed by the sentinel. This is very important because
        if we don't, the main thread will have no way to know that something went wrong and will wait until the timeout is reached.

        This method is meant to be called in a separate thread.

        :param stream: The stream to generate the partitions from
        :return: None
        """
        try:
            for generated_partition in stream.generate_partitions():
                self._wait_while_futures_limit_is_reached()
                self._queue.put(generated_partition)
            self._queue.put(PartitionGenerationCompletedSentinel(stream))
        except Exception as error:
            self._queue.put(StreamThreadException(error, stream.name))
            self._queue.put(PartitionGenerationCompletedSentinel(stream))

    def _wait_while_futures_limit_is_reached(self) -> None:
        """
        Sleep until the thread pool manager stops reporting that the futures limit is reached.
        """
        # Adding partitions to the queue generates futures. To avoid having too many futures, we throttle here. We understand that
        # we might add more futures than the limit by throttling in the threads while it is the main thread that actually adds the
        # future, but we expect the delta between the max futures length and the actual one to be small enough that it is not an
        # issue. We do this in the threads because we want the main thread to always be processing QueueItems: if it does not, the
        # queue size could grow and generate OOM issues.
        #
        # Also note that we do not expect this to create deadlocks where all worker threads wait because we have fewer
        # PartitionEnqueuer threads than worker threads.
        #
        # Also note that prune_to_validate_has_reached_futures_limit has a lock while pruning which might create a bottleneck in
        # terms of performance.
        while self._thread_pool_manager.prune_to_validate_has_reached_futures_limit():
            time.sleep(self._sleep_time_in_seconds)
@@ -0,0 +1,45 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+ from queue import Queue
5
+
6
+ from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException
7
+ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
8
+ from airbyte_cdk.sources.streams.concurrent.partitions.types import (
9
+ PartitionCompleteSentinel,
10
+ QueueItem,
11
+ )
12
+
13
+
14
class PartitionReader:
    """
    Reads the records of a partition and puts them in a queue.
    """

    # Flag passed to the PartitionCompleteSentinel when the whole partition was read without error
    _IS_SUCCESSFUL = True

    def __init__(self, queue: Queue[QueueItem]) -> None:
        """
        :param queue: The queue to put the records in.
        """
        self._queue = queue

    def process_partition(self, partition: Partition) -> None:
        """
        Read a partition and put its records in the output queue.
        Once every record has been added to the queue, a sentinel flagged as successful is added to indicate that the partition
        is complete.

        If an exception is encountered, it is caught and put in the queue, followed by a sentinel flagged as unsuccessful. This is
        very important because if we don't, the main thread will have no way to know that something went wrong and will wait until
        the timeout is reached.

        This method is meant to be called from a thread.
        :param partition: The partition to read data from
        :return: None
        """
        enqueue = self._queue.put
        try:
            for partition_record in partition.read():
                enqueue(partition_record)
            enqueue(PartitionCompleteSentinel(partition, self._IS_SUCCESSFUL))
        except Exception as error:
            enqueue(StreamThreadException(error, partition.stream_name()))
            enqueue(PartitionCompleteSentinel(partition, not self._IS_SUCCESSFUL))
@@ -0,0 +1,3 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
@@ -0,0 +1,48 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Any, Iterable, Mapping, Optional
7
+
8
+ from airbyte_cdk.sources.types import Record
9
+
10
+
11
class Partition(ABC):
    """
    Interface of a partition: an object responsible for reading a specific set of data from a source.
    """

    @abstractmethod
    def read(self) -> Iterable[Record]:
        """
        Read the data of the partition.
        :return: An iterable of the records of the partition.
        """

    @abstractmethod
    def to_slice(self) -> Optional[Mapping[str, Any]]:
        """
        Convert the partition to a slice that can be serialized and deserialized.

        Note: a type of `Mapping[str, Comparable]` would have simplified typing but some slices can have nested
        values ([example](https://github.com/airbytehq/airbyte/blob/1ce84d6396e446e1ac2377362446e3fb94509461/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py#L584-L596))
        :return: A mapping representing a slice
        """

    @abstractmethod
    def stream_name(self) -> str:
        """
        Return the name of the stream that this partition is reading from.
        :return: The name of the stream.
        """

    @abstractmethod
    def __hash__(self) -> int:
        """
        Return a hash of the partition.
        Partitions must be hashable so that they can be used as keys in a dictionary.
        """
@@ -0,0 +1,18 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Iterable
7
+
8
+ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
9
+
10
+
11
class PartitionGenerator(ABC):
    """
    Interface of the objects able to produce partitions.
    """

    @abstractmethod
    def generate(self) -> Iterable[Partition]:
        """
        Generate the partitions.
        :return: An iterable of partitions
        """
@@ -0,0 +1,21 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Iterable
5
+
6
+ from airbyte_cdk.sources.types import StreamSlice
7
+
8
+
9
class StreamSlicer(ABC):
    """
    Splits a stream into slices that can be fetched independently. Slices enable state checkpointing and parallel data retrieval.
    """

    @abstractmethod
    def stream_slices(self) -> Iterable[StreamSlice]:
        """
        Define the slices of the stream.

        :return: An iterable of stream slices
        """
@@ -0,0 +1,38 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from typing import Any, Union
6
+
7
+ from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import (
8
+ PartitionGenerationCompletedSentinel,
9
+ )
10
+ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
11
+ from airbyte_cdk.sources.types import Record
12
+
13
+
14
class PartitionCompleteSentinel:
    """
    A sentinel object indicating all records for a partition were produced.
    Includes a pointer to the partition that was processed.

    Two sentinels are equal when they point to equal partitions, whatever their `is_successful` flag.
    """

    def __init__(self, partition: Partition, is_successful: bool = True):
        """
        :param partition: The partition that was processed
        :param is_successful: Whether the partition was processed without error
        """
        self.partition = partition
        self.is_successful = is_successful

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, PartitionCompleteSentinel):
            return self.partition == other.partition
        return False

    def __hash__(self) -> int:
        # Defining __eq__ alone would implicitly set __hash__ to None and make sentinels unhashable.
        # Hash on the partition only so that equal sentinels always share the same hash.
        return hash(self.partition)
31
+
32
+
33
# Typedef representing the items that can be added to the ThreadBasedConcurrentStream
QueueItem = Union[
    Record,
    Partition,
    PartitionCompleteSentinel,
    PartitionGenerationCompletedSentinel,
    Exception,
]
@@ -0,0 +1,182 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from abc import ABC, abstractmethod
6
+ from enum import Enum
7
+ from typing import TYPE_CHECKING, Any, Callable, List, MutableMapping, Optional, Tuple
8
+
9
+ if TYPE_CHECKING:
10
+ from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
11
+
12
+
13
class ConcurrencyCompatibleStateType(Enum):
    """
    Values accepted for the `state_type` entry of a concurrency-compatible state message.
    """

    date_range = "date-range"
    integer = "integer"


class AbstractStreamStateConverter(ABC):
    """
    Converts stream state between the stream's original (sequential) format and the concurrency-compatible format, which is made
    of a `state_type` entry and a list of `slices`.
    """

    # Keys of a slice in the concurrency-compatible state
    START_KEY = "start"
    END_KEY = "end"
    MOST_RECENT_RECORD_KEY = "most_recent_cursor_value"

    @abstractmethod
    def _from_state_message(self, value: Any) -> Any:
        """
        Convert a slice boundary read from a state message into the value handled by the converter.
        """
        pass

    @abstractmethod
    def _to_state_message(self, value: Any) -> Any:
        """
        Convert a value handled by the converter into the value written to a state message.
        """
        pass

    def __init__(self, is_sequential_state: bool = True):
        """
        :param is_sequential_state: Whether `convert_to_state_message` should emit the stream's original (sequential) format
          instead of the concurrency-compatible one.
        """
        self._is_sequential_state = is_sequential_state

    def convert_to_state_message(
        self, cursor_field: "CursorField", stream_state: MutableMapping[str, Any]
    ) -> MutableMapping[str, Any]:
        """
        Convert the state message from the concurrency-compatible format to the stream's original format.

        e.g.
        { "created": "2021-01-18T21:18:20.000Z" }

        When the state is not concurrency-compatible or the converter is not sequential, the serialized concurrency-compatible
        state is returned instead.

        :param cursor_field: The cursor field whose `cursor_field_key` is used as key of the sequential state
        :param stream_state: The state to convert; its `legacy` entry is copied, not modified
        :return: The state message to emit
        :raises RuntimeError: If the state is converted to the sequential format but has no slices
        """
        if self.is_state_message_compatible(stream_state) and self._is_sequential_state:
            # Work on a copy so that emitting a state message never modifies the state owned by the caller
            legacy_state = dict(stream_state.get("legacy") or {})
            latest_complete_time = self._get_latest_complete_time(stream_state.get("slices", []))
            if latest_complete_time is not None:
                legacy_state.update(
                    {cursor_field.cursor_field_key: self._to_state_message(latest_complete_time)}
                )
            return legacy_state
        else:
            return self.serialize(stream_state, ConcurrencyCompatibleStateType.date_range)

    def _get_latest_complete_time(self, slices: List[MutableMapping[str, Any]]) -> Any:
        """
        Get the latest time before which all records have been processed.

        This is the most recent cursor value of the first merged interval or, when there is none, the start of that interval.

        :raises RuntimeError: If there are no slices
        """
        if not slices:
            raise RuntimeError(
                "Expected at least one slice but there were none. This is unexpected; please contact Support."
            )
        merged_intervals = self.merge_intervals(slices)
        first_interval = merged_intervals[0]

        # Compare to None explicitly: a cursor value of 0 is falsy but is a valid most recent value
        most_recent_cursor_value = first_interval.get(self.MOST_RECENT_RECORD_KEY)
        if most_recent_cursor_value is not None:
            return most_recent_cursor_value
        return first_interval[self.START_KEY]

    def deserialize(self, state: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
        """
        Perform any transformations needed for compatibility with the converter.

        The start and the end of every slice are converted in place with `_from_state_message`.

        :return: The state received as parameter
        """
        for stream_slice in state.get("slices", []):
            stream_slice[self.START_KEY] = self._from_state_message(stream_slice[self.START_KEY])
            stream_slice[self.END_KEY] = self._from_state_message(stream_slice[self.END_KEY])
        return state

    def serialize(
        self, state: MutableMapping[str, Any], state_type: ConcurrencyCompatibleStateType
    ) -> MutableMapping[str, Any]:
        """
        Build the concurrency-compatible state message of a state.

        The start, the end and, when present, the most recent cursor value of every slice are converted with `_to_state_message`.
        Only the `slices` entry of the state is read; the state itself is not modified.

        :param state: The state to serialize
        :param state_type: The type whose value is written to the `state_type` entry of the result
        :return: A new mapping holding the serialized `slices` and the `state_type`
        """
        serialized_slices = []
        for stream_slice in state.get("slices", []):
            serialized_slice = {
                self.START_KEY: self._to_state_message(stream_slice[self.START_KEY]),
                self.END_KEY: self._to_state_message(stream_slice[self.END_KEY]),
            }
            # Compare to None explicitly so that a cursor value of 0 is not dropped from the state
            most_recent_cursor_value = stream_slice.get(self.MOST_RECENT_RECORD_KEY)
            if most_recent_cursor_value is not None:
                serialized_slice[self.MOST_RECENT_RECORD_KEY] = self._to_state_message(
                    most_recent_cursor_value
                )
            serialized_slices.append(serialized_slice)
        return {"slices": serialized_slices, "state_type": state_type.value}

    @staticmethod
    def is_state_message_compatible(state: MutableMapping[str, Any]) -> bool:
        """
        Tell whether a state is in the concurrency-compatible format, i.e. it is not empty and its `state_type` is one of the
        values of `ConcurrencyCompatibleStateType`.
        """
        return bool(state) and state.get("state_type") in [
            t.value for t in ConcurrencyCompatibleStateType
        ]

    @abstractmethod
    def convert_from_sequential_state(
        self,
        cursor_field: "CursorField",  # to deprecate as it is only needed for sequential state
        stream_state: MutableMapping[str, Any],
        start: Optional[Any],
    ) -> Tuple[Any, MutableMapping[str, Any]]:
        """
        Convert the state message to the format required by the ConcurrentCursor.

        e.g.
        {
            "state_type": ConcurrencyCompatibleStateType.date_range.value,
            "metadata": { … },
            "slices": [
                {start: 0, end: 1617030403, finished_processing: true}]
        }
        """
        ...

    @abstractmethod
    def increment(self, value: Any) -> Any:
        """
        Increment a cursor value (e.g. a timestamp) by a single unit.
        """
        ...

    @abstractmethod
    def output_format(self, value: Any) -> Any:
        """
        Convert the cursor value type to a JSON valid type.
        """
        ...

    def merge_intervals(
        self, intervals: List[MutableMapping[str, Any]]
    ) -> List[MutableMapping[str, Any]]:
        """
        Compute and return a list of merged intervals, sorted by start then end.

        Two intervals are merged when the start of the second one is lower than or equal to the end of the first one incremented
        by one unit (as defined by the `increment` method), i.e. when they overlap or are adjacent. The merged interval keeps the
        highest end and the highest most recent cursor value of the two.

        Note that the interval mappings received as parameter are reused and updated in place when they absorb another interval.
        """
        if not intervals:
            return []

        sorted_intervals = sorted(
            intervals, key=lambda interval: (interval[self.START_KEY], interval[self.END_KEY])
        )
        merged_intervals = [sorted_intervals[0]]

        for current_interval in sorted_intervals[1:]:
            last_interval = merged_intervals[-1]
            last_interval_end = last_interval[self.END_KEY]
            current_interval_start = current_interval[self.START_KEY]

            if self.increment(last_interval_end) >= current_interval_start:
                last_interval[self.END_KEY] = max(last_interval_end, current_interval[self.END_KEY])
                last_interval_cursor_value = last_interval.get(self.MOST_RECENT_RECORD_KEY)
                current_interval_cursor_value = current_interval.get(self.MOST_RECENT_RECORD_KEY)

                # Compare to None explicitly: a cursor value of 0 is falsy but must not be discarded
                if current_interval_cursor_value is None:
                    merged_cursor_value = last_interval_cursor_value
                elif last_interval_cursor_value is None:
                    merged_cursor_value = current_interval_cursor_value
                else:
                    merged_cursor_value = max(
                        current_interval_cursor_value, last_interval_cursor_value
                    )
                last_interval[self.MOST_RECENT_RECORD_KEY] = merged_cursor_value
            else:
                # Add a new interval if no overlap
                merged_intervals.append(current_interval)

        return merged_intervals

    @abstractmethod
    def parse_value(self, value: Any) -> Any:
        """
        Parse the value of the cursor field into a comparable value.
        """
        ...

    @property
    @abstractmethod
    def zero_value(self) -> Any: ...