airbyte-cdk 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (368) hide show
  1. airbyte_cdk/__init__.py +358 -0
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +104 -0
  7. airbyte_cdk/connector.py +123 -0
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/__init__.py +3 -0
  10. airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
  11. airbyte_cdk/connector_builder/main.py +107 -0
  12. airbyte_cdk/connector_builder/models.py +73 -0
  13. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  14. airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
  15. airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
  16. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  17. airbyte_cdk/connector_builder/test_reader/types.py +83 -0
  18. airbyte_cdk/destinations/__init__.py +8 -0
  19. airbyte_cdk/destinations/destination.py +154 -0
  20. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  21. airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
  22. airbyte_cdk/destinations/vector_db_based/config.py +298 -0
  23. airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
  24. airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
  25. airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
  26. airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
  27. airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
  28. airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
  29. airbyte_cdk/entrypoint.py +414 -0
  30. airbyte_cdk/exception_handler.py +56 -0
  31. airbyte_cdk/logger.py +109 -0
  32. airbyte_cdk/models/__init__.py +72 -0
  33. airbyte_cdk/models/airbyte_protocol.py +88 -0
  34. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  35. airbyte_cdk/models/well_known_types.py +5 -0
  36. airbyte_cdk/py.typed +0 -0
  37. airbyte_cdk/sources/__init__.py +26 -0
  38. airbyte_cdk/sources/abstract_source.py +326 -0
  39. airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
  40. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
  41. airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
  42. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
  43. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
  44. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  45. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
  46. airbyte_cdk/sources/config.py +27 -0
  47. airbyte_cdk/sources/connector_state_manager.py +161 -0
  48. airbyte_cdk/sources/declarative/__init__.py +3 -0
  49. airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
  50. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  51. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
  52. airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
  53. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  54. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  55. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  56. airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
  57. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
  58. airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
  59. airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
  60. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
  61. airbyte_cdk/sources/declarative/auth/token.py +267 -0
  62. airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
  63. airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
  64. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
  65. airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
  66. airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
  67. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  68. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  69. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
  70. airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
  71. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
  72. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
  73. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
  74. airbyte_cdk/sources/declarative/declarative_source.py +36 -0
  75. airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
  76. airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
  77. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
  78. airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
  79. airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
  80. airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
  81. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  82. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  83. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  84. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
  85. airbyte_cdk/sources/declarative/exceptions.py +9 -0
  86. airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
  87. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
  88. airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
  89. airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
  90. airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
  91. airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
  92. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
  93. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  94. airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
  95. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
  96. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
  97. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  98. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
  99. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
  100. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  101. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  102. airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
  103. airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
  104. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
  105. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
  106. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
  107. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
  108. airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
  109. airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
  110. airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
  111. airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
  112. airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
  113. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  114. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  115. airbyte_cdk/sources/declarative/models/__init__.py +2 -0
  116. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
  117. airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
  118. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
  119. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
  120. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
  121. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
  122. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
  123. airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
  124. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  125. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  126. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
  127. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  128. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
  129. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
  130. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  131. airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
  132. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
  133. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
  134. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
  135. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
  136. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
  137. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
  138. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
  139. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
  140. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
  141. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
  142. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  143. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
  144. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
  145. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
  146. airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
  147. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
  148. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
  149. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
  150. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
  151. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
  152. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
  153. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
  154. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
  155. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
  156. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
  157. airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
  158. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
  159. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
  160. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  161. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
  162. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
  163. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
  164. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
  165. airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
  166. airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
  167. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  168. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  169. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  170. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  171. airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
  172. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
  173. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  174. airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
  175. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
  176. airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
  177. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
  178. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
  179. airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
  180. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
  181. airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
  182. airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
  183. airbyte_cdk/sources/declarative/spec/spec.py +48 -0
  184. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
  185. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
  186. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
  187. airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
  188. airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
  189. airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
  190. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  191. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  192. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  193. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  194. airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
  195. airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
  196. airbyte_cdk/sources/declarative/types.py +25 -0
  197. airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
  198. airbyte_cdk/sources/file_based/README.md +152 -0
  199. airbyte_cdk/sources/file_based/__init__.py +24 -0
  200. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
  201. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
  202. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
  203. airbyte_cdk/sources/file_based/config/__init__.py +0 -0
  204. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
  205. airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
  206. airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
  207. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  208. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
  209. airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
  210. airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
  211. airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
  212. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  213. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
  214. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
  215. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
  216. airbyte_cdk/sources/file_based/exceptions.py +159 -0
  217. airbyte_cdk/sources/file_based/file_based_source.py +466 -0
  218. airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
  219. airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
  220. airbyte_cdk/sources/file_based/file_record_data.py +22 -0
  221. airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
  222. airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
  223. airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
  224. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  225. airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
  226. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
  227. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
  228. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
  229. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
  230. airbyte_cdk/sources/file_based/remote_file.py +18 -0
  231. airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
  232. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
  233. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
  234. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
  235. airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
  236. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
  237. airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
  238. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
  239. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
  240. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
  241. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
  242. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
  243. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
  244. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
  245. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
  246. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
  247. airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
  248. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
  249. airbyte_cdk/sources/file_based/types.py +10 -0
  250. airbyte_cdk/sources/http_config.py +10 -0
  251. airbyte_cdk/sources/http_logger.py +55 -0
  252. airbyte_cdk/sources/message/__init__.py +19 -0
  253. airbyte_cdk/sources/message/repository.py +137 -0
  254. airbyte_cdk/sources/source.py +95 -0
  255. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  256. airbyte_cdk/sources/streams/__init__.py +8 -0
  257. airbyte_cdk/sources/streams/availability_strategy.py +84 -0
  258. airbyte_cdk/sources/streams/call_rate.py +704 -0
  259. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  260. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  261. airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
  262. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  263. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  264. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  265. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  266. airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
  267. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
  268. airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
  269. airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
  270. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
  271. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  272. airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
  273. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  274. airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
  275. airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
  276. airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
  277. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
  278. airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
  279. airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
  280. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
  281. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
  282. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  283. airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
  284. airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
  285. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
  286. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
  287. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  288. airbyte_cdk/sources/streams/core.py +703 -0
  289. airbyte_cdk/sources/streams/http/__init__.py +10 -0
  290. airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
  291. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  292. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  293. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  294. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  295. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  296. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  297. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  298. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  299. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  300. airbyte_cdk/sources/streams/http/exceptions.py +61 -0
  301. airbyte_cdk/sources/streams/http/http.py +673 -0
  302. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  303. airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
  304. airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
  305. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
  306. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
  307. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
  308. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
  309. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  310. airbyte_cdk/sources/streams/utils/__init__.py +3 -0
  311. airbyte_cdk/sources/types.py +169 -0
  312. airbyte_cdk/sources/utils/__init__.py +7 -0
  313. airbyte_cdk/sources/utils/casing.py +12 -0
  314. airbyte_cdk/sources/utils/files_directory.py +15 -0
  315. airbyte_cdk/sources/utils/record_helper.py +53 -0
  316. airbyte_cdk/sources/utils/schema_helpers.py +230 -0
  317. airbyte_cdk/sources/utils/slice_logger.py +57 -0
  318. airbyte_cdk/sources/utils/transform.py +277 -0
  319. airbyte_cdk/sources/utils/types.py +7 -0
  320. airbyte_cdk/sql/__init__.py +0 -0
  321. airbyte_cdk/sql/_util/__init__.py +0 -0
  322. airbyte_cdk/sql/_util/hashing.py +34 -0
  323. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  324. airbyte_cdk/sql/constants.py +32 -0
  325. airbyte_cdk/sql/exceptions.py +235 -0
  326. airbyte_cdk/sql/secrets.py +123 -0
  327. airbyte_cdk/sql/shared/__init__.py +15 -0
  328. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  329. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  330. airbyte_cdk/sql/types.py +160 -0
  331. airbyte_cdk/test/__init__.py +7 -0
  332. airbyte_cdk/test/catalog_builder.py +81 -0
  333. airbyte_cdk/test/entrypoint_wrapper.py +250 -0
  334. airbyte_cdk/test/mock_http/__init__.py +6 -0
  335. airbyte_cdk/test/mock_http/matcher.py +41 -0
  336. airbyte_cdk/test/mock_http/mocker.py +185 -0
  337. airbyte_cdk/test/mock_http/request.py +103 -0
  338. airbyte_cdk/test/mock_http/response.py +28 -0
  339. airbyte_cdk/test/mock_http/response_builder.py +237 -0
  340. airbyte_cdk/test/state_builder.py +33 -0
  341. airbyte_cdk/test/utils/__init__.py +1 -0
  342. airbyte_cdk/test/utils/data.py +24 -0
  343. airbyte_cdk/test/utils/http_mocking.py +16 -0
  344. airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
  345. airbyte_cdk/test/utils/reading.py +26 -0
  346. airbyte_cdk/utils/__init__.py +10 -0
  347. airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
  348. airbyte_cdk/utils/analytics_message.py +25 -0
  349. airbyte_cdk/utils/constants.py +5 -0
  350. airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
  351. airbyte_cdk/utils/datetime_helpers.py +499 -0
  352. airbyte_cdk/utils/event_timing.py +85 -0
  353. airbyte_cdk/utils/is_cloud_environment.py +18 -0
  354. airbyte_cdk/utils/mapping_helpers.py +162 -0
  355. airbyte_cdk/utils/message_utils.py +26 -0
  356. airbyte_cdk/utils/oneof_option_config.py +33 -0
  357. airbyte_cdk/utils/print_buffer.py +75 -0
  358. airbyte_cdk/utils/schema_inferrer.py +270 -0
  359. airbyte_cdk/utils/slice_hasher.py +37 -0
  360. airbyte_cdk/utils/spec_schema_transformations.py +26 -0
  361. airbyte_cdk/utils/stream_status_utils.py +43 -0
  362. airbyte_cdk/utils/traced_exception.py +145 -0
  363. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
  364. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
  365. airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
  366. airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
  367. airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
  368. airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,99 @@
1
+ from abc import ABC
2
+ from datetime import datetime, timedelta
3
+ from enum import Enum
4
+ from typing import Callable
5
+
6
+ from airbyte_cdk.sources.streams.concurrent.cursor_types import CursorValueType
7
+
8
+
9
class ClampingStrategy(ABC):
    """Base type for strategies that snap a cursor value onto a boundary.

    This base class provides no usable behaviour of its own: calling
    ``clamp`` on it raises, so concrete strategies must override the method.
    """

    def clamp(self, value: CursorValueType) -> CursorValueType:
        """Return the clamped form of ``value``.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError()
12
+
13
+
14
class NoClamping(ClampingStrategy):
    """Strategy that performs no clamping at all."""

    def clamp(self, value: CursorValueType) -> CursorValueType:
        """Hand ``value`` back exactly as it was received."""
        untouched = value
        return untouched
17
+
18
+
19
class ClampingEndProvider:
    """Callable producing a clamped end value pulled back by one granularity step.

    Each call asks ``end_provider`` for a fresh value, clamps it with
    ``clamping_strategy`` and then subtracts ``granularity`` from the result.
    """

    def __init__(
        self,
        clamping_strategy: ClampingStrategy,
        end_provider: Callable[[], CursorValueType],
        granularity: timedelta,
    ) -> None:
        """Store the collaborators; nothing is evaluated until the instance is called."""
        self._granularity = granularity
        self._end_provider = end_provider
        self._clamping_strategy = clamping_strategy

    def __call__(self) -> CursorValueType:
        """Return ``clamp(end_provider()) - granularity``."""
        # Resolve the clamp method first so the lookup order matches a single
        # nested call expression: strategy lookup, then the end provider call.
        clamp = self._clamping_strategy.clamp
        clamped_end = clamp(self._end_provider())
        return clamped_end - self._granularity
32
+
33
+
34
class DayClampingStrategy(ClampingStrategy):
    """Snap a datetime onto a day boundary (midnight).

    With ``is_ceiling=True`` the result is midnight of the day *after*
    ``value`` (this also applies when ``value`` is already at midnight);
    otherwise it is midnight of the same day.
    """

    def __init__(self, is_ceiling: bool = True) -> None:
        """Remember whether clamping rounds up (default) or down."""
        self._is_ceiling = is_ceiling

    def clamp(self, value: datetime) -> datetime:  # type: ignore  # datetime implements method from CursorValueType
        """Return the day boundary above or below ``value``."""
        midnight = value.replace(hour=0, minute=0, second=0, microsecond=0)
        if not self._is_ceiling:
            return midnight
        return midnight + timedelta(days=1)
43
+
44
+
45
class MonthClampingStrategy(ClampingStrategy):
    """Snap a datetime onto the first day of a month at midnight.

    A value that already falls on day 1 is only truncated to midnight,
    regardless of direction. Any other value goes to the first of the next
    month (``is_ceiling=True``) or the first of its own month (floor).
    """

    def __init__(self, is_ceiling: bool = True) -> None:
        """Remember whether clamping rounds up (default) or down."""
        self._is_ceiling = is_ceiling

    def clamp(self, value: datetime) -> datetime:  # type: ignore  # datetime implements method from CursorValueType
        """Return the month boundary for ``value`` in the configured direction."""
        midnight = value.replace(hour=0, minute=0, second=0, microsecond=0)
        if value.day == 1:
            # Already on a month boundary: dropping the time of day is enough.
            return midnight
        if self._is_ceiling:
            return self._ceil(midnight)
        return midnight.replace(day=1)

    def _ceil(self, value: datetime) -> datetime:
        """Return midnight on the first day of the month following ``value``."""
        # divmod(month, 12) is (1, 0) for December and (0, month) otherwise,
        # which gives the year carry and the zero-based next-month index.
        year_carry, next_month_index = divmod(value.month, 12)
        return value.replace(
            year=value.year + year_carry,
            month=next_month_index + 1,
            day=1,
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
        )
67
+
68
+
69
class Weekday(Enum):
    """
    Days of the week, numbered like ``datetime.date.weekday()``: Monday is 0 and Sunday is 6.
    """

    MONDAY = 0
    TUESDAY = 1
    WEDNESDAY = 2
    THURSDAY = 3
    FRIDAY = 4
    SATURDAY = 5
    SUNDAY = 6
81
+
82
+
83
class WeekClampingStrategy(ClampingStrategy):
    """Snap a datetime onto midnight of a chosen weekday.

    With ``is_ceiling=True`` the result is the nearest occurrence of the
    target weekday on or after ``value``'s date. Otherwise it is exactly seven
    days before that occurrence, so a value that already falls on the target
    weekday is floored to the previous week.
    """

    def __init__(self, day_of_week: Weekday, is_ceiling: bool = True) -> None:
        """Store the target weekday (as its integer value) and the direction."""
        self._is_ceiling = is_ceiling
        self._day_of_week = day_of_week.value

    def clamp(self, value: datetime) -> datetime:  # type: ignore  # datetime implements method from CursorValueType
        """Return midnight on the target weekday above or below ``value``."""
        # Number of days (0-6) from value's weekday forward to the target one.
        days_until_target = (self._day_of_week - value.weekday()) % 7
        if self._is_ceiling:
            day_shift = days_until_target
        else:
            day_shift = days_until_target - 7
        midnight = value.replace(hour=0, minute=0, second=0, microsecond=0)
        return midnight + timedelta(days=day_shift)
@@ -0,0 +1,481 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import functools
6
+ import logging
7
+ from abc import ABC, abstractmethod
8
+ from typing import (
9
+ Any,
10
+ Callable,
11
+ Iterable,
12
+ List,
13
+ Mapping,
14
+ MutableMapping,
15
+ Optional,
16
+ Tuple,
17
+ Union,
18
+ )
19
+
20
+ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
21
+ from airbyte_cdk.sources.message import MessageRepository
22
+ from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
23
+ from airbyte_cdk.sources.streams.concurrent.clamping import ClampingStrategy, NoClamping
24
+ from airbyte_cdk.sources.streams.concurrent.cursor_types import CursorValueType, GapType
25
+ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
26
+ from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
27
+ from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
28
+ AbstractStreamStateConverter,
29
+ )
30
+ from airbyte_cdk.sources.types import Record, StreamSlice
31
+
32
+ LOGGER = logging.getLogger("airbyte")
33
+
34
+
35
+ def _extract_value(mapping: Mapping[str, Any], path: List[str]) -> Any:
36
+ return functools.reduce(lambda a, b: a[b], path, mapping)
37
+
38
+
39
class CursorField:
    """
    Identifies the record field holding the cursor value and knows how to read it from a record.
    """

    def __init__(self, cursor_field_key: str) -> None:
        """
        :param cursor_field_key: key of the cursor field in the record data
        """
        self.cursor_field_key = cursor_field_key

    def extract_value(self, record: Record) -> CursorValueType:
        """
        Read the cursor value from the data of `record`.

        :param record: the record to read from
        :return: the raw value stored under the cursor field key
        :raises ValueError: if the key is absent from the record data or if its value is None
        """
        value = record.data.get(self.cursor_field_key)
        if value is not None:
            return value  # type: ignore  # we assume that the value the path points at is a comparable
        raise ValueError(f"Could not find cursor field {self.cursor_field_key} in record")
48
+
49
+
50
class Cursor(StreamSlicer, ABC):
    """
    Interface of the objects that follow the progress of a stream: they are told about emitted records and closed partitions
    and expose the resulting state.
    """

    @property
    @abstractmethod
    def state(self) -> MutableMapping[str, Any]:
        """
        The state of the stream as currently known by the cursor
        """
        ...

    @abstractmethod
    def observe(self, record: Record) -> None:
        """
        Notify the cursor that `record` has been emitted
        """
        raise NotImplementedError()

    @abstractmethod
    def close_partition(self, partition: Partition) -> None:
        """
        Notify the cursor that `partition` has been processed successfully
        """
        raise NotImplementedError()

    @abstractmethod
    def ensure_at_least_one_state_emitted(self) -> None:
        """
        The platform expects at least one state message per stream per sync. As state messages are emitted when partitions are
        closed, this method needs to be called to cover the case where no partitions were generated.
        """
        raise NotImplementedError()

    def stream_slices(self) -> Iterable[StreamSlice]:
        """
        Default placeholder implementation: yields a single slice with an empty partition and an empty cursor slice.
        Subclasses can override this method to provide actual slicing.
        """
        yield StreamSlice(partition={}, cursor_slice={})
83
+
84
+
85
class FinalStateCursor(Cursor):
    """
    Cursor without any incremental logic whose only purpose is to guarantee that a concurrent stream emits at least one state
    message.
    """

    def __init__(
        self,
        stream_name: str,
        stream_namespace: Optional[str],
        message_repository: MessageRepository,
    ) -> None:
        """
        :param stream_name: name of the stream the state is emitted for
        :param stream_namespace: namespace of the stream, if any
        :param message_repository: repository the state message is emitted through
        """
        self._stream_name = stream_name
        self._stream_namespace = stream_namespace
        self._message_repository = message_repository
        # The connector state manager usually lives at the source level. A dedicated instance is enough here because it is only
        # used to write the sentinel state message, not to manage the state of the whole source. This is temporary until we reach
        # the resumable full refresh world where every stream uses a FileBasedConcurrentCursor with incremental state.
        self._connector_state_manager = ConnectorStateManager()
        self._has_closed_at_least_one_slice = False

    @property
    def state(self) -> MutableMapping[str, Any]:
        """
        Sentinel state flagging that the stream has no cursor
        """
        return {NO_CURSOR_STATE_KEY: True}

    def observe(self, record: Record) -> None:
        """
        Records do not affect the state of this cursor: nothing to do
        """
        return None

    def close_partition(self, partition: Partition) -> None:
        """
        Closed partitions do not affect the state of this cursor: nothing to do
        """
        return None

    def ensure_at_least_one_state_emitted(self) -> None:
        """
        Used primarily for full refresh syncs that do not have a valid cursor value to emit at the end of a sync
        """
        state_manager = self._connector_state_manager
        state_manager.update_state_for_stream(self._stream_name, self._stream_namespace, self.state)
        final_state_message = state_manager.create_state_message(
            self._stream_name, self._stream_namespace
        )
        self._message_repository.emit_message(final_state_message)
125
+
126
+
127
class ConcurrentCursor(Cursor):
    """
    Cursor that keeps track of the slices that were already synced and derives from them the slices that remain to be synced.

    The internal state is a mapping with a "slices" list. Each entry of that list is delimited by the START_KEY and END_KEY of the
    state converter and carries, under MOST_RECENT_RECORD_KEY, the highest cursor value observed in the records of the slice.
    """

    # Indexes of the start and end field names within `slice_boundary_fields`
    _START_BOUNDARY = 0
    _END_BOUNDARY = 1

    def __init__(
        self,
        stream_name: str,
        stream_namespace: Optional[str],
        stream_state: Any,
        message_repository: MessageRepository,
        connector_state_manager: ConnectorStateManager,
        connector_state_converter: AbstractStreamStateConverter,
        cursor_field: CursorField,
        slice_boundary_fields: Optional[Tuple[str, str]],
        start: Optional[CursorValueType],
        end_provider: Callable[[], CursorValueType],
        lookback_window: Optional[GapType] = None,
        slice_range: Optional[GapType] = None,
        cursor_granularity: Optional[GapType] = None,
        clamping_strategy: ClampingStrategy = NoClamping(),
    ) -> None:
        """
        :param stream_name: name of the stream the state is tracked for
        :param stream_namespace: namespace of the stream, if any
        :param stream_state: incoming state, either already in the concurrent format or in the sequential one
        :param message_repository: repository the state messages are emitted through
        :param connector_state_manager: manager the stream state is saved to before a state message is created
        :param connector_state_converter: converter between the state formats and between raw and parsed cursor values
        :param cursor_field: description of the record field holding the cursor value
        :param slice_boundary_fields: names of the start and end fields in the slices, or None if slices have no boundaries
        :param start: lowest cursor value to sync, if any
        :param end_provider: callable returning the upper boundary of the sync
        :param lookback_window: gap subtracted from the end of the last synced slice when generating slices
        :param slice_range: maximum gap covered by one generated slice
        :param cursor_granularity: gap removed from the upper boundary of generated slices that are followed by another slice
        :param clamping_strategy: strategy applied on the boundaries of the generated slices
        """
        self._stream_name = stream_name
        self._stream_namespace = stream_namespace
        self._message_repository = message_repository
        self._connector_state_converter = connector_state_converter
        self._connector_state_manager = connector_state_manager
        self._cursor_field = cursor_field
        # To see some example where the slice boundaries might not be defined, check https://github.com/airbytehq/airbyte/blob/1ce84d6396e446e1ac2377362446e3fb94509461/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py#L363-L379
        self._slice_boundary_fields = slice_boundary_fields
        self._start = start
        self._end_provider = end_provider
        self.start, self._concurrent_state = self._get_concurrent_state(stream_state)
        self._lookback_window = lookback_window
        self._slice_range = slice_range
        self._most_recent_cursor_value_per_partition: MutableMapping[
            Union[StreamSlice, Mapping[str, Any], None], Any
        ] = {}
        self._has_closed_at_least_one_slice = False
        self._cursor_granularity = cursor_granularity
        # Flag to track if the logger has been triggered (per stream)
        self._should_be_synced_logger_triggered = False
        self._clamping_strategy = clamping_strategy

    @property
    def state(self) -> MutableMapping[str, Any]:
        """
        The internal state converted by the state converter into the format of a state message
        """
        return self._connector_state_converter.convert_to_state_message(
            self.cursor_field, self._concurrent_state
        )

    @property
    def cursor_field(self) -> CursorField:
        return self._cursor_field

    @property
    def _slice_boundary_fields_wrapper(self) -> Tuple[str, str]:
        """
        The configured slice boundary field names or, when there are none, the START_KEY and END_KEY of the state converter
        """
        return (
            self._slice_boundary_fields
            if self._slice_boundary_fields
            else (
                self._connector_state_converter.START_KEY,
                self._connector_state_converter.END_KEY,
            )
        )

    def _get_concurrent_state(
        self, state: MutableMapping[str, Any]
    ) -> Tuple[CursorValueType, MutableMapping[str, Any]]:
        """
        Build the sync start and the internal state from the incoming state.

        A state the converter reports as compatible is deserialized as is, any other state is converted from the sequential
        format.
        """
        if self._connector_state_converter.is_state_message_compatible(state):
            return (
                self._start or self._connector_state_converter.zero_value,
                self._connector_state_converter.deserialize(state),
            )
        return self._connector_state_converter.convert_from_sequential_state(
            self._cursor_field, state, self._start
        )

    def observe(self, record: Record) -> None:
        """
        Keep track of the highest cursor value seen for the slice the record is associated with. Records without cursor value
        are ignored after a warning has been logged.
        """
        most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(
            record.associated_slice
        )
        try:
            cursor_value = self._extract_cursor_value(record)

            if most_recent_cursor_value is None or most_recent_cursor_value < cursor_value:
                self._most_recent_cursor_value_per_partition[record.associated_slice] = cursor_value
        except ValueError:
            self._log_for_record_without_cursor_value()

    def _extract_cursor_value(self, record: Record) -> Any:
        """
        Read the cursor value of the record and parse it with the state converter
        """
        return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))

    def close_partition(self, partition: Partition) -> None:
        """
        Add the slice of the partition to the state and, if the state actually gained a slice, merge the slices and emit a
        state message.
        """
        slice_count_before = len(self._concurrent_state.get("slices", []))
        self._add_slice_to_state(partition)
        if slice_count_before < len(
            self._concurrent_state["slices"]
        ):  # only emit if at least one slice has been processed
            self._merge_partitions()
            self._emit_state_message()
        self._has_closed_at_least_one_slice = True

    def _add_slice_to_state(self, partition: Partition) -> None:
        """
        Append to the state the slice covered by the partition.

        With slice boundary fields, the boundaries are read from the slice of the partition. Without them, the slice goes from
        the sync start to the most recent cursor value observed for the partition, and nothing is added when no cursor value
        was observed.

        :raises RuntimeError: if slice boundary fields are defined but the state has no "slices" entry
        :raises ValueError: if there are no slice boundary fields and a partition has already been closed
        """
        most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(
            partition.to_slice()
        )

        if self._slice_boundary_fields:
            if "slices" not in self._concurrent_state:
                raise RuntimeError(
                    f"The state for stream {self._stream_name} should have at least one slice to delineate the sync start time, but no slices are present. This is unexpected. Please contact Support."
                )
            self._concurrent_state["slices"].append(
                {
                    self._connector_state_converter.START_KEY: self._extract_from_slice(
                        partition, self._slice_boundary_fields[self._START_BOUNDARY]
                    ),
                    self._connector_state_converter.END_KEY: self._extract_from_slice(
                        partition, self._slice_boundary_fields[self._END_BOUNDARY]
                    ),
                    self._connector_state_converter.MOST_RECENT_RECORD_KEY: most_recent_cursor_value,
                }
            )
        # Compare to None instead of relying on truthiness: a falsy cursor value (for example the integer 0) is a legitimate
        # value that has to be saved
        elif most_recent_cursor_value is not None:
            if self._has_closed_at_least_one_slice:
                # If we track state value using records cursor field, we can only do that if there is one partition. This is because we save
                # the state every time we close a partition. We assume that if there are multiple slices, they need to be providing
                # boundaries. There are cases where partitions could not have boundaries:
                # * The cursor should be per-partition
                # * The stream state is actually the parent stream state
                # There might be other cases not listed above. Those are not supported today hence the stream should not use this cursor for
                # state management. For the specific user that was affected with this issue, we need to:
                # * Fix state tracking (which is currently broken)
                # * Make the new version available
                # * (Probably) ask the user to reset the stream to avoid data loss
                raise ValueError(
                    "Given that slice_boundary_fields is not defined and that per-partition state is not supported, only one slice is "
                    "expected. Please contact the Airbyte team."
                )

            self._concurrent_state["slices"].append(
                {
                    self._connector_state_converter.START_KEY: self.start,
                    self._connector_state_converter.END_KEY: most_recent_cursor_value,
                    self._connector_state_converter.MOST_RECENT_RECORD_KEY: most_recent_cursor_value,
                }
            )

    def _emit_state_message(self) -> None:
        """
        Save the current state in the connector state manager and emit the resulting state message
        """
        self._connector_state_manager.update_state_for_stream(
            self._stream_name,
            self._stream_namespace,
            self.state,
        )
        state_message = self._connector_state_manager.create_state_message(
            self._stream_name, self._stream_namespace
        )
        self._message_repository.emit_message(state_message)

    def _merge_partitions(self) -> None:
        """
        Replace the slices of the state by the result of the converter's `merge_intervals`
        """
        self._concurrent_state["slices"] = self._connector_state_converter.merge_intervals(
            self._concurrent_state["slices"]
        )

    def _extract_from_slice(self, partition: Partition, key: str) -> CursorValueType:
        """
        Read `key` from the slice of the partition and parse it with the state converter.

        :raises KeyError: if the slice is empty or does not have the key
        """
        try:
            _slice = partition.to_slice()
            if not _slice:
                raise KeyError(f"Could not find key `{key}` in empty slice")
            return self._connector_state_converter.parse_value(_slice[key])  # type: ignore  # we expect the devs to specify a key that would return a CursorValueType
        except KeyError as exception:
            raise KeyError(
                f"Partition is expected to have key `{key}` but could not be found"
            ) from exception

    def ensure_at_least_one_state_emitted(self) -> None:
        """
        The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
        called.
        """
        self._emit_state_message()

    def stream_slices(self) -> Iterable[StreamSlice]:
        """
        Generating slices based on a few parameters:
        * lookback_window: Buffer to remove from END_KEY of the highest slice
        * slice_range: Max difference between two slices. If the difference between two slices is greater, multiple slices will be created
        * start: `_split_per_slice_range` will clip any value to `self._start` which means that:
          * if upper is less than self._start, no slices will be generated
          * if lower is less than self._start, self._start will be used as the lower boundary (lookback_window will not be considered in that case)

        Note that the slices will overlap at their boundaries. We therefore expect to have at least the lower or the upper boundary to be
        inclusive in the API that is queried.

        :raises ValueError: if the state has no slice once the slices have been merged
        """
        self._merge_partitions()

        # Validate before anything indexes the first slice so that an empty state is reported with this error whether or not
        # a start is configured
        if not self._concurrent_state["slices"]:
            raise ValueError("Expected at least one slice")

        if self._start is not None and self._is_start_before_first_slice():
            yield from self._split_per_slice_range(
                self._start,
                self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY],
                False,
            )

        if len(self._concurrent_state["slices"]) == 1:
            yield from self._split_per_slice_range(
                self._calculate_lower_boundary_of_last_slice(
                    self._concurrent_state["slices"][0][self._connector_state_converter.END_KEY]
                ),
                self._end_provider(),
                True,
            )
        elif len(self._concurrent_state["slices"]) > 1:
            # Fill the gaps between every pair of consecutive slices
            for i in range(len(self._concurrent_state["slices"]) - 1):
                gap_lower = self._concurrent_state["slices"][i][
                    self._connector_state_converter.END_KEY
                ]
                if self._cursor_granularity:
                    # The end of the previous slice is already covered: start right after it
                    gap_lower = gap_lower + self._cursor_granularity
                yield from self._split_per_slice_range(
                    gap_lower,
                    self._concurrent_state["slices"][i + 1][
                        self._connector_state_converter.START_KEY
                    ],
                    False,
                )
            yield from self._split_per_slice_range(
                self._calculate_lower_boundary_of_last_slice(
                    self._concurrent_state["slices"][-1][self._connector_state_converter.END_KEY]
                ),
                self._end_provider(),
                True,
            )
        else:
            # Defensive: the state is read again for every branch above
            raise ValueError("Expected at least one slice")

    def _is_start_before_first_slice(self) -> bool:
        return (
            self._start is not None
            and self._start
            < self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY]
        )

    def _calculate_lower_boundary_of_last_slice(
        self, lower_boundary: CursorValueType
    ) -> CursorValueType:
        """
        Move `lower_boundary` back by the lookback window, if there is one
        """
        if self._lookback_window:
            return lower_boundary - self._lookback_window
        return lower_boundary

    def _build_stream_slice(
        self, start_value: CursorValueType, end_value: CursorValueType
    ) -> StreamSlice:
        """
        Create a slice with an empty partition whose cursor slice holds both boundaries in the converter's output format
        """
        return StreamSlice(
            partition={},
            cursor_slice={
                self._slice_boundary_fields_wrapper[
                    self._START_BOUNDARY
                ]: self._connector_state_converter.output_format(start_value),
                self._slice_boundary_fields_wrapper[
                    self._END_BOUNDARY
                ]: self._connector_state_converter.output_format(end_value),
            },
        )

    def _split_per_slice_range(
        self, lower: CursorValueType, upper: CursorValueType, upper_is_end: bool
    ) -> Iterable[StreamSlice]:
        """
        Generate the slices covering the range going from `lower` to `upper`.

        :param lower: lower boundary of the range, clipped to `self._start` if there is one
        :param upper: upper boundary of the range
        :param upper_is_end: True if `upper` is the end of the sync, in which case the cursor granularity is not removed from
            the upper boundary of the slice reaching it
        :return: nothing if the range is empty or entirely before the start, one slice if the range fits in the slice range (or
            there is no slice range), else as many slices as needed
        """
        if lower >= upper:
            return

        # The start is compared to None (as `stream_slices` does) so that a falsy start such as 0 is still used for clipping
        if self._start is not None and upper < self._start:
            return

        lower = max(lower, self._start) if self._start is not None else lower
        if not self._slice_range or self._evaluate_upper_safely(lower, self._slice_range) >= upper:
            clamped_lower = self._clamping_strategy.clamp(lower)
            clamped_upper = self._clamping_strategy.clamp(upper)
            end_value = (
                clamped_upper - self._cursor_granularity
                if self._cursor_granularity and not upper_is_end
                else clamped_upper
            )
            yield self._build_stream_slice(clamped_lower, end_value)
            return

        current_lower_boundary = lower
        while True:
            current_upper_boundary = min(
                self._evaluate_upper_safely(current_lower_boundary, self._slice_range), upper
            )
            has_reached_upper_boundary = current_upper_boundary >= upper

            # The final upper boundary is kept as provided, only the intermediate ones are clamped
            clamped_upper = (
                self._clamping_strategy.clamp(current_upper_boundary)
                if current_upper_boundary != upper
                else current_upper_boundary
            )
            clamped_lower = self._clamping_strategy.clamp(current_lower_boundary)
            if clamped_lower >= clamped_upper:
                # clamping collapsed both values which means that it is time to stop processing
                # FIXME should this be replace by proper end_provider
                break
            end_value = (
                clamped_upper - self._cursor_granularity
                if self._cursor_granularity
                and (not upper_is_end or not has_reached_upper_boundary)
                else clamped_upper
            )
            yield self._build_stream_slice(clamped_lower, end_value)
            current_lower_boundary = clamped_upper
            if has_reached_upper_boundary:
                break

    def _evaluate_upper_safely(self, lower: CursorValueType, step: GapType) -> CursorValueType:
        """
        Given that we set the default step at datetime.timedelta.max, we will generate an OverflowError when evaluating the next start_date
        This method assumes that users would never enter a step that would generate an overflow. Given that would be the case, the code
        would have broken anyway.
        """
        try:
            return lower + step
        except OverflowError:
            return self._end_provider()

    def should_be_synced(self, record: Record) -> bool:
        """
        Determines if a record should be synced based on its cursor value.
        :param record: The record to evaluate

        :return: True if the record's cursor value falls within the sync boundaries or if the record has no cursor value
        """
        try:
            record_cursor_value: CursorValueType = self._extract_cursor_value(record)
        except ValueError:
            self._log_for_record_without_cursor_value()
            return True
        return self.start <= record_cursor_value <= self._end_provider()

    def _log_for_record_without_cursor_value(self) -> None:
        """
        Warn that a record has no cursor value. The warning is logged only once per cursor.
        """
        if not self._should_be_synced_logger_triggered:
            LOGGER.warning(
                f"Could not find cursor field `{self.cursor_field.cursor_field_key}` in record for stream {self._stream_name}. The incremental sync will assume it needs to be synced"
            )
            self._should_be_synced_logger_triggered = True
@@ -0,0 +1,32 @@
1
+ from abc import abstractmethod
2
+ from typing import Protocol
3
+
4
+
5
class GapType(Protocol):
    """
    Represents the distance between two cursor values. For instance:
    * cursor values that are datetimes are separated by a timedelta
    * cursor values that are integers are separated by an integer
    """

    ...
13
+
14
+
15
class CursorValueType(Protocol):
    """Protocol for annotating cursor values: they can be compared with each other and shifted by a gap."""

    @abstractmethod
    def __lt__(self: "CursorValueType", other: "CursorValueType") -> bool:
        """Tell whether this value is strictly lower than `other`."""
        ...

    @abstractmethod
    def __ge__(self: "CursorValueType", other: "CursorValueType") -> bool:
        """Tell whether this value is greater than or equal to `other`."""
        ...

    @abstractmethod
    def __add__(self: "CursorValueType", other: GapType) -> "CursorValueType":
        """Return this value moved forward by the gap `other`."""
        ...

    @abstractmethod
    def __sub__(self: "CursorValueType", other: GapType) -> "CursorValueType":
        """Return this value moved back by the gap `other`."""
        ...