airbyte-cdk 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (368) hide show
  1. airbyte_cdk/__init__.py +358 -0
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +104 -0
  7. airbyte_cdk/connector.py +123 -0
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/__init__.py +3 -0
  10. airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
  11. airbyte_cdk/connector_builder/main.py +107 -0
  12. airbyte_cdk/connector_builder/models.py +73 -0
  13. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  14. airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
  15. airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
  16. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  17. airbyte_cdk/connector_builder/test_reader/types.py +83 -0
  18. airbyte_cdk/destinations/__init__.py +8 -0
  19. airbyte_cdk/destinations/destination.py +154 -0
  20. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  21. airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
  22. airbyte_cdk/destinations/vector_db_based/config.py +298 -0
  23. airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
  24. airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
  25. airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
  26. airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
  27. airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
  28. airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
  29. airbyte_cdk/entrypoint.py +414 -0
  30. airbyte_cdk/exception_handler.py +56 -0
  31. airbyte_cdk/logger.py +109 -0
  32. airbyte_cdk/models/__init__.py +72 -0
  33. airbyte_cdk/models/airbyte_protocol.py +88 -0
  34. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  35. airbyte_cdk/models/well_known_types.py +5 -0
  36. airbyte_cdk/py.typed +0 -0
  37. airbyte_cdk/sources/__init__.py +26 -0
  38. airbyte_cdk/sources/abstract_source.py +326 -0
  39. airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
  40. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
  41. airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
  42. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
  43. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
  44. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  45. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
  46. airbyte_cdk/sources/config.py +27 -0
  47. airbyte_cdk/sources/connector_state_manager.py +161 -0
  48. airbyte_cdk/sources/declarative/__init__.py +3 -0
  49. airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
  50. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  51. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
  52. airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
  53. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  54. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  55. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  56. airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
  57. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
  58. airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
  59. airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
  60. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
  61. airbyte_cdk/sources/declarative/auth/token.py +267 -0
  62. airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
  63. airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
  64. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
  65. airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
  66. airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
  67. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  68. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  69. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
  70. airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
  71. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
  72. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
  73. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
  74. airbyte_cdk/sources/declarative/declarative_source.py +36 -0
  75. airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
  76. airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
  77. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
  78. airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
  79. airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
  80. airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
  81. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  82. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  83. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  84. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
  85. airbyte_cdk/sources/declarative/exceptions.py +9 -0
  86. airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
  87. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
  88. airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
  89. airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
  90. airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
  91. airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
  92. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
  93. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  94. airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
  95. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
  96. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
  97. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  98. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
  99. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
  100. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  101. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  102. airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
  103. airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
  104. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
  105. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
  106. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
  107. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
  108. airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
  109. airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
  110. airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
  111. airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
  112. airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
  113. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  114. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  115. airbyte_cdk/sources/declarative/models/__init__.py +2 -0
  116. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
  117. airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
  118. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
  119. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
  120. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
  121. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
  122. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
  123. airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
  124. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  125. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  126. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
  127. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  128. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
  129. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
  130. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  131. airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
  132. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
  133. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
  134. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
  135. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
  136. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
  137. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
  138. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
  139. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
  140. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
  141. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
  142. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  143. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
  144. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
  145. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
  146. airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
  147. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
  148. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
  149. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
  150. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
  151. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
  152. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
  153. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
  154. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
  155. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
  156. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
  157. airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
  158. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
  159. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
  160. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  161. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
  162. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
  163. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
  164. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
  165. airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
  166. airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
  167. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  168. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  169. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  170. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  171. airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
  172. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
  173. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  174. airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
  175. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
  176. airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
  177. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
  178. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
  179. airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
  180. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
  181. airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
  182. airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
  183. airbyte_cdk/sources/declarative/spec/spec.py +48 -0
  184. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
  185. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
  186. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
  187. airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
  188. airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
  189. airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
  190. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  191. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  192. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  193. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  194. airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
  195. airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
  196. airbyte_cdk/sources/declarative/types.py +25 -0
  197. airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
  198. airbyte_cdk/sources/file_based/README.md +152 -0
  199. airbyte_cdk/sources/file_based/__init__.py +24 -0
  200. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
  201. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
  202. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
  203. airbyte_cdk/sources/file_based/config/__init__.py +0 -0
  204. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
  205. airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
  206. airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
  207. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  208. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
  209. airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
  210. airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
  211. airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
  212. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  213. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
  214. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
  215. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
  216. airbyte_cdk/sources/file_based/exceptions.py +159 -0
  217. airbyte_cdk/sources/file_based/file_based_source.py +466 -0
  218. airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
  219. airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
  220. airbyte_cdk/sources/file_based/file_record_data.py +22 -0
  221. airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
  222. airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
  223. airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
  224. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  225. airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
  226. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
  227. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
  228. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
  229. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
  230. airbyte_cdk/sources/file_based/remote_file.py +18 -0
  231. airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
  232. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
  233. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
  234. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
  235. airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
  236. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
  237. airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
  238. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
  239. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
  240. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
  241. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
  242. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
  243. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
  244. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
  245. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
  246. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
  247. airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
  248. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
  249. airbyte_cdk/sources/file_based/types.py +10 -0
  250. airbyte_cdk/sources/http_config.py +10 -0
  251. airbyte_cdk/sources/http_logger.py +55 -0
  252. airbyte_cdk/sources/message/__init__.py +19 -0
  253. airbyte_cdk/sources/message/repository.py +137 -0
  254. airbyte_cdk/sources/source.py +95 -0
  255. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  256. airbyte_cdk/sources/streams/__init__.py +8 -0
  257. airbyte_cdk/sources/streams/availability_strategy.py +84 -0
  258. airbyte_cdk/sources/streams/call_rate.py +704 -0
  259. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  260. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  261. airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
  262. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  263. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  264. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  265. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  266. airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
  267. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
  268. airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
  269. airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
  270. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
  271. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  272. airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
  273. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  274. airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
  275. airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
  276. airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
  277. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
  278. airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
  279. airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
  280. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
  281. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
  282. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  283. airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
  284. airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
  285. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
  286. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
  287. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  288. airbyte_cdk/sources/streams/core.py +703 -0
  289. airbyte_cdk/sources/streams/http/__init__.py +10 -0
  290. airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
  291. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  292. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  293. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  294. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  295. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  296. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  297. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  298. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  299. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  300. airbyte_cdk/sources/streams/http/exceptions.py +61 -0
  301. airbyte_cdk/sources/streams/http/http.py +673 -0
  302. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  303. airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
  304. airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
  305. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
  306. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
  307. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
  308. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
  309. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  310. airbyte_cdk/sources/streams/utils/__init__.py +3 -0
  311. airbyte_cdk/sources/types.py +169 -0
  312. airbyte_cdk/sources/utils/__init__.py +7 -0
  313. airbyte_cdk/sources/utils/casing.py +12 -0
  314. airbyte_cdk/sources/utils/files_directory.py +15 -0
  315. airbyte_cdk/sources/utils/record_helper.py +53 -0
  316. airbyte_cdk/sources/utils/schema_helpers.py +230 -0
  317. airbyte_cdk/sources/utils/slice_logger.py +57 -0
  318. airbyte_cdk/sources/utils/transform.py +277 -0
  319. airbyte_cdk/sources/utils/types.py +7 -0
  320. airbyte_cdk/sql/__init__.py +0 -0
  321. airbyte_cdk/sql/_util/__init__.py +0 -0
  322. airbyte_cdk/sql/_util/hashing.py +34 -0
  323. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  324. airbyte_cdk/sql/constants.py +32 -0
  325. airbyte_cdk/sql/exceptions.py +235 -0
  326. airbyte_cdk/sql/secrets.py +123 -0
  327. airbyte_cdk/sql/shared/__init__.py +15 -0
  328. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  329. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  330. airbyte_cdk/sql/types.py +160 -0
  331. airbyte_cdk/test/__init__.py +7 -0
  332. airbyte_cdk/test/catalog_builder.py +81 -0
  333. airbyte_cdk/test/entrypoint_wrapper.py +250 -0
  334. airbyte_cdk/test/mock_http/__init__.py +6 -0
  335. airbyte_cdk/test/mock_http/matcher.py +41 -0
  336. airbyte_cdk/test/mock_http/mocker.py +185 -0
  337. airbyte_cdk/test/mock_http/request.py +103 -0
  338. airbyte_cdk/test/mock_http/response.py +28 -0
  339. airbyte_cdk/test/mock_http/response_builder.py +237 -0
  340. airbyte_cdk/test/state_builder.py +33 -0
  341. airbyte_cdk/test/utils/__init__.py +1 -0
  342. airbyte_cdk/test/utils/data.py +24 -0
  343. airbyte_cdk/test/utils/http_mocking.py +16 -0
  344. airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
  345. airbyte_cdk/test/utils/reading.py +26 -0
  346. airbyte_cdk/utils/__init__.py +10 -0
  347. airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
  348. airbyte_cdk/utils/analytics_message.py +25 -0
  349. airbyte_cdk/utils/constants.py +5 -0
  350. airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
  351. airbyte_cdk/utils/datetime_helpers.py +499 -0
  352. airbyte_cdk/utils/event_timing.py +85 -0
  353. airbyte_cdk/utils/is_cloud_environment.py +18 -0
  354. airbyte_cdk/utils/mapping_helpers.py +162 -0
  355. airbyte_cdk/utils/message_utils.py +26 -0
  356. airbyte_cdk/utils/oneof_option_config.py +33 -0
  357. airbyte_cdk/utils/print_buffer.py +75 -0
  358. airbyte_cdk/utils/schema_inferrer.py +270 -0
  359. airbyte_cdk/utils/slice_hasher.py +37 -0
  360. airbyte_cdk/utils/spec_schema_transformations.py +26 -0
  361. airbyte_cdk/utils/stream_status_utils.py +43 -0
  362. airbyte_cdk/utils/traced_exception.py +145 -0
  363. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
  364. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
  365. airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
  366. airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
  367. airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
  368. airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,704 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import abc
6
+ import dataclasses
7
+ import datetime
8
+ import logging
9
+ import re
10
+ import time
11
+ from datetime import timedelta
12
+ from threading import RLock
13
+ from typing import TYPE_CHECKING, Any, Mapping, Optional
14
+ from urllib import parse
15
+
16
+ import requests
17
+ import requests_cache
18
+ from pyrate_limiter import InMemoryBucket, Limiter, RateItem, TimeClock
19
+ from pyrate_limiter import Rate as PyRateRate
20
+ from pyrate_limiter.exceptions import BucketFullException
21
+
22
# Typing shim: prevents mypy from complaining about missing session attributes in
# LimiterMixin. Type checkers take the TYPE_CHECKING branch and therefore see
# requests.Session as the mixin base; at runtime TYPE_CHECKING is False, so the
# base is plain `object`.
if TYPE_CHECKING:
    MIXIN_BASE = requests.Session
else:
    MIXIN_BASE = object

# Module logger, registered under the name "airbyte".
logger = logging.getLogger("airbyte")
# Set the "pyrate_limiter" logger threshold to WARNING.
logging.getLogger("pyrate_limiter").setLevel(logging.WARNING)
30
+
31
+
32
@dataclasses.dataclass
class Rate:
    """Value object describing a call rate limit as a (limit, interval) pair."""

    # Number of calls associated with one interval.
    limit: int
    # Time span that ``limit`` refers to.
    interval: timedelta
38
+
39
+
40
class CallRateLimitHit(Exception):
    """Exception that carries the context of a call which hit a rate limit."""

    def __init__(self, error: str, item: Any, weight: int, rate: str, time_to_wait: timedelta):
        """Create the exception and remember the details of the rejected call.

        :param error: error message; it becomes the exception's message
        :param item: object that was passed into acquire_call
        :param weight: number of credits that were requested
        :param rate: string representation of the rate that was violated
        :param time_to_wait: how long to wait until more calls become available
        """
        super().__init__(error)
        self.time_to_wait = time_to_wait
        self.rate = rate
        self.weight = weight
        self.item = item
55
+
56
+
57
class AbstractCallRatePolicy(abc.ABC):
    """Interface for a call rate policy.

    A policy is meant to be configurable with different rules, such as "N calls per M"
    for endpoint X. Matching endpoint X to a policy is done with APIBudget.
    """

    @abc.abstractmethod
    def matches(self, request: Any) -> bool:
        """Decide whether this policy applies to the given request.

        :param request: the request to test
        :return: True when the policy should apply to this request, False otherwise
        """

    @abc.abstractmethod
    def try_acquire(self, request: Any, weight: int) -> None:
        """Attempt to acquire the request against this policy.

        :param request: a request object representing a single call to the API
        :param weight: number of requests to deduct from the credit
        :return: nothing
        """

    @abc.abstractmethod
    def update(
        self,
        available_calls: Optional[int],
        call_reset_ts: Optional[datetime.datetime],
    ) -> None:
        """Refresh the call rate counting with current values.

        :param available_calls: current value for the available calls, if any
        :param call_reset_ts: current value for the call reset timestamp, if any
        """
88
+
89
+
90
class RequestMatcher(abc.ABC):
    """Callable interface used to match a request object with call rate policies."""

    @abc.abstractmethod
    def __call__(self, request: Any) -> bool:
        """Test a request against this matcher.

        :param request: the request object to test
        :return: True if the provided request object matches, False otherwise
        """
100
+
101
+
102
class HttpRequestMatcher(RequestMatcher):
    """RequestMatcher for HTTP requests that delegates to an HttpRequestRegexMatcher."""

    def __init__(
        self,
        method: Optional[str] = None,
        url: Optional[str] = None,
        params: Optional[Mapping[str, Any]] = None,
        headers: Optional[Mapping[str, Any]] = None,
    ):
        """Build the underlying regex matcher from plain (non-regex) criteria.

        :param method: HTTP method (e.g., "GET", "POST").
        :param url: Full URL to match.
        :param params: Dictionary of query parameters to match.
        :param headers: Dictionary of headers to match.
        """
        base: Optional[str] = None
        escaped_path: Optional[str] = None

        if url:
            # Split the URL into its base (scheme://netloc) and its path.
            pieces = parse.urlsplit(url)
            base = f"{pieces.scheme}://{pieces.netloc}"
            # An empty path or a bare "/" adds no path constraint; any other path is
            # escaped so that it is passed on as a literal rather than as a regex.
            if pieces.path and pieces.path != "/":
                escaped_path = re.escape(pieces.path)

        # All matching is delegated to HttpRequestRegexMatcher.
        self._regex_matcher = HttpRequestRegexMatcher(
            method=method,
            url_base=base,
            url_path_pattern=escaped_path,
            params=params,
            headers=headers,
        )

    def __call__(self, request: Any) -> bool:
        """Delegate the match decision to the wrapped regex matcher.

        :param request: A requests.Request or requests.PreparedRequest instance.
        :return: True if the request matches all provided criteria; False otherwise.
        """
        delegate = self._regex_matcher
        return delegate(request)

    def __str__(self) -> str:
        """Render the matcher's method, url (base plus path pattern), params and headers."""
        inner = self._regex_matcher
        compiled_path = inner._url_path_pattern
        # Show the path pattern's source text, or nothing when no path was given.
        path_text = compiled_path.pattern if compiled_path else ""
        return (
            f"HttpRequestMatcher(method={inner._method}, "
            f"url={inner._url_base}{path_text}, "
            f"params={inner._params}, headers={inner._headers})"
        )
150
+
151
+
152
class HttpRequestRegexMatcher(RequestMatcher):
    """
    Extended RequestMatcher for HTTP requests that supports matching on:
    - HTTP method (case-insensitive)
    - URL base (scheme + netloc) optionally
    - URL path pattern (a regex searched for in the path portion of the URL)
    - Query parameters (must be present with the expected value, blank values included)
    - Headers (header names compared case-insensitively)

    Every criterion is optional; a criterion that was not provided is not checked.
    """

    def __init__(
        self,
        method: Optional[str] = None,
        url_base: Optional[str] = None,
        url_path_pattern: Optional[str] = None,
        params: Optional[Mapping[str, Any]] = None,
        headers: Optional[Mapping[str, Any]] = None,
    ):
        """
        :param method: HTTP method (e.g. "GET", "POST"); compared case-insensitively.
        :param url_base: Base URL (scheme://host) that must match.
        :param url_path_pattern: A regex pattern that will be applied to the path portion of the URL.
        :param params: Dictionary of query parameters that must be present in the request.
        :param headers: Dictionary of headers that must be present (header keys are compared case-insensitively).
        """
        self._method = method.upper() if method else None

        # Normalize the url_base if provided: remove trailing slash.
        self._url_base = url_base.rstrip("/") if url_base else None

        # Compile the URL path pattern if provided.
        self._url_path_pattern = re.compile(url_path_pattern) if url_path_pattern else None

        # Normalize query parameters to strings.
        self._params = {str(k): str(v) for k, v in (params or {}).items()}

        # Normalize header keys to lowercase.
        self._headers = {str(k).lower(): str(v) for k, v in (headers or {}).items()}

    @staticmethod
    def _match_dict(obj: Mapping[str, Any], pattern: Mapping[str, Any]) -> bool:
        """Check that every key/value in the pattern exists in the object."""
        return pattern.items() <= obj.items()

    def __call__(self, request: Any) -> bool:
        """
        :param request: A requests.Request or requests.PreparedRequest instance.
        :return: True if the request matches all provided criteria; False otherwise.
        """
        # Prepare the request (if needed); anything that is not a requests object never matches.
        if isinstance(request, requests.Request):
            prepared_request = request.prepare()
        elif isinstance(request, requests.PreparedRequest):
            prepared_request = request
        else:
            return False

        # Check HTTP method.
        if self._method is not None and prepared_request.method != self._method:
            return False

        # Parse the URL.
        parsed_url = parse.urlsplit(prepared_request.url)
        # Reconstruct the base: scheme://netloc
        request_url_base = f"{str(parsed_url.scheme)}://{str(parsed_url.netloc)}"
        # The path (without query parameters); trailing slashes are ignored.
        request_path = str(parsed_url.path).rstrip("/")

        # If a base URL is provided, check that it matches.
        if self._url_base is not None and request_url_base != self._url_base:
            return False

        # If a URL path pattern is provided, ensure the path matches the regex.
        # The pattern is searched for, so it matches anywhere in the path unless it is anchored.
        if self._url_path_pattern is not None and not self._url_path_pattern.search(request_path):
            return False

        # Check query parameters.
        if self._params:
            # keep_blank_values=True: parse_qsl drops "key=" pairs by default, which would make
            # an expected parameter with an empty value impossible to match.
            query_params = dict(parse.parse_qsl(str(parsed_url.query), keep_blank_values=True))
            if not self._match_dict(query_params, self._params):
                return False

        # Check headers (normalize keys to lower-case).
        if self._headers:
            req_headers = {k.lower(): v for k, v in prepared_request.headers.items()}
            if not self._match_dict(req_headers, self._headers):
                return False

        return True

    def __str__(self) -> str:
        regex = self._url_path_pattern.pattern if self._url_path_pattern else None
        return (
            f"HttpRequestRegexMatcher(method={self._method}, url_base={self._url_base}, "
            f"url_path_pattern={regex}, params={self._params}, headers={self._headers})"
        )
251
+
252
+
253
class BaseCallRatePolicy(AbstractCallRatePolicy, abc.ABC):
    """Shared base for call rate policies that select requests through a list of matchers."""

    def __init__(self, matchers: list[RequestMatcher]):
        self._matchers = matchers

    def matches(self, request: Any) -> bool:
        """Decide whether this policy applies to the given request.

        A policy without matchers applies to every request; otherwise a single
        matching matcher is enough.

        :param request:
        :return: True if policy should apply to this request, False - otherwise
        """
        if self._matchers:
            for matcher in self._matchers:
                if matcher(request):
                    return True
            return False
        return True
267
+
268
+
269
class UnlimitedCallRatePolicy(BaseCallRatePolicy):
    """
    Policy for requests that are explicitly exempt from any call rate limit.

    Use it to match a specific group of requests and apply no limits to them.

    Example:

    APIBudget(
        [
            UnlimitedCallRatePolicy(
                matchers=[HttpRequestMatcher(url="/some/method", headers={"sandbox": True})],
            ),
            FixedWindowCallRatePolicy(
                matchers=[HttpRequestMatcher(url="/some/method")],
                next_reset_ts=datetime.now(),
                period=timedelta(hours=1),
                call_limit=1000,
            ),
        ]
    )

    The code above will limit all calls to /some/method except calls that have header sandbox=True
    """

    def try_acquire(self, request: Any, weight: int) -> None:
        """Always succeed: there is no budget to consume."""
        return None

    def update(
        self,
        available_calls: Optional[int],
        call_reset_ts: Optional[datetime.datetime],
    ) -> None:
        """Ignore updates from the API: there is no state to adjust."""
        return None
300
+
301
+
302
class FixedWindowCallRatePolicy(BaseCallRatePolicy):
    """Policy that allows at most ``call_limit`` calls per fixed time window.

    The current window ends at ``next_reset_ts``; once that moment has passed the
    call counter is reset and the window end is moved forward by ``period``.
    """

    def __init__(
        self,
        next_reset_ts: datetime.datetime,
        period: timedelta,
        call_limit: int,
        matchers: list[RequestMatcher],
    ):
        """A policy that allows {call_limit} calls within a {period} time interval

        :param next_reset_ts: next call rate reset time point
        :param period: call rate reset period
        :param call_limit: maximum total weight of calls allowed inside one window
        :param matchers: matchers selecting the requests this policy applies to
        """

        self._next_reset_ts = next_reset_ts
        self._offset = period
        self._call_limit = call_limit
        self._calls_num = 0
        # Guards the counter and the window end, which are read and written together.
        self._lock = RLock()
        super().__init__(matchers=matchers)

    def try_acquire(self, request: Any, weight: int) -> None:
        """Consume ``weight`` calls from the current window.

        :param request: the request the call is acquired for
        :param weight: number of calls to consume
        :raises ValueError: if weight exceeds the call limit or the request does not match the policy
        :raises CallRateLimitHit: if the current window has fewer than ``weight`` calls left
        """
        if weight > self._call_limit:
            raise ValueError("Weight can not exceed the call limit")
        if not self.matches(request):
            raise ValueError("Request does not match the policy")

        with self._lock:
            self._update_current_window()

            if self._calls_num + weight > self._call_limit:
                reset_in = self._next_reset_ts - datetime.datetime.now()
                error_message = (
                    f"reached maximum number of allowed calls {self._call_limit} "
                    f"per {self._offset} interval, next reset in {reset_in}."
                )
                raise CallRateLimitHit(
                    error=error_message,
                    item=request,
                    weight=weight,
                    rate=f"{self._call_limit} per {self._offset}",
                    time_to_wait=reset_in,
                )

            self._calls_num += weight

    def __str__(self) -> str:
        matcher_str = ", ".join(f"{matcher}" for matcher in self._matchers)
        return (
            f"FixedWindowCallRatePolicy(call_limit={self._call_limit}, period={self._offset}, "
            f"calls_used={self._calls_num}, next_reset={self._next_reset_ts}, "
            f"matchers=[{matcher_str}])"
        )

    def update(
        self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime]
    ) -> None:
        """Update call rate counters, by default, only reacts to decreasing updates of available_calls and changes to call_reset_ts.
        We ignore updates with available_calls > current_available_calls to support call rate limits that are lower than API limits.

        :param available_calls: number of calls the API reports as still available, if known
        :param call_reset_ts: moment the API reports for the next reset, if known
        """
        with self._lock:
            self._update_current_window()
            current_available_calls = self._call_limit - self._calls_num

            if available_calls is not None and current_available_calls > available_calls:
                logger.debug(
                    "got rate limit update from api, adjusting available calls from %s to %s",
                    current_available_calls,
                    available_calls,
                )
                self._calls_num = self._call_limit - available_calls

            if call_reset_ts is not None and call_reset_ts != self._next_reset_ts:
                logger.debug(
                    "got rate limit update from api, adjusting reset time from %s to %s",
                    self._next_reset_ts,
                    call_reset_ts,
                )
                self._next_reset_ts = call_reset_ts

    def _update_current_window(self) -> None:
        """Start a new window when the current one has ended.

        Every window that fully elapsed since the last call is skipped in one step,
        so the new window end always lies in the future.
        """
        now = datetime.datetime.now()
        if now > self._next_reset_ts:
            logger.debug("started new window, %s calls available now", self._call_limit)
            if self._offset > timedelta(0):
                # Advancing by a single period after a long idle would leave the window end
                # in the past and make each following call reset the counter again.
                periods_to_skip = (now - self._next_reset_ts) // self._offset + 1
                self._next_reset_ts = self._next_reset_ts + periods_to_skip * self._offset
            else:
                # A non-positive period cannot be caught up with; keep the single-step behaviour.
                self._next_reset_ts = self._next_reset_ts + self._offset
            self._calls_num = 0
393
+
394
+
395
class MovingWindowCallRatePolicy(BaseCallRatePolicy):
    """
    Policy to control requests rate implemented on top of PyRateLimiter lib.
    The main difference between this policy and FixedWindowCallRatePolicy is that the rate-limiting window
    is moving along requests that we made, and there is no moment when we reset an available number of calls.
    This strategy requires saving of timestamps of all requests within a window.
    """

    def __init__(self, rates: list[Rate], matchers: list[RequestMatcher]):
        """Constructor

        :param rates: list of rates, the order is important and must be ascending
        :param matchers: matchers selecting the requests this policy applies to
        :raises ValueError: if the list of rates is empty
        """
        if not rates:
            raise ValueError("The list of rates can not be empty")
        # The bucket is configured with intervals in milliseconds.
        pyrate_rates = [
            PyRateRate(limit=rate.limit, interval=int(rate.interval.total_seconds() * 1000))
            for rate in rates
        ]
        self._bucket = InMemoryBucket(pyrate_rates)
        # Limiter will create the background task that clears old requests in the bucket
        self._limiter = Limiter(self._bucket)
        super().__init__(matchers=matchers)

    def try_acquire(self, request: Any, weight: int) -> None:
        """Register ``weight`` calls in the bucket.

        :param request: the request the call is acquired for
        :param weight: number of calls to consume
        :raises ValueError: if the request does not match the policy
        :raises CallRateLimitHit: if the bucket is full; carries the time to wait
        """
        if not self.matches(request):
            raise ValueError("Request does not match the policy")

        try:
            self._limiter.try_acquire(request, weight=weight)
        except BucketFullException as exc:
            item = self._limiter.bucket_factory.wrap_item(request, weight)
            assert isinstance(item, RateItem)

            with self._limiter.lock:
                time_to_wait = self._bucket.waiting(item)
                assert isinstance(time_to_wait, int)

            # time_to_wait is converted from milliseconds below.
            raise CallRateLimitHit(
                error=str(exc.meta_info["error"]),
                item=request,
                weight=int(exc.meta_info["weight"]),
                rate=str(exc.meta_info["rate"]),
                time_to_wait=timedelta(milliseconds=time_to_wait),
            ) from exc

    def update(
        self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime]
    ) -> None:
        """Adjust call bucket to reflect the state of the API server

        Only the "no calls left, reset time unknown" case is handled: the bucket is
        filled up to the limit of the first rate so that no further call is granted
        until items leave the window.

        :param available_calls: number of calls the API reports as still available, if known
        :param call_reset_ts: moment the API reports for the next reset, if known
        :return:
        """
        if (
            available_calls is not None and call_reset_ts is None
        ):  # we do our best to sync buckets with API
            if available_calls == 0:
                with self._limiter.lock:
                    # Number of free slots left in the first rate. This must be a difference,
                    # not a comparison: a boolean here would add a single item at most.
                    items_to_add = self._bucket.rates[0].limit - self._bucket.count()
                    if items_to_add > 0:
                        now: int = TimeClock().now()  # type: ignore[no-untyped-call]
                        self._bucket.put(RateItem(name="dummy", timestamp=now, weight=items_to_add))
        # TODO: add support if needed, it might be that it is not possible to make a good solution for this case
        # if available_calls is not None and call_reset_ts is not None:
        #     ts = call_reset_ts.timestamp()

    def __str__(self) -> str:
        """Return a human-friendly description of the moving window rate policy for logging purposes."""
        rates_info = ", ".join(
            f"{rate.limit} per {timedelta(milliseconds=rate.interval)}"
            for rate in self._bucket.rates
        )
        current_bucket_count = self._bucket.count()
        matcher_str = ", ".join(f"{matcher}" for matcher in self._matchers)
        return (
            f"MovingWindowCallRatePolicy(rates=[{rates_info}], current_bucket_count={current_bucket_count}, "
            f"matchers=[{matcher_str}])"
        )
476
+
477
+
478
class AbstractAPIBudget(abc.ABC):
    """Interface to an API budget: a client is allowed N calls per T interval.

    Important: an APIBudget never performs API calls itself; the calling code is
    responsible for going through this interface to respect the API's call rate limits.

    Several policies can be applied to different groups of requests; the groups are
    told apart with RequestMatchers, and each policy is an AbstractCallRatePolicy.
    """

    @abc.abstractmethod
    def acquire_call(
        self,
        request: Any,
        block: bool = True,
        timeout: Optional[float] = None,
    ) -> None:
        """Try to take one call from the budget, blocking by default.

        :param request: the request the call is acquired for
        :param block: when true (default) will block the current thread until call credit is available
        :param timeout: if set will limit maximum time in block, otherwise will wait until credit is available
        :raises: CallRateLimitHit - when no credits left and if timeout was set the waiting time exceed the timeout
        """

    @abc.abstractmethod
    def get_matching_policy(self, request: Any) -> Optional[AbstractCallRatePolicy]:
        """Return the call rate policy that applies to the request, or None if there is none."""

    @abc.abstractmethod
    def update_from_response(self, request: Any, response: Any) -> None:
        """Refresh the budget state from an API response.

        :param request: the initial request that triggered this response
        :param response: response from the API
        """
511
+
512
+
513
class APIBudget(AbstractAPIBudget):
    """Default APIBudget implementation"""

    def __init__(
        self, policies: list[AbstractCallRatePolicy], maximum_attempts_to_acquire: int = 100000
    ) -> None:
        """Constructor

        :param policies: list of policies in this budget
        :param maximum_attempts_to_acquire: number of attempts before throwing hit ratelimit exception, we put some big number here
         to avoid situations when many threads compete with each other for a few lots over a significant amount of time
        """

        self._policies = policies
        self._maximum_attempts_to_acquire = maximum_attempts_to_acquire

    def _extract_endpoint(self, request: Any) -> str:
        """Extract the endpoint URL from the request if available.

        :param request: a requests.Request / requests.PreparedRequest, or any other object
        :return: the request URL, or "unknown endpoint" when it cannot be determined
        """
        endpoint = None
        try:
            # If the request is already a PreparedRequest, it should have a URL.
            if isinstance(request, requests.PreparedRequest):
                endpoint = request.url
            # If it's a requests.Request, we call prepare() to extract the URL.
            elif isinstance(request, requests.Request):
                endpoint = request.prepare().url
        except Exception as e:
            # Deliberately best effort: the endpoint is only used in log messages,
            # so a failure here must never prevent the call from being acquired.
            logger.debug(f"Error extracting endpoint: {e}")
        return endpoint or "unknown endpoint"

    def get_matching_policy(self, request: Any) -> Optional[AbstractCallRatePolicy]:
        """Return the first policy, in registration order, that matches the request (or None)."""
        for policy in self._policies:
            if policy.matches(request):
                return policy
        return None

    def acquire_call(
        self, request: Any, block: bool = True, timeout: Optional[float] = None
    ) -> None:
        """Try to get a call from budget, will block by default.
        Policies are checked sequentially in the same order they were added.
        The first policy that matches the request is the one the call is acquired from;
        when no policy matches, the call is allowed without any limit.

        :param request: the API request
        :param block: when True (default) will block until a call credit is available
        :param timeout: if provided, limits maximum waiting time; otherwise, waits indefinitely
        :raises: CallRateLimitHit if the call credit cannot be acquired within the timeout
        """

        policy = self.get_matching_policy(request)
        endpoint = self._extract_endpoint(request)
        if policy:
            # Lazy %-formatting: str(policy) is only evaluated when debug logging is enabled.
            logger.debug("Acquiring call for endpoint %s using policy: %s", endpoint, policy)
            self._do_acquire(request=request, policy=policy, block=block, timeout=timeout)
        elif self._policies:
            logger.debug(
                "No policies matched for endpoint %s (request: %s). Allowing call by default.",
                endpoint,
                request,
            )

    def update_from_response(self, request: Any, response: Any) -> None:
        """Update budget information based on the API response.

        The default implementation does nothing.

        :param request: the initial request that triggered this response
        :param response: response from the API
        """
        pass

    def _do_acquire(
        self, request: Any, policy: AbstractCallRatePolicy, block: bool, timeout: Optional[float]
    ) -> None:
        """Internal method to try to acquire a call credit.

        :param request: the API request
        :param policy: the matching rate-limiting policy
        :param block: indicates whether to block until a call credit is available
        :param timeout: maximum total time, in seconds, to wait if blocking
        :raises: CallRateLimitHit if unable to acquire a call credit
        """
        last_exception = None
        endpoint = self._extract_endpoint(request)
        # The timeout bounds the whole wait, not each individual sleep.
        deadline = None if timeout is None else time.monotonic() + timeout
        # sometimes we spend all budget before a second attempt, so we have a few more attempts
        for attempt in range(1, self._maximum_attempts_to_acquire + 1):
            try:
                policy.try_acquire(request, weight=1)
                return
            except CallRateLimitHit as exc:
                last_exception = exc
                if not block:
                    logger.debug(
                        "Policy %s reached call limit for endpoint %s (%s) "
                        "and blocking is disabled.",
                        policy,
                        endpoint,
                        exc.rate,
                    )
                    raise

                # Ensure we never sleep for a negative duration.
                time_to_wait = max(timedelta(0), exc.time_to_wait)
                if deadline is not None:
                    remaining = deadline - time.monotonic()
                    if remaining <= 0:
                        logger.debug(
                            "Policy %s reached call limit for endpoint %s (%s) "
                            "and the timeout of %s seconds has expired.",
                            policy,
                            endpoint,
                            exc.rate,
                            timeout,
                        )
                        raise
                    time_to_wait = min(time_to_wait, timedelta(seconds=remaining))

                logger.debug(
                    "Policy %s reached call limit for endpoint %s (%s). "
                    "Sleeping for %s on attempt %s.",
                    policy,
                    endpoint,
                    exc.rate,
                    time_to_wait,
                    attempt,
                )
                time.sleep(time_to_wait.total_seconds())

        if last_exception:
            logger.debug(
                "Exhausted all %s attempts to acquire a call for endpoint %s using policy: %s",
                self._maximum_attempts_to_acquire,
                endpoint,
                policy,
            )
            raise last_exception
628
+
629
+
630
class HttpAPIBudget(APIBudget):
    """Implementation of AbstractAPIBudget for HTTP"""

    def __init__(
        self,
        ratelimit_reset_header: str = "ratelimit-reset",
        ratelimit_remaining_header: str = "ratelimit-remaining",
        status_codes_for_ratelimit_hit: Optional[list[int]] = None,
        **kwargs: Any,
    ):
        """Constructor

        :param ratelimit_reset_header: name of the header that has a timestamp of the next reset of call budget
        :param ratelimit_remaining_header: name of the header that has the number of calls left
        :param status_codes_for_ratelimit_hit: list of HTTP status codes that signal about rate limit being hit;
            defaults to [429] when omitted
        :param kwargs: forwarded to APIBudget (e.g. policies, maximum_attempts_to_acquire)
        """
        self._ratelimit_reset_header = ratelimit_reset_header
        self._ratelimit_remaining_header = ratelimit_remaining_header
        # A fresh list per instance: a list literal as the default value would be shared
        # by every instance created without this argument.
        self._status_codes_for_ratelimit_hit = (
            [429] if status_codes_for_ratelimit_hit is None else status_codes_for_ratelimit_hit
        )
        super().__init__(**kwargs)

    def update_from_response(self, request: Any, response: Any) -> None:
        """Feed rate limit information from the response into the policy matching the request.

        Nothing happens when no policy matches or the response is not a requests.Response.

        :param request: the initial request that triggered this response
        :param response: response from the API
        """
        policy = self.get_matching_policy(request)
        if not policy:
            return

        if isinstance(response, requests.Response):
            available_calls = self.get_calls_left_from_response(response)
            reset_ts = self.get_reset_ts_from_response(response)
            policy.update(available_calls=available_calls, call_reset_ts=reset_ts)

    def get_reset_ts_from_response(
        self, response: requests.Response
    ) -> Optional[datetime.datetime]:
        """Read the next reset moment from the reset header.

        :param response: response from the API
        :return: the header value interpreted as an integer timestamp, or None when the header is missing or empty
        """
        raw_reset = response.headers.get(self._ratelimit_reset_header)
        if raw_reset:
            return datetime.datetime.fromtimestamp(int(raw_reset))
        return None

    def get_calls_left_from_response(self, response: requests.Response) -> Optional[int]:
        """Read the number of remaining calls from the response.

        :param response: response from the API
        :return: the value of the remaining-calls header; 0 when the header is absent but the status code
            signals a rate limit hit; None otherwise
        """
        raw_remaining = response.headers.get(self._ratelimit_remaining_header)
        if raw_remaining:
            return int(raw_remaining)

        if response.status_code in self._status_codes_for_ratelimit_hit:
            return 0

        return None
678
+
679
+
680
class LimiterMixin(MIXIN_BASE):
    """Mixin that makes a session acquire a call from an API budget before every send."""

    def __init__(self, api_budget: AbstractAPIBudget, **kwargs: Any):
        """Store the budget and forward the remaining keyword arguments to the next class.

        :param api_budget: budget consulted before each request is sent
        """
        self._api_budget = api_budget
        super().__init__(**kwargs)  # type: ignore # Base Session doesn't take any kwargs

    def send(
        self,
        request: requests.PreparedRequest,
        **kwargs: Any,
    ) -> requests.Response:
        """Send a request with rate-limiting.

        A call is acquired from the budget first; after the response arrives the
        budget is updated from it.
        """
        self._api_budget.acquire_call(request)
        result = super().send(request, **kwargs)
        self._api_budget.update_from_response(request, result)
        return result
697
+
698
+
699
class LimiterSession(LimiterMixin, requests.Session):
    """requests.Session combined with LimiterMixin, so requests are rate-limited."""
701
+
702
+
703
class CachedLimiterSession(requests_cache.CacheMixin, LimiterMixin, requests.Session):
    """requests.Session combined with both the caching mixin and the rate-limiting mixin."""