airbyte-cdk 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (368)
  1. airbyte_cdk/__init__.py +358 -0
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +104 -0
  7. airbyte_cdk/connector.py +123 -0
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/__init__.py +3 -0
  10. airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
  11. airbyte_cdk/connector_builder/main.py +107 -0
  12. airbyte_cdk/connector_builder/models.py +73 -0
  13. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  14. airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
  15. airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
  16. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  17. airbyte_cdk/connector_builder/test_reader/types.py +83 -0
  18. airbyte_cdk/destinations/__init__.py +8 -0
  19. airbyte_cdk/destinations/destination.py +154 -0
  20. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  21. airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
  22. airbyte_cdk/destinations/vector_db_based/config.py +298 -0
  23. airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
  24. airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
  25. airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
  26. airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
  27. airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
  28. airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
  29. airbyte_cdk/entrypoint.py +414 -0
  30. airbyte_cdk/exception_handler.py +56 -0
  31. airbyte_cdk/logger.py +109 -0
  32. airbyte_cdk/models/__init__.py +72 -0
  33. airbyte_cdk/models/airbyte_protocol.py +88 -0
  34. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  35. airbyte_cdk/models/well_known_types.py +5 -0
  36. airbyte_cdk/py.typed +0 -0
  37. airbyte_cdk/sources/__init__.py +26 -0
  38. airbyte_cdk/sources/abstract_source.py +326 -0
  39. airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
  40. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
  41. airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
  42. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
  43. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
  44. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  45. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
  46. airbyte_cdk/sources/config.py +27 -0
  47. airbyte_cdk/sources/connector_state_manager.py +161 -0
  48. airbyte_cdk/sources/declarative/__init__.py +3 -0
  49. airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
  50. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  51. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
  52. airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
  53. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  54. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  55. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  56. airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
  57. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
  58. airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
  59. airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
  60. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
  61. airbyte_cdk/sources/declarative/auth/token.py +267 -0
  62. airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
  63. airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
  64. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
  65. airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
  66. airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
  67. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  68. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  69. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
  70. airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
  71. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
  72. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
  73. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
  74. airbyte_cdk/sources/declarative/declarative_source.py +36 -0
  75. airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
  76. airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
  77. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
  78. airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
  79. airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
  80. airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
  81. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  82. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  83. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  84. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
  85. airbyte_cdk/sources/declarative/exceptions.py +9 -0
  86. airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
  87. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
  88. airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
  89. airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
  90. airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
  91. airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
  92. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
  93. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  94. airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
  95. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
  96. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
  97. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  98. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
  99. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
  100. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  101. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  102. airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
  103. airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
  104. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
  105. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
  106. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
  107. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
  108. airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
  109. airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
  110. airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
  111. airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
  112. airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
  113. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  114. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  115. airbyte_cdk/sources/declarative/models/__init__.py +2 -0
  116. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
  117. airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
  118. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
  119. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
  120. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
  121. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
  122. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
  123. airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
  124. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  125. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  126. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
  127. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  128. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
  129. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
  130. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  131. airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
  132. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
  133. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
  134. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
  135. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
  136. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
  137. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
  138. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
  139. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
  140. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
  141. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
  142. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  143. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
  144. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
  145. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
  146. airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
  147. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
  148. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
  149. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
  150. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
  151. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
  152. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
  153. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
  154. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
  155. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
  156. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
  157. airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
  158. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
  159. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
  160. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  161. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
  162. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
  163. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
  164. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
  165. airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
  166. airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
  167. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  168. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  169. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  170. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  171. airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
  172. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
  173. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  174. airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
  175. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
  176. airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
  177. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
  178. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
  179. airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
  180. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
  181. airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
  182. airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
  183. airbyte_cdk/sources/declarative/spec/spec.py +48 -0
  184. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
  185. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
  186. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
  187. airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
  188. airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
  189. airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
  190. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  191. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  192. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  193. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  194. airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
  195. airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
  196. airbyte_cdk/sources/declarative/types.py +25 -0
  197. airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
  198. airbyte_cdk/sources/file_based/README.md +152 -0
  199. airbyte_cdk/sources/file_based/__init__.py +24 -0
  200. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
  201. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
  202. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
  203. airbyte_cdk/sources/file_based/config/__init__.py +0 -0
  204. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
  205. airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
  206. airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
  207. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  208. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
  209. airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
  210. airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
  211. airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
  212. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  213. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
  214. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
  215. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
  216. airbyte_cdk/sources/file_based/exceptions.py +159 -0
  217. airbyte_cdk/sources/file_based/file_based_source.py +466 -0
  218. airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
  219. airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
  220. airbyte_cdk/sources/file_based/file_record_data.py +22 -0
  221. airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
  222. airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
  223. airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
  224. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  225. airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
  226. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
  227. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
  228. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
  229. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
  230. airbyte_cdk/sources/file_based/remote_file.py +18 -0
  231. airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
  232. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
  233. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
  234. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
  235. airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
  236. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
  237. airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
  238. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
  239. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
  240. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
  241. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
  242. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
  243. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
  244. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
  245. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
  246. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
  247. airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
  248. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
  249. airbyte_cdk/sources/file_based/types.py +10 -0
  250. airbyte_cdk/sources/http_config.py +10 -0
  251. airbyte_cdk/sources/http_logger.py +55 -0
  252. airbyte_cdk/sources/message/__init__.py +19 -0
  253. airbyte_cdk/sources/message/repository.py +137 -0
  254. airbyte_cdk/sources/source.py +95 -0
  255. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  256. airbyte_cdk/sources/streams/__init__.py +8 -0
  257. airbyte_cdk/sources/streams/availability_strategy.py +84 -0
  258. airbyte_cdk/sources/streams/call_rate.py +704 -0
  259. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  260. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  261. airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
  262. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  263. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  264. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  265. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  266. airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
  267. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
  268. airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
  269. airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
  270. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
  271. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  272. airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
  273. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  274. airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
  275. airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
  276. airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
  277. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
  278. airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
  279. airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
  280. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
  281. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
  282. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  283. airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
  284. airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
  285. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
  286. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
  287. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  288. airbyte_cdk/sources/streams/core.py +703 -0
  289. airbyte_cdk/sources/streams/http/__init__.py +10 -0
  290. airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
  291. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  292. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  293. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  294. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  295. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  296. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  297. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  298. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  299. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  300. airbyte_cdk/sources/streams/http/exceptions.py +61 -0
  301. airbyte_cdk/sources/streams/http/http.py +673 -0
  302. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  303. airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
  304. airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
  305. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
  306. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
  307. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
  308. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
  309. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  310. airbyte_cdk/sources/streams/utils/__init__.py +3 -0
  311. airbyte_cdk/sources/types.py +169 -0
  312. airbyte_cdk/sources/utils/__init__.py +7 -0
  313. airbyte_cdk/sources/utils/casing.py +12 -0
  314. airbyte_cdk/sources/utils/files_directory.py +15 -0
  315. airbyte_cdk/sources/utils/record_helper.py +53 -0
  316. airbyte_cdk/sources/utils/schema_helpers.py +230 -0
  317. airbyte_cdk/sources/utils/slice_logger.py +57 -0
  318. airbyte_cdk/sources/utils/transform.py +277 -0
  319. airbyte_cdk/sources/utils/types.py +7 -0
  320. airbyte_cdk/sql/__init__.py +0 -0
  321. airbyte_cdk/sql/_util/__init__.py +0 -0
  322. airbyte_cdk/sql/_util/hashing.py +34 -0
  323. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  324. airbyte_cdk/sql/constants.py +32 -0
  325. airbyte_cdk/sql/exceptions.py +235 -0
  326. airbyte_cdk/sql/secrets.py +123 -0
  327. airbyte_cdk/sql/shared/__init__.py +15 -0
  328. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  329. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  330. airbyte_cdk/sql/types.py +160 -0
  331. airbyte_cdk/test/__init__.py +7 -0
  332. airbyte_cdk/test/catalog_builder.py +81 -0
  333. airbyte_cdk/test/entrypoint_wrapper.py +250 -0
  334. airbyte_cdk/test/mock_http/__init__.py +6 -0
  335. airbyte_cdk/test/mock_http/matcher.py +41 -0
  336. airbyte_cdk/test/mock_http/mocker.py +185 -0
  337. airbyte_cdk/test/mock_http/request.py +103 -0
  338. airbyte_cdk/test/mock_http/response.py +28 -0
  339. airbyte_cdk/test/mock_http/response_builder.py +237 -0
  340. airbyte_cdk/test/state_builder.py +33 -0
  341. airbyte_cdk/test/utils/__init__.py +1 -0
  342. airbyte_cdk/test/utils/data.py +24 -0
  343. airbyte_cdk/test/utils/http_mocking.py +16 -0
  344. airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
  345. airbyte_cdk/test/utils/reading.py +26 -0
  346. airbyte_cdk/utils/__init__.py +10 -0
  347. airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
  348. airbyte_cdk/utils/analytics_message.py +25 -0
  349. airbyte_cdk/utils/constants.py +5 -0
  350. airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
  351. airbyte_cdk/utils/datetime_helpers.py +499 -0
  352. airbyte_cdk/utils/event_timing.py +85 -0
  353. airbyte_cdk/utils/is_cloud_environment.py +18 -0
  354. airbyte_cdk/utils/mapping_helpers.py +162 -0
  355. airbyte_cdk/utils/message_utils.py +26 -0
  356. airbyte_cdk/utils/oneof_option_config.py +33 -0
  357. airbyte_cdk/utils/print_buffer.py +75 -0
  358. airbyte_cdk/utils/schema_inferrer.py +270 -0
  359. airbyte_cdk/utils/slice_hasher.py +37 -0
  360. airbyte_cdk/utils/spec_schema_transformations.py +26 -0
  361. airbyte_cdk/utils/stream_status_utils.py +43 -0
  362. airbyte_cdk/utils/traced_exception.py +145 -0
  363. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
  364. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
  365. airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
  366. airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
  367. airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
  368. airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,531 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import logging
6
+ import os
7
+ import urllib
8
+ from pathlib import Path
9
+ from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union
10
+
11
+ import orjson
12
+ import requests
13
+ import requests_cache
14
+ from requests.auth import AuthBase
15
+
16
+ from airbyte_cdk.models import (
17
+ AirbyteMessageSerializer,
18
+ AirbyteStreamStatus,
19
+ AirbyteStreamStatusReason,
20
+ AirbyteStreamStatusReasonType,
21
+ Level,
22
+ StreamDescriptor,
23
+ )
24
+ from airbyte_cdk.sources.http_config import MAX_CONNECTION_POOL_SIZE
25
+ from airbyte_cdk.sources.message import MessageRepository
26
+ from airbyte_cdk.sources.streams.call_rate import APIBudget, CachedLimiterSession, LimiterSession
27
+ from airbyte_cdk.sources.streams.http.error_handlers import (
28
+ BackoffStrategy,
29
+ DefaultBackoffStrategy,
30
+ ErrorHandler,
31
+ ErrorMessageParser,
32
+ ErrorResolution,
33
+ HttpStatusErrorHandler,
34
+ JsonErrorMessageParser,
35
+ ResponseAction,
36
+ )
37
+ from airbyte_cdk.sources.streams.http.exceptions import (
38
+ DefaultBackoffException,
39
+ RateLimitBackoffException,
40
+ RequestBodyException,
41
+ UserDefinedBackoffException,
42
+ )
43
+ from airbyte_cdk.sources.streams.http.rate_limiting import (
44
+ http_client_default_backoff_handler,
45
+ rate_limit_default_backoff_handler,
46
+ user_defined_backoff_handler,
47
+ )
48
+ from airbyte_cdk.sources.utils.types import JsonType
49
+ from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
50
+ from airbyte_cdk.utils.constants import ENV_REQUEST_CACHE_PATH
51
+ from airbyte_cdk.utils.stream_status_utils import (
52
+ as_airbyte_message as stream_status_as_airbyte_message,
53
+ )
54
+ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
55
+
56
+ BODY_REQUEST_METHODS = ("GET", "POST", "PUT", "PATCH")
57
+
58
+
59
class MessageRepresentationAirbyteTracedErrors(AirbyteTracedException):
    """
    AirbyteTracedException variant whose string form prefers `message` over `internal_message`.

    Low-code used to raise
    [ReadException](https://github.com/airbytehq/airbyte/blob/8fdd9818ec16e653ba3dd2b167a74b7c07459861/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py#L566)
    before it migrated to the HttpClient; that exception is now an AirbyteTracedException, whose string representation
    shows `internal_message` instead of `message`. Changing the representation on AirbyteTracedException itself has been
    discussed, but the impact is unclear and hard to quantify, so for now the change is made in this subclass only.
    """

    def __str__(self) -> str:
        # First non-empty value wins: message, then internal_message, then the empty string.
        return self.message or self.internal_message or ""
74
+
75
+
76
+ class HttpClient:
77
+ _DEFAULT_MAX_RETRY: int = 5
78
+ _DEFAULT_MAX_TIME: int = 60 * 10
79
+ _ACTIONS_TO_RETRY_ON = {ResponseAction.RETRY, ResponseAction.RATE_LIMITED}
80
+
81
def __init__(
    self,
    name: str,
    logger: logging.Logger,
    error_handler: Optional[ErrorHandler] = None,
    api_budget: Optional[APIBudget] = None,
    session: Optional[Union[requests.Session, requests_cache.CachedSession]] = None,
    authenticator: Optional[AuthBase] = None,
    use_cache: bool = False,
    backoff_strategy: Optional[Union[BackoffStrategy, List[BackoffStrategy]]] = None,
    error_message_parser: Optional[ErrorMessageParser] = None,
    disable_retries: bool = False,
    message_repository: Optional[MessageRepository] = None,
):
    """
    Configure the session, error handling and retry settings of the client.

    :param name: name of the client; `cache_filename` derives the cache file name from it
    :param logger: logger kept on the client and passed to the default error handler
    :param error_handler: defaults to `HttpStatusErrorHandler(logger)` when omitted
    :param api_budget: defaults to an `APIBudget` without policies when omitted
    :param session: existing session to use as-is; when omitted, one is built by `_request_session()`
        and an adapter with MAX_CONNECTION_POOL_SIZE pooled connections is mounted for "https://"
    :param authenticator: set as the session's `auth` only when it is an `AuthBase` instance
    :param use_cache: read by `_request_session()` when it builds the session
    :param backoff_strategy: a single strategy or a list of them; defaults to `[DefaultBackoffStrategy()]`
    :param error_message_parser: defaults to `JsonErrorMessageParser()` when omitted
    :param disable_retries: stored on the client as `_disable_retries`
    :param message_repository: stored on the client as `_message_repository`
    """
    self._name = name
    self._api_budget: APIBudget = api_budget or APIBudget(policies=[])
    # Always record the flag, even when a session is injected, so that `_request_session()`
    # can be called later without raising AttributeError on `self._use_cache`.
    self._use_cache = use_cache
    if session:
        self._session = session
    else:
        self._session = self._request_session()
        self._session.mount(
            "https://",
            requests.adapters.HTTPAdapter(
                pool_connections=MAX_CONNECTION_POOL_SIZE, pool_maxsize=MAX_CONNECTION_POOL_SIZE
            ),
        )
    if isinstance(authenticator, AuthBase):
        self._session.auth = authenticator
    self._logger = logger
    self._error_handler = error_handler or HttpStatusErrorHandler(self._logger)
    # Normalize the backoff configuration to a list, whatever form the caller passed.
    if backoff_strategy is None:
        self._backoff_strategies = [DefaultBackoffStrategy()]
    elif isinstance(backoff_strategy, list):
        self._backoff_strategies = backoff_strategy
    else:
        self._backoff_strategies = [backoff_strategy]
    self._error_message_parser = error_message_parser or JsonErrorMessageParser()
    # Integer counter per prepared request.
    self._request_attempt_count: Dict[requests.PreparedRequest, int] = {}
    self._disable_retries = disable_retries
    self._message_repository = message_repository
123
+
124
@property
def cache_filename(self) -> str:
    """
    Name of the file that backs the request cache; subclasses may override it.
    The cache is in-memory only when the environment variable REQUEST_CACHE_PATH is not set.
    """
    filename = f"{self._name}.sqlite"
    return filename
131
+
132
def _request_session(self) -> requests.Session:
    """
    Create the session used for outbound requests.

    The session is always given the API budget (``self._api_budget``). When
    ``self._use_cache`` is set, a caching session backed by SQLite is returned instead
    of a plain limiter session.

    :return: instance of request-based session
    """
    if not self._use_cache:
        return LimiterSession(api_budget=self._api_budget)

    cache_dir = os.getenv(ENV_REQUEST_CACHE_PATH)
    # Use in-memory cache if cache_dir is not set
    # This is a non-obvious interface, but it ensures we don't write sql files when running unit tests
    if cache_dir:
        sqlite_path = str(Path(cache_dir) / self.cache_filename)
    else:
        sqlite_path = "file::memory:?cache=shared"

    # By using `PRAGMA synchronous=OFF` and `PRAGMA journal_mode=WAL`, we reduce the possible occurrences of `database table is locked` errors.
    # Note that those were blindly added at the same time and one or the other might be sufficient to prevent the issues but we have seen good results with both. Feel free to revisit given more information.
    # There are strong signals that `fast_save` might create problems but if the sync crashes, we start back from the beginning in terms of sqlite anyway so the impact should be minimal. Signals are:
    # * https://github.com/requests-cache/requests-cache/commit/7fa89ffda300331c37d8fad7f773348a3b5b0236#diff-f43db4a5edf931647c32dec28ea7557aae4cae8444af4b26c8ecbe88d8c925aaR238
    # * https://github.com/requests-cache/requests-cache/commit/7fa89ffda300331c37d8fad7f773348a3b5b0236#diff-2e7f95b7d7be270ff1a8118f817ea3e6663cdad273592e536a116c24e6d23c18R164-R168
    # * `If the application running SQLite crashes, the data will be safe, but the database [might become corrupted](https://www.sqlite.org/howtocorrupt.html#cfgerr) if the operating system crashes or the computer loses power before that data has been written to the disk surface.` in [this description](https://www.sqlite.org/pragma.html#pragma_synchronous).
    backend = requests_cache.SQLiteCache(sqlite_path, fast_save=True, wal=True)
    return CachedLimiterSession(
        sqlite_path, backend=backend, api_budget=self._api_budget, match_headers=True
    )
160
+
161
def clear_cache(self) -> None:
    """
    Drop the requests cached by the current session. Can be called at any time;
    nothing happens when the session is not a caching session.
    """
    if not isinstance(self._session, requests_cache.CachedSession):
        return
    self._session.cache.clear()  # type: ignore # cache.clear is not typed
167
+
168
+ def _dedupe_query_params(
169
+ self, url: str, params: Optional[Mapping[str, str]]
170
+ ) -> Mapping[str, str]:
171
+ """
172
+ Remove query parameters from params mapping if they are already encoded in the URL.
173
+ :param url: URL with
174
+ :param params:
175
+ :return:
176
+ """
177
+ if params is None:
178
+ params = {}
179
+ query_string = urllib.parse.urlparse(url).query
180
+ query_dict = {k: v[0] for k, v in urllib.parse.parse_qs(query_string).items()}
181
+
182
+ duplicate_keys_with_same_value = {
183
+ k for k in query_dict.keys() if str(params.get(k)) == str(query_dict[k])
184
+ }
185
+ return {k: v for k, v in params.items() if k not in duplicate_keys_with_same_value}
186
+
187
def _create_prepared_request(
    self,
    http_method: str,
    url: str,
    dedupe_query_params: bool = False,
    headers: Optional[Mapping[str, str]] = None,
    params: Optional[Mapping[str, str]] = None,
    json: Optional[Mapping[str, Any]] = None,
    data: Optional[Union[str, Mapping[str, Any]]] = None,
) -> requests.PreparedRequest:
    """
    Build a request and let the session prepare it.

    :param http_method: HTTP verb; a body is only attached for verbs in BODY_REQUEST_METHODS
    :param url: target URL
    :param dedupe_query_params: when true, drop params already encoded in ``url``
    :param headers: request headers
    :param params: query parameters
    :param json: JSON body (mutually exclusive with ``data``)
    :param data: raw or form body (mutually exclusive with ``json``)
    :return: the request as prepared by ``self._session``
    :raises RequestBodyException: when both ``json`` and ``data`` are provided for a body-carrying verb
    """
    query_params = (
        self._dedupe_query_params(url, params) if dedupe_query_params else (params or {})
    )
    request_args = {
        "method": http_method,
        "url": url,
        "headers": headers,
        "params": query_params,
    }

    if http_method.upper() in BODY_REQUEST_METHODS:
        if json and data:
            raise RequestBodyException(
                "At the same time only one of the 'request_body_data' and 'request_body_json' functions can return data"
            )
        if json:
            request_args["json"] = json
        elif data:
            request_args["data"] = data

    prepared: requests.PreparedRequest = self._session.prepare_request(
        requests.Request(**request_args)
    )
    return prepared
216
+
217
+ @property
218
+ def _max_retries(self) -> int:
219
+ """
220
+ Determines the max retries based on the provided error handler.
221
+ """
222
+ max_retries = None
223
+ if self._disable_retries:
224
+ max_retries = 0
225
+ else:
226
+ max_retries = self._error_handler.max_retries
227
+ return max_retries if max_retries is not None else self._DEFAULT_MAX_RETRY
228
+
229
+ @property
230
+ def _max_time(self) -> int:
231
+ """
232
+ Determines the max time based on the provided error handler.
233
+ """
234
+ return (
235
+ self._error_handler.max_time
236
+ if self._error_handler.max_time is not None
237
+ else self._DEFAULT_MAX_TIME
238
+ )
239
+
240
def _send_with_retry(
    self,
    request: requests.PreparedRequest,
    request_kwargs: Mapping[str, Any],
    log_formatter: Optional[Callable[[requests.Response], Any]] = None,
    exit_on_rate_limit: Optional[bool] = False,
) -> requests.Response:
    """
    Sends a request with retry logic.

    ``_send`` is wrapped, from the inside out, by the user-defined backoff handler, the
    rate-limit backoff handler and the default HTTP client backoff handler.

    Args:
        request (requests.PreparedRequest): The prepared HTTP request to send.
        request_kwargs (Mapping[str, Any]): Additional keyword arguments for the request.
        log_formatter (Optional[Callable]): Forwarded to ``_send``.
        exit_on_rate_limit (Optional[bool]): Forwarded to ``_send``.

    Returns:
        requests.Response: The HTTP response received from the server after retries.
    """
    # One initial attempt plus the configured number of retries (never fewer than one try).
    max_tries = max(0, self._max_retries) + 1
    max_time = self._max_time

    send = user_defined_backoff_handler(max_tries=max_tries, max_time=max_time)(self._send)
    send = rate_limit_default_backoff_handler(max_tries=max_tries)(send)
    send = http_client_default_backoff_handler(max_tries=max_tries, max_time=max_time)(send)

    # backoff handlers wrap _send, so it will always return a response
    response = send(
        request,
        request_kwargs,
        log_formatter=log_formatter,
        exit_on_rate_limit=exit_on_rate_limit,
    )  # type: ignore # mypy can't infer that backoff_handler wraps _send

    return response
278
+
279
def _send(
    self,
    request: requests.PreparedRequest,
    request_kwargs: Mapping[str, Any],
    log_formatter: Optional[Callable[[requests.Response], Any]] = None,
    exit_on_rate_limit: Optional[bool] = False,
) -> requests.Response:
    """
    Perform one attempt at sending ``request`` and act on its outcome.

    The attempt is counted in ``self._request_attempt_count``. The response, or the
    ``requests.RequestException`` raised while sending, is interpreted by the error handler
    and the resulting resolution is handed to ``_handle_error_resolution``.

    :param request: prepared request to send
    :param request_kwargs: keyword arguments forwarded to the session's ``send``
    :param log_formatter: optional callable whose result for the response is logged through the message repository
    :param exit_on_rate_limit: forwarded to ``_handle_error_resolution``
    :return: the response obtained for this attempt
    """
    self._request_attempt_count[request] = self._request_attempt_count.get(request, 0) + 1

    session_auth = getattr(self._session, "auth", None)
    if isinstance(session_auth, AuthBase):
        session_auth(request)

    self._logger.debug(
        "Making outbound API request",
        extra={"headers": request.headers, "url": request.url, "request_body": request.body},
    )

    response: Optional[requests.Response] = None
    exc: Optional[requests.RequestException] = None
    try:
        response = self._session.send(request, **request_kwargs)
    except requests.RequestException as send_error:
        exc = send_error

    error_resolution: ErrorResolution = self._error_handler.interpret_response(
        response if response is not None else exc
    )

    # Evaluation of response.text can be heavy, for example, if streaming a large response
    # Do it only in debug mode
    if response is not None and self._logger.isEnabledFor(logging.DEBUG):
        debug_extra = {"headers": response.headers, "status": response.status_code}
        if request_kwargs.get("stream"):
            self._logger.debug(
                "Receiving response, but not logging it as the response is streamed",
                extra=debug_extra,
            )
        else:
            debug_extra["body"] = response.text
            self._logger.debug("Receiving response", extra=debug_extra)

    # Request/response logging for declarative cdk
    if (
        log_formatter is not None
        and response is not None
        and self._message_repository is not None
    ):
        formatter = log_formatter
        self._message_repository.log_message(
            Level.DEBUG,
            lambda: formatter(response),
        )

    self._handle_error_resolution(
        response=response,
        exc=exc,
        request=request,
        error_resolution=error_resolution,
        exit_on_rate_limit=exit_on_rate_limit,
    )

    return response  # type: ignore # will either return a valid response of type requests.Response or raise an exception
349
+
350
def _get_response_body(self, response: requests.Response) -> Optional[JsonType]:
    """
    Render the body of an HTTP response as a string.

    The body is parsed as JSON and the parsed object is converted with ``str``. If the
    body is not valid JSON, the raw content is decoded as UTF-8 instead. If decoding
    fails as well, a fixed placeholder message is returned.

    Args:
        response (requests.Response): The HTTP response object.

    Returns:
        Optional[JsonType]: The string form of the parsed JSON, the decoded content,
        or the placeholder message when neither could be produced.
    """
    try:
        return str(response.json())
    except requests.exceptions.JSONDecodeError:
        pass

    try:
        return response.content.decode("utf-8")
    except Exception:
        return "The Content of the Response couldn't be decoded."
372
+
373
def _evict_key(self, prepared_request: requests.PreparedRequest) -> None:
    """
    Addresses high memory consumption when enabling concurrency in https://github.com/airbytehq/oncall/issues/6821.

    `_request_attempt_count` gains an entry for every request sent through the same `http_client`
    and would otherwise keep growing. Once a request is known to need no further retries, its
    entry is removed here; requests that have no entry are left alone.
    """
    self._request_attempt_count.pop(prepared_request, None)
383
+
384
def _handle_error_resolution(
    self,
    response: Optional[requests.Response],
    exc: Optional[requests.RequestException],
    request: requests.PreparedRequest,
    error_resolution: ErrorResolution,
    exit_on_rate_limit: Optional[bool] = False,
) -> None:
    """
    Act on the resolution the error handler produced for one request attempt.

    * Any action outside ``_ACTIONS_TO_RETRY_ON`` first evicts the request's attempt counter.
    * ``RATE_LIMITED`` prints a RUNNING stream status with the RATE_LIMITED reason to stdout.
    * ``FAIL`` logs the error and raises ``MessageRepresentationAirbyteTracedErrors``.
    * ``IGNORE`` logs an informational message and returns.
    * ``RETRY`` / ``RATE_LIMITED`` raise a backoff exception: ``UserDefinedBackoffException`` when a
      backoff strategy yields a backoff time, otherwise ``RateLimitBackoffException`` for a rate limit
      when ``exit_on_rate_limit`` is falsy, otherwise ``DefaultBackoffException``.
    * Any other action calls ``raise_for_status()`` on a truthy response.

    :param response: response received for the attempt, if any
    :param exc: exception raised while sending, if any
    :param request: the request that was sent
    :param error_resolution: resolution returned by the error handler
    :param exit_on_rate_limit: when truthy, a rate limit is not retried through ``RateLimitBackoffException``
    """
    action = error_resolution.response_action

    if action not in self._ACTIONS_TO_RETRY_ON:
        self._evict_key(request)

    # Emit stream status RUNNING with the reason RATE_LIMITED to log that the rate limit has been reached
    if action == ResponseAction.RATE_LIMITED:
        # TODO: Update to handle with message repository when concurrent message repository is ready
        reasons = [AirbyteStreamStatusReason(type=AirbyteStreamStatusReasonType.RATE_LIMITED)]
        status_message = stream_status_as_airbyte_message(
            StreamDescriptor(name=self._name), AirbyteStreamStatus.RUNNING, reasons
        )
        message = orjson.dumps(AirbyteMessageSerializer.dump(status_message)).decode()

        # Simply printing the stream status is a temporary solution and can cause future issues. Currently, the _send method is
        # wrapped with backoff decorators, and we can only emit messages by iterating record_iterator in the abstract source at the
        # end of the retry decorator behavior. This approach does not allow us to emit messages in the queue before exiting the
        # backoff retry loop. Adding `\n` to the message and ignore 'end' ensure that few messages are printed at the same time.
        print(f"{message}\n", end="", flush=True)

    if action == ResponseAction.FAIL:
        if response is not None:
            filtered_response_message = filter_secrets(
                f"Request (body): '{str(request.body)}'. Response (body): '{self._get_response_body(response)}'. Response (headers): '{response.headers}'."
            )
            error_message = f"'{request.method}' request to '{request.url}' failed with status code '{response.status_code}' and error message: '{self._error_message_parser.parse_response_error_message(response)}'. {filtered_response_message}"
        else:
            error_message = (
                f"'{request.method}' request to '{request.url}' failed with exception: '{exc}'"
            )

        # ensure the exception message is emitted before raised
        self._logger.error(error_message)

        raise MessageRepresentationAirbyteTracedErrors(
            internal_message=error_message,
            message=error_resolution.error_message or error_message,
            failure_type=error_resolution.failure_type,
        )

    if action == ResponseAction.IGNORE:
        if response is not None:
            log_message = f"Ignoring response for '{request.method}' request to '{request.url}' with response code '{response.status_code}'"
        else:
            log_message = f"Ignoring response for '{request.method}' request to '{request.url}' with error '{exc}'"

        self._logger.info(error_resolution.error_message or log_message)
        return

    # TODO: Consider dynamic retry count depending on subsequent error codes
    if action in (ResponseAction.RETRY, ResponseAction.RATE_LIMITED):
        response_or_exception = response if response is not None else exc

        # The first strategy that yields a truthy backoff time wins.
        user_defined_backoff_time = None
        for strategy in self._backoff_strategies:
            candidate = strategy.backoff_time(
                response_or_exception=response_or_exception,
                attempt_count=self._request_attempt_count[request],
            )
            if candidate:
                user_defined_backoff_time = candidate
                break

        error_message = (
            error_resolution.error_message
            or f"Request to {request.url} failed with failure type {error_resolution.failure_type}, response action {error_resolution.response_action}."
        )

        if user_defined_backoff_time:
            raise UserDefinedBackoffException(
                backoff=user_defined_backoff_time,
                request=request,
                response=response_or_exception,
                error_message=error_message,
            )

        retry_endlessly = action == ResponseAction.RATE_LIMITED and not exit_on_rate_limit
        if retry_endlessly:
            raise RateLimitBackoffException(
                request=request,
                response=response_or_exception,
                error_message=error_message,
            )

        raise DefaultBackoffException(
            request=request,
            response=response_or_exception,
            error_message=error_message,
        )

    # NOTE(review): this is a truthiness test, not a `None` check; requests documents
    # Response truthiness as following `response.ok` - confirm this is the intent.
    if response:
        try:
            response.raise_for_status()
        except requests.HTTPError as http_error:
            self._logger.error(response.text)
            raise http_error
492
+
493
@property
def name(self) -> str:
    """Name this client was created with."""
    return self._name
496
+
497
def send_request(
    self,
    http_method: str,
    url: str,
    request_kwargs: Mapping[str, Any],
    headers: Optional[Mapping[str, str]] = None,
    params: Optional[Mapping[str, str]] = None,
    json: Optional[Mapping[str, Any]] = None,
    data: Optional[Union[str, Mapping[str, Any]]] = None,
    dedupe_query_params: bool = False,
    log_formatter: Optional[Callable[[requests.Response], Any]] = None,
    exit_on_rate_limit: Optional[bool] = False,
) -> Tuple[requests.PreparedRequest, requests.Response]:
    """
    Prepare a request, send it with retries and return both the request and its response.

    :param http_method: HTTP verb
    :param url: target URL
    :param request_kwargs: keyword arguments used when sending the prepared request
    :param headers: request headers
    :param params: query parameters
    :param json: JSON body
    :param data: raw or form body
    :param dedupe_query_params: when true, drop params already encoded in ``url``
    :param log_formatter: forwarded to the sending step
    :param exit_on_rate_limit: forwarded to the sending step
    :return: tuple of the prepared request and the response received for it
    """
    prepared: requests.PreparedRequest = self._create_prepared_request(
        http_method=http_method,
        url=url,
        dedupe_query_params=dedupe_query_params,
        headers=headers,
        params=params,
        json=json,
        data=data,
    )
    received: requests.Response = self._send_with_retry(
        request=prepared,
        request_kwargs=request_kwargs,
        log_formatter=log_formatter,
        exit_on_rate_limit=exit_on_rate_limit,
    )
    return prepared, received
@@ -0,0 +1,158 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import logging
6
+ import sys
7
+ import time
8
+ from typing import Any, Callable, Mapping, Optional
9
+
10
+ import backoff
11
+ from requests import PreparedRequest, RequestException, Response, codes, exceptions
12
+
13
+ from .exceptions import (
14
+ DefaultBackoffException,
15
+ RateLimitBackoffException,
16
+ UserDefinedBackoffException,
17
+ )
18
+
19
# Exception types that the exponential backoff handlers in this module retry on.
TRANSIENT_EXCEPTIONS = (
    DefaultBackoffException,
    exceptions.ConnectTimeout,
    exceptions.ReadTimeout,
    exceptions.ConnectionError,
    exceptions.ChunkedEncodingError,
)

# Logger shared by the backoff handlers in this module.
logger = logging.getLogger("airbyte")


# Signature of the request-sending callable that the backoff decorators wrap.
SendRequestCallableType = Callable[[PreparedRequest, Mapping[str, Any]], Response]
31
+
32
+
33
def default_backoff_handler(
    max_tries: Optional[int], factor: float, max_time: Optional[int] = None, **kwargs: Any
) -> Callable[[SendRequestCallableType], SendRequestCallableType]:
    """
    Build a decorator that retries the wrapped send callable with exponential backoff.

    Retries happen on ``TRANSIENT_EXCEPTIONS``, without jitter, and stop early for
    4XX responses other than 429.

    :param max_tries: maximum number of attempts, passed to ``backoff.on_exception``
    :param factor: multiplier passed to ``backoff.expo``
    :param max_time: maximum total time spent retrying, passed to ``backoff.on_exception``
    :param kwargs: extra keyword arguments forwarded to ``backoff.on_exception``
    :return: the decorator produced by ``backoff.on_exception``
    """

    def log_retry_attempt(details: Mapping[str, Any]) -> None:
        _, exc, _ = sys.exc_info()
        # Compare against None rather than relying on truthiness: a requests.Response is
        # falsy for 4XX/5XX statuses, which are exactly the responses worth logging here.
        if isinstance(exc, RequestException) and exc.response is not None:
            logger.info(
                f"Status code: {exc.response.status_code!r}, Response Content: {exc.response.content!r}"
            )
        logger.info(
            f"Caught retryable error '{str(exc)}' after {details['tries']} tries. Waiting {details['wait']} seconds then retrying..."
        )

    def should_give_up(exc: Exception) -> bool:
        # If a non-rate-limiting related 4XX error makes it this far, it means it was unexpected and probably consistent, so we shouldn't back off
        if isinstance(exc, RequestException) and exc.response is not None:
            give_up: bool = (
                exc.response.status_code != codes.too_many_requests
                and 400 <= exc.response.status_code < 500
            )
            if give_up:
                logger.info(f"Giving up for returned HTTP status: {exc.response.status_code!r}")
            return give_up
        # No HTTP response to inspect (for example a connection error or timeout): keep retrying
        return False

    return backoff.on_exception(  # type: ignore # Decorator function returns a function with a different signature than the input function, so mypy can't infer the type of the returned function
        backoff.expo,
        TRANSIENT_EXCEPTIONS,
        jitter=None,
        on_backoff=log_retry_attempt,
        giveup=should_give_up,
        max_tries=max_tries,
        max_time=max_time,
        factor=factor,
        **kwargs,
    )
72
+
73
+
74
def http_client_default_backoff_handler(
    max_tries: Optional[int], max_time: Optional[int] = None, **kwargs: Any
) -> Callable[[SendRequestCallableType], SendRequestCallableType]:
    """
    Build a decorator that retries the wrapped send callable with exponential backoff.

    Retries happen on ``TRANSIENT_EXCEPTIONS``, without jitter, and are never given up
    on early: only ``max_tries`` / ``max_time`` end the retry loop.

    :param max_tries: maximum number of attempts, passed to ``backoff.on_exception``
    :param max_time: maximum total time spent retrying, passed to ``backoff.on_exception``
    :param kwargs: extra keyword arguments forwarded to ``backoff.on_exception``
    :return: the decorator produced by ``backoff.on_exception``
    """

    def log_retry_attempt(details: Mapping[str, Any]) -> None:
        _, exc, _ = sys.exc_info()
        # Compare against None rather than relying on truthiness: a requests.Response is
        # falsy for 4XX/5XX statuses, which are exactly the responses worth logging here.
        if isinstance(exc, RequestException) and exc.response is not None:
            logger.info(
                f"Status code: {exc.response.status_code!r}, Response Content: {exc.response.content!r}"
            )
        logger.info(
            f"Caught retryable error '{str(exc)}' after {details['tries']} tries. Waiting {details['wait']} seconds then retrying..."
        )

    def should_give_up(exc: Exception) -> bool:
        # If made it here, the ResponseAction was RETRY and therefore should not give up
        return False

    return backoff.on_exception(  # type: ignore # Decorator function returns a function with a different signature than the input function, so mypy can't infer the type of the returned function
        backoff.expo,
        TRANSIENT_EXCEPTIONS,
        jitter=None,
        on_backoff=log_retry_attempt,
        giveup=should_give_up,
        max_tries=max_tries,
        max_time=max_time,
        **kwargs,
    )
101
+
102
+
103
def user_defined_backoff_handler(
    max_tries: Optional[int], max_time: Optional[int] = None, **kwargs: Any
) -> Callable[[SendRequestCallableType], SendRequestCallableType]:
    """
    Build a decorator that retries the wrapped send callable on ``UserDefinedBackoffException``.

    The ``backoff`` library itself waits zero seconds between attempts. The actual wait is
    done in the ``on_backoff`` callback, which sleeps for the exception's ``backoff`` value
    plus one second.

    :param max_tries: maximum number of attempts, passed to ``backoff.on_exception``
    :param max_time: maximum total time spent retrying, passed to ``backoff.on_exception``
    :param kwargs: extra keyword arguments forwarded to ``backoff.on_exception``
    :return: the decorator produced by ``backoff.on_exception``
    """

    def sleep_on_ratelimit(details: Mapping[str, Any]) -> None:
        _, exc, _ = sys.exc_info()
        if isinstance(exc, UserDefinedBackoffException):
            # Compare against None rather than relying on truthiness: a requests.Response is
            # falsy for 4XX/5XX statuses, which are exactly the responses worth logging here.
            if exc.response is not None:
                logger.info(
                    f"Status code: {exc.response.status_code!r}, Response Content: {exc.response.content!r}"
                )
            retry_after = exc.backoff
            logger.info(f"Retrying. Sleeping for {retry_after} seconds")
            time.sleep(retry_after + 1)  # extra second to cover any fractions of second

    def log_give_up(details: Mapping[str, Any]) -> None:
        _, exc, _ = sys.exc_info()
        if isinstance(exc, RequestException):
            logger.error(
                f"Max retry limit reached in {details['elapsed']}s. Request: {exc.request}, Response: {exc.response}"
            )
        else:
            logger.error("Max retry limit reached for unknown request and response")

    return backoff.on_exception(  # type: ignore # Decorator function returns a function with a different signature than the input function, so mypy can't infer the type of the returned function
        backoff.constant,
        UserDefinedBackoffException,
        interval=0,  # skip waiting, we'll wait in on_backoff handler
        on_backoff=sleep_on_ratelimit,
        on_giveup=log_give_up,
        jitter=None,
        max_tries=max_tries,
        max_time=max_time,
        **kwargs,
    )
137
+
138
+
139
def rate_limit_default_backoff_handler(
    **kwargs: Any,
) -> Callable[[SendRequestCallableType], SendRequestCallableType]:
    """
    Build a decorator that retries the wrapped send callable on ``RateLimitBackoffException``.

    Retries use exponential backoff without jitter. No give-up predicate is installed here;
    any limit on the retries comes from ``kwargs``.

    :param kwargs: keyword arguments forwarded to ``backoff.on_exception`` (for example ``max_tries``)
    :return: the decorator produced by ``backoff.on_exception``
    """

    def log_retry_attempt(details: Mapping[str, Any]) -> None:
        _, exc, _ = sys.exc_info()
        # Compare against None rather than relying on truthiness: a requests.Response is
        # falsy for 4XX/5XX statuses, which are exactly the responses worth logging here.
        if isinstance(exc, RequestException) and exc.response is not None:
            logger.info(
                f"Status code: {exc.response.status_code!r}, Response Content: {exc.response.content!r}"
            )
        logger.info(
            f"Caught retryable error '{str(exc)}' after {details['tries']} tries. Waiting {details['wait']} seconds then retrying..."
        )

    return backoff.on_exception(  # type: ignore # Decorator function returns a function with a different signature than the input function, so mypy can't infer the type of the returned function
        backoff.expo,
        RateLimitBackoffException,
        jitter=None,
        on_backoff=log_retry_attempt,
        **kwargs,
    )