airbyte-cdk 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (368) hide show
  1. airbyte_cdk/__init__.py +358 -0
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +104 -0
  7. airbyte_cdk/connector.py +123 -0
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/__init__.py +3 -0
  10. airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
  11. airbyte_cdk/connector_builder/main.py +107 -0
  12. airbyte_cdk/connector_builder/models.py +73 -0
  13. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  14. airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
  15. airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
  16. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  17. airbyte_cdk/connector_builder/test_reader/types.py +83 -0
  18. airbyte_cdk/destinations/__init__.py +8 -0
  19. airbyte_cdk/destinations/destination.py +154 -0
  20. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  21. airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
  22. airbyte_cdk/destinations/vector_db_based/config.py +298 -0
  23. airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
  24. airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
  25. airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
  26. airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
  27. airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
  28. airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
  29. airbyte_cdk/entrypoint.py +414 -0
  30. airbyte_cdk/exception_handler.py +56 -0
  31. airbyte_cdk/logger.py +109 -0
  32. airbyte_cdk/models/__init__.py +72 -0
  33. airbyte_cdk/models/airbyte_protocol.py +88 -0
  34. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  35. airbyte_cdk/models/well_known_types.py +5 -0
  36. airbyte_cdk/py.typed +0 -0
  37. airbyte_cdk/sources/__init__.py +26 -0
  38. airbyte_cdk/sources/abstract_source.py +326 -0
  39. airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
  40. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
  41. airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
  42. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
  43. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
  44. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  45. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
  46. airbyte_cdk/sources/config.py +27 -0
  47. airbyte_cdk/sources/connector_state_manager.py +161 -0
  48. airbyte_cdk/sources/declarative/__init__.py +3 -0
  49. airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
  50. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  51. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
  52. airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
  53. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  54. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  55. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  56. airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
  57. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
  58. airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
  59. airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
  60. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
  61. airbyte_cdk/sources/declarative/auth/token.py +267 -0
  62. airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
  63. airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
  64. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
  65. airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
  66. airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
  67. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  68. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  69. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
  70. airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
  71. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
  72. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
  73. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
  74. airbyte_cdk/sources/declarative/declarative_source.py +36 -0
  75. airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
  76. airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
  77. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
  78. airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
  79. airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
  80. airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
  81. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  82. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  83. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  84. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
  85. airbyte_cdk/sources/declarative/exceptions.py +9 -0
  86. airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
  87. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
  88. airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
  89. airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
  90. airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
  91. airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
  92. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
  93. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  94. airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
  95. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
  96. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
  97. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  98. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
  99. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
  100. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  101. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  102. airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
  103. airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
  104. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
  105. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
  106. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
  107. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
  108. airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
  109. airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
  110. airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
  111. airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
  112. airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
  113. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  114. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  115. airbyte_cdk/sources/declarative/models/__init__.py +2 -0
  116. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
  117. airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
  118. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
  119. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
  120. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
  121. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
  122. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
  123. airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
  124. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  125. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  126. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
  127. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  128. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
  129. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
  130. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  131. airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
  132. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
  133. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
  134. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
  135. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
  136. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
  137. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
  138. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
  139. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
  140. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
  141. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
  142. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  143. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
  144. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
  145. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
  146. airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
  147. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
  148. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
  149. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
  150. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
  151. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
  152. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
  153. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
  154. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
  155. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
  156. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
  157. airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
  158. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
  159. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
  160. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  161. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
  162. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
  163. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
  164. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
  165. airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
  166. airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
  167. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  168. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  169. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  170. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  171. airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
  172. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
  173. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  174. airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
  175. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
  176. airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
  177. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
  178. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
  179. airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
  180. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
  181. airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
  182. airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
  183. airbyte_cdk/sources/declarative/spec/spec.py +48 -0
  184. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
  185. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
  186. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
  187. airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
  188. airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
  189. airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
  190. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  191. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  192. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  193. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  194. airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
  195. airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
  196. airbyte_cdk/sources/declarative/types.py +25 -0
  197. airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
  198. airbyte_cdk/sources/file_based/README.md +152 -0
  199. airbyte_cdk/sources/file_based/__init__.py +24 -0
  200. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
  201. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
  202. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
  203. airbyte_cdk/sources/file_based/config/__init__.py +0 -0
  204. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
  205. airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
  206. airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
  207. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  208. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
  209. airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
  210. airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
  211. airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
  212. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  213. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
  214. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
  215. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
  216. airbyte_cdk/sources/file_based/exceptions.py +159 -0
  217. airbyte_cdk/sources/file_based/file_based_source.py +466 -0
  218. airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
  219. airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
  220. airbyte_cdk/sources/file_based/file_record_data.py +22 -0
  221. airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
  222. airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
  223. airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
  224. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  225. airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
  226. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
  227. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
  228. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
  229. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
  230. airbyte_cdk/sources/file_based/remote_file.py +18 -0
  231. airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
  232. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
  233. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
  234. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
  235. airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
  236. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
  237. airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
  238. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
  239. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
  240. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
  241. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
  242. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
  243. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
  244. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
  245. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
  246. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
  247. airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
  248. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
  249. airbyte_cdk/sources/file_based/types.py +10 -0
  250. airbyte_cdk/sources/http_config.py +10 -0
  251. airbyte_cdk/sources/http_logger.py +55 -0
  252. airbyte_cdk/sources/message/__init__.py +19 -0
  253. airbyte_cdk/sources/message/repository.py +137 -0
  254. airbyte_cdk/sources/source.py +95 -0
  255. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  256. airbyte_cdk/sources/streams/__init__.py +8 -0
  257. airbyte_cdk/sources/streams/availability_strategy.py +84 -0
  258. airbyte_cdk/sources/streams/call_rate.py +704 -0
  259. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  260. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  261. airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
  262. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  263. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  264. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  265. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  266. airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
  267. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
  268. airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
  269. airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
  270. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
  271. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  272. airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
  273. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  274. airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
  275. airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
  276. airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
  277. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
  278. airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
  279. airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
  280. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
  281. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
  282. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  283. airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
  284. airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
  285. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
  286. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
  287. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  288. airbyte_cdk/sources/streams/core.py +703 -0
  289. airbyte_cdk/sources/streams/http/__init__.py +10 -0
  290. airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
  291. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  292. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  293. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  294. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  295. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  296. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  297. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  298. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  299. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  300. airbyte_cdk/sources/streams/http/exceptions.py +61 -0
  301. airbyte_cdk/sources/streams/http/http.py +673 -0
  302. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  303. airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
  304. airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
  305. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
  306. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
  307. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
  308. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
  309. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  310. airbyte_cdk/sources/streams/utils/__init__.py +3 -0
  311. airbyte_cdk/sources/types.py +169 -0
  312. airbyte_cdk/sources/utils/__init__.py +7 -0
  313. airbyte_cdk/sources/utils/casing.py +12 -0
  314. airbyte_cdk/sources/utils/files_directory.py +15 -0
  315. airbyte_cdk/sources/utils/record_helper.py +53 -0
  316. airbyte_cdk/sources/utils/schema_helpers.py +230 -0
  317. airbyte_cdk/sources/utils/slice_logger.py +57 -0
  318. airbyte_cdk/sources/utils/transform.py +277 -0
  319. airbyte_cdk/sources/utils/types.py +7 -0
  320. airbyte_cdk/sql/__init__.py +0 -0
  321. airbyte_cdk/sql/_util/__init__.py +0 -0
  322. airbyte_cdk/sql/_util/hashing.py +34 -0
  323. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  324. airbyte_cdk/sql/constants.py +32 -0
  325. airbyte_cdk/sql/exceptions.py +235 -0
  326. airbyte_cdk/sql/secrets.py +123 -0
  327. airbyte_cdk/sql/shared/__init__.py +15 -0
  328. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  329. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  330. airbyte_cdk/sql/types.py +160 -0
  331. airbyte_cdk/test/__init__.py +7 -0
  332. airbyte_cdk/test/catalog_builder.py +81 -0
  333. airbyte_cdk/test/entrypoint_wrapper.py +250 -0
  334. airbyte_cdk/test/mock_http/__init__.py +6 -0
  335. airbyte_cdk/test/mock_http/matcher.py +41 -0
  336. airbyte_cdk/test/mock_http/mocker.py +185 -0
  337. airbyte_cdk/test/mock_http/request.py +103 -0
  338. airbyte_cdk/test/mock_http/response.py +28 -0
  339. airbyte_cdk/test/mock_http/response_builder.py +237 -0
  340. airbyte_cdk/test/state_builder.py +33 -0
  341. airbyte_cdk/test/utils/__init__.py +1 -0
  342. airbyte_cdk/test/utils/data.py +24 -0
  343. airbyte_cdk/test/utils/http_mocking.py +16 -0
  344. airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
  345. airbyte_cdk/test/utils/reading.py +26 -0
  346. airbyte_cdk/utils/__init__.py +10 -0
  347. airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
  348. airbyte_cdk/utils/analytics_message.py +25 -0
  349. airbyte_cdk/utils/constants.py +5 -0
  350. airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
  351. airbyte_cdk/utils/datetime_helpers.py +499 -0
  352. airbyte_cdk/utils/event_timing.py +85 -0
  353. airbyte_cdk/utils/is_cloud_environment.py +18 -0
  354. airbyte_cdk/utils/mapping_helpers.py +162 -0
  355. airbyte_cdk/utils/message_utils.py +26 -0
  356. airbyte_cdk/utils/oneof_option_config.py +33 -0
  357. airbyte_cdk/utils/print_buffer.py +75 -0
  358. airbyte_cdk/utils/schema_inferrer.py +270 -0
  359. airbyte_cdk/utils/slice_hasher.py +37 -0
  360. airbyte_cdk/utils/spec_schema_transformations.py +26 -0
  361. airbyte_cdk/utils/stream_status_utils.py +43 -0
  362. airbyte_cdk/utils/traced_exception.py +145 -0
  363. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
  364. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
  365. airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
  366. airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
  367. airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
  368. airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,702 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import json
6
+ from dataclasses import InitVar, dataclass, field
7
+ from functools import partial
8
+ from itertools import islice
9
+ from typing import (
10
+ Any,
11
+ Callable,
12
+ Iterable,
13
+ List,
14
+ Mapping,
15
+ Optional,
16
+ Set,
17
+ Tuple,
18
+ Union,
19
+ )
20
+
21
+ import requests
22
+ from typing_extensions import deprecated
23
+
24
+ from airbyte_cdk.models import AirbyteMessage
25
+ from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
26
+ from airbyte_cdk.sources.declarative.incremental import ResumableFullRefreshCursor
27
+ from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
28
+ from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
29
+ from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
30
+ SinglePartitionRouter,
31
+ )
32
+ from airbyte_cdk.sources.declarative.requesters.paginators.no_pagination import NoPagination
33
+ from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator
34
+ from airbyte_cdk.sources.declarative.requesters.request_options import (
35
+ DefaultRequestOptionsProvider,
36
+ RequestOptionsProvider,
37
+ )
38
+ from airbyte_cdk.sources.declarative.requesters.requester import Requester
39
+ from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
40
+ from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer
41
+ from airbyte_cdk.sources.http_logger import format_http_message
42
+ from airbyte_cdk.sources.source import ExperimentalClassWarning
43
+ from airbyte_cdk.sources.streams.core import StreamData
44
+ from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
45
+ from airbyte_cdk.utils.mapping_helpers import combine_mappings
46
+
47
+ FULL_REFRESH_SYNC_COMPLETE_KEY = "__ab_full_refresh_sync_complete"
48
+
49
+
50
+ @dataclass
51
+ class SimpleRetriever(Retriever):
52
+ """
53
+ Retrieves records by synchronously sending requests to fetch records.
54
+
55
+ The retriever acts as an orchestrator between the requester, the record selector, the paginator, and the stream slicer.
56
+
57
+ For each stream slice, submit requests until there are no more pages of records to fetch.
58
+
59
+ This retriever currently inherits from HttpStream to reuse the request submission and pagination machinery.
60
+ As a result, some of the parameters passed to some methods are unused.
61
+ The two will be decoupled in a future release.
62
+
63
+ Attributes:
64
+ name (str): The stream's name
65
+ primary_key (Optional[Union[str, List[str], List[List[str]]]]): The stream's primary key
66
+ requester (Requester): The HTTP requester
67
+ record_selector (HttpSelector): The record selector
68
+ paginator (Optional[Paginator]): The paginator
69
+ stream_slicer (Optional[StreamSlicer]): The stream slicer
70
+ cursor (Optional[DeclarativeCursor]): The cursor
71
+ parameters (Mapping[str, Any]): Additional runtime parameters to be used for string interpolation
72
+ """
73
+
74
+ requester: Requester
75
+ record_selector: HttpSelector
76
+ config: Config
77
+ parameters: InitVar[Mapping[str, Any]]
78
+ name: str
79
+ _name: Union[InterpolatedString, str] = field(init=False, repr=False, default="")
80
+ primary_key: Optional[Union[str, List[str], List[List[str]]]]
81
+ _primary_key: str = field(init=False, repr=False, default="")
82
+ paginator: Optional[Paginator] = None
83
+ stream_slicer: StreamSlicer = field(
84
+ default_factory=lambda: SinglePartitionRouter(parameters={})
85
+ )
86
+ request_option_provider: RequestOptionsProvider = field(
87
+ default_factory=lambda: DefaultRequestOptionsProvider(parameters={})
88
+ )
89
+ cursor: Optional[DeclarativeCursor] = None
90
+ ignore_stream_slicer_parameters_on_paginated_requests: bool = False
91
+
92
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
    """
    Finish initialization once the dataclass fields have been assigned.

    Selects the paginator to use, keeps the runtime parameters and wraps a plain-string
    name in an InterpolatedString.

    :param parameters: Additional runtime parameters, forwarded to the fallback paginator
        and to the name interpolation
    """
    # Fall back to NoPagination whenever no (truthy) paginator was supplied.
    effective_paginator = self.paginator
    if not effective_paginator:
        effective_paginator = NoPagination(parameters=parameters)
    self._paginator = effective_paginator

    self._parameters = parameters

    # Only raw strings get wrapped; any other value is kept exactly as it is.
    raw_name = self._name
    if isinstance(raw_name, str):
        self._name = InterpolatedString(raw_name, parameters=parameters)
100
+
101
@property  # type: ignore
def name(self) -> str:
    """
    :return: Stream name; an InterpolatedString name is evaluated against the config and
        converted to str, any other value is returned unchanged
    """
    current = self._name
    if not isinstance(current, InterpolatedString):
        return current
    return str(current.eval(self.config))

@name.setter
def name(self, value: str) -> None:
    """
    Store the stream name, ignoring assignments of property objects.
    """
    # NOTE(review): presumably the dataclass-generated __init__ hands over the property
    # object itself when no name is passed - confirm. Such a value is simply dropped.
    if isinstance(value, property):
        return
    self._name = value
116
+
117
+ def _get_mapping(
118
+ self, method: Callable[..., Optional[Union[Mapping[str, Any], str]]], **kwargs: Any
119
+ ) -> Tuple[Union[Mapping[str, Any], str], Set[str]]:
120
+ """
121
+ Get mapping from the provided method, and get the keys of the mapping.
122
+ If the method returns a string, it will return the string and an empty set.
123
+ If the method returns a dict, it will return the dict and its keys.
124
+ """
125
+ mapping = method(**kwargs) or {}
126
+ keys = set(mapping.keys()) if not isinstance(mapping, str) else set()
127
+ return mapping, keys
128
+
129
def _get_request_options(
    self,
    stream_state: Optional[StreamData],
    stream_slice: Optional[StreamSlice],
    next_page_token: Optional[Mapping[str, Any]],
    paginator_method: Callable[..., Optional[Union[Mapping[str, Any], str]]],
    stream_slicer_method: Callable[..., Optional[Union[Mapping[str, Any], str]]],
) -> Union[Mapping[str, Any], str]:
    """
    Collect the request options from the paginator and the stream slicer and merge them.

    The stream slicer options are left out when a next page token is present and
    `ignore_stream_slicer_parameters_on_paginated_requests` is set.
    The merge itself is delegated to `combine_mappings`; merging identical values is only
    allowed when the paginator method is named "get_request_body_json".

    :param stream_state: Currently unused by this method
    :param stream_slice: The stream slice forwarded to both methods
    :param next_page_token: The page token forwarded to both methods
    :param paginator_method: The paginator method providing request options
    :param stream_slicer_method: The stream slicer method providing request options
    :return: The merged request options
    """
    # FIXME we should eventually remove the usage of stream_state as part of the interpolation

    allow_same_value_merge = paginator_method.__name__ == "get_request_body_json"
    call_kwargs = {"stream_slice": stream_slice, "next_page_token": next_page_token}

    collected = [paginator_method(**call_kwargs)]

    skip_slicer_options = (
        next_page_token and self.ignore_stream_slicer_parameters_on_paginated_requests
    )
    if not skip_slicer_options:
        collected.append(stream_slicer_method(**call_kwargs))

    return combine_mappings(collected, allow_same_value_merge=allow_same_value_merge)
160
+
161
def _request_headers(
    self,
    stream_state: Optional[StreamData] = None,
    stream_slice: Optional[StreamSlice] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
    """
    Specifies request headers.
    Authentication headers will overwrite any overlapping headers returned from this method.

    :return: The merged headers with every key and value converted to str
    :raises ValueError: If the merged request options are a string instead of a mapping
    """
    merged_headers = self._get_request_options(
        stream_state,
        stream_slice,
        next_page_token,
        self._paginator.get_request_headers,
        self.request_option_provider.get_request_headers,
    )
    if isinstance(merged_headers, str):
        raise ValueError("Request headers cannot be a string")

    stringified = {}
    for header_name, header_value in merged_headers.items():
        stringified[str(header_name)] = str(header_value)
    return stringified
181
+
182
def _request_params(
    self,
    stream_state: Optional[StreamData] = None,
    stream_slice: Optional[StreamSlice] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
    """
    Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.

    E.g: you might want to define query parameters for paging if next_page_token is not None.

    :return: The merged query parameters of the paginator and the request option provider
    :raises ValueError: If the merged request options are a string instead of a mapping
    """
    merged_params = self._get_request_options(
        stream_state,
        stream_slice,
        next_page_token,
        self._paginator.get_request_params,
        self.request_option_provider.get_request_params,
    )
    if not isinstance(merged_params, str):
        return merged_params
    raise ValueError("Request params cannot be a string")
203
+
204
def _request_body_data(
    self,
    stream_state: Optional[StreamData] = None,
    stream_slice: Optional[StreamSlice] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> Union[Mapping[str, Any], str]:
    """
    Specifies how to populate the body of the request with a non-JSON payload.

    If returns a ready text that it will be sent as is.
    If returns a dict that it will be converted to a urlencoded form.
    E.g. {"key1": "value1", "key2": "value2"} => "key1=value1&key2=value2"

    At the same time only one of the 'request_body_data' and 'request_body_json' functions can be overridden.

    :return: The merged body data of the paginator and the request option provider
    """
    body_data = self._get_request_options(
        stream_state,
        stream_slice,
        next_page_token,
        self._paginator.get_request_body_data,
        self.request_option_provider.get_request_body_data,
    )
    return body_data
226
+
227
def _request_body_json(
    self,
    stream_state: Optional[StreamData] = None,
    stream_slice: Optional[StreamSlice] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> Optional[Mapping[str, Any]]:
    """
    Specifies how to populate the body of the request with a JSON payload.

    At the same time only one of the 'request_body_data' and 'request_body_json' functions can be overridden.

    :return: The merged JSON body of the paginator and the request option provider
    :raises ValueError: If the merged request options are a string instead of a mapping
    """
    merged_body = self._get_request_options(
        stream_state,
        stream_slice,
        next_page_token,
        self._paginator.get_request_body_json,
        self.request_option_provider.get_request_body_json,
    )
    if not isinstance(merged_body, str):
        return merged_body
    raise ValueError("Request body json cannot be a string")
248
+
249
def _paginator_path(
    self,
    next_page_token: Optional[Mapping[str, Any]] = None,
    stream_state: Optional[Mapping[str, Any]] = None,
    stream_slice: Optional[StreamSlice] = None,
) -> Optional[str]:
    """
    If the paginator points to a path, follow it, else return nothing so the requester is used.

    :param next_page_token: The page token forwarded to the paginator
    :param stream_state: The stream state forwarded to the paginator
    :param stream_slice: The stream slice forwarded to the paginator
    :return: Whatever the paginator's `path` method returns
    """
    paginator_path = self._paginator.path(
        stream_slice=stream_slice,
        stream_state=stream_state,
        next_page_token=next_page_token,
    )
    return paginator_path
265
+
266
def _parse_response(
    self,
    response: Optional[requests.Response],
    stream_state: StreamState,
    records_schema: Mapping[str, Any],
    stream_slice: Optional[StreamSlice] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> Iterable[Record]:
    """
    Yield the records the record selector selects from the response.

    Nothing is yielded when the response is falsy.

    :param response: The HTTP response to select records from
    :param stream_state: The stream state forwarded to the record selector
    :param records_schema: json schema to describe record
    :param stream_slice: The stream slice forwarded to the record selector
    :param next_page_token: The page token forwarded to the record selector
    :return: The selected records
    """
    if not response:
        # Still a generator because of the `yield from` below; it simply ends without records.
        return

    yield from self.record_selector.select_records(
        response=response,
        stream_state=stream_state,
        records_schema=records_schema,
        stream_slice=stream_slice,
        next_page_token=next_page_token,
    )
284
+
285
+ @property # type: ignore
286
+ def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
287
+ """The stream's primary key"""
288
+ return self._primary_key
289
+
290
+ @primary_key.setter
291
+ def primary_key(self, value: str) -> None:
292
+ if not isinstance(value, property):
293
+ self._primary_key = value
294
+
295
def _next_page_token(
    self,
    response: requests.Response,
    last_page_size: int,
    last_record: Optional[Record],
    last_page_token_value: Optional[Any],
) -> Optional[Mapping[str, Any]]:
    """
    Specifies a pagination strategy by delegating to the paginator.

    The value returned from this method is passed to most other methods in this class. Use it to form a request e.g: set headers or query params.

    :param response: The response of the page that was just read
    :param last_page_size: The number of records read from that page
    :param last_record: The last record read from that page, if any
    :param last_page_token_value: The token value that was used to request that page, if any
    :return: The token for the next page from the input response object. Returning None means there are no more pages to read in this response.
    """
    upcoming_token = self._paginator.next_page_token(
        response=response,
        last_page_size=last_page_size,
        last_record=last_record,
        last_page_token_value=last_page_token_value,
    )
    return upcoming_token
315
+
316
def _fetch_next_page(
    self,
    stream_state: Mapping[str, Any],
    stream_slice: StreamSlice,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> Optional[requests.Response]:
    """
    Send the request for one page through the requester.

    The path, headers, query parameters and both body variants are computed from the same
    state, slice and page token before being handed to the requester.

    :param stream_state: The stream state used to build the request
    :param stream_slice: The stream slice used to build the request
    :param next_page_token: The token of the page to fetch, if any
    :return: Whatever the requester's `send_request` returns
    """
    send_request = self.requester.send_request
    # Every request component is derived from the same three inputs.
    request_context = {
        "stream_state": stream_state,
        "stream_slice": stream_slice,
        "next_page_token": next_page_token,
    }

    path = self._paginator_path(**request_context)
    headers = self._request_headers(**request_context)
    params = self._request_params(**request_context)
    body_data = self._request_body_data(**request_context)
    body_json = self._request_body_json(**request_context)

    return send_request(
        path=path,
        request_headers=headers,
        request_params=params,
        request_body_data=body_data,
        request_body_json=body_json,
        **request_context,
    )
352
+
353
# This logic is similar to _read_pages in the HttpStream class. When making changes here, consider making changes there as well.
def _read_pages(
    self,
    records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]],
    stream_state: Mapping[str, Any],
    stream_slice: StreamSlice,
) -> Iterable[Record]:
    """
    Read every page of a slice and yield the records of each page.

    Starts from the paginator's initial token and keeps fetching until a response is falsy
    or the paginator returns no next page token.

    :param records_generator_fn: Turns a response into the records of that page
    :param stream_state: The stream state used to build the requests
    :param stream_slice: The stream slice to read
    :return: The records of all pages
    """
    first_token = self._paginator.get_initial_token()
    next_page_token: Optional[Mapping[str, Any]] = None
    if first_token:
        next_page_token = {"next_page_token": first_token}

    while True:
        response = self._fetch_next_page(stream_state, stream_slice, next_page_token)

        # Page statistics are handed to the paginator to compute the next token.
        last_page_size = 0
        last_record: Optional[Record] = None
        for page_record in records_generator_fn(response):
            last_page_size += 1
            last_record = page_record
            yield page_record

        if not response:
            break

        previous_token_value = None
        if next_page_token:
            previous_token_value = next_page_token.get("next_page_token")
        next_page_token = self._next_page_token(
            response=response,
            last_page_size=last_page_size,
            last_record=last_record,
            last_page_token_value=previous_token_value,
        )
        if not next_page_token:
            break

    # Always return an empty generator just in case no records were ever yielded
    yield from []
392
+
393
def _read_single_page(
    self,
    records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]],
    stream_state: Mapping[str, Any],
    stream_slice: StreamSlice,
) -> Iterable[StreamData]:
    """
    Read exactly one page and report the follow-up position to the cursor.

    The page token is taken from the "next_page_token" entry of the stream state, falling back
    to the paginator's initial token. After the page was read, the cursor (if any) closes a slice
    whose cursor_slice is either the next page token or the full-refresh-complete marker.

    :param records_generator_fn: Turns a response into the records of that page
    :param stream_state: The stream state, optionally holding the page token to resume from
    :param stream_slice: The stream slice to read
    :return: The records of the page
    """
    resume_token = stream_state.get("next_page_token")
    if resume_token is None:
        resume_token = self._paginator.get_initial_token()

    next_page_token: Optional[Mapping[str, Any]] = None
    if resume_token:
        next_page_token = {"next_page_token": resume_token}

    response = self._fetch_next_page(stream_state, stream_slice, next_page_token)

    last_page_size = 0
    last_record: Optional[Record] = None
    for page_record in records_generator_fn(response):
        last_page_size += 1
        last_record = page_record
        yield page_record

    if response:
        previous_token_value = None
        if next_page_token:
            previous_token_value = next_page_token.get("next_page_token")
        # No further page token means the sync is complete.
        next_page_token = self._next_page_token(
            response=response,
            last_page_size=last_page_size,
            last_record=last_record,
            last_page_token_value=previous_token_value,
        ) or {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
    else:
        next_page_token = {FULL_REFRESH_SYNC_COMPLETE_KEY: True}

    if self.cursor:
        closing_slice = StreamSlice(
            cursor_slice=next_page_token, partition=stream_slice.partition
        )
        self.cursor.close_slice(closing_slice)

    # Always return an empty generator just in case no records were ever yielded
    yield from []
435
+
436
def read_records(
    self,
    records_schema: Mapping[str, Any],
    stream_slice: Optional[StreamSlice] = None,
) -> Iterable[StreamData]:
    """
    Fetch a stream's records from an HTTP API source

    With a `ResumableFullRefreshCursor`, a single page is read per call (or nothing when the state
    already carries the full-refresh-complete marker). Otherwise all pages of the slice are read,
    each record is reported to the cursor, and the cursor closes the slice at the end.

    :param records_schema: json schema to describe record
    :param stream_slice: The stream slice to read data for
    :return: The records read from the API source
    """
    _slice = stream_slice or StreamSlice(partition={}, cursor_slice={})  # None-check

    record_generator = partial(
        self._parse_records,
        stream_slice=stream_slice,
        stream_state=self.state or {},
        records_schema=records_schema,
    )

    if self.cursor and isinstance(self.cursor, ResumableFullRefreshCursor):
        stream_state = self.state

        # Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
        # fetch more records. The platform deletes stream state for full refresh streams before starting a
        # new job, so we don't need to worry about this value existing for the initial attempt
        if not stream_state.get(FULL_REFRESH_SYNC_COMPLETE_KEY):
            yield from self._read_single_page(record_generator, stream_state, _slice)
        return

    most_recent_record_from_slice = None
    for stream_data in self._read_pages(record_generator, self.state, _slice):
        current_record = self._extract_record(stream_data, _slice)
        if self.cursor and current_record:
            self.cursor.observe(_slice, current_record)

        # Latest record read, not necessarily within slice boundaries.
        # TODO Remove once all custom components implement `observe` method.
        # https://github.com/airbytehq/airbyte-internal-issues/issues/6955
        most_recent_record_from_slice = self._get_most_recent_record(
            most_recent_record_from_slice, current_record, _slice
        )
        yield stream_data

    if self.cursor:
        self.cursor.close_slice(_slice, most_recent_record_from_slice)
485
+
486
def _get_most_recent_record(
    self,
    current_most_recent: Optional[Record],
    current_record: Optional[Record],
    stream_slice: StreamSlice,
) -> Optional[Record]:
    """
    Pick the more recent of two records according to the cursor.

    :param current_most_recent: The most recent record seen so far, if any
    :param current_record: The record that was just read, if any
    :param stream_slice: Unused by this method
    :return: None without a cursor or without a current record; otherwise the current record unless
        the cursor reports the previous most recent record as greater than or equal to it
    """
    if not (self.cursor and current_record):
        return None

    if not current_most_recent:
        return current_record

    if self.cursor.is_greater_than_or_equal(current_most_recent, current_record):
        return current_most_recent
    return current_record
503
+
504
def _extract_record(
    self, stream_data: StreamData, stream_slice: StreamSlice
) -> Optional[Record]:
    """
    As we allow the output of _read_pages to be StreamData, it can be multiple things. Therefore, we need to filter out and normalize
    to data to streamline the rest of the process.

    :param stream_data: The item produced while reading pages
    :param stream_slice: The slice associated with a newly created Record
    :return: The item itself when it already is a Record, a new Record for mappings and for
        AirbyteMessages carrying a record, None for anything else
    """
    # Record is not part of `StreamData` but is the most common implementation of `Mapping[str, Any]` which is part of `StreamData`
    if isinstance(stream_data, Record):
        return stream_data

    # dict instances are Mappings as well, so a single check covers both.
    if isinstance(stream_data, Mapping):
        return Record(
            data=dict(stream_data), associated_slice=stream_slice, stream_name=self.name
        )

    if isinstance(stream_data, AirbyteMessage) and stream_data.record:
        return Record(
            data=stream_data.record.data,  # type:ignore # AirbyteMessage always has record.data
            associated_slice=stream_slice,
            stream_name=self.name,
        )

    return None
525
+
526
# stream_slices is defined with arguments on http stream and fixing this has a long tail of dependencies. Will be resolved by the decoupling of http stream and simple retriever
def stream_slices(self) -> Iterable[Optional[StreamSlice]]:  # type: ignore
    """
    Specifies the slices for this stream. See the stream slicing section of the docs for more information.

    :return: The slices produced by the stream slicer
    """
    slices = self.stream_slicer.stream_slices()
    return slices
537
+
538
+ @property
539
+ def state(self) -> Mapping[str, Any]:
540
+ return self.cursor.get_stream_state() if self.cursor else {}
541
+
542
@state.setter
def state(self, value: StreamState) -> None:
    """
    State setter, accept state serialized by state getter.

    The value is handed to the cursor as its initial state; without a cursor it is ignored.

    :param value: The state to restore
    """
    if not self.cursor:
        return
    self.cursor.set_initial_state(value)
547
+
548
def _parse_records(
    self,
    response: Optional[requests.Response],
    stream_state: Mapping[str, Any],
    records_schema: Mapping[str, Any],
    stream_slice: Optional[StreamSlice],
) -> Iterable[Record]:
    """
    Yield the records parsed from the response by `_parse_response`.

    :param response: The HTTP response to parse
    :param stream_state: The stream state forwarded to the parsing
    :param records_schema: json schema to describe record
    :param stream_slice: The stream slice forwarded to the parsing
    :return: The parsed records
    """
    parsed_records = self._parse_response(
        response,
        stream_state=stream_state,
        records_schema=records_schema,
        stream_slice=stream_slice,
    )
    yield from parsed_records
561
+
562
+ def must_deduplicate_query_params(self) -> bool:
563
+ return True
564
+
565
+ @staticmethod
566
+ def _to_partition_key(to_serialize: Any) -> str:
567
+ # separators have changed in Python 3.4. To avoid being impacted by further change, we explicitly specify our own value
568
+ return json.dumps(to_serialize, indent=None, separators=(",", ":"), sort_keys=True)
569
+
570
+
571
@dataclass
class SimpleRetrieverTestReadDecorator(SimpleRetriever):
    """
    In some cases, we want to limit the number of requests that are made to the backend source. This class allows for limiting the number of
    slices that are queried throughout a read command.

    Attributes:
        maximum_number_of_slices (int): The maximum number of slices returned by `stream_slices`. Must be strictly positive.
    """

    maximum_number_of_slices: int = 5

    def __post_init__(self, options: Mapping[str, Any]) -> None:
        """
        Run the parent initialization and validate the slice limit.

        :param options: Additional runtime parameters, forwarded to the parent initialization
        :raises ValueError: If `maximum_number_of_slices` is lower than 1
        """
        super().__post_init__(options)
        # Compare against None explicitly: a truthiness check would let 0 slip through although
        # the limit has to be strictly positive.
        if self.maximum_number_of_slices is not None and self.maximum_number_of_slices < 1:
            raise ValueError(
                f"The maximum number of slices on a test read needs to be strictly positive. Got {self.maximum_number_of_slices}"
            )

    # stream_slices is defined with arguments on http stream and fixing this has a long tail of dependencies. Will be resolved by the decoupling of http stream and simple retriever
    def stream_slices(self) -> Iterable[Optional[StreamSlice]]:  # type: ignore
        """
        :return: At most `maximum_number_of_slices` slices of the parent's stream slices
        """
        return islice(super().stream_slices(), self.maximum_number_of_slices)

    def _fetch_next_page(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: StreamSlice,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Optional[requests.Response]:
        """
        Send the request for one page through the requester, attaching a log formatter.

        Builds the request like the parent class does and additionally passes a `log_formatter`
        that formats the response with `format_http_message` using the stream name.

        :param stream_state: The stream state used to build the request
        :param stream_slice: The stream slice used to build the request
        :param next_page_token: The token of the page to fetch, if any
        :return: Whatever the requester's `send_request` returns
        """
        return self.requester.send_request(
            path=self._paginator_path(
                next_page_token=next_page_token,
                stream_state=stream_state,
                stream_slice=stream_slice,
            ),
            stream_state=stream_state,
            stream_slice=stream_slice,
            next_page_token=next_page_token,
            request_headers=self._request_headers(
                stream_state=stream_state,
                stream_slice=stream_slice,
                next_page_token=next_page_token,
            ),
            request_params=self._request_params(
                stream_state=stream_state,
                stream_slice=stream_slice,
                next_page_token=next_page_token,
            ),
            request_body_data=self._request_body_data(
                stream_state=stream_state,
                stream_slice=stream_slice,
                next_page_token=next_page_token,
            ),
            request_body_json=self._request_body_json(
                stream_state=stream_state,
                stream_slice=stream_slice,
                next_page_token=next_page_token,
            ),
            log_formatter=lambda response: format_http_message(
                response,
                f"Stream '{self.name}' request",
                f"Request performed in order to extract records for stream '{self.name}'",
                self.name,
            ),
        )
633
+
634
+
635
+ @deprecated(
636
+ "This class is experimental. Use at your own risk.",
637
+ category=ExperimentalClassWarning,
638
+ )
639
+ @dataclass
640
+ class LazySimpleRetriever(SimpleRetriever):
641
+ """
642
+ A retriever that supports lazy loading from parent streams.
643
+ """
644
+
645
def _read_pages(
    self,
    records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]],
    stream_state: Mapping[str, Any],
    stream_slice: StreamSlice,
) -> Iterable[Record]:
    """
    Read the pages of a slice, starting from the response stored on the slice when there is one.

    The slice's extra field "child_response" is used as the first page. The remaining pages
    are fetched through `_paginate`. When that response is falsy, reading falls back to the regular
    pagination of the parent class.

    :param records_generator_fn: Turns a response into the records of that page
    :param stream_state: The stream state used to build the requests
    :param stream_slice: The stream slice to read; its extra fields must contain "child_response"
    :return: The records of all pages
    """
    response = stream_slice.extra_fields["child_response"]
    if response:
        last_page_size, last_record = 0, None
        for record in records_generator_fn(response):  # type: ignore[call-arg] # only _parse_records expected as a func
            last_page_size += 1
            last_record = record
            yield record

        next_page_token = self._next_page_token(response, last_page_size, last_record, None)
        if next_page_token:
            yield from self._paginate(
                next_page_token,
                records_generator_fn,
                stream_state,
                stream_slice,
            )

        yield from []
    else:
        # Delegate to the parent implementation: calling self._read_pages here would re-enter
        # this very method with the same falsy response and recurse without end.
        yield from super()._read_pages(records_generator_fn, stream_state, stream_slice)
671
+
672
def _paginate(
    self,
    next_page_token: Any,
    records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]],
    stream_state: Mapping[str, Any],
    stream_slice: StreamSlice,
) -> Iterable[Record]:
    """
    Handle pagination by fetching subsequent pages.

    Keeps fetching until a response is falsy or the paginator returns no next page token.

    :param next_page_token: The token of the first page to fetch
    :param records_generator_fn: Turns a response into the records of that page
    :param stream_state: The stream state used to build the requests
    :param stream_slice: The stream slice to read
    :return: The records of the fetched pages
    """
    while True:
        response = self._fetch_next_page(stream_state, stream_slice, next_page_token)

        last_page_size = 0
        last_record = None
        for page_record in records_generator_fn(response):  # type: ignore[call-arg] # only _parse_records expected as a func
            last_page_size += 1
            last_record = page_record
            yield page_record

        if not response:
            break

        previous_token_value = None
        if next_page_token:
            previous_token_value = next_page_token.get("next_page_token")
        next_page_token = self._next_page_token(
            response, last_page_size, last_record, previous_token_value
        )
        if not next_page_token:
            break