airbyte-cdk 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (368) hide show
  1. airbyte_cdk/__init__.py +358 -0
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +104 -0
  7. airbyte_cdk/connector.py +123 -0
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/__init__.py +3 -0
  10. airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
  11. airbyte_cdk/connector_builder/main.py +107 -0
  12. airbyte_cdk/connector_builder/models.py +73 -0
  13. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  14. airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
  15. airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
  16. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  17. airbyte_cdk/connector_builder/test_reader/types.py +83 -0
  18. airbyte_cdk/destinations/__init__.py +8 -0
  19. airbyte_cdk/destinations/destination.py +154 -0
  20. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  21. airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
  22. airbyte_cdk/destinations/vector_db_based/config.py +298 -0
  23. airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
  24. airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
  25. airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
  26. airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
  27. airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
  28. airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
  29. airbyte_cdk/entrypoint.py +414 -0
  30. airbyte_cdk/exception_handler.py +56 -0
  31. airbyte_cdk/logger.py +109 -0
  32. airbyte_cdk/models/__init__.py +72 -0
  33. airbyte_cdk/models/airbyte_protocol.py +88 -0
  34. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  35. airbyte_cdk/models/well_known_types.py +5 -0
  36. airbyte_cdk/py.typed +0 -0
  37. airbyte_cdk/sources/__init__.py +26 -0
  38. airbyte_cdk/sources/abstract_source.py +326 -0
  39. airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
  40. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
  41. airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
  42. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
  43. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
  44. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  45. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
  46. airbyte_cdk/sources/config.py +27 -0
  47. airbyte_cdk/sources/connector_state_manager.py +161 -0
  48. airbyte_cdk/sources/declarative/__init__.py +3 -0
  49. airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
  50. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  51. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
  52. airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
  53. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  54. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  55. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  56. airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
  57. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
  58. airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
  59. airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
  60. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
  61. airbyte_cdk/sources/declarative/auth/token.py +267 -0
  62. airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
  63. airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
  64. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
  65. airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
  66. airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
  67. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  68. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  69. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
  70. airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
  71. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
  72. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
  73. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
  74. airbyte_cdk/sources/declarative/declarative_source.py +36 -0
  75. airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
  76. airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
  77. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
  78. airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
  79. airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
  80. airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
  81. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  82. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  83. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  84. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
  85. airbyte_cdk/sources/declarative/exceptions.py +9 -0
  86. airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
  87. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
  88. airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
  89. airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
  90. airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
  91. airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
  92. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
  93. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  94. airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
  95. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
  96. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
  97. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  98. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
  99. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
  100. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  101. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  102. airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
  103. airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
  104. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
  105. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
  106. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
  107. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
  108. airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
  109. airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
  110. airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
  111. airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
  112. airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
  113. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  114. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  115. airbyte_cdk/sources/declarative/models/__init__.py +2 -0
  116. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
  117. airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
  118. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
  119. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
  120. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
  121. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
  122. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
  123. airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
  124. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  125. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  126. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
  127. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  128. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
  129. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
  130. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  131. airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
  132. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
  133. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
  134. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
  135. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
  136. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
  137. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
  138. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
  139. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
  140. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
  141. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
  142. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  143. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
  144. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
  145. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
  146. airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
  147. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
  148. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
  149. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
  150. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
  151. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
  152. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
  153. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
  154. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
  155. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
  156. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
  157. airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
  158. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
  159. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
  160. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  161. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
  162. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
  163. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
  164. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
  165. airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
  166. airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
  167. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  168. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  169. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  170. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  171. airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
  172. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
  173. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  174. airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
  175. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
  176. airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
  177. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
  178. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
  179. airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
  180. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
  181. airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
  182. airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
  183. airbyte_cdk/sources/declarative/spec/spec.py +48 -0
  184. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
  185. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
  186. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
  187. airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
  188. airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
  189. airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
  190. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  191. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  192. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  193. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  194. airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
  195. airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
  196. airbyte_cdk/sources/declarative/types.py +25 -0
  197. airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
  198. airbyte_cdk/sources/file_based/README.md +152 -0
  199. airbyte_cdk/sources/file_based/__init__.py +24 -0
  200. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
  201. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
  202. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
  203. airbyte_cdk/sources/file_based/config/__init__.py +0 -0
  204. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
  205. airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
  206. airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
  207. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  208. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
  209. airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
  210. airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
  211. airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
  212. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  213. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
  214. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
  215. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
  216. airbyte_cdk/sources/file_based/exceptions.py +159 -0
  217. airbyte_cdk/sources/file_based/file_based_source.py +466 -0
  218. airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
  219. airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
  220. airbyte_cdk/sources/file_based/file_record_data.py +22 -0
  221. airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
  222. airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
  223. airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
  224. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  225. airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
  226. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
  227. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
  228. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
  229. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
  230. airbyte_cdk/sources/file_based/remote_file.py +18 -0
  231. airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
  232. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
  233. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
  234. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
  235. airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
  236. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
  237. airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
  238. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
  239. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
  240. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
  241. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
  242. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
  243. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
  244. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
  245. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
  246. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
  247. airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
  248. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
  249. airbyte_cdk/sources/file_based/types.py +10 -0
  250. airbyte_cdk/sources/http_config.py +10 -0
  251. airbyte_cdk/sources/http_logger.py +55 -0
  252. airbyte_cdk/sources/message/__init__.py +19 -0
  253. airbyte_cdk/sources/message/repository.py +137 -0
  254. airbyte_cdk/sources/source.py +95 -0
  255. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  256. airbyte_cdk/sources/streams/__init__.py +8 -0
  257. airbyte_cdk/sources/streams/availability_strategy.py +84 -0
  258. airbyte_cdk/sources/streams/call_rate.py +704 -0
  259. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  260. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  261. airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
  262. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  263. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  264. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  265. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  266. airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
  267. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
  268. airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
  269. airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
  270. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
  271. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  272. airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
  273. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  274. airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
  275. airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
  276. airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
  277. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
  278. airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
  279. airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
  280. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
  281. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
  282. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  283. airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
  284. airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
  285. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
  286. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
  287. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  288. airbyte_cdk/sources/streams/core.py +703 -0
  289. airbyte_cdk/sources/streams/http/__init__.py +10 -0
  290. airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
  291. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  292. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  293. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  294. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  295. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  296. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  297. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  298. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  299. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  300. airbyte_cdk/sources/streams/http/exceptions.py +61 -0
  301. airbyte_cdk/sources/streams/http/http.py +673 -0
  302. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  303. airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
  304. airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
  305. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
  306. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
  307. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
  308. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
  309. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  310. airbyte_cdk/sources/streams/utils/__init__.py +3 -0
  311. airbyte_cdk/sources/types.py +169 -0
  312. airbyte_cdk/sources/utils/__init__.py +7 -0
  313. airbyte_cdk/sources/utils/casing.py +12 -0
  314. airbyte_cdk/sources/utils/files_directory.py +15 -0
  315. airbyte_cdk/sources/utils/record_helper.py +53 -0
  316. airbyte_cdk/sources/utils/schema_helpers.py +230 -0
  317. airbyte_cdk/sources/utils/slice_logger.py +57 -0
  318. airbyte_cdk/sources/utils/transform.py +277 -0
  319. airbyte_cdk/sources/utils/types.py +7 -0
  320. airbyte_cdk/sql/__init__.py +0 -0
  321. airbyte_cdk/sql/_util/__init__.py +0 -0
  322. airbyte_cdk/sql/_util/hashing.py +34 -0
  323. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  324. airbyte_cdk/sql/constants.py +32 -0
  325. airbyte_cdk/sql/exceptions.py +235 -0
  326. airbyte_cdk/sql/secrets.py +123 -0
  327. airbyte_cdk/sql/shared/__init__.py +15 -0
  328. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  329. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  330. airbyte_cdk/sql/types.py +160 -0
  331. airbyte_cdk/test/__init__.py +7 -0
  332. airbyte_cdk/test/catalog_builder.py +81 -0
  333. airbyte_cdk/test/entrypoint_wrapper.py +250 -0
  334. airbyte_cdk/test/mock_http/__init__.py +6 -0
  335. airbyte_cdk/test/mock_http/matcher.py +41 -0
  336. airbyte_cdk/test/mock_http/mocker.py +185 -0
  337. airbyte_cdk/test/mock_http/request.py +103 -0
  338. airbyte_cdk/test/mock_http/response.py +28 -0
  339. airbyte_cdk/test/mock_http/response_builder.py +237 -0
  340. airbyte_cdk/test/state_builder.py +33 -0
  341. airbyte_cdk/test/utils/__init__.py +1 -0
  342. airbyte_cdk/test/utils/data.py +24 -0
  343. airbyte_cdk/test/utils/http_mocking.py +16 -0
  344. airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
  345. airbyte_cdk/test/utils/reading.py +26 -0
  346. airbyte_cdk/utils/__init__.py +10 -0
  347. airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
  348. airbyte_cdk/utils/analytics_message.py +25 -0
  349. airbyte_cdk/utils/constants.py +5 -0
  350. airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
  351. airbyte_cdk/utils/datetime_helpers.py +499 -0
  352. airbyte_cdk/utils/event_timing.py +85 -0
  353. airbyte_cdk/utils/is_cloud_environment.py +18 -0
  354. airbyte_cdk/utils/mapping_helpers.py +162 -0
  355. airbyte_cdk/utils/message_utils.py +26 -0
  356. airbyte_cdk/utils/oneof_option_config.py +33 -0
  357. airbyte_cdk/utils/print_buffer.py +75 -0
  358. airbyte_cdk/utils/schema_inferrer.py +270 -0
  359. airbyte_cdk/utils/slice_hasher.py +37 -0
  360. airbyte_cdk/utils/spec_schema_transformations.py +26 -0
  361. airbyte_cdk/utils/stream_status_utils.py +43 -0
  362. airbyte_cdk/utils/traced_exception.py +145 -0
  363. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
  364. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
  365. airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
  366. airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
  367. airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
  368. airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,327 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from dataclasses import InitVar, dataclass, field
6
+ from typing import Any, Mapping, MutableMapping, Optional, Union
7
+
8
+ import requests
9
+
10
+ from airbyte_cdk.sources.declarative.decoders import (
11
+ Decoder,
12
+ JsonDecoder,
13
+ PaginationDecoderDecorator,
14
+ )
15
+ from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
16
+ from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator
17
+ from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import (
18
+ PaginationStrategy,
19
+ )
20
+ from airbyte_cdk.sources.declarative.requesters.request_option import (
21
+ RequestOption,
22
+ RequestOptionType,
23
+ )
24
+ from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
25
+ from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
26
+ from airbyte_cdk.utils.mapping_helpers import (
27
+ _validate_component_request_option_paths,
28
+ get_interpolation_context,
29
+ )
30
+
31
+
32
+ @dataclass
33
+ class DefaultPaginator(Paginator):
34
+ """
35
+ Default paginator to request pages of results with a fixed size until the pagination strategy no longer returns a next_page_token
36
+
37
+ Examples:
38
+ 1.
39
+ * fetches up to 10 records at a time by setting the "limit" request param to 10
40
+ * updates the request path with "{{ response._metadata.next }}"
41
+ ```
42
+ paginator:
43
+ type: "DefaultPaginator"
44
+ page_size_option:
45
+ type: RequestOption
46
+ inject_into: request_parameter
47
+ field_name: limit
48
+ page_token_option:
49
+ type: RequestPath
50
+ path: "location"
51
+ pagination_strategy:
52
+ type: "CursorPagination"
53
+ cursor_value: "{{ response._metadata.next }}"
54
+ page_size: 10
55
+ ```
56
+
57
+ 2.
58
+ * fetches up to 5 records at a time by setting the "page_size" header to 5
59
+ * increments a record counter and sets the request parameter "offset" to the value of the counter
60
+ ```
61
+ paginator:
62
+ type: "DefaultPaginator"
63
+ page_size_option:
64
+ type: RequestOption
65
+ inject_into: header
66
+ field_name: page_size
67
+ pagination_strategy:
68
+ type: "OffsetIncrement"
69
+ page_size: 5
70
+ page_token_option:
71
+ option_type: "request_parameter"
72
+ field_name: "offset"
73
+ ```
74
+
75
+ 3.
76
+ * fetches up to 5 records at a time by setting the "page_size" request param to 5
77
+ * increments a page counter and sets the request parameter "page" to the value of the counter
78
+ ```
79
+ paginator:
80
+ type: "DefaultPaginator"
81
+ page_size_option:
82
+ type: RequestOption
83
+ inject_into: request_parameter
84
+ field_name: page_size
85
+ pagination_strategy:
86
+ type: "PageIncrement"
87
+ page_size: 5
88
+ page_token_option:
89
+ type: RequestOption
90
+ option_type: "request_parameter"
91
+ field_name: "page"
92
+ ```
93
+ Attributes:
94
+ page_size_option (Optional[RequestOption]): the request option to set the page size. Cannot be injected in the path.
95
+ page_token_option (Optional[Union[RequestPath, RequestOption]]): the request option to set the page token
96
+ pagination_strategy (PaginationStrategy): Strategy defining how to get the next page token
97
+ config (Config): connection config
98
+ url_base (Union[InterpolatedString, str]): endpoint's base url
99
+ decoder (Decoder): decoder to decode the response
100
+ """
101
+
102
+ pagination_strategy: PaginationStrategy
103
+ config: Config
104
+ url_base: Union[InterpolatedString, str]
105
+ parameters: InitVar[Mapping[str, Any]]
106
+ decoder: Decoder = field(
107
+ default_factory=lambda: PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
108
+ )
109
+ page_size_option: Optional[RequestOption] = None
110
+ page_token_option: Optional[Union[RequestPath, RequestOption]] = None
111
+
112
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
    """
    Check the paginator's options and normalize `url_base` after the dataclass fields are set.

    :param parameters: forwarded to the InterpolatedString built when `url_base` is a plain string
    :raises ValueError: if `page_size_option` is set while the pagination strategy reports no page size
    """
    # A page size option is only meaningful when the strategy actually has a page size.
    if self.page_size_option:
        if not self.pagination_strategy.get_page_size():
            raise ValueError(
                "page_size_option cannot be set if the pagination strategy does not have a page_size"
            )

    # Plain strings are wrapped so that url_base can later be evaluated like any interpolated value.
    raw_url_base = self.url_base
    if isinstance(raw_url_base, str):
        self.url_base = InterpolatedString(string=raw_url_base, parameters=parameters)

    # Only a page token given as a request option (not as a RequestPath) goes through
    # the request option path validation helper.
    token_option = self.page_token_option
    if not token_option or isinstance(token_option, RequestPath):
        return
    _validate_component_request_option_paths(
        self.config,
        self.page_size_option,
        token_option,
    )
126
+
127
def get_initial_token(self) -> Optional[Any]:
    """
    Provide the page token for the first request of a stream, as exposed by the pagination strategy.

    WARNING: streams that use RFR and checkpoint their state using page numbers should not
    call get_initial_token(), because paginators are stateless.
    """
    strategy = self.pagination_strategy
    return strategy.initial_token
135
+
136
def next_page_token(
    self,
    response: requests.Response,
    last_page_size: int,
    last_record: Optional[Record],
    last_page_token_value: Optional[Any] = None,
) -> Optional[Mapping[str, Any]]:
    """
    Ask the pagination strategy for the next page token and wrap it in a mapping.

    :param response: response of the last request, forwarded to the strategy
    :param last_page_size: number of records in the last page, forwarded to the strategy
    :param last_record: last record of the last page, forwarded to the strategy
    :param last_page_token_value: token value used for the last request, forwarded to the strategy
    :return: `{"next_page_token": <token>}` when the strategy returns a truthy token, otherwise None
    """
    token = self.pagination_strategy.next_page_token(
        response=response,
        last_page_size=last_page_size,
        last_record=last_record,
        last_page_token_value=last_page_token_value,
    )
    # Any falsy token (None, 0, "") is reported as "no next page".
    return {"next_page_token": token} if token else None
153
+
154
def path(
    self,
    next_page_token: Optional[Mapping[str, Any]],
    stream_state: Optional[Mapping[str, Any]] = None,
    stream_slice: Optional[StreamSlice] = None,
) -> Optional[str]:
    """
    Derive the request path from the page token when the token is configured as a RequestPath.

    :param next_page_token: mapping holding the token under the "next_page_token" key, or None
    :param stream_state: passed to the interpolation context used to evaluate url_base
    :param stream_slice: passed to the interpolation context used to evaluate url_base
    :return: the token with the evaluated url_base removed, or None when there is no token
        or the page token option is not a RequestPath
    """
    if not next_page_token:
        return None

    token = next_page_token.get("next_page_token")
    token_option = self.page_token_option
    if not (token and token_option and isinstance(token_option, RequestPath)):
        return None

    # make additional interpolation context
    interpolation_context = get_interpolation_context(
        stream_state=stream_state,
        stream_slice=stream_slice,
        next_page_token=next_page_token,
    )
    token_as_text = str(token)
    # Replace url base to only return the path
    base_url = self.url_base.eval(self.config, **interpolation_context)  # type: ignore # url_base is casted to a InterpolatedString in __post_init__
    return token_as_text.replace(base_url, "")
172
+
173
def get_request_params(
    self,
    *,
    stream_state: Optional[StreamState] = None,
    stream_slice: Optional[StreamSlice] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> MutableMapping[str, Any]:
    """
    Return the pagination options that are injected as request parameters.

    `stream_state` and `stream_slice` are accepted but not used here.
    """
    target = RequestOptionType.request_parameter
    return self._get_request_options(target, next_page_token)
181
+
182
def get_request_headers(
    self,
    *,
    stream_state: Optional[StreamState] = None,
    stream_slice: Optional[StreamSlice] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, str]:
    """
    Return the pagination options that are injected as request headers.

    `stream_state` and `stream_slice` are accepted but not used here.
    """
    target = RequestOptionType.header
    return self._get_request_options(target, next_page_token)
190
+
191
def get_request_body_data(
    self,
    *,
    stream_state: Optional[StreamState] = None,
    stream_slice: Optional[StreamSlice] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
    """
    Return the pagination options that are injected into the request body data.

    `stream_state` and `stream_slice` are accepted but not used here.
    """
    target = RequestOptionType.body_data
    return self._get_request_options(target, next_page_token)
199
+
200
def get_request_body_json(
    self,
    *,
    stream_state: Optional[StreamState] = None,
    stream_slice: Optional[StreamSlice] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
    """
    Return the pagination options that are injected into the JSON request body.

    `stream_state` and `stream_slice` are accepted but not used here.
    """
    target = RequestOptionType.body_json
    return self._get_request_options(target, next_page_token)
208
+
209
def _get_request_options(
    self, option_type: RequestOptionType, next_page_token: Optional[Mapping[str, Any]]
) -> MutableMapping[str, Any]:
    """
    Build the pagination request options that target the given injection point.

    :param option_type: the part of the request (parameter, header, body data, body json) being built
    :param next_page_token: mapping holding the token under the "next_page_token" key, or None
    :return: a new mapping containing the page token and/or page size when their
        options are configured to be injected into `option_type`
    """
    options: MutableMapping[str, Any] = {}

    token = next_page_token.get("next_page_token") if next_page_token else None
    # `is not None` (rather than truthiness) so that a token such as 0 is still injected.
    if (
        self.page_token_option
        and token is not None
        and isinstance(self.page_token_option, RequestOption)
        and self.page_token_option.inject_into == option_type
    ):
        self.page_token_option.inject_into_request(options, token, self.config)

    if self.page_size_option:
        # Query the strategy a single time and reuse the value for both the check
        # and the injection instead of evaluating the page size twice.
        page_size = self.pagination_strategy.get_page_size()
        if page_size and self.page_size_option.inject_into == option_type:
            self.page_size_option.inject_into_request(options, page_size, self.config)

    return options
232
+
233
+
234
class PaginatorTestReadDecorator(Paginator):
    """
    In some cases, we want to limit the number of requests that are made to the backend source. This class allows for limiting the number of
    pages that are queried throughout a read command.

    WARNING: This decorator is not currently thread-safe like the rest of the low-code framework because it has
    an internal state to track the current number of pages counted so that it can exit early during a test read
    """

    # The first page is requested before next_page_token() is ever called, so counting starts at 1.
    _PAGE_COUNT_BEFORE_FIRST_NEXT_CALL = 1

    def __init__(self, decorated: Paginator, maximum_number_of_pages: int = 5) -> None:
        """
        :param decorated: the paginator every call is delegated to
        :param maximum_number_of_pages: the number of pages after which next_page_token() returns None
        :raises ValueError: if maximum_number_of_pages is lower than 1
        """
        # Compare against None explicitly: a truthiness check would let 0 slip past the
        # "strictly positive" validation because 0 is falsy.
        if maximum_number_of_pages is not None and maximum_number_of_pages < 1:
            raise ValueError(
                f"The maximum number of pages on a test read needs to be strictly positive. Got {maximum_number_of_pages}"
            )
        self._maximum_number_of_pages = maximum_number_of_pages
        self._decorated = decorated
        self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL

    def get_initial_token(self) -> Optional[Any]:
        """Reset the page counter and return the decorated paginator's initial token."""
        self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
        return self._decorated.get_initial_token()

    def next_page_token(
        self,
        response: requests.Response,
        last_page_size: int,
        last_record: Optional[Record],
        last_page_token_value: Optional[Any] = None,
    ) -> Optional[Mapping[str, Any]]:
        """
        Delegate to the decorated paginator until the page limit is reached.

        :return: None once the maximum number of pages has been counted, otherwise
            whatever the decorated paginator returns
        """
        if self._page_count >= self._maximum_number_of_pages:
            return None

        self._page_count += 1
        return self._decorated.next_page_token(
            response, last_page_size, last_record, last_page_token_value
        )

    def path(
        self,
        next_page_token: Optional[Mapping[str, Any]],
        stream_state: Optional[Mapping[str, Any]] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> Optional[str]:
        """Return the path computed by the decorated paginator."""
        return self._decorated.path(
            next_page_token=next_page_token,
            stream_state=stream_state,
            stream_slice=stream_slice,
        )

    def get_request_params(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, Any]:
        """Return the request parameters computed by the decorated paginator."""
        return self._decorated.get_request_params(
            stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
        )

    def get_request_headers(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, str]:
        """Return the request headers computed by the decorated paginator."""
        return self._decorated.get_request_headers(
            stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
        )

    def get_request_body_data(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Union[Mapping[str, Any], str]:
        """Return the request body data computed by the decorated paginator."""
        return self._decorated.get_request_body_data(
            stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
        )

    def get_request_body_json(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, Any]:
        """Return the JSON request body computed by the decorated paginator."""
        return self._decorated.get_request_body_json(
            stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
        )
@@ -0,0 +1,76 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from dataclasses import InitVar, dataclass
6
+ from typing import Any, Mapping, MutableMapping, Optional, Union
7
+
8
+ import requests
9
+
10
+ from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator
11
+ from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
12
+
13
+
14
@dataclass
class NoPagination(Paginator):
    """
    Paginator for streams that are not paginated: it contributes no path, no request
    options and no token for a following page.
    """

    parameters: InitVar[Mapping[str, Any]]

    def get_initial_token(self) -> Optional[Any]:
        """There is no token for the first request."""
        return None

    def next_page_token(
        self,
        response: requests.Response,
        last_page_size: int,
        last_record: Optional[Record],
        last_page_token_value: Optional[Any],
    ) -> Optional[Mapping[str, Any]]:
        """Always return an empty mapping, whatever the response contains."""
        return {}

    def path(
        self,
        next_page_token: Optional[Mapping[str, Any]],
        stream_state: Optional[Mapping[str, Any]] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> Optional[str]:
        """The path is never defined by this paginator."""
        return None

    def get_request_params(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> MutableMapping[str, Any]:
        """No request parameters are added."""
        return {}

    def get_request_headers(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, str]:
        """No request headers are added."""
        return {}

    def get_request_body_data(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Union[Mapping[str, Any], str]:
        """No request body data is added."""
        return {}

    def get_request_body_json(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Mapping[str, Any]:
        """No JSON request body is added."""
        return {}
@@ -0,0 +1,65 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from abc import ABC, abstractmethod
6
+ from dataclasses import dataclass
7
+ from typing import Any, Mapping, Optional
8
+
9
+ import requests
10
+
11
+ from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import (
12
+ RequestOptionsProvider,
13
+ )
14
+ from airbyte_cdk.sources.types import Record, StreamSlice
15
+
16
+
17
@dataclass
class Paginator(ABC, RequestOptionsProvider):
    """
    Defines the token used to fetch the next page of records from the API.

    When needed, the Paginator sets request options on the HTTP request that fetches the next page of records.
    When the next_page_token is the path to the next page of records, it should be accessed through the `path` method.
    """

    @abstractmethod
    def get_initial_token(self) -> Optional[Any]:
        """
        Get the page token that should be included in the request to get the first page of records
        """

    @abstractmethod
    def next_page_token(
        self,
        response: requests.Response,
        last_page_size: int,
        last_record: Optional[Record],
        last_page_token_value: Optional[Any],
    ) -> Optional[Mapping[str, Any]]:
        """
        Compute the next_page_token to use to fetch the next page of records.

        :param response: the response to process
        :param last_page_size: the number of records read from the response
        :param last_record: the last record extracted from the response
        :param last_page_token_value: the value of the page token used for the last request
        :return: A mapping {"next_page_token": <token>} for the next page from the input response object. Returning None means there are no more pages to read in this response.
        """

    @abstractmethod
    def path(
        self,
        next_page_token: Optional[Mapping[str, Any]],
        stream_state: Optional[Mapping[str, Any]] = None,
        stream_slice: Optional[StreamSlice] = None,
    ) -> Optional[str]:
        """
        Compute the URL path to hit to fetch the next page of records.

        e.g: if you wanted to hit https://myapi.com/v1/some_entity then this will return "some_entity"

        :return: path to hit to fetch the next request. Returning None means the path is not defined by the next_page_token
        """
@@ -0,0 +1,25 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from airbyte_cdk.sources.declarative.requesters.paginators.strategies.cursor_pagination_strategy import (
6
+ CursorPaginationStrategy,
7
+ )
8
+ from airbyte_cdk.sources.declarative.requesters.paginators.strategies.offset_increment import (
9
+ OffsetIncrement,
10
+ )
11
+ from airbyte_cdk.sources.declarative.requesters.paginators.strategies.page_increment import (
12
+ PageIncrement,
13
+ )
14
+ from airbyte_cdk.sources.declarative.requesters.paginators.strategies.stop_condition import (
15
+ CursorStopCondition,
16
+ StopConditionPaginationStrategyDecorator,
17
+ )
18
+
19
# Names exported by this package; each one is imported above from its strategy module.
__all__ = [
    "CursorPaginationStrategy",
    "CursorStopCondition",
    "OffsetIncrement",
    "PageIncrement",
    "StopConditionPaginationStrategyDecorator",
]
@@ -0,0 +1,98 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from dataclasses import InitVar, dataclass, field
6
+ from typing import Any, Dict, Mapping, Optional, Union
7
+
8
+ import requests
9
+
10
+ from airbyte_cdk.sources.declarative.decoders import (
11
+ Decoder,
12
+ JsonDecoder,
13
+ PaginationDecoderDecorator,
14
+ )
15
+ from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
16
+ from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
17
+ from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import (
18
+ PaginationStrategy,
19
+ )
20
+ from airbyte_cdk.sources.types import Config, Record
21
+
22
+
23
@dataclass
class CursorPaginationStrategy(PaginationStrategy):
    """
    Pagination strategy whose next page token is the result of evaluating an interpolated string.

    Attributes:
        page_size (Optional[int]): the number of records to request
        cursor_value (Union[InterpolatedString, str]): template string evaluating to the cursor value
        config (Config): connection config
        stop_condition (Optional[Union[InterpolatedBoolean, str]]): template evaluating to whether pagination should stop
        decoder (Decoder): decoder to decode the response
    """

    cursor_value: Union[InterpolatedString, str]
    config: Config
    parameters: InitVar[Mapping[str, Any]]
    page_size: Optional[int] = None
    stop_condition: Optional[Union[InterpolatedBoolean, str]] = None
    decoder: Decoder = field(
        default_factory=lambda: PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
    )

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        """Wrap plain-string `cursor_value` / `stop_condition` into their interpolated counterparts."""
        self._cursor_value = (
            InterpolatedString.create(self.cursor_value, parameters=parameters)
            if isinstance(self.cursor_value, str)
            else self.cursor_value
        )
        self._stop_condition: Optional[InterpolatedBoolean] = (
            InterpolatedBoolean(condition=self.stop_condition, parameters=parameters)
            if isinstance(self.stop_condition, str)
            else self.stop_condition
        )

    @property
    def initial_token(self) -> Optional[Any]:
        """
        CursorPaginationStrategy has no initial value because the next cursor is typically part of
        the response to the first request. For Resumable Full Refresh streams that checkpoint the page
        cursor, the next cursor should be read from the state or stream slice object.
        """
        return None

    def next_page_token(
        self,
        response: requests.Response,
        last_page_size: int,
        last_record: Optional[Record],
        last_page_token_value: Optional[Any] = None,
    ) -> Optional[Any]:
        """
        Evaluate the stop condition and the cursor template against the response.

        :param response: the response to process
        :param last_page_size: the number of records read from the response
        :param last_record: the last record extracted from the response
        :param last_page_token_value: not used by this strategy
        :return: the evaluated cursor, or None when the stop condition holds or the cursor is falsy
        """
        decoded_response = next(self.decoder.decode(response))
        # The default way that link is presented in requests.Response is a string of various links (last, next, etc). This
        # is not indexable or useful for parsing the cursor, so we replace it with the link dictionary from response.links
        headers: Dict[str, Any] = {**response.headers, "link": response.links}

        # Both templates are evaluated with the same interpolation variables.
        interpolation_kwargs: Dict[str, Any] = {
            "response": decoded_response,
            "headers": headers,
            "last_record": last_record,
            "last_page_size": last_page_size,
        }
        if self._stop_condition and self._stop_condition.eval(self.config, **interpolation_kwargs):
            return None

        token = self._cursor_value.eval(config=self.config, **interpolation_kwargs)
        return token or None

    def get_page_size(self) -> Optional[int]:
        """Return the configured page size, if any."""
        return self.page_size
@@ -0,0 +1,102 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from dataclasses import InitVar, dataclass, field
6
+ from typing import Any, Mapping, Optional, Union
7
+
8
+ import requests
9
+
10
+ from airbyte_cdk.sources.declarative.decoders import (
11
+ Decoder,
12
+ JsonDecoder,
13
+ PaginationDecoderDecorator,
14
+ )
15
+ from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
16
+ from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import (
17
+ PaginationStrategy,
18
+ )
19
+ from airbyte_cdk.sources.types import Config, Record
20
+
21
+
22
@dataclass
class OffsetIncrement(PaginationStrategy):
    """
    Pagination strategy that uses the number of records read so far as the next page token
    Examples:
        # page_size to be a constant integer value
        pagination_strategy:
          type: OffsetIncrement
          page_size: 2

        # page_size to be a constant string value
        pagination_strategy:
          type: OffsetIncrement
          page_size: "2"

        # page_size to be an interpolated string value
        pagination_strategy:
          type: OffsetIncrement
          page_size: "{{ parameters['items_per_page'] }}"

    Attributes:
        page_size (Optional[Union[str, int]]): the number of records to request
    """

    config: Config
    page_size: Optional[Union[str, int]]
    parameters: InitVar[Mapping[str, Any]]
    decoder: Decoder = field(
        default_factory=lambda: PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
    )
    inject_on_first_request: bool = False

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        """Turn the configured page size into an InterpolatedString (or None when it is falsy)."""
        raw_page_size = self.page_size
        if isinstance(raw_page_size, int):
            # Integers are stringified so that every page size goes through interpolation.
            raw_page_size = str(raw_page_size)

        self._page_size: Optional[InterpolatedString] = (
            InterpolatedString(raw_page_size, parameters=parameters) if raw_page_size else None
        )

    @property
    def initial_token(self) -> Optional[Any]:
        """Offset 0 when the offset must be injected on the first request, otherwise None."""
        return 0 if self.inject_on_first_request else None

    def next_page_token(
        self,
        response: requests.Response,
        last_page_size: int,
        last_record: Optional[Record],
        last_page_token_value: Optional[Any] = None,
    ) -> Optional[Any]:
        """
        Compute the offset of the next page.

        :param response: the response to process
        :param last_page_size: the number of records read from the response
        :param last_record: not used by this strategy
        :param last_page_token_value: the offset used for the last request, or None for the first page
        :return: the next offset, or None when pagination should stop
        :raises ValueError: if last_page_token_value is neither None nor an integer
        """
        decoded_response = next(self.decoder.decode(response))

        # Stop paginating when there are fewer records than the page size or the current page has no records
        page_is_incomplete = False
        if self._page_size:
            page_is_incomplete = last_page_size < self._page_size.eval(
                self.config, response=decoded_response
            )
        if page_is_incomplete or last_page_size == 0:
            return None

        if last_page_token_value is None:
            # If the OffsetIncrement strategy does not inject on the first request, the incoming last_page_token_value
            # will be None. For this case, we assume that None was the first page and progress to the next offset
            return 0 + last_page_size

        if not isinstance(last_page_token_value, int):
            raise ValueError(
                f"Last page token value {last_page_token_value} for OffsetIncrement pagination strategy was not an integer"
            )

        return last_page_token_value + last_page_size

    def get_page_size(self) -> Optional[int]:
        """
        Evaluate the page size template against the config.

        :return: the evaluated page size, or None when no page size is configured
        :raises Exception: if the evaluated page size is not an integer
        """
        if not self._page_size:
            return None

        page_size = self._page_size.eval(self.config)
        if not isinstance(page_size, int):
            raise Exception(f"{page_size} is of type {type(page_size)}. Expected {int}")
        return page_size