airbyte-cdk 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (368)
  1. airbyte_cdk/__init__.py +358 -0
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +104 -0
  7. airbyte_cdk/connector.py +123 -0
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/__init__.py +3 -0
  10. airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
  11. airbyte_cdk/connector_builder/main.py +107 -0
  12. airbyte_cdk/connector_builder/models.py +73 -0
  13. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  14. airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
  15. airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
  16. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  17. airbyte_cdk/connector_builder/test_reader/types.py +83 -0
  18. airbyte_cdk/destinations/__init__.py +8 -0
  19. airbyte_cdk/destinations/destination.py +154 -0
  20. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  21. airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
  22. airbyte_cdk/destinations/vector_db_based/config.py +298 -0
  23. airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
  24. airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
  25. airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
  26. airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
  27. airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
  28. airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
  29. airbyte_cdk/entrypoint.py +414 -0
  30. airbyte_cdk/exception_handler.py +56 -0
  31. airbyte_cdk/logger.py +109 -0
  32. airbyte_cdk/models/__init__.py +72 -0
  33. airbyte_cdk/models/airbyte_protocol.py +88 -0
  34. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  35. airbyte_cdk/models/well_known_types.py +5 -0
  36. airbyte_cdk/py.typed +0 -0
  37. airbyte_cdk/sources/__init__.py +26 -0
  38. airbyte_cdk/sources/abstract_source.py +326 -0
  39. airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
  40. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
  41. airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
  42. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
  43. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
  44. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  45. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
  46. airbyte_cdk/sources/config.py +27 -0
  47. airbyte_cdk/sources/connector_state_manager.py +161 -0
  48. airbyte_cdk/sources/declarative/__init__.py +3 -0
  49. airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
  50. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  51. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
  52. airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
  53. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  54. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  55. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  56. airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
  57. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
  58. airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
  59. airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
  60. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
  61. airbyte_cdk/sources/declarative/auth/token.py +267 -0
  62. airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
  63. airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
  64. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
  65. airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
  66. airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
  67. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  68. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  69. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
  70. airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
  71. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
  72. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
  73. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
  74. airbyte_cdk/sources/declarative/declarative_source.py +36 -0
  75. airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
  76. airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
  77. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
  78. airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
  79. airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
  80. airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
  81. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  82. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  83. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  84. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
  85. airbyte_cdk/sources/declarative/exceptions.py +9 -0
  86. airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
  87. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
  88. airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
  89. airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
  90. airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
  91. airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
  92. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
  93. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  94. airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
  95. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
  96. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
  97. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  98. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
  99. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
  100. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  101. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  102. airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
  103. airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
  104. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
  105. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
  106. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
  107. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
  108. airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
  109. airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
  110. airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
  111. airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
  112. airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
  113. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  114. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  115. airbyte_cdk/sources/declarative/models/__init__.py +2 -0
  116. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
  117. airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
  118. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
  119. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
  120. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
  121. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
  122. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
  123. airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
  124. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  125. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  126. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
  127. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  128. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
  129. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
  130. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  131. airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
  132. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
  133. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
  134. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
  135. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
  136. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
  137. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
  138. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
  139. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
  140. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
  141. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
  142. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  143. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
  144. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
  145. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
  146. airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
  147. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
  148. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
  149. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
  150. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
  151. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
  152. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
  153. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
  154. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
  155. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
  156. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
  157. airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
  158. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
  159. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
  160. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  161. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
  162. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
  163. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
  164. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
  165. airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
  166. airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
  167. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  168. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  169. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  170. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  171. airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
  172. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
  173. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  174. airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
  175. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
  176. airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
  177. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
  178. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
  179. airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
  180. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
  181. airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
  182. airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
  183. airbyte_cdk/sources/declarative/spec/spec.py +48 -0
  184. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
  185. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
  186. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
  187. airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
  188. airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
  189. airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
  190. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  191. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  192. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  193. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  194. airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
  195. airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
  196. airbyte_cdk/sources/declarative/types.py +25 -0
  197. airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
  198. airbyte_cdk/sources/file_based/README.md +152 -0
  199. airbyte_cdk/sources/file_based/__init__.py +24 -0
  200. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
  201. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
  202. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
  203. airbyte_cdk/sources/file_based/config/__init__.py +0 -0
  204. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
  205. airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
  206. airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
  207. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  208. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
  209. airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
  210. airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
  211. airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
  212. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  213. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
  214. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
  215. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
  216. airbyte_cdk/sources/file_based/exceptions.py +159 -0
  217. airbyte_cdk/sources/file_based/file_based_source.py +466 -0
  218. airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
  219. airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
  220. airbyte_cdk/sources/file_based/file_record_data.py +22 -0
  221. airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
  222. airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
  223. airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
  224. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  225. airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
  226. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
  227. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
  228. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
  229. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
  230. airbyte_cdk/sources/file_based/remote_file.py +18 -0
  231. airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
  232. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
  233. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
  234. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
  235. airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
  236. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
  237. airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
  238. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
  239. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
  240. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
  241. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
  242. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
  243. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
  244. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
  245. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
  246. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
  247. airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
  248. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
  249. airbyte_cdk/sources/file_based/types.py +10 -0
  250. airbyte_cdk/sources/http_config.py +10 -0
  251. airbyte_cdk/sources/http_logger.py +55 -0
  252. airbyte_cdk/sources/message/__init__.py +19 -0
  253. airbyte_cdk/sources/message/repository.py +137 -0
  254. airbyte_cdk/sources/source.py +95 -0
  255. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  256. airbyte_cdk/sources/streams/__init__.py +8 -0
  257. airbyte_cdk/sources/streams/availability_strategy.py +84 -0
  258. airbyte_cdk/sources/streams/call_rate.py +704 -0
  259. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  260. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  261. airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
  262. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  263. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  264. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  265. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  266. airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
  267. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
  268. airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
  269. airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
  270. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
  271. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  272. airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
  273. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  274. airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
  275. airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
  276. airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
  277. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
  278. airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
  279. airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
  280. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
  281. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
  282. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  283. airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
  284. airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
  285. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
  286. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
  287. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  288. airbyte_cdk/sources/streams/core.py +703 -0
  289. airbyte_cdk/sources/streams/http/__init__.py +10 -0
  290. airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
  291. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  292. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  293. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  294. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  295. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  296. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  297. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  298. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  299. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  300. airbyte_cdk/sources/streams/http/exceptions.py +61 -0
  301. airbyte_cdk/sources/streams/http/http.py +673 -0
  302. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  303. airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
  304. airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
  305. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
  306. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
  307. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
  308. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
  309. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  310. airbyte_cdk/sources/streams/utils/__init__.py +3 -0
  311. airbyte_cdk/sources/types.py +169 -0
  312. airbyte_cdk/sources/utils/__init__.py +7 -0
  313. airbyte_cdk/sources/utils/casing.py +12 -0
  314. airbyte_cdk/sources/utils/files_directory.py +15 -0
  315. airbyte_cdk/sources/utils/record_helper.py +53 -0
  316. airbyte_cdk/sources/utils/schema_helpers.py +230 -0
  317. airbyte_cdk/sources/utils/slice_logger.py +57 -0
  318. airbyte_cdk/sources/utils/transform.py +277 -0
  319. airbyte_cdk/sources/utils/types.py +7 -0
  320. airbyte_cdk/sql/__init__.py +0 -0
  321. airbyte_cdk/sql/_util/__init__.py +0 -0
  322. airbyte_cdk/sql/_util/hashing.py +34 -0
  323. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  324. airbyte_cdk/sql/constants.py +32 -0
  325. airbyte_cdk/sql/exceptions.py +235 -0
  326. airbyte_cdk/sql/secrets.py +123 -0
  327. airbyte_cdk/sql/shared/__init__.py +15 -0
  328. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  329. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  330. airbyte_cdk/sql/types.py +160 -0
  331. airbyte_cdk/test/__init__.py +7 -0
  332. airbyte_cdk/test/catalog_builder.py +81 -0
  333. airbyte_cdk/test/entrypoint_wrapper.py +250 -0
  334. airbyte_cdk/test/mock_http/__init__.py +6 -0
  335. airbyte_cdk/test/mock_http/matcher.py +41 -0
  336. airbyte_cdk/test/mock_http/mocker.py +185 -0
  337. airbyte_cdk/test/mock_http/request.py +103 -0
  338. airbyte_cdk/test/mock_http/response.py +28 -0
  339. airbyte_cdk/test/mock_http/response_builder.py +237 -0
  340. airbyte_cdk/test/state_builder.py +33 -0
  341. airbyte_cdk/test/utils/__init__.py +1 -0
  342. airbyte_cdk/test/utils/data.py +24 -0
  343. airbyte_cdk/test/utils/http_mocking.py +16 -0
  344. airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
  345. airbyte_cdk/test/utils/reading.py +26 -0
  346. airbyte_cdk/utils/__init__.py +10 -0
  347. airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
  348. airbyte_cdk/utils/analytics_message.py +25 -0
  349. airbyte_cdk/utils/constants.py +5 -0
  350. airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
  351. airbyte_cdk/utils/datetime_helpers.py +499 -0
  352. airbyte_cdk/utils/event_timing.py +85 -0
  353. airbyte_cdk/utils/is_cloud_environment.py +18 -0
  354. airbyte_cdk/utils/mapping_helpers.py +162 -0
  355. airbyte_cdk/utils/message_utils.py +26 -0
  356. airbyte_cdk/utils/oneof_option_config.py +33 -0
  357. airbyte_cdk/utils/print_buffer.py +75 -0
  358. airbyte_cdk/utils/schema_inferrer.py +270 -0
  359. airbyte_cdk/utils/slice_hasher.py +37 -0
  360. airbyte_cdk/utils/spec_schema_transformations.py +26 -0
  361. airbyte_cdk/utils/stream_status_utils.py +43 -0
  362. airbyte_cdk/utils/traced_exception.py +145 -0
  363. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
  364. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
  365. airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
  366. airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
  367. airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
  368. airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,112 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from copy import deepcopy
6
+ from dataclasses import InitVar, dataclass, field
7
+ from typing import Any, Dict, Iterable, List, Mapping
8
+
9
+ import dpath
10
+ from typing_extensions import deprecated
11
+
12
+ from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
13
+ from airbyte_cdk.sources.declarative.resolvers.components_resolver import (
14
+ ComponentMappingDefinition,
15
+ ComponentsResolver,
16
+ ResolvedComponentMappingDefinition,
17
+ )
18
+ from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
19
+ from airbyte_cdk.sources.source import ExperimentalClassWarning
20
+ from airbyte_cdk.sources.types import Config
21
+
22
+
23
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@dataclass
class HttpComponentsResolver(ComponentsResolver):
    """
    Produces stream configurations by filling a template with values read through a retriever.

    Attributes:
        retriever (Retriever): Supplies the stream slices and records used to fill the template.
        config (Config): Configuration passed to every interpolation.
        components_mapping (List[ComponentMappingDefinition]): Mappings that pair a field path
            in the template with the value to write there.
        parameters (InitVar[Mapping[str, Any]]): Interpolation parameters, consumed by
            ``__post_init__``.
    """

    retriever: Retriever
    config: Config
    components_mapping: List[ComponentMappingDefinition]
    parameters: InitVar[Mapping[str, Any]]
    # Filled by __post_init__; excluded from the generated __init__ and __repr__.
    _resolved_components: List[ResolvedComponentMappingDefinition] = field(
        init=False, repr=False, default_factory=list
    )

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        """
        Converts every entry of ``components_mapping`` into a resolved definition.

        Args:
            parameters (Mapping[str, Any]): Parameters handed to each created InterpolatedString
                and stored on each resolved definition.

        Raises:
            ValueError: If a mapping value is neither a string nor an InterpolatedString.
        """
        for component_mapping in self.components_mapping:
            raw_value = component_mapping.value
            if not isinstance(raw_value, (str, InterpolatedString)):
                raise ValueError(
                    f"Expected a string or InterpolatedString for value in mapping: {component_mapping}"
                )

            # Plain strings are wrapped; an existing InterpolatedString is reused unchanged.
            if isinstance(raw_value, str):
                interpolated_value = InterpolatedString.create(raw_value, parameters=parameters)
            else:
                interpolated_value = raw_value

            interpolated_path = [
                InterpolatedString.create(segment, parameters=parameters)
                for segment in component_mapping.field_path
            ]

            resolved_definition = ResolvedComponentMappingDefinition(
                field_path=interpolated_path,
                value=interpolated_value,
                value_type=component_mapping.value_type,
                parameters=parameters,
            )
            self._resolved_components.append(resolved_definition)

    def resolve_components(
        self, stream_template_config: Dict[str, Any]
    ) -> Iterable[Dict[str, Any]]:
        """
        Yields one populated copy of the stream template per record read from the retriever.

        Args:
            stream_template_config (Dict[str, Any]): Template to copy and populate. The template
                itself is not modified; each yielded configuration is a deep copy.

        Yields:
            Dict[str, Any]: A copy of the template with every resolved component written into it.
        """
        # Context shared by all interpolations; the per-record entries are overwritten below.
        interpolation_context: Dict[str, Any] = {
            "stream_template_config": stream_template_config
        }

        for stream_slice in self.retriever.stream_slices():
            records = self.retriever.read_records(records_schema={}, stream_slice=stream_slice)
            for components_values in records:
                updated_config = deepcopy(stream_template_config)
                interpolation_context["components_values"] = components_values
                interpolation_context["stream_slice"] = stream_slice

                for resolved_component in self._resolved_components:
                    value_type = resolved_component.value_type
                    # A single-element tuple restricts the evaluated type; None leaves it open.
                    valid_types = (value_type,) if value_type else None
                    value = resolved_component.value.eval(
                        self.config, valid_types=valid_types, **interpolation_context
                    )

                    target_path = [
                        segment.eval(self.config, **interpolation_context)
                        for segment in resolved_component.field_path
                    ]
                    dpath.set(updated_config, target_path, value)

                yield updated_config
@@ -0,0 +1,19 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from airbyte_cdk.sources.declarative.retrievers.async_retriever import AsyncRetriever
6
+ from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
7
+ from airbyte_cdk.sources.declarative.retrievers.simple_retriever import (
8
+ LazySimpleRetriever,
9
+ SimpleRetriever,
10
+ SimpleRetrieverTestReadDecorator,
11
+ )
12
+
13
+ __all__ = [
14
+ "Retriever",
15
+ "SimpleRetriever",
16
+ "SimpleRetrieverTestReadDecorator",
17
+ "AsyncRetriever",
18
+ "LazySimpleRetriever",
19
+ ]
@@ -0,0 +1,124 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+
4
+ from dataclasses import InitVar, dataclass, field
5
+ from typing import Any, Iterable, Mapping, Optional
6
+
7
+ from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
8
+ from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
9
+ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
10
+ AsyncJobPartitionRouter,
11
+ )
12
+ from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
13
+ from airbyte_cdk.sources.streams.core import StreamData
14
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
15
+ from airbyte_cdk.sources.utils.slice_logger import AlwaysLogSliceLogger
16
+
17
+
18
@dataclass
class AsyncRetriever(Retriever):
    """
    Retriever that reads the records produced by asynchronous jobs.

    Slices come from ``stream_slicer``; the jobs attached to a slice are read from the
    ``"jobs"`` entry of the slice's ``extra_fields``, fetched through
    ``stream_slicer.fetch_records`` and then passed through ``record_selector``.
    """

    config: Config
    parameters: InitVar[Mapping[str, Any]]
    record_selector: RecordSelector
    stream_slicer: AsyncJobPartitionRouter
    # Not an init argument: every instance gets its own always-logging slice logger.
    slice_logger: AlwaysLogSliceLogger = field(
        init=False,
        default_factory=lambda: AlwaysLogSliceLogger(),
    )

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        self._parameters = parameters

    @property
    def exit_on_rate_limit(self) -> bool:
        """
        Whether to exit on rate limit. This is a property of the job repository
        and not the stream slicer. The stream slicer is responsible for creating
        the jobs, but the job repository is responsible for managing the rate
        limits and other job-related properties.

        Note:
         - If the `creation_requester` cannot place / create the job - it might be the case of the RateLimits
         - If the `creation_requester` can place / create the job - it means all other requesters should successfully manage
           to complete the results.

        Returns:
            bool: The `creation_requester` setting, or False when the stream slicer has no job orchestrator.
        """
        job_orchestrator = self.stream_slicer._job_orchestrator
        if job_orchestrator is None:
            # Default value when orchestrator is not available
            return False
        return job_orchestrator._job_repository.creation_requester.exit_on_rate_limit  # type: ignore

    @exit_on_rate_limit.setter
    def exit_on_rate_limit(self, value: bool) -> None:
        """
        Sets the `exit_on_rate_limit` property of the job repository > creation_requester,
        meaning that the Job cannot be placed / created if the rate limit is reached.
        Thus no further work on managing jobs is expected to be done.

        The assignment is silently skipped when the stream slicer has no job orchestrator.
        """
        job_orchestrator = self.stream_slicer._job_orchestrator
        if job_orchestrator is not None:
            job_orchestrator._job_repository.creation_requester.exit_on_rate_limit = value  # type: ignore[attr-defined, assignment]

    @property
    def state(self) -> StreamState:
        """
        As a first iteration for sendgrid, there is no state to be managed:
        the state is always an empty mapping.
        """
        return {}

    @state.setter
    def state(self, value: StreamState) -> None:
        """
        As a first iteration for sendgrid, there is no state to be managed:
        the provided value is ignored.
        """
        pass

    def _get_stream_state(self) -> StreamState:
        """
        Gets the current state of the stream.

        Returns:
            StreamState: Mapping[str, Any]
        """

        return self.state

    def _validate_and_get_stream_slice_jobs(
        self, stream_slice: Optional[StreamSlice] = None
    ) -> Iterable[AsyncJob]:
        """
        Returns the jobs attached to the stream_slice.

        Despite its name, this method performs no validation and raises nothing on its own.

        Args:
            stream_slice (Optional[StreamSlice]): The stream slice to read the jobs from.

        Returns:
            Iterable[AsyncJob]: The value stored under the "jobs" key of the slice's
                `extra_fields`, or an empty list when the key is absent or the slice is falsy.
        """
        return stream_slice.extra_fields.get("jobs", []) if stream_slice else []

    def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
        """Yields the slices produced by the stream slicer."""
        yield from self.stream_slicer.stream_slices()

    def read_records(
        self,
        records_schema: Mapping[str, Any],
        stream_slice: Optional[StreamSlice] = None,
    ) -> Iterable[StreamData]:
        """
        Emits the slice log message, then the selected records of the slice's jobs.

        Args:
            records_schema (Mapping[str, Any]): JSON schema forwarded to the record selector.
            stream_slice (Optional[StreamSlice]): The slice to read. When None, the log
                message is created from None and no jobs are read.

        Yields:
            StreamData: First the slice log message, then the filtered and transformed records.
        """
        # `stream_slice` is optional in the signature, so only dereference it when provided;
        # the previous unconditional `stream_slice.cursor_slice` raised AttributeError on None.
        # NOTE(review): relies on `create_slice_log_message` accepting None - confirm against SliceLogger.
        cursor_slice: Optional[Mapping[str, Any]] = (
            stream_slice.cursor_slice if stream_slice is not None else None
        )

        # emit the slice_descriptor log message, for connector builder TestRead
        yield self.slice_logger.create_slice_log_message(cursor_slice)

        stream_state: StreamState = self._get_stream_state()
        jobs: Iterable[AsyncJob] = self._validate_and_get_stream_slice_jobs(stream_slice)
        records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(jobs)

        yield from self.record_selector.filter_and_transform(
            all_data=records,
            stream_state=stream_state,
            records_schema=records_schema,
            stream_slice=stream_slice,
        )
@@ -0,0 +1,89 @@
1
+ #
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import json
6
+ import logging
7
+ import uuid
8
+ from dataclasses import InitVar, dataclass, field
9
+ from pathlib import Path
10
+ from typing import Any, Mapping, Optional, Union
11
+
12
+ from airbyte_cdk.models import AirbyteRecordMessageFileReference
13
+ from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
14
+ from airbyte_cdk.sources.declarative.interpolation.interpolated_string import (
15
+ InterpolatedString,
16
+ )
17
+ from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
18
+ SafeResponse,
19
+ )
20
+ from airbyte_cdk.sources.declarative.requesters import Requester
21
+ from airbyte_cdk.sources.declarative.types import Record, StreamSlice
22
+ from airbyte_cdk.sources.types import Config
23
+ from airbyte_cdk.sources.utils.files_directory import get_files_directory
24
+
25
+ logger = logging.getLogger("airbyte")
26
+
27
+
28
@dataclass
class FileUploader:
    """
    Downloads the file referenced by a record and stores it in the files directory.

    The download target is extracted from the record's data with
    ``download_target_extractor``, requested through ``requester`` and written below
    ``get_files_directory()`` under a sub-directory named after the record's stream.
    The record is then given a file reference describing the stored file.
    """

    requester: Requester
    download_target_extractor: RecordExtractor
    config: Config
    parameters: InitVar[Mapping[str, Any]]

    # Optional template for the stored file name; a random UUID is used when unset.
    filename_extractor: Optional[Union[InterpolatedString, str]] = None
    # Not supported yet: `upload` raises NotImplementedError when this is set.
    content_extractor: Optional[RecordExtractor] = None

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        if self.filename_extractor:
            self.filename_extractor = InterpolatedString.create(
                self.filename_extractor,
                parameters=parameters,
            )

    def upload(self, record: Record) -> None:
        """
        Download the file the record points to and attach a file reference to the record.

        Args:
            record (Record): Record whose data holds the download target. Its
                ``file_reference`` attribute is set on success.

        Raises:
            ValueError: If no download target can be extracted, if it is not a string,
                if the requester returns no response, or if the resulting path would
                fall outside of the files directory.
            NotImplementedError: If a ``content_extractor`` is configured.
        """
        # The extractor works on responses, so the record is wrapped into a fake one.
        mocked_response = SafeResponse()
        mocked_response.content = json.dumps(record.data).encode()
        download_targets = list(self.download_target_extractor.extract_records(mocked_response))
        if not download_targets:
            # Explicit error instead of the bare IndexError raised by indexing an empty list.
            raise ValueError("download_target could not be extracted from the record")
        download_target = download_targets[0]
        if not isinstance(download_target, str):
            raise ValueError(
                f"download_target is expected to be a str but was {type(download_target)}: {download_target}"
            )

        response = self.requester.send_request(
            stream_slice=StreamSlice(
                partition={}, cursor_slice={}, extra_fields={"download_target": download_target}
            ),
        )

        if self.content_extractor:
            raise NotImplementedError("TODO")

        # NOTE(review): presumably `send_request` may return None - confirm against Requester.
        if response is None:
            raise ValueError("No response was returned for the file download request")

        files_directory = Path(get_files_directory())

        file_name = (
            self.filename_extractor.eval(self.config, record=record)
            if self.filename_extractor
            else str(uuid.uuid4())
        )
        file_name = file_name.lstrip("/")
        file_relative_path = Path(record.stream_name) / Path(file_name)

        full_path = files_directory / file_relative_path

        # The file name may come from record data: refuse ".." segments (or symlinks)
        # that would make the write land outside of the files directory.
        try:
            full_path.resolve().relative_to(files_directory.resolve())
        except ValueError as exc:
            raise ValueError(
                f"Refusing to write outside of the files directory: {file_relative_path}"
            ) from exc

        full_path.parent.mkdir(parents=True, exist_ok=True)

        # write_bytes closes the file before returning, so the size read below is final.
        full_path.write_bytes(response.content)
        file_size_bytes = full_path.stat().st_size

        logger.info("File uploaded successfully")
        logger.info("File url: %s", full_path)
        logger.info("File size: %s KB", file_size_bytes / 1024)
        logger.info("File relative path: %s", file_relative_path)

        record.file_reference = AirbyteRecordMessageFileReference(
            staging_file_url=str(full_path),
            source_file_relative_path=str(file_relative_path),
            file_size_bytes=file_size_bytes,
        )
@@ -0,0 +1,54 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from abc import abstractmethod
6
+ from typing import Any, Iterable, Mapping, Optional
7
+
8
+ from airbyte_cdk.sources.declarative.incremental.per_partition_cursor import StreamSlice
9
+ from airbyte_cdk.sources.streams.core import StreamData
10
+ from airbyte_cdk.sources.types import StreamState
11
+
12
+
13
class Retriever:
    """
    Interface for components that fetch a stream's records from an HTTP API source.

    NOTE: the members are marked with ``@abstractmethod`` but the class does not derive
    from ``abc.ABC``, so Python does not enforce the markers at instantiation time.
    """

    @abstractmethod
    def read_records(
        self,
        records_schema: Mapping[str, Any],
        stream_slice: Optional[StreamSlice] = None,
    ) -> Iterable[StreamData]:
        """
        Read the records of a stream from an HTTP API source.

        :param records_schema: json schema describing a record
        :param stream_slice: the stream slice to read data for
        :return: the records read from the API source
        """

    @abstractmethod
    def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
        """Return the slices of the stream."""

    @property
    @abstractmethod
    def state(self) -> StreamState:
        """
        Current state, in a form that can be serialized to a string and sent to the
        output as a STATE AirbyteMessage.

        A cursor value is a good example of a state:
            {
                self.cursor_field: "cursor_value"
            }

        Keep the state as small as possible while still descriptive enough to resume
        the sync from the point where it stopped.
        """

    @state.setter
    @abstractmethod
    def state(self, value: StreamState) -> None:
        """Restore the state from a value previously produced by the state getter."""