airbyte-cdk 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (368) hide show
  1. airbyte_cdk/__init__.py +358 -0
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +104 -0
  7. airbyte_cdk/connector.py +123 -0
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/__init__.py +3 -0
  10. airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
  11. airbyte_cdk/connector_builder/main.py +107 -0
  12. airbyte_cdk/connector_builder/models.py +73 -0
  13. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  14. airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
  15. airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
  16. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  17. airbyte_cdk/connector_builder/test_reader/types.py +83 -0
  18. airbyte_cdk/destinations/__init__.py +8 -0
  19. airbyte_cdk/destinations/destination.py +154 -0
  20. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  21. airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
  22. airbyte_cdk/destinations/vector_db_based/config.py +298 -0
  23. airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
  24. airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
  25. airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
  26. airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
  27. airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
  28. airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
  29. airbyte_cdk/entrypoint.py +414 -0
  30. airbyte_cdk/exception_handler.py +56 -0
  31. airbyte_cdk/logger.py +109 -0
  32. airbyte_cdk/models/__init__.py +72 -0
  33. airbyte_cdk/models/airbyte_protocol.py +88 -0
  34. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  35. airbyte_cdk/models/well_known_types.py +5 -0
  36. airbyte_cdk/py.typed +0 -0
  37. airbyte_cdk/sources/__init__.py +26 -0
  38. airbyte_cdk/sources/abstract_source.py +326 -0
  39. airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
  40. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
  41. airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
  42. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
  43. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
  44. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  45. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
  46. airbyte_cdk/sources/config.py +27 -0
  47. airbyte_cdk/sources/connector_state_manager.py +161 -0
  48. airbyte_cdk/sources/declarative/__init__.py +3 -0
  49. airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
  50. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  51. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
  52. airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
  53. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  54. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  55. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  56. airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
  57. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
  58. airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
  59. airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
  60. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
  61. airbyte_cdk/sources/declarative/auth/token.py +267 -0
  62. airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
  63. airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
  64. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
  65. airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
  66. airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
  67. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  68. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  69. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
  70. airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
  71. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
  72. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
  73. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
  74. airbyte_cdk/sources/declarative/declarative_source.py +36 -0
  75. airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
  76. airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
  77. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
  78. airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
  79. airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
  80. airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
  81. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  82. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  83. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  84. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
  85. airbyte_cdk/sources/declarative/exceptions.py +9 -0
  86. airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
  87. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
  88. airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
  89. airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
  90. airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
  91. airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
  92. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
  93. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  94. airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
  95. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
  96. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
  97. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  98. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
  99. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
  100. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  101. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  102. airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
  103. airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
  104. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
  105. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
  106. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
  107. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
  108. airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
  109. airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
  110. airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
  111. airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
  112. airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
  113. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  114. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  115. airbyte_cdk/sources/declarative/models/__init__.py +2 -0
  116. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
  117. airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
  118. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
  119. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
  120. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
  121. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
  122. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
  123. airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
  124. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  125. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  126. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
  127. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  128. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
  129. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
  130. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  131. airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
  132. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
  133. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
  134. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
  135. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
  136. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
  137. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
  138. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
  139. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
  140. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
  141. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
  142. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  143. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
  144. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
  145. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
  146. airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
  147. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
  148. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
  149. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
  150. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
  151. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
  152. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
  153. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
  154. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
  155. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
  156. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
  157. airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
  158. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
  159. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
  160. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  161. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
  162. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
  163. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
  164. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
  165. airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
  166. airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
  167. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  168. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  169. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  170. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  171. airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
  172. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
  173. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  174. airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
  175. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
  176. airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
  177. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
  178. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
  179. airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
  180. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
  181. airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
  182. airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
  183. airbyte_cdk/sources/declarative/spec/spec.py +48 -0
  184. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
  185. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
  186. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
  187. airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
  188. airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
  189. airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
  190. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  191. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  192. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  193. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  194. airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
  195. airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
  196. airbyte_cdk/sources/declarative/types.py +25 -0
  197. airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
  198. airbyte_cdk/sources/file_based/README.md +152 -0
  199. airbyte_cdk/sources/file_based/__init__.py +24 -0
  200. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
  201. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
  202. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
  203. airbyte_cdk/sources/file_based/config/__init__.py +0 -0
  204. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
  205. airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
  206. airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
  207. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  208. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
  209. airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
  210. airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
  211. airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
  212. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  213. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
  214. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
  215. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
  216. airbyte_cdk/sources/file_based/exceptions.py +159 -0
  217. airbyte_cdk/sources/file_based/file_based_source.py +466 -0
  218. airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
  219. airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
  220. airbyte_cdk/sources/file_based/file_record_data.py +22 -0
  221. airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
  222. airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
  223. airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
  224. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  225. airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
  226. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
  227. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
  228. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
  229. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
  230. airbyte_cdk/sources/file_based/remote_file.py +18 -0
  231. airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
  232. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
  233. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
  234. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
  235. airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
  236. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
  237. airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
  238. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
  239. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
  240. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
  241. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
  242. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
  243. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
  244. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
  245. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
  246. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
  247. airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
  248. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
  249. airbyte_cdk/sources/file_based/types.py +10 -0
  250. airbyte_cdk/sources/http_config.py +10 -0
  251. airbyte_cdk/sources/http_logger.py +55 -0
  252. airbyte_cdk/sources/message/__init__.py +19 -0
  253. airbyte_cdk/sources/message/repository.py +137 -0
  254. airbyte_cdk/sources/source.py +95 -0
  255. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  256. airbyte_cdk/sources/streams/__init__.py +8 -0
  257. airbyte_cdk/sources/streams/availability_strategy.py +84 -0
  258. airbyte_cdk/sources/streams/call_rate.py +704 -0
  259. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  260. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  261. airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
  262. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  263. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  264. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  265. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  266. airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
  267. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
  268. airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
  269. airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
  270. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
  271. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  272. airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
  273. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  274. airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
  275. airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
  276. airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
  277. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
  278. airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
  279. airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
  280. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
  281. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
  282. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  283. airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
  284. airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
  285. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
  286. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
  287. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  288. airbyte_cdk/sources/streams/core.py +703 -0
  289. airbyte_cdk/sources/streams/http/__init__.py +10 -0
  290. airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
  291. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  292. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  293. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  294. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  295. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  296. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  297. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  298. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  299. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  300. airbyte_cdk/sources/streams/http/exceptions.py +61 -0
  301. airbyte_cdk/sources/streams/http/http.py +673 -0
  302. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  303. airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
  304. airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
  305. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
  306. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
  307. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
  308. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
  309. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  310. airbyte_cdk/sources/streams/utils/__init__.py +3 -0
  311. airbyte_cdk/sources/types.py +169 -0
  312. airbyte_cdk/sources/utils/__init__.py +7 -0
  313. airbyte_cdk/sources/utils/casing.py +12 -0
  314. airbyte_cdk/sources/utils/files_directory.py +15 -0
  315. airbyte_cdk/sources/utils/record_helper.py +53 -0
  316. airbyte_cdk/sources/utils/schema_helpers.py +230 -0
  317. airbyte_cdk/sources/utils/slice_logger.py +57 -0
  318. airbyte_cdk/sources/utils/transform.py +277 -0
  319. airbyte_cdk/sources/utils/types.py +7 -0
  320. airbyte_cdk/sql/__init__.py +0 -0
  321. airbyte_cdk/sql/_util/__init__.py +0 -0
  322. airbyte_cdk/sql/_util/hashing.py +34 -0
  323. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  324. airbyte_cdk/sql/constants.py +32 -0
  325. airbyte_cdk/sql/exceptions.py +235 -0
  326. airbyte_cdk/sql/secrets.py +123 -0
  327. airbyte_cdk/sql/shared/__init__.py +15 -0
  328. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  329. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  330. airbyte_cdk/sql/types.py +160 -0
  331. airbyte_cdk/test/__init__.py +7 -0
  332. airbyte_cdk/test/catalog_builder.py +81 -0
  333. airbyte_cdk/test/entrypoint_wrapper.py +250 -0
  334. airbyte_cdk/test/mock_http/__init__.py +6 -0
  335. airbyte_cdk/test/mock_http/matcher.py +41 -0
  336. airbyte_cdk/test/mock_http/mocker.py +185 -0
  337. airbyte_cdk/test/mock_http/request.py +103 -0
  338. airbyte_cdk/test/mock_http/response.py +28 -0
  339. airbyte_cdk/test/mock_http/response_builder.py +237 -0
  340. airbyte_cdk/test/state_builder.py +33 -0
  341. airbyte_cdk/test/utils/__init__.py +1 -0
  342. airbyte_cdk/test/utils/data.py +24 -0
  343. airbyte_cdk/test/utils/http_mocking.py +16 -0
  344. airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
  345. airbyte_cdk/test/utils/reading.py +26 -0
  346. airbyte_cdk/utils/__init__.py +10 -0
  347. airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
  348. airbyte_cdk/utils/analytics_message.py +25 -0
  349. airbyte_cdk/utils/constants.py +5 -0
  350. airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
  351. airbyte_cdk/utils/datetime_helpers.py +499 -0
  352. airbyte_cdk/utils/event_timing.py +85 -0
  353. airbyte_cdk/utils/is_cloud_environment.py +18 -0
  354. airbyte_cdk/utils/mapping_helpers.py +162 -0
  355. airbyte_cdk/utils/message_utils.py +26 -0
  356. airbyte_cdk/utils/oneof_option_config.py +33 -0
  357. airbyte_cdk/utils/print_buffer.py +75 -0
  358. airbyte_cdk/utils/schema_inferrer.py +270 -0
  359. airbyte_cdk/utils/slice_hasher.py +37 -0
  360. airbyte_cdk/utils/spec_schema_transformations.py +26 -0
  361. airbyte_cdk/utils/stream_status_utils.py +43 -0
  362. airbyte_cdk/utils/traced_exception.py +145 -0
  363. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
  364. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
  365. airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
  366. airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
  367. airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
  368. airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,115 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+ import logging
5
+ import threading
6
+ from concurrent.futures import Future, ThreadPoolExecutor
7
+ from typing import Any, Callable, List, Optional
8
+
9
+
10
class ThreadPoolManager:
    """
    Wrapper to abstract away the threadpool and the logic to wait for pending tasks to be completed.

    The manager keeps every submitted future in an internal list. Callers are expected to:

    - call `submit` to schedule work,
    - call `prune_to_validate_has_reached_futures_limit` to drop completed futures and learn whether
      more work may be scheduled,
    - call `check_for_errors_and_shutdown` once at the end to surface failures and release the pool.
    """

    DEFAULT_MAX_QUEUE_SIZE = 10_000

    def __init__(
        self,
        threadpool: ThreadPoolExecutor,
        logger: logging.Logger,
        max_concurrent_tasks: int = DEFAULT_MAX_QUEUE_SIZE,
    ):
        """
        :param threadpool: The threadpool to use
        :param logger: The logger to use
        :param max_concurrent_tasks: The maximum number of tasks that can be pending at the same time
        """
        self._threadpool = threadpool
        self._logger = logger
        self._max_concurrent_tasks = max_concurrent_tasks
        self._futures: List[Future[Any]] = []
        self._lock = threading.Lock()
        self._most_recently_seen_exception: Optional[Exception] = None

        # A warning is logged once the tracked list grows past twice the configured limit.
        self._logging_threshold = max_concurrent_tasks * 2

    def prune_to_validate_has_reached_futures_limit(self) -> bool:
        """
        Drop completed futures, then report whether the number of tracked futures has reached the limit.

        :return: True when `max_concurrent_tasks` or more futures are still tracked after pruning
        """
        self._prune_futures(self._futures)
        if len(self._futures) > self._logging_threshold:
            self._logger.warning(
                f"ThreadPoolManager: The list of futures is getting bigger than expected ({len(self._futures)})"
            )
        return len(self._futures) >= self._max_concurrent_tasks

    def submit(self, function: Callable[..., Any], *args: Any) -> None:
        """
        Schedule `function(*args)` on the threadpool and keep track of the resulting future.

        :param function: The callable to execute
        :param args: Positional arguments forwarded to the callable
        """
        self._futures.append(self._threadpool.submit(function, *args))

    def _prune_futures(self, futures: List[Future[Any]]) -> None:
        """
        Take a list in input and remove the futures that are completed. If a future has an exception, the exception is
        stored so that `check_for_errors_and_shutdown` can raise it later and kill the stream operation.

        Nothing is pruned while the list holds fewer than `max_concurrent_tasks` futures.

        We are using a lock here as without it, the algorithm would not be thread safe
        """
        with self._lock:
            if len(futures) < self._max_concurrent_tasks:
                return

            # Walk backwards so that popping an element does not shift the indexes still to be visited.
            for index in reversed(range(len(futures))):
                future = futures[index]

                if future.done():
                    # Only call future.exception() if the future is known to be done because it will block until the future is done.
                    # See https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Future.exception
                    optional_exception = future.exception()
                    if optional_exception:
                        # Exception handling should be done in the main thread. Hence, we only store the exception and expect the main
                        # thread to call raise_if_exception
                        # We do not expect this error to happen. The futures created during concurrent syncs should catch the exception and
                        # push it to the queue. If this exception occurs, please review the futures and how they handle exceptions.
                        wrapped_exception = RuntimeError(
                            f"Failed processing a future: {optional_exception}. Please contact the Airbyte team."
                        )
                        # Chain the original failure so that its traceback is still available when the wrapper is logged or raised.
                        wrapped_exception.__cause__ = optional_exception
                        self._most_recently_seen_exception = wrapped_exception
                    futures.pop(index)

    def _shutdown(self) -> None:
        """Shut the threadpool down without waiting, cancelling the futures that have not started yet."""
        # Without a way to stop the threads that have already started, this will not stop the Python application. We are fine today with
        # this imperfect approach because we only do this in case of `self._most_recently_seen_exception` which we don't expect to happen
        self._threadpool.shutdown(wait=False, cancel_futures=True)

    def is_done(self) -> bool:
        """
        :return: True when every tracked future is done (also True when no future is tracked)
        """
        return all(future.done() for future in self._futures)

    def check_for_errors_and_shutdown(self) -> None:
        """
        Check if any of the futures have an exception, and raise it if so. If all futures are done, shutdown the threadpool.
        If the futures are not done, raise an exception.

        Note that `Future.exception()` is called without a timeout on every tracked future, hence this method waits for
        the tasks that are still running before inspecting them.

        :raises RuntimeError: if an exception was recorded while pruning, if a tracked future failed or if a tracked
            future is not done
        :return:
        """
        if self._most_recently_seen_exception:
            self._logger.exception(
                "An unknown exception has occurred while reading concurrently",
                exc_info=self._most_recently_seen_exception,
            )
            self._stop_and_raise_exception(self._most_recently_seen_exception)

        exceptions_from_futures = [
            f for f in [future.exception() for future in self._futures] if f is not None
        ]
        if exceptions_from_futures:
            exception = RuntimeError(f"Failed reading with errors: {exceptions_from_futures}")
            self._stop_and_raise_exception(exception)
        else:
            futures_not_done = [f for f in self._futures if not f.done()]
            if futures_not_done:
                exception = RuntimeError(
                    f"Failed reading with futures not done: {futures_not_done}"
                )
                self._stop_and_raise_exception(exception)
            else:
                self._shutdown()

    def _stop_and_raise_exception(self, exception: BaseException) -> None:
        """
        Shut the threadpool down and raise `exception`.

        :param exception: The exception to raise once the shutdown has been requested
        """
        self._shutdown()
        raise exception
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from typing import Any, Dict
6
+
7
+ from pydantic.v1 import BaseModel
8
+
9
+ from airbyte_cdk.sources.utils.schema_helpers import expand_refs, rename_key
10
+
11
+
12
class BaseConfig(BaseModel):
    """Base class for connector spec, adds the following behaviour:

    - resolve $ref and replace it with definition
    - replace all occurrences of anyOf with oneOf
    - drop description
    """

    @classmethod
    def schema(cls, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Build the model schema through the parent implementation, then post-process it in place.

        :param args: Positional arguments forwarded untouched to the parent `schema`
        :param kwargs: Keyword arguments forwarded untouched to the parent `schema`
        :return: The post-processed schema
        """
        generated = super().schema(*args, **kwargs)
        # The UI supports only oneOf, so every anyOf key is renamed
        rename_key(generated, old_key="anyOf", new_key="oneOf")
        expand_refs(generated)
        # The top-level description is added from the docstring; drop it when present
        generated.pop("description", None)
        return generated
@@ -0,0 +1,161 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import copy
6
+ from dataclasses import dataclass
7
+ from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union, cast
8
+
9
+ from airbyte_cdk.models import (
10
+ AirbyteMessage,
11
+ AirbyteStateBlob,
12
+ AirbyteStateMessage,
13
+ AirbyteStateType,
14
+ AirbyteStreamState,
15
+ StreamDescriptor,
16
+ )
17
+ from airbyte_cdk.models import Type as MessageType
18
+ from airbyte_cdk.models.airbyte_protocol import AirbyteGlobalState, AirbyteStateBlob
19
+
20
+
21
@dataclass(frozen=True)
class HashableStreamDescriptor:
    """
    Helper class that mirrors the StreamDescriptor class that is auto generated from the Airbyte Protocol, with its fields
    frozen so that instances can be used as hash keys. This is only marked public because we use it outside for unit tests.
    """

    # Name of the stream
    name: str
    # Namespace of the stream; defaults to None
    namespace: Optional[str] = None
30
+
31
+
32
class ConnectorStateManager:
    """
    ConnectorStateManager consolidates the various forms of a stream's incoming state message (STREAM / GLOBAL) under a common
    interface. It also provides methods to extract and update state
    """

    def __init__(self, state: Optional[List[AirbyteStateMessage]] = None):
        """
        :param state: The incoming list of state messages, or None when there is no prior state
        :raises ValueError: If the incoming state is a GLOBAL state message that contains a shared_state
        """
        shared_state, per_stream_states = self._extract_from_state_message(state)

        # We explicitly throw an error if we receive a GLOBAL state message that contains a shared_state because API sources are
        # designed to checkpoint state independently of one another. API sources should never be emitting a state message where
        # shared_state is populated. Rather than define how to handle shared_state without a clear use case, we're opting to throw an
        # error instead and if/when we find one, we will then implement processing of the shared_state value.
        if shared_state:
            raise ValueError(
                "Received a GLOBAL AirbyteStateMessage that contains a shared_state. This library only ever generates per-STREAM "
                "STATE messages so this was not generated by this connector. This must be an orchestrator or platform error. GLOBAL "
                "state messages with shared_state will not be processed correctly. "
            )
        self.per_stream_states = per_stream_states

    def get_stream_state(
        self, stream_name: str, namespace: Optional[str]
    ) -> MutableMapping[str, Any]:
        """
        Retrieves the state of a given stream based on its descriptor (name + namespace).
        :param stream_name: Name of the stream being fetched
        :param namespace: Namespace of the stream being fetched
        :return: A deep copy of the per-stream state for a stream, or an empty mapping when no state is stored for it
        """
        stream_state: Optional[AirbyteStateBlob] = self.per_stream_states.get(
            HashableStreamDescriptor(name=stream_name, namespace=namespace)
        )
        if stream_state:
            # Deep copy so that callers mutating the returned mapping cannot alter the stored state
            return copy.deepcopy(dict(stream_state.__dict__))
        return {}

    def update_state_for_stream(
        self, stream_name: str, namespace: Optional[str], value: Mapping[str, Any]
    ) -> None:
        """
        Overwrites the state blob of a specific stream based on the provided stream name and optional namespace
        :param stream_name: The name of the stream whose state is being updated
        :param namespace: The namespace of the stream if it exists
        :param value: A stream state mapping that is being updated for a stream
        """
        stream_descriptor = HashableStreamDescriptor(name=stream_name, namespace=namespace)
        self.per_stream_states[stream_descriptor] = AirbyteStateBlob(value)

    def create_state_message(self, stream_name: str, namespace: Optional[str]) -> AirbyteMessage:
        """
        Generates an AirbyteMessage using the current per-stream state of a specified stream
        :param stream_name: The name of the stream for the message that is being created
        :param namespace: The namespace of the stream for the message that is being created
        :return: The Airbyte state message to be emitted by the connector during a sync
        """
        hashable_descriptor = HashableStreamDescriptor(name=stream_name, namespace=namespace)
        # Fall back to an empty blob when nothing is stored for this stream
        stream_state = self.per_stream_states.get(hashable_descriptor) or AirbyteStateBlob()

        return AirbyteMessage(
            type=MessageType.STATE,
            state=AirbyteStateMessage(
                type=AirbyteStateType.STREAM,
                stream=AirbyteStreamState(
                    stream_descriptor=StreamDescriptor(name=stream_name, namespace=namespace),
                    stream_state=stream_state,
                ),
            ),
        )

    @classmethod
    def _extract_from_state_message(
        cls,
        state: Optional[List[AirbyteStateMessage]],
    ) -> Tuple[
        Optional[AirbyteStateBlob],
        MutableMapping[HashableStreamDescriptor, Optional[AirbyteStateBlob]],
    ]:
        """
        Takes an incoming list of state messages or a global state message and extracts state attributes according to
        type which can then be assigned to the new state manager being instantiated
        :param state: The incoming state input
        :return: A tuple of shared state and per stream state assembled from the incoming state list
        """
        if state is None:
            return None, {}

        if cls._is_global_state(state):
            # _is_global_state checked that this is a single GLOBAL message; its `global_` payload is assumed to be populated
            global_state = cast(AirbyteGlobalState, state[0].global_)
            # shared_state may legitimately be empty here; the constructor only rejects it when it is truthy
            shared_state: AirbyteStateBlob = cast(
                AirbyteStateBlob, copy.deepcopy(global_state.shared_state)
            )
            streams = {
                HashableStreamDescriptor(
                    name=per_stream_state.stream_descriptor.name,
                    namespace=per_stream_state.stream_descriptor.namespace,
                ): per_stream_state.stream_state
                for per_stream_state in global_state.stream_states  # type: ignore[union-attr] # global_state has shared_state
            }
            return shared_state, streams

        streams = {
            HashableStreamDescriptor(
                name=per_stream_state.stream.stream_descriptor.name,  # type: ignore[union-attr] # stream has stream_descriptor
                namespace=per_stream_state.stream.stream_descriptor.namespace,  # type: ignore[union-attr] # stream has stream_descriptor
            ): per_stream_state.stream.stream_state  # type: ignore[union-attr] # stream has stream_state
            for per_stream_state in state
            if per_stream_state.type == AirbyteStateType.STREAM
            # Check the value rather than `hasattr`: the attribute can exist while being None, and such a message must be
            # skipped instead of failing on `.stream_descriptor`
            and getattr(per_stream_state, "stream", None) is not None
        }
        return None, streams

    @staticmethod
    def _is_global_state(state: Union[List[AirbyteStateMessage], MutableMapping[str, Any]]) -> bool:
        """
        :param state: The incoming state input
        :return: True only when the input is a list holding exactly one AirbyteStateMessage of type GLOBAL
        """
        return (
            isinstance(state, list)
            and len(state) == 1
            and isinstance(state[0], AirbyteStateMessage)
            and state[0].type == AirbyteStateType.GLOBAL
        )

    @staticmethod
    def _is_per_stream_state(
        state: Union[List[AirbyteStateMessage], MutableMapping[str, Any]],
    ) -> bool:
        """
        :param state: The incoming state input
        :return: True when the input is a list; the content of the list is not inspected
        """
        return isinstance(state, list)
@@ -0,0 +1,3 @@
1
+ #
2
+ # Copyright (c) 2021 Airbyte, Inc., all rights reserved.
3
+ #
File without changes
@@ -0,0 +1,52 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+
4
+ from datetime import timedelta
5
+ from typing import Optional
6
+
7
+ from airbyte_cdk.sources.declarative.async_job.timer import Timer
8
+ from airbyte_cdk.sources.types import StreamSlice
9
+
10
+ from .status import AsyncJobStatus
11
+
12
+
13
class AsyncJob:
    """
    Description of an API job.

    Note that the timer will only stop once `update_status` is called so the job might be completed on the API side but until we query for
    it and call `AsyncJob.update_status`, `AsyncJob.status` will not reflect the actual API side status.
    """

    def __init__(
        self, api_job_id: str, job_parameters: StreamSlice, timeout: Optional[timedelta] = None
    ) -> None:
        """
        Create a job in the RUNNING status and start its timer immediately.

        :param api_job_id: Identifier of the job on the API side
        :param job_parameters: The stream slice the job was created for
        :param timeout: Duration given to the timer; defaults to 60 minutes when None
        """
        self._api_job_id = api_job_id
        self._job_parameters = job_parameters
        self._status = AsyncJobStatus.RUNNING

        # Compare against None explicitly: timedelta(0) is falsy, so a truthiness check would silently replace an explicit
        # zero timeout with the default
        effective_timeout = timeout if timeout is not None else timedelta(minutes=60)
        self._timer = Timer(effective_timeout)
        self._timer.start()

    def api_job_id(self) -> str:
        """
        :return: The identifier of the job on the API side
        """
        return self._api_job_id

    def status(self) -> AsyncJobStatus:
        """
        :return: TIMED_OUT when the timer reports a timeout, otherwise the last status set on this job
        """
        if self._timer.has_timed_out():
            return AsyncJobStatus.TIMED_OUT
        return self._status

    def job_parameters(self) -> StreamSlice:
        """
        :return: The stream slice the job was created for
        """
        return self._job_parameters

    def update_status(self, status: AsyncJobStatus) -> None:
        """
        Record a new status and keep the timer in sync with it.

        :param status: The status to store for this job
        """
        if self._status != AsyncJobStatus.RUNNING and status == AsyncJobStatus.RUNNING:
            # The job goes back to RUNNING from another status: start the timer again
            self._timer.start()
        elif status.is_terminal():
            self._timer.stop()

        self._status = status

    def __repr__(self) -> str:
        return f"AsyncJob(api_job_id={self.api_job_id()}, job_parameters={self.job_parameters()}, status={self.status()})"