airbyte-cdk 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (368) hide show
  1. airbyte_cdk/__init__.py +358 -0
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +104 -0
  7. airbyte_cdk/connector.py +123 -0
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/__init__.py +3 -0
  10. airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
  11. airbyte_cdk/connector_builder/main.py +107 -0
  12. airbyte_cdk/connector_builder/models.py +73 -0
  13. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  14. airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
  15. airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
  16. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  17. airbyte_cdk/connector_builder/test_reader/types.py +83 -0
  18. airbyte_cdk/destinations/__init__.py +8 -0
  19. airbyte_cdk/destinations/destination.py +154 -0
  20. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  21. airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
  22. airbyte_cdk/destinations/vector_db_based/config.py +298 -0
  23. airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
  24. airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
  25. airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
  26. airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
  27. airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
  28. airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
  29. airbyte_cdk/entrypoint.py +414 -0
  30. airbyte_cdk/exception_handler.py +56 -0
  31. airbyte_cdk/logger.py +109 -0
  32. airbyte_cdk/models/__init__.py +72 -0
  33. airbyte_cdk/models/airbyte_protocol.py +88 -0
  34. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  35. airbyte_cdk/models/well_known_types.py +5 -0
  36. airbyte_cdk/py.typed +0 -0
  37. airbyte_cdk/sources/__init__.py +26 -0
  38. airbyte_cdk/sources/abstract_source.py +326 -0
  39. airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
  40. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
  41. airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
  42. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
  43. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
  44. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  45. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
  46. airbyte_cdk/sources/config.py +27 -0
  47. airbyte_cdk/sources/connector_state_manager.py +161 -0
  48. airbyte_cdk/sources/declarative/__init__.py +3 -0
  49. airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
  50. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  51. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
  52. airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
  53. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  54. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  55. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  56. airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
  57. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
  58. airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
  59. airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
  60. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
  61. airbyte_cdk/sources/declarative/auth/token.py +267 -0
  62. airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
  63. airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
  64. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
  65. airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
  66. airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
  67. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  68. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  69. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
  70. airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
  71. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
  72. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
  73. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
  74. airbyte_cdk/sources/declarative/declarative_source.py +36 -0
  75. airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
  76. airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
  77. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
  78. airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
  79. airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
  80. airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
  81. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  82. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  83. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  84. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
  85. airbyte_cdk/sources/declarative/exceptions.py +9 -0
  86. airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
  87. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
  88. airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
  89. airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
  90. airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
  91. airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
  92. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
  93. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  94. airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
  95. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
  96. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
  97. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  98. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
  99. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
  100. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  101. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  102. airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
  103. airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
  104. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
  105. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
  106. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
  107. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
  108. airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
  109. airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
  110. airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
  111. airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
  112. airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
  113. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  114. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  115. airbyte_cdk/sources/declarative/models/__init__.py +2 -0
  116. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
  117. airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
  118. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
  119. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
  120. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
  121. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
  122. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
  123. airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
  124. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  125. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  126. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
  127. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  128. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
  129. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
  130. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  131. airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
  132. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
  133. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
  134. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
  135. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
  136. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
  137. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
  138. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
  139. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
  140. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
  141. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
  142. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  143. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
  144. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
  145. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
  146. airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
  147. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
  148. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
  149. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
  150. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
  151. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
  152. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
  153. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
  154. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
  155. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
  156. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
  157. airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
  158. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
  159. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
  160. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  161. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
  162. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
  163. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
  164. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
  165. airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
  166. airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
  167. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  168. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  169. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  170. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  171. airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
  172. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
  173. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  174. airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
  175. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
  176. airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
  177. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
  178. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
  179. airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
  180. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
  181. airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
  182. airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
  183. airbyte_cdk/sources/declarative/spec/spec.py +48 -0
  184. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
  185. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
  186. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
  187. airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
  188. airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
  189. airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
  190. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  191. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  192. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  193. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  194. airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
  195. airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
  196. airbyte_cdk/sources/declarative/types.py +25 -0
  197. airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
  198. airbyte_cdk/sources/file_based/README.md +152 -0
  199. airbyte_cdk/sources/file_based/__init__.py +24 -0
  200. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
  201. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
  202. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
  203. airbyte_cdk/sources/file_based/config/__init__.py +0 -0
  204. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
  205. airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
  206. airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
  207. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  208. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
  209. airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
  210. airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
  211. airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
  212. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  213. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
  214. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
  215. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
  216. airbyte_cdk/sources/file_based/exceptions.py +159 -0
  217. airbyte_cdk/sources/file_based/file_based_source.py +466 -0
  218. airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
  219. airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
  220. airbyte_cdk/sources/file_based/file_record_data.py +22 -0
  221. airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
  222. airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
  223. airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
  224. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  225. airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
  226. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
  227. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
  228. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
  229. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
  230. airbyte_cdk/sources/file_based/remote_file.py +18 -0
  231. airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
  232. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
  233. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
  234. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
  235. airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
  236. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
  237. airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
  238. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
  239. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
  240. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
  241. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
  242. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
  243. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
  244. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
  245. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
  246. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
  247. airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
  248. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
  249. airbyte_cdk/sources/file_based/types.py +10 -0
  250. airbyte_cdk/sources/http_config.py +10 -0
  251. airbyte_cdk/sources/http_logger.py +55 -0
  252. airbyte_cdk/sources/message/__init__.py +19 -0
  253. airbyte_cdk/sources/message/repository.py +137 -0
  254. airbyte_cdk/sources/source.py +95 -0
  255. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  256. airbyte_cdk/sources/streams/__init__.py +8 -0
  257. airbyte_cdk/sources/streams/availability_strategy.py +84 -0
  258. airbyte_cdk/sources/streams/call_rate.py +704 -0
  259. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  260. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  261. airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
  262. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  263. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  264. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  265. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  266. airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
  267. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
  268. airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
  269. airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
  270. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
  271. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  272. airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
  273. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  274. airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
  275. airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
  276. airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
  277. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
  278. airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
  279. airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
  280. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
  281. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
  282. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  283. airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
  284. airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
  285. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
  286. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
  287. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  288. airbyte_cdk/sources/streams/core.py +703 -0
  289. airbyte_cdk/sources/streams/http/__init__.py +10 -0
  290. airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
  291. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  292. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  293. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  294. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  295. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  296. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  297. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  298. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  299. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  300. airbyte_cdk/sources/streams/http/exceptions.py +61 -0
  301. airbyte_cdk/sources/streams/http/http.py +673 -0
  302. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  303. airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
  304. airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
  305. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
  306. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
  307. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
  308. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
  309. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  310. airbyte_cdk/sources/streams/utils/__init__.py +3 -0
  311. airbyte_cdk/sources/types.py +169 -0
  312. airbyte_cdk/sources/utils/__init__.py +7 -0
  313. airbyte_cdk/sources/utils/casing.py +12 -0
  314. airbyte_cdk/sources/utils/files_directory.py +15 -0
  315. airbyte_cdk/sources/utils/record_helper.py +53 -0
  316. airbyte_cdk/sources/utils/schema_helpers.py +230 -0
  317. airbyte_cdk/sources/utils/slice_logger.py +57 -0
  318. airbyte_cdk/sources/utils/transform.py +277 -0
  319. airbyte_cdk/sources/utils/types.py +7 -0
  320. airbyte_cdk/sql/__init__.py +0 -0
  321. airbyte_cdk/sql/_util/__init__.py +0 -0
  322. airbyte_cdk/sql/_util/hashing.py +34 -0
  323. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  324. airbyte_cdk/sql/constants.py +32 -0
  325. airbyte_cdk/sql/exceptions.py +235 -0
  326. airbyte_cdk/sql/secrets.py +123 -0
  327. airbyte_cdk/sql/shared/__init__.py +15 -0
  328. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  329. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  330. airbyte_cdk/sql/types.py +160 -0
  331. airbyte_cdk/test/__init__.py +7 -0
  332. airbyte_cdk/test/catalog_builder.py +81 -0
  333. airbyte_cdk/test/entrypoint_wrapper.py +250 -0
  334. airbyte_cdk/test/mock_http/__init__.py +6 -0
  335. airbyte_cdk/test/mock_http/matcher.py +41 -0
  336. airbyte_cdk/test/mock_http/mocker.py +185 -0
  337. airbyte_cdk/test/mock_http/request.py +103 -0
  338. airbyte_cdk/test/mock_http/response.py +28 -0
  339. airbyte_cdk/test/mock_http/response_builder.py +237 -0
  340. airbyte_cdk/test/state_builder.py +33 -0
  341. airbyte_cdk/test/utils/__init__.py +1 -0
  342. airbyte_cdk/test/utils/data.py +24 -0
  343. airbyte_cdk/test/utils/http_mocking.py +16 -0
  344. airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
  345. airbyte_cdk/test/utils/reading.py +26 -0
  346. airbyte_cdk/utils/__init__.py +10 -0
  347. airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
  348. airbyte_cdk/utils/analytics_message.py +25 -0
  349. airbyte_cdk/utils/constants.py +5 -0
  350. airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
  351. airbyte_cdk/utils/datetime_helpers.py +499 -0
  352. airbyte_cdk/utils/event_timing.py +85 -0
  353. airbyte_cdk/utils/is_cloud_environment.py +18 -0
  354. airbyte_cdk/utils/mapping_helpers.py +162 -0
  355. airbyte_cdk/utils/message_utils.py +26 -0
  356. airbyte_cdk/utils/oneof_option_config.py +33 -0
  357. airbyte_cdk/utils/print_buffer.py +75 -0
  358. airbyte_cdk/utils/schema_inferrer.py +270 -0
  359. airbyte_cdk/utils/slice_hasher.py +37 -0
  360. airbyte_cdk/utils/spec_schema_transformations.py +26 -0
  361. airbyte_cdk/utils/stream_status_utils.py +43 -0
  362. airbyte_cdk/utils/traced_exception.py +145 -0
  363. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
  364. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
  365. airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
  366. airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
  367. airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
  368. airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
airbyte_cdk/logger.py ADDED
@@ -0,0 +1,109 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import json
6
+ import logging
7
+ import logging.config
8
+ from typing import Any, Callable, Mapping, Optional, Tuple
9
+
10
+ import orjson
11
+
12
+ from airbyte_cdk.models import (
13
+ AirbyteLogMessage,
14
+ AirbyteMessage,
15
+ AirbyteMessageSerializer,
16
+ Level,
17
+ Type,
18
+ )
19
+ from airbyte_cdk.utils import PrintBuffer
20
+ from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
21
+
22
# Module-level buffered stream used as the target of the console handler below.
# NOTE(review): flush_interval is presumably expressed in seconds — confirm against PrintBuffer.
PRINT_BUFFER = PrintBuffer(flush_interval=0.1)

# Logging configuration applied through logging.config.dictConfig (see init_logger).
LOGGING_CONFIG = {
    "version": 1,
    # Do not disable loggers that already exist when this configuration is applied.
    "disable_existing_loggers": False,
    "formatters": {
        # "()" names the factory that dictConfig instantiates for this formatter.
        "airbyte": {"()": "airbyte_cdk.logger.AirbyteLogFormatter", "format": "%(message)s"},
    },
    "handlers": {
        # Single handler: a StreamHandler writing to PRINT_BUFFER using the "airbyte" formatter.
        "console": {
            "class": "logging.StreamHandler",
            "stream": PRINT_BUFFER,
            "formatter": "airbyte",
        },
    },
    # The root logger is given the console handler.
    "root": {
        "handlers": ["console"],
    },
}
41
+
42
+
43
def init_logger(name: Optional[str] = None) -> logging.Logger:
    """Return the logger called ``name``, set to INFO, after applying ``LOGGING_CONFIG``.

    When ``name`` is None, ``logging.getLogger`` hands back the root logger.
    Every call re-applies ``LOGGING_CONFIG`` through ``logging.config.dictConfig``.
    """
    configured_logger = logging.getLogger(name)
    configured_logger.setLevel(logging.INFO)
    logging.config.dictConfig(LOGGING_CONFIG)
    return configured_logger
49
+
50
+
51
def lazy_log(logger: logging.Logger, level: int, lazy_log_provider: Callable[[], str]) -> None:
    """Log a lazily-built message.

    ``lazy_log_provider`` is only invoked when ``logger`` is enabled for ``level``,
    so the cost of building the message is skipped for suppressed levels.
    """
    if not logger.isEnabledFor(level):
        return
    rendered = lazy_log_provider()
    logger.log(level, rendered)
57
+
58
+
59
class AirbyteLogFormatter(logging.Formatter):
    """Output log records using AirbyteMessage"""

    # Transforming Python log levels to Airbyte protocol log levels
    level_mapping = {
        logging.FATAL: Level.FATAL,
        logging.ERROR: Level.ERROR,
        logging.WARNING: Level.WARN,
        logging.INFO: Level.INFO,
        logging.DEBUG: Level.DEBUG,
    }

    # Attribute names present on every LogRecord, captured once from an empty record.
    # Anything else found on a record was supplied by the caller through ``extra``.
    _default_record_attrs = frozenset(
        logging.LogRecord("", 0, "", 0, None, None, None).__dict__
    )

    def format(self, record: logging.LogRecord) -> str:
        """Return a JSON representation of the log message.

        DEBUG records are emitted as a plain JSON object carrying the message and any
        ``extra`` fields; every other record is wrapped in an AirbyteMessage of type LOG.
        In both cases the text goes through ``filter_secrets`` before being returned.
        """
        # Levels outside the mapping (e.g. custom numeric levels) fall back to the
        # Level.INFO member, so the value is always a Level like the mapped ones.
        airbyte_level = self.level_mapping.get(record.levelno, Level.INFO)
        if airbyte_level == Level.DEBUG:
            extras = self.extract_extra_args_from_record(record)
            debug_dict = {"type": "DEBUG", "message": record.getMessage(), "data": extras}
            return filter_secrets(json.dumps(debug_dict))
        else:
            message = super().format(record)
            message = filter_secrets(message)
            log_message = AirbyteMessage(
                type=Type.LOG, log=AirbyteLogMessage(level=airbyte_level, message=message)
            )
            return orjson.dumps(AirbyteMessageSerializer.dump(log_message)).decode()

    @staticmethod
    def extract_extra_args_from_record(record: logging.LogRecord) -> Mapping[str, Any]:
        """
        The python logger conflates default args with extra args. We compare against the attributes
        of an empty log record to isolate fields passed to the log record via extra by the developer.

        Returns a mapping of each extra attribute name to the ``str()`` of its value.
        """
        default_attrs = AirbyteLogFormatter._default_record_attrs
        return {
            key: str(value) for key, value in record.__dict__.items() if key not in default_attrs
        }
95
+
96
+
97
def _level_from_name(level_name: str) -> int:
    """Translate a level name into a numeric logging level, never returning a string."""
    resolved = logging.getLevelName(level_name)
    if isinstance(resolved, int):
        return resolved
    # logging.getLevelName returns the string "Level <name>" for names it does not know.
    # The standard library has no TRACE level, so map it to DEBUG (the closest one);
    # any other unknown name falls back to INFO.
    if level_name == "TRACE":
        return logging.DEBUG
    return logging.INFO


def log_by_prefix(msg: str, default_level: str) -> Tuple[int, str]:
    """Custom method, which takes log level from first word of message.

    :param msg: raw message, optionally starting with one of
        FATAL, ERROR, WARN, INFO, DEBUG or TRACE.
    :param default_level: level name used when the message has no recognised prefix.
    :return: ``(numeric level, message)``. When a prefix is recognised it is removed and the
        remaining words are re-joined with single spaces; otherwise ``msg`` is returned unchanged.
    """
    valid_log_types = ["FATAL", "ERROR", "WARN", "INFO", "DEBUG", "TRACE"]
    split_line = msg.split()
    # An empty or whitespace-only message has no first word and takes the default branch.
    first_word = next(iter(split_line), None)
    if first_word in valid_log_types:
        log_level = _level_from_name(first_word)
        rendered_message = " ".join(split_line[1:])
    else:
        log_level = _level_from_name(default_level)
        rendered_message = msg

    return log_level, rendered_message
@@ -0,0 +1,72 @@
1
+ # Earlier versions of airbyte-cdk (<= 0.28.0) had the airbyte_protocol python classes
2
+ # declared inline in the airbyte-cdk code. However, somewhere around Feb 2023 the
3
+ # Airbyte Protocol moved to its own repo/PyPI package, called airbyte-protocol-models.
4
+ # This directory including the airbyte_protocol.py and well_known_types.py files
5
+ # are just wrappers on top of that stand-alone package which do some namespacing magic
6
+ # to make the airbyte_protocol python classes available to the airbyte-cdk consumer as part
7
+ # of airbyte-cdk rather than a standalone package.
8
+ from .airbyte_protocol import (
9
+ AdvancedAuth,
10
+ AirbyteAnalyticsTraceMessage,
11
+ AirbyteCatalog,
12
+ AirbyteConnectionStatus,
13
+ AirbyteControlConnectorConfigMessage,
14
+ AirbyteControlMessage,
15
+ AirbyteErrorTraceMessage,
16
+ AirbyteEstimateTraceMessage,
17
+ AirbyteGlobalState,
18
+ AirbyteLogMessage,
19
+ AirbyteMessage,
20
+ AirbyteProtocol,
21
+ AirbyteRecordMessage,
22
+ AirbyteRecordMessageFileReference,
23
+ AirbyteStateBlob,
24
+ AirbyteStateMessage,
25
+ AirbyteStateStats,
26
+ AirbyteStateType,
27
+ AirbyteStream,
28
+ AirbyteStreamState,
29
+ AirbyteStreamStatus,
30
+ AirbyteStreamStatusReason,
31
+ AirbyteStreamStatusReasonType,
32
+ AirbyteStreamStatusTraceMessage,
33
+ AirbyteTraceMessage,
34
+ AuthFlowType,
35
+ ConfiguredAirbyteCatalog,
36
+ ConfiguredAirbyteStream,
37
+ ConnectorSpecification,
38
+ DestinationSyncMode,
39
+ EstimateType,
40
+ FailureType,
41
+ Level,
42
+ OAuthConfigSpecification,
43
+ OauthConnectorInputSpecification,
44
+ OrchestratorType,
45
+ State,
46
+ Status,
47
+ StreamDescriptor,
48
+ SyncMode,
49
+ TraceType,
50
+ Type,
51
+ )
52
+ from .airbyte_protocol_serializers import (
53
+ AirbyteMessageSerializer,
54
+ AirbyteStateMessageSerializer,
55
+ AirbyteStreamStateSerializer,
56
+ ConfiguredAirbyteCatalogSerializer,
57
+ ConfiguredAirbyteStreamSerializer,
58
+ ConnectorSpecificationSerializer,
59
+ )
60
+ from .well_known_types import (
61
+ BinaryData,
62
+ Boolean,
63
+ Date,
64
+ Integer,
65
+ Model,
66
+ Number,
67
+ String,
68
+ TimestampWithoutTimezone,
69
+ TimestampWithTimezone,
70
+ TimeWithoutTimezone,
71
+ TimeWithTimezone,
72
+ )
@@ -0,0 +1,88 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from dataclasses import InitVar, dataclass
6
+ from typing import Annotated, Any, Dict, List, Mapping, Optional, Union
7
+
8
+ from airbyte_protocol_dataclasses.models import * # noqa: F403 # Allow '*'
9
+ from serpyco_rs.metadata import Alias
10
+
11
+ # ruff: noqa: F405 # ignore fuzzy import issues with 'import *'
12
+
13
+
14
@dataclass
class AirbyteStateBlob:
    """
    Free-form state container whose attributes are decided by the caller at construction time.

    Used to "mimic" a pydantic BaseModel configured with ConfigDict(extra='allow'): any
    mapping passed positionally and any keyword argument becomes an attribute of the
    instance, so the set of attributes does not need to be known ahead of time.

    Attributes:
        kwargs (InitVar[Mapping[str, Any]]): Declared as an init-only variable; the actual
            values are stored directly as instance attributes by `__init__`.

    Methods:
        __init__(*args: Any, **kwargs: Any) -> None:
            Merges every positional mapping into the instance dictionary, then sets each
            keyword argument as an attribute.

        __eq__(other: object) -> bool:
            Two blobs are equal when their instance dictionaries are equal. Comparison
            with anything that is not an `AirbyteStateBlob` yields `False`.
    """

    kwargs: InitVar[Mapping[str, Any]]

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Positional arguments are merged straight into the instance dictionary ...
        for positional_mapping in args:
            self.__dict__.update(positional_mapping)
        # ... while keyword arguments go through regular attribute assignment.
        for name in kwargs:
            setattr(self, name, kwargs[name])

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, AirbyteStateBlob):
            return False
        return bool(self.__dict__ == other.__dict__)
51
+
52
+
53
+ # The following dataclasses have been redeclared to include the new version of AirbyteStateBlob
54
@dataclass
class AirbyteStreamState:
    # State entry for a single stream: the descriptor identifying the stream plus an
    # optional free-form state blob.
    # `StreamDescriptor` arrives through the wildcard import of the protocol models, hence
    # the `name-defined` ignore; `stream_state` uses the AirbyteStateBlob declared in this module.
    stream_descriptor: StreamDescriptor  # type: ignore [name-defined]
    stream_state: Optional[AirbyteStateBlob] = None
58
+
59
+
60
@dataclass
class AirbyteGlobalState:
    # Global state: a list of per-stream state entries plus an optional blob shared
    # across them. Both fields use the state classes declared in this module.
    stream_states: List[AirbyteStreamState]
    shared_state: Optional[AirbyteStateBlob] = None
64
+
65
+
66
@dataclass
class AirbyteStateMessage:
    # State message envelope. Every field is optional and defaults to None.
    # `stream` and `global_` use the state classes declared in this module; the
    # `name-defined` ignores mark types that arrive through the wildcard import of the
    # protocol models.
    type: Optional[AirbyteStateType] = None  # type: ignore [name-defined]
    stream: Optional[AirbyteStreamState] = None
    global_: Annotated[AirbyteGlobalState | None, Alias("global")] = (
        None  # "global" is a reserved keyword in python ⇒ Alias is used for (de-)serialization
    )
    data: Optional[Dict[str, Any]] = None
    sourceStats: Optional[AirbyteStateStats] = None  # type: ignore [name-defined]
    destinationStats: Optional[AirbyteStateStats] = None  # type: ignore [name-defined]
76
+
77
+
78
@dataclass
class AirbyteMessage:
    # Top-level protocol message: a required `type` plus one optional payload field per
    # kind of message, each defaulting to None.
    # `state` is typed with the AirbyteStateMessage declared in this module; the other
    # payload types arrive through the wildcard import of the protocol models
    # (hence the `name-defined` ignores).
    type: Type  # type: ignore [name-defined]
    log: Optional[AirbyteLogMessage] = None  # type: ignore [name-defined]
    spec: Optional[ConnectorSpecification] = None  # type: ignore [name-defined]
    connectionStatus: Optional[AirbyteConnectionStatus] = None  # type: ignore [name-defined]
    catalog: Optional[AirbyteCatalog] = None  # type: ignore [name-defined]
    record: Optional[AirbyteRecordMessage] = None  # type: ignore [name-defined]
    state: Optional[AirbyteStateMessage] = None
    trace: Optional[AirbyteTraceMessage] = None  # type: ignore [name-defined]
    control: Optional[AirbyteControlMessage] = None  # type: ignore [name-defined]
@@ -0,0 +1,44 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+ from typing import Any, Dict
3
+
4
+ from serpyco_rs import CustomType, Serializer
5
+
6
+ from .airbyte_protocol import ( # type: ignore[attr-defined] # all classes are imported to airbyte_protocol via *
7
+ AirbyteMessage,
8
+ AirbyteStateBlob,
9
+ AirbyteStateMessage,
10
+ AirbyteStreamState,
11
+ ConfiguredAirbyteCatalog,
12
+ ConfiguredAirbyteStream,
13
+ ConnectorSpecification,
14
+ )
15
+
16
+
17
class AirbyteStateBlobType(CustomType[AirbyteStateBlob, Dict[str, Any]]):
    """Custom serpyco_rs type that converts an AirbyteStateBlob to and from a plain dict."""

    def serialize(self, value: AirbyteStateBlob) -> Dict[str, Any]:
        """Return a shallow copy of the blob's instance dictionary."""
        # cant use orjson.dumps() directly because private attributes are excluded, e.g. "__ab_full_refresh_sync_complete"
        return dict(value.__dict__.items())

    def deserialize(self, value: Dict[str, Any]) -> AirbyteStateBlob:
        """Build a blob whose attributes are the entries of ``value``."""
        blob = AirbyteStateBlob(value)
        return blob

    def get_json_schema(self) -> Dict[str, Any]:
        """Describe the blob as an unconstrained JSON object."""
        schema: Dict[str, Any] = {"type": "object"}
        return schema
27
+
28
+
29
def custom_type_resolver(t: type) -> CustomType[AirbyteStateBlob, Dict[str, Any]] | None:
    """Return the custom (de)serializer for ``AirbyteStateBlob`` itself, and ``None`` for any other type."""
    if t is AirbyteStateBlob:
        return AirbyteStateBlobType()
    return None
31
+
32
+
33
# Module-level serializer instances, constructed once when this module is imported.
# All of them are built with omit_none=True (serpyco_rs option; presumably leaves
# None-valued fields out of the dumped output -- confirm against the serpyco_rs docs).
#
# The three state-bearing serializers below are given `custom_type_resolver` so that
# AirbyteStateBlob fields are handled by AirbyteStateBlobType.
AirbyteStreamStateSerializer = Serializer(
    AirbyteStreamState, omit_none=True, custom_type_resolver=custom_type_resolver
)
AirbyteStateMessageSerializer = Serializer(
    AirbyteStateMessage, omit_none=True, custom_type_resolver=custom_type_resolver
)
AirbyteMessageSerializer = Serializer(
    AirbyteMessage, omit_none=True, custom_type_resolver=custom_type_resolver
)
# The catalog and specification serializers are created without a custom type resolver.
ConfiguredAirbyteCatalogSerializer = Serializer(ConfiguredAirbyteCatalog, omit_none=True)
ConfiguredAirbyteStreamSerializer = Serializer(ConfiguredAirbyteStream, omit_none=True)
ConnectorSpecificationSerializer = Serializer(ConnectorSpecification, omit_none=True)
@@ -0,0 +1,5 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from airbyte_protocol_dataclasses.models.well_known_types import * # noqa: F403 # Allow '*'
airbyte_cdk/py.typed ADDED
File without changes
@@ -0,0 +1,26 @@
1
+ #
2
+ # Copyright (c) 2021 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import dpath.options
6
+
7
+ from .abstract_source import AbstractSource
8
+ from .config import BaseConfig
9
+ from .source import Source
10
+
11
+ # As part of the CDK sources, we do not control what the APIs return and it is possible that a key is empty.
12
+ # Reasons why we are doing this at the airbyte_cdk level:
13
+ # * As of today, all the use cases should allow for empty keys
14
+ # * Cases as of 2023-08-31: oauth/session token provider responses, extractor, transformation and substream
15
+ # * The behavior is explicit at the package level and not hidden in every package that needs dpath.options.ALLOW_EMPTY_STRING_KEYS = True
16
+ # There is a downside in enforcing this option preemptively in the module __init__.py: the runtime code will import dpath even though it
17
+ # might not need dpath leading to longer initialization time.
18
+ # There is a downside in using dpath as a library since the options are global: if we have two pieces of code that want different options,
19
+ # this will not be thread-safe.
20
+ dpath.options.ALLOW_EMPTY_STRING_KEYS = True
21
+
22
+ __all__ = [
23
+ "AbstractSource",
24
+ "BaseConfig",
25
+ "Source",
26
+ ]
@@ -0,0 +1,326 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import logging
6
+ from abc import ABC, abstractmethod
7
+ from typing import (
8
+ Any,
9
+ Dict,
10
+ Iterable,
11
+ Iterator,
12
+ List,
13
+ Mapping,
14
+ MutableMapping,
15
+ Optional,
16
+ Tuple,
17
+ Union,
18
+ )
19
+
20
+ from airbyte_cdk.exception_handler import generate_failed_streams_error_message
21
+ from airbyte_cdk.models import (
22
+ AirbyteCatalog,
23
+ AirbyteConnectionStatus,
24
+ AirbyteMessage,
25
+ AirbyteStateMessage,
26
+ AirbyteStreamStatus,
27
+ ConfiguredAirbyteCatalog,
28
+ ConfiguredAirbyteStream,
29
+ FailureType,
30
+ Status,
31
+ StreamDescriptor,
32
+ )
33
+ from airbyte_cdk.models import Type as MessageType
34
+ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
35
+ from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
36
+ from airbyte_cdk.sources.source import Source
37
+ from airbyte_cdk.sources.streams import Stream
38
+ from airbyte_cdk.sources.streams.core import StreamData
39
+ from airbyte_cdk.sources.streams.http.http import HttpStream
40
+ from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
41
+ from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, split_config
42
+ from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
43
+ from airbyte_cdk.utils.event_timing import create_timer
44
+ from airbyte_cdk.utils.stream_status_utils import (
45
+ as_airbyte_message as stream_status_as_airbyte_message,
46
+ )
47
+ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
48
+
49
+ _default_message_repository = InMemoryMessageRepository()
50
+
51
+
52
+ class AbstractSource(Source, ABC):
53
+ """
54
+ Abstract base class for an Airbyte Source. Consumers should implement any abstract methods
55
+ in this class to create an Airbyte Specification compliant Source.
56
+ """
57
+
58
+ @abstractmethod
59
+ def check_connection(
60
+ self, logger: logging.Logger, config: Mapping[str, Any]
61
+ ) -> Tuple[bool, Optional[Any]]:
62
+ """
63
+ :param logger: source logger
64
+ :param config: The user-provided configuration as specified by the source's spec.
65
+ This usually contains information required to check connection e.g. tokens, secrets and keys etc.
66
+ :return: A tuple of (boolean, error). If boolean is true, then the connection check is successful
67
+ and we can connect to the underlying data source using the provided configuration.
68
+ Otherwise, the input config cannot be used to connect to the underlying data source,
69
+ and the "error" object should describe what went wrong.
70
+ The error object will be cast to string to display the problem to the user.
71
+ """
72
+
73
+ @abstractmethod
74
+ def streams(self, config: Mapping[str, Any]) -> List[Stream]:
75
+ """
76
+ :param config: The user-provided configuration as specified by the source's spec.
77
+ Any stream construction related operation should happen here.
78
+ :return: A list of the streams in this source connector.
79
+ """
80
+
81
+ # Stream name to instance map for applying output object transformation
82
+ _stream_to_instance_map: Dict[str, Stream] = {}
83
+ _slice_logger: SliceLogger = DebugSliceLogger()
84
+
85
+ def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
86
+ """Implements the Discover operation from the Airbyte Specification.
87
+ See https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/#discover.
88
+ """
89
+ streams = [stream.as_airbyte_stream() for stream in self.streams(config=config)]
90
+ return AirbyteCatalog(streams=streams)
91
+
92
+ def check(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteConnectionStatus:
93
+ """Implements the Check Connection operation from the Airbyte Specification.
94
+ See https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/#check.
95
+ """
96
+ check_succeeded, error = self.check_connection(logger, config)
97
+ if not check_succeeded:
98
+ return AirbyteConnectionStatus(status=Status.FAILED, message=repr(error))
99
+ return AirbyteConnectionStatus(status=Status.SUCCEEDED)
100
+
101
+ def read(
102
+ self,
103
+ logger: logging.Logger,
104
+ config: Mapping[str, Any],
105
+ catalog: ConfiguredAirbyteCatalog,
106
+ state: Optional[List[AirbyteStateMessage]] = None,
107
+ ) -> Iterator[AirbyteMessage]:
108
+ """Implements the Read operation from the Airbyte Specification. See https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/."""
109
+ logger.info(f"Starting syncing {self.name}")
110
+ config, internal_config = split_config(config)
111
+ # TODO assert all streams exist in the connector
112
+ # get the streams once in case the connector needs to make any queries to generate them
113
+ stream_instances = {s.name: s for s in self.streams(config)}
114
+ state_manager = ConnectorStateManager(state=state)
115
+ self._stream_to_instance_map = stream_instances
116
+
117
+ stream_name_to_exception: MutableMapping[str, AirbyteTracedException] = {}
118
+
119
+ with create_timer(self.name) as timer:
120
+ for configured_stream in catalog.streams:
121
+ stream_instance = stream_instances.get(configured_stream.stream.name)
122
+ is_stream_exist = bool(stream_instance)
123
+ try:
124
+ # Used direct reference to `stream_instance` instead of `is_stream_exist` to avoid mypy type checking errors
125
+ if not stream_instance:
126
+ if not self.raise_exception_on_missing_stream:
127
+ yield stream_status_as_airbyte_message(
128
+ configured_stream.stream, AirbyteStreamStatus.INCOMPLETE
129
+ )
130
+ continue
131
+
132
+ error_message = (
133
+ f"The stream '{configured_stream.stream.name}' in your connection configuration was not found in the source. "
134
+ f"Refresh the schema in your replication settings and remove this stream from future sync attempts."
135
+ )
136
+
137
+ # Use configured_stream as stream_instance to support references in error handling.
138
+ stream_instance = configured_stream.stream
139
+
140
+ raise AirbyteTracedException(
141
+ message="A stream listed in your configuration was not found in the source. Please check the logs for more "
142
+ "details.",
143
+ internal_message=error_message,
144
+ failure_type=FailureType.config_error,
145
+ )
146
+
147
+ timer.start_event(f"Syncing stream {configured_stream.stream.name}")
148
+ logger.info(f"Marking stream {configured_stream.stream.name} as STARTED")
149
+ yield stream_status_as_airbyte_message(
150
+ configured_stream.stream, AirbyteStreamStatus.STARTED
151
+ )
152
+ yield from self._read_stream(
153
+ logger=logger,
154
+ stream_instance=stream_instance,
155
+ configured_stream=configured_stream,
156
+ state_manager=state_manager,
157
+ internal_config=internal_config,
158
+ )
159
+ logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
160
+ yield stream_status_as_airbyte_message(
161
+ configured_stream.stream, AirbyteStreamStatus.COMPLETE
162
+ )
163
+
164
+ except Exception as e:
165
+ yield from self._emit_queued_messages()
166
+ logger.exception(
167
+ f"Encountered an exception while reading stream {configured_stream.stream.name}"
168
+ )
169
+ logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
170
+ yield stream_status_as_airbyte_message(
171
+ configured_stream.stream, AirbyteStreamStatus.INCOMPLETE
172
+ )
173
+
174
+ stream_descriptor = StreamDescriptor(name=configured_stream.stream.name)
175
+
176
+ if isinstance(e, AirbyteTracedException):
177
+ traced_exception = e
178
+ info_message = f"Stopping sync on error from stream {configured_stream.stream.name} because {self.name} does not support continuing syncs on error."
179
+ else:
180
+ traced_exception = self._serialize_exception(
181
+ stream_descriptor, e, stream_instance=stream_instance
182
+ )
183
+ info_message = f"{self.name} does not support continuing syncs on error from stream {configured_stream.stream.name}"
184
+
185
+ yield traced_exception.as_sanitized_airbyte_message(
186
+ stream_descriptor=stream_descriptor
187
+ )
188
+ stream_name_to_exception[stream_instance.name] = traced_exception # type: ignore # use configured_stream if stream_instance is None
189
+ if self.stop_sync_on_stream_failure:
190
+ logger.info(info_message)
191
+ break
192
+ finally:
193
+ # Finish read event only if the stream instance exists;
194
+ # otherwise, there's no need as it never started
195
+ if is_stream_exist:
196
+ timer.finish_event()
197
+ logger.info(f"Finished syncing {configured_stream.stream.name}")
198
+ logger.info(timer.report())
199
+
200
+ if len(stream_name_to_exception) > 0:
201
+ error_message = generate_failed_streams_error_message(
202
+ {key: [value] for key, value in stream_name_to_exception.items()}
203
+ )
204
+ logger.info(error_message)
205
+ # We still raise at least one exception when a stream raises an exception because the platform currently relies
206
+ # on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error
207
+ # type because this combined error isn't actionable, but rather the previously emitted individual errors.
208
+ raise AirbyteTracedException(
209
+ message=error_message, failure_type=FailureType.config_error
210
+ )
211
+ logger.info(f"Finished syncing {self.name}")
212
+
213
+ @staticmethod
214
+ def _serialize_exception(
215
+ stream_descriptor: StreamDescriptor, e: Exception, stream_instance: Optional[Stream] = None
216
+ ) -> AirbyteTracedException:
217
+ display_message = stream_instance.get_error_display_message(e) if stream_instance else None
218
+ if display_message:
219
+ return AirbyteTracedException.from_exception(
220
+ e, message=display_message, stream_descriptor=stream_descriptor
221
+ )
222
+ return AirbyteTracedException.from_exception(e, stream_descriptor=stream_descriptor)
223
+
224
+ @property
225
+ def raise_exception_on_missing_stream(self) -> bool:
226
+ return False
227
+
228
+ def _read_stream(
229
+ self,
230
+ logger: logging.Logger,
231
+ stream_instance: Stream,
232
+ configured_stream: ConfiguredAirbyteStream,
233
+ state_manager: ConnectorStateManager,
234
+ internal_config: InternalConfig,
235
+ ) -> Iterator[AirbyteMessage]:
236
+ if internal_config.page_size and isinstance(stream_instance, HttpStream):
237
+ logger.info(
238
+ f"Setting page size for {stream_instance.name} to {internal_config.page_size}"
239
+ )
240
+ stream_instance.page_size = internal_config.page_size
241
+ logger.debug(
242
+ f"Syncing configured stream: {configured_stream.stream.name}",
243
+ extra={
244
+ "sync_mode": configured_stream.sync_mode,
245
+ "primary_key": configured_stream.primary_key,
246
+ "cursor_field": configured_stream.cursor_field,
247
+ },
248
+ )
249
+ stream_instance.log_stream_sync_configuration()
250
+
251
+ stream_name = configured_stream.stream.name
252
+ stream_state = state_manager.get_stream_state(stream_name, stream_instance.namespace)
253
+
254
+ # This is a hack. Existing full refresh streams that are converted into resumable full refresh need to discard
255
+ # the state because the terminal state for a full refresh sync is not compatible with substream resumable full
256
+ # refresh state. This is only required when running live traffic regression testing since the platform normally
257
+ # handles whether to pass state
258
+ if stream_state == {"__ab_no_cursor_state_message": True}:
259
+ stream_state = {}
260
+
261
+ if "state" in dir(stream_instance):
262
+ stream_instance.state = stream_state # type: ignore # we check that state in the dir(stream_instance)
263
+ logger.info(f"Setting state of {self.name} stream to {stream_state}")
264
+
265
+ record_iterator = stream_instance.read(
266
+ configured_stream,
267
+ logger,
268
+ self._slice_logger,
269
+ stream_state,
270
+ state_manager,
271
+ internal_config,
272
+ )
273
+
274
+ record_counter = 0
275
+ logger.info(f"Syncing stream: {stream_name} ")
276
+ for record_data_or_message in record_iterator:
277
+ record = self._get_message(record_data_or_message, stream_instance)
278
+ if record.type == MessageType.RECORD:
279
+ record_counter += 1
280
+ if record_counter == 1:
281
+ logger.info(f"Marking stream {stream_name} as RUNNING")
282
+ # If we just read the first record of the stream, emit the transition to the RUNNING state
283
+ yield stream_status_as_airbyte_message(
284
+ configured_stream.stream, AirbyteStreamStatus.RUNNING
285
+ )
286
+ yield from self._emit_queued_messages()
287
+ yield record
288
+
289
+ logger.info(f"Read {record_counter} records from {stream_name} stream")
290
+
291
+ def _emit_queued_messages(self) -> Iterable[AirbyteMessage]:
292
+ if self.message_repository:
293
+ yield from self.message_repository.consume_queue()
294
+ return
295
+
296
+ def _get_message(
297
+ self, record_data_or_message: Union[StreamData, AirbyteMessage], stream: Stream
298
+ ) -> AirbyteMessage:
299
+ """
300
+ Converts the input to an AirbyteMessage if it is a StreamData. Returns the input as is if it is already an AirbyteMessage
301
+ """
302
+ match record_data_or_message:
303
+ case AirbyteMessage():
304
+ return record_data_or_message
305
+ case _:
306
+ return stream_data_to_airbyte_message(
307
+ stream.name,
308
+ record_data_or_message,
309
+ stream.transformer,
310
+ stream.get_json_schema(),
311
+ )
312
+
313
+ @property
314
+ def message_repository(self) -> Union[None, MessageRepository]:
315
+ return _default_message_repository
316
+
317
+ @property
318
+ def stop_sync_on_stream_failure(self) -> bool:
319
+ """
320
+ WARNING: This function is in-development which means it is subject to change. Use at your own risk.
321
+
322
+ By default, when a source encounters an exception while syncing a stream, it will emit an error trace message and then
323
+ continue syncing the next stream. This can be overwritten on a per-source basis so that the source will stop the sync
324
+ on the first error seen and emit a single error trace message for that stream.
325
+ """
326
+ return False
@@ -0,0 +1,8 @@
1
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
+ """The concurrent source model replaces the legacy Source model.
3
+
4
+ The concurrent source model is a new way to build sources in the Airbyte CDK. It is designed to
5
+ be more ergonomic and performant than the legacy Source model.
6
+
7
+ To implement a source using the concurrent source model, check out the submodules in this package.
8
+ """