airbyte-cdk 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (368) hide show
  1. airbyte_cdk/__init__.py +358 -0
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +104 -0
  7. airbyte_cdk/connector.py +123 -0
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/__init__.py +3 -0
  10. airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
  11. airbyte_cdk/connector_builder/main.py +107 -0
  12. airbyte_cdk/connector_builder/models.py +73 -0
  13. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  14. airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
  15. airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
  16. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  17. airbyte_cdk/connector_builder/test_reader/types.py +83 -0
  18. airbyte_cdk/destinations/__init__.py +8 -0
  19. airbyte_cdk/destinations/destination.py +154 -0
  20. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  21. airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
  22. airbyte_cdk/destinations/vector_db_based/config.py +298 -0
  23. airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
  24. airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
  25. airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
  26. airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
  27. airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
  28. airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
  29. airbyte_cdk/entrypoint.py +414 -0
  30. airbyte_cdk/exception_handler.py +56 -0
  31. airbyte_cdk/logger.py +109 -0
  32. airbyte_cdk/models/__init__.py +72 -0
  33. airbyte_cdk/models/airbyte_protocol.py +88 -0
  34. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  35. airbyte_cdk/models/well_known_types.py +5 -0
  36. airbyte_cdk/py.typed +0 -0
  37. airbyte_cdk/sources/__init__.py +26 -0
  38. airbyte_cdk/sources/abstract_source.py +326 -0
  39. airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
  40. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
  41. airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
  42. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
  43. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
  44. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  45. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
  46. airbyte_cdk/sources/config.py +27 -0
  47. airbyte_cdk/sources/connector_state_manager.py +161 -0
  48. airbyte_cdk/sources/declarative/__init__.py +3 -0
  49. airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
  50. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  51. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
  52. airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
  53. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  54. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  55. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  56. airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
  57. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
  58. airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
  59. airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
  60. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
  61. airbyte_cdk/sources/declarative/auth/token.py +267 -0
  62. airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
  63. airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
  64. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
  65. airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
  66. airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
  67. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  68. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  69. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
  70. airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
  71. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
  72. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
  73. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
  74. airbyte_cdk/sources/declarative/declarative_source.py +36 -0
  75. airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
  76. airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
  77. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
  78. airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
  79. airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
  80. airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
  81. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  82. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  83. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  84. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
  85. airbyte_cdk/sources/declarative/exceptions.py +9 -0
  86. airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
  87. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
  88. airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
  89. airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
  90. airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
  91. airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
  92. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
  93. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  94. airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
  95. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
  96. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
  97. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  98. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
  99. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
  100. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  101. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  102. airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
  103. airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
  104. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
  105. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
  106. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
  107. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
  108. airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
  109. airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
  110. airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
  111. airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
  112. airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
  113. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  114. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  115. airbyte_cdk/sources/declarative/models/__init__.py +2 -0
  116. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
  117. airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
  118. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
  119. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
  120. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
  121. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
  122. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
  123. airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
  124. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  125. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  126. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
  127. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  128. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
  129. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
  130. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  131. airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
  132. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
  133. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
  134. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
  135. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
  136. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
  137. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
  138. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
  139. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
  140. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
  141. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
  142. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  143. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
  144. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
  145. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
  146. airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
  147. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
  148. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
  149. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
  150. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
  151. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
  152. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
  153. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
  154. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
  155. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
  156. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
  157. airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
  158. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
  159. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
  160. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  161. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
  162. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
  163. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
  164. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
  165. airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
  166. airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
  167. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  168. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  169. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  170. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  171. airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
  172. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
  173. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  174. airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
  175. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
  176. airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
  177. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
  178. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
  179. airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
  180. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
  181. airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
  182. airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
  183. airbyte_cdk/sources/declarative/spec/spec.py +48 -0
  184. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
  185. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
  186. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
  187. airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
  188. airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
  189. airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
  190. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  191. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  192. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  193. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  194. airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
  195. airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
  196. airbyte_cdk/sources/declarative/types.py +25 -0
  197. airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
  198. airbyte_cdk/sources/file_based/README.md +152 -0
  199. airbyte_cdk/sources/file_based/__init__.py +24 -0
  200. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
  201. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
  202. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
  203. airbyte_cdk/sources/file_based/config/__init__.py +0 -0
  204. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
  205. airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
  206. airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
  207. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  208. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
  209. airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
  210. airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
  211. airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
  212. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  213. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
  214. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
  215. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
  216. airbyte_cdk/sources/file_based/exceptions.py +159 -0
  217. airbyte_cdk/sources/file_based/file_based_source.py +466 -0
  218. airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
  219. airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
  220. airbyte_cdk/sources/file_based/file_record_data.py +22 -0
  221. airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
  222. airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
  223. airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
  224. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  225. airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
  226. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
  227. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
  228. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
  229. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
  230. airbyte_cdk/sources/file_based/remote_file.py +18 -0
  231. airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
  232. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
  233. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
  234. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
  235. airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
  236. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
  237. airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
  238. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
  239. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
  240. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
  241. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
  242. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
  243. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
  244. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
  245. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
  246. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
  247. airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
  248. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
  249. airbyte_cdk/sources/file_based/types.py +10 -0
  250. airbyte_cdk/sources/http_config.py +10 -0
  251. airbyte_cdk/sources/http_logger.py +55 -0
  252. airbyte_cdk/sources/message/__init__.py +19 -0
  253. airbyte_cdk/sources/message/repository.py +137 -0
  254. airbyte_cdk/sources/source.py +95 -0
  255. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  256. airbyte_cdk/sources/streams/__init__.py +8 -0
  257. airbyte_cdk/sources/streams/availability_strategy.py +84 -0
  258. airbyte_cdk/sources/streams/call_rate.py +704 -0
  259. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  260. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  261. airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
  262. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  263. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  264. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  265. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  266. airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
  267. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
  268. airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
  269. airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
  270. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
  271. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  272. airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
  273. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  274. airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
  275. airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
  276. airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
  277. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
  278. airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
  279. airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
  280. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
  281. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
  282. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  283. airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
  284. airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
  285. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
  286. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
  287. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  288. airbyte_cdk/sources/streams/core.py +703 -0
  289. airbyte_cdk/sources/streams/http/__init__.py +10 -0
  290. airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
  291. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  292. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  293. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  294. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  295. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  296. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  297. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  298. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  299. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  300. airbyte_cdk/sources/streams/http/exceptions.py +61 -0
  301. airbyte_cdk/sources/streams/http/http.py +673 -0
  302. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  303. airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
  304. airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
  305. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
  306. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
  307. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
  308. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
  309. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  310. airbyte_cdk/sources/streams/utils/__init__.py +3 -0
  311. airbyte_cdk/sources/types.py +169 -0
  312. airbyte_cdk/sources/utils/__init__.py +7 -0
  313. airbyte_cdk/sources/utils/casing.py +12 -0
  314. airbyte_cdk/sources/utils/files_directory.py +15 -0
  315. airbyte_cdk/sources/utils/record_helper.py +53 -0
  316. airbyte_cdk/sources/utils/schema_helpers.py +230 -0
  317. airbyte_cdk/sources/utils/slice_logger.py +57 -0
  318. airbyte_cdk/sources/utils/transform.py +277 -0
  319. airbyte_cdk/sources/utils/types.py +7 -0
  320. airbyte_cdk/sql/__init__.py +0 -0
  321. airbyte_cdk/sql/_util/__init__.py +0 -0
  322. airbyte_cdk/sql/_util/hashing.py +34 -0
  323. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  324. airbyte_cdk/sql/constants.py +32 -0
  325. airbyte_cdk/sql/exceptions.py +235 -0
  326. airbyte_cdk/sql/secrets.py +123 -0
  327. airbyte_cdk/sql/shared/__init__.py +15 -0
  328. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  329. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  330. airbyte_cdk/sql/types.py +160 -0
  331. airbyte_cdk/test/__init__.py +7 -0
  332. airbyte_cdk/test/catalog_builder.py +81 -0
  333. airbyte_cdk/test/entrypoint_wrapper.py +250 -0
  334. airbyte_cdk/test/mock_http/__init__.py +6 -0
  335. airbyte_cdk/test/mock_http/matcher.py +41 -0
  336. airbyte_cdk/test/mock_http/mocker.py +185 -0
  337. airbyte_cdk/test/mock_http/request.py +103 -0
  338. airbyte_cdk/test/mock_http/response.py +28 -0
  339. airbyte_cdk/test/mock_http/response_builder.py +237 -0
  340. airbyte_cdk/test/state_builder.py +33 -0
  341. airbyte_cdk/test/utils/__init__.py +1 -0
  342. airbyte_cdk/test/utils/data.py +24 -0
  343. airbyte_cdk/test/utils/http_mocking.py +16 -0
  344. airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
  345. airbyte_cdk/test/utils/reading.py +26 -0
  346. airbyte_cdk/utils/__init__.py +10 -0
  347. airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
  348. airbyte_cdk/utils/analytics_message.py +25 -0
  349. airbyte_cdk/utils/constants.py +5 -0
  350. airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
  351. airbyte_cdk/utils/datetime_helpers.py +499 -0
  352. airbyte_cdk/utils/event_timing.py +85 -0
  353. airbyte_cdk/utils/is_cloud_environment.py +18 -0
  354. airbyte_cdk/utils/mapping_helpers.py +162 -0
  355. airbyte_cdk/utils/message_utils.py +26 -0
  356. airbyte_cdk/utils/oneof_option_config.py +33 -0
  357. airbyte_cdk/utils/print_buffer.py +75 -0
  358. airbyte_cdk/utils/schema_inferrer.py +270 -0
  359. airbyte_cdk/utils/slice_hasher.py +37 -0
  360. airbyte_cdk/utils/spec_schema_transformations.py +26 -0
  361. airbyte_cdk/utils/stream_status_utils.py +43 -0
  362. airbyte_cdk/utils/traced_exception.py +145 -0
  363. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
  364. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
  365. airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
  366. airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
  367. airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
  368. airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,25 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from airbyte_cdk.sources.declarative.schema.default_schema_loader import DefaultSchemaLoader
6
+ from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import (
7
+ ComplexFieldType,
8
+ DynamicSchemaLoader,
9
+ SchemaTypeIdentifier,
10
+ TypesMap,
11
+ )
12
+ from airbyte_cdk.sources.declarative.schema.inline_schema_loader import InlineSchemaLoader
13
+ from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader
14
+ from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
15
+
16
+ __all__ = [
17
+ "JsonFileSchemaLoader",
18
+ "DefaultSchemaLoader",
19
+ "SchemaLoader",
20
+ "InlineSchemaLoader",
21
+ "DynamicSchemaLoader",
22
+ "ComplexFieldType",
23
+ "TypesMap",
24
+ "SchemaTypeIdentifier",
25
+ ]
@@ -0,0 +1,47 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import logging
6
+ from dataclasses import InitVar, dataclass
7
+ from typing import Any, Mapping
8
+
9
+ from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader
10
+ from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
11
+ from airbyte_cdk.sources.types import Config
12
+
13
+
@dataclass
class DefaultSchemaLoader(SchemaLoader):
    """
    Schema loader that delegates to a JsonFileSchemaLoader and falls back to the empty schema
    when that loader raises an OSError (e.g. the stream has not defined its schema file yet).

    Attributes:
        config (Config): The user-provided configuration as specified by the source's spec
        parameters (Mapping[str, Any]): Additional arguments, passed through to the underlying JsonFileSchemaLoader
    """

    config: Config
    parameters: InitVar[Mapping[str, Any]]

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        # `parameters` is an InitVar, so the dataclass does not store it as a field; keep a
        # reference because get_json_schema reads the stream name from it.
        self._parameters = parameters
        self.default_loader = JsonFileSchemaLoader(config=self.config, parameters=parameters)

    def get_json_schema(self) -> Mapping[str, Any]:
        """
        Retrieve the schema through the default loader, substituting the empty schema if it cannot be read.

        :return: The schema returned by the default loader, or {} when the loader raises OSError
        """
        try:
            loaded_schema = self.default_loader.get_json_schema()
        except OSError:
            # A slight hack since we don't directly have the stream name. However, when building the
            # default filepath we assume the runtime options store the stream name under 'name',
            # so we do the same here.
            name = self._parameters.get("name", "")
            message = f"Could not find schema for stream {name}, defaulting to the empty schema"
            logging.info(message)
            return {}
        return loaded_schema
@@ -0,0 +1,285 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+
6
+ from copy import deepcopy
7
+ from dataclasses import InitVar, dataclass, field
8
+ from typing import Any, List, Mapping, MutableMapping, Optional, Union
9
+
10
+ import dpath
11
+ from typing_extensions import deprecated
12
+
13
+ from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
14
+ from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
15
+ from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
16
+ from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
17
+ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
18
+ from airbyte_cdk.sources.source import ExperimentalClassWarning
19
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
20
+
21
# Lookup table from a supported field type name to its JSON Schema fragment.
# Every fragment allows "null" next to the concrete type; the date/time variants add a
# "format" and, for some of them, an "airbyte_type" qualifier.
AIRBYTE_DATA_TYPES: Mapping[str, MutableMapping[str, Any]] = {
    "string": {
        "type": ["null", "string"],
    },
    "boolean": {
        "type": ["null", "boolean"],
    },
    "date": {
        "type": ["null", "string"],
        "format": "date",
    },
    "timestamp_without_timezone": {
        "type": ["null", "string"],
        "format": "date-time",
        "airbyte_type": "timestamp_without_timezone",
    },
    "timestamp_with_timezone": {
        "type": ["null", "string"],
        "format": "date-time",
    },
    "time_without_timezone": {
        "type": ["null", "string"],
        "format": "time",
        "airbyte_type": "time_without_timezone",
    },
    "time_with_timezone": {
        "type": ["null", "string"],
        "format": "time",
        "airbyte_type": "time_with_timezone",
    },
    "integer": {
        "type": ["null", "integer"],
    },
    "number": {
        "type": ["null", "number"],
    },
    "array": {
        "type": ["null", "array"],
    },
    "object": {
        "type": ["null", "object"],
    },
}
46
+
47
+
48
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@dataclass(frozen=True)
class ComplexFieldType:
    """
    Identifies a complex field type, i.e. a field type that may declare the type of its items.

    Attributes:
        field_type (str): Name of the field's type
        items (Optional[Union[str, ComplexFieldType]]): Type of the contained items; only accepted when `field_type` is "array"
    """

    field_type: str
    items: Optional[Union[str, "ComplexFieldType"]] = None

    def __post_init__(self) -> None:
        """
        Enforces that `items` is only used when `field_type` is an array.

        :raises ValueError: if `items` is truthy while `field_type` is not "array"
        """
        # Nothing to validate for arrays or when no (truthy) `items` value was given.
        if self.field_type == "array" or not self.items:
            return
        raise ValueError("'items' can only be used when 'field_type' is an array.")
65
+
66
+
67
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@dataclass(frozen=True)
class TypesMap:
    """
    Represents a mapping between a current type and its corresponding target type.

    Attributes:
        target_type (Union[List[str], str, ComplexFieldType]): Type to use in place of `current_type`
        current_type (Union[List[str], str]): Type that should be replaced
        condition (Optional[str]): Optional boolean expression guarding the replacement; None means no condition
    """

    target_type: Union[List[str], str, ComplexFieldType]
    current_type: Union[List[str], str]
    # Defaulted so that a mapping without a condition can be built without spelling out None;
    # existing positional and keyword callers are unaffected.
    condition: Optional[str] = None
77
+
78
+
79
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@dataclass
class SchemaTypeIdentifier:
    """
    Identifies schema details for dynamic schema extraction and processing.

    Attributes:
        key_pointer (List[Union[InterpolatedString, str]]): Path to a property's name
        parameters (Mapping[str, Any]): Additional arguments to pass to the string interpolation if needed
        type_pointer (Optional[List[Union[InterpolatedString, str]]]): Path to a property's type
        types_mapping (Optional[List[TypesMap]]): Type replacements
        schema_pointer (Optional[List[Union[InterpolatedString, str]]]): Path to the schema; normalized to [] when not provided
    """

    key_pointer: List[Union[InterpolatedString, str]]
    parameters: InitVar[Mapping[str, Any]]
    type_pointer: Optional[List[Union[InterpolatedString, str]]] = None
    types_mapping: Optional[List[TypesMap]] = None
    schema_pointer: Optional[List[Union[InterpolatedString, str]]] = None

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        # An absent or empty schema pointer becomes an empty list rather than None.
        if self.schema_pointer:
            self.schema_pointer = self._update_pointer(self.schema_pointer, parameters)
        else:
            self.schema_pointer = []

        self.key_pointer = self._update_pointer(self.key_pointer, parameters)  # type: ignore[assignment] # This is a required field in the model

        # An absent or empty type pointer stays None.
        if self.type_pointer:
            self.type_pointer = self._update_pointer(self.type_pointer, parameters)
        else:
            self.type_pointer = None

    @staticmethod
    def _update_pointer(
        pointer: Optional[List[Union[InterpolatedString, str]]], parameters: Mapping[str, Any]
    ) -> Optional[List[Union[InterpolatedString, str]]]:
        """
        Wraps every plain string of the pointer in an InterpolatedString.

        :param pointer: The path segments to convert
        :param parameters: Passed to InterpolatedString.create for each string segment
        :return: The converted segments, or None when the pointer is None or empty
        """
        if not pointer:
            return None

        converted: List[Union[InterpolatedString, str]] = []
        for segment in pointer:
            if isinstance(segment, str):
                converted.append(InterpolatedString.create(segment, parameters=parameters))
            else:
                # Anything that is not a plain string is kept untouched.
                converted.append(segment)
        return converted
115
+
116
+
117
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@dataclass
class DynamicSchemaLoader(SchemaLoader):
    """
    Dynamically loads a JSON Schema by extracting data from retrieved records.

    Attributes:
        retriever (Retriever): Provides the record that describes the schema
        config (Config): The user-provided configuration as specified by the source's spec
        parameters (Mapping[str, Any]): Additional arguments to pass to the string interpolation if needed
        schema_type_identifier (SchemaTypeIdentifier): Pointers and type mappings used to read property names and types
        schema_transformations (List[RecordTransformation]): Transformations applied to the generated properties
    """

    retriever: Retriever
    config: Config
    parameters: InitVar[Mapping[str, Any]]
    schema_type_identifier: SchemaTypeIdentifier
    schema_transformations: List[RecordTransformation] = field(default_factory=list)

    def get_json_schema(self) -> Mapping[str, Any]:
        """
        Constructs a JSON Schema based on retrieved data.

        Only the first record returned by the retriever is used. When there is no record (or it
        is falsy), the resulting schema has no properties.

        :return: A draft-07 object schema whose properties were derived from the retrieved record
        """
        properties = {}
        # read_records is only promised to return an Iterable. iter() makes next() work for
        # lists and other non-iterator iterables and returns generators/iterators unchanged.
        retrieved_record = next(iter(self.retriever.read_records({})), None)

        raw_schema = (
            self._extract_data(
                retrieved_record,  # type: ignore[arg-type] # Expected that retrieved_record will be only Mapping[str, Any]
                self.schema_type_identifier.schema_pointer,
            )
            if retrieved_record
            else []
        )

        for property_definition in raw_schema:
            key = self._get_key(property_definition, self.schema_type_identifier.key_pointer)
            value = self._get_type(
                property_definition,
                self.schema_type_identifier.type_pointer,
            )
            properties[key] = value

        transformed_properties = self._transform(properties, {})

        return {
            "$schema": "https://json-schema.org/draft-07/schema#",
            "type": "object",
            "additionalProperties": True,
            "properties": transformed_properties,
        }

    def _transform(
        self,
        properties: Mapping[str, Any],
        stream_state: StreamState,
        stream_slice: Optional[StreamSlice] = None,
    ) -> Mapping[str, Any]:
        """
        Applies every configured schema transformation to the properties.

        The transformations are called for their effect on `properties`; their return values are
        not used. `stream_state` and `stream_slice` are accepted but not used here.

        :return: The same `properties` object that was passed in
        """
        for transformation in self.schema_transformations:
            transformation.transform(
                properties,  # type: ignore  # properties has type Mapping[str, Any], but Dict[str, Any] expected
                config=self.config,
            )
        return properties

    def _get_key(
        self,
        raw_schema: MutableMapping[str, Any],
        field_key_path: List[Union[InterpolatedString, str]],
    ) -> str:
        """
        Extracts the key field from the schema using the specified path.

        :raises ValueError: if the extracted value is not a string
        """
        field_key = self._extract_data(raw_schema, field_key_path)
        if not isinstance(field_key, str):
            raise ValueError(f"Expected key to be a string. Got {field_key}")
        return field_key

    def _get_type(
        self,
        raw_schema: MutableMapping[str, Any],
        field_type_path: Optional[List[Union[InterpolatedString, str]]],
    ) -> Union[Mapping[str, Any], List[Mapping[str, Any]]]:
        """
        Determines the JSON Schema type for a field, supporting nullable and combined types.

        The raw type defaults to "string" when no type path is configured or nothing is found at
        that path. After applying the type mappings, a list of exactly two strings becomes a
        `oneOf` of both types, a string becomes its Airbyte type and a ComplexFieldType is
        resolved recursively.

        :raises ValueError: if the mapped type has any other shape
        """
        raw_field_type = (
            self._extract_data(raw_schema, field_type_path, default="string")
            if field_type_path
            else "string"
        )
        mapped_field_type = self._replace_type_if_not_valid(raw_field_type, raw_schema)
        if (
            isinstance(mapped_field_type, list)
            and len(mapped_field_type) == 2
            and all(isinstance(item, str) for item in mapped_field_type)
        ):
            first_type = self._get_airbyte_type(mapped_field_type[0])
            second_type = self._get_airbyte_type(mapped_field_type[1])
            return {"oneOf": [first_type, second_type]}

        elif isinstance(mapped_field_type, str):
            return self._get_airbyte_type(mapped_field_type)

        elif isinstance(mapped_field_type, ComplexFieldType):
            return self._resolve_complex_type(mapped_field_type)

        else:
            raise ValueError(
                f"Invalid data type. Available string or two items list of string. Got {mapped_field_type}."
            )

    def _resolve_complex_type(self, complex_type: ComplexFieldType) -> Mapping[str, Any]:
        """
        Converts a ComplexFieldType into its Airbyte type definition.

        When `items` is set, the definition gets an "items" entry that is resolved the same way
        (recursively for nested ComplexFieldType values).
        """
        if not complex_type.items:
            return self._get_airbyte_type(complex_type.field_type)

        field_type = self._get_airbyte_type(complex_type.field_type)

        field_type["items"] = (
            self._get_airbyte_type(complex_type.items)
            if isinstance(complex_type.items, str)
            else self._resolve_complex_type(complex_type.items)
        )

        return field_type

    def _replace_type_if_not_valid(
        self,
        field_type: Union[List[str], str],
        raw_schema: MutableMapping[str, Any],
    ) -> Union[List[str], str, ComplexFieldType]:
        """
        Replaces a field type if it matches a type mapping in `types_mapping`.

        The first mapping whose `current_type` equals `field_type` and whose condition evaluates
        to true wins; without a match the field type is returned unchanged.
        """
        if self.schema_type_identifier.types_mapping:
            for types_map in self.schema_type_identifier.types_mapping:
                # conditional is optional param, setting to true if not provided
                condition = InterpolatedBoolean(
                    condition=types_map.condition if types_map.condition is not None else "True",
                    parameters={},
                ).eval(config=self.config, raw_schema=raw_schema)

                if field_type == types_map.current_type and condition:
                    return types_map.target_type
        return field_type

    @staticmethod
    def _get_airbyte_type(field_type: str) -> MutableMapping[str, Any]:
        """
        Maps a field type to its corresponding Airbyte type definition.

        :return: A deep copy of the definition, so callers may modify it freely
        :raises ValueError: if the field type is not a key of AIRBYTE_DATA_TYPES
        """
        if field_type not in AIRBYTE_DATA_TYPES:
            raise ValueError(f"Invalid Airbyte data type: {field_type}")

        return deepcopy(AIRBYTE_DATA_TYPES[field_type])

    def _extract_data(
        self,
        body: Mapping[str, Any],
        extraction_path: Optional[List[Union[InterpolatedString, str]]] = None,
        default: Any = None,
    ) -> Any:
        """
        Extracts data from the body based on the provided extraction path.

        :param body: The mapping to read from
        :param extraction_path: Path segments; non-string segments are evaluated against the config first
        :param default: Value handed to dpath.get as its default
        :return: The body itself when no path is given, otherwise the result of dpath.get
        """

        if not extraction_path:
            return body

        path = [
            node.eval(self.config) if not isinstance(node, str) else node
            for node in extraction_path
        ]

        return dpath.get(body, path, default=default)  # type: ignore # extracted will be a MutableMapping, given input data structure
@@ -0,0 +1,19 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from dataclasses import InitVar, dataclass
6
+ from typing import Any, Dict, Mapping
7
+
8
+ from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
9
+
10
+
11
@dataclass
class InlineSchemaLoader(SchemaLoader):
    """
    Schema loader backed by a schema that is handed to it directly.

    Attributes:
        schema (Dict[str, Any]): The stream's schema
        parameters (Mapping[str, Any]): Accepted at construction time; not used by this loader
    """

    schema: Dict[str, Any]
    parameters: InitVar[Mapping[str, Any]]

    def get_json_schema(self) -> Mapping[str, Any]:
        """Returns the stored schema object as-is (no copy is made)"""
        return self.schema
@@ -0,0 +1,92 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import json
6
+ import pkgutil
7
+ import sys
8
+ from dataclasses import InitVar, dataclass, field
9
+ from typing import Any, Mapping, Tuple, Union
10
+
11
+ from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
12
+ from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
13
+ from airbyte_cdk.sources.types import Config
14
+ from airbyte_cdk.sources.utils.schema_helpers import ResourceSchemaLoader
15
+
16
+
17
def _default_file_path() -> str:
    """
    Builds the default, not yet interpolated, path template of a stream's schema file.

    The connector's package is inferred from the loaded modules: the first module whose
    top-level package name starts with "source_" (and whose name does not contain
    "airbyte_cdk") determines the package.

    :return: "./<package>/schemas/{{parameters['name']}}.json" when such a package is loaded,
        otherwise "./{{parameters['name']}}.json"
    """
    # Schema files are always in "source_<connector_name>/schemas/<stream_name>.json".
    # Matching on the top-level package prefix (rather than on a substring of the full module
    # name) keeps unrelated modules such as "multiprocessing.resource_tracker" from being
    # mistaken for the connector package.
    # tuple() takes a snapshot so that imports happening meanwhile cannot break the iteration.
    for module_name in tuple(sys.modules):
        if "airbyte_cdk" in module_name:
            continue
        # example: 'source_exchange_rates.source' -> 'source_exchange_rates'
        top_level_package = module_name.split(".")[0]
        if top_level_package.startswith("source_"):
            return f"./{top_level_package}/schemas/{{{{parameters['name']}}}}.json"

    # If we are not in a source_ module, the most likely scenario is we're processing a manifest from the connector builder
    # server which does not require a json schema to be defined.
    return "./{{parameters['name']}}.json"
30
+
31
+
32
@dataclass
class JsonFileSchemaLoader(ResourceSchemaLoader, SchemaLoader):
    """
    Loads the schema from a json file that is read through pkgutil.get_data

    Attributes:
        file_path (Union[InterpolatedString, str]): The path to the json file describing the schema; a default path is used when empty
        config (Config): The user-provided configuration as specified by the source's spec
        parameters (Mapping[str, Any]): Additional arguments to pass to the string interpolation if needed
    """

    config: Config
    parameters: InitVar[Mapping[str, Any]]
    file_path: Union[InterpolatedString, str] = field(default="")

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        # Fall back to the default location when no path was configured, then make sure the
        # path is stored as an interpolated string.
        path_template = self.file_path if self.file_path else _default_file_path()
        self.file_path = InterpolatedString.create(path_template, parameters=parameters)

    def get_json_schema(self) -> Mapping[str, Any]:
        """
        Reads the schema file, parses it and resolves its references.

        :return: The result of `_resolve_schema_references` for the parsed file content
        :raises IOError: if no data could be read for the evaluated file path
        :raises RuntimeError: if the file content is not valid JSON
        """
        # todo: It is worth revisiting if we can replace file_path with just file_name if every schema is in the /schemas directory
        # this would require that we find a creative solution to store or retrieve source_name in here since the files are mounted there
        evaluated_path = self._get_json_filepath()
        package, relative_path = self.extract_resource_and_schema_path(evaluated_path)
        file_content = pkgutil.get_data(package, relative_path)

        if not file_content:
            raise IOError(f"Cannot find file {evaluated_path}")

        try:
            parsed_schema = json.loads(file_content)
        except ValueError as err:
            raise RuntimeError(f"Invalid JSON file format for file {evaluated_path}") from err

        # Set before the references are resolved, exactly as the statement order requires.
        self.package_name = package
        return self._resolve_schema_references(parsed_schema)

    def _get_json_filepath(self) -> Any:
        """Evaluates the file path against the config"""
        return self.file_path.eval(self.config)  # type: ignore # file_path is always cast to an interpolated string

    @staticmethod
    def extract_resource_and_schema_path(json_schema_path: str) -> Tuple[str, str]:
        """
        When the connector is running on a docker container, package_data is accessible from the resource (source_<name>), so we extract
        the resource from the first part of the schema path and the remaining path is used to find the schema file. This is a slight
        hack to identify the source name while we are in the airbyte_cdk module.

        A single leading "" or "." segment is dropped first. With no segment left both values are
        empty; with exactly one segment left the resource is empty.

        :param json_schema_path: The path to the schema JSON file
        :return: Tuple of the resource name and the path to the schema file
        """
        segments = json_schema_path.split("/")

        if segments[0] in ("", "."):
            segments = segments[1:]

        if not segments:
            return "", ""

        if len(segments) == 1:
            return "", segments[0]

        resource, *remainder = segments
        return resource, "/".join(remainder)
@@ -0,0 +1,17 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from abc import abstractmethod
6
+ from dataclasses import dataclass
7
+ from typing import Any, Mapping
8
+
9
+
10
@dataclass
class SchemaLoader:
    """Base component that provides the schema of a stream"""

    # NOTE(review): the class does not derive from ABC, so @abstractmethod only marks the method
    # and does not prevent instantiation; the default implementation returns None.
    @abstractmethod
    def get_json_schema(self) -> Mapping[str, Any]:
        """Returns a mapping describing the stream's schema"""
@@ -0,0 +1,7 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from airbyte_cdk.sources.declarative.spec.spec import Spec
6
+
7
+ __all__ = ["Spec"]
@@ -0,0 +1,48 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from dataclasses import InitVar, dataclass
6
+ from typing import Any, Mapping, Optional
7
+
8
+ from airbyte_cdk.models import (
9
+ AdvancedAuth,
10
+ ConnectorSpecification,
11
+ ConnectorSpecificationSerializer,
12
+ )
13
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import AuthFlow
14
+
15
+
16
@dataclass
class Spec:
    """
    Returns a connection specification made up of information about the connector and how it can be configured

    Attributes:
        connection_specification (Mapping[str, Any]): information related to how a connector can be configured
        parameters (Mapping[str, Any]): Accepted at construction time; not used by this component
        documentation_url (Optional[str]): The link to the Airbyte documentation about this connector
        advanced_auth (Optional[AuthFlow]): Advanced auth settings to include in the specification
    """

    connection_specification: Mapping[str, Any]
    parameters: InitVar[Mapping[str, Any]]
    documentation_url: Optional[str] = None
    advanced_auth: Optional[AuthFlow] = None

    def generate_spec(self) -> ConnectorSpecification:
        """
        Returns the connector specification according to the spec block defined in the low code connector manifest.

        The component itself is left untouched, so the method can be called repeatedly.
        """

        # The keys are camel case because that's the existing format expected by the rest of the platform
        obj: dict[str, Mapping[str, Any] | str | AdvancedAuth] = {
            "connectionSpecification": self.connection_specification
        }

        if self.documentation_url:
            obj["documentationUrl"] = self.documentation_url
        if self.advanced_auth:
            # Map CDK AuthFlow model to protocol AdvancedAuth model
            advanced_auth = self.advanced_auth.dict()
            # Replace the enum member by its value on the dumped copy only. Overwriting the
            # attribute on self.advanced_auth would make a second call fail, because the value
            # stored by the first call no longer has a `.value`.
            auth_flow_type = advanced_auth.get("auth_flow_type")
            if auth_flow_type is not None:
                advanced_auth["auth_flow_type"] = getattr(auth_flow_type, "value", auth_flow_type)
            obj["advanced_auth"] = advanced_auth

        return ConnectorSpecificationSerializer.load(obj)
@@ -0,0 +1,7 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer
6
+
7
+ __all__ = ["StreamSlicer"]
@@ -0,0 +1,93 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+ from typing import Any, Iterable, Mapping, Optional
4
+
5
+ from airbyte_cdk.sources.declarative.retrievers import Retriever
6
+ from airbyte_cdk.sources.message import MessageRepository
7
+ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
8
+ from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
9
+ from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
10
+ from airbyte_cdk.sources.types import Record, StreamSlice
11
+ from airbyte_cdk.utils.slice_hasher import SliceHasher
12
+
13
+
14
class DeclarativePartitionFactory:
    def __init__(
        self,
        stream_name: str,
        json_schema: Mapping[str, Any],
        retriever: Retriever,
        message_repository: MessageRepository,
    ) -> None:
        """
        Stores the stream-level collaborators needed to build one DeclarativePartition per stream slice.

        NOTE(review): this docstring used to state that the factory takes a retriever_factory and not a
        retriever directly, so that one retriever is created per thread because components like
        `DefaultPaginator` are not thread safe. The signature here accepts a single `retriever`, and
        `create` passes that same instance to every partition - confirm whether sharing it is intended.
        """
        self._stream_name = stream_name
        self._json_schema = json_schema
        self._retriever = retriever
        self._message_repository = message_repository

    def create(self, stream_slice: StreamSlice) -> Partition:
        """Builds the partition for the given slice from the stored collaborators"""
        return DeclarativePartition(
            stream_name=self._stream_name,
            json_schema=self._json_schema,
            retriever=self._retriever,
            message_repository=self._message_repository,
            stream_slice=stream_slice,
        )
40
+
41
+
42
class DeclarativePartition(Partition):
    """Partition that reads the records of a single stream slice through a retriever"""

    def __init__(
        self,
        stream_name: str,
        json_schema: Mapping[str, Any],
        retriever: Retriever,
        message_repository: MessageRepository,
        stream_slice: StreamSlice,
    ):
        self._stream_name = stream_name
        self._json_schema = json_schema
        self._retriever = retriever
        self._message_repository = message_repository
        self._stream_slice = stream_slice
        # Computed once up front; __hash__ only returns this value.
        self._hash = SliceHasher.hash(self._stream_name, self._stream_slice)

    def read(self) -> Iterable[Record]:
        """
        Yields the records of this partition's slice.

        Mapping items coming from the retriever are yielded as Record instances (wrapped when they
        are not one already); every other item is emitted through the message repository instead.
        """
        for item in self._retriever.read_records(self._json_schema, self._stream_slice):
            if not isinstance(item, Mapping):
                self._message_repository.emit_message(item)
                continue

            if isinstance(item, Record):
                yield item
            else:
                yield Record(
                    data=item,
                    stream_name=self.stream_name(),
                    associated_slice=self._stream_slice,
                )

    def to_slice(self) -> Optional[Mapping[str, Any]]:
        """Returns the stream slice this partition was created for"""
        return self._stream_slice

    def stream_name(self) -> str:
        """Returns the name of the stream this partition belongs to"""
        return self._stream_name

    def __hash__(self) -> int:
        return self._hash
82
+
83
+
84
class StreamSlicerPartitionGenerator(PartitionGenerator):
    """Partition generator that creates one partition for every slice of a stream slicer"""

    def __init__(
        self, partition_factory: DeclarativePartitionFactory, stream_slicer: StreamSlicer
    ) -> None:
        self._partition_factory = partition_factory
        self._stream_slicer = stream_slicer

    def generate(self) -> Iterable[Partition]:
        """Lazily yields a partition for each slice returned by the stream slicer, in order"""
        for current_slice in self._stream_slicer.stream_slices():
            partition = self._partition_factory.create(current_slice)
            yield partition
@@ -0,0 +1,25 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from abc import ABC
6
+
7
+ from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import (
8
+ RequestOptionsProvider,
9
+ )
10
+ from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import (
11
+ StreamSlicer as ConcurrentStreamSlicer,
12
+ )
13
+
14
+
15
class StreamSlicer(ConcurrentStreamSlicer, RequestOptionsProvider, ABC):
    """
    Slices the stream into a subset of records.

    Slices enable state checkpointing and data retrieval parallelization.
    The stream slicer keeps track of the cursor state as a dict of cursor_field -> cursor_value

    This class adds no members of its own; it combines the concurrent stream slicer with the
    request options provider.

    See the stream slicing section of the docs for more information.
    """