airbyte-cdk 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (368) hide show
  1. airbyte_cdk/__init__.py +358 -0
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +236 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +104 -0
  7. airbyte_cdk/connector.py +123 -0
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/__init__.py +3 -0
  10. airbyte_cdk/connector_builder/connector_builder_handler.py +121 -0
  11. airbyte_cdk/connector_builder/main.py +107 -0
  12. airbyte_cdk/connector_builder/models.py +73 -0
  13. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  14. airbyte_cdk/connector_builder/test_reader/helpers.py +689 -0
  15. airbyte_cdk/connector_builder/test_reader/message_grouper.py +173 -0
  16. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  17. airbyte_cdk/connector_builder/test_reader/types.py +83 -0
  18. airbyte_cdk/destinations/__init__.py +8 -0
  19. airbyte_cdk/destinations/destination.py +154 -0
  20. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  21. airbyte_cdk/destinations/vector_db_based/__init__.py +38 -0
  22. airbyte_cdk/destinations/vector_db_based/config.py +298 -0
  23. airbyte_cdk/destinations/vector_db_based/document_processor.py +223 -0
  24. airbyte_cdk/destinations/vector_db_based/embedder.py +303 -0
  25. airbyte_cdk/destinations/vector_db_based/indexer.py +78 -0
  26. airbyte_cdk/destinations/vector_db_based/test_utils.py +63 -0
  27. airbyte_cdk/destinations/vector_db_based/utils.py +35 -0
  28. airbyte_cdk/destinations/vector_db_based/writer.py +104 -0
  29. airbyte_cdk/entrypoint.py +414 -0
  30. airbyte_cdk/exception_handler.py +56 -0
  31. airbyte_cdk/logger.py +109 -0
  32. airbyte_cdk/models/__init__.py +72 -0
  33. airbyte_cdk/models/airbyte_protocol.py +88 -0
  34. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  35. airbyte_cdk/models/well_known_types.py +5 -0
  36. airbyte_cdk/py.typed +0 -0
  37. airbyte_cdk/sources/__init__.py +26 -0
  38. airbyte_cdk/sources/abstract_source.py +326 -0
  39. airbyte_cdk/sources/concurrent_source/__init__.py +8 -0
  40. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +255 -0
  41. airbyte_cdk/sources/concurrent_source/concurrent_source.py +165 -0
  42. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +147 -0
  43. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +24 -0
  44. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  45. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +115 -0
  46. airbyte_cdk/sources/config.py +27 -0
  47. airbyte_cdk/sources/connector_state_manager.py +161 -0
  48. airbyte_cdk/sources/declarative/__init__.py +3 -0
  49. airbyte_cdk/sources/declarative/async_job/__init__.py +0 -0
  50. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  51. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +525 -0
  52. airbyte_cdk/sources/declarative/async_job/job_tracker.py +79 -0
  53. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  54. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  55. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  56. airbyte_cdk/sources/declarative/auth/__init__.py +8 -0
  57. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +42 -0
  58. airbyte_cdk/sources/declarative/auth/jwt.py +197 -0
  59. airbyte_cdk/sources/declarative/auth/oauth.py +293 -0
  60. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +45 -0
  61. airbyte_cdk/sources/declarative/auth/token.py +267 -0
  62. airbyte_cdk/sources/declarative/auth/token_provider.py +82 -0
  63. airbyte_cdk/sources/declarative/checks/__init__.py +24 -0
  64. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +61 -0
  65. airbyte_cdk/sources/declarative/checks/check_stream.py +56 -0
  66. airbyte_cdk/sources/declarative/checks/connection_checker.py +35 -0
  67. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  68. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  69. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +526 -0
  70. airbyte_cdk/sources/declarative/datetime/__init__.py +3 -0
  71. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +65 -0
  72. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +118 -0
  73. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3975 -0
  74. airbyte_cdk/sources/declarative/declarative_source.py +36 -0
  75. airbyte_cdk/sources/declarative/declarative_stream.py +241 -0
  76. airbyte_cdk/sources/declarative/decoders/__init__.py +33 -0
  77. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +218 -0
  78. airbyte_cdk/sources/declarative/decoders/decoder.py +32 -0
  79. airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
  80. airbyte_cdk/sources/declarative/decoders/json_decoder.py +65 -0
  81. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  82. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  83. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  84. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +56 -0
  85. airbyte_cdk/sources/declarative/exceptions.py +9 -0
  86. airbyte_cdk/sources/declarative/extractors/__init__.py +21 -0
  87. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +86 -0
  88. airbyte_cdk/sources/declarative/extractors/http_selector.py +37 -0
  89. airbyte_cdk/sources/declarative/extractors/record_extractor.py +27 -0
  90. airbyte_cdk/sources/declarative/extractors/record_filter.py +91 -0
  91. airbyte_cdk/sources/declarative/extractors/record_selector.py +170 -0
  92. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +176 -0
  93. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  94. airbyte_cdk/sources/declarative/incremental/__init__.py +37 -0
  95. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +497 -0
  96. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +459 -0
  97. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  98. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +357 -0
  99. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +380 -0
  100. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  101. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  102. airbyte_cdk/sources/declarative/interpolation/__init__.py +9 -0
  103. airbyte_cdk/sources/declarative/interpolation/filters.py +139 -0
  104. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +66 -0
  105. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +56 -0
  106. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +52 -0
  107. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +79 -0
  108. airbyte_cdk/sources/declarative/interpolation/interpolation.py +34 -0
  109. airbyte_cdk/sources/declarative/interpolation/jinja.py +161 -0
  110. airbyte_cdk/sources/declarative/interpolation/macros.py +191 -0
  111. airbyte_cdk/sources/declarative/manifest_declarative_source.py +421 -0
  112. airbyte_cdk/sources/declarative/migrations/__init__.py +0 -0
  113. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  114. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  115. airbyte_cdk/sources/declarative/models/__init__.py +2 -0
  116. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2503 -0
  117. airbyte_cdk/sources/declarative/parsers/__init__.py +3 -0
  118. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +157 -0
  119. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +21 -0
  120. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +172 -0
  121. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +213 -0
  122. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +3407 -0
  123. airbyte_cdk/sources/declarative/partition_routers/__init__.py +29 -0
  124. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  125. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  126. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +121 -0
  127. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  128. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +63 -0
  129. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +437 -0
  130. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  131. airbyte_cdk/sources/declarative/requesters/__init__.py +9 -0
  132. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +25 -0
  133. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +23 -0
  134. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +45 -0
  135. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +45 -0
  136. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +41 -0
  137. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +70 -0
  138. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +77 -0
  139. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +17 -0
  140. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +101 -0
  141. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +147 -0
  142. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  143. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +17 -0
  144. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +179 -0
  145. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +350 -0
  146. airbyte_cdk/sources/declarative/requesters/http_requester.py +433 -0
  147. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +21 -0
  148. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +327 -0
  149. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +76 -0
  150. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +65 -0
  151. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +25 -0
  152. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +98 -0
  153. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +102 -0
  154. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +71 -0
  155. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +48 -0
  156. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +66 -0
  157. airbyte_cdk/sources/declarative/requesters/request_option.py +117 -0
  158. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +23 -0
  159. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +92 -0
  160. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  161. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +59 -0
  162. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +68 -0
  163. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +119 -0
  164. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +79 -0
  165. airbyte_cdk/sources/declarative/requesters/request_path.py +15 -0
  166. airbyte_cdk/sources/declarative/requesters/requester.py +144 -0
  167. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  168. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  169. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  170. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  171. airbyte_cdk/sources/declarative/retrievers/__init__.py +19 -0
  172. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +124 -0
  173. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
  174. airbyte_cdk/sources/declarative/retrievers/retriever.py +54 -0
  175. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +702 -0
  176. airbyte_cdk/sources/declarative/schema/__init__.py +25 -0
  177. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +47 -0
  178. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +285 -0
  179. airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +19 -0
  180. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +92 -0
  181. airbyte_cdk/sources/declarative/schema/schema_loader.py +17 -0
  182. airbyte_cdk/sources/declarative/spec/__init__.py +7 -0
  183. airbyte_cdk/sources/declarative/spec/spec.py +48 -0
  184. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +7 -0
  185. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +93 -0
  186. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +25 -0
  187. airbyte_cdk/sources/declarative/transformations/__init__.py +17 -0
  188. airbyte_cdk/sources/declarative/transformations/add_fields.py +146 -0
  189. airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +61 -0
  190. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  191. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  192. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  193. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  194. airbyte_cdk/sources/declarative/transformations/remove_fields.py +75 -0
  195. airbyte_cdk/sources/declarative/transformations/transformation.py +37 -0
  196. airbyte_cdk/sources/declarative/types.py +25 -0
  197. airbyte_cdk/sources/declarative/yaml_declarative_source.py +67 -0
  198. airbyte_cdk/sources/file_based/README.md +152 -0
  199. airbyte_cdk/sources/file_based/__init__.py +24 -0
  200. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +11 -0
  201. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +73 -0
  202. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +149 -0
  203. airbyte_cdk/sources/file_based/config/__init__.py +0 -0
  204. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +153 -0
  205. airbyte_cdk/sources/file_based/config/avro_format.py +25 -0
  206. airbyte_cdk/sources/file_based/config/csv_format.py +210 -0
  207. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  208. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +99 -0
  209. airbyte_cdk/sources/file_based/config/jsonl_format.py +18 -0
  210. airbyte_cdk/sources/file_based/config/parquet_format.py +25 -0
  211. airbyte_cdk/sources/file_based/config/unstructured_format.py +102 -0
  212. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  213. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +8 -0
  214. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +21 -0
  215. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +33 -0
  216. airbyte_cdk/sources/file_based/exceptions.py +159 -0
  217. airbyte_cdk/sources/file_based/file_based_source.py +466 -0
  218. airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +123 -0
  219. airbyte_cdk/sources/file_based/file_based_stream_reader.py +209 -0
  220. airbyte_cdk/sources/file_based/file_record_data.py +22 -0
  221. airbyte_cdk/sources/file_based/file_types/__init__.py +37 -0
  222. airbyte_cdk/sources/file_based/file_types/avro_parser.py +233 -0
  223. airbyte_cdk/sources/file_based/file_types/csv_parser.py +527 -0
  224. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  225. airbyte_cdk/sources/file_based/file_types/file_transfer.py +30 -0
  226. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +86 -0
  227. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +145 -0
  228. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +275 -0
  229. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +480 -0
  230. airbyte_cdk/sources/file_based/remote_file.py +18 -0
  231. airbyte_cdk/sources/file_based/schema_helpers.py +281 -0
  232. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +17 -0
  233. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +20 -0
  234. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +52 -0
  235. airbyte_cdk/sources/file_based/stream/__init__.py +13 -0
  236. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +197 -0
  237. airbyte_cdk/sources/file_based/stream/concurrent/__init__.py +0 -0
  238. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +343 -0
  239. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +9 -0
  240. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +59 -0
  241. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +313 -0
  242. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +83 -0
  243. airbyte_cdk/sources/file_based/stream/cursor/__init__.py +4 -0
  244. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +66 -0
  245. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +149 -0
  246. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +396 -0
  247. airbyte_cdk/sources/file_based/stream/identities_stream.py +49 -0
  248. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +92 -0
  249. airbyte_cdk/sources/file_based/types.py +10 -0
  250. airbyte_cdk/sources/http_config.py +10 -0
  251. airbyte_cdk/sources/http_logger.py +55 -0
  252. airbyte_cdk/sources/message/__init__.py +19 -0
  253. airbyte_cdk/sources/message/repository.py +137 -0
  254. airbyte_cdk/sources/source.py +95 -0
  255. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  256. airbyte_cdk/sources/streams/__init__.py +8 -0
  257. airbyte_cdk/sources/streams/availability_strategy.py +84 -0
  258. airbyte_cdk/sources/streams/call_rate.py +704 -0
  259. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  260. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  261. airbyte_cdk/sources/streams/checkpoint/cursor.py +77 -0
  262. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  263. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  264. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  265. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  266. airbyte_cdk/sources/streams/concurrent/__init__.py +3 -0
  267. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +96 -0
  268. airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +37 -0
  269. airbyte_cdk/sources/streams/concurrent/adapters.py +397 -0
  270. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +94 -0
  271. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  272. airbyte_cdk/sources/streams/concurrent/cursor.py +481 -0
  273. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  274. airbyte_cdk/sources/streams/concurrent/default_stream.py +102 -0
  275. airbyte_cdk/sources/streams/concurrent/exceptions.py +18 -0
  276. airbyte_cdk/sources/streams/concurrent/helpers.py +42 -0
  277. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +64 -0
  278. airbyte_cdk/sources/streams/concurrent/partition_reader.py +45 -0
  279. airbyte_cdk/sources/streams/concurrent/partitions/__init__.py +3 -0
  280. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +48 -0
  281. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +18 -0
  282. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  283. airbyte_cdk/sources/streams/concurrent/partitions/types.py +38 -0
  284. airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py +0 -0
  285. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +182 -0
  286. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +223 -0
  287. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  288. airbyte_cdk/sources/streams/core.py +703 -0
  289. airbyte_cdk/sources/streams/http/__init__.py +10 -0
  290. airbyte_cdk/sources/streams/http/availability_strategy.py +54 -0
  291. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  292. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  293. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  294. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  295. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  296. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  297. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  298. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  299. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  300. airbyte_cdk/sources/streams/http/exceptions.py +61 -0
  301. airbyte_cdk/sources/streams/http/http.py +673 -0
  302. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  303. airbyte_cdk/sources/streams/http/rate_limiting.py +158 -0
  304. airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +14 -0
  305. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +479 -0
  306. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +34 -0
  307. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +436 -0
  308. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +83 -0
  309. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  310. airbyte_cdk/sources/streams/utils/__init__.py +3 -0
  311. airbyte_cdk/sources/types.py +169 -0
  312. airbyte_cdk/sources/utils/__init__.py +7 -0
  313. airbyte_cdk/sources/utils/casing.py +12 -0
  314. airbyte_cdk/sources/utils/files_directory.py +15 -0
  315. airbyte_cdk/sources/utils/record_helper.py +53 -0
  316. airbyte_cdk/sources/utils/schema_helpers.py +230 -0
  317. airbyte_cdk/sources/utils/slice_logger.py +57 -0
  318. airbyte_cdk/sources/utils/transform.py +277 -0
  319. airbyte_cdk/sources/utils/types.py +7 -0
  320. airbyte_cdk/sql/__init__.py +0 -0
  321. airbyte_cdk/sql/_util/__init__.py +0 -0
  322. airbyte_cdk/sql/_util/hashing.py +34 -0
  323. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  324. airbyte_cdk/sql/constants.py +32 -0
  325. airbyte_cdk/sql/exceptions.py +235 -0
  326. airbyte_cdk/sql/secrets.py +123 -0
  327. airbyte_cdk/sql/shared/__init__.py +15 -0
  328. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  329. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  330. airbyte_cdk/sql/types.py +160 -0
  331. airbyte_cdk/test/__init__.py +7 -0
  332. airbyte_cdk/test/catalog_builder.py +81 -0
  333. airbyte_cdk/test/entrypoint_wrapper.py +250 -0
  334. airbyte_cdk/test/mock_http/__init__.py +6 -0
  335. airbyte_cdk/test/mock_http/matcher.py +41 -0
  336. airbyte_cdk/test/mock_http/mocker.py +185 -0
  337. airbyte_cdk/test/mock_http/request.py +103 -0
  338. airbyte_cdk/test/mock_http/response.py +28 -0
  339. airbyte_cdk/test/mock_http/response_builder.py +237 -0
  340. airbyte_cdk/test/state_builder.py +33 -0
  341. airbyte_cdk/test/utils/__init__.py +1 -0
  342. airbyte_cdk/test/utils/data.py +24 -0
  343. airbyte_cdk/test/utils/http_mocking.py +16 -0
  344. airbyte_cdk/test/utils/manifest_only_fixtures.py +59 -0
  345. airbyte_cdk/test/utils/reading.py +26 -0
  346. airbyte_cdk/utils/__init__.py +10 -0
  347. airbyte_cdk/utils/airbyte_secrets_utils.py +80 -0
  348. airbyte_cdk/utils/analytics_message.py +25 -0
  349. airbyte_cdk/utils/constants.py +5 -0
  350. airbyte_cdk/utils/datetime_format_inferrer.py +94 -0
  351. airbyte_cdk/utils/datetime_helpers.py +499 -0
  352. airbyte_cdk/utils/event_timing.py +85 -0
  353. airbyte_cdk/utils/is_cloud_environment.py +18 -0
  354. airbyte_cdk/utils/mapping_helpers.py +162 -0
  355. airbyte_cdk/utils/message_utils.py +26 -0
  356. airbyte_cdk/utils/oneof_option_config.py +33 -0
  357. airbyte_cdk/utils/print_buffer.py +75 -0
  358. airbyte_cdk/utils/schema_inferrer.py +270 -0
  359. airbyte_cdk/utils/slice_hasher.py +37 -0
  360. airbyte_cdk/utils/spec_schema_transformations.py +26 -0
  361. airbyte_cdk/utils/stream_status_utils.py +43 -0
  362. airbyte_cdk/utils/traced_exception.py +145 -0
  363. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE.txt +19 -0
  364. airbyte_cdk-0.0.0.dev0.dist-info/LICENSE_SHORT +1 -0
  365. airbyte_cdk-0.0.0.dev0.dist-info/METADATA +111 -0
  366. airbyte_cdk-0.0.0.dev0.dist-info/RECORD +368 -0
  367. airbyte_cdk-0.0.0.dev0.dist-info/WHEEL +4 -0
  368. airbyte_cdk-0.0.0.dev0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,3 @@
1
+ #
2
+ # Copyright (c) 2021 Airbyte, Inc., all rights reserved.
3
+ #
@@ -0,0 +1,157 @@
1
+ """Contains functions to compile custom code from text."""
2
+
3
+ import hashlib
4
+ import os
5
+ import sys
6
+ from collections.abc import Mapping
7
+ from types import ModuleType
8
+ from typing import Any, cast
9
+
10
+ from typing_extensions import Literal
11
+
12
# Names of the checksum algorithms that may be supplied alongside custom code.
ChecksumType = Literal["md5", "sha256"]
# Maps each supported checksum name to the `hashlib` constructor that computes it.
CHECKSUM_FUNCTIONS = {
    "md5": hashlib.md5,
    "sha256": hashlib.sha256,
}
# Names under which the components module is looked up in, and registered to, `sys.modules`.
COMPONENTS_MODULE_NAME = "components"
SDM_COMPONENTS_MODULE_NAME = "source_declarative_manifest.components"
# Config keys. NOTE(review): `INJECTED_MANIFEST` is not read anywhere in this module;
# presumably it is the config key carrying an injected manifest - confirm against callers.
INJECTED_MANIFEST = "__injected_declarative_manifest"
# Config key holding the text of a custom `components.py`.
INJECTED_COMPONENTS_PY = "__injected_components_py"
# Config key holding the checksums used to validate the injected `components.py` text.
INJECTED_COMPONENTS_PY_CHECKSUMS = "__injected_components_py_checksums"
# Environment variable that must be set to 'true' for custom code execution to be permitted.
ENV_VAR_ALLOW_CUSTOM_CODE = "AIRBYTE_ENABLE_UNSAFE_CODE"
23
+
24
+
25
class AirbyteCodeTamperedError(Exception):
    """Signal that the connector's components module failed checksum verification.

    Treated as fatal: a checksum mismatch can be a sign that the code was tampered with.
    """
30
+
31
+
32
class AirbyteCustomCodeNotPermittedError(Exception):
    """Signal an attempt to run custom code in an environment that does not support it."""

    def __init__(self) -> None:
        # The message is fixed; callers construct this error without arguments.
        message = "".join(
            (
                "Custom connector code is not permitted in this environment. ",
                "If you need to run custom code, please ask your administrator to set the `AIRBYTE_ENABLE_UNSAFE_CODE` ",
                "environment variable to 'true' in your Airbyte environment. ",
                "If you see this message in Airbyte Cloud, your workspace does not allow executing ",
                "custom connector code.",
            )
        )
        super().__init__(message)
43
+
44
+
45
def _hash_text(input_text: str, hash_type: str = "md5") -> str:
    """Compute the hex digest of `input_text` using the named hash algorithm.

    `hash_type` must be a key of `CHECKSUM_FUNCTIONS`; the text is encoded with
    `str.encode()` defaults before hashing.

    Raises:
        ValueError: If `input_text` is empty.
        KeyError: If `hash_type` is not a key of `CHECKSUM_FUNCTIONS`.
    """
    if input_text:
        digest_factory = CHECKSUM_FUNCTIONS[hash_type]
        # Passing the data to the constructor is equivalent to a single `update()` call.
        return digest_factory(input_text.encode()).hexdigest()

    raise ValueError("Hash input text cannot be empty.")
53
+
54
+
55
def custom_code_execution_permitted() -> bool:
    """Report whether custom code may be executed in this environment.

    The answer is `True` only when the `AIRBYTE_ENABLE_UNSAFE_CODE` environment variable
    is set to 'true' (compared case-insensitively). An unset variable yields `False`.
    """
    flag_value = os.getenv(ENV_VAR_ALLOW_CUSTOM_CODE, "")
    return flag_value.lower() == "true"
61
+
62
+
63
def validate_python_code(
    code_text: str,
    checksums: dict[str, str] | None,
) -> None:
    """Validate the provided Python code text against the provided checksums.

    Every entry in `checksums` is checked: the key names the algorithm (it must be a
    key of `CHECKSUM_FUNCTIONS`) and the value is the expected hex digest of `code_text`.

    Currently we fail if no checksums are provided, although this may change in the future.

    Args:
        code_text: The Python source text to validate. Empty text is accepted without
            any validation.
        checksums: Mapping of checksum type to expected hex digest.

    Raises:
        ValueError: If `code_text` is non-empty and `checksums` is missing or empty, or
            if a checksum type is not supported.
        AirbyteCodeTamperedError: If a calculated checksum differs from the expected one.
    """
    if not code_text:
        # No code provided, nothing to validate.
        return

    if not checksums:
        raise ValueError(f"A checksum is required to validate the code. Received: {checksums}")

    for checksum_type, checksum in checksums.items():
        if checksum_type not in CHECKSUM_FUNCTIONS:
            # Join the names explicitly; interpolating `.keys()` would render as `dict_keys([...])`.
            supported_types = ", ".join(CHECKSUM_FUNCTIONS)
            raise ValueError(
                f"Unsupported checksum type: {checksum_type}. "
                f"Supported checksum types are: {supported_types}"
            )

        calculated_checksum = _hash_text(code_text, checksum_type)
        if calculated_checksum != checksum:
            mismatch_details = {
                "expected_checksum": checksum,
                "actual_checksum": calculated_checksum,
                "code_text": code_text,
            }
            # Keep a space between the sentence and the details so the message stays readable.
            raise AirbyteCodeTamperedError(
                f"{checksum_type} checksum does not match. {mismatch_details}"
            )
96
+
97
+
98
def get_registered_components_module(
    config: Mapping[str, Any] | None,
) -> ModuleType | None:
    """Resolve the components module to use for the given config.

    When the config carries custom `components.py` text, that text is validated, executed
    and registered with `sys.modules` so that manifest declarations referencing its classes
    can import it later. Otherwise an already imported/registered module is looked up,
    trying `source_declarative_manifest.components` first and `components` second.

    Returns `None` when no custom components text is provided and neither module is
    present in `sys.modules`.

    Raises:
        AirbyteCustomCodeNotPermittedError: If custom components text is provided but
            custom code execution is not permitted.
    """
    if not config or not config.get(INJECTED_COMPONENTS_PY, None):
        # Nothing injected: fall back to whatever is already registered, if anything.
        for module_name in (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME):
            if module_name in sys.modules:
                return cast(ModuleType, sys.modules[module_name])
        return None

    if not custom_code_execution_permitted():
        raise AirbyteCustomCodeNotPermittedError

    # Build a module object from the injected text and register it.
    return register_components_module_from_string(
        components_py_text=config[INJECTED_COMPONENTS_PY],
        checksums=config.get(INJECTED_COMPONENTS_PY_CHECKSUMS, None),
    )
132
+
133
+
134
def register_components_module_from_string(
    components_py_text: str,
    checksums: dict[str, Any] | None,
) -> ModuleType:
    """Build the components module from a string of Python code and register it.

    The text is validated against `checksums` first, then executed inside a fresh module
    namespace. The resulting module is stored in `sys.modules` under both
    `source_declarative_manifest.components` and `components`, and returned.
    """
    # Checksum validation runs before any of the text is executed.
    validate_python_code(code_text=components_py_text, checksums=checksums)

    module = ModuleType(name=COMPONENTS_MODULE_NAME)

    # NOTE(review): this executes caller-supplied source text. This function does not itself
    # call `custom_code_execution_permitted()`; callers are expected to gate on it.
    exec(components_py_text, module.__dict__)

    # Make the module importable under both of its known names.
    for module_name in (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME):
        sys.modules[module_name] = module

    return module
@@ -0,0 +1,21 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+
6
class CircularReferenceException(Exception):
    """
    Signals that resolving a manifest reference led back to a reference already being resolved.
    """

    def __init__(self, reference: str) -> None:
        # The offending reference is embedded in the message shown to the user.
        message = f"Circular reference found: {reference}"
        super().__init__(message)
13
+
14
+
15
class UndefinedReferenceException(Exception):
    """
    Signals that a manifest refers to a reference that is not defined.
    """

    def __init__(self, path: str, reference: str) -> None:
        # The message names both the missing reference and the path it was requested from.
        message = f"Undefined reference {reference} from {path}"
        super().__init__(message)
@@ -0,0 +1,172 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import copy
6
+ import typing
7
+ from typing import Any, Mapping
8
+
9
# Key under which a component declares the parameters to propagate to its subcomponents.
PARAMETERS_STR = "$parameters"


# Default component type to insert when a subcomponent omits "type".
# Keys have the form "<ParentType>.<field_name>"; values are the type name to assign.
DEFAULT_MODEL_TYPES: Mapping[str, str] = {
    # CompositeErrorHandler
    "CompositeErrorHandler.error_handlers": "DefaultErrorHandler",
    # CursorPagination
    "CursorPagination.decoder": "JsonDecoder",
    # DatetimeBasedCursor
    "DatetimeBasedCursor.end_datetime": "MinMaxDatetime",
    "DatetimeBasedCursor.end_time_option": "RequestOption",
    "DatetimeBasedCursor.start_datetime": "MinMaxDatetime",
    "DatetimeBasedCursor.start_time_option": "RequestOption",
    # CustomIncrementalSync
    "CustomIncrementalSync.end_datetime": "MinMaxDatetime",
    "CustomIncrementalSync.end_time_option": "RequestOption",
    "CustomIncrementalSync.start_datetime": "MinMaxDatetime",
    "CustomIncrementalSync.start_time_option": "RequestOption",
    # DeclarativeSource
    "DeclarativeSource.check": "CheckStream",
    "DeclarativeSource.spec": "Spec",
    "DeclarativeSource.streams": "DeclarativeStream",
    # DeclarativeStream
    "DeclarativeStream.retriever": "SimpleRetriever",
    "DeclarativeStream.schema_loader": "JsonFileSchemaLoader",
    # DynamicDeclarativeStream
    "DynamicDeclarativeStream.stream_template": "DeclarativeStream",
    # The value must be spelled exactly like the type used as key prefix below
    # ("ConfigComponentsResolver"), otherwise the defaults registered for that type
    # can never be found for a resolver whose type was inserted from this table.
    "DynamicDeclarativeStream.components_resolver": "ConfigComponentsResolver",
    # HttpComponentsResolver
    "HttpComponentsResolver.retriever": "SimpleRetriever",
    "HttpComponentsResolver.components_mapping": "ComponentMappingDefinition",
    # ConfigComponentsResolver
    "ConfigComponentsResolver.stream_config": "StreamConfig",
    "ConfigComponentsResolver.components_mapping": "ComponentMappingDefinition",
    # DefaultErrorHandler
    "DefaultErrorHandler.response_filters": "HttpResponseFilter",
    # DefaultPaginator
    "DefaultPaginator.decoder": "JsonDecoder",
    "DefaultPaginator.page_size_option": "RequestOption",
    # DpathExtractor
    "DpathExtractor.decoder": "JsonDecoder",
    # HttpRequester
    "HttpRequester.error_handler": "DefaultErrorHandler",
    # ListPartitionRouter
    "ListPartitionRouter.request_option": "RequestOption",
    # ParentStreamConfig
    "ParentStreamConfig.request_option": "RequestOption",
    "ParentStreamConfig.stream": "DeclarativeStream",
    # RecordSelector
    "RecordSelector.extractor": "DpathExtractor",
    "RecordSelector.record_filter": "RecordFilter",
    # SimpleRetriever
    "SimpleRetriever.paginator": "NoPagination",
    "SimpleRetriever.record_selector": "RecordSelector",
    "SimpleRetriever.requester": "HttpRequester",
    # SubstreamPartitionRouter
    "SubstreamPartitionRouter.parent_stream_configs": "ParentStreamConfig",
    # AddFields
    "AddFields.fields": "AddedFieldDefinition",
    # CustomPartitionRouter
    "CustomPartitionRouter.parent_stream_configs": "ParentStreamConfig",
    # DynamicSchemaLoader
    "DynamicSchemaLoader.retriever": "SimpleRetriever",
    # SchemaTypeIdentifier
    "SchemaTypeIdentifier.types_map": "TypesMap",
}
75
+
76
# Default types for custom components, keyed by "<ParentType>.<field_name>". This table is consulted instead of
# DEFAULT_MODEL_TYPES when a component omits "type" but declares "class_name".
# We retain a separate registry for custom components to automatically insert the type if it is missing. This is intended to
# be a short term fix because once we have migrated, then type and class_name should be requirements for all custom components.
CUSTOM_COMPONENTS_MAPPING: Mapping[str, str] = {
    "CompositeErrorHandler.backoff_strategies": "CustomBackoffStrategy",
    "DeclarativeStream.retriever": "CustomRetriever",
    "DeclarativeStream.transformations": "CustomTransformation",
    "DefaultErrorHandler.backoff_strategies": "CustomBackoffStrategy",
    "DefaultPaginator.pagination_strategy": "CustomPaginationStrategy",
    "HttpRequester.authenticator": "CustomAuthenticator",
    "HttpRequester.error_handler": "CustomErrorHandler",
    "RecordSelector.extractor": "CustomRecordExtractor",
    "SimpleRetriever.partition_router": "CustomPartitionRouter",
}
89
+
90
+
91
class ManifestComponentTransformer:
    """
    Inserts default component types and propagates `$parameters` through a declarative component tree.
    """

    def propagate_types_and_parameters(
        self,
        parent_field_identifier: str,
        declarative_component: Mapping[str, Any],
        parent_parameters: Mapping[str, Any],
    ) -> Mapping[str, Any]:
        """
        Recursively transforms the specified declarative component and subcomponents to propagate parameters and insert the
        default component type if it was not already present. The resulting transformed components are a deep copy of the input
        components, not an in-place transformation.

        A value that is explicitly set on a component always wins over a propagated parameter of the same name, including
        falsy values such as `False`, `0` or `""`. Only a missing field or an explicit `None` is filled from the parameters.

        :param declarative_component: The current component that is having type and parameters added
        :param parent_field_identifier: The name of the field of the current component coming from the parent component,
            in the form "<ParentType>.<field_name>"
        :param parent_parameters: The parameters set on parent components defined before the current component
        :return: A deep copy of the transformed component with types and parameters persisted to it
        """
        propagated_component = dict(copy.deepcopy(declarative_component))
        if "type" not in propagated_component:
            # If the component has class_name we assume that this is a reference to a custom component. This is a slight change to
            # existing behavior because we originally allowed for either class or type to be specified. After the pydantic migration,
            # class_name will only be a valid field on custom components and this change reflects that.
            if "class_name" in propagated_component:
                found_type = CUSTOM_COMPONENTS_MAPPING.get(parent_field_identifier)
            else:
                found_type = DEFAULT_MODEL_TYPES.get(parent_field_identifier)
            if found_type:
                propagated_component["type"] = found_type

        # When there is no resolved type, we're not processing a component (likely a regular object) and don't need to propagate
        # parameters. When the type refers to a json schema, we're not processing a component either. This check is imperfect as
        # there could be json schemas that are not objects, but we believe this is not likely in our case because:
        #  * records are Mapping so objects hence SchemaLoader root should be an object
        #  * connection_specification is a Mapping
        if "type" not in propagated_component or self._is_json_schema_object(propagated_component):
            return propagated_component

        # Combines parameters defined at the current level with parameters from parent components. Parameters at the current
        # level take precedence. An explicit `$parameters: null` is treated like an absent block instead of crashing the merge.
        component_parameters = propagated_component.pop(PARAMETERS_STR, None) or {}
        current_parameters = {**copy.deepcopy(parent_parameters), **component_parameters}

        # Parameters are applied to the current component fields with the existing field taking precedence over parameters if
        # both exist. The check is against None rather than truthiness so that explicit falsy values are not overwritten.
        for parameter_key, parameter_value in current_parameters.items():
            if propagated_component.get(parameter_key) is None:
                propagated_component[parameter_key] = parameter_value

        # Only values of existing keys are reassigned below, so mutating the dict while iterating it is safe.
        for field_name, field_value in propagated_component.items():
            if not isinstance(field_value, (dict, list)):
                continue

            # We exclude propagating a parameter that matches the current field name because that would result in an
            # infinite cycle. Presence is tracked separately from the value so that falsy parameters are restored as well.
            had_same_named_parameter = field_name in current_parameters
            excluded_parameter = current_parameters.pop(field_name, None)

            if isinstance(field_value, dict):
                parent_type_field_identifier = f"{propagated_component.get('type')}.{field_name}"
                propagated_component[field_name] = self.propagate_types_and_parameters(
                    parent_type_field_identifier, field_value, current_parameters
                )
            else:
                for i, element in enumerate(field_value):
                    if isinstance(element, dict):
                        parent_type_field_identifier = (
                            f"{propagated_component.get('type')}.{field_name}"
                        )
                        field_value[i] = self.propagate_types_and_parameters(
                            parent_type_field_identifier, element, current_parameters
                        )

            if had_same_named_parameter:
                current_parameters[field_name] = excluded_parameter

        if current_parameters:
            propagated_component[PARAMETERS_STR] = current_parameters
        return propagated_component

    @staticmethod
    def _is_json_schema_object(propagated_component: Mapping[str, Any]) -> bool:
        """Return True when the mapping's "type" is exactly "object", i.e. it looks like a JSON schema object."""
        return propagated_component.get("type") == "object"
@@ -0,0 +1,213 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import re
6
+ from typing import Any, Mapping, Set, Tuple, Union
7
+
8
+ from airbyte_cdk.sources.declarative.parsers.custom_exceptions import (
9
+ CircularReferenceException,
10
+ UndefinedReferenceException,
11
+ )
12
+
13
# Key whose value is a reference to splat into the mapping that contains it.
REF_TAG = "$ref"


class ManifestReferenceResolver:
    """
    An incoming manifest can contain references to values previously defined.
    This parser will dereference these values to produce a complete ConnectionDefinition.

    References can be defined using a #/<arg> string.
    ```
    key: 1234
    reference: "#/key"
    ```
    will produce the following definition:
    ```
    key: 1234
    reference: 1234
    ```
    This also works with objects:
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs: "#/key_value_pairs"
    ```
    will produce the following definition:
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs:
      k1: v1
      k2: v2
    ```

    The $ref keyword can be used to refer to an object and enhance it with additional key-value pairs
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs:
      $ref: "#/key_value_pairs"
      k3: v3
    ```
    will produce the following definition:
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs:
      k1: v1
      k2: v2
      k3: v3
    ```

    References can also point to nested values.
    Nested references are ambiguous because one could define a key containing `/`.
    In this example, we want to refer to the limit key in the dict object:
    ```
    dict:
      limit: 50
    limit_ref: "#/dict/limit"
    ```
    will produce the following definition:
    ```
    dict:
      limit: 50
    limit_ref: 50
    ```

    whereas here we want to access the `nested/path` value.
    ```
    nested:
      path: "first one"
    nested/path: "uh oh"
    value: "#/nested/path"
    ```
    will produce the following definition:
    ```
    nested:
      path: "first one"
    nested/path: "uh oh"
    value: "uh oh"
    ```

    To resolve the ambiguity, we try looking for the reference key at the top level, and then traverse the structs downward
    until we find a key with the given path, or until there is nothing to traverse.
    """

    def preprocess_manifest(self, manifest: Mapping[str, Any]) -> Mapping[str, Any]:
        """
        :param manifest: incoming manifest that could have references to previously defined components
        :return: a new structure in which every reference has been replaced by the value it points to
        """
        return self._evaluate_node(manifest, manifest, set())  # type: ignore[no-any-return]

    def _evaluate_node(self, node: Any, manifest: Mapping[str, Any], visited: Set[Any]) -> Any:
        """
        Recursively resolve every reference found in `node`.

        :param node: the value currently being evaluated (mapping, list, reference string or plain value)
        :param manifest: the root manifest that references are looked up in
        :param visited: references currently being resolved, used to detect cycles
        :return: the evaluated value; dicts and lists are rebuilt, other values are returned as-is
        :raises CircularReferenceException: if a reference is reached again while it is still being resolved
        """
        if isinstance(node, dict):
            evaluated_dict = {
                k: self._evaluate_node(v, manifest, visited)
                for k, v in node.items()
                if not self._is_ref_key(k)
            }
            if REF_TAG not in node:
                return evaluated_dict
            # The node includes a $ref key, so we splat the referenced value(s) into the evaluated dict
            evaluated_ref = self._evaluate_node(node[REF_TAG], manifest, visited)
            if not isinstance(evaluated_ref, dict):
                # A non-mapping target replaces the whole node; sibling keys are discarded.
                return evaluated_ref
            # The values defined on the component take precedence over the reference values
            return evaluated_ref | evaluated_dict
        if isinstance(node, list):
            return [self._evaluate_node(v, manifest, visited) for v in node]
        if self._is_ref(node):
            if node in visited:
                raise CircularReferenceException(node)
            visited.add(node)
            try:
                return self._evaluate_node(
                    self._lookup_ref_value(node, manifest), manifest, visited
                )
            finally:
                # The reference is only "in progress" while its target is being evaluated.
                visited.remove(node)
        return node

    def _lookup_ref_value(self, ref: str, manifest: Mapping[str, Any]) -> Any:
        """
        Return the raw (not yet evaluated) value that `ref` points to.

        :param ref: a reference string of the form "#/<path>"
        :param manifest: the root manifest to look the path up in
        :raises ValueError: if `ref` does not have the "#/<path>" format
        :raises UndefinedReferenceException: if the path cannot be followed in the manifest
        """
        ref_match = re.match(r"#/(.*)", ref)
        if not ref_match:
            raise ValueError(f"Invalid reference format {ref}")
        path = ref_match.group(1)
        try:
            return self._read_ref_value(path, manifest)
        except (AttributeError, KeyError, IndexError, TypeError) as error:
            # TypeError is raised when the path walks through a value that cannot be indexed with the next
            # path component (for example a number or a string); that is an undefined reference as well.
            raise UndefinedReferenceException(path, ref) from error

    @staticmethod
    def _is_ref(node: Any) -> bool:
        """Return True if `node` is a string starting with "#/"."""
        return isinstance(node, str) and node.startswith("#/")

    @staticmethod
    def _is_ref_key(key: str) -> bool:
        """Return True if `key` is the `$ref` tag."""
        return bool(key == REF_TAG)

    @staticmethod
    def _read_ref_value(ref: str, manifest_node: Mapping[str, Any]) -> Any:
        """
        Read the value at the referenced location of the manifest.

        References are ambiguous because one could define a key containing `/`
        In this example, we want to refer to the `limit` key in the `dict` object:
            dict:
                limit: 50
            limit_ref: "#/dict/limit"

        Whereas here we want to access the `nested/path` value.
            nested:
                path: "first one"
            nested/path: "uh oh"
            value: "#/nested/path"

        To resolve the ambiguity, we try looking for the reference key at the top level, and then traverse the structs downward
        until we find a key with the given path, or until there is nothing to traverse.

        Consider the path foo/bar/baz. To resolve the ambiguity, we first try 'foo/bar/baz' in its entirety as a top-level key. If this
        fails, we try 'foo' as the top-level key, and if this succeeds, pass 'bar/baz' on as the key to be tried at the next level.

        Lookup failures while descending (KeyError, IndexError, TypeError) are not handled here and propagate to the caller.
        """
        while ref:
            try:
                return manifest_node[ref]
            except (KeyError, TypeError):
                # The remaining path is not a key of this node as a whole: consume its first component and descend.
                head, ref = _parse_path(ref)
                manifest_node = manifest_node[head]  # type: ignore # Couldn't figure out how to fix this since manifest_node can get reassigned into other types like lists
        return manifest_node
186
+
187
+
188
+ def _parse_path(ref: str) -> Tuple[Union[str, int], str]:
189
+ """
190
+ Return the next path component, together with the rest of the path.
191
+
192
+ A path component may be a string key, or an int index.
193
+
194
+ >>> _parse_path("foo/bar")
195
+ "foo", "bar"
196
+ >>> _parse_path("foo/7/8/bar")
197
+ "foo", "7/8/bar"
198
+ >>> _parse_path("7/8/bar")
199
+ 7, "8/bar"
200
+ >>> _parse_path("8/bar")
201
+ 8, "bar"
202
+ >>> _parse_path("8foo/bar")
203
+ "8foo", "bar"
204
+ """
205
+ match = re.match(r"([^/]*)/?(.*)", ref)
206
+ if match:
207
+ first, rest = match.groups()
208
+ try:
209
+ return int(first), rest
210
+ except ValueError:
211
+ return first, rest
212
+ else:
213
+ raise ValueError(f"Invalid path {ref} specified")