airbyte-cdk 0.72.0__py3-none-any.whl → 6.17.1.dev0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (518) hide show
  1. airbyte_cdk/__init__.py +355 -6
  2. airbyte_cdk/cli/__init__.py +1 -0
  3. airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
  4. airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
  5. airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
  6. airbyte_cdk/config_observation.py +29 -10
  7. airbyte_cdk/connector.py +24 -24
  8. airbyte_cdk/connector_builder/README.md +53 -0
  9. airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
  10. airbyte_cdk/connector_builder/main.py +45 -13
  11. airbyte_cdk/connector_builder/message_grouper.py +189 -50
  12. airbyte_cdk/connector_builder/models.py +3 -2
  13. airbyte_cdk/destinations/__init__.py +4 -3
  14. airbyte_cdk/destinations/destination.py +54 -20
  15. airbyte_cdk/destinations/vector_db_based/README.md +37 -0
  16. airbyte_cdk/destinations/vector_db_based/config.py +40 -17
  17. airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
  18. airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
  19. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  20. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  21. airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
  22. airbyte_cdk/entrypoint.py +153 -44
  23. airbyte_cdk/exception_handler.py +21 -3
  24. airbyte_cdk/logger.py +30 -44
  25. airbyte_cdk/models/__init__.py +13 -2
  26. airbyte_cdk/models/airbyte_protocol.py +86 -1
  27. airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
  28. airbyte_cdk/models/file_transfer_record_message.py +13 -0
  29. airbyte_cdk/models/well_known_types.py +1 -1
  30. airbyte_cdk/sources/__init__.py +5 -1
  31. airbyte_cdk/sources/abstract_source.py +125 -79
  32. airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
  33. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
  34. airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
  35. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
  36. airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
  37. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
  38. airbyte_cdk/sources/config.py +3 -2
  39. airbyte_cdk/sources/connector_state_manager.py +49 -83
  40. airbyte_cdk/sources/declarative/async_job/job.py +52 -0
  41. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
  42. airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
  43. airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
  44. airbyte_cdk/sources/declarative/async_job/status.py +24 -0
  45. airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
  46. airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
  47. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  48. airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
  49. airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
  50. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
  51. airbyte_cdk/sources/declarative/auth/token.py +28 -10
  52. airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
  53. airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
  54. airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
  55. airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
  56. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
  57. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +490 -0
  58. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
  59. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
  60. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1213 -88
  61. airbyte_cdk/sources/declarative/declarative_source.py +5 -2
  62. airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
  63. airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
  64. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
  65. airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
  66. airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
  67. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
  68. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
  69. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
  70. airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
  71. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
  72. airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
  73. airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
  74. airbyte_cdk/sources/declarative/extractors/record_filter.py +63 -8
  75. airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
  76. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
  77. airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
  78. airbyte_cdk/sources/declarative/incremental/__init__.py +31 -3
  79. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +346 -0
  80. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
  81. airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
  82. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
  83. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +173 -74
  84. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
  85. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
  86. airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
  87. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
  88. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
  89. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
  90. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
  91. airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
  92. airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
  93. airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
  94. airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
  95. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
  96. airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
  97. airbyte_cdk/sources/declarative/models/__init__.py +1 -1
  98. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1329 -595
  99. airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
  100. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
  101. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
  102. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1763 -226
  103. airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
  104. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  105. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
  106. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
  107. airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
  108. airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
  109. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
  110. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
  111. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
  112. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
  113. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
  114. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
  115. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
  116. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
  117. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
  118. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
  119. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
  120. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
  121. airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
  122. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
  123. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
  124. airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
  125. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
  126. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
  127. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
  128. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
  129. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
  130. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
  131. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
  132. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
  133. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
  134. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
  135. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
  136. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
  137. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
  138. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
  139. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
  140. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
  141. airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
  142. airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
  143. airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
  144. airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
  145. airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
  146. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
  147. airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
  148. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
  149. airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
  150. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +229 -73
  151. airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
  152. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
  153. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
  154. airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
  155. airbyte_cdk/sources/declarative/spec/spec.py +12 -5
  156. airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
  157. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
  158. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
  159. airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
  160. airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
  161. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  162. airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
  163. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
  164. airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
  165. airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
  166. airbyte_cdk/sources/declarative/types.py +19 -110
  167. airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
  168. airbyte_cdk/sources/embedded/base_integration.py +16 -5
  169. airbyte_cdk/sources/embedded/catalog.py +16 -4
  170. airbyte_cdk/sources/embedded/runner.py +19 -3
  171. airbyte_cdk/sources/embedded/tools.py +5 -2
  172. airbyte_cdk/sources/file_based/README.md +152 -0
  173. airbyte_cdk/sources/file_based/__init__.py +24 -0
  174. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
  175. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
  176. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
  177. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +47 -10
  178. airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
  179. airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
  180. airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
  181. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
  182. airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
  183. airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
  184. airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
  185. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
  186. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  187. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  188. airbyte_cdk/sources/file_based/exceptions.py +18 -15
  189. airbyte_cdk/sources/file_based/file_based_source.py +140 -33
  190. airbyte_cdk/sources/file_based/file_based_stream_reader.py +69 -5
  191. airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
  192. airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
  193. airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
  194. airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
  195. airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
  196. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  197. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
  198. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
  199. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +141 -41
  200. airbyte_cdk/sources/file_based/remote_file.py +1 -1
  201. airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
  202. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
  203. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  204. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  205. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
  206. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
  207. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
  208. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
  209. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
  210. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
  211. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  212. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
  213. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +147 -45
  214. airbyte_cdk/sources/http_logger.py +8 -3
  215. airbyte_cdk/sources/message/__init__.py +7 -1
  216. airbyte_cdk/sources/message/repository.py +18 -4
  217. airbyte_cdk/sources/source.py +42 -38
  218. airbyte_cdk/sources/streams/__init__.py +2 -2
  219. airbyte_cdk/sources/streams/availability_strategy.py +54 -3
  220. airbyte_cdk/sources/streams/call_rate.py +64 -21
  221. airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
  222. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
  223. airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
  224. airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
  225. airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
  226. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
  227. airbyte_cdk/sources/streams/concurrent/README.md +7 -0
  228. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
  229. airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
  230. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
  231. airbyte_cdk/sources/streams/concurrent/cursor.py +298 -42
  232. airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
  233. airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
  234. airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
  235. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
  236. airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
  237. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
  238. airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
  239. airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
  240. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
  241. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
  242. airbyte_cdk/sources/streams/core.py +412 -87
  243. airbyte_cdk/sources/streams/http/__init__.py +2 -1
  244. airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
  245. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
  246. airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
  247. airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
  248. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
  249. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
  250. airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
  251. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
  252. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
  253. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
  254. airbyte_cdk/sources/streams/http/exceptions.py +27 -7
  255. airbyte_cdk/sources/streams/http/http.py +369 -246
  256. airbyte_cdk/sources/streams/http/http_client.py +531 -0
  257. airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
  258. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
  259. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
  260. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
  261. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  262. airbyte_cdk/sources/types.py +154 -0
  263. airbyte_cdk/sources/utils/record_helper.py +36 -21
  264. airbyte_cdk/sources/utils/schema_helpers.py +13 -6
  265. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  266. airbyte_cdk/sources/utils/transform.py +54 -20
  267. airbyte_cdk/sql/_util/hashing.py +34 -0
  268. airbyte_cdk/sql/_util/name_normalizers.py +92 -0
  269. airbyte_cdk/sql/constants.py +32 -0
  270. airbyte_cdk/sql/exceptions.py +235 -0
  271. airbyte_cdk/sql/secrets.py +123 -0
  272. airbyte_cdk/sql/shared/__init__.py +15 -0
  273. airbyte_cdk/sql/shared/catalog_providers.py +145 -0
  274. airbyte_cdk/sql/shared/sql_processor.py +786 -0
  275. airbyte_cdk/sql/types.py +160 -0
  276. airbyte_cdk/test/catalog_builder.py +70 -18
  277. airbyte_cdk/test/entrypoint_wrapper.py +117 -42
  278. airbyte_cdk/test/mock_http/__init__.py +1 -1
  279. airbyte_cdk/test/mock_http/matcher.py +6 -0
  280. airbyte_cdk/test/mock_http/mocker.py +57 -10
  281. airbyte_cdk/test/mock_http/request.py +19 -3
  282. airbyte_cdk/test/mock_http/response.py +3 -1
  283. airbyte_cdk/test/mock_http/response_builder.py +32 -16
  284. airbyte_cdk/test/state_builder.py +18 -10
  285. airbyte_cdk/test/utils/__init__.py +1 -0
  286. airbyte_cdk/test/utils/data.py +24 -0
  287. airbyte_cdk/test/utils/http_mocking.py +16 -0
  288. airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
  289. airbyte_cdk/test/utils/reading.py +26 -0
  290. airbyte_cdk/utils/__init__.py +2 -1
  291. airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
  292. airbyte_cdk/utils/analytics_message.py +10 -2
  293. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  294. airbyte_cdk/utils/event_timing.py +10 -10
  295. airbyte_cdk/utils/mapping_helpers.py +3 -1
  296. airbyte_cdk/utils/message_utils.py +20 -11
  297. airbyte_cdk/utils/print_buffer.py +75 -0
  298. airbyte_cdk/utils/schema_inferrer.py +198 -28
  299. airbyte_cdk/utils/slice_hasher.py +30 -0
  300. airbyte_cdk/utils/spec_schema_transformations.py +6 -3
  301. airbyte_cdk/utils/stream_status_utils.py +8 -1
  302. airbyte_cdk/utils/traced_exception.py +61 -21
  303. airbyte_cdk-6.17.1.dev0.dist-info/METADATA +109 -0
  304. airbyte_cdk-6.17.1.dev0.dist-info/RECORD +350 -0
  305. {airbyte_cdk-0.72.0.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/WHEEL +1 -2
  306. airbyte_cdk-6.17.1.dev0.dist-info/entry_points.txt +3 -0
  307. airbyte_cdk/sources/declarative/create_partial.py +0 -92
  308. airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
  309. airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
  310. airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
  311. airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
  312. airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
  313. airbyte_cdk/sources/deprecated/base_source.py +0 -94
  314. airbyte_cdk/sources/deprecated/client.py +0 -99
  315. airbyte_cdk/sources/singer/__init__.py +0 -8
  316. airbyte_cdk/sources/singer/singer_helpers.py +0 -304
  317. airbyte_cdk/sources/singer/source.py +0 -186
  318. airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
  319. airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
  320. airbyte_cdk/sources/streams/http/auth/core.py +0 -29
  321. airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
  322. airbyte_cdk/sources/streams/http/auth/token.py +0 -47
  323. airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
  324. airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
  325. airbyte_cdk/sources/utils/schema_models.py +0 -84
  326. airbyte_cdk-0.72.0.dist-info/METADATA +0 -243
  327. airbyte_cdk-0.72.0.dist-info/RECORD +0 -466
  328. airbyte_cdk-0.72.0.dist-info/top_level.txt +0 -3
  329. source_declarative_manifest/main.py +0 -29
  330. unit_tests/connector_builder/__init__.py +0 -3
  331. unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
  332. unit_tests/connector_builder/test_message_grouper.py +0 -713
  333. unit_tests/connector_builder/utils.py +0 -27
  334. unit_tests/destinations/test_destination.py +0 -243
  335. unit_tests/singer/test_singer_helpers.py +0 -56
  336. unit_tests/singer/test_singer_source.py +0 -112
  337. unit_tests/sources/__init__.py +0 -0
  338. unit_tests/sources/concurrent_source/__init__.py +0 -3
  339. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
  340. unit_tests/sources/declarative/__init__.py +0 -3
  341. unit_tests/sources/declarative/auth/__init__.py +0 -3
  342. unit_tests/sources/declarative/auth/test_oauth.py +0 -331
  343. unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
  344. unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
  345. unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
  346. unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
  347. unit_tests/sources/declarative/checks/__init__.py +0 -3
  348. unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
  349. unit_tests/sources/declarative/decoders/__init__.py +0 -0
  350. unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
  351. unit_tests/sources/declarative/external_component.py +0 -13
  352. unit_tests/sources/declarative/extractors/__init__.py +0 -3
  353. unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
  354. unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
  355. unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
  356. unit_tests/sources/declarative/incremental/__init__.py +0 -0
  357. unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
  358. unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
  359. unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
  360. unit_tests/sources/declarative/interpolation/__init__.py +0 -3
  361. unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
  362. unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
  363. unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
  364. unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
  365. unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
  366. unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
  367. unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
  368. unit_tests/sources/declarative/parsers/__init__.py +0 -3
  369. unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
  370. unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
  371. unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1841
  372. unit_tests/sources/declarative/parsers/testing_components.py +0 -36
  373. unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
  374. unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
  375. unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
  376. unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
  377. unit_tests/sources/declarative/requesters/__init__.py +0 -3
  378. unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
  379. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
  380. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
  381. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
  382. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
  383. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
  384. unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
  385. unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
  386. unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
  387. unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
  388. unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
  389. unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
  390. unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
  391. unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
  392. unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
  393. unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
  394. unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
  395. unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
  396. unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
  397. unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
  398. unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
  399. unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
  400. unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
  401. unit_tests/sources/declarative/retrievers/__init__.py +0 -3
  402. unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
  403. unit_tests/sources/declarative/schema/__init__.py +0 -6
  404. unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
  405. unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
  406. unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
  407. unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
  408. unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
  409. unit_tests/sources/declarative/states/__init__.py +0 -3
  410. unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
  411. unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
  412. unit_tests/sources/declarative/test_create_partial.py +0 -83
  413. unit_tests/sources/declarative/test_declarative_stream.py +0 -103
  414. unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
  415. unit_tests/sources/declarative/test_types.py +0 -39
  416. unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
  417. unit_tests/sources/file_based/__init__.py +0 -0
  418. unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
  419. unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
  420. unit_tests/sources/file_based/config/__init__.py +0 -0
  421. unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
  422. unit_tests/sources/file_based/config/test_csv_format.py +0 -34
  423. unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
  424. unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
  425. unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
  426. unit_tests/sources/file_based/file_types/__init__.py +0 -0
  427. unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
  428. unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
  429. unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
  430. unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
  431. unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
  432. unit_tests/sources/file_based/helpers.py +0 -70
  433. unit_tests/sources/file_based/in_memory_files_source.py +0 -211
  434. unit_tests/sources/file_based/scenarios/__init__.py +0 -0
  435. unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
  436. unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
  437. unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
  438. unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
  439. unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
  440. unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
  441. unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
  442. unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
  443. unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
  444. unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
  445. unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
  446. unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
  447. unit_tests/sources/file_based/stream/__init__.py +0 -0
  448. unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
  449. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
  450. unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
  451. unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
  452. unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
  453. unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
  454. unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
  455. unit_tests/sources/file_based/test_scenarios.py +0 -253
  456. unit_tests/sources/file_based/test_schema_helpers.py +0 -346
  457. unit_tests/sources/fixtures/__init__.py +0 -3
  458. unit_tests/sources/fixtures/source_test_fixture.py +0 -153
  459. unit_tests/sources/message/__init__.py +0 -0
  460. unit_tests/sources/message/test_repository.py +0 -153
  461. unit_tests/sources/streams/__init__.py +0 -0
  462. unit_tests/sources/streams/concurrent/__init__.py +0 -3
  463. unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
  464. unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
  465. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
  466. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
  467. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
  468. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
  469. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
  470. unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
  471. unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
  472. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
  473. unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
  474. unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
  475. unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
  476. unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
  477. unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
  478. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
  479. unit_tests/sources/streams/http/__init__.py +0 -0
  480. unit_tests/sources/streams/http/auth/__init__.py +0 -0
  481. unit_tests/sources/streams/http/auth/test_auth.py +0 -173
  482. unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
  483. unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
  484. unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
  485. unit_tests/sources/streams/http/test_http.py +0 -635
  486. unit_tests/sources/streams/test_availability_strategy.py +0 -70
  487. unit_tests/sources/streams/test_call_rate.py +0 -300
  488. unit_tests/sources/streams/test_stream_read.py +0 -405
  489. unit_tests/sources/streams/test_streams_core.py +0 -184
  490. unit_tests/sources/test_abstract_source.py +0 -1442
  491. unit_tests/sources/test_concurrent_source.py +0 -112
  492. unit_tests/sources/test_config.py +0 -92
  493. unit_tests/sources/test_connector_state_manager.py +0 -482
  494. unit_tests/sources/test_http_logger.py +0 -252
  495. unit_tests/sources/test_integration_source.py +0 -86
  496. unit_tests/sources/test_source.py +0 -684
  497. unit_tests/sources/test_source_read.py +0 -460
  498. unit_tests/test/__init__.py +0 -0
  499. unit_tests/test/mock_http/__init__.py +0 -0
  500. unit_tests/test/mock_http/test_matcher.py +0 -53
  501. unit_tests/test/mock_http/test_mocker.py +0 -214
  502. unit_tests/test/mock_http/test_request.py +0 -117
  503. unit_tests/test/mock_http/test_response_builder.py +0 -177
  504. unit_tests/test/test_entrypoint_wrapper.py +0 -240
  505. unit_tests/utils/__init__.py +0 -0
  506. unit_tests/utils/test_datetime_format_inferrer.py +0 -60
  507. unit_tests/utils/test_mapping_helpers.py +0 -54
  508. unit_tests/utils/test_message_utils.py +0 -91
  509. unit_tests/utils/test_rate_limiting.py +0 -26
  510. unit_tests/utils/test_schema_inferrer.py +0 -202
  511. unit_tests/utils/test_secret_utils.py +0 -135
  512. unit_tests/utils/test_stream_status_utils.py +0 -61
  513. unit_tests/utils/test_traced_exception.py +0 -107
  514. /airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
  515. {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
  516. {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
  517. {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
  518. {airbyte_cdk-0.72.0.dist-info → airbyte_cdk-6.17.1.dev0.dist-info}/LICENSE.txt +0 -0
@@ -7,7 +7,6 @@ import logging
7
7
  from copy import deepcopy
8
8
  from json import JSONDecodeError
9
9
  from typing import Any, Dict, Iterable, Iterator, List, Mapping, Optional, Union
10
- from urllib.parse import parse_qs, urlparse
11
10
 
12
11
  from airbyte_cdk.connector_builder.models import (
13
12
  AuxiliaryRequest,
@@ -19,22 +18,23 @@ from airbyte_cdk.connector_builder.models import (
19
18
  StreamReadSlices,
20
19
  )
21
20
  from airbyte_cdk.entrypoint import AirbyteEntrypoint
22
- from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
23
- from airbyte_cdk.sources.utils.slice_logger import SliceLogger
24
- from airbyte_cdk.sources.utils.types import JsonType
25
- from airbyte_cdk.utils import AirbyteTracedException
26
- from airbyte_cdk.utils.datetime_format_inferrer import DatetimeFormatInferrer
27
- from airbyte_cdk.utils.schema_inferrer import SchemaInferrer
28
- from airbyte_protocol.models.airbyte_protocol import (
21
+ from airbyte_cdk.models import (
29
22
  AirbyteControlMessage,
30
23
  AirbyteLogMessage,
31
24
  AirbyteMessage,
25
+ AirbyteStateMessage,
32
26
  AirbyteTraceMessage,
33
27
  ConfiguredAirbyteCatalog,
34
28
  OrchestratorType,
35
29
  TraceType,
36
30
  )
37
- from airbyte_protocol.models.airbyte_protocol import Type as MessageType
31
+ from airbyte_cdk.models import Type as MessageType
32
+ from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
33
+ from airbyte_cdk.sources.utils.slice_logger import SliceLogger
34
+ from airbyte_cdk.sources.utils.types import JsonType
35
+ from airbyte_cdk.utils import AirbyteTracedException
36
+ from airbyte_cdk.utils.datetime_format_inferrer import DatetimeFormatInferrer
37
+ from airbyte_cdk.utils.schema_inferrer import SchemaInferrer, SchemaValidationException
38
38
 
39
39
 
40
40
  class MessageGrouper:
@@ -45,16 +45,55 @@ class MessageGrouper:
45
45
  self._max_slices = max_slices
46
46
  self._max_record_limit = max_record_limit
47
47
 
48
+ def _pk_to_nested_and_composite_field(
49
+ self, field: Optional[Union[str, List[str], List[List[str]]]]
50
+ ) -> List[List[str]]:
51
+ if not field:
52
+ return [[]]
53
+
54
+ if isinstance(field, str):
55
+ return [[field]]
56
+
57
+ is_composite_key = isinstance(field[0], str)
58
+ if is_composite_key:
59
+ return [[i] for i in field] # type: ignore # the type of field is expected to be List[str] here
60
+
61
+ return field # type: ignore # the type of field is expected to be List[List[str]] here
62
+
63
+ def _cursor_field_to_nested_and_composite_field(
64
+ self, field: Union[str, List[str]]
65
+ ) -> List[List[str]]:
66
+ if not field:
67
+ return [[]]
68
+
69
+ if isinstance(field, str):
70
+ return [[field]]
71
+
72
+ is_nested_key = isinstance(field[0], str)
73
+ if is_nested_key:
74
+ return [field]
75
+
76
+ raise ValueError(f"Unknown type for cursor field `{field}")
77
+
48
78
  def get_message_groups(
49
79
  self,
50
80
  source: DeclarativeSource,
51
81
  config: Mapping[str, Any],
52
82
  configured_catalog: ConfiguredAirbyteCatalog,
83
+ state: List[AirbyteStateMessage],
53
84
  record_limit: Optional[int] = None,
54
85
  ) -> StreamRead:
55
86
  if record_limit is not None and not (1 <= record_limit <= self._max_record_limit):
56
- raise ValueError(f"Record limit must be between 1 and {self._max_record_limit}. Got {record_limit}")
57
- schema_inferrer = SchemaInferrer()
87
+ raise ValueError(
88
+ f"Record limit must be between 1 and {self._max_record_limit}. Got {record_limit}"
89
+ )
90
+ stream = source.streams(config)[
91
+ 0
92
+ ] # The connector builder currently only supports reading from a single stream at a time
93
+ schema_inferrer = SchemaInferrer(
94
+ self._pk_to_nested_and_composite_field(stream.primary_key),
95
+ self._cursor_field_to_nested_and_composite_field(stream.cursor_field),
96
+ )
58
97
  datetime_format_inferrer = DatetimeFormatInferrer()
59
98
 
60
99
  if record_limit is None:
@@ -67,19 +106,34 @@ class MessageGrouper:
67
106
  latest_config_update: AirbyteControlMessage = None
68
107
  auxiliary_requests = []
69
108
  for message_group in self._get_message_groups(
70
- self._read_stream(source, config, configured_catalog),
109
+ self._read_stream(source, config, configured_catalog, state),
71
110
  schema_inferrer,
72
111
  datetime_format_inferrer,
73
112
  record_limit,
74
113
  ):
75
114
  if isinstance(message_group, AirbyteLogMessage):
76
- log_messages.append(LogMessage(**{"message": message_group.message, "level": message_group.level.value}))
115
+ log_messages.append(
116
+ LogMessage(
117
+ **{"message": message_group.message, "level": message_group.level.value}
118
+ )
119
+ )
77
120
  elif isinstance(message_group, AirbyteTraceMessage):
78
121
  if message_group.type == TraceType.ERROR:
79
- error_message = f"{message_group.error.message} - {message_group.error.stack_trace}"
80
- log_messages.append(LogMessage(**{"message": error_message, "level": "ERROR"}))
122
+ log_messages.append(
123
+ LogMessage(
124
+ **{
125
+ "message": message_group.error.message,
126
+ "level": "ERROR",
127
+ "internal_message": message_group.error.internal_message,
128
+ "stacktrace": message_group.error.stack_trace,
129
+ }
130
+ )
131
+ )
81
132
  elif isinstance(message_group, AirbyteControlMessage):
82
- if not latest_config_update or latest_config_update.emitted_at <= message_group.emitted_at:
133
+ if (
134
+ not latest_config_update
135
+ or latest_config_update.emitted_at <= message_group.emitted_at
136
+ ):
83
137
  latest_config_update = message_group
84
138
  elif isinstance(message_group, AuxiliaryRequest):
85
139
  auxiliary_requests.append(message_group)
@@ -88,15 +142,24 @@ class MessageGrouper:
88
142
  else:
89
143
  raise ValueError(f"Unknown message group type: {type(message_group)}")
90
144
 
145
+ try:
146
+ # The connector builder currently only supports reading from a single stream at a time
147
+ configured_stream = configured_catalog.streams[0]
148
+ schema = schema_inferrer.get_stream_schema(configured_stream.stream.name)
149
+ except SchemaValidationException as exception:
150
+ for validation_error in exception.validation_errors:
151
+ log_messages.append(LogMessage(validation_error, "ERROR"))
152
+ schema = exception.schema
153
+
91
154
  return StreamRead(
92
155
  logs=log_messages,
93
156
  slices=slices,
94
157
  test_read_limit_reached=self._has_reached_limit(slices),
95
158
  auxiliary_requests=auxiliary_requests,
96
- inferred_schema=schema_inferrer.get_stream_schema(
97
- configured_catalog.streams[0].stream.name
98
- ), # The connector builder currently only supports reading from a single stream at a time
99
- latest_config_update=self._clean_config(latest_config_update.connectorConfig.config) if latest_config_update else None,
159
+ inferred_schema=schema,
160
+ latest_config_update=self._clean_config(latest_config_update.connectorConfig.config)
161
+ if latest_config_update
162
+ else None,
100
163
  inferred_datetime_formats=datetime_format_inferrer.get_inferred_datetime_formats(),
101
164
  )
102
165
 
@@ -106,7 +169,15 @@ class MessageGrouper:
106
169
  schema_inferrer: SchemaInferrer,
107
170
  datetime_format_inferrer: DatetimeFormatInferrer,
108
171
  limit: int,
109
- ) -> Iterable[Union[StreamReadPages, AirbyteControlMessage, AirbyteLogMessage, AirbyteTraceMessage, AuxiliaryRequest]]:
172
+ ) -> Iterable[
173
+ Union[
174
+ StreamReadPages,
175
+ AirbyteControlMessage,
176
+ AirbyteLogMessage,
177
+ AirbyteTraceMessage,
178
+ AuxiliaryRequest,
179
+ ]
180
+ ]:
110
181
  """
111
182
  Message groups are partitioned according to when request log messages are received. Subsequent response log messages
112
183
  and record messages belong to the prior request log message and when we encounter another request, append the latest
@@ -129,42 +200,64 @@ class MessageGrouper:
129
200
  current_slice_pages: List[StreamReadPages] = []
130
201
  current_page_request: Optional[HttpRequest] = None
131
202
  current_page_response: Optional[HttpResponse] = None
203
+ latest_state_message: Optional[Dict[str, Any]] = None
132
204
 
133
205
  while records_count < limit and (message := next(messages, None)):
134
206
  json_object = self._parse_json(message.log) if message.type == MessageType.LOG else None
135
207
  if json_object is not None and not isinstance(json_object, dict):
136
- raise ValueError(f"Expected log message to be a dict, got {json_object} of type {type(json_object)}")
208
+ raise ValueError(
209
+ f"Expected log message to be a dict, got {json_object} of type {type(json_object)}"
210
+ )
137
211
  json_message: Optional[Dict[str, JsonType]] = json_object
138
212
  if self._need_to_close_page(at_least_one_page_in_group, message, json_message):
139
- self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records)
213
+ self._close_page(
214
+ current_page_request,
215
+ current_page_response,
216
+ current_slice_pages,
217
+ current_page_records,
218
+ )
140
219
  current_page_request = None
141
220
  current_page_response = None
142
221
 
143
222
  if (
144
223
  at_least_one_page_in_group
145
224
  and message.type == MessageType.LOG
146
- and message.log.message.startswith(SliceLogger.SLICE_LOG_PREFIX)
225
+ and message.log.message.startswith(SliceLogger.SLICE_LOG_PREFIX) # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
147
226
  ):
148
- yield StreamReadSlices(pages=current_slice_pages, slice_descriptor=current_slice_descriptor)
149
- current_slice_descriptor = self._parse_slice_description(message.log.message)
227
+ yield StreamReadSlices(
228
+ pages=current_slice_pages,
229
+ slice_descriptor=current_slice_descriptor,
230
+ state=[latest_state_message] if latest_state_message else [],
231
+ )
232
+ current_slice_descriptor = self._parse_slice_description(message.log.message) # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
150
233
  current_slice_pages = []
151
234
  at_least_one_page_in_group = False
152
- elif message.type == MessageType.LOG and message.log.message.startswith(SliceLogger.SLICE_LOG_PREFIX):
235
+ elif message.type == MessageType.LOG and message.log.message.startswith( # type: ignore[union-attr] # None doesn't have 'message'
236
+ SliceLogger.SLICE_LOG_PREFIX
237
+ ):
153
238
  # parsing the first slice
154
- current_slice_descriptor = self._parse_slice_description(message.log.message)
239
+ current_slice_descriptor = self._parse_slice_description(message.log.message) # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
155
240
  elif message.type == MessageType.LOG:
156
241
  if json_message is not None and self._is_http_log(json_message):
157
242
  if self._is_auxiliary_http_request(json_message):
158
243
  airbyte_cdk = json_message.get("airbyte_cdk", {})
159
244
  if not isinstance(airbyte_cdk, dict):
160
- raise ValueError(f"Expected airbyte_cdk to be a dict, got {airbyte_cdk} of type {type(airbyte_cdk)}")
245
+ raise ValueError(
246
+ f"Expected airbyte_cdk to be a dict, got {airbyte_cdk} of type {type(airbyte_cdk)}"
247
+ )
161
248
  stream = airbyte_cdk.get("stream", {})
162
249
  if not isinstance(stream, dict):
163
- raise ValueError(f"Expected stream to be a dict, got {stream} of type {type(stream)}")
164
- title_prefix = "Parent stream: " if stream.get("is_substream", False) else ""
250
+ raise ValueError(
251
+ f"Expected stream to be a dict, got {stream} of type {type(stream)}"
252
+ )
253
+ title_prefix = (
254
+ "Parent stream: " if stream.get("is_substream", False) else ""
255
+ )
165
256
  http = json_message.get("http", {})
166
257
  if not isinstance(http, dict):
167
- raise ValueError(f"Expected http to be a dict, got {http} of type {type(http)}")
258
+ raise ValueError(
259
+ f"Expected http to be a dict, got {http} of type {type(http)}"
260
+ )
168
261
  yield AuxiliaryRequest(
169
262
  title=title_prefix + str(http.get("title", None)),
170
263
  description=str(http.get("description", None)),
@@ -178,26 +271,47 @@ class MessageGrouper:
178
271
  else:
179
272
  yield message.log
180
273
  elif message.type == MessageType.TRACE:
181
- if message.trace.type == TraceType.ERROR:
274
+ if message.trace.type == TraceType.ERROR: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has trace.type
182
275
  yield message.trace
183
276
  elif message.type == MessageType.RECORD:
184
- current_page_records.append(message.record.data)
277
+ current_page_records.append(message.record.data) # type: ignore[arg-type, union-attr] # AirbyteMessage with MessageType.RECORD has record.data
185
278
  records_count += 1
186
279
  schema_inferrer.accumulate(message.record)
187
280
  datetime_format_inferrer.accumulate(message.record)
188
- elif message.type == MessageType.CONTROL and message.control.type == OrchestratorType.CONNECTOR_CONFIG:
281
+ elif (
282
+ message.type == MessageType.CONTROL
283
+ and message.control.type == OrchestratorType.CONNECTOR_CONFIG # type: ignore[union-attr] # None doesn't have 'type'
284
+ ):
189
285
  yield message.control
286
+ elif message.type == MessageType.STATE:
287
+ latest_state_message = message.state # type: ignore[assignment]
190
288
  else:
191
289
  if current_page_request or current_page_response or current_page_records:
192
- self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records)
193
- yield StreamReadSlices(pages=current_slice_pages, slice_descriptor=current_slice_descriptor)
290
+ self._close_page(
291
+ current_page_request,
292
+ current_page_response,
293
+ current_slice_pages,
294
+ current_page_records,
295
+ )
296
+ yield StreamReadSlices(
297
+ pages=current_slice_pages,
298
+ slice_descriptor=current_slice_descriptor,
299
+ state=[latest_state_message] if latest_state_message else [],
300
+ )
194
301
 
195
302
  @staticmethod
196
- def _need_to_close_page(at_least_one_page_in_group: bool, message: AirbyteMessage, json_message: Optional[Dict[str, Any]]) -> bool:
303
+ def _need_to_close_page(
304
+ at_least_one_page_in_group: bool,
305
+ message: AirbyteMessage,
306
+ json_message: Optional[Dict[str, Any]],
307
+ ) -> bool:
197
308
  return (
198
309
  at_least_one_page_in_group
199
310
  and message.type == MessageType.LOG
200
- and (MessageGrouper._is_page_http_request(json_message) or message.log.message.startswith("slice:"))
311
+ and (
312
+ MessageGrouper._is_page_http_request(json_message)
313
+ or message.log.message.startswith("slice:") # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
314
+ )
201
315
  )
202
316
 
203
317
  @staticmethod
@@ -205,7 +319,9 @@ class MessageGrouper:
205
319
  if not json_message:
206
320
  return False
207
321
  else:
208
- return MessageGrouper._is_http_log(json_message) and not MessageGrouper._is_auxiliary_http_request(json_message)
322
+ return MessageGrouper._is_http_log(
323
+ json_message
324
+ ) and not MessageGrouper._is_auxiliary_http_request(json_message)
209
325
 
210
326
  @staticmethod
211
327
  def _is_http_log(message: Dict[str, JsonType]) -> bool:
@@ -236,20 +352,44 @@ class MessageGrouper:
236
352
  Close a page when parsing message groups
237
353
  """
238
354
  current_slice_pages.append(
239
- StreamReadPages(request=current_page_request, response=current_page_response, records=deepcopy(current_page_records)) # type: ignore
355
+ StreamReadPages(
356
+ request=current_page_request,
357
+ response=current_page_response,
358
+ records=deepcopy(current_page_records), # type: ignore [arg-type]
359
+ )
240
360
  )
241
361
  current_page_records.clear()
242
362
 
243
363
  def _read_stream(
244
- self, source: DeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog
364
+ self,
365
+ source: DeclarativeSource,
366
+ config: Mapping[str, Any],
367
+ configured_catalog: ConfiguredAirbyteCatalog,
368
+ state: List[AirbyteStateMessage],
245
369
  ) -> Iterator[AirbyteMessage]:
246
370
  # the generator can raise an exception
247
371
  # iterate over the generated messages. if next raise an exception, catch it and yield it as an AirbyteLogMessage
248
372
  try:
249
- yield from AirbyteEntrypoint(source).read(source.spec(self.logger), config, configured_catalog, {})
373
+ yield from AirbyteEntrypoint(source).read(
374
+ source.spec(self.logger), config, configured_catalog, state
375
+ )
376
+ except AirbyteTracedException as traced_exception:
377
+ # Look for this message which indicates that it is the "final exception" raised by AbstractSource.
378
+ # If it matches, don't yield this as we don't need to show this in the Builder.
379
+ # This is somewhat brittle as it relies on the message string, but if they drift then the worst case
380
+ # is that this message will be shown in the Builder.
381
+ if (
382
+ traced_exception.message is not None
383
+ and "During the sync, the following streams did not sync successfully"
384
+ in traced_exception.message
385
+ ):
386
+ return
387
+ yield traced_exception.as_airbyte_message()
250
388
  except Exception as e:
251
389
  error_message = f"{e.args[0] if len(e.args) > 0 else str(e)}"
252
- yield AirbyteTracedException.from_exception(e, message=error_message).as_airbyte_message()
390
+ yield AirbyteTracedException.from_exception(
391
+ e, message=error_message
392
+ ).as_airbyte_message()
253
393
 
254
394
  @staticmethod
255
395
  def _parse_json(log_message: AirbyteLogMessage) -> JsonType:
@@ -264,15 +404,12 @@ class MessageGrouper:
264
404
 
265
405
  @staticmethod
266
406
  def _create_request_from_log_message(json_http_message: Dict[str, Any]) -> HttpRequest:
267
- url = urlparse(json_http_message.get("url", {}).get("full", ""))
268
- full_path = f"{url.scheme}://{url.hostname}{url.path}" if url else ""
407
+ url = json_http_message.get("url", {}).get("full", "")
269
408
  request = json_http_message.get("http", {}).get("request", {})
270
- parameters = parse_qs(url.query) or None
271
409
  return HttpRequest(
272
- url=full_path,
410
+ url=url,
273
411
  http_method=request.get("method", ""),
274
412
  headers=request.get("headers"),
275
- parameters=parameters,
276
413
  body=request.get("body", {}).get("content", ""),
277
414
  )
278
415
 
@@ -280,7 +417,9 @@ class MessageGrouper:
280
417
  def _create_response_from_log_message(json_http_message: Dict[str, Any]) -> HttpResponse:
281
418
  response = json_http_message.get("http", {}).get("response", {})
282
419
  body = response.get("body", {}).get("content", "")
283
- return HttpResponse(status=response.get("status_code"), body=body, headers=response.get("headers"))
420
+ return HttpResponse(
421
+ status=response.get("status_code"), body=body, headers=response.get("headers")
422
+ )
284
423
 
285
424
  def _has_reached_limit(self, slices: List[StreamReadSlices]) -> bool:
286
425
  if len(slices) >= self._max_slices:
@@ -16,7 +16,6 @@ class HttpResponse:
16
16
  @dataclass
17
17
  class HttpRequest:
18
18
  url: str
19
- parameters: Optional[Dict[str, Any]]
20
19
  headers: Optional[Dict[str, Any]]
21
20
  http_method: str
22
21
  body: Optional[str] = None
@@ -33,13 +32,15 @@ class StreamReadPages:
33
32
  class StreamReadSlices:
34
33
  pages: List[StreamReadPages]
35
34
  slice_descriptor: Optional[Dict[str, Any]]
36
- state: Optional[Dict[str, Any]] = None
35
+ state: Optional[List[Dict[str, Any]]] = None
37
36
 
38
37
 
39
38
  @dataclass
40
39
  class LogMessage:
41
40
  message: str
42
41
  level: str
42
+ internal_message: Optional[str] = None
43
+ stacktrace: Optional[str] = None
43
44
 
44
45
 
45
46
  @dataclass
@@ -1,7 +1,8 @@
1
- #
2
1
  # Copyright (c) 2021 Airbyte, Inc., all rights reserved.
3
- #
2
+ """The destinations module provides classes for building destination connectors."""
4
3
 
5
4
  from .destination import Destination
6
5
 
7
- __all__ = ["Destination"]
6
+ __all__ = [
7
+ "Destination",
8
+ ]
@@ -9,12 +9,19 @@ import sys
9
9
  from abc import ABC, abstractmethod
10
10
  from typing import Any, Iterable, List, Mapping
11
11
 
12
+ import orjson
13
+
12
14
  from airbyte_cdk.connector import Connector
13
15
  from airbyte_cdk.exception_handler import init_uncaught_exception_handler
14
- from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog, Type
16
+ from airbyte_cdk.models import (
17
+ AirbyteMessage,
18
+ AirbyteMessageSerializer,
19
+ ConfiguredAirbyteCatalog,
20
+ ConfiguredAirbyteCatalogSerializer,
21
+ Type,
22
+ )
15
23
  from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec_or_exit
16
24
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
17
- from pydantic import ValidationError
18
25
 
19
26
  logger = logging.getLogger("airbyte")
20
27
 
@@ -24,7 +31,10 @@ class Destination(Connector, ABC):
24
31
 
25
32
  @abstractmethod
26
33
  def write(
27
- self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage]
34
+ self,
35
+ config: Mapping[str, Any],
36
+ configured_catalog: ConfiguredAirbyteCatalog,
37
+ input_messages: Iterable[AirbyteMessage],
28
38
  ) -> Iterable[AirbyteMessage]:
29
39
  """Implement to define how the connector writes data to the destination"""
30
40
 
@@ -36,17 +46,26 @@ class Destination(Connector, ABC):
36
46
  """Reads from stdin, converting to Airbyte messages"""
37
47
  for line in input_stream:
38
48
  try:
39
- yield AirbyteMessage.parse_raw(line)
40
- except ValidationError:
41
- logger.info(f"ignoring input which can't be deserialized as Airbyte Message: {line}")
49
+ yield AirbyteMessageSerializer.load(orjson.loads(line))
50
+ except orjson.JSONDecodeError:
51
+ logger.info(
52
+ f"ignoring input which can't be deserialized as Airbyte Message: {line}"
53
+ )
42
54
 
43
55
  def _run_write(
44
- self, config: Mapping[str, Any], configured_catalog_path: str, input_stream: io.TextIOWrapper
56
+ self,
57
+ config: Mapping[str, Any],
58
+ configured_catalog_path: str,
59
+ input_stream: io.TextIOWrapper,
45
60
  ) -> Iterable[AirbyteMessage]:
46
- catalog = ConfiguredAirbyteCatalog.parse_file(configured_catalog_path)
61
+ catalog = ConfiguredAirbyteCatalogSerializer.load(
62
+ orjson.loads(open(configured_catalog_path).read())
63
+ )
47
64
  input_messages = self._parse_input_stream(input_stream)
48
65
  logger.info("Begin writing to the destination...")
49
- yield from self.write(config=config, configured_catalog=catalog, input_messages=input_messages)
66
+ yield from self.write(
67
+ config=config, configured_catalog=catalog, input_messages=input_messages
68
+ )
50
69
  logger.info("Writing complete.")
51
70
 
52
71
  def parse_args(self, args: List[str]) -> argparse.Namespace:
@@ -60,18 +79,30 @@ class Destination(Connector, ABC):
60
79
  subparsers = main_parser.add_subparsers(title="commands", dest="command")
61
80
 
62
81
  # spec
63
- subparsers.add_parser("spec", help="outputs the json configuration specification", parents=[parent_parser])
82
+ subparsers.add_parser(
83
+ "spec", help="outputs the json configuration specification", parents=[parent_parser]
84
+ )
64
85
 
65
86
  # check
66
- check_parser = subparsers.add_parser("check", help="checks the config can be used to connect", parents=[parent_parser])
87
+ check_parser = subparsers.add_parser(
88
+ "check", help="checks the config can be used to connect", parents=[parent_parser]
89
+ )
67
90
  required_check_parser = check_parser.add_argument_group("required named arguments")
68
- required_check_parser.add_argument("--config", type=str, required=True, help="path to the json configuration file")
91
+ required_check_parser.add_argument(
92
+ "--config", type=str, required=True, help="path to the json configuration file"
93
+ )
69
94
 
70
95
  # write
71
- write_parser = subparsers.add_parser("write", help="Writes data to the destination", parents=[parent_parser])
96
+ write_parser = subparsers.add_parser(
97
+ "write", help="Writes data to the destination", parents=[parent_parser]
98
+ )
72
99
  write_required = write_parser.add_argument_group("required named arguments")
73
- write_required.add_argument("--config", type=str, required=True, help="path to the JSON configuration file")
74
- write_required.add_argument("--catalog", type=str, required=True, help="path to the configured catalog JSON file")
100
+ write_required.add_argument(
101
+ "--config", type=str, required=True, help="path to the JSON configuration file"
102
+ )
103
+ write_required.add_argument(
104
+ "--catalog", type=str, required=True, help="path to the configured catalog JSON file"
105
+ )
75
106
 
76
107
  parsed_args = main_parser.parse_args(args)
77
108
  cmd = parsed_args.command
@@ -85,7 +116,6 @@ class Destination(Connector, ABC):
85
116
  return parsed_args
86
117
 
87
118
  def run_cmd(self, parsed_args: argparse.Namespace) -> Iterable[AirbyteMessage]:
88
-
89
119
  cmd = parsed_args.command
90
120
  if cmd not in self.VALID_CMDS:
91
121
  raise Exception(f"Unrecognized command: {cmd}")
@@ -101,7 +131,7 @@ class Destination(Connector, ABC):
101
131
  except AirbyteTracedException as traced_exc:
102
132
  connection_status = traced_exc.as_connection_status_message()
103
133
  if connection_status and cmd == "check":
104
- yield connection_status.json(exclude_unset=True)
134
+ yield connection_status
105
135
  return
106
136
  raise traced_exc
107
137
 
@@ -110,11 +140,15 @@ class Destination(Connector, ABC):
110
140
  elif cmd == "write":
111
141
  # Wrap in UTF-8 to override any other input encodings
112
142
  wrapped_stdin = io.TextIOWrapper(sys.stdin.buffer, encoding="utf-8")
113
- yield from self._run_write(config=config, configured_catalog_path=parsed_args.catalog, input_stream=wrapped_stdin)
143
+ yield from self._run_write(
144
+ config=config,
145
+ configured_catalog_path=parsed_args.catalog,
146
+ input_stream=wrapped_stdin,
147
+ )
114
148
 
115
- def run(self, args: List[str]):
149
+ def run(self, args: List[str]) -> None:
116
150
  init_uncaught_exception_handler(logger)
117
151
  parsed_args = self.parse_args(args)
118
152
  output_messages = self.run_cmd(parsed_args)
119
153
  for message in output_messages:
120
- print(message.json(exclude_unset=True))
154
+ print(orjson.dumps(AirbyteMessageSerializer.dump(message)).decode())
@@ -0,0 +1,37 @@
1
+ # Vector DB based destinations
2
+
3
+ ## Note: All helpers in this directory are experimental and subject to change
4
+
5
+ This directory contains several helpers that can be used to create a destination that processes and chunks records, embeds their text part and loads them into a vector database.
6
+ The specific loading behavior is defined by the destination connector itself, but chunking and embedding behavior is handled by the helpers.
7
+
8
+ To use these helpers, install the CDK with the `vector-db-based` extra:
9
+
10
+ ```bash
11
+ pip install airbyte-cdk[vector-db-based]
12
+ ```
13
+
14
+ The helpers can be used in the following way:
15
+
16
+ - Add the config models to the spec of the connector
17
+ - Implement the `Indexer` interface for your specific database
18
+ - In the check implementation of the destination, initialize the indexer and the embedder and call `check` on them
19
+ - In the write implementation of the destination, initialize the indexer and the embedder and pass them to a new instance of the writer. Then call the writer's `write` method with the iterable for incoming messages
20
+
21
+ If there are no connector-specific embedders, the `airbyte_cdk.destinations.vector_db_based.embedder.create_from_config` function can be used to get an embedder instance from the config.
22
+
23
+ This is how the components interact:
24
+
25
+ ```text
26
+ ┌─────────────┐
27
+ │MyDestination│
28
+ └┬────────────┘
29
+ ┌▽───────────────────────────────┐
30
+ │Writer │
31
+ └┬─────────┬──────────┬──────────┘
32
+ ┌▽───────┐┌▽────────┐┌▽────────────────┐
33
+ │Embedder││MyIndexer││DocumentProcessor│
34
+ └────────┘└─────────┘└─────────────────┘
35
+ ```
36
+
37
+ Normally, only the `MyDestination` class and the `MyIndexer` class have to be implemented specifically for the destination. The other classes are provided as is by the helpers.