airbyte-cdk 0.51.10__tar.gz → 0.51.12__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (380) hide show
  1. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/PKG-INFO +1 -1
  2. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/destinations/vector_db_based/config.py +16 -0
  3. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/destinations/vector_db_based/document_processor.py +10 -6
  4. airbyte-cdk-0.51.12/airbyte_cdk/destinations/vector_db_based/embedder.py +164 -0
  5. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/destinations/vector_db_based/indexer.py +1 -3
  6. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/destinations/vector_db_based/test_utils.py +1 -1
  7. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/destinations/vector_db_based/writer.py +9 -3
  8. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/config/avro_format.py +4 -2
  9. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/config/csv_format.py +16 -5
  10. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/config/jsonl_format.py +5 -3
  11. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/config/parquet_format.py +4 -2
  12. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/file_types/csv_parser.py +10 -1
  13. airbyte-cdk-0.51.12/airbyte_cdk/sources/file_based/remote_file.py +16 -0
  14. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk.egg-info/PKG-INFO +1 -1
  15. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/setup.py +1 -1
  16. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/file_types/test_csv_parser.py +25 -2
  17. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/scenarios/csv_scenarios.py +7 -7
  18. airbyte-cdk-0.51.10/airbyte_cdk/destinations/vector_db_based/embedder.py +0 -109
  19. airbyte-cdk-0.51.10/airbyte_cdk/sources/file_based/remote_file.py +0 -25
  20. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/LICENSE.txt +0 -0
  21. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/README.md +0 -0
  22. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/__init__.py +0 -0
  23. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/config_observation.py +0 -0
  24. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/connector.py +0 -0
  25. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/connector_builder/__init__.py +0 -0
  26. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/connector_builder/connector_builder_handler.py +0 -0
  27. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/connector_builder/main.py +0 -0
  28. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/connector_builder/message_grouper.py +0 -0
  29. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/connector_builder/models.py +0 -0
  30. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/destinations/__init__.py +0 -0
  31. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/destinations/destination.py +0 -0
  32. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/destinations/vector_db_based/__init__.py +0 -0
  33. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/destinations/vector_db_based/batcher.py +0 -0
  34. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/destinations/vector_db_based/utils.py +0 -0
  35. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/entrypoint.py +0 -0
  36. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/exception_handler.py +0 -0
  37. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/logger.py +0 -0
  38. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/models/__init__.py +0 -0
  39. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/models/airbyte_protocol.py +0 -0
  40. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/models/well_known_types.py +0 -0
  41. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/py.typed +0 -0
  42. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/__init__.py +0 -0
  43. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/abstract_source.py +0 -0
  44. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/config.py +0 -0
  45. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/connector_state_manager.py +0 -0
  46. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/__init__.py +0 -0
  47. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/auth/__init__.py +0 -0
  48. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +0 -0
  49. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/auth/oauth.py +0 -0
  50. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/auth/token.py +0 -0
  51. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/auth/token_provider.py +0 -0
  52. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/checks/__init__.py +0 -0
  53. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/checks/check_stream.py +0 -0
  54. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/checks/connection_checker.py +0 -0
  55. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/create_partial.py +0 -0
  56. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/datetime/__init__.py +0 -0
  57. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/datetime/datetime_parser.py +0 -0
  58. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +0 -0
  59. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +0 -0
  60. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/declarative_source.py +0 -0
  61. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/declarative_stream.py +0 -0
  62. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/decoders/__init__.py +0 -0
  63. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/decoders/decoder.py +0 -0
  64. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/decoders/json_decoder.py +0 -0
  65. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/exceptions.py +0 -0
  66. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/extractors/__init__.py +0 -0
  67. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +0 -0
  68. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/extractors/http_selector.py +0 -0
  69. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/extractors/record_extractor.py +0 -0
  70. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/extractors/record_filter.py +0 -0
  71. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/extractors/record_selector.py +0 -0
  72. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/incremental/__init__.py +0 -0
  73. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/incremental/cursor.py +0 -0
  74. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +0 -0
  75. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -0
  76. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/interpolation/__init__.py +0 -0
  77. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/interpolation/filters.py +0 -0
  78. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +0 -0
  79. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +0 -0
  80. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +0 -0
  81. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +0 -0
  82. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/interpolation/interpolation.py +0 -0
  83. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/interpolation/jinja.py +0 -0
  84. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/interpolation/macros.py +0 -0
  85. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -0
  86. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/models/__init__.py +0 -0
  87. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +0 -0
  88. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/parsers/__init__.py +0 -0
  89. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -0
  90. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +0 -0
  91. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -0
  92. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +0 -0
  93. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +0 -0
  94. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +0 -0
  95. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/partition_routers/__init__.py +0 -0
  96. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +0 -0
  97. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +0 -0
  98. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +0 -0
  99. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/__init__.py +0 -0
  100. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +0 -0
  101. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -0
  102. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +0 -0
  103. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +0 -0
  104. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +0 -0
  105. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +0 -0
  106. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +0 -0
  107. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +0 -0
  108. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -0
  109. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +0 -0
  110. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +0 -0
  111. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +0 -0
  112. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -0
  113. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -0
  114. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/http_requester.py +0 -0
  115. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +0 -0
  116. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +0 -0
  117. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +0 -0
  118. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +0 -0
  119. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +0 -0
  120. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +0 -0
  121. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +0 -0
  122. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +0 -0
  123. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +0 -0
  124. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +0 -0
  125. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/request_option.py +0 -0
  126. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +0 -0
  127. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +0 -0
  128. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +0 -0
  129. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +0 -0
  130. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +0 -0
  131. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/request_path.py +0 -0
  132. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/requesters/requester.py +0 -0
  133. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/retrievers/__init__.py +0 -0
  134. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/retrievers/retriever.py +0 -0
  135. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +0 -0
  136. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/schema/__init__.py +0 -0
  137. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/schema/default_schema_loader.py +0 -0
  138. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +0 -0
  139. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +0 -0
  140. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/schema/schema_loader.py +0 -0
  141. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/spec/__init__.py +0 -0
  142. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/spec/spec.py +0 -0
  143. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/stream_slicers/__init__.py +0 -0
  144. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -0
  145. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +0 -0
  146. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/transformations/__init__.py +0 -0
  147. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/transformations/add_fields.py +0 -0
  148. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/transformations/remove_fields.py +0 -0
  149. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/transformations/transformation.py +0 -0
  150. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/types.py +0 -0
  151. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/declarative/yaml_declarative_source.py +0 -0
  152. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/deprecated/__init__.py +0 -0
  153. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/deprecated/base_source.py +0 -0
  154. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/deprecated/client.py +0 -0
  155. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/embedded/__init__.py +0 -0
  156. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/embedded/base_integration.py +0 -0
  157. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/embedded/catalog.py +0 -0
  158. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/embedded/runner.py +0 -0
  159. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/embedded/tools.py +0 -0
  160. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/__init__.py +0 -0
  161. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/availability_strategy/__init__.py +0 -0
  162. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +0 -0
  163. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +0 -0
  164. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/config/__init__.py +0 -0
  165. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +0 -0
  166. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/config/file_based_stream_config.py +0 -0
  167. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/discovery_policy/__init__.py +0 -0
  168. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +0 -0
  169. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +0 -0
  170. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/exceptions.py +0 -0
  171. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/file_based_source.py +0 -0
  172. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/file_based_stream_reader.py +0 -0
  173. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/file_types/__init__.py +0 -0
  174. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/file_types/avro_parser.py +0 -0
  175. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/file_types/file_type_parser.py +0 -0
  176. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +0 -0
  177. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/file_types/parquet_parser.py +0 -0
  178. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/schema_helpers.py +0 -0
  179. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +0 -0
  180. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +0 -0
  181. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +0 -0
  182. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/stream/__init__.py +0 -0
  183. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +0 -0
  184. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/stream/cursor/__init__.py +0 -0
  185. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +0 -0
  186. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +0 -0
  187. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +0 -0
  188. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/file_based/types.py +0 -0
  189. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/http_logger.py +0 -0
  190. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/message/__init__.py +0 -0
  191. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/message/repository.py +0 -0
  192. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/singer/__init__.py +0 -0
  193. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/singer/singer_helpers.py +0 -0
  194. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/singer/source.py +0 -0
  195. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/source.py +0 -0
  196. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/__init__.py +0 -0
  197. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/availability_strategy.py +0 -0
  198. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/core.py +0 -0
  199. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/http/__init__.py +0 -0
  200. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/http/auth/__init__.py +0 -0
  201. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/http/auth/core.py +0 -0
  202. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/http/auth/oauth.py +0 -0
  203. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/http/auth/token.py +0 -0
  204. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/http/availability_strategy.py +0 -0
  205. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/http/exceptions.py +0 -0
  206. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/http/http.py +0 -0
  207. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/http/rate_limiting.py +0 -0
  208. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py +0 -0
  209. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +0 -0
  210. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +0 -0
  211. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +0 -0
  212. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/http/requests_native_auth/token.py +0 -0
  213. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/utils/__init__.py +0 -0
  214. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/streams/utils/stream_helper.py +0 -0
  215. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/utils/__init__.py +0 -0
  216. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/utils/casing.py +0 -0
  217. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/utils/catalog_helpers.py +0 -0
  218. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/utils/record_helper.py +0 -0
  219. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/utils/schema_helpers.py +0 -0
  220. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/utils/schema_models.py +0 -0
  221. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/utils/slice_logger.py +0 -0
  222. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/utils/transform.py +0 -0
  223. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/sources/utils/types.py +0 -0
  224. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/utils/__init__.py +0 -0
  225. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/utils/airbyte_secrets_utils.py +0 -0
  226. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/utils/datetime_format_inferrer.py +0 -0
  227. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/utils/event_timing.py +0 -0
  228. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/utils/mapping_helpers.py +0 -0
  229. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/utils/schema_inferrer.py +0 -0
  230. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/utils/spec_schema_transformations.py +0 -0
  231. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/utils/stream_status_utils.py +0 -0
  232. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk/utils/traced_exception.py +0 -0
  233. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk.egg-info/SOURCES.txt +0 -0
  234. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk.egg-info/dependency_links.txt +0 -0
  235. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk.egg-info/requires.txt +0 -0
  236. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/airbyte_cdk.egg-info/top_level.txt +0 -0
  237. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/pyproject.toml +0 -0
  238. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/setup.cfg +0 -0
  239. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/source_declarative_manifest/__init__.py +0 -0
  240. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/source_declarative_manifest/main.py +0 -0
  241. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/connector_builder/__init__.py +0 -0
  242. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/connector_builder/test_connector_builder_handler.py +0 -0
  243. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/connector_builder/test_message_grouper.py +0 -0
  244. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/connector_builder/utils.py +0 -0
  245. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/destinations/__init__.py +0 -0
  246. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/destinations/test_destination.py +0 -0
  247. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/singer/__init__.py +0 -0
  248. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/singer/test_singer_helpers.py +0 -0
  249. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/singer/test_singer_source.py +0 -0
  250. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/__init__.py +0 -0
  251. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/__init__.py +0 -0
  252. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/auth/__init__.py +0 -0
  253. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/auth/test_oauth.py +0 -0
  254. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -0
  255. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/auth/test_token_auth.py +0 -0
  256. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/auth/test_token_provider.py +0 -0
  257. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/checks/__init__.py +0 -0
  258. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/checks/test_check_stream.py +0 -0
  259. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/decoders/__init__.py +0 -0
  260. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -0
  261. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/external_component.py +0 -0
  262. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/extractors/__init__.py +0 -0
  263. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -0
  264. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/extractors/test_record_filter.py +0 -0
  265. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/extractors/test_record_selector.py +0 -0
  266. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/incremental/__init__.py +0 -0
  267. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -0
  268. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -0
  269. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -0
  270. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/interpolation/__init__.py +0 -0
  271. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/interpolation/test_filters.py +0 -0
  272. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -0
  273. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -0
  274. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -0
  275. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -0
  276. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/interpolation/test_jinja.py +0 -0
  277. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/interpolation/test_macros.py +0 -0
  278. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/parsers/__init__.py +0 -0
  279. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -0
  280. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -0
  281. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -0
  282. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/parsers/testing_components.py +0 -0
  283. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/partition_routers/__init__.py +0 -0
  284. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -0
  285. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -0
  286. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -0
  287. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/__init__.py +0 -0
  288. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -0
  289. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -0
  290. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -0
  291. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -0
  292. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -0
  293. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -0
  294. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -0
  295. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -0
  296. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -0
  297. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -0
  298. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -0
  299. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -0
  300. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -0
  301. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -0
  302. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -0
  303. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -0
  304. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -0
  305. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -0
  306. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -0
  307. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -0
  308. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -0
  309. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/test_http_requester.py +0 -0
  310. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -0
  311. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/retrievers/__init__.py +0 -0
  312. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -0
  313. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/schema/__init__.py +0 -0
  314. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -0
  315. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/schema/source_test/__init__.py +0 -0
  316. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -0
  317. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -0
  318. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -0
  319. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/states/__init__.py +0 -0
  320. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/stream_slicers/__init__.py +0 -0
  321. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -0
  322. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/test_create_partial.py +0 -0
  323. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/test_declarative_stream.py +0 -0
  324. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -0
  325. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -0
  326. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/__init__.py +0 -0
  327. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
  328. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -0
  329. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/config/__init__.py +0 -0
  330. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -0
  331. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/config/test_csv_format.py +0 -0
  332. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -0
  333. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/file_types/__init__.py +0 -0
  334. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -0
  335. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -0
  336. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -0
  337. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/helpers.py +0 -0
  338. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/in_memory_files_source.py +0 -0
  339. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/scenarios/__init__.py +0 -0
  340. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -0
  341. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -0
  342. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -0
  343. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -0
  344. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -0
  345. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -0
  346. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -0
  347. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -0
  348. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/stream/__init__.py +0 -0
  349. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -0
  350. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -0
  351. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -0
  352. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/test_scenarios.py +0 -0
  353. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/file_based/test_schema_helpers.py +0 -0
  354. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/fixtures/__init__.py +0 -0
  355. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/fixtures/source_test_fixture.py +0 -0
  356. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/message/__init__.py +0 -0
  357. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/message/test_repository.py +0 -0
  358. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/streams/__init__.py +0 -0
  359. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/streams/http/__init__.py +0 -0
  360. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/streams/http/auth/__init__.py +0 -0
  361. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/streams/http/auth/test_auth.py +0 -0
  362. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
  363. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -0
  364. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/streams/http/test_availability_strategy.py +0 -0
  365. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/streams/http/test_http.py +0 -0
  366. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/streams/test_availability_strategy.py +0 -0
  367. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/streams/test_streams_core.py +0 -0
  368. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/test_abstract_source.py +0 -0
  369. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/test_config.py +0 -0
  370. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/test_connector_state_manager.py +0 -0
  371. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/test_http_logger.py +0 -0
  372. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/test_integration_source.py +0 -0
  373. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/sources/test_source.py +0 -0
  374. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/utils/__init__.py +0 -0
  375. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/utils/test_datetime_format_inferrer.py +0 -0
  376. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/utils/test_mapping_helpers.py +0 -0
  377. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/utils/test_schema_inferrer.py +0 -0
  378. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/utils/test_secret_utils.py +0 -0
  379. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/utils/test_stream_status_utils.py +0 -0
  380. {airbyte-cdk-0.51.10 → airbyte-cdk-0.51.12}/unit_tests/utils/test_traced_exception.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.51.10
3
+ Version: 0.51.12
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -59,6 +59,22 @@ class FakeEmbeddingConfigModel(BaseModel):
59
59
  }
60
60
 
61
61
 
62
+ class FromFieldEmbeddingConfigModel(BaseModel):
63
+ mode: Literal["from_field"] = Field("from_field", const=True)
64
+ field_name: str = Field(
65
+ ..., title="Field name", description="Name of the field in the record that contains the embedding", examples=["embedding", "vector"]
66
+ )
67
+ dimensions: int = Field(
68
+ ..., title="Embedding dimensions", description="The number of dimensions the embedding model is generating", examples=[1536, 384]
69
+ )
70
+
71
+ class Config:
72
+ title = "From Field"
73
+ schema_extra = {
74
+ "description": "Use a field in the record as the embedding. This is useful if you already have an embedding for your data and want to store it in the vector store."
75
+ }
76
+
77
+
62
78
  class CohereEmbeddingConfigModel(BaseModel):
63
79
  mode: Literal["cohere"] = Field("cohere", const=True)
64
80
  cohere_key: str = Field(..., title="Cohere API key", airbyte_secret=True)
@@ -9,6 +9,7 @@ from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
9
9
  import dpath.util
10
10
  from airbyte_cdk.destinations.vector_db_based.config import ProcessingConfigModel
11
11
  from airbyte_cdk.models import AirbyteRecordMessage, AirbyteStream, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode
12
+ from airbyte_cdk.utils.traced_exception import AirbyteTracedException, FailureType
12
13
  from langchain.document_loaders.base import Document
13
14
  from langchain.text_splitter import RecursiveCharacterTextSplitter
14
15
  from langchain.utils import stringify_dict
@@ -21,8 +22,8 @@ METADATA_RECORD_ID_FIELD = "_ab_record_id"
21
22
  class Chunk:
22
23
  page_content: str
23
24
  metadata: Dict[str, Any]
24
- stream: str
25
- namespace: Optional[str] = None
25
+ record: AirbyteRecordMessage
26
+ embedding: Optional[List[float]] = None
26
27
 
27
28
 
28
29
  class DocumentProcessor:
@@ -66,11 +67,14 @@ class DocumentProcessor:
66
67
  """
67
68
  doc = self._generate_document(record)
68
69
  if doc is None:
69
- raise ValueError(f"Record {str(record.data)[:250]}... does not contain any text fields.")
70
- chunks = [
71
- Chunk(
72
- page_content=chunk_document.page_content, metadata=chunk_document.metadata, stream=record.stream, namespace=record.namespace
70
+ text_fields = ", ".join(self.text_fields) if self.text_fields else "all fields"
71
+ raise AirbyteTracedException(
72
+ internal_message="No text fields found in record",
73
+ message=f"Record {str(record.data)[:250]}... does not contain any of the configured text fields: {text_fields}. Please check your processing configuration, there has to be at least one text field set in each record.",
74
+ failure_type=FailureType.config_error,
73
75
  )
76
+ chunks = [
77
+ Chunk(page_content=chunk_document.page_content, metadata=chunk_document.metadata, record=record)
74
78
  for chunk_document in self._split_document(doc)
75
79
  ]
76
80
  id_to_delete = doc.metadata[METADATA_RECORD_ID_FIELD] if METADATA_RECORD_ID_FIELD in doc.metadata else None
@@ -0,0 +1,164 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import List, Optional
7
+
8
+ from airbyte_cdk.destinations.vector_db_based.config import (
9
+ CohereEmbeddingConfigModel,
10
+ FakeEmbeddingConfigModel,
11
+ FromFieldEmbeddingConfigModel,
12
+ OpenAIEmbeddingConfigModel,
13
+ )
14
+ from airbyte_cdk.destinations.vector_db_based.document_processor import Chunk
15
+ from airbyte_cdk.destinations.vector_db_based.utils import format_exception
16
+ from airbyte_cdk.utils.traced_exception import AirbyteTracedException, FailureType
17
+ from langchain.embeddings.cohere import CohereEmbeddings
18
+ from langchain.embeddings.fake import FakeEmbeddings
19
+ from langchain.embeddings.openai import OpenAIEmbeddings
20
+
21
+
22
+ class Embedder(ABC):
23
+ """
24
+ Embedder is an abstract class that defines the interface for embedding text.
25
+
26
+ The Writer class uses the Embedder class to embed text - the writer passes all chunks of a batch to the embedder and attaches the resulting embeddings to the chunks before handing them to the indexer, which stores them in the destination.
27
+ The destination connector is responsible to create an embedder instance and pass it to the writer.
28
+ The CDK defines basic embedders that should be supported in each destination. It is possible to implement custom embedders for special destinations if needed.
29
+ """
30
+
31
+ def __init__(self) -> None:
32
+ pass
33
+
34
+ @abstractmethod
35
+ def check(self) -> Optional[str]:
36
+ pass
37
+
38
+ @abstractmethod
39
+ def embed_chunks(self, chunks: List[Chunk]) -> List[Optional[List[float]]]:
40
+ """
41
+ Embed the text of each chunk and return the resulting embedding vectors.
42
+ If a chunk cannot be embedded or is configured to not be embedded, return None for that chunk.
43
+ """
44
+ pass
45
+
46
+ @property
47
+ @abstractmethod
48
+ def embedding_dimensions(self) -> int:
49
+ pass
50
+
51
+
52
+ OPEN_AI_VECTOR_SIZE = 1536
53
+
54
+
55
+ class OpenAIEmbedder(Embedder):
56
+ def __init__(self, config: OpenAIEmbeddingConfigModel):
57
+ super().__init__()
58
+ # Client is set internally
59
+ self.embeddings = OpenAIEmbeddings(openai_api_key=config.openai_key, chunk_size=8191, max_retries=15) # type: ignore
60
+
61
+ def check(self) -> Optional[str]:
62
+ try:
63
+ self.embeddings.embed_query("test")
64
+ except Exception as e:
65
+ return format_exception(e)
66
+ return None
67
+
68
+ def embed_chunks(self, chunks: List[Chunk]) -> List[List[float]]:
69
+ return self.embeddings.embed_documents([chunk.page_content for chunk in chunks])
70
+
71
+ @property
72
+ def embedding_dimensions(self) -> int:
73
+ # vector size produced by text-embedding-ada-002 model
74
+ return OPEN_AI_VECTOR_SIZE
75
+
76
+
77
+ COHERE_VECTOR_SIZE = 1024
78
+
79
+
80
+ class CohereEmbedder(Embedder):
81
+ def __init__(self, config: CohereEmbeddingConfigModel):
82
+ super().__init__()
83
+ # Client is set internally
84
+ self.embeddings = CohereEmbeddings(cohere_api_key=config.cohere_key, model="embed-english-light-v2.0") # type: ignore
85
+
86
+ def check(self) -> Optional[str]:
87
+ try:
88
+ self.embeddings.embed_query("test")
89
+ except Exception as e:
90
+ return format_exception(e)
91
+ return None
92
+
93
+ def embed_chunks(self, chunks: List[Chunk]) -> List[List[float]]:
94
+ return self.embeddings.embed_documents([chunk.page_content for chunk in chunks])
95
+
96
+ @property
97
+ def embedding_dimensions(self) -> int:
98
+ # vector size produced by the embed-english-light-v2.0 model
99
+ return COHERE_VECTOR_SIZE
100
+
101
+
102
+ class FakeEmbedder(Embedder):
103
+ def __init__(self, config: FakeEmbeddingConfigModel):
104
+ super().__init__()
105
+ self.embeddings = FakeEmbeddings(size=OPEN_AI_VECTOR_SIZE)
106
+
107
+ def check(self) -> Optional[str]:
108
+ try:
109
+ self.embeddings.embed_query("test")
110
+ except Exception as e:
111
+ return format_exception(e)
112
+ return None
113
+
114
+ def embed_chunks(self, chunks: List[Chunk]) -> List[List[float]]:
115
+ return self.embeddings.embed_documents([chunk.page_content for chunk in chunks])
116
+
117
+ @property
118
+ def embedding_dimensions(self) -> int:
119
+ # use same vector size as for OpenAI embeddings to keep it realistic
120
+ return OPEN_AI_VECTOR_SIZE
121
+
122
+
123
+ class FromFieldEmbedder(Embedder):
124
+ def __init__(self, config: FromFieldEmbeddingConfigModel):
125
+ super().__init__()
126
+ self.config = config
127
+
128
+ def check(self) -> Optional[str]:
129
+ return None
130
+
131
+ def embed_chunks(self, chunks: List[Chunk]) -> List[List[float]]:
132
+ """
133
+ From each chunk, pull the embedding from the field specified in the config.
134
+ Check that the field exists, is a list of numbers and is the correct size. If not, raise an AirbyteTracedException explaining the problem.
135
+ """
136
+ embeddings = []
137
+ for chunk in chunks:
138
+ data = chunk.record.data
139
+ if self.config.field_name not in data:
140
+ raise AirbyteTracedException(
141
+ internal_message="Embedding vector field not found",
142
+ failure_type=FailureType.config_error,
143
+ message=f"Record {str(data)[:250]}... in stream {chunk.record.stream} does not contain embedding vector field {self.config.field_name}. Please check your embedding configuration, the embedding vector field has to be set correctly on every record.",
144
+ )
145
+ field = data[self.config.field_name]
146
+ if not isinstance(field, list) or not all(isinstance(x, (int, float)) for x in field):
147
+ raise AirbyteTracedException(
148
+ internal_message="Embedding vector field not a list of numbers",
149
+ failure_type=FailureType.config_error,
150
+ message=f"Record {str(data)[:250]}... in stream {chunk.record.stream} does contain embedding vector field {self.config.field_name}, but it is not a list of numbers. Please check your embedding configuration, the embedding vector field has to be a list of numbers of length {self.config.dimensions} on every record.",
151
+ )
152
+ if len(field) != self.config.dimensions:
153
+ raise AirbyteTracedException(
154
+ internal_message="Embedding vector field has wrong length",
155
+ failure_type=FailureType.config_error,
156
+ message=f"Record {str(data)[:250]}... in stream {chunk.record.stream} does contain embedding vector field {self.config.field_name}, but it has length {len(field)} instead of the configured {self.config.dimensions}. Please check your embedding configuration, the embedding vector field has to be a list of numbers of length {self.config.dimensions} on every record.",
157
+ )
158
+ embeddings.append(field)
159
+
160
+ return embeddings
161
+
162
+ @property
163
+ def embedding_dimensions(self) -> int:
164
+ return self.config.dimensions
@@ -7,7 +7,6 @@ from abc import ABC, abstractmethod
7
7
  from typing import Any, Generator, Iterable, List, Optional, Tuple, TypeVar
8
8
 
9
9
  from airbyte_cdk.destinations.vector_db_based.document_processor import Chunk
10
- from airbyte_cdk.destinations.vector_db_based.embedder import Embedder
11
10
  from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog
12
11
 
13
12
 
@@ -19,9 +18,8 @@ class Indexer(ABC):
19
18
  In a destination connector, implement a custom indexer by extending this class and implementing the abstract methods.
20
19
  """
21
20
 
22
- def __init__(self, config: Any, embedder: Embedder):
21
+ def __init__(self, config: Any):
23
22
  self.config = config
24
- self.embedder = embedder
25
23
  pass
26
24
 
27
25
  def pre_sync(self, catalog: ConfiguredAirbyteCatalog) -> None:
@@ -48,6 +48,6 @@ class BaseIntegrationTest(unittest.TestCase):
48
48
  type=Type.RECORD, record=AirbyteRecordMessage(stream=stream, data={"str_col": str_value, "int_col": int_value}, emitted_at=0)
49
49
  )
50
50
 
51
- def setUp(self):
51
+ def setUp(self) -> None:
52
52
  with open("secrets/config.json", "r") as f:
53
53
  self.config = json.loads(f.read())
@@ -8,24 +8,27 @@ from typing import Iterable, List
8
8
  from airbyte_cdk.destinations.vector_db_based.batcher import Batcher
9
9
  from airbyte_cdk.destinations.vector_db_based.config import ProcessingConfigModel
10
10
  from airbyte_cdk.destinations.vector_db_based.document_processor import Chunk, DocumentProcessor
11
+ from airbyte_cdk.destinations.vector_db_based.embedder import Embedder
11
12
  from airbyte_cdk.destinations.vector_db_based.indexer import Indexer
12
13
  from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog, Type
13
14
 
14
15
 
15
16
  class Writer:
16
17
  """
17
- The Writer class is orchestrating the document processor, the batcher and the indexer:
18
+ The Writer class is orchestrating the document processor, the batcher, the embedder and the indexer:
18
19
  * Incoming records are collected using the batcher
19
20
  * The document processor generates documents from all records in the batch
20
- * The indexer indexes the resulting documents in the destination
21
+ * The embedder embeds the documents
22
+ * The indexer indexes the resulting documents and their embeddings in the destination
21
23
 
22
24
  The destination connector is responsible to create a writer instance and pass the input messages iterable to the write method.
23
25
  The batch size can be configured by the destination connector to give the freedom of either letting the user configure it or hardcoding it to a sensible value depending on the destination.
24
26
  """
25
27
 
26
- def __init__(self, processing_config: ProcessingConfigModel, indexer: Indexer, batch_size: int) -> None:
28
+ def __init__(self, processing_config: ProcessingConfigModel, indexer: Indexer, embedder: Embedder, batch_size: int) -> None:
27
29
  self.processing_config = processing_config
28
30
  self.indexer = indexer
31
+ self.embedder = embedder
29
32
  self.batcher = Batcher(batch_size, lambda batch: self._process_batch(batch))
30
33
 
31
34
  def _process_batch(self, batch: List[AirbyteRecordMessage]) -> None:
@@ -36,6 +39,9 @@ class Writer:
36
39
  documents.extend(record_documents)
37
40
  if record_id_to_delete is not None:
38
41
  ids_to_delete.append(record_id_to_delete)
42
+ embeddings = self.embedder.embed_chunks(documents)
43
+ for i, document in enumerate(documents):
44
+ document.embedding = embeddings[i]
39
45
  self.indexer.index(documents, ids_to_delete)
40
46
 
41
47
  def write(self, configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage]) -> Iterable[AirbyteMessage]:
@@ -3,14 +3,16 @@
3
3
  #
4
4
 
5
5
  from pydantic import BaseModel, Field
6
- from typing_extensions import Literal
7
6
 
8
7
 
9
8
  class AvroFormat(BaseModel):
10
9
  class Config:
11
10
  title = "Avro Format"
12
11
 
13
- filetype: Literal["avro"] = "avro"
12
+ filetype: str = Field(
13
+ "avro",
14
+ const=True,
15
+ )
14
16
 
15
17
  double_as_string: bool = Field(
16
18
  title="Convert Double Fields to Strings",
@@ -7,7 +7,6 @@ from enum import Enum
7
7
  from typing import Any, Dict, List, Optional, Set, Union
8
8
 
9
9
  from pydantic import BaseModel, Field, ValidationError, root_validator, validator
10
- from typing_extensions import Literal
11
10
 
12
11
 
13
12
  class InferenceType(Enum):
@@ -25,7 +24,10 @@ class CsvHeaderFromCsv(BaseModel):
25
24
  class Config:
26
25
  title = "From CSV"
27
26
 
28
- header_definition_type: Literal[CsvHeaderDefinitionType.FROM_CSV.value] = CsvHeaderDefinitionType.FROM_CSV.value # type: ignore
27
+ header_definition_type: str = Field(
28
+ CsvHeaderDefinitionType.FROM_CSV.value,
29
+ const=True,
30
+ )
29
31
 
30
32
  def has_header_row(self) -> bool:
31
33
  return True
@@ -35,7 +37,10 @@ class CsvHeaderAutogenerated(BaseModel):
35
37
  class Config:
36
38
  title = "Autogenerated"
37
39
 
38
- header_definition_type: Literal[CsvHeaderDefinitionType.AUTOGENERATED.value] = CsvHeaderDefinitionType.AUTOGENERATED.value # type: ignore
40
+ header_definition_type: str = Field(
41
+ CsvHeaderDefinitionType.AUTOGENERATED.value,
42
+ const=True,
43
+ )
39
44
 
40
45
  def has_header_row(self) -> bool:
41
46
  return False
@@ -45,7 +50,10 @@ class CsvHeaderUserProvided(BaseModel):
45
50
  class Config:
46
51
  title = "User Provided"
47
52
 
48
- header_definition_type: Literal[CsvHeaderDefinitionType.USER_PROVIDED.value] = CsvHeaderDefinitionType.USER_PROVIDED.value # type: ignore
53
+ header_definition_type: str = Field(
54
+ CsvHeaderDefinitionType.USER_PROVIDED.value,
55
+ const=True,
56
+ )
49
57
  column_names: List[str] = Field(
50
58
  title="Column Names",
51
59
  description="The column names that will be used while emitting the CSV records",
@@ -69,7 +77,10 @@ class CsvFormat(BaseModel):
69
77
  class Config:
70
78
  title = "CSV Format"
71
79
 
72
- filetype: Literal["csv"] = "csv"
80
+ filetype: str = Field(
81
+ "csv",
82
+ const=True,
83
+ )
73
84
  delimiter: str = Field(
74
85
  title="Delimiter",
75
86
  description="The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\\t'.",
@@ -2,12 +2,14 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from pydantic import BaseModel
6
- from typing_extensions import Literal
5
+ from pydantic import BaseModel, Field
7
6
 
8
7
 
9
8
  class JsonlFormat(BaseModel):
10
9
  class Config:
11
10
  title = "Jsonl Format"
12
11
 
13
- filetype: Literal["jsonl"] = "jsonl"
12
+ filetype: str = Field(
13
+ "jsonl",
14
+ const=True,
15
+ )
@@ -3,14 +3,16 @@
3
3
  #
4
4
 
5
5
  from pydantic import BaseModel, Field
6
- from typing_extensions import Literal
7
6
 
8
7
 
9
8
  class ParquetFormat(BaseModel):
10
9
  class Config:
11
10
  title = "Parquet Format"
12
11
 
13
- filetype: Literal["parquet"] = "parquet"
12
+ filetype: str = Field(
13
+ "parquet",
14
+ const=True,
15
+ )
14
16
  # This option is not recommended, but necessary for backwards compatibility
15
17
  decimal_as_float: bool = Field(
16
18
  title="Convert Decimal Fields to Floats",
@@ -11,6 +11,7 @@ from functools import partial
11
11
  from io import IOBase
12
12
  from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Set
13
13
 
14
+ from airbyte_cdk.models import FailureType
14
15
  from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat, CsvHeaderAutogenerated, CsvHeaderUserProvided, InferenceType
15
16
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
16
17
  from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
@@ -18,6 +19,7 @@ from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFile
18
19
  from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
19
20
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
20
21
  from airbyte_cdk.sources.file_based.schema_helpers import TYPE_PYTHON_MAPPING, SchemaType
22
+ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
21
23
 
22
24
  DIALECT_NAME = "_config_dialect"
23
25
 
@@ -75,11 +77,12 @@ class _CsvReader:
75
77
  if isinstance(config_format.header_definition, CsvHeaderUserProvided):
76
78
  return config_format.header_definition.column_names # type: ignore # should be CsvHeaderUserProvided given the type
77
79
 
78
- self._skip_rows(fp, config_format.skip_rows_before_header)
79
80
  if isinstance(config_format.header_definition, CsvHeaderAutogenerated):
81
+ self._skip_rows(fp, config_format.skip_rows_before_header + config_format.skip_rows_after_header)
80
82
  headers = self._auto_generate_headers(fp, dialect_name)
81
83
  else:
82
84
  # Then read the header
85
+ self._skip_rows(fp, config_format.skip_rows_before_header)
83
86
  reader = csv.reader(fp, dialect=dialect_name) # type: ignore
84
87
  headers = list(next(reader))
85
88
 
@@ -141,6 +144,12 @@ class CsvParser(FileTypeParser):
141
144
  if read_bytes >= self._MAX_BYTES_PER_FILE_FOR_SCHEMA_INFERENCE:
142
145
  break
143
146
 
147
+ if not type_inferrer_by_field:
148
+ raise AirbyteTracedException(
149
+ message=f"Could not infer schema as there are no rows in {file.uri}. If having an empty CSV file is expected, ignore this. "
150
+ f"Else, please contact Airbyte.",
151
+ failure_type=FailureType.config_error,
152
+ )
144
153
  schema = {header.strip(): {"type": type_inferred.infer()} for header, type_inferred in type_inferrer_by_field.items()}
145
154
  data_generator.close()
146
155
  return schema
@@ -0,0 +1,16 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from datetime import datetime
6
+
7
+ from pydantic import BaseModel
8
+
9
+
10
+ class RemoteFile(BaseModel):
11
+ """
12
+ A file in a file-based stream.
13
+ """
14
+
15
+ uri: str
16
+ last_modified: datetime
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.51.10
3
+ Version: 0.51.12
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -26,7 +26,7 @@ setup(
26
26
  name="airbyte-cdk",
27
27
  # The version of the airbyte-cdk package is used at runtime to validate manifests. That validation must be
28
28
  # updated if our semver format changes such as using release candidate versions.
29
- version="0.51.10",
29
+ version="0.51.12",
30
30
  description="A framework for writing Airbyte Connectors.",
31
31
  long_description=README,
32
32
  long_description_content_type="text/markdown",
@@ -13,6 +13,7 @@ from unittest import TestCase, mock
13
13
  from unittest.mock import Mock
14
14
 
15
15
  import pytest
16
+ from airbyte_cdk.models import FailureType
16
17
  from airbyte_cdk.sources.file_based.config.csv_format import (
17
18
  DEFAULT_FALSE_VALUES,
18
19
  DEFAULT_TRUE_VALUES,
@@ -26,6 +27,7 @@ from airbyte_cdk.sources.file_based.exceptions import RecordParseError
26
27
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
27
28
  from airbyte_cdk.sources.file_based.file_types.csv_parser import CsvParser, _CsvReader
28
29
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
30
+ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
29
31
 
30
32
  PROPERTY_TYPES = {
31
33
  "col1": "null",
@@ -169,7 +171,7 @@ class SchemaInferenceTestCase(TestCase):
169
171
  self._config.get_input_schema.return_value = None
170
172
  self._config.format = self._config_format
171
173
 
172
- self._file = Mock(spec=RemoteFile)
174
+ self._file = RemoteFile(uri="a uri", last_modified=datetime.now())
173
175
  self._stream_reader = Mock(spec=AbstractFileBasedStreamReader)
174
176
  self._logger = Mock(spec=logging.Logger)
175
177
  self._csv_reader = Mock(spec=_CsvReader)
@@ -222,6 +224,12 @@ class SchemaInferenceTestCase(TestCase):
222
224
  # since the type is number, we know the string at the end was not considered
223
225
  assert inferred_schema == {self._HEADER_NAME: {"type": "number"}}
224
226
 
227
+ def test_given_empty_csv_file_when_infer_schema_then_raise_config_error(self) -> None:
228
+ self._csv_reader.read_data.return_value = []
229
+ with pytest.raises(AirbyteTracedException) as exception:
230
+ self._infer_schema()
231
+ assert exception.value.failure_type == FailureType.config_error
232
+
225
233
  def _test_infer_schema(self, rows: List[str], expected_type: str) -> None:
226
234
  self._csv_reader.read_data.return_value = ({self._HEADER_NAME: row} for row in rows)
227
235
  inferred_schema = self._infer_schema()
@@ -260,7 +268,7 @@ class CsvReaderTest(unittest.TestCase):
260
268
  self._config.name = self._CONFIG_NAME
261
269
  self._config.format = self._config_format
262
270
 
263
- self._file = Mock(spec=RemoteFile)
271
+ self._file = RemoteFile(uri="a uri", last_modified=datetime.now())
264
272
  self._stream_reader = Mock(spec=AbstractFileBasedStreamReader)
265
273
  self._logger = Mock(spec=logging.Logger)
266
274
  self._csv_reader = _CsvReader()
@@ -292,6 +300,21 @@ class CsvReaderTest(unittest.TestCase):
292
300
 
293
301
  assert list(data_generator) == [{"f0": "0", "f1": "1", "f2": "2", "f3": "3", "f4": "4", "f5": "5", "f6": "6"}]
294
302
 
303
+ def test_given_skip_row_before_and_after_and_autogenerated_headers_when_read_data_then_generate_headers_with_format_fX(self) -> None:
304
+ self._config_format.header_definition = CsvHeaderAutogenerated()
305
+ self._config_format.skip_rows_before_header = 1
306
+ self._config_format.skip_rows_after_header = 2
307
+ self._stream_reader.open_file.return_value = CsvFileBuilder().with_data([
308
+ "skip before",
309
+ "skip after 1",
310
+ "skip after 2",
311
+ "0,1,2,3,4,5,6"
312
+ ]).build()
313
+
314
+ data_generator = self._read_data()
315
+
316
+ assert list(data_generator) == [{"f0": "0", "f1": "1", "f2": "2", "f3": "3", "f4": "4", "f5": "5", "f6": "6"}]
317
+
295
318
  def test_given_user_provided_headers_when_read_data_then_use_user_provided_headers(self) -> None:
296
319
  self._config_format.header_definition = CsvHeaderUserProvided(column_names=["first", "second", "third", "fourth"])
297
320
  self._stream_reader.open_file.return_value = CsvFileBuilder().with_data(["0,1,2,3"]).build()
@@ -111,7 +111,7 @@ single_csv_scenario = (
111
111
  "title": "Avro Format",
112
112
  "type": "object",
113
113
  "properties": {
114
- "filetype": {"title": "Filetype", "default": "avro", "enum": ["avro"], "type": "string"},
114
+ "filetype": {"title": "Filetype", "default": "avro", "const": "avro", "type": "string"},
115
115
  "double_as_string": {
116
116
  "title": "Convert Double Fields to Strings",
117
117
  "description": "Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss precision when handling floating point numbers.",
@@ -124,7 +124,7 @@ single_csv_scenario = (
124
124
  "title": "CSV Format",
125
125
  "type": "object",
126
126
  "properties": {
127
- "filetype": {"title": "Filetype", "default": "csv", "enum": ["csv"], "type": "string"},
127
+ "filetype": {"title": "Filetype", "default": "csv", "const": "csv", "type": "string"},
128
128
  "delimiter": {
129
129
  "title": "Delimiter",
130
130
  "description": "The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\\t'.",
@@ -190,21 +190,21 @@ single_csv_scenario = (
190
190
  "title": "From CSV",
191
191
  "type": "object",
192
192
  "properties": {
193
- "header_definition_type": {"title": "Header Definition Type", "default": "From CSV", "enum": ["From CSV"], "type": "string"},
193
+ "header_definition_type": {"title": "Header Definition Type", "default": "From CSV", "const": "From CSV", "type": "string"},
194
194
  },
195
195
  },
196
196
  {
197
197
  "title": "Autogenerated",
198
198
  "type": "object",
199
199
  "properties": {
200
- "header_definition_type": {"title": "Header Definition Type", "default": "Autogenerated", "enum": ["Autogenerated"], "type": "string"},
200
+ "header_definition_type": {"title": "Header Definition Type", "default": "Autogenerated", "const": "Autogenerated", "type": "string"},
201
201
  },
202
202
  },
203
203
  {
204
204
  "title": "User Provided",
205
205
  "type": "object",
206
206
  "properties": {
207
- "header_definition_type": {"title": "Header Definition Type", "default": "User Provided", "enum": ["User Provided"], "type": "string"},
207
+ "header_definition_type": {"title": "Header Definition Type", "default": "User Provided", "const": "User Provided", "type": "string"},
208
208
  "column_names": {
209
209
  "title": "Column Names",
210
210
  "description": "The column names that will be used while emitting the CSV records",
@@ -247,7 +247,7 @@ single_csv_scenario = (
247
247
  "title": "Jsonl Format",
248
248
  "type": "object",
249
249
  "properties": {
250
- "filetype": {"title": "Filetype", "default": "jsonl", "enum": ["jsonl"], "type": "string"}
250
+ "filetype": {"title": "Filetype", "default": "jsonl", "const": "jsonl", "type": "string"}
251
251
  },
252
252
  },
253
253
  {
@@ -257,7 +257,7 @@ single_csv_scenario = (
257
257
  "filetype": {
258
258
  "title": "Filetype",
259
259
  "default": "parquet",
260
- "enum": ["parquet"],
260
+ "const": "parquet",
261
261
  "type": "string",
262
262
  },
263
263
  "decimal_as_float": {