cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. cognee/api/client.py +44 -4
  2. cognee/api/health.py +332 -0
  3. cognee/api/v1/add/add.py +5 -2
  4. cognee/api/v1/add/routers/get_add_router.py +3 -0
  5. cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
  6. cognee/api/v1/cognify/cognify.py +8 -0
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
  8. cognee/api/v1/config/config.py +3 -1
  9. cognee/api/v1/datasets/routers/get_datasets_router.py +2 -8
  10. cognee/api/v1/delete/delete.py +16 -12
  11. cognee/api/v1/responses/routers/get_responses_router.py +3 -1
  12. cognee/api/v1/search/search.py +10 -0
  13. cognee/api/v1/settings/routers/get_settings_router.py +0 -2
  14. cognee/base_config.py +1 -0
  15. cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
  16. cognee/infrastructure/databases/graph/config.py +2 -0
  17. cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
  18. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
  19. cognee/infrastructure/databases/graph/kuzu/adapter.py +43 -16
  20. cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +281 -0
  21. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +151 -77
  22. cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
  23. cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
  24. cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
  25. cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
  26. cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
  27. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
  28. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +11 -3
  29. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
  30. cognee/infrastructure/databases/vector/create_vector_engine.py +31 -23
  31. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
  32. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
  33. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
  34. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
  35. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
  36. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
  37. cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
  38. cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
  39. cognee/infrastructure/files/utils/guess_file_type.py +2 -2
  40. cognee/infrastructure/files/utils/open_data_file.py +4 -23
  41. cognee/infrastructure/llm/LLMGateway.py +137 -0
  42. cognee/infrastructure/llm/__init__.py +14 -4
  43. cognee/infrastructure/llm/config.py +29 -1
  44. cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
  45. cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
  46. cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
  47. cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
  48. cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
  49. cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
  50. cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
  51. cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
  52. cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
  53. cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
  54. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
  55. cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
  56. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
  57. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
  58. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
  60. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
  61. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
  62. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
  64. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
  65. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
  66. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
  67. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
  68. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
  69. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
  70. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
  71. cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
  72. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
  73. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
  74. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
  75. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
  76. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
  77. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
  78. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
  79. cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
  80. cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
  81. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
  82. cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
  83. cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
  84. cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
  85. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
  86. cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
  87. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
  88. cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
  89. cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
  90. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
  91. cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
  92. cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
  93. cognee/infrastructure/llm/utils.py +3 -1
  94. cognee/infrastructure/loaders/LoaderEngine.py +156 -0
  95. cognee/infrastructure/loaders/LoaderInterface.py +73 -0
  96. cognee/infrastructure/loaders/__init__.py +18 -0
  97. cognee/infrastructure/loaders/core/__init__.py +7 -0
  98. cognee/infrastructure/loaders/core/audio_loader.py +98 -0
  99. cognee/infrastructure/loaders/core/image_loader.py +114 -0
  100. cognee/infrastructure/loaders/core/text_loader.py +90 -0
  101. cognee/infrastructure/loaders/create_loader_engine.py +32 -0
  102. cognee/infrastructure/loaders/external/__init__.py +22 -0
  103. cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
  104. cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
  105. cognee/infrastructure/loaders/get_loader_engine.py +18 -0
  106. cognee/infrastructure/loaders/supported_loaders.py +18 -0
  107. cognee/infrastructure/loaders/use_loader.py +21 -0
  108. cognee/infrastructure/loaders/utils/__init__.py +0 -0
  109. cognee/modules/data/methods/__init__.py +1 -0
  110. cognee/modules/data/methods/get_authorized_dataset.py +23 -0
  111. cognee/modules/data/models/Data.py +13 -3
  112. cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
  113. cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
  114. cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
  115. cognee/modules/data/processing/document_types/UnstructuredDocument.py +2 -5
  116. cognee/modules/engine/utils/generate_edge_id.py +5 -0
  117. cognee/modules/graph/cognee_graph/CogneeGraph.py +45 -35
  118. cognee/modules/graph/methods/get_formatted_graph_data.py +8 -2
  119. cognee/modules/graph/utils/get_graph_from_model.py +93 -101
  120. cognee/modules/ingestion/data_types/TextData.py +8 -2
  121. cognee/modules/ingestion/save_data_to_file.py +1 -1
  122. cognee/modules/pipelines/exceptions/__init__.py +1 -0
  123. cognee/modules/pipelines/exceptions/exceptions.py +12 -0
  124. cognee/modules/pipelines/models/DataItemStatus.py +5 -0
  125. cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
  126. cognee/modules/pipelines/models/__init__.py +1 -0
  127. cognee/modules/pipelines/operations/pipeline.py +10 -2
  128. cognee/modules/pipelines/operations/run_tasks.py +252 -20
  129. cognee/modules/pipelines/operations/run_tasks_distributed.py +1 -1
  130. cognee/modules/retrieval/chunks_retriever.py +23 -1
  131. cognee/modules/retrieval/code_retriever.py +66 -9
  132. cognee/modules/retrieval/completion_retriever.py +11 -9
  133. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
  134. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
  135. cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
  136. cognee/modules/retrieval/graph_completion_retriever.py +1 -1
  137. cognee/modules/retrieval/insights_retriever.py +4 -0
  138. cognee/modules/retrieval/natural_language_retriever.py +9 -15
  139. cognee/modules/retrieval/summaries_retriever.py +23 -1
  140. cognee/modules/retrieval/utils/brute_force_triplet_search.py +23 -4
  141. cognee/modules/retrieval/utils/completion.py +6 -9
  142. cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
  143. cognee/modules/search/methods/search.py +5 -1
  144. cognee/modules/search/operations/__init__.py +1 -0
  145. cognee/modules/search/operations/select_search_type.py +42 -0
  146. cognee/modules/search/types/SearchType.py +1 -0
  147. cognee/modules/settings/get_settings.py +0 -8
  148. cognee/modules/settings/save_vector_db_config.py +1 -1
  149. cognee/shared/data_models.py +3 -1
  150. cognee/shared/logging_utils.py +0 -5
  151. cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
  152. cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
  153. cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
  154. cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
  155. cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
  156. cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
  157. cognee/tasks/graph/extract_graph_from_code.py +3 -2
  158. cognee/tasks/graph/extract_graph_from_data.py +4 -3
  159. cognee/tasks/graph/infer_data_ontology.py +5 -6
  160. cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
  161. cognee/tasks/ingestion/ingest_data.py +91 -61
  162. cognee/tasks/ingestion/resolve_data_directories.py +3 -0
  163. cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
  164. cognee/tasks/storage/index_data_points.py +1 -1
  165. cognee/tasks/storage/index_graph_edges.py +4 -1
  166. cognee/tasks/summarization/summarize_code.py +2 -3
  167. cognee/tasks/summarization/summarize_text.py +3 -2
  168. cognee/tests/test_cognee_server_start.py +12 -7
  169. cognee/tests/test_deduplication.py +2 -2
  170. cognee/tests/test_deletion.py +58 -17
  171. cognee/tests/test_graph_visualization_permissions.py +161 -0
  172. cognee/tests/test_neptune_analytics_graph.py +309 -0
  173. cognee/tests/test_neptune_analytics_hybrid.py +176 -0
  174. cognee/tests/{test_weaviate.py → test_neptune_analytics_vector.py} +86 -11
  175. cognee/tests/test_pgvector.py +5 -5
  176. cognee/tests/test_s3.py +1 -6
  177. cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
  178. cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
  179. cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
  180. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
  181. cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
  182. cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
  183. cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
  184. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +84 -9
  185. cognee/tests/unit/modules/search/search_methods_test.py +55 -0
  186. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/METADATA +13 -9
  187. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/RECORD +203 -164
  188. cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
  189. cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
  190. cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
  191. cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
  192. cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
  193. cognee/modules/data/extraction/extract_categories.py +0 -14
  194. cognee/tests/test_qdrant.py +0 -99
  195. distributed/Dockerfile +0 -34
  196. distributed/app.py +0 -4
  197. distributed/entrypoint.py +0 -71
  198. distributed/entrypoint.sh +0 -5
  199. distributed/modal_image.py +0 -11
  200. distributed/queues.py +0 -5
  201. distributed/tasks/queued_add_data_points.py +0 -13
  202. distributed/tasks/queued_add_edges.py +0 -13
  203. distributed/tasks/queued_add_nodes.py +0 -13
  204. distributed/test.py +0 -28
  205. distributed/utils.py +0 -19
  206. distributed/workers/data_point_saving_worker.py +0 -93
  207. distributed/workers/graph_saving_worker.py +0 -104
  208. /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
  209. /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
  210. /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
  211. /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
  212. /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
  213. /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
  214. /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
  215. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
  216. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
  217. /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
  218. {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
  219. {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
  220. /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
  221. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/WHEEL +0 -0
  222. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/LICENSE +0 -0
  223. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,8 +0,0 @@
1
- # class PineconeVectorDB(VectorDB):
2
- # def __init__(self, *args, **kwargs):
3
- # super().__init__(*args, **kwargs)
4
- # self.init_pinecone(self.index_name)
5
- #
6
- # def init_pinecone(self, index_name):
7
- # # Pinecone initialization logic
8
- # pass
@@ -1,514 +0,0 @@
1
- import os
2
- from typing import Dict, List, Optional
3
- from qdrant_client import AsyncQdrantClient, models
4
-
5
- from cognee.shared.logging_utils import get_logger
6
- from cognee.infrastructure.engine.utils import parse_id
7
- from cognee.exceptions import InvalidValueError
8
- from cognee.infrastructure.engine import DataPoint
9
- from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
10
- from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
11
-
12
- from ..embeddings.EmbeddingEngine import EmbeddingEngine
13
- from ..vector_db_interface import VectorDBInterface
14
-
15
- logger = get_logger("QDrantAdapter")
16
-
17
-
18
- class IndexSchema(DataPoint):
19
- """
20
- Represents a schema for indexing where each data point contains a text field.
21
-
22
- This class inherits from DataPoint and defines a text attribute as well as metadata
23
- containing index fields used for indexing operations.
24
- """
25
-
26
- text: str
27
-
28
- metadata: dict = {"index_fields": ["text"]}
29
-
30
-
31
- # class CollectionConfig(BaseModel, extra = "forbid"):
32
- # vector_config: Dict[str, models.VectorParams] = Field(..., description="Vectors configuration" )
33
- # hnsw_config: Optional[models.HnswConfig] = Field(default = None, description="HNSW vector index configuration")
34
- # optimizers_config: Optional[models.OptimizersConfig] = Field(default = None, description="Optimizers configuration")
35
- # quantization_config: Optional[models.QuantizationConfig] = Field(default = None, description="Quantization configuration")
36
-
37
-
38
- def create_hnsw_config(hnsw_config: Dict):
39
- """
40
- Create HNSW configuration.
41
-
42
- This function returns an HNSW configuration object if the provided configuration is not
43
- None, otherwise it returns None.
44
-
45
- Parameters:
46
- -----------
47
-
48
- - hnsw_config (Dict): A dictionary containing HNSW configuration parameters.
49
-
50
- Returns:
51
- --------
52
-
53
- An instance of models.HnswConfig if hnsw_config is not None, otherwise None.
54
- """
55
- if hnsw_config is not None:
56
- return models.HnswConfig()
57
- return None
58
-
59
-
60
- def create_optimizers_config(optimizers_config: Dict):
61
- """
62
- Create and return an OptimizersConfig instance if the input configuration is provided.
63
-
64
- This function checks if the given optimizers configuration is not None. If valid, it
65
- initializes and returns a new instance of the OptimizersConfig class from the models
66
- module. If the configuration is None, it returns None instead.
67
-
68
- Parameters:
69
- -----------
70
-
71
- - optimizers_config (Dict): A dictionary containing optimizer configuration
72
- settings.
73
-
74
- Returns:
75
- --------
76
-
77
- Returns an instance of OptimizersConfig if optimizers_config is provided; otherwise,
78
- returns None.
79
- """
80
- if optimizers_config is not None:
81
- return models.OptimizersConfig()
82
- return None
83
-
84
-
85
- def create_quantization_config(quantization_config: Dict):
86
- """
87
- Create a quantization configuration based on the provided settings.
88
-
89
- This function generates an instance of `QuantizationConfig` if the provided
90
- `quantization_config` is not None. If it is None, the function returns None.
91
-
92
- Parameters:
93
- -----------
94
-
95
- - quantization_config (Dict): A dictionary containing the quantization configuration
96
- settings.
97
-
98
- Returns:
99
- --------
100
-
101
- An instance of `QuantizationConfig` if `quantization_config` is provided; otherwise,
102
- returns None.
103
- """
104
- if quantization_config is not None:
105
- return models.QuantizationConfig()
106
- return None
107
-
108
-
109
- class QDrantAdapter(VectorDBInterface):
110
- """
111
- Adapt to the Qdrant vector database interface.
112
-
113
- Public methods:
114
- - get_qdrant_client
115
- - embed_data
116
- - has_collection
117
- - create_collection
118
- - create_data_points
119
- - create_vector_index
120
- - index_data_points
121
- - retrieve
122
- - search
123
- - batch_search
124
- - delete_data_points
125
- - prune
126
- """
127
-
128
- name = "Qdrant"
129
- url: str = None
130
- api_key: str = None
131
- qdrant_path: str = None
132
-
133
- def __init__(self, url, api_key, embedding_engine: EmbeddingEngine, qdrant_path=None):
134
- self.embedding_engine = embedding_engine
135
-
136
- if qdrant_path is not None:
137
- self.qdrant_path = qdrant_path
138
- else:
139
- self.url = url
140
- self.api_key = api_key
141
-
142
- def get_qdrant_client(self) -> AsyncQdrantClient:
143
- """
144
- Retrieve an instance of AsyncQdrantClient configured with the appropriate
145
- settings based on the instance's attributes.
146
-
147
- Returns an instance of AsyncQdrantClient configured to connect to the database.
148
-
149
- Returns:
150
- --------
151
- - AsyncQdrantClient: An instance of AsyncQdrantClient configured for database
152
- operations.
153
- """
154
- is_prod = os.getenv("ENV").lower() == "prod"
155
-
156
- if self.qdrant_path is not None:
157
- return AsyncQdrantClient(path=self.qdrant_path, port=6333, https=is_prod)
158
- elif self.url is not None:
159
- return AsyncQdrantClient(url=self.url, api_key=self.api_key, port=6333, https=is_prod)
160
-
161
- return AsyncQdrantClient(location=":memory:")
162
-
163
- async def embed_data(self, data: List[str]) -> List[float]:
164
- """
165
- Embed a list of text data into vector representations asynchronously.
166
-
167
- Parameters:
168
- -----------
169
-
170
- - data (List[str]): A list of strings containing the text data to be embedded.
171
-
172
- Returns:
173
- --------
174
-
175
- - List[float]: A list of floating-point vectors representing the embedded text data.
176
- """
177
- return await self.embedding_engine.embed_text(data)
178
-
179
- async def has_collection(self, collection_name: str) -> bool:
180
- """
181
- Check if a specified collection exists in the Qdrant database asynchronously.
182
-
183
- Parameters:
184
- -----------
185
-
186
- - collection_name (str): The name of the collection to check for existence.
187
-
188
- Returns:
189
- --------
190
-
191
- - bool: True if the specified collection exists, False otherwise.
192
- """
193
- client = self.get_qdrant_client()
194
- result = await client.collection_exists(collection_name)
195
- await client.close()
196
- return result
197
-
198
- async def create_collection(
199
- self,
200
- collection_name: str,
201
- payload_schema=None,
202
- ):
203
- """
204
- Create a new collection in the Qdrant database if it does not already exist.
205
-
206
- If the collection already exists, this operation has no effect.
207
-
208
- Parameters:
209
- -----------
210
-
211
- - collection_name (str): The name of the collection to create.
212
- - payload_schema: Optional schema for the payload. Defaults to None. (default None)
213
- """
214
- client = self.get_qdrant_client()
215
-
216
- if not await client.collection_exists(collection_name):
217
- await client.create_collection(
218
- collection_name=collection_name,
219
- vectors_config={
220
- "text": models.VectorParams(
221
- size=self.embedding_engine.get_vector_size(), distance="Cosine"
222
- )
223
- },
224
- )
225
-
226
- await client.close()
227
-
228
- async def create_data_points(self, collection_name: str, data_points: List[DataPoint]):
229
- """
230
- Create and upload data points to a specified collection in the database.
231
-
232
- Raises CollectionNotFoundError if the collection does not exist.
233
-
234
- Parameters:
235
- -----------
236
-
237
- - collection_name (str): The name of the collection to which data points will be
238
- uploaded.
239
- - data_points (List[DataPoint]): A list of DataPoint objects to be uploaded.
240
-
241
- Returns:
242
- --------
243
-
244
- None if the operation is successful; raises exceptions on error.
245
- """
246
- from qdrant_client.http.exceptions import UnexpectedResponse
247
-
248
- client = self.get_qdrant_client()
249
-
250
- data_vectors = await self.embed_data(
251
- [DataPoint.get_embeddable_data(data_point) for data_point in data_points]
252
- )
253
-
254
- def convert_to_qdrant_point(data_point: DataPoint):
255
- """
256
- Convert a DataPoint object into the format expected by Qdrant for upload.
257
-
258
- Parameters:
259
- -----------
260
-
261
- - data_point (DataPoint): The DataPoint object to convert.
262
-
263
- Returns:
264
- --------
265
-
266
- None; performs an operation without returning a value.
267
- """
268
- return models.PointStruct(
269
- id=str(data_point.id),
270
- payload=data_point.model_dump(),
271
- vector={"text": data_vectors[data_points.index(data_point)]},
272
- )
273
-
274
- points = [convert_to_qdrant_point(point) for point in data_points]
275
-
276
- try:
277
- client.upload_points(collection_name=collection_name, points=points)
278
- except UnexpectedResponse as error:
279
- if "Collection not found" in str(error):
280
- raise CollectionNotFoundError(
281
- message=f"Collection {collection_name} not found!"
282
- ) from error
283
- else:
284
- raise error
285
- except Exception as error:
286
- logger.error("Error uploading data points to Qdrant: %s", str(error))
287
- raise error
288
- finally:
289
- await client.close()
290
-
291
- async def create_vector_index(self, index_name: str, index_property_name: str):
292
- """
293
- Create a vector index for a specified property name.
294
-
295
- This is essentially a wrapper around create_collection, which allows for more
296
- flexibility
297
- in index naming.
298
-
299
- Parameters:
300
- -----------
301
-
302
- - index_name (str): The base name for the index to be created.
303
- - index_property_name (str): The property name that will be part of the index name.
304
- """
305
- await self.create_collection(f"{index_name}_{index_property_name}")
306
-
307
- async def index_data_points(
308
- self, index_name: str, index_property_name: str, data_points: list[DataPoint]
309
- ):
310
- """
311
- Index data points into a specific collection based on provided metadata.
312
-
313
- Transforms DataPoint objects into an appropriate format and uploads them.
314
-
315
- Parameters:
316
- -----------
317
-
318
- - index_name (str): The base name for the index used for naming the collection.
319
- - index_property_name (str): The property name used for naming the collection.
320
- - data_points (list[DataPoint]): A list of DataPoint objects to index.
321
- """
322
- await self.create_data_points(
323
- f"{index_name}_{index_property_name}",
324
- [
325
- IndexSchema(
326
- id=data_point.id,
327
- text=getattr(data_point, data_point.metadata["index_fields"][0]),
328
- )
329
- for data_point in data_points
330
- ],
331
- )
332
-
333
- async def retrieve(self, collection_name: str, data_point_ids: list[str]):
334
- """
335
- Retrieve data points from a specified collection based on their IDs.
336
-
337
- Returns the data corresponding to the provided IDs from the collection.
338
-
339
- Parameters:
340
- -----------
341
-
342
- - collection_name (str): The name of the collection to retrieve from.
343
- - data_point_ids (list[str]): A list of IDs of the data points to retrieve.
344
-
345
- Returns:
346
- --------
347
-
348
- The retrieved data points, including payloads for each ID.
349
- """
350
- client = self.get_qdrant_client()
351
- results = await client.retrieve(collection_name, data_point_ids, with_payload=True)
352
- await client.close()
353
- return results
354
-
355
- async def search(
356
- self,
357
- collection_name: str,
358
- query_text: Optional[str] = None,
359
- query_vector: Optional[List[float]] = None,
360
- limit: int = 15,
361
- with_vector: bool = False,
362
- ) -> List[ScoredResult]:
363
- """
364
- Search for data points in a collection based on either a textual query or a vector
365
- query.
366
-
367
- Raises InvalidValueError if both query_text and query_vector are None.
368
-
369
- Returns a list of scored results that match the search criteria.
370
-
371
- Parameters:
372
- -----------
373
-
374
- - collection_name (str): The name of the collection to search within.
375
- - query_text (Optional[str]): The text to be used in the search query; optional if
376
- query_vector is provided. (default None)
377
- - query_vector (Optional[List[float]]): The vector to be used in the search query;
378
- optional if query_text is provided. (default None)
379
- - limit (int): The maximum number of results to return; defaults to 15. (default 15)
380
- - with_vector (bool): Indicates whether to return vector data along with results;
381
- defaults to False. (default False)
382
-
383
- Returns:
384
- --------
385
-
386
- - List[ScoredResult]: A list of ScoredResult objects representing the results of the
387
- search.
388
- """
389
- from qdrant_client.http.exceptions import UnexpectedResponse
390
-
391
- if query_text is None and query_vector is None:
392
- raise InvalidValueError(message="One of query_text or query_vector must be provided!")
393
-
394
- if not await self.has_collection(collection_name):
395
- return []
396
-
397
- if query_vector is None:
398
- query_vector = (await self.embed_data([query_text]))[0]
399
-
400
- try:
401
- client = self.get_qdrant_client()
402
- if limit == 0:
403
- collection_size = await client.count(collection_name=collection_name)
404
-
405
- results = await client.search(
406
- collection_name=collection_name,
407
- query_vector=models.NamedVector(
408
- name="text",
409
- vector=query_vector
410
- if query_vector is not None
411
- else (await self.embed_data([query_text]))[0],
412
- ),
413
- limit=limit if limit > 0 else collection_size.count,
414
- with_vectors=with_vector,
415
- )
416
-
417
- await client.close()
418
-
419
- return [
420
- ScoredResult(
421
- id=parse_id(result.id),
422
- payload={
423
- **result.payload,
424
- "id": parse_id(result.id),
425
- },
426
- score=1 - result.score,
427
- )
428
- for result in results
429
- ]
430
- finally:
431
- await client.close()
432
-
433
- async def batch_search(
434
- self,
435
- collection_name: str,
436
- query_texts: List[str],
437
- limit: int = None,
438
- with_vectors: bool = False,
439
- ):
440
- """
441
- Perform a batch search in a specified collection using multiple query texts.
442
-
443
- Returns the results of the search for each query, filtering for results with a score
444
- higher than 0.9.
445
-
446
- Parameters:
447
- -----------
448
-
449
- - collection_name (str): The name of the collection to search in.
450
- - query_texts (List[str]): A list of query texts to search for in the collection.
451
- - limit (int): The maximum number of results to return for each search request; can
452
- be None. (default None)
453
- - with_vectors (bool): Indicates whether to include vector data in the results;
454
- defaults to False. (default False)
455
-
456
- Returns:
457
- --------
458
-
459
- A list containing the filtered search results for each query text.
460
- """
461
-
462
- vectors = await self.embed_data(query_texts)
463
-
464
- # Generate dynamic search requests based on the provided embeddings
465
- requests = [
466
- models.SearchRequest(
467
- vector=models.NamedVector(name="text", vector=vector),
468
- limit=limit,
469
- with_vector=with_vectors,
470
- )
471
- for vector in vectors
472
- ]
473
-
474
- client = self.get_qdrant_client()
475
-
476
- # Perform batch search with the dynamically generated requests
477
- results = await client.search_batch(collection_name=collection_name, requests=requests)
478
-
479
- await client.close()
480
-
481
- return [filter(lambda result: result.score > 0.9, result_group) for result_group in results]
482
-
483
- async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
484
- """
485
- Delete specific data points from a specified collection based on their IDs.
486
-
487
- Parameters:
488
- -----------
489
-
490
- - collection_name (str): The name of the collection from which to delete the data
491
- points.
492
- - data_point_ids (list[str]): The list of IDs of data points to be deleted.
493
-
494
- Returns:
495
- --------
496
-
497
- The result of the delete operation from the database.
498
- """
499
- client = self.get_qdrant_client()
500
- results = await client.delete(collection_name, data_point_ids)
501
- return results
502
-
503
- async def prune(self):
504
- """
505
- Remove all collections from the Qdrant database asynchronously.
506
- """
507
- client = self.get_qdrant_client()
508
-
509
- response = await client.get_collections()
510
-
511
- for collection in response.collections:
512
- await client.delete_collection(collection.name)
513
-
514
- await client.close()
@@ -1,2 +0,0 @@
1
- from .QDrantAdapter import QDrantAdapter
2
- from ..models.CollectionConfig import CollectionConfig