MemoryOS 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0
memos/vec_dbs/qdrant.py
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from memos.configs.vec_db import QdrantVecDBConfig
|
|
4
|
+
from memos.dependency import require_python_package
|
|
5
|
+
from memos.log import get_logger
|
|
6
|
+
from memos.vec_dbs.base import BaseVecDB
|
|
7
|
+
from memos.vec_dbs.item import VecDBItem
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Module-level logger, obtained from memos.log and keyed by this module's name.
logger = get_logger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class QdrantVecDB(BaseVecDB):
    """Qdrant vector database implementation.

    Wraps a ``qdrant_client.QdrantClient`` and operates on the single
    collection named by ``config.collection_name``. Items are exchanged as
    ``VecDBItem`` objects (or dictionaries convertible via
    ``VecDBItem.from_dict``).
    """

    @require_python_package(
        import_name="qdrant_client",
        install_command="pip install qdrant-client",
        install_link="https://python-client.qdrant.tech/",
    )
    def __init__(self, config: QdrantVecDBConfig):
        """Initialize the Qdrant vector database and the collection.

        Builds the client from ``config`` (``url``/``api_key`` when a URL is
        given, otherwise ``host``/``port``/``path``), creates the collection
        if needed, and then tries to create the default payload indexes.

        Args:
            config: Connection and collection settings.
        """
        from qdrant_client import QdrantClient

        self.config = config
        # Default payload fields we always index because query filters rely on them
        self._default_payload_index_fields = [
            "memory_type",
            "status",
            "vector_sync",
            "user_name",
        ]

        client_kwargs: dict[str, Any] = {}
        if self.config.url:
            client_kwargs["url"] = self.config.url
            if self.config.api_key:
                client_kwargs["api_key"] = self.config.api_key
        else:
            client_kwargs.update(
                {
                    "host": self.config.host,
                    "port": self.config.port,
                    "path": self.config.path,
                }
            )

            # If both host and port are None, we are running in local/embedded mode
            if self.config.host is None and self.config.port is None:
                logger.warning(
                    "Qdrant is running in local mode (host and port are both None). "
                    "In local mode, there may be race conditions during concurrent reads/writes. "
                    "It is strongly recommended to deploy a standalone Qdrant server "
                    "(e.g., via Docker: https://qdrant.tech/documentation/quickstart/)."
                )

        self.client = QdrantClient(**client_kwargs)
        self.create_collection()
        # Ensure common payload indexes exist (idempotent). Index creation is
        # best-effort: a failure here must not prevent the store from being used.
        try:
            self.ensure_payload_indexes(self._default_payload_index_fields)
        except Exception as e:
            logger.warning(f"Failed to ensure default payload indexes: {e}")

    def create_collection(self) -> None:
        """Create the configured collection unless it already exists.

        Raises:
            KeyError: If ``config.distance_metric`` is not one of
                ``"cosine"``, ``"euclidean"`` or ``"dot"``.
            Exception: Any client error other than an "already exists"
                conflict is propagated to the caller.
        """
        from qdrant_client.http import models
        from qdrant_client.http.exceptions import UnexpectedResponse

        if self.collection_exists(self.config.collection_name):
            collection_info = self.client.get_collection(self.config.collection_name)
            # A collection with named vectors exposes a mapping here instead of
            # a single params object, so read the size defensively: this is
            # only a log line and must not abort initialization.
            vectors = collection_info.config.params.vectors
            dimension = getattr(vectors, "size", "unknown")
            logger.warning(
                f"Collection '{self.config.collection_name}' (vector dimension: {dimension}) already exists. Skipping creation."
            )

            return

        # Map string distance metric to Qdrant Distance enum
        distance_map = {
            "cosine": models.Distance.COSINE,
            "euclidean": models.Distance.EUCLID,
            "dot": models.Distance.DOT,
        }

        try:
            self.client.create_collection(
                collection_name=self.config.collection_name,
                vectors_config=models.VectorParams(
                    size=self.config.vector_dimension,
                    distance=distance_map[self.config.distance_metric],
                ),
            )
        except UnexpectedResponse as err:
            # Cloud Qdrant returns 409 when the collection already exists; tolerate and continue.
            if getattr(err, "status_code", None) == 409 or "already exists" in str(err).lower():
                logger.warning(
                    f"Collection '{self.config.collection_name}' already exists. Skipping creation."
                )
                return
            # Any other response error is a real failure; let callers observe it.
            raise

        logger.info(
            f"Collection '{self.config.collection_name}' created with {self.config.vector_dimension} dimensions."
        )

    def list_collections(self) -> list[str]:
        """List the names of all collections known to the client."""
        collections = self.client.get_collections()
        return [collection.name for collection in collections.collections]

    def delete_collection(self, name: str) -> None:
        """Delete the collection called ``name``."""
        self.client.delete_collection(collection_name=name)

    def collection_exists(self, name: str) -> bool:
        """Check if a collection exists.

        Returns ``True`` when ``get_collection`` succeeds and ``False`` when it
        raises anything at all, so a connectivity failure is also reported as
        "does not exist".
        """
        try:
            self.client.get_collection(collection_name=name)
            return True
        except Exception:
            return False

    def search(
        self, query_vector: list[float], top_k: int, filter: dict[str, Any] | None = None
    ) -> list[VecDBItem]:
        """
        Search for similar items in the database.

        Args:
            query_vector: Single vector to search
            top_k: Number of results to return
            filter: Payload filters (exact match on every key); ``None`` or an
                empty dict disables filtering

        Returns:
            List of search results with distance scores and payloads.
        """
        qdrant_filter = self._dict_to_filter(filter) if filter else None
        response = self.client.query_points(
            collection_name=self.config.collection_name,
            query=query_vector,
            limit=top_k,
            query_filter=qdrant_filter,
            with_vectors=True,
            with_payload=True,
        ).points
        logger.info(f"Qdrant search completed with {len(response)} results.")
        return [
            VecDBItem(
                id=point.id,
                vector=point.vector,
                payload=point.payload,
                score=point.score,
            )
            for point in response
        ]

    def _dict_to_filter(self, filter_dict: dict[str, Any]) -> Any:
        """Convert a dictionary filter to a Qdrant Filter object.

        Every ``key: value`` pair becomes an exact-match condition and all
        conditions are combined under ``must``.
        """
        from qdrant_client.http import models

        # Simple exact match for now
        # TODO: Extend this to support more complex conditions
        conditions = [
            models.FieldCondition(key=field, match=models.MatchValue(value=value))
            for field, value in filter_dict.items()
        ]

        return models.Filter(must=conditions)

    def get_by_id(self, id: str) -> VecDBItem | None:
        """Get a single item by ID, or ``None`` when nothing is returned for it."""
        response = self.client.retrieve(
            collection_name=self.config.collection_name,
            ids=[id],
            with_payload=True,
            with_vectors=True,
        )

        if not response:
            return None

        point = response[0]
        return VecDBItem(
            id=point.id,
            vector=point.vector,
            payload=point.payload,
        )

    def get_by_ids(self, ids: list[str]) -> list[VecDBItem]:
        """Get multiple items by their IDs.

        Returns an empty list when ``ids`` is empty or nothing is found.
        """
        if not ids:
            # Nothing to look up; avoid a pointless request.
            return []

        response = self.client.retrieve(
            collection_name=self.config.collection_name,
            ids=ids,
            with_payload=True,
            with_vectors=True,
        )

        if not response:
            return []

        return [
            VecDBItem(
                id=point.id,
                vector=point.vector,
                payload=point.payload,
            )
            for point in response
        ]

    def get_by_filter(self, filter: dict[str, Any], scroll_limit: int = 100) -> list[VecDBItem]:
        """
        Retrieve all items that match the given filter criteria.

        Args:
            filter: Payload filters to match against stored items; an empty
                dict matches everything
            scroll_limit: Maximum number of items to retrieve per scroll request

        Returns:
            List of items including vectors and payload that match the filter
        """
        qdrant_filter = self._dict_to_filter(filter) if filter else None
        all_points = []
        offset = None

        # Use scroll to paginate through all matching points
        while True:
            points, offset = self.client.scroll(
                collection_name=self.config.collection_name,
                limit=scroll_limit,
                scroll_filter=qdrant_filter,
                offset=offset,
                with_vectors=True,
                with_payload=True,
            )

            if not points:
                break

            all_points.extend(points)

            # A missing next-page offset means this was the last page
            if offset is None:
                break

        logger.info(f"Qdrant retrieve by filter completed with {len(all_points)} results.")
        return [
            VecDBItem(
                id=point.id,
                vector=point.vector,
                payload=point.payload,
            )
            for point in all_points
        ]

    def get_all(self, scroll_limit: int = 100) -> list[VecDBItem]:
        """Retrieve all items in the vector database."""
        return self.get_by_filter({}, scroll_limit=scroll_limit)

    def count(self, filter: dict[str, Any] | None = None) -> int:
        """Count items in the database, optionally with filter."""
        qdrant_filter = None
        if filter:
            qdrant_filter = self._dict_to_filter(filter)

        response = self.client.count(
            collection_name=self.config.collection_name, count_filter=qdrant_filter
        )

        return response.count

    def add(self, data: list[VecDBItem | dict[str, Any]]) -> None:
        """
        Add data to the vector database.

        Args:
            data: List of VecDBItem objects or dictionaries containing:
                - 'id': unique identifier
                - 'vector': embedding vector
                - 'payload': additional fields for filtering/retrieval
        """
        from qdrant_client.http import models

        points = []
        for item in data:
            if isinstance(item, dict):
                # Copy first so the caller's dictionary is never handed to from_dict directly
                item = VecDBItem.from_dict(item.copy())
            points.append(models.PointStruct(id=item.id, vector=item.vector, payload=item.payload))

        if not points:
            # Nothing to write; skip the request entirely.
            # NOTE(review): a Qdrant server may reject an empty update request - confirm.
            return

        self.client.upsert(collection_name=self.config.collection_name, points=points)

    def update(self, id: str, data: VecDBItem | dict[str, Any]) -> None:
        """Update an item in the vector database.

        When ``data`` carries a non-empty vector the point is upserted under
        ``id`` with that vector and payload; otherwise only the payload of the
        existing point is set.
        """
        from qdrant_client.http import models

        if isinstance(data, dict):
            data = data.copy()
            data = VecDBItem.from_dict(data)

        if data.vector:
            # For vector updates (with or without payload), use upsert with the same ID
            self.client.upsert(
                collection_name=self.config.collection_name,
                points=[models.PointStruct(id=id, vector=data.vector, payload=data.payload)],
            )
        else:
            # For payload-only updates
            self.client.set_payload(
                collection_name=self.config.collection_name, payload=data.payload, points=[id]
            )

    def ensure_payload_indexes(self, fields: list[str]) -> None:
        """
        Create payload indexes for specified fields in the collection.

        A failure on one field is logged as a warning and does not stop the
        remaining fields from being processed.

        Args:
            fields (list[str]): List of field names to index (as keyword).
        """
        for field in fields:
            try:
                self.client.create_payload_index(
                    collection_name=self.config.collection_name,
                    field_name=field,
                    field_schema="keyword",  # Could be extended in future
                )
                logger.debug(f"Qdrant payload index on '{field}' ensured.")
            except Exception as e:
                logger.warning(f"Failed to create payload index on '{field}': {e}")

    def upsert(self, data: list[VecDBItem | dict[str, Any]]) -> None:
        """
        Add or update data in the vector database.

        If an item with the same ID exists, it will be updated.
        Otherwise, it will be added as a new item.
        """
        # Qdrant's upsert operation already handles this logic
        self.add(data)

    def delete(self, ids: list[str]) -> None:
        """Delete items from the vector database.

        Args:
            ids: Identifiers of the points to remove; an empty list is a no-op.
        """
        from qdrant_client.http import models

        if not ids:
            # Nothing to delete; skip the request entirely.
            return

        point_ids: list[str | int] = ids
        self.client.delete(
            collection_name=self.config.collection_name,
            points_selector=models.PointIdsList(points=point_ids),
        )
|