MemoryOS 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0
memos/vec_dbs/milvus.py
ADDED
|
@@ -0,0 +1,654 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from memos.configs.vec_db import MilvusVecDBConfig
|
|
4
|
+
from memos.dependency import require_python_package
|
|
5
|
+
from memos.log import get_logger
|
|
6
|
+
from memos.vec_dbs.base import BaseVecDB
|
|
7
|
+
from memos.vec_dbs.item import MilvusVecDBItem
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
logger = get_logger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class MilvusVecDB(BaseVecDB):
|
|
14
|
+
"""Milvus vector database implementation."""
|
|
15
|
+
|
|
16
|
+
@require_python_package(
    import_name="pymilvus",
    install_command="pip install -U pymilvus",
    install_link="https://milvus.io/docs/install-pymilvus.md",
)
def __init__(self, config: MilvusVecDBConfig):
    """Connect to Milvus and create the configured collections.

    Args:
        config: Settings object; ``uri``, ``user_name`` and ``password`` are
            read here, and the helper methods called below read further fields.
    """
    from pymilvus import MilvusClient

    self.config = config

    # Open the client first: schema and index preparation both go through it.
    connection = {
        "uri": config.uri,
        "user": config.user_name,
        "password": config.password,
    }
    self.client = MilvusClient(**connection)

    # Order matters: create_collection() consumes self.schema and self.index_params.
    self.schema = self.create_schema()
    self.index_params = self.create_index()
    self.create_collection()
|
|
34
|
+
|
|
35
|
+
def create_schema(self):
    """Create schema for the milvus collection.

    Returns:
        The schema object obtained from ``self.client.create_schema`` after the
        fields and the BM25 function below have been registered on it.
    """
    from pymilvus import DataType, Function, FunctionType

    # Ids are supplied by the caller (auto_id=False); dynamic fields are enabled.
    schema = self.client.create_schema(auto_id=False, enable_dynamic_field=True)
    # Primary key: string id.
    schema.add_field(
        field_name="id", datatype=DataType.VARCHAR, max_length=65535, is_primary=True
    )
    # Analyzer configuration applied to the "memory" text field below.
    analyzer_params = {"tokenizer": "standard", "filter": ["lowercase"]}
    # "memory" is the text field that feeds the BM25 function declared further down.
    schema.add_field(
        field_name="memory",
        datatype=DataType.VARCHAR,
        max_length=65535,
        analyzer_params=analyzer_params,
        enable_match=True,
        enable_analyzer=True,
    )
    schema.add_field(field_name="original_text", datatype=DataType.VARCHAR, max_length=65535)
    # Dense embedding; its dimension comes from the configuration.
    schema.add_field(
        field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=self.config.vector_dimension
    )
    schema.add_field(field_name="payload", datatype=DataType.JSON)

    # Sparse vector field, declared as the output of the BM25 function over "memory".
    schema.add_field(field_name="sparse_vector", datatype=DataType.SPARSE_FLOAT_VECTOR)
    bm25_function = Function(
        name="bm25",
        function_type=FunctionType.BM25,
        input_field_names=["memory"],
        output_field_names="sparse_vector",
    )
    schema.add_function(bm25_function)

    return schema
|
|
68
|
+
|
|
69
|
+
def create_index(self):
|
|
70
|
+
"""Create index for the milvus collection."""
|
|
71
|
+
index_params = self.client.prepare_index_params()
|
|
72
|
+
index_params.add_index(
|
|
73
|
+
field_name="vector", index_type="FLAT", metric_type=self._get_metric_type()
|
|
74
|
+
)
|
|
75
|
+
index_params.add_index(
|
|
76
|
+
field_name="sparse_vector",
|
|
77
|
+
index_type="SPARSE_INVERTED_INDEX",
|
|
78
|
+
metric_type="BM25",
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
return index_params
|
|
82
|
+
|
|
83
|
+
def create_collection(self) -> None:
    """Create every collection named in ``self.config.collection_name``.

    Names that already exist are skipped with a warning; each newly created
    collection is reported with an info log line.
    """
    for collection_name in self.config.collection_name:
        if not self.collection_exists(collection_name):
            self.client.create_collection(
                collection_name=collection_name,
                dimension=self.config.vector_dimension,
                metric_type=self._get_metric_type(),
                schema=self.schema,
                index_params=self.index_params,
            )
            logger.info(
                f"Collection '{collection_name}' created with {self.config.vector_dimension} dimensions."
            )
        else:
            logger.warning(f"Collection '{collection_name}' already exists. Skipping creation.")
|
|
101
|
+
|
|
102
|
+
def create_collection_by_name(self, collection_name: str) -> None:
|
|
103
|
+
"""Create a new collection with specified parameters."""
|
|
104
|
+
if self.collection_exists(collection_name):
|
|
105
|
+
logger.warning(f"Collection '{collection_name}' already exists. Skipping creation.")
|
|
106
|
+
return
|
|
107
|
+
|
|
108
|
+
self.client.create_collection(
|
|
109
|
+
collection_name=collection_name,
|
|
110
|
+
dimension=self.config.vector_dimension,
|
|
111
|
+
metric_type=self._get_metric_type(),
|
|
112
|
+
schema=self.schema,
|
|
113
|
+
index_params=self.index_params,
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
def list_collections(self) -> list[str]:
|
|
117
|
+
"""List all collections."""
|
|
118
|
+
return self.client.list_collections()
|
|
119
|
+
|
|
120
|
+
def delete_collection(self, name: str) -> None:
|
|
121
|
+
"""Delete a collection."""
|
|
122
|
+
self.client.drop_collection(name)
|
|
123
|
+
|
|
124
|
+
def collection_exists(self, name: str) -> bool:
|
|
125
|
+
"""Check if a collection exists."""
|
|
126
|
+
return self.client.has_collection(collection_name=name)
|
|
127
|
+
|
|
128
|
+
def _dense_search(
|
|
129
|
+
self,
|
|
130
|
+
collection_name: str,
|
|
131
|
+
query_vector: list[float],
|
|
132
|
+
top_k: int,
|
|
133
|
+
filter: str = "",
|
|
134
|
+
**kwargs: Any,
|
|
135
|
+
) -> list[list[dict]]:
|
|
136
|
+
"""Dense search for similar items in the database."""
|
|
137
|
+
results = self.client.search(
|
|
138
|
+
collection_name=collection_name,
|
|
139
|
+
data=[query_vector],
|
|
140
|
+
limit=top_k,
|
|
141
|
+
filter=filter,
|
|
142
|
+
output_fields=["*"],
|
|
143
|
+
anns_field="vector",
|
|
144
|
+
)
|
|
145
|
+
return results
|
|
146
|
+
|
|
147
|
+
def _sparse_search(
|
|
148
|
+
self,
|
|
149
|
+
collection_name: str,
|
|
150
|
+
query: str,
|
|
151
|
+
top_k: int,
|
|
152
|
+
filter: str = "",
|
|
153
|
+
**kwargs: Any,
|
|
154
|
+
) -> list[list[dict]]:
|
|
155
|
+
"""Sparse search for similar items in the database."""
|
|
156
|
+
results = self.client.search(
|
|
157
|
+
collection_name=collection_name,
|
|
158
|
+
data=[query],
|
|
159
|
+
limit=top_k,
|
|
160
|
+
filter=filter,
|
|
161
|
+
output_fields=["*"],
|
|
162
|
+
anns_field="sparse_vector",
|
|
163
|
+
)
|
|
164
|
+
return results
|
|
165
|
+
|
|
166
|
+
def _hybrid_search(
    self,
    collection_name: str,
    query_vector: list[float],
    query: str,
    top_k: int,
    filter: str | None = None,
    ranker_type: str = "rrf",  # rrf, weighted
    sparse_weight=1.0,
    dense_weight=1.0,
    **kwargs: Any,
) -> list[list[dict]]:
    """Combine a BM25 request and a dense-vector request in one hybrid search.

    Args:
        collection_name: Collection to query.
        query_vector: Query embedding for the ``vector`` field.
        query: Query text for the ``sparse_vector`` field.
        top_k: Limit applied to each sub-request and to the merged result.
        filter: Filter expression; any falsy value is sent as ``None``.
        ranker_type: ``"rrf"`` selects ``RRFRanker``; any other value selects
            ``WeightedRanker``.
        sparse_weight: Weight for the sparse request (weighted ranker only).
        dense_weight: Weight for the dense request (weighted ranker only).
        **kwargs: Accepted and ignored.

    Returns:
        The raw result of ``self.client.hybrid_search``.
    """
    from pymilvus import AnnSearchRequest, RRFRanker, WeightedRanker

    expr = filter or None

    # Sparse (BM25) request comes first; the weights below follow the same order.
    bm25_req = AnnSearchRequest(
        data=[query],
        anns_field="sparse_vector",
        param={"metric_type": "BM25"},
        limit=top_k,
        expr=expr,
    )
    vector_req = AnnSearchRequest(
        data=[query_vector],
        anns_field="vector",
        param={"metric_type": self._get_metric_type()},
        limit=top_k,
        expr=expr,
    )

    if ranker_type == "rrf":
        ranker = RRFRanker()
    else:
        ranker = WeightedRanker(sparse_weight, dense_weight)

    return self.client.hybrid_search(
        collection_name=collection_name,
        reqs=[bm25_req, vector_req],
        ranker=ranker,
        limit=top_k,
        output_fields=["*"],
    )
|
|
209
|
+
|
|
210
|
+
def search(
    self,
    query_vector: list[float],
    query: str,
    collection_name: str,
    top_k: int,
    filter: dict[str, Any] | None = None,
    search_type: str = "dense",  # dense, sparse, hybrid
) -> list[MilvusVecDBItem]:
    """
    Search for similar items in the database.

    Args:
        query_vector: Single vector to search (dense and hybrid modes)
        query: Query text (sparse and hybrid modes)
        collection_name: Name of the collection to search
        top_k: Number of results to return
        filter: Payload filters, converted to a Milvus expression string
        search_type: One of "dense", "sparse" or "hybrid"

    Returns:
        List of search results with scores and payloads. Any exception raised
        while searching or converting hits (including an unknown search_type)
        is logged and an empty list is returned.
    """
    logger.info(f"filter for milvus: {filter}")
    expr = "" if not filter else self._dict_to_expr(filter)

    dispatch = {
        "dense": self._dense_search,
        "sparse": self._sparse_search,
        "hybrid": self._hybrid_search,
    }
    try:
        run_search = dispatch[search_type]
        results = run_search(
            collection_name=collection_name,
            query_vector=query_vector,
            query=query,
            top_k=top_k,
            filter=expr,
        )

        # Only one query is sent, so only the first result group is read.
        items = []
        for hit in results[0]:
            record = hit.get("entity", {})
            found = MilvusVecDBItem(
                id=str(record.get("id")),
                memory=record.get("memory"),
                original_text=record.get("original_text"),
                vector=record.get("vector"),
                payload=record.get("payload", {}),
                # NOTE(review): score is always 1 - distance, whatever metric
                # or search type produced the distance -- confirm intended.
                score=1 - float(hit["distance"]),
            )
            items.append(found)
    except Exception as e:
        logger.error("Error in _%s_search: %s", search_type, e)
        return []

    logger.info(f"Milvus search completed with {len(items)} results.")
    return items
|
|
269
|
+
|
|
270
|
+
def _dict_to_expr(self, filter_dict: dict[str, Any]) -> str:
|
|
271
|
+
"""Convert a dictionary filter to a Milvus expression string.
|
|
272
|
+
|
|
273
|
+
Supports complex query syntax with logical operators, comparison operators,
|
|
274
|
+
arithmetic operators, array operators, and string pattern matching.
|
|
275
|
+
|
|
276
|
+
Args:
|
|
277
|
+
filter_dict: Dictionary containing filter conditions
|
|
278
|
+
|
|
279
|
+
Returns:
|
|
280
|
+
Milvus expression string
|
|
281
|
+
"""
|
|
282
|
+
if not filter_dict:
|
|
283
|
+
return ""
|
|
284
|
+
|
|
285
|
+
return self._build_expression(filter_dict)
|
|
286
|
+
|
|
287
|
+
def _build_expression(self, condition: Any) -> str:
|
|
288
|
+
"""Build expression from condition dict or value."""
|
|
289
|
+
if isinstance(condition, dict):
|
|
290
|
+
# Handle logical operators
|
|
291
|
+
if "and" in condition:
|
|
292
|
+
return self._handle_logical_and(condition["and"])
|
|
293
|
+
elif "or" in condition:
|
|
294
|
+
return self._handle_logical_or(condition["or"])
|
|
295
|
+
elif "not" in condition:
|
|
296
|
+
return self._handle_logical_not(condition["not"])
|
|
297
|
+
else:
|
|
298
|
+
# Handle field conditions
|
|
299
|
+
return self._handle_field_conditions(condition)
|
|
300
|
+
else:
|
|
301
|
+
# Simple value comparison
|
|
302
|
+
return f"{condition}"
|
|
303
|
+
|
|
304
|
+
def _handle_logical_and(self, conditions: list) -> str:
|
|
305
|
+
"""Handle AND logical operator."""
|
|
306
|
+
if not conditions:
|
|
307
|
+
return ""
|
|
308
|
+
expressions = [self._build_expression(cond) for cond in conditions if cond is not None]
|
|
309
|
+
expressions = [expr for expr in expressions if expr]
|
|
310
|
+
if not expressions:
|
|
311
|
+
return ""
|
|
312
|
+
return f"({' and '.join(expressions)})"
|
|
313
|
+
|
|
314
|
+
def _handle_logical_or(self, conditions: list) -> str:
|
|
315
|
+
"""Handle OR logical operator."""
|
|
316
|
+
if not conditions:
|
|
317
|
+
return ""
|
|
318
|
+
expressions = [self._build_expression(cond) for cond in conditions if cond is not None]
|
|
319
|
+
expressions = [expr for expr in expressions if expr]
|
|
320
|
+
if not expressions:
|
|
321
|
+
return ""
|
|
322
|
+
return f"({' or '.join(expressions)})"
|
|
323
|
+
|
|
324
|
+
def _handle_logical_not(self, condition: Any) -> str:
|
|
325
|
+
"""Handle NOT logical operator."""
|
|
326
|
+
expr = self._build_expression(condition)
|
|
327
|
+
if not expr:
|
|
328
|
+
return ""
|
|
329
|
+
return f"(not {expr})"
|
|
330
|
+
|
|
331
|
+
def _handle_field_conditions(self, condition_dict: dict[str, Any]) -> str:
|
|
332
|
+
"""Handle field-specific conditions."""
|
|
333
|
+
conditions = []
|
|
334
|
+
|
|
335
|
+
for field, value in condition_dict.items():
|
|
336
|
+
if value is None:
|
|
337
|
+
continue
|
|
338
|
+
|
|
339
|
+
field_expr = self._build_field_expression(field, value)
|
|
340
|
+
if field_expr:
|
|
341
|
+
conditions.append(field_expr)
|
|
342
|
+
|
|
343
|
+
if not conditions:
|
|
344
|
+
return ""
|
|
345
|
+
return " and ".join(conditions)
|
|
346
|
+
|
|
347
|
+
def _build_field_expression(self, field: str, value: Any) -> str:
|
|
348
|
+
"""Build expression for a single field."""
|
|
349
|
+
# Handle comparison operators
|
|
350
|
+
if isinstance(value, dict):
|
|
351
|
+
if len(value) == 1:
|
|
352
|
+
op, operand = next(iter(value.items()))
|
|
353
|
+
op_lower = op.lower()
|
|
354
|
+
|
|
355
|
+
if op_lower == "in":
|
|
356
|
+
return self._handle_in_operator(field, operand)
|
|
357
|
+
elif op_lower == "contains":
|
|
358
|
+
return self._handle_contains_operator(field, operand, case_sensitive=True)
|
|
359
|
+
elif op_lower == "icontains":
|
|
360
|
+
return self._handle_contains_operator(field, operand, case_sensitive=False)
|
|
361
|
+
elif op_lower == "like":
|
|
362
|
+
return self._handle_like_operator(field, operand)
|
|
363
|
+
elif op_lower in ["gte", "lte", "gt", "lt", "ne"]:
|
|
364
|
+
return self._handle_comparison_operator(field, op_lower, operand)
|
|
365
|
+
else:
|
|
366
|
+
# Unknown operator, treat as equality
|
|
367
|
+
return f"payload['{field}'] == {self._format_value(operand)}"
|
|
368
|
+
else:
|
|
369
|
+
# Multiple operators, handle each one
|
|
370
|
+
sub_conditions = []
|
|
371
|
+
for op, operand in value.items():
|
|
372
|
+
op_lower = op.lower()
|
|
373
|
+
if op_lower in [
|
|
374
|
+
"gte",
|
|
375
|
+
"lte",
|
|
376
|
+
"gt",
|
|
377
|
+
"lt",
|
|
378
|
+
"ne",
|
|
379
|
+
"in",
|
|
380
|
+
"contains",
|
|
381
|
+
"icontains",
|
|
382
|
+
"like",
|
|
383
|
+
]:
|
|
384
|
+
sub_expr = self._build_field_expression(field, {op: operand})
|
|
385
|
+
if sub_expr:
|
|
386
|
+
sub_conditions.append(sub_expr)
|
|
387
|
+
|
|
388
|
+
if sub_conditions:
|
|
389
|
+
return f"({' and '.join(sub_conditions)})"
|
|
390
|
+
return ""
|
|
391
|
+
else:
|
|
392
|
+
# Simple equality
|
|
393
|
+
return f"payload['{field}'] == {self._format_value(value)}"
|
|
394
|
+
|
|
395
|
+
def _handle_in_operator(self, field: str, values: list) -> str:
|
|
396
|
+
"""Handle IN operator for arrays."""
|
|
397
|
+
if not isinstance(values, list) or not values:
|
|
398
|
+
return ""
|
|
399
|
+
|
|
400
|
+
formatted_values = [self._format_value(v) for v in values]
|
|
401
|
+
return f"payload['{field}'] in [{', '.join(formatted_values)}]"
|
|
402
|
+
|
|
403
|
+
def _handle_contains_operator(self, field: str, value: Any, case_sensitive: bool = True) -> str:
|
|
404
|
+
"""Handle CONTAINS/ICONTAINS operator."""
|
|
405
|
+
formatted_value = self._format_value(value)
|
|
406
|
+
if case_sensitive:
|
|
407
|
+
return f"json_contains(payload['{field}'], {formatted_value})"
|
|
408
|
+
else:
|
|
409
|
+
# For case-insensitive contains, we need to use LIKE with lower case
|
|
410
|
+
return f"(not json_contains(payload['{field}'], {formatted_value}))"
|
|
411
|
+
|
|
412
|
+
def _handle_like_operator(self, field: str, pattern: str) -> str:
|
|
413
|
+
"""Handle LIKE operator for string pattern matching."""
|
|
414
|
+
# Convert SQL-like pattern to Milvus-like pattern
|
|
415
|
+
return f"payload['{field}'] like '{pattern}'"
|
|
416
|
+
|
|
417
|
+
def _handle_comparison_operator(self, field: str, operator: str, value: Any) -> str:
|
|
418
|
+
"""Handle comparison operators (gte, lte, gt, lt, ne)."""
|
|
419
|
+
milvus_op = {"gte": ">=", "lte": "<=", "gt": ">", "lt": "<", "ne": "!="}.get(operator, "==")
|
|
420
|
+
|
|
421
|
+
formatted_value = self._format_value(value)
|
|
422
|
+
return f"payload['{field}'] {milvus_op} {formatted_value}"
|
|
423
|
+
|
|
424
|
+
def _format_value(self, value: Any) -> str:
|
|
425
|
+
"""Format value for Milvus expression."""
|
|
426
|
+
if isinstance(value, str):
|
|
427
|
+
return f"'{value}'"
|
|
428
|
+
elif isinstance(value, int | float):
|
|
429
|
+
return str(value)
|
|
430
|
+
elif isinstance(value, bool):
|
|
431
|
+
return str(value).lower()
|
|
432
|
+
elif isinstance(value, list):
|
|
433
|
+
formatted_items = [self._format_value(item) for item in value]
|
|
434
|
+
return f"[{', '.join(formatted_items)}]"
|
|
435
|
+
elif value is None:
|
|
436
|
+
return "null"
|
|
437
|
+
else:
|
|
438
|
+
return f"'{value!s}'"
|
|
439
|
+
|
|
440
|
+
def _get_metric_type(self) -> str:
|
|
441
|
+
"""Get the metric type for search."""
|
|
442
|
+
metric_map = {
|
|
443
|
+
"cosine": "COSINE",
|
|
444
|
+
"euclidean": "L2",
|
|
445
|
+
"dot": "IP",
|
|
446
|
+
}
|
|
447
|
+
return metric_map.get(self.config.distance_metric, "L2")
|
|
448
|
+
|
|
449
|
+
def get_by_id(self, collection_name: str, id: str) -> MilvusVecDBItem | None:
    """Fetch one item by its ID.

    Args:
        collection_name: Collection to read from.
        id: ID of the item to fetch.

    Returns:
        The first entity returned by the client, wrapped in a
        ``MilvusVecDBItem``, or ``None`` when the client returns nothing.
    """
    rows = self.client.get(collection_name=collection_name, ids=[id])
    if rows:
        row = rows[0]
        return MilvusVecDBItem(
            id=row["id"],
            memory=row.get("memory"),
            original_text=row.get("original_text"),
            vector=row.get("vector"),
            # A missing payload becomes an empty dict.
            payload=row.get("payload", {}),
        )
    return None
|
|
468
|
+
|
|
469
|
+
def get_by_ids(self, collection_name: str, ids: list[str]) -> list[MilvusVecDBItem]:
    """Fetch several items by their IDs.

    Args:
        collection_name: Collection to read from.
        ids: IDs of the items to fetch.

    Returns:
        One ``MilvusVecDBItem`` per entity returned by the client, in the
        order the client returned them; an empty list when the client
        returns nothing.
    """
    rows = self.client.get(collection_name=collection_name, ids=ids)
    if not rows:
        return []

    return [
        MilvusVecDBItem(
            id=row["id"],
            memory=row.get("memory"),
            original_text=row.get("original_text"),
            vector=row.get("vector"),
            payload=row.get("payload", {}),
        )
        for row in rows
    ]
|
|
492
|
+
|
|
493
|
+
def get_by_filter(
    self, collection_name: str, filter: dict[str, Any], scroll_limit: int = 100
) -> list[MilvusVecDBItem]:
    """Collect every item matching a payload filter, reading in batches.

    Args:
        collection_name: Collection to read from.
        filter: Payload filter; converted with ``_dict_to_expr``. An empty
            filter is sent as an empty expression string.
        scroll_limit: Passed to the client's ``query_iterator`` as
            ``batch_size``.

    Returns:
        All items gathered from the iterator. If iteration raises, the
        error is logged as a warning and the items gathered up to that
        point are returned.
    """
    expr = ""
    if filter:
        expr = self._dict_to_expr(filter)

    all_items = []
    cursor = self.client.query_iterator(
        collection_name=collection_name,
        filter=expr,
        batch_size=scroll_limit,
        output_fields=["*"],  # request every field, payload included
    )

    try:
        # An empty (falsy) batch marks the end of the result set.
        while page := cursor.next():
            for row in page:
                row_payload = row.get("payload", {})
                all_items.append(
                    MilvusVecDBItem(
                        id=row["id"],
                        memory=row.get("memory"),
                        original_text=row.get("original_text"),
                        vector=row.get("vector"),
                        payload=row_payload,
                    )
                )
    except Exception as e:
        # Best effort: keep whatever was collected before the failure.
        logger.warning(
            f"Error during Milvus query iteration: {e}. Returning {len(all_items)} items found so far."
        )
    finally:
        # The iterator is closed whether or not iteration succeeded.
        cursor.close()

    logger.info(f"Milvus retrieve by filter completed with {len(all_items)} results.")
    return all_items
|
|
548
|
+
|
|
549
|
+
def get_all(self, collection_name: str, scroll_limit=100) -> list[MilvusVecDBItem]:
    """Return every item in a collection.

    Delegates to ``get_by_filter`` with an empty filter.

    Args:
        collection_name: Collection to read from.
        scroll_limit: Forwarded unchanged to ``get_by_filter``.
    """
    match_everything = {}
    return self.get_by_filter(collection_name, match_everything, scroll_limit=scroll_limit)
|
|
552
|
+
|
|
553
|
+
def count(self, collection_name: str, filter: dict[str, Any] | None = None) -> int:
|
|
554
|
+
"""Count items in the database, optionally with filter."""
|
|
555
|
+
if filter:
|
|
556
|
+
# If there's a filter, use query method
|
|
557
|
+
expr = self._dict_to_expr(filter) if filter else ""
|
|
558
|
+
results = self.client.query(
|
|
559
|
+
collection_name=collection_name,
|
|
560
|
+
filter=expr,
|
|
561
|
+
output_fields=["id"],
|
|
562
|
+
)
|
|
563
|
+
return len(results)
|
|
564
|
+
else:
|
|
565
|
+
# For counting all items, use get_collection_stats for accurate count
|
|
566
|
+
stats = self.client.get_collection_stats(collection_name)
|
|
567
|
+
# Extract row count from stats - stats is a dict, not a list
|
|
568
|
+
return int(stats.get("row_count", 0))
|
|
569
|
+
|
|
570
|
+
def add(self, collection_name: str, data: list[MilvusVecDBItem | dict[str, Any]]) -> None:
    """Write items to a collection through the client's ``upsert``.

    Args:
        collection_name: Collection to write to.
        data: ``MilvusVecDBItem`` objects or dictionaries. Dictionaries are
            copied and converted with ``MilvusVecDBItem.from_dict``. The
            fields written for each item are ``id``, ``memory``,
            ``original_text``, ``vector`` and ``payload``.
    """
    max_chars = 65000  # id, memory and original_text are cut to this many characters

    rows = []
    for raw in data:
        # Convert from a copy so the caller's dict is not handed to from_dict.
        record = MilvusVecDBItem.from_dict(raw.copy()) if isinstance(raw, dict) else raw

        # NOTE(review): the slices below raise TypeError if memory or
        # original_text is None - confirm the item type forbids that.
        rows.append(
            {
                "id": record.id[:max_chars],
                "memory": record.memory[:max_chars],
                "original_text": record.original_text[:max_chars],
                "vector": record.vector,
                "payload": record.payload or {},
            }
        )

    # upsert rather than insert, so an existing ID is overwritten.
    self.client.upsert(collection_name=collection_name, data=rows)
|
|
603
|
+
|
|
604
|
+
def update(self, collection_name: str, id: str, data: MilvusVecDBItem | dict[str, Any]) -> None:
    """Update one item in a collection.

    Args:
        collection_name: Collection holding the item.
        id: ID of the item to update.
        data: Replacement content, as a ``MilvusVecDBItem`` or a dictionary
            accepted by ``MilvusVecDBItem.from_dict``.

    Raises:
        ValueError: If ``id`` differs from the ID carried by ``data``.
    """
    # Convert dictionaries first: the ID check below reads ``data.id``,
    # which raised AttributeError for dict input when the check ran before
    # the conversion.
    if isinstance(data, dict):
        data = MilvusVecDBItem.from_dict(data.copy())

    if id != data.id:
        raise ValueError(
            f"The id of the data to update must be the same as the id of the item to update, ID mismatch: expected {id}, got {data.id}"
        )

    # An update is an upsert of a single item.
    self.upsert(collection_name, [data])
|
|
616
|
+
|
|
617
|
+
def ensure_payload_indexes(self, fields: list[str]) -> None:
    """Accept a request to index payload fields; no index is created here.

    The method only writes an info-level log line naming ``fields``, so
    calling it repeatedly has no further effect.

    Args:
        fields (list[str]): Names of the fields the caller wants indexed.
    """
    # NOTE(review): the log text states that scalar fields are indexed
    # automatically - nothing in this method verifies that.
    message = f"Milvus automatically indexes scalar fields: {fields}"
    logger.info(message)
|
|
628
|
+
|
|
629
|
+
def upsert(self, collection_name: str, data: list[MilvusVecDBItem | dict[str, Any]]) -> None:
    """Insert new items or overwrite existing ones.

    Delegates to ``add``, which already writes through the client's
    ``upsert``.

    Args:
        collection_name: Collection to write to.
        data: Items to write, in any form ``add`` accepts.
    """
    self.add(collection_name=collection_name, data=data)
|
|
638
|
+
|
|
639
|
+
def delete(self, collection_name: str, ids: list[str]) -> None:
    """Remove items from a collection by ID.

    Args:
        collection_name: Collection to delete from.
        ids: IDs to remove. When empty, the client is not called.
    """
    if ids:
        self.client.delete(collection_name=collection_name, ids=ids)
|
|
647
|
+
|
|
648
|
+
def delete_by_filter(self, collection_name: str, filter: dict[str, Any]) -> None:
    """Remove the items matching a payload filter.

    Args:
        collection_name: Collection to delete from.
        filter: Payload filter; converted with ``_dict_to_expr``. An empty
            filter is passed to the client as an empty expression string.
    """
    expr = ""
    if filter:
        expr = self._dict_to_expr(filter)
    self.client.delete(collection_name=collection_name, filter=expr)
|