MemoryOS 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. memoryos-2.0.3.dist-info/METADATA +418 -0
  2. memoryos-2.0.3.dist-info/RECORD +315 -0
  3. memoryos-2.0.3.dist-info/WHEEL +4 -0
  4. memoryos-2.0.3.dist-info/entry_points.txt +3 -0
  5. memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
  6. memos/__init__.py +20 -0
  7. memos/api/client.py +571 -0
  8. memos/api/config.py +1018 -0
  9. memos/api/context/dependencies.py +50 -0
  10. memos/api/exceptions.py +53 -0
  11. memos/api/handlers/__init__.py +62 -0
  12. memos/api/handlers/add_handler.py +158 -0
  13. memos/api/handlers/base_handler.py +194 -0
  14. memos/api/handlers/chat_handler.py +1401 -0
  15. memos/api/handlers/component_init.py +388 -0
  16. memos/api/handlers/config_builders.py +190 -0
  17. memos/api/handlers/feedback_handler.py +93 -0
  18. memos/api/handlers/formatters_handler.py +237 -0
  19. memos/api/handlers/memory_handler.py +316 -0
  20. memos/api/handlers/scheduler_handler.py +497 -0
  21. memos/api/handlers/search_handler.py +222 -0
  22. memos/api/handlers/suggestion_handler.py +117 -0
  23. memos/api/mcp_serve.py +614 -0
  24. memos/api/middleware/request_context.py +101 -0
  25. memos/api/product_api.py +38 -0
  26. memos/api/product_models.py +1206 -0
  27. memos/api/routers/__init__.py +1 -0
  28. memos/api/routers/product_router.py +477 -0
  29. memos/api/routers/server_router.py +394 -0
  30. memos/api/server_api.py +44 -0
  31. memos/api/start_api.py +433 -0
  32. memos/chunkers/__init__.py +4 -0
  33. memos/chunkers/base.py +24 -0
  34. memos/chunkers/charactertext_chunker.py +41 -0
  35. memos/chunkers/factory.py +24 -0
  36. memos/chunkers/markdown_chunker.py +62 -0
  37. memos/chunkers/sentence_chunker.py +54 -0
  38. memos/chunkers/simple_chunker.py +50 -0
  39. memos/cli.py +113 -0
  40. memos/configs/__init__.py +0 -0
  41. memos/configs/base.py +82 -0
  42. memos/configs/chunker.py +59 -0
  43. memos/configs/embedder.py +88 -0
  44. memos/configs/graph_db.py +236 -0
  45. memos/configs/internet_retriever.py +100 -0
  46. memos/configs/llm.py +151 -0
  47. memos/configs/mem_agent.py +54 -0
  48. memos/configs/mem_chat.py +81 -0
  49. memos/configs/mem_cube.py +105 -0
  50. memos/configs/mem_os.py +83 -0
  51. memos/configs/mem_reader.py +91 -0
  52. memos/configs/mem_scheduler.py +385 -0
  53. memos/configs/mem_user.py +70 -0
  54. memos/configs/memory.py +324 -0
  55. memos/configs/parser.py +38 -0
  56. memos/configs/reranker.py +18 -0
  57. memos/configs/utils.py +8 -0
  58. memos/configs/vec_db.py +80 -0
  59. memos/context/context.py +355 -0
  60. memos/dependency.py +52 -0
  61. memos/deprecation.py +262 -0
  62. memos/embedders/__init__.py +0 -0
  63. memos/embedders/ark.py +95 -0
  64. memos/embedders/base.py +106 -0
  65. memos/embedders/factory.py +29 -0
  66. memos/embedders/ollama.py +77 -0
  67. memos/embedders/sentence_transformer.py +49 -0
  68. memos/embedders/universal_api.py +51 -0
  69. memos/exceptions.py +30 -0
  70. memos/graph_dbs/__init__.py +0 -0
  71. memos/graph_dbs/base.py +274 -0
  72. memos/graph_dbs/factory.py +27 -0
  73. memos/graph_dbs/item.py +46 -0
  74. memos/graph_dbs/nebular.py +1794 -0
  75. memos/graph_dbs/neo4j.py +1942 -0
  76. memos/graph_dbs/neo4j_community.py +1058 -0
  77. memos/graph_dbs/polardb.py +5446 -0
  78. memos/hello_world.py +97 -0
  79. memos/llms/__init__.py +0 -0
  80. memos/llms/base.py +25 -0
  81. memos/llms/deepseek.py +13 -0
  82. memos/llms/factory.py +38 -0
  83. memos/llms/hf.py +443 -0
  84. memos/llms/hf_singleton.py +114 -0
  85. memos/llms/ollama.py +135 -0
  86. memos/llms/openai.py +222 -0
  87. memos/llms/openai_new.py +198 -0
  88. memos/llms/qwen.py +13 -0
  89. memos/llms/utils.py +14 -0
  90. memos/llms/vllm.py +218 -0
  91. memos/log.py +237 -0
  92. memos/mem_agent/base.py +19 -0
  93. memos/mem_agent/deepsearch_agent.py +391 -0
  94. memos/mem_agent/factory.py +36 -0
  95. memos/mem_chat/__init__.py +0 -0
  96. memos/mem_chat/base.py +30 -0
  97. memos/mem_chat/factory.py +21 -0
  98. memos/mem_chat/simple.py +200 -0
  99. memos/mem_cube/__init__.py +0 -0
  100. memos/mem_cube/base.py +30 -0
  101. memos/mem_cube/general.py +240 -0
  102. memos/mem_cube/navie.py +172 -0
  103. memos/mem_cube/utils.py +169 -0
  104. memos/mem_feedback/base.py +15 -0
  105. memos/mem_feedback/feedback.py +1192 -0
  106. memos/mem_feedback/simple_feedback.py +40 -0
  107. memos/mem_feedback/utils.py +230 -0
  108. memos/mem_os/client.py +5 -0
  109. memos/mem_os/core.py +1203 -0
  110. memos/mem_os/main.py +582 -0
  111. memos/mem_os/product.py +1608 -0
  112. memos/mem_os/product_server.py +455 -0
  113. memos/mem_os/utils/default_config.py +359 -0
  114. memos/mem_os/utils/format_utils.py +1403 -0
  115. memos/mem_os/utils/reference_utils.py +162 -0
  116. memos/mem_reader/__init__.py +0 -0
  117. memos/mem_reader/base.py +47 -0
  118. memos/mem_reader/factory.py +53 -0
  119. memos/mem_reader/memory.py +298 -0
  120. memos/mem_reader/multi_modal_struct.py +965 -0
  121. memos/mem_reader/read_multi_modal/__init__.py +43 -0
  122. memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
  123. memos/mem_reader/read_multi_modal/base.py +273 -0
  124. memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
  125. memos/mem_reader/read_multi_modal/image_parser.py +359 -0
  126. memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
  127. memos/mem_reader/read_multi_modal/string_parser.py +139 -0
  128. memos/mem_reader/read_multi_modal/system_parser.py +327 -0
  129. memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
  130. memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
  131. memos/mem_reader/read_multi_modal/user_parser.py +218 -0
  132. memos/mem_reader/read_multi_modal/utils.py +358 -0
  133. memos/mem_reader/simple_struct.py +912 -0
  134. memos/mem_reader/strategy_struct.py +163 -0
  135. memos/mem_reader/utils.py +157 -0
  136. memos/mem_scheduler/__init__.py +0 -0
  137. memos/mem_scheduler/analyzer/__init__.py +0 -0
  138. memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
  139. memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
  140. memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
  141. memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
  142. memos/mem_scheduler/base_scheduler.py +1319 -0
  143. memos/mem_scheduler/general_modules/__init__.py +0 -0
  144. memos/mem_scheduler/general_modules/api_misc.py +137 -0
  145. memos/mem_scheduler/general_modules/base.py +80 -0
  146. memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
  147. memos/mem_scheduler/general_modules/misc.py +313 -0
  148. memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
  149. memos/mem_scheduler/general_modules/task_threads.py +315 -0
  150. memos/mem_scheduler/general_scheduler.py +1495 -0
  151. memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
  152. memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
  153. memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
  154. memos/mem_scheduler/monitors/__init__.py +0 -0
  155. memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
  156. memos/mem_scheduler/monitors/general_monitor.py +394 -0
  157. memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
  158. memos/mem_scheduler/optimized_scheduler.py +410 -0
  159. memos/mem_scheduler/orm_modules/__init__.py +0 -0
  160. memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
  161. memos/mem_scheduler/orm_modules/base_model.py +729 -0
  162. memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
  163. memos/mem_scheduler/orm_modules/redis_model.py +699 -0
  164. memos/mem_scheduler/scheduler_factory.py +23 -0
  165. memos/mem_scheduler/schemas/__init__.py +0 -0
  166. memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
  167. memos/mem_scheduler/schemas/api_schemas.py +233 -0
  168. memos/mem_scheduler/schemas/general_schemas.py +55 -0
  169. memos/mem_scheduler/schemas/message_schemas.py +173 -0
  170. memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
  171. memos/mem_scheduler/schemas/task_schemas.py +132 -0
  172. memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
  173. memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
  174. memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
  175. memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
  176. memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
  177. memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
  178. memos/mem_scheduler/utils/__init__.py +0 -0
  179. memos/mem_scheduler/utils/api_utils.py +77 -0
  180. memos/mem_scheduler/utils/config_utils.py +100 -0
  181. memos/mem_scheduler/utils/db_utils.py +50 -0
  182. memos/mem_scheduler/utils/filter_utils.py +176 -0
  183. memos/mem_scheduler/utils/metrics.py +125 -0
  184. memos/mem_scheduler/utils/misc_utils.py +290 -0
  185. memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
  186. memos/mem_scheduler/utils/status_tracker.py +229 -0
  187. memos/mem_scheduler/webservice_modules/__init__.py +0 -0
  188. memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
  189. memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
  190. memos/mem_user/factory.py +94 -0
  191. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  192. memos/mem_user/mysql_user_manager.py +502 -0
  193. memos/mem_user/persistent_factory.py +98 -0
  194. memos/mem_user/persistent_user_manager.py +260 -0
  195. memos/mem_user/redis_persistent_user_manager.py +225 -0
  196. memos/mem_user/user_manager.py +488 -0
  197. memos/memories/__init__.py +0 -0
  198. memos/memories/activation/__init__.py +0 -0
  199. memos/memories/activation/base.py +42 -0
  200. memos/memories/activation/item.py +56 -0
  201. memos/memories/activation/kv.py +292 -0
  202. memos/memories/activation/vllmkv.py +219 -0
  203. memos/memories/base.py +19 -0
  204. memos/memories/factory.py +42 -0
  205. memos/memories/parametric/__init__.py +0 -0
  206. memos/memories/parametric/base.py +19 -0
  207. memos/memories/parametric/item.py +11 -0
  208. memos/memories/parametric/lora.py +41 -0
  209. memos/memories/textual/__init__.py +0 -0
  210. memos/memories/textual/base.py +92 -0
  211. memos/memories/textual/general.py +236 -0
  212. memos/memories/textual/item.py +304 -0
  213. memos/memories/textual/naive.py +187 -0
  214. memos/memories/textual/prefer_text_memory/__init__.py +0 -0
  215. memos/memories/textual/prefer_text_memory/adder.py +504 -0
  216. memos/memories/textual/prefer_text_memory/config.py +106 -0
  217. memos/memories/textual/prefer_text_memory/extractor.py +221 -0
  218. memos/memories/textual/prefer_text_memory/factory.py +85 -0
  219. memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
  220. memos/memories/textual/prefer_text_memory/spliter.py +132 -0
  221. memos/memories/textual/prefer_text_memory/utils.py +93 -0
  222. memos/memories/textual/preference.py +344 -0
  223. memos/memories/textual/simple_preference.py +161 -0
  224. memos/memories/textual/simple_tree.py +69 -0
  225. memos/memories/textual/tree.py +459 -0
  226. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  227. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  228. memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
  229. memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
  230. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
  231. memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
  232. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  233. memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
  234. memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
  235. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
  236. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
  237. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
  238. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  239. memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
  240. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  241. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
  242. memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
  243. memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
  244. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
  245. memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
  246. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
  247. memos/memos_tools/dinding_report_bot.py +453 -0
  248. memos/memos_tools/lockfree_dict.py +120 -0
  249. memos/memos_tools/notification_service.py +44 -0
  250. memos/memos_tools/notification_utils.py +142 -0
  251. memos/memos_tools/singleton.py +174 -0
  252. memos/memos_tools/thread_safe_dict.py +310 -0
  253. memos/memos_tools/thread_safe_dict_segment.py +382 -0
  254. memos/multi_mem_cube/__init__.py +0 -0
  255. memos/multi_mem_cube/composite_cube.py +86 -0
  256. memos/multi_mem_cube/single_cube.py +874 -0
  257. memos/multi_mem_cube/views.py +54 -0
  258. memos/parsers/__init__.py +0 -0
  259. memos/parsers/base.py +15 -0
  260. memos/parsers/factory.py +21 -0
  261. memos/parsers/markitdown.py +28 -0
  262. memos/reranker/__init__.py +4 -0
  263. memos/reranker/base.py +25 -0
  264. memos/reranker/concat.py +103 -0
  265. memos/reranker/cosine_local.py +102 -0
  266. memos/reranker/factory.py +72 -0
  267. memos/reranker/http_bge.py +324 -0
  268. memos/reranker/http_bge_strategy.py +327 -0
  269. memos/reranker/noop.py +19 -0
  270. memos/reranker/strategies/__init__.py +4 -0
  271. memos/reranker/strategies/base.py +61 -0
  272. memos/reranker/strategies/concat_background.py +94 -0
  273. memos/reranker/strategies/concat_docsource.py +110 -0
  274. memos/reranker/strategies/dialogue_common.py +109 -0
  275. memos/reranker/strategies/factory.py +31 -0
  276. memos/reranker/strategies/single_turn.py +107 -0
  277. memos/reranker/strategies/singleturn_outmem.py +98 -0
  278. memos/settings.py +10 -0
  279. memos/templates/__init__.py +0 -0
  280. memos/templates/advanced_search_prompts.py +211 -0
  281. memos/templates/cloud_service_prompt.py +107 -0
  282. memos/templates/instruction_completion.py +66 -0
  283. memos/templates/mem_agent_prompts.py +85 -0
  284. memos/templates/mem_feedback_prompts.py +822 -0
  285. memos/templates/mem_reader_prompts.py +1096 -0
  286. memos/templates/mem_reader_strategy_prompts.py +238 -0
  287. memos/templates/mem_scheduler_prompts.py +626 -0
  288. memos/templates/mem_search_prompts.py +93 -0
  289. memos/templates/mos_prompts.py +403 -0
  290. memos/templates/prefer_complete_prompt.py +735 -0
  291. memos/templates/tool_mem_prompts.py +139 -0
  292. memos/templates/tree_reorganize_prompts.py +230 -0
  293. memos/types/__init__.py +34 -0
  294. memos/types/general_types.py +151 -0
  295. memos/types/openai_chat_completion_types/__init__.py +15 -0
  296. memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
  297. memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
  298. memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
  299. memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
  300. memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
  301. memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
  302. memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
  303. memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
  304. memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
  305. memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
  306. memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
  307. memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
  308. memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
  309. memos/utils.py +123 -0
  310. memos/vec_dbs/__init__.py +0 -0
  311. memos/vec_dbs/base.py +117 -0
  312. memos/vec_dbs/factory.py +23 -0
  313. memos/vec_dbs/item.py +50 -0
  314. memos/vec_dbs/milvus.py +654 -0
  315. memos/vec_dbs/qdrant.py +355 -0
@@ -0,0 +1,654 @@
1
+ from typing import Any
2
+
3
+ from memos.configs.vec_db import MilvusVecDBConfig
4
+ from memos.dependency import require_python_package
5
+ from memos.log import get_logger
6
+ from memos.vec_dbs.base import BaseVecDB
7
+ from memos.vec_dbs.item import MilvusVecDBItem
8
+
9
+
10
+ logger = get_logger(__name__)
11
+
12
+
13
+ class MilvusVecDB(BaseVecDB):
14
+ """Milvus vector database implementation."""
15
+
16
@require_python_package(
    import_name="pymilvus",
    install_command="pip install -U pymilvus",
    install_link="https://milvus.io/docs/install-pymilvus.md",
)
def __init__(self, config: MilvusVecDBConfig):
    """Connect to Milvus and create the configured collections.

    Args:
        config: Settings object; ``uri``, ``user_name`` and ``password`` are
            read here to open the client connection.
    """
    # Imported inside the method so the module can be imported without
    # pymilvus; the decorator above names pymilvus as the required package.
    from pymilvus import MilvusClient

    self.config = config

    connection = {
        "uri": config.uri,
        "user": config.user_name,
        "password": config.password,
    }
    self.client = MilvusClient(**connection)

    # Order matters: the schema and index parameters are built through the
    # client and are both read by create_collection().
    self.schema = self.create_schema()
    self.index_params = self.create_index()
    self.create_collection()
35
+ def create_schema(self):
36
+ """Create schema for the milvus collection."""
37
+ from pymilvus import DataType, Function, FunctionType
38
+
39
+ schema = self.client.create_schema(auto_id=False, enable_dynamic_field=True)
40
+ schema.add_field(
41
+ field_name="id", datatype=DataType.VARCHAR, max_length=65535, is_primary=True
42
+ )
43
+ analyzer_params = {"tokenizer": "standard", "filter": ["lowercase"]}
44
+ schema.add_field(
45
+ field_name="memory",
46
+ datatype=DataType.VARCHAR,
47
+ max_length=65535,
48
+ analyzer_params=analyzer_params,
49
+ enable_match=True,
50
+ enable_analyzer=True,
51
+ )
52
+ schema.add_field(field_name="original_text", datatype=DataType.VARCHAR, max_length=65535)
53
+ schema.add_field(
54
+ field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=self.config.vector_dimension
55
+ )
56
+ schema.add_field(field_name="payload", datatype=DataType.JSON)
57
+
58
+ schema.add_field(field_name="sparse_vector", datatype=DataType.SPARSE_FLOAT_VECTOR)
59
+ bm25_function = Function(
60
+ name="bm25",
61
+ function_type=FunctionType.BM25,
62
+ input_field_names=["memory"],
63
+ output_field_names="sparse_vector",
64
+ )
65
+ schema.add_function(bm25_function)
66
+
67
+ return schema
68
+
69
def create_index(self):
    """Prepare index parameters for the dense and sparse vector fields.

    Returns:
        The index-params object from ``self.client.prepare_index_params``
        with a FLAT index on ``vector`` (metric from ``_get_metric_type``)
        and a SPARSE_INVERTED_INDEX with the BM25 metric on ``sparse_vector``.
    """
    params = self.client.prepare_index_params()

    dense_index = {
        "field_name": "vector",
        "index_type": "FLAT",
        "metric_type": self._get_metric_type(),
    }
    sparse_index = {
        "field_name": "sparse_vector",
        "index_type": "SPARSE_INVERTED_INDEX",
        "metric_type": "BM25",
    }
    params.add_index(**dense_index)
    params.add_index(**sparse_index)

    return params
82
+
83
def create_collection(self) -> None:
    """Create every collection listed in ``config.collection_name``.

    Names that already exist are skipped with a warning; each newly created
    collection is reported at info level.
    """
    # Assumes config.collection_name is an iterable of names - TODO confirm.
    for collection_name in self.config.collection_name:
        already_there = self.collection_exists(collection_name)
        if not already_there:
            self.client.create_collection(
                collection_name=collection_name,
                dimension=self.config.vector_dimension,
                metric_type=self._get_metric_type(),
                schema=self.schema,
                index_params=self.index_params,
            )
            logger.info(
                f"Collection '{collection_name}' created with {self.config.vector_dimension} dimensions."
            )
        else:
            logger.warning(f"Collection '{collection_name}' already exists. Skipping creation.")
101
+
102
def create_collection_by_name(self, collection_name: str) -> None:
    """Create one collection with the instance's schema and index params.

    Args:
        collection_name: Name of the collection to create. If it already
            exists a warning is logged and nothing else happens.
    """
    if not self.collection_exists(collection_name):
        creation_args = {
            "collection_name": collection_name,
            "dimension": self.config.vector_dimension,
            "metric_type": self._get_metric_type(),
            "schema": self.schema,
            "index_params": self.index_params,
        }
        self.client.create_collection(**creation_args)
        return

    logger.warning(f"Collection '{collection_name}' already exists. Skipping creation.")
115
+
116
def list_collections(self) -> list[str]:
    """Return the collection names reported by the Milvus client."""
    names = self.client.list_collections()
    return names
119
+
120
def delete_collection(self, name: str) -> None:
    """Drop the collection called ``name`` through the Milvus client.

    Args:
        name: Collection to drop.
    """
    client = self.client
    client.drop_collection(name)
123
+
124
def collection_exists(self, name: str) -> bool:
    """Ask the Milvus client whether a collection named ``name`` exists.

    Args:
        name: Collection name to look up.

    Returns:
        The result of ``self.client.has_collection``.
    """
    found = self.client.has_collection(collection_name=name)
    return found
127
+
128
def _dense_search(
    self,
    collection_name: str,
    query_vector: list[float],
    top_k: int,
    filter: str = "",
    **kwargs: Any,
) -> list[list[dict]]:
    """Run a search against the dense ``vector`` field.

    Args:
        collection_name: Collection to query.
        query_vector: Single query embedding; it is wrapped in a list before
            being handed to the client.
        top_k: Passed to the client as ``limit``.
        filter: Filter expression string, forwarded unchanged.
        **kwargs: Accepted so callers can pass a common argument set; unused.

    Returns:
        Whatever ``self.client.search`` returns.
    """
    request = {
        "collection_name": collection_name,
        "data": [query_vector],
        "limit": top_k,
        "filter": filter,
        "output_fields": ["*"],
        "anns_field": "vector",
    }
    return self.client.search(**request)
146
+
147
def _sparse_search(
    self,
    collection_name: str,
    query: str,
    top_k: int,
    filter: str = "",
    **kwargs: Any,
) -> list[list[dict]]:
    """Run a search against the ``sparse_vector`` field using raw query text.

    Args:
        collection_name: Collection to query.
        query: Query text; it is wrapped in a list before being handed to
            the client.
        top_k: Passed to the client as ``limit``.
        filter: Filter expression string, forwarded unchanged.
        **kwargs: Accepted so callers can pass a common argument set; unused.

    Returns:
        Whatever ``self.client.search`` returns.
    """
    request = {
        "collection_name": collection_name,
        "data": [query],
        "limit": top_k,
        "filter": filter,
        "output_fields": ["*"],
        "anns_field": "sparse_vector",
    }
    return self.client.search(**request)
165
+
166
def _hybrid_search(
    self,
    collection_name: str,
    query_vector: list[float],
    query: str,
    top_k: int,
    filter: str | None = None,
    ranker_type: str = "rrf",  # rrf, weighted
    sparse_weight=1.0,
    dense_weight=1.0,
    **kwargs: Any,
) -> list[list[dict]]:
    """Combine a BM25 text request and a dense vector request in one search.

    Args:
        collection_name: Collection to query.
        query_vector: Query embedding for the ``vector`` field.
        query: Query text for the ``sparse_vector`` field.
        top_k: Limit applied to each sub-request and to the merged result.
        filter: Filter expression; an empty or missing value becomes ``None``.
        ranker_type: ``"rrf"`` selects ``RRFRanker``; any other value selects
            ``WeightedRanker``.
        sparse_weight: First weight given to ``WeightedRanker``.
        dense_weight: Second weight given to ``WeightedRanker``.
        **kwargs: Accepted and ignored.

    Returns:
        Whatever ``self.client.hybrid_search`` returns.
    """
    from pymilvus import AnnSearchRequest, RRFRanker, WeightedRanker

    expr = filter or None

    # Text request, scored with BM25 on the sparse field.
    sparse_request = AnnSearchRequest(
        data=[query],
        anns_field="sparse_vector",
        param={"metric_type": "BM25"},
        limit=top_k,
        expr=expr,
    )
    # Embedding request on the dense field.
    dense_request = AnnSearchRequest(
        data=[query_vector],
        anns_field="vector",
        param={"metric_type": self._get_metric_type()},
        limit=top_k,
        expr=expr,
    )

    # Weights are given in the same order as the requests: sparse, then dense.
    if ranker_type == "rrf":
        ranker = RRFRanker()
    else:
        ranker = WeightedRanker(sparse_weight, dense_weight)

    return self.client.hybrid_search(
        collection_name=collection_name,
        reqs=[sparse_request, dense_request],
        ranker=ranker,
        limit=top_k,
        output_fields=["*"],
    )
209
+
210
def search(
    self,
    query_vector: list[float],
    query: str,
    collection_name: str,
    top_k: int,
    filter: dict[str, Any] | None = None,
    search_type: str = "dense",  # dense, sparse, hybrid
) -> list[MilvusVecDBItem]:
    """
    Search a collection and wrap the hits as ``MilvusVecDBItem`` objects.

    Args:
        query_vector: Single vector to search with (dense and hybrid modes).
        query: Query text (sparse and hybrid modes).
        collection_name: Name of the collection to search.
        top_k: Number of results to return.
        filter: Payload filters; converted to an expression string first.
        search_type: One of ``"dense"``, ``"sparse"`` or ``"hybrid"``.

    Returns:
        Items built from the first result set. Any exception raised while
        searching or converting hits, including an unknown ``search_type``,
        is logged and an empty list is returned.
    """
    logger.info(f"filter for milvus: {filter}")
    expr = "" if not filter else self._dict_to_expr(filter)

    search_func_map = {
        "dense": self._dense_search,
        "sparse": self._sparse_search,
        "hybrid": self._hybrid_search,
    }

    def to_item(hit):
        entity = hit.get("entity", {})
        # NOTE(review): score is always 1 - hit["distance"], whatever the
        # metric or search type - confirm this matches what Milvus returns.
        return MilvusVecDBItem(
            id=str(entity.get("id")),
            memory=entity.get("memory"),
            original_text=entity.get("original_text"),
            vector=entity.get("vector"),
            payload=entity.get("payload", {}),
            score=1 - float(hit["distance"]),
        )

    try:
        # Every helper receives the same keyword set and ignores the
        # arguments it does not use.
        results = search_func_map[search_type](
            collection_name=collection_name,
            query_vector=query_vector,
            query=query,
            top_k=top_k,
            filter=expr,
        )
        items = [to_item(hit) for hit in results[0]]
    except Exception as e:
        logger.error("Error in _%s_search: %s", search_type, e)
        return []

    logger.info(f"Milvus search completed with {len(items)} results.")
    return items
269
+
270
+ def _dict_to_expr(self, filter_dict: dict[str, Any]) -> str:
271
+ """Convert a dictionary filter to a Milvus expression string.
272
+
273
+ Supports complex query syntax with logical operators, comparison operators,
274
+ arithmetic operators, array operators, and string pattern matching.
275
+
276
+ Args:
277
+ filter_dict: Dictionary containing filter conditions
278
+
279
+ Returns:
280
+ Milvus expression string
281
+ """
282
+ if not filter_dict:
283
+ return ""
284
+
285
+ return self._build_expression(filter_dict)
286
+
287
+ def _build_expression(self, condition: Any) -> str:
288
+ """Build expression from condition dict or value."""
289
+ if isinstance(condition, dict):
290
+ # Handle logical operators
291
+ if "and" in condition:
292
+ return self._handle_logical_and(condition["and"])
293
+ elif "or" in condition:
294
+ return self._handle_logical_or(condition["or"])
295
+ elif "not" in condition:
296
+ return self._handle_logical_not(condition["not"])
297
+ else:
298
+ # Handle field conditions
299
+ return self._handle_field_conditions(condition)
300
+ else:
301
+ # Simple value comparison
302
+ return f"{condition}"
303
+
304
+ def _handle_logical_and(self, conditions: list) -> str:
305
+ """Handle AND logical operator."""
306
+ if not conditions:
307
+ return ""
308
+ expressions = [self._build_expression(cond) for cond in conditions if cond is not None]
309
+ expressions = [expr for expr in expressions if expr]
310
+ if not expressions:
311
+ return ""
312
+ return f"({' and '.join(expressions)})"
313
+
314
+ def _handle_logical_or(self, conditions: list) -> str:
315
+ """Handle OR logical operator."""
316
+ if not conditions:
317
+ return ""
318
+ expressions = [self._build_expression(cond) for cond in conditions if cond is not None]
319
+ expressions = [expr for expr in expressions if expr]
320
+ if not expressions:
321
+ return ""
322
+ return f"({' or '.join(expressions)})"
323
+
324
+ def _handle_logical_not(self, condition: Any) -> str:
325
+ """Handle NOT logical operator."""
326
+ expr = self._build_expression(condition)
327
+ if not expr:
328
+ return ""
329
+ return f"(not {expr})"
330
+
331
+ def _handle_field_conditions(self, condition_dict: dict[str, Any]) -> str:
332
+ """Handle field-specific conditions."""
333
+ conditions = []
334
+
335
+ for field, value in condition_dict.items():
336
+ if value is None:
337
+ continue
338
+
339
+ field_expr = self._build_field_expression(field, value)
340
+ if field_expr:
341
+ conditions.append(field_expr)
342
+
343
+ if not conditions:
344
+ return ""
345
+ return " and ".join(conditions)
346
+
347
+ def _build_field_expression(self, field: str, value: Any) -> str:
348
+ """Build expression for a single field."""
349
+ # Handle comparison operators
350
+ if isinstance(value, dict):
351
+ if len(value) == 1:
352
+ op, operand = next(iter(value.items()))
353
+ op_lower = op.lower()
354
+
355
+ if op_lower == "in":
356
+ return self._handle_in_operator(field, operand)
357
+ elif op_lower == "contains":
358
+ return self._handle_contains_operator(field, operand, case_sensitive=True)
359
+ elif op_lower == "icontains":
360
+ return self._handle_contains_operator(field, operand, case_sensitive=False)
361
+ elif op_lower == "like":
362
+ return self._handle_like_operator(field, operand)
363
+ elif op_lower in ["gte", "lte", "gt", "lt", "ne"]:
364
+ return self._handle_comparison_operator(field, op_lower, operand)
365
+ else:
366
+ # Unknown operator, treat as equality
367
+ return f"payload['{field}'] == {self._format_value(operand)}"
368
+ else:
369
+ # Multiple operators, handle each one
370
+ sub_conditions = []
371
+ for op, operand in value.items():
372
+ op_lower = op.lower()
373
+ if op_lower in [
374
+ "gte",
375
+ "lte",
376
+ "gt",
377
+ "lt",
378
+ "ne",
379
+ "in",
380
+ "contains",
381
+ "icontains",
382
+ "like",
383
+ ]:
384
+ sub_expr = self._build_field_expression(field, {op: operand})
385
+ if sub_expr:
386
+ sub_conditions.append(sub_expr)
387
+
388
+ if sub_conditions:
389
+ return f"({' and '.join(sub_conditions)})"
390
+ return ""
391
+ else:
392
+ # Simple equality
393
+ return f"payload['{field}'] == {self._format_value(value)}"
394
+
395
+ def _handle_in_operator(self, field: str, values: list) -> str:
396
+ """Handle IN operator for arrays."""
397
+ if not isinstance(values, list) or not values:
398
+ return ""
399
+
400
+ formatted_values = [self._format_value(v) for v in values]
401
+ return f"payload['{field}'] in [{', '.join(formatted_values)}]"
402
+
403
+ def _handle_contains_operator(self, field: str, value: Any, case_sensitive: bool = True) -> str:
404
+ """Handle CONTAINS/ICONTAINS operator."""
405
+ formatted_value = self._format_value(value)
406
+ if case_sensitive:
407
+ return f"json_contains(payload['{field}'], {formatted_value})"
408
+ else:
409
+ # For case-insensitive contains, we need to use LIKE with lower case
410
+ return f"(not json_contains(payload['{field}'], {formatted_value}))"
411
+
412
+ def _handle_like_operator(self, field: str, pattern: str) -> str:
413
+ """Handle LIKE operator for string pattern matching."""
414
+ # Convert SQL-like pattern to Milvus-like pattern
415
+ return f"payload['{field}'] like '{pattern}'"
416
+
417
+ def _handle_comparison_operator(self, field: str, operator: str, value: Any) -> str:
418
+ """Handle comparison operators (gte, lte, gt, lt, ne)."""
419
+ milvus_op = {"gte": ">=", "lte": "<=", "gt": ">", "lt": "<", "ne": "!="}.get(operator, "==")
420
+
421
+ formatted_value = self._format_value(value)
422
+ return f"payload['{field}'] {milvus_op} {formatted_value}"
423
+
424
+ def _format_value(self, value: Any) -> str:
425
+ """Format value for Milvus expression."""
426
+ if isinstance(value, str):
427
+ return f"'{value}'"
428
+ elif isinstance(value, int | float):
429
+ return str(value)
430
+ elif isinstance(value, bool):
431
+ return str(value).lower()
432
+ elif isinstance(value, list):
433
+ formatted_items = [self._format_value(item) for item in value]
434
+ return f"[{', '.join(formatted_items)}]"
435
+ elif value is None:
436
+ return "null"
437
+ else:
438
+ return f"'{value!s}'"
439
+
440
+ def _get_metric_type(self) -> str:
441
+ """Get the metric type for search."""
442
+ metric_map = {
443
+ "cosine": "COSINE",
444
+ "euclidean": "L2",
445
+ "dot": "IP",
446
+ }
447
+ return metric_map.get(self.config.distance_metric, "L2")
448
+
449
def get_by_id(self, collection_name: str, id: str) -> MilvusVecDBItem | None:
    """Fetch one item by primary key.

    Args:
        collection_name: Collection to read from.
        id: Primary key of the item.

    Returns:
        The first entity returned by the client wrapped as a
        ``MilvusVecDBItem``, or ``None`` when the client returns nothing.
    """
    rows = self.client.get(collection_name=collection_name, ids=[id])
    if rows:
        row = rows[0]
        return MilvusVecDBItem(
            id=row["id"],
            memory=row.get("memory"),
            original_text=row.get("original_text"),
            vector=row.get("vector"),
            payload=row.get("payload", {}),
        )
    return None
468
+
469
def get_by_ids(self, collection_name: str, ids: list[str]) -> list[MilvusVecDBItem]:
    """Fetch several items from a collection by their IDs.

    Args:
        collection_name: Collection to read from.
        ids: IDs of the items to fetch.

    Returns:
        One ``MilvusVecDBItem`` per entity returned by the client, in the
        order the client returned them; an empty list when it returns nothing.
    """
    rows = self.client.get(collection_name=collection_name, ids=ids)
    if not rows:
        return []

    # "id" is required; the remaining fields default to None / {} if absent.
    return [
        MilvusVecDBItem(
            id=row["id"],
            memory=row.get("memory"),
            original_text=row.get("original_text"),
            vector=row.get("vector"),
            payload=row.get("payload", {}),
        )
        for row in rows
    ]
492
+
493
def get_by_filter(
    self, collection_name: str, filter: dict[str, Any], scroll_limit: int = 100
) -> list[MilvusVecDBItem]:
    """
    Collect every item matching a payload filter, paging with query_iterator.

    Args:
        collection_name: Collection to read from.
        filter: Payload filters to match; an empty filter is sent as an
            empty expression string.
        scroll_limit: Batch size handed to the iterator.

    Returns:
        The items gathered from all batches. If an exception is raised while
        iterating, it is logged as a warning and the items gathered up to
        that point are returned instead of propagating the error.
    """
    expr = self._dict_to_expr(filter) if filter else ""
    found = []

    cursor = self.client.query_iterator(
        collection_name=collection_name,
        filter=expr,
        batch_size=scroll_limit,
        output_fields=["*"],  # request every field, payload included
    )

    try:
        # An empty batch marks the end of the result set.
        while batch := cursor.next():
            for row in batch:
                found.append(
                    MilvusVecDBItem(
                        id=row["id"],
                        memory=row.get("memory"),
                        original_text=row.get("original_text"),
                        vector=row.get("vector"),
                        payload=row.get("payload", {}),
                    )
                )
    except Exception as e:
        # Best effort: keep whatever was collected before the failure.
        logger.warning(
            f"Error during Milvus query iteration: {e}. Returning {len(found)} items found so far."
        )
    finally:
        # Release the iterator on both the success and the failure path.
        cursor.close()

    logger.info(f"Milvus retrieve by filter completed with {len(found)} results.")
    return found
548
+
549
def get_all(self, collection_name: str, scroll_limit=100) -> list[MilvusVecDBItem]:
    """Return every item in a collection.

    Delegates to ``get_by_filter`` with an empty filter.

    Args:
        collection_name: Collection to read from.
        scroll_limit: Forwarded unchanged to ``get_by_filter``.
    """
    match_everything: dict = {}
    return self.get_by_filter(collection_name, match_everything, scroll_limit=scroll_limit)
552
+
553
+ def count(self, collection_name: str, filter: dict[str, Any] | None = None) -> int:
554
+ """Count items in the database, optionally with filter."""
555
+ if filter:
556
+ # If there's a filter, use query method
557
+ expr = self._dict_to_expr(filter) if filter else ""
558
+ results = self.client.query(
559
+ collection_name=collection_name,
560
+ filter=expr,
561
+ output_fields=["id"],
562
+ )
563
+ return len(results)
564
+ else:
565
+ # For counting all items, use get_collection_stats for accurate count
566
+ stats = self.client.get_collection_stats(collection_name)
567
+ # Extract row count from stats - stats is a dict, not a list
568
+ return int(stats.get("row_count", 0))
569
+
570
def add(self, collection_name: str, data: list[MilvusVecDBItem | dict[str, Any]]) -> None:
    """
    Write items into a collection with a single upsert call.

    Args:
        collection_name: Collection to write to.
        data: Items to store. Dictionaries are converted with
            ``MilvusVecDBItem.from_dict`` (on a shallow copy); other objects
            are read through their ``id``, ``memory``, ``original_text``,
            ``vector`` and ``payload`` attributes.
    """

    def to_entity(raw):
        # Convert dict input on a copy so the caller's dict is not handed over.
        record = MilvusVecDBItem.from_dict(raw.copy()) if isinstance(raw, dict) else raw
        return {
            # The three text fields are cut to at most 65000 characters.
            "id": record.id[:65000],
            "memory": record.memory[:65000],
            "original_text": record.original_text[:65000],
            "vector": record.vector,
            # A missing or empty payload is stored as an empty dict.
            "payload": record.payload or {},
        }

    rows = [to_entity(raw) for raw in data]

    # upsert inserts new IDs and overwrites existing ones.
    self.client.upsert(collection_name=collection_name, data=rows)
603
+
604
def update(self, collection_name: str, id: str, data: MilvusVecDBItem | dict[str, Any]) -> None:
    """Update an item in the vector database.

    Args:
        collection_name: Collection holding the item.
        id: ID of the item to update.
        data: Replacement content, either an item object or a dictionary
            carrying an ``id`` key.

    Raises:
        ValueError: If the ID carried by ``data`` differs from ``id``.
    """
    # Read the ID in a way that works for both accepted input types: a dict
    # has no ``.id`` attribute, so attribute access alone would fail with
    # AttributeError before the dict could ever be converted.
    data_id = data.get("id") if isinstance(data, dict) else data.id
    if id != data_id:
        raise ValueError(
            f"The id of the data to update must be the same as the id of the item to update, ID mismatch: expected {id}, got {data_id}"
        )
    if isinstance(data, dict):
        # Convert a copy so the caller's dict is not handed over.
        data = MilvusVecDBItem.from_dict(data.copy())

    # An update is an upsert of a single item.
    self.upsert(collection_name, [data])
616
+
617
def ensure_payload_indexes(self, fields: list[str]) -> None:
    """
    Accept a payload-index request without creating anything.

    This method performs no index creation: it only writes an info-level
    log entry naming the requested fields, so calling it repeatedly has no
    effect on the collection.

    Args:
        fields (list[str]): Field names the caller wants indexed.
    """
    # NOTE(review): the log text states that Milvus indexes scalar fields
    # automatically; nothing in this method verifies that — confirm.
    message = f"Milvus automatically indexes scalar fields: {fields}"
    logger.info(message)
628
+
629
def upsert(self, collection_name: str, data: list[MilvusVecDBItem | dict[str, Any]]) -> None:
    """
    Insert new items or overwrite existing ones, keyed by ID.

    This is a thin alias: ``add`` already writes through the client's
    upsert call, so the arguments are forwarded to it unchanged.

    Args:
        collection_name: Collection to write to.
        data: Items (objects or dictionaries) to store.
    """
    write = self.add
    write(collection_name, data)
638
+
639
def delete(self, collection_name: str, ids: list[str]) -> None:
    """Remove items from a collection by ID.

    Args:
        collection_name: Collection to delete from.
        ids: IDs to remove; when empty, the client is not called at all.
    """
    if ids:
        self.client.delete(collection_name=collection_name, ids=ids)
647
+
648
def delete_by_filter(self, collection_name: str, filter: dict[str, Any]) -> None:
    """Remove the items of a collection that match a payload filter.

    Args:
        collection_name: Collection to delete from.
        filter: Payload filters, converted with ``_dict_to_expr``; an empty
            filter is passed to the client as an empty expression string.
    """
    if filter:
        expr = self._dict_to_expr(filter)
    else:
        expr = ""
    self.client.delete(collection_name=collection_name, filter=expr)