MemoryOS 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. memoryos-2.0.3.dist-info/METADATA +418 -0
  2. memoryos-2.0.3.dist-info/RECORD +315 -0
  3. memoryos-2.0.3.dist-info/WHEEL +4 -0
  4. memoryos-2.0.3.dist-info/entry_points.txt +3 -0
  5. memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
  6. memos/__init__.py +20 -0
  7. memos/api/client.py +571 -0
  8. memos/api/config.py +1018 -0
  9. memos/api/context/dependencies.py +50 -0
  10. memos/api/exceptions.py +53 -0
  11. memos/api/handlers/__init__.py +62 -0
  12. memos/api/handlers/add_handler.py +158 -0
  13. memos/api/handlers/base_handler.py +194 -0
  14. memos/api/handlers/chat_handler.py +1401 -0
  15. memos/api/handlers/component_init.py +388 -0
  16. memos/api/handlers/config_builders.py +190 -0
  17. memos/api/handlers/feedback_handler.py +93 -0
  18. memos/api/handlers/formatters_handler.py +237 -0
  19. memos/api/handlers/memory_handler.py +316 -0
  20. memos/api/handlers/scheduler_handler.py +497 -0
  21. memos/api/handlers/search_handler.py +222 -0
  22. memos/api/handlers/suggestion_handler.py +117 -0
  23. memos/api/mcp_serve.py +614 -0
  24. memos/api/middleware/request_context.py +101 -0
  25. memos/api/product_api.py +38 -0
  26. memos/api/product_models.py +1206 -0
  27. memos/api/routers/__init__.py +1 -0
  28. memos/api/routers/product_router.py +477 -0
  29. memos/api/routers/server_router.py +394 -0
  30. memos/api/server_api.py +44 -0
  31. memos/api/start_api.py +433 -0
  32. memos/chunkers/__init__.py +4 -0
  33. memos/chunkers/base.py +24 -0
  34. memos/chunkers/charactertext_chunker.py +41 -0
  35. memos/chunkers/factory.py +24 -0
  36. memos/chunkers/markdown_chunker.py +62 -0
  37. memos/chunkers/sentence_chunker.py +54 -0
  38. memos/chunkers/simple_chunker.py +50 -0
  39. memos/cli.py +113 -0
  40. memos/configs/__init__.py +0 -0
  41. memos/configs/base.py +82 -0
  42. memos/configs/chunker.py +59 -0
  43. memos/configs/embedder.py +88 -0
  44. memos/configs/graph_db.py +236 -0
  45. memos/configs/internet_retriever.py +100 -0
  46. memos/configs/llm.py +151 -0
  47. memos/configs/mem_agent.py +54 -0
  48. memos/configs/mem_chat.py +81 -0
  49. memos/configs/mem_cube.py +105 -0
  50. memos/configs/mem_os.py +83 -0
  51. memos/configs/mem_reader.py +91 -0
  52. memos/configs/mem_scheduler.py +385 -0
  53. memos/configs/mem_user.py +70 -0
  54. memos/configs/memory.py +324 -0
  55. memos/configs/parser.py +38 -0
  56. memos/configs/reranker.py +18 -0
  57. memos/configs/utils.py +8 -0
  58. memos/configs/vec_db.py +80 -0
  59. memos/context/context.py +355 -0
  60. memos/dependency.py +52 -0
  61. memos/deprecation.py +262 -0
  62. memos/embedders/__init__.py +0 -0
  63. memos/embedders/ark.py +95 -0
  64. memos/embedders/base.py +106 -0
  65. memos/embedders/factory.py +29 -0
  66. memos/embedders/ollama.py +77 -0
  67. memos/embedders/sentence_transformer.py +49 -0
  68. memos/embedders/universal_api.py +51 -0
  69. memos/exceptions.py +30 -0
  70. memos/graph_dbs/__init__.py +0 -0
  71. memos/graph_dbs/base.py +274 -0
  72. memos/graph_dbs/factory.py +27 -0
  73. memos/graph_dbs/item.py +46 -0
  74. memos/graph_dbs/nebular.py +1794 -0
  75. memos/graph_dbs/neo4j.py +1942 -0
  76. memos/graph_dbs/neo4j_community.py +1058 -0
  77. memos/graph_dbs/polardb.py +5446 -0
  78. memos/hello_world.py +97 -0
  79. memos/llms/__init__.py +0 -0
  80. memos/llms/base.py +25 -0
  81. memos/llms/deepseek.py +13 -0
  82. memos/llms/factory.py +38 -0
  83. memos/llms/hf.py +443 -0
  84. memos/llms/hf_singleton.py +114 -0
  85. memos/llms/ollama.py +135 -0
  86. memos/llms/openai.py +222 -0
  87. memos/llms/openai_new.py +198 -0
  88. memos/llms/qwen.py +13 -0
  89. memos/llms/utils.py +14 -0
  90. memos/llms/vllm.py +218 -0
  91. memos/log.py +237 -0
  92. memos/mem_agent/base.py +19 -0
  93. memos/mem_agent/deepsearch_agent.py +391 -0
  94. memos/mem_agent/factory.py +36 -0
  95. memos/mem_chat/__init__.py +0 -0
  96. memos/mem_chat/base.py +30 -0
  97. memos/mem_chat/factory.py +21 -0
  98. memos/mem_chat/simple.py +200 -0
  99. memos/mem_cube/__init__.py +0 -0
  100. memos/mem_cube/base.py +30 -0
  101. memos/mem_cube/general.py +240 -0
  102. memos/mem_cube/navie.py +172 -0
  103. memos/mem_cube/utils.py +169 -0
  104. memos/mem_feedback/base.py +15 -0
  105. memos/mem_feedback/feedback.py +1192 -0
  106. memos/mem_feedback/simple_feedback.py +40 -0
  107. memos/mem_feedback/utils.py +230 -0
  108. memos/mem_os/client.py +5 -0
  109. memos/mem_os/core.py +1203 -0
  110. memos/mem_os/main.py +582 -0
  111. memos/mem_os/product.py +1608 -0
  112. memos/mem_os/product_server.py +455 -0
  113. memos/mem_os/utils/default_config.py +359 -0
  114. memos/mem_os/utils/format_utils.py +1403 -0
  115. memos/mem_os/utils/reference_utils.py +162 -0
  116. memos/mem_reader/__init__.py +0 -0
  117. memos/mem_reader/base.py +47 -0
  118. memos/mem_reader/factory.py +53 -0
  119. memos/mem_reader/memory.py +298 -0
  120. memos/mem_reader/multi_modal_struct.py +965 -0
  121. memos/mem_reader/read_multi_modal/__init__.py +43 -0
  122. memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
  123. memos/mem_reader/read_multi_modal/base.py +273 -0
  124. memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
  125. memos/mem_reader/read_multi_modal/image_parser.py +359 -0
  126. memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
  127. memos/mem_reader/read_multi_modal/string_parser.py +139 -0
  128. memos/mem_reader/read_multi_modal/system_parser.py +327 -0
  129. memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
  130. memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
  131. memos/mem_reader/read_multi_modal/user_parser.py +218 -0
  132. memos/mem_reader/read_multi_modal/utils.py +358 -0
  133. memos/mem_reader/simple_struct.py +912 -0
  134. memos/mem_reader/strategy_struct.py +163 -0
  135. memos/mem_reader/utils.py +157 -0
  136. memos/mem_scheduler/__init__.py +0 -0
  137. memos/mem_scheduler/analyzer/__init__.py +0 -0
  138. memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
  139. memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
  140. memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
  141. memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
  142. memos/mem_scheduler/base_scheduler.py +1319 -0
  143. memos/mem_scheduler/general_modules/__init__.py +0 -0
  144. memos/mem_scheduler/general_modules/api_misc.py +137 -0
  145. memos/mem_scheduler/general_modules/base.py +80 -0
  146. memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
  147. memos/mem_scheduler/general_modules/misc.py +313 -0
  148. memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
  149. memos/mem_scheduler/general_modules/task_threads.py +315 -0
  150. memos/mem_scheduler/general_scheduler.py +1495 -0
  151. memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
  152. memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
  153. memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
  154. memos/mem_scheduler/monitors/__init__.py +0 -0
  155. memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
  156. memos/mem_scheduler/monitors/general_monitor.py +394 -0
  157. memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
  158. memos/mem_scheduler/optimized_scheduler.py +410 -0
  159. memos/mem_scheduler/orm_modules/__init__.py +0 -0
  160. memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
  161. memos/mem_scheduler/orm_modules/base_model.py +729 -0
  162. memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
  163. memos/mem_scheduler/orm_modules/redis_model.py +699 -0
  164. memos/mem_scheduler/scheduler_factory.py +23 -0
  165. memos/mem_scheduler/schemas/__init__.py +0 -0
  166. memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
  167. memos/mem_scheduler/schemas/api_schemas.py +233 -0
  168. memos/mem_scheduler/schemas/general_schemas.py +55 -0
  169. memos/mem_scheduler/schemas/message_schemas.py +173 -0
  170. memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
  171. memos/mem_scheduler/schemas/task_schemas.py +132 -0
  172. memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
  173. memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
  174. memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
  175. memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
  176. memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
  177. memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
  178. memos/mem_scheduler/utils/__init__.py +0 -0
  179. memos/mem_scheduler/utils/api_utils.py +77 -0
  180. memos/mem_scheduler/utils/config_utils.py +100 -0
  181. memos/mem_scheduler/utils/db_utils.py +50 -0
  182. memos/mem_scheduler/utils/filter_utils.py +176 -0
  183. memos/mem_scheduler/utils/metrics.py +125 -0
  184. memos/mem_scheduler/utils/misc_utils.py +290 -0
  185. memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
  186. memos/mem_scheduler/utils/status_tracker.py +229 -0
  187. memos/mem_scheduler/webservice_modules/__init__.py +0 -0
  188. memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
  189. memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
  190. memos/mem_user/factory.py +94 -0
  191. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  192. memos/mem_user/mysql_user_manager.py +502 -0
  193. memos/mem_user/persistent_factory.py +98 -0
  194. memos/mem_user/persistent_user_manager.py +260 -0
  195. memos/mem_user/redis_persistent_user_manager.py +225 -0
  196. memos/mem_user/user_manager.py +488 -0
  197. memos/memories/__init__.py +0 -0
  198. memos/memories/activation/__init__.py +0 -0
  199. memos/memories/activation/base.py +42 -0
  200. memos/memories/activation/item.py +56 -0
  201. memos/memories/activation/kv.py +292 -0
  202. memos/memories/activation/vllmkv.py +219 -0
  203. memos/memories/base.py +19 -0
  204. memos/memories/factory.py +42 -0
  205. memos/memories/parametric/__init__.py +0 -0
  206. memos/memories/parametric/base.py +19 -0
  207. memos/memories/parametric/item.py +11 -0
  208. memos/memories/parametric/lora.py +41 -0
  209. memos/memories/textual/__init__.py +0 -0
  210. memos/memories/textual/base.py +92 -0
  211. memos/memories/textual/general.py +236 -0
  212. memos/memories/textual/item.py +304 -0
  213. memos/memories/textual/naive.py +187 -0
  214. memos/memories/textual/prefer_text_memory/__init__.py +0 -0
  215. memos/memories/textual/prefer_text_memory/adder.py +504 -0
  216. memos/memories/textual/prefer_text_memory/config.py +106 -0
  217. memos/memories/textual/prefer_text_memory/extractor.py +221 -0
  218. memos/memories/textual/prefer_text_memory/factory.py +85 -0
  219. memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
  220. memos/memories/textual/prefer_text_memory/spliter.py +132 -0
  221. memos/memories/textual/prefer_text_memory/utils.py +93 -0
  222. memos/memories/textual/preference.py +344 -0
  223. memos/memories/textual/simple_preference.py +161 -0
  224. memos/memories/textual/simple_tree.py +69 -0
  225. memos/memories/textual/tree.py +459 -0
  226. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  227. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  228. memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
  229. memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
  230. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
  231. memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
  232. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  233. memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
  234. memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
  235. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
  236. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
  237. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
  238. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  239. memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
  240. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  241. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
  242. memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
  243. memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
  244. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
  245. memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
  246. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
  247. memos/memos_tools/dinding_report_bot.py +453 -0
  248. memos/memos_tools/lockfree_dict.py +120 -0
  249. memos/memos_tools/notification_service.py +44 -0
  250. memos/memos_tools/notification_utils.py +142 -0
  251. memos/memos_tools/singleton.py +174 -0
  252. memos/memos_tools/thread_safe_dict.py +310 -0
  253. memos/memos_tools/thread_safe_dict_segment.py +382 -0
  254. memos/multi_mem_cube/__init__.py +0 -0
  255. memos/multi_mem_cube/composite_cube.py +86 -0
  256. memos/multi_mem_cube/single_cube.py +874 -0
  257. memos/multi_mem_cube/views.py +54 -0
  258. memos/parsers/__init__.py +0 -0
  259. memos/parsers/base.py +15 -0
  260. memos/parsers/factory.py +21 -0
  261. memos/parsers/markitdown.py +28 -0
  262. memos/reranker/__init__.py +4 -0
  263. memos/reranker/base.py +25 -0
  264. memos/reranker/concat.py +103 -0
  265. memos/reranker/cosine_local.py +102 -0
  266. memos/reranker/factory.py +72 -0
  267. memos/reranker/http_bge.py +324 -0
  268. memos/reranker/http_bge_strategy.py +327 -0
  269. memos/reranker/noop.py +19 -0
  270. memos/reranker/strategies/__init__.py +4 -0
  271. memos/reranker/strategies/base.py +61 -0
  272. memos/reranker/strategies/concat_background.py +94 -0
  273. memos/reranker/strategies/concat_docsource.py +110 -0
  274. memos/reranker/strategies/dialogue_common.py +109 -0
  275. memos/reranker/strategies/factory.py +31 -0
  276. memos/reranker/strategies/single_turn.py +107 -0
  277. memos/reranker/strategies/singleturn_outmem.py +98 -0
  278. memos/settings.py +10 -0
  279. memos/templates/__init__.py +0 -0
  280. memos/templates/advanced_search_prompts.py +211 -0
  281. memos/templates/cloud_service_prompt.py +107 -0
  282. memos/templates/instruction_completion.py +66 -0
  283. memos/templates/mem_agent_prompts.py +85 -0
  284. memos/templates/mem_feedback_prompts.py +822 -0
  285. memos/templates/mem_reader_prompts.py +1096 -0
  286. memos/templates/mem_reader_strategy_prompts.py +238 -0
  287. memos/templates/mem_scheduler_prompts.py +626 -0
  288. memos/templates/mem_search_prompts.py +93 -0
  289. memos/templates/mos_prompts.py +403 -0
  290. memos/templates/prefer_complete_prompt.py +735 -0
  291. memos/templates/tool_mem_prompts.py +139 -0
  292. memos/templates/tree_reorganize_prompts.py +230 -0
  293. memos/types/__init__.py +34 -0
  294. memos/types/general_types.py +151 -0
  295. memos/types/openai_chat_completion_types/__init__.py +15 -0
  296. memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
  297. memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
  298. memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
  299. memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
  300. memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
  301. memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
  302. memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
  303. memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
  304. memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
  305. memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
  306. memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
  307. memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
  308. memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
  309. memos/utils.py +123 -0
  310. memos/vec_dbs/__init__.py +0 -0
  311. memos/vec_dbs/base.py +117 -0
  312. memos/vec_dbs/factory.py +23 -0
  313. memos/vec_dbs/item.py +50 -0
  314. memos/vec_dbs/milvus.py +654 -0
  315. memos/vec_dbs/qdrant.py +355 -0
@@ -0,0 +1,1058 @@
1
+ import json
2
+ import re
3
+
4
+ from datetime import datetime
5
+ from typing import Any
6
+
7
+ from memos.configs.graph_db import Neo4jGraphDBConfig
8
+ from memos.graph_dbs.neo4j import Neo4jGraphDB, _flatten_info_fields, _prepare_node_metadata
9
+ from memos.log import get_logger
10
+ from memos.vec_dbs.factory import VecDBFactory
11
+ from memos.vec_dbs.item import VecDBItem
12
+
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
+ class Neo4jCommunityGraphDB(Neo4jGraphDB):
18
+ """
19
+ Neo4j Community Edition graph memory store.
20
+
21
+ Note:
22
+ This class avoids Enterprise-only features:
23
+ - No multi-database support
24
+ - No vector index
25
+ - No CREATE DATABASE
26
+ """
27
+
28
def __init__(self, config: Neo4jGraphDBConfig):
    """Initialize the Community Edition store: external vector DB plus parent Neo4j setup.

    Args:
        config: Graph DB config. Must have ``auto_create=False`` and
            ``use_multi_db=False`` — Community Edition supports neither
            ``CREATE DATABASE`` nor multiple databases (see class docstring).

    Raises:
        ValueError: If the config enables an Enterprise-only feature.
    """
    # Explicit raises instead of `assert`: asserts are stripped under
    # `python -O`, which would silently disable this validation.
    if config.auto_create is not False:
        raise ValueError("Neo4j Community Edition does not support auto_create; set it to False")
    if config.use_multi_db is not False:
        raise ValueError("Neo4j Community Edition does not support multi-db; set use_multi_db to False")
    # Vector similarity search is delegated to an external vector DB,
    # since Community Edition has no native vector index.
    self.vec_db = VecDBFactory.from_config(config.vec_config)
    # Parent init sets up the Neo4j driver/session state.
    super().__init__(config)
35
+
36
def create_index(
    self,
    label: str = "Memory",
    vector_property: str = "embedding",
    dimensions: int = 1536,
    index_name: str = "memory_vector_index",
) -> None:
    """Create supporting property indexes for Memory nodes.

    Community Edition has no vector index, so ``label``, ``vector_property``,
    ``dimensions`` and ``index_name`` are accepted only for interface
    compatibility with the parent class and are ignored here; only the
    basic property indexes are created.
    """
    self._create_basic_property_indexes()
48
+
49
def add_node(
    self, id: str, memory: str, metadata: dict[str, Any], user_name: str | None = None
) -> None:
    """Insert or update a single Memory node.

    The embedding is written to the external vector DB; the memory text and
    remaining metadata are merged onto a Neo4j ``Memory`` node. A vector DB
    failure is recorded as ``vector_sync="failed"`` on the node instead of
    aborting the write.

    Args:
        id: Unique node id.
        memory: Memory text content.
        metadata: Node metadata; must contain 'embedding', 'created_at'
            and 'updated_at'.
        user_name: Optional owner; falls back to the configured user.

    Raises:
        ValueError: If 'embedding' is missing from metadata.
    """
    user_name = user_name if user_name else self.config.user_name
    if not self.config.use_multi_db and (self.config.user_name or user_name):
        metadata["user_name"] = user_name

    # Safely process metadata via parent helper.
    metadata = _prepare_node_metadata(metadata)

    # Initialize soft-delete bookkeeping fields.
    metadata.setdefault("delete_time", "")
    metadata.setdefault("delete_record_id", "")

    # Serialize each source entry to JSON. Use .get() so a metadata dict
    # without a 'sources' key no longer raises KeyError.
    sources = metadata.get("sources")
    if sources:
        metadata["sources"] = [json.dumps(source) for source in sources]

    # The embedding lives only in the vector DB, never on the graph node.
    embedding = metadata.pop("embedding", None)
    if embedding is None:
        raise ValueError(f"Missing 'embedding' in metadata for node {id}")

    # Timestamps are passed separately so Cypher can wrap them in datetime().
    created_at = metadata.pop("created_at")
    updated_at = metadata.pop("updated_at")
    vector_sync_status = "success"

    try:
        # Write to Vector DB; metadata keys are unpacked to top-level payload.
        item = VecDBItem(
            id=id,
            vector=embedding,
            payload={
                "memory": memory,
                "vector_sync": vector_sync_status,
                **metadata,  # unpack all metadata keys to top-level
            },
        )
        self.vec_db.add([item])
    except Exception as e:
        # Best-effort: record the failure on the node rather than aborting.
        logger.warning(f"[VecDB] Vector insert failed for node {id}: {e}")
        vector_sync_status = "failed"

    metadata["vector_sync"] = vector_sync_status
    query = """
    MERGE (n:Memory {id: $id})
    SET n.memory = $memory,
        n.created_at = datetime($created_at),
        n.updated_at = datetime($updated_at),
        n += $metadata
    """
    with self.driver.session(database=self.db_name) as session:
        session.run(
            query,
            id=id,
            memory=memory,
            created_at=created_at,
            updated_at=updated_at,
            metadata=metadata,
        )
110
+
111
def add_nodes_batch(self, nodes: list[dict[str, Any]], user_name: str | None = None) -> None:
    """Insert or update multiple Memory nodes in a single round-trip.

    Embeddings are written to the external vector DB in one batch call; node
    text and metadata are merged into Neo4j with a single UNWIND query.
    Nodes that fail preparation (missing id/memory/embedding) are logged and
    skipped rather than aborting the whole batch; a vector DB batch failure
    marks every node as ``vector_sync="failed"`` but the graph write proceeds.

    Args:
        nodes: Each dict holds 'id', 'memory' and a 'metadata' dict that must
            contain 'embedding', 'created_at' and 'updated_at'.
        user_name: Optional owner; falls back to the configured user.

    Raises:
        Exception: Re-raises any Neo4j write failure.
    """
    # Fix: a stray debug print() shipped in library code; use the logger.
    logger.debug("neo4j_community add_nodes_batch")
    if not nodes:
        logger.warning("[add_nodes_batch] Empty nodes list, skipping")
        return

    effective_user_name = user_name if user_name else self.config.user_name

    vec_items: list[VecDBItem] = []
    prepared_nodes: list[dict[str, Any]] = []

    for node_data in nodes:
        try:
            node_id = node_data.get("id")
            memory = node_data.get("memory")
            metadata = node_data.get("metadata", {})

            if node_id is None or memory is None:
                logger.warning("[add_nodes_batch] Skip invalid node: missing id/memory")
                continue

            if not self.config.use_multi_db and (self.config.user_name or effective_user_name):
                metadata["user_name"] = effective_user_name

            metadata = _prepare_node_metadata(metadata)
            metadata = _flatten_info_fields(metadata)

            # Initialize soft-delete bookkeeping fields.
            metadata.setdefault("delete_time", "")
            metadata.setdefault("delete_record_id", "")

            # The embedding lives only in the vector DB, never on the node.
            embedding = metadata.pop("embedding", None)
            if embedding is None:
                raise ValueError(f"Missing 'embedding' in metadata for node {node_id}")

            vector_sync_status = "success"
            vec_items.append(
                VecDBItem(
                    id=node_id,
                    vector=embedding,
                    payload={
                        "memory": memory,
                        "vector_sync": vector_sync_status,
                        **metadata,
                    },
                )
            )

            # Timestamps are passed separately so Cypher can apply datetime().
            created_at = metadata.pop("created_at")
            updated_at = metadata.pop("updated_at")
            metadata["vector_sync"] = vector_sync_status

            prepared_nodes.append(
                {
                    "id": node_id,
                    "memory": memory,
                    "created_at": created_at,
                    "updated_at": updated_at,
                    "metadata": metadata,
                }
            )
        except Exception as e:
            logger.error(
                f"[add_nodes_batch] Failed to prepare node {node_data.get('id', 'unknown')}: {e}",
                exc_info=True,
            )
            continue

    if not prepared_nodes:
        logger.warning("[add_nodes_batch] No valid nodes to insert after preparation")
        return

    try:
        self.vec_db.add(vec_items)
    except Exception as e:
        # Best-effort: record the sync failure on every node in the graph.
        logger.warning(f"[VecDB] batch insert failed: {e}")
        for node in prepared_nodes:
            node["metadata"]["vector_sync"] = "failed"

    query = """
    UNWIND $nodes AS node
    MERGE (n:Memory {id: node.id})
    SET n.memory = node.memory,
        n.created_at = datetime(node.created_at),
        n.updated_at = datetime(node.updated_at),
        n += node.metadata
    """

    try:
        with self.driver.session(database=self.db_name) as session:
            # prepared_nodes already contains exactly the keys the query
            # consumes; the old code rebuilt an identical list of dicts.
            session.run(query, nodes=prepared_nodes)
        logger.info(f"[add_nodes_batch] Successfully inserted {len(prepared_nodes)} nodes")
    except Exception as e:
        logger.error(f"[add_nodes_batch] Failed to add nodes: {e}", exc_info=True)
        raise
217
+
218
def get_children_with_embeddings(
    self, id: str, user_name: str | None = None
) -> list[dict[str, Any]]:
    """Return direct PARENT-children of a node, each enriched with its embedding.

    Child ids and memory text come from Neo4j; embeddings are fetched from
    the external vector DB in one bulk lookup (``None`` when no vector is
    stored for a child).
    """
    effective_user = user_name if user_name else self.config.user_name

    params = {"id": id}
    user_clause = ""
    if not self.config.use_multi_db and (self.config.user_name or effective_user):
        user_clause = "AND p.user_name = $user_name AND c.user_name = $user_name"
        params["user_name"] = effective_user

    query = f"""
    MATCH (p:Memory)-[:PARENT]->(c:Memory)
    WHERE p.id = $id {user_clause}
    RETURN c.id AS id, c.memory AS memory
    """

    with self.driver.session(database=self.db_name) as session:
        records = session.run(query, params)
        children = [{"id": rec["id"], "memory": rec["memory"]} for rec in records]

    # Single bulk call to the vector DB for all child embeddings.
    child_ids = [child["id"] for child in children]
    id_to_vector = {item.id: item.vector for item in self.vec_db.get_by_ids(child_ids)}

    for child in children:
        child["embedding"] = id_to_vector.get(child["id"])

    return children
248
+
249
+ # Search / recall operations
250
def search_by_embedding(
    self,
    vector: list[float],
    top_k: int = 5,
    scope: str | None = None,
    status: str | None = None,
    threshold: float | None = None,
    search_filter: dict | None = None,
    user_name: str | None = None,
    filter: dict | None = None,
    knowledgebase_ids: list[str] | None = None,
    **kwargs,
) -> list[dict]:
    """
    Retrieve node IDs based on vector similarity using external vector DB.

    Args:
        vector (list[float]): The embedding vector representing query semantics.
        top_k (int): Number of top similar nodes to retrieve.
        scope (str, optional): Memory type filter (e.g., 'WorkingMemory', 'LongTermMemory').
        status (str, optional): Node status filter (e.g., 'activated', 'archived').
        threshold (float, optional): Minimum similarity score threshold (0 ~ 1).
        search_filter (dict, optional): Additional metadata filters to apply.
        filter (dict, optional): Filter conditions with 'and' or 'or' logic for search results.
            Example: {"and": [{"id": "xxx"}, {"A": "yyy"}]} or {"or": [{"id": "xxx"}, {"A": "yyy"}]}
        knowledgebase_ids (list[str], optional): List of knowledgebase IDs to filter by.

    Returns:
        list[dict]: A list of dicts with 'id' and 'score', ordered by similarity.

    Notes:
        - This method uses an external vector database (not Neo4j) to perform the search.
        - If 'scope' is provided, it restricts results to nodes with matching memory_type.
        - If 'status' is provided, it further filters nodes by status.
        - If 'threshold' is provided, only results with score >= threshold will be returned.
        - If 'search_filter' is provided, it applies additional metadata-based filtering.
        - If 'filter' is provided, it applies complex filter conditions with AND/OR logic.
        - The returned IDs can be used to fetch full node data from Neo4j if needed.
    """
    user_name = user_name if user_name else self.config.user_name

    # First, perform vector search in external vector DB
    vec_filter = {}
    if scope:
        vec_filter["memory_type"] = scope
    if status:
        vec_filter["status"] = status
    # Exclude nodes whose embedding failed to sync (add_node/add_nodes_batch
    # mark those with vector_sync="failed").
    vec_filter["vector_sync"] = "success"
    # A caller-supplied cube_name overrides user-based ownership filtering.
    if kwargs.get("cube_name"):
        vec_filter["user_name"] = kwargs["cube_name"]
    else:
        vec_filter["user_name"] = user_name

    # Add search_filter conditions
    if search_filter:
        vec_filter.update(search_filter)

    # Perform vector search; a vector DB failure degrades to empty results
    # rather than raising.
    vec_results = []
    if self.vec_db:
        try:
            vec_results = self.vec_db.search(
                query_vector=vector, top_k=top_k, filter=vec_filter
            )
        except Exception as e:
            logger.warning(f"[VecDB] search failed: {e}")

    # Filter by threshold (results with no score pass through).
    if threshold is not None:
        vec_results = [r for r in vec_results if r.score is None or r.score >= threshold]

    # If no filter or knowledgebase_ids provided, return vector search results directly
    # (no Neo4j round-trip needed).
    if not filter and not knowledgebase_ids:
        return [{"id": r.id, "score": r.score} for r in vec_results]

    # Extract IDs from vector search results
    vec_ids = [r.id for r in vec_results]
    if not vec_ids:
        return []

    # Build WHERE clause for Neo4j filtering
    where_clauses = ["n.id IN $vec_ids"]
    params = {"vec_ids": vec_ids}

    # Build user_name filter with knowledgebase_ids support (OR relationship) using common method
    user_name_conditions, user_name_params = self._build_user_name_and_kb_ids_conditions_cypher(
        user_name=user_name,
        knowledgebase_ids=knowledgebase_ids,
        default_user_name=self.config.user_name,
        node_alias="n",
    )

    # Add user_name WHERE clause; multiple ownership conditions are OR-ed
    # together and parenthesized so they AND correctly with the id filter.
    if user_name_conditions:
        if len(user_name_conditions) == 1:
            where_clauses.append(user_name_conditions[0])
        else:
            where_clauses.append(f"({' OR '.join(user_name_conditions)})")

    # Build filter conditions using common method (this subclass's override
    # normalizes date strings before delegating to the parent).
    filter_conditions, filter_params = self._build_filter_conditions_cypher(
        filter=filter,
        param_counter_start=0,
        node_alias="n",
    )
    where_clauses.extend(filter_conditions)

    where_clause = "WHERE " + " AND ".join(where_clauses)

    # Add user_name and knowledgebase_ids parameters using common method
    params.update(user_name_params)

    # Add filter parameters
    if filter_params:
        params.update(filter_params)

    # Query Neo4j to filter results
    query = f"""
    MATCH (n:Memory)
    {where_clause}
    RETURN n.id AS id
    """
    logger.info(f"[search_by_embedding] query: {query}, params: {params}")

    with self.driver.session(database=self.db_name) as session:
        neo4j_results = session.run(query, params)
        filtered_ids = {record["id"] for record in neo4j_results}

    # Keep only vector hits that survived the Neo4j filtering, preserving
    # the similarity ordering from the vector DB.
    filtered_results = [
        {"id": r.id, "score": r.score} for r in vec_results if r.id in filtered_ids
    ]

    return filtered_results
384
+
385
+ def _normalize_date_string(self, date_str: str) -> str:
386
+ """
387
+ Normalize date string to ISO 8601 format for Neo4j datetime() function.
388
+
389
+ Args:
390
+ date_str: Date string in various formats (e.g., "2025-09-19", "2025-09-19T00:00:00Z")
391
+
392
+ Returns:
393
+ ISO 8601 formatted date string (e.g., "2025-09-19T00:00:00Z")
394
+ """
395
+ if not isinstance(date_str, str):
396
+ return date_str
397
+
398
+ # If already in ISO 8601 format with time, return as is
399
+ if "T" in date_str or date_str.endswith("Z") or "+" in date_str or "-" in date_str[-6:]:
400
+ return date_str
401
+
402
+ # Check if it's a simple date format (YYYY-MM-DD)
403
+ date_pattern = re.match(r"^(\d{4})-(\d{2})-(\d{2})$", date_str)
404
+ if date_pattern:
405
+ # Convert to ISO 8601 format: YYYY-MM-DDTHH:MM:SSZ
406
+ # For "gt" (greater than), use 00:00:00 of the next day
407
+ # For "lt" (less than), use 00:00:00 of the same day
408
+ # For "gte" (greater than or equal), use 00:00:00 of the same day
409
+ # For "lte" (less than or equal), use 23:59:59.999999999 of the same day
410
+ # But we'll use 00:00:00Z as default and let the caller handle the logic
411
+ return f"{date_str}T00:00:00Z"
412
+
413
+ # If it's already a datetime string, try to parse and reformat
414
+ try:
415
+ # Try to parse various datetime formats
416
+ dt = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
417
+ return dt.isoformat().replace("+00:00", "Z")
418
+ except (ValueError, AttributeError):
419
+ # If parsing fails, return as is
420
+ return date_str
421
+
422
def _build_filter_conditions_cypher(
    self,
    filter: dict | None,
    param_counter_start: int = 0,
    node_alias: str = "node",
) -> tuple[list[str], dict[str, Any]]:
    """Build Cypher filter conditions, normalizing date values first.

    Thin wrapper around the parent implementation: any date strings inside
    ``filter`` are rewritten to ISO 8601 before the conditions and
    parameters are generated, so Neo4j's datetime() comparisons behave
    consistently.

    Args:
        filter: Filter dictionary using "and"/"or" logic, or None.
        param_counter_start: Starting index for generated parameter names
            (avoids collisions when combined with other clauses).
        node_alias: Node alias used in the Cypher query.

    Returns:
        Tuple of (list of condition strings, dict of query parameters).
    """
    if filter:
        filter = self._normalize_filter_dates(filter)
    return super()._build_filter_conditions_cypher(
        filter=filter,
        param_counter_start=param_counter_start,
        node_alias=node_alias,
    )
450
+
451
+ def _normalize_filter_dates(self, filter: dict) -> dict:
452
+ """
453
+ Recursively normalize date strings in filter dictionary.
454
+
455
+ Args:
456
+ filter: Filter dictionary that may contain date strings
457
+
458
+ Returns:
459
+ Filter dictionary with normalized date strings
460
+ """
461
+ if not isinstance(filter, dict):
462
+ return filter
463
+
464
+ normalized = {}
465
+
466
+ if "and" in filter:
467
+ normalized["and"] = [
468
+ self._normalize_condition_dates(cond) if isinstance(cond, dict) else cond
469
+ for cond in filter["and"]
470
+ ]
471
+ elif "or" in filter:
472
+ normalized["or"] = [
473
+ self._normalize_condition_dates(cond) if isinstance(cond, dict) else cond
474
+ for cond in filter["or"]
475
+ ]
476
+ else:
477
+ # Single condition
478
+ normalized = self._normalize_condition_dates(filter)
479
+
480
+ return normalized
481
+
482
+ def _normalize_condition_dates(self, condition: dict) -> dict:
483
+ """
484
+ Normalize date strings in a single condition dictionary.
485
+
486
+ Args:
487
+ condition: A condition dict like {"created_at": {"gt": "2025-09-19"}}
488
+
489
+ Returns:
490
+ Condition dict with normalized date strings
491
+ """
492
+ from datetime import timedelta
493
+
494
+ normalized = {}
495
+
496
+ for key, value in condition.items():
497
+ # Check if this is a date field
498
+ is_date_field = key in ("created_at", "updated_at") or key.endswith("_at")
499
+
500
+ if isinstance(value, dict):
501
+ # Handle comparison operators
502
+ normalized_value = {}
503
+ for op, op_value in value.items():
504
+ if op in ("gt", "lt", "gte", "lte") and is_date_field:
505
+ # Normalize date string for date comparisons
506
+ if isinstance(op_value, str):
507
+ # Check if it's a simple date format (YYYY-MM-DD)
508
+ date_pattern = re.match(r"^(\d{4})-(\d{2})-(\d{2})$", op_value)
509
+ if date_pattern:
510
+ try:
511
+ # Parse the date
512
+ dt = datetime.fromisoformat(op_value + "T00:00:00")
513
+
514
+ if op == "gt":
515
+ # "gt": "2025-09-19" means > 2025-09-19 00:00:00
516
+ # So we keep it as 2025-09-19T00:00:00Z
517
+ normalized_value[op] = dt.isoformat() + "Z"
518
+ elif op == "gte":
519
+ # "gte": "2025-09-19" means >= 2025-09-19 00:00:00
520
+ normalized_value[op] = dt.isoformat() + "Z"
521
+ elif op == "lt":
522
+ # "lt": "2025-11-29" means < 2025-11-29 (exclude the entire day)
523
+ # So we convert to the start of the next day: 2025-11-30T00:00:00Z
524
+ # This ensures all times on 2025-11-29 are included
525
+ dt_next = dt + timedelta(days=1)
526
+ normalized_value[op] = dt_next.isoformat() + "Z"
527
+ elif op == "lte":
528
+ # "lte": "2025-11-29" means <= 2025-11-29 23:59:59.999999
529
+ # So we convert to end of day: 2025-11-29T23:59:59.999999Z
530
+ dt_end = dt + timedelta(days=1) - timedelta(microseconds=1)
531
+ normalized_value[op] = dt_end.isoformat() + "Z"
532
+ except ValueError:
533
+ # If parsing fails, use the original normalization
534
+ normalized_value[op] = self._normalize_date_string(op_value)
535
+ else:
536
+ # Already in a more complex format, just normalize it
537
+ normalized_value[op] = self._normalize_date_string(op_value)
538
+ else:
539
+ normalized_value[op] = op_value
540
+ else:
541
+ normalized_value[op] = op_value
542
+ normalized[key] = normalized_value
543
+ else:
544
+ normalized[key] = value
545
+
546
+ return normalized
547
+
548
+ def get_all_memory_items(
549
+ self,
550
+ scope: str,
551
+ filter: dict | None = None,
552
+ knowledgebase_ids: list[str] | None = None,
553
+ **kwargs,
554
+ ) -> list[dict]:
555
+ """
556
+ Retrieve all memory items of a specific memory_type.
557
+
558
+ Args:
559
+ scope (str): Must be one of 'WorkingMemory', 'LongTermMemory', 'UserMemory', or 'OuterMemory'.
560
+ filter (dict, optional): Filter conditions with 'and' or 'or' logic for search results.
561
+ Example: {"and": [{"id": "xxx"}, {"A": "yyy"}]} or {"or": [{"id": "xxx"}, {"A": "yyy"}]}
562
+ knowledgebase_ids (list[str], optional): List of knowledgebase IDs to filter by.
563
+
564
+ Returns:
565
+ list[dict]: Full list of memory items under this scope.
566
+ """
567
+ logger.info(
568
+ f"[get_all_memory_items] scope: {scope}, filter: {filter}, knowledgebase_ids: {knowledgebase_ids}"
569
+ )
570
+ print(
571
+ f"[get_all_memory_items] scope: {scope}, filter: {filter}, knowledgebase_ids: {knowledgebase_ids}"
572
+ )
573
+
574
+ user_name = kwargs.get("user_name") if kwargs.get("user_name") else self.config.user_name
575
+ if scope not in {"WorkingMemory", "LongTermMemory", "UserMemory", "OuterMemory"}:
576
+ raise ValueError(f"Unsupported memory type scope: {scope}")
577
+
578
+ where_clauses = ["n.memory_type = $scope"]
579
+ params = {"scope": scope}
580
+
581
+ # Build user_name filter with knowledgebase_ids support (OR relationship) using common method
582
+ user_name_conditions, user_name_params = self._build_user_name_and_kb_ids_conditions_cypher(
583
+ user_name=user_name,
584
+ knowledgebase_ids=knowledgebase_ids,
585
+ default_user_name=self.config.user_name,
586
+ node_alias="n",
587
+ )
588
+
589
+ # Add user_name WHERE clause
590
+ if user_name_conditions:
591
+ if len(user_name_conditions) == 1:
592
+ where_clauses.append(user_name_conditions[0])
593
+ else:
594
+ where_clauses.append(f"({' OR '.join(user_name_conditions)})")
595
+
596
+ # Build filter conditions using common method
597
+ filter_conditions, filter_params = self._build_filter_conditions_cypher(
598
+ filter=filter,
599
+ param_counter_start=0,
600
+ node_alias="n",
601
+ )
602
+ where_clauses.extend(filter_conditions)
603
+
604
+ where_clause = "WHERE " + " AND ".join(where_clauses)
605
+
606
+ # Add user_name and knowledgebase_ids parameters using common method
607
+ params.update(user_name_params)
608
+
609
+ # Add filter parameters
610
+ if filter_params:
611
+ params.update(filter_params)
612
+
613
+ query = f"""
614
+ MATCH (n:Memory)
615
+ {where_clause}
616
+ RETURN n
617
+ """
618
+ logger.info(f"[get_all_memory_items] query: {query}, params: {params}")
619
+ print(f"[get_all_memory_items] query: {query}, params: {params}")
620
+
621
+ with self.driver.session(database=self.db_name) as session:
622
+ results = session.run(query, params)
623
+ return [self._parse_node(dict(record["n"])) for record in results]
624
+
625
+ def get_by_metadata(
626
+ self,
627
+ filters: list[dict[str, Any]],
628
+ user_name: str | None = None,
629
+ filter: dict | None = None,
630
+ knowledgebase_ids: list[str] | None = None,
631
+ ) -> list[str]:
632
+ """
633
+ Retrieve node IDs that match given metadata filters.
634
+ Supports exact match.
635
+
636
+ Args:
637
+ filters: List of filter dicts like:
638
+ [
639
+ {"field": "key", "op": "in", "value": ["A", "B"]},
640
+ {"field": "confidence", "op": ">=", "value": 80},
641
+ {"field": "tags", "op": "contains", "value": "AI"},
642
+ ...
643
+ ]
644
+ filter (dict, optional): Filter conditions with 'and' or 'or' logic for search results.
645
+ knowledgebase_ids (list[str], optional): List of knowledgebase IDs to filter by user_name.
646
+
647
+ Returns:
648
+ list[str]: Node IDs whose metadata match the filter conditions. (AND logic).
649
+
650
+ Notes:
651
+ - Supports structured querying such as tag/category/importance/time filtering.
652
+ - Can be used for faceted recall or prefiltering before embedding rerank.
653
+ """
654
+ logger.info(
655
+ f"[get_by_metadata] filters: {filters},user_name: {user_name},filter: {filter},knowledgebase_ids: {knowledgebase_ids}"
656
+ )
657
+ print(
658
+ f"[get_by_metadata] filters: {filters},user_name: {user_name},filter: {filter},knowledgebase_ids: {knowledgebase_ids}"
659
+ )
660
+ user_name = user_name if user_name else self.config.user_name
661
+ where_clauses = []
662
+ params = {}
663
+
664
+ for i, f in enumerate(filters):
665
+ field = f["field"]
666
+ op = f.get("op", "=")
667
+ value = f["value"]
668
+ param_key = f"val{i}"
669
+
670
+ # Build WHERE clause
671
+ if op == "=":
672
+ where_clauses.append(f"n.{field} = ${param_key}")
673
+ params[param_key] = value
674
+ elif op == "in":
675
+ where_clauses.append(f"n.{field} IN ${param_key}")
676
+ params[param_key] = value
677
+ elif op == "contains":
678
+ where_clauses.append(f"ANY(x IN ${param_key} WHERE x IN n.{field})")
679
+ params[param_key] = value
680
+ elif op == "starts_with":
681
+ where_clauses.append(f"n.{field} STARTS WITH ${param_key}")
682
+ params[param_key] = value
683
+ elif op == "ends_with":
684
+ where_clauses.append(f"n.{field} ENDS WITH ${param_key}")
685
+ params[param_key] = value
686
+ elif op in [">", ">=", "<", "<="]:
687
+ where_clauses.append(f"n.{field} {op} ${param_key}")
688
+ params[param_key] = value
689
+ else:
690
+ raise ValueError(f"Unsupported operator: {op}")
691
+
692
+ # Build user_name filter with knowledgebase_ids support (OR relationship)
693
+ user_name_conditions = []
694
+ if not self.config.use_multi_db and (self.config.user_name or user_name):
695
+ user_name_conditions.append("n.user_name = $user_name")
696
+
697
+ # Add knowledgebase_ids conditions (checking user_name field in the data)
698
+ if knowledgebase_ids and isinstance(knowledgebase_ids, list) and len(knowledgebase_ids) > 0:
699
+ for idx, kb_id in enumerate(knowledgebase_ids):
700
+ if isinstance(kb_id, str):
701
+ param_name = f"kb_id_{idx}"
702
+ user_name_conditions.append(f"n.user_name = ${param_name}")
703
+
704
+ # Add user_name WHERE clause
705
+ if user_name_conditions:
706
+ if len(user_name_conditions) == 1:
707
+ where_clauses.append(user_name_conditions[0])
708
+ else:
709
+ where_clauses.append(f"({' OR '.join(user_name_conditions)})")
710
+
711
+ # Add filter conditions (supports "or" and "and" logic)
712
+ filter_params = {}
713
+ if filter:
714
+ # Helper function to build a single filter condition
715
+ def build_filter_condition(
716
+ condition_dict: dict, param_counter: list
717
+ ) -> tuple[str, dict]:
718
+ """Build a WHERE condition for a single filter item.
719
+
720
+ Args:
721
+ condition_dict: A dict like {"id": "xxx"} or {"A": "xxx"} or {"created_at": {"gt": "2025-11-01"}}
722
+ param_counter: List to track parameter counter for unique param names
723
+
724
+ Returns:
725
+ Tuple of (condition_string, parameters_dict)
726
+ """
727
+ condition_parts = []
728
+ filter_params_inner = {}
729
+
730
+ for key, value in condition_dict.items():
731
+ # Check if value is a dict with comparison operators (gt, lt, gte, lte)
732
+ if isinstance(value, dict):
733
+ # Handle comparison operators: gt (greater than), lt (less than), gte (greater than or equal), lte (less than or equal)
734
+ for op, op_value in value.items():
735
+ if op in ("gt", "lt", "gte", "lte"):
736
+ # Map operator to Cypher operator
737
+ cypher_op_map = {"gt": ">", "lt": "<", "gte": ">=", "lte": "<="}
738
+ cypher_op = cypher_op_map[op]
739
+
740
+ # All fields are stored as flat properties in Neo4j
741
+ param_name = f"filter_meta_{key}_{op}_{param_counter[0]}"
742
+ param_counter[0] += 1
743
+ filter_params_inner[param_name] = op_value
744
+
745
+ # Check if field is a date field (created_at, updated_at, etc.)
746
+ # Use datetime() function for date comparisons
747
+ if key in ("created_at", "updated_at") or key.endswith("_at"):
748
+ condition_parts.append(
749
+ f"n.{key} {cypher_op} datetime(${param_name})"
750
+ )
751
+ else:
752
+ condition_parts.append(f"n.{key} {cypher_op} ${param_name}")
753
+ else:
754
+ # All fields are stored as flat properties in Neo4j (simple equality)
755
+ param_name = f"filter_meta_{key}_{param_counter[0]}"
756
+ param_counter[0] += 1
757
+ filter_params_inner[param_name] = value
758
+ condition_parts.append(f"n.{key} = ${param_name}")
759
+
760
+ return " AND ".join(condition_parts), filter_params_inner
761
+
762
+ # Process filter structure
763
+ param_counter = [
764
+ len(filters)
765
+ ] # Use list to allow modification in nested function, start from len(filters) to avoid conflicts
766
+
767
+ if isinstance(filter, dict):
768
+ if "or" in filter:
769
+ # OR logic: at least one condition must match
770
+ or_conditions = []
771
+ for condition in filter["or"]:
772
+ if isinstance(condition, dict):
773
+ condition_str, filter_params_inner = build_filter_condition(
774
+ condition, param_counter
775
+ )
776
+ if condition_str:
777
+ or_conditions.append(f"({condition_str})")
778
+ filter_params.update(filter_params_inner)
779
+ if or_conditions:
780
+ where_clauses.append(f"({' OR '.join(or_conditions)})")
781
+
782
+ elif "and" in filter:
783
+ # AND logic: all conditions must match
784
+ for condition in filter["and"]:
785
+ if isinstance(condition, dict):
786
+ condition_str, filter_params_inner = build_filter_condition(
787
+ condition, param_counter
788
+ )
789
+ if condition_str:
790
+ where_clauses.append(f"({condition_str})")
791
+ filter_params.update(filter_params_inner)
792
+
793
+ where_str = " AND ".join(where_clauses) if where_clauses else ""
794
+ if where_str:
795
+ query = f"MATCH (n:Memory) WHERE {where_str} RETURN n.id AS id"
796
+ else:
797
+ query = "MATCH (n:Memory) RETURN n.id AS id"
798
+
799
+ # Add user_name parameter
800
+ if not self.config.use_multi_db and (self.config.user_name or user_name):
801
+ params["user_name"] = user_name
802
+
803
+ # Add knowledgebase_ids parameters
804
+ if knowledgebase_ids and isinstance(knowledgebase_ids, list) and len(knowledgebase_ids) > 0:
805
+ for idx, kb_id in enumerate(knowledgebase_ids):
806
+ if isinstance(kb_id, str):
807
+ param_name = f"kb_id_{idx}"
808
+ params[param_name] = kb_id
809
+
810
+ # Merge filter parameters
811
+ if filter_params:
812
+ params.update(filter_params)
813
+ logger.info(f"[get_by_metadata] query: {query},params: {params}")
814
+ print(f"[get_by_metadata] query: {query},params: {params}")
815
+
816
+ with self.driver.session(database=self.db_name) as session:
817
+ result = session.run(query, params)
818
+ return [record["id"] for record in result]
819
+
820
+ def delete_node_by_prams(
821
+ self,
822
+ writable_cube_ids: list[str],
823
+ memory_ids: list[str] | None = None,
824
+ file_ids: list[str] | None = None,
825
+ filter: dict | None = None,
826
+ ) -> int:
827
+ """
828
+ Delete nodes by memory_ids, file_ids, or filter.
829
+
830
+ Args:
831
+ writable_cube_ids (list[str]): List of cube IDs (user_name) to filter nodes. Required parameter.
832
+ memory_ids (list[str], optional): List of memory node IDs to delete.
833
+ file_ids (list[str], optional): List of file node IDs to delete.
834
+ filter (dict, optional): Filter dictionary to query matching nodes for deletion.
835
+
836
+ Returns:
837
+ int: Number of nodes deleted.
838
+ """
839
+ logger.info(
840
+ f"[delete_node_by_prams] memory_ids: {memory_ids}, file_ids: {file_ids}, filter: {filter}, writable_cube_ids: {writable_cube_ids}"
841
+ )
842
+ print(
843
+ f"[delete_node_by_prams] memory_ids: {memory_ids}, file_ids: {file_ids}, filter: {filter}, writable_cube_ids: {writable_cube_ids}"
844
+ )
845
+
846
+ # Validate writable_cube_ids
847
+ if not writable_cube_ids or len(writable_cube_ids) == 0:
848
+ raise ValueError("writable_cube_ids is required and cannot be empty")
849
+
850
+ # Build WHERE conditions separately for memory_ids and file_ids
851
+ where_clauses = []
852
+ params = {}
853
+
854
+ # Build user_name condition from writable_cube_ids (OR relationship - match any cube_id)
855
+ user_name_conditions = []
856
+ for idx, cube_id in enumerate(writable_cube_ids):
857
+ param_name = f"cube_id_{idx}"
858
+ user_name_conditions.append(f"n.user_name = ${param_name}")
859
+ params[param_name] = cube_id
860
+
861
+ # Handle memory_ids: query n.id
862
+ if memory_ids and len(memory_ids) > 0:
863
+ where_clauses.append("n.id IN $memory_ids")
864
+ params["memory_ids"] = memory_ids
865
+
866
+ # Handle file_ids: query n.file_ids field
867
+ # All file_ids must be present in the array field (AND relationship)
868
+ if file_ids and len(file_ids) > 0:
869
+ file_id_and_conditions = []
870
+ for idx, file_id in enumerate(file_ids):
871
+ param_name = f"file_id_{idx}"
872
+ params[param_name] = file_id
873
+ # Check if this file_id is in the file_ids array field
874
+ file_id_and_conditions.append(f"${param_name} IN n.file_ids")
875
+ if file_id_and_conditions:
876
+ # Use AND to require all file_ids to be present
877
+ where_clauses.append(f"({' AND '.join(file_id_and_conditions)})")
878
+
879
+ # Query nodes by filter if provided
880
+ filter_ids = []
881
+ if filter:
882
+ # Use get_by_metadata with empty filters list and filter
883
+ filter_ids = self.get_by_metadata(
884
+ filters=[],
885
+ user_name=None,
886
+ filter=filter,
887
+ knowledgebase_ids=writable_cube_ids,
888
+ )
889
+
890
+ # If filter returned IDs, add condition for them
891
+ if filter_ids:
892
+ where_clauses.append("n.id IN $filter_ids")
893
+ params["filter_ids"] = filter_ids
894
+
895
+ # If no conditions (except user_name), return 0
896
+ if not where_clauses:
897
+ logger.warning(
898
+ "[delete_node_by_prams] No nodes to delete (no memory_ids, file_ids, or filter provided)"
899
+ )
900
+ return 0
901
+
902
+ # Build WHERE clause
903
+ # First, combine memory_ids, file_ids, and filter conditions with OR (any condition can match)
904
+ data_conditions = " OR ".join([f"({clause})" for clause in where_clauses])
905
+
906
+ # Then, combine with user_name condition using AND (must match user_name AND one of the data conditions)
907
+ user_name_where = " OR ".join(user_name_conditions)
908
+ ids_where = f"({user_name_where}) AND ({data_conditions})"
909
+
910
+ logger.info(
911
+ f"[delete_node_by_prams] Deleting nodes - memory_ids: {memory_ids}, file_ids: {file_ids}, filter: {filter}"
912
+ )
913
+ print(
914
+ f"[delete_node_by_prams] Deleting nodes - memory_ids: {memory_ids}, file_ids: {file_ids}, filter: {filter}"
915
+ )
916
+
917
+ # First count matching nodes to get accurate count
918
+ count_query = f"MATCH (n:Memory) WHERE {ids_where} RETURN count(n) AS node_count"
919
+ logger.info(f"[delete_node_by_prams] count_query: {count_query}")
920
+ print(f"[delete_node_by_prams] count_query: {count_query}")
921
+
922
+ # Then delete nodes
923
+ delete_query = f"MATCH (n:Memory) WHERE {ids_where} DETACH DELETE n"
924
+ logger.info(f"[delete_node_by_prams] delete_query: {delete_query}")
925
+ print(f"[delete_node_by_prams] delete_query: {delete_query}")
926
+ print(f"[delete_node_by_prams] params: {params}")
927
+
928
+ deleted_count = 0
929
+ try:
930
+ with self.driver.session(database=self.db_name) as session:
931
+ # Count nodes before deletion
932
+ count_result = session.run(count_query, **params)
933
+ count_record = count_result.single()
934
+ expected_count = 0
935
+ if count_record:
936
+ expected_count = count_record["node_count"] or 0
937
+
938
+ # Delete nodes
939
+ session.run(delete_query, **params)
940
+ # Use the count from before deletion as the actual deleted count
941
+ deleted_count = expected_count
942
+
943
+ except Exception as e:
944
+ logger.error(f"[delete_node_by_prams] Failed to delete nodes: {e}", exc_info=True)
945
+ raise
946
+
947
+ logger.info(f"[delete_node_by_prams] Successfully deleted {deleted_count} nodes")
948
+ return deleted_count
949
+
950
+ def clear(self, user_name: str | None = None) -> None:
951
+ """
952
+ Clear the entire graph if the target database exists.
953
+ """
954
+ # Step 1: clear Neo4j part via parent logic
955
+ user_name = user_name if user_name else self.config.user_name
956
+ super().clear(user_name=user_name)
957
+
958
+ # Step2: Clear the vector db
959
+ try:
960
+ items = self.vec_db.get_by_filter({"user_name": user_name})
961
+ if items:
962
+ self.vec_db.delete([item.id for item in items])
963
+ logger.info(f"Cleared {len(items)} vectors for user '{user_name}'.")
964
+ else:
965
+ logger.info(f"No vectors to clear for user '{user_name}'.")
966
+ except Exception as e:
967
+ logger.warning(f"Failed to clear vector DB for user '{user_name}': {e}")
968
+
969
+ def drop_database(self) -> None:
970
+ """
971
+ Permanently delete the entire database this instance is using.
972
+ WARNING: This operation is destructive and cannot be undone.
973
+ """
974
+ raise ValueError(
975
+ f"Refusing to drop protected database: {self.db_name} in "
976
+ f"Shared Database Multi-Tenant mode"
977
+ )
978
+
979
+ # Avoid enterprise feature
980
    def _ensure_database_exists(self):
        """Intentional no-op override.

        Per the note above, database creation/management is skipped here
        because it relies on a Neo4j enterprise feature; in shared-database
        multi-tenant mode there is nothing to ensure.
        """
        pass
982
+
983
    def _create_basic_property_indexes(self) -> None:
        """
        Create standard B-tree indexes on memory_type, created_at,
        and updated_at fields.
        Create standard B-tree indexes on user_name when use Shared Database
        Multi-Tenant Mode.

        All index creation is idempotent (IF NOT EXISTS) and best-effort:
        failures are logged as warnings, never raised.
        """
        # Step 1: Neo4j indexes
        try:
            with self.driver.session(database=self.db_name) as session:
                # Speeds up scope lookups (queries filter on n.memory_type).
                session.run("""
                    CREATE INDEX memory_type_index IF NOT EXISTS
                    FOR (n:Memory) ON (n.memory_type)
                    """)
                logger.debug("Index 'memory_type_index' ensured.")

                # Supports date-range filters on created_at.
                session.run("""
                    CREATE INDEX memory_created_at_index IF NOT EXISTS
                    FOR (n:Memory) ON (n.created_at)
                    """)
                logger.debug("Index 'memory_created_at_index' ensured.")

                # Supports date-range filters on updated_at.
                session.run("""
                    CREATE INDEX memory_updated_at_index IF NOT EXISTS
                    FOR (n:Memory) ON (n.updated_at)
                    """)
                logger.debug("Index 'memory_updated_at_index' ensured.")

                # In shared-database (single-DB) mode every query filters on
                # n.user_name, so index it too.
                if not self.config.use_multi_db and self.config.user_name:
                    session.run(
                        """
                        CREATE INDEX memory_user_name_index IF NOT EXISTS
                        FOR (n:Memory) ON (n.user_name)
                        """
                    )
                    logger.debug("Index 'memory_user_name_index' ensured.")
        except Exception as e:
            logger.warning(f"Failed to create basic property indexes: {e}")

        # Step 2: VectorDB indexes (optional capability on the vec_db backend)
        try:
            if hasattr(self.vec_db, "ensure_payload_indexes"):
                self.vec_db.ensure_payload_indexes(["user_name", "memory_type", "status"])
            else:
                logger.debug("VecDB does not support payload index creation; skipping.")
        except Exception as e:
            logger.warning(f"Failed to create VecDB payload indexes: {e}")
1030
+
1031
+ def _parse_node(self, node_data: dict[str, Any]) -> dict[str, Any]:
1032
+ """Parse Neo4j node and optionally fetch embedding from vector DB."""
1033
+ node = node_data.copy()
1034
+
1035
+ # Convert Neo4j datetime to string
1036
+ for time_field in ("created_at", "updated_at"):
1037
+ if time_field in node and hasattr(node[time_field], "isoformat"):
1038
+ node[time_field] = node[time_field].isoformat()
1039
+ node.pop("user_name", None)
1040
+ # serialization
1041
+ if node["sources"]:
1042
+ for idx in range(len(node["sources"])):
1043
+ if not (
1044
+ isinstance(node["sources"][idx], str)
1045
+ and node["sources"][idx][0] == "{"
1046
+ and node["sources"][idx][0] == "}"
1047
+ ):
1048
+ break
1049
+ node["sources"][idx] = json.loads(node["sources"][idx])
1050
+ new_node = {"id": node.pop("id"), "memory": node.pop("memory", ""), "metadata": node}
1051
+ try:
1052
+ vec_item = self.vec_db.get_by_id(new_node["id"])
1053
+ if vec_item and vec_item.vector:
1054
+ new_node["metadata"]["embedding"] = vec_item.vector
1055
+ except Exception as e:
1056
+ logger.warning(f"Failed to fetch vector for node {new_node['id']}: {e}")
1057
+ new_node["metadata"]["embedding"] = None
1058
+ return new_node