memoryos-2.0.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0
memos/graph_dbs/neo4j_community.py
@@ -0,0 +1,1058 @@
import json
import re

from datetime import datetime, timedelta
from typing import Any

from memos.configs.graph_db import Neo4jGraphDBConfig
from memos.graph_dbs.neo4j import Neo4jGraphDB, _flatten_info_fields, _prepare_node_metadata
from memos.log import get_logger
from memos.vec_dbs.factory import VecDBFactory
from memos.vec_dbs.item import VecDBItem


logger = get_logger(__name__)


class Neo4jCommunityGraphDB(Neo4jGraphDB):
    """
    Neo4j Community Edition graph memory store.

    Note:
        This class avoids Enterprise-only features:
        - No multi-database support
        - No vector index
        - No CREATE DATABASE
    """

    def __init__(self, config: Neo4jGraphDBConfig):
        assert config.auto_create is False
        assert config.use_multi_db is False
        # Init vector database
        self.vec_db = VecDBFactory.from_config(config.vec_config)
        # Call parent init
        super().__init__(config)

    def create_index(
        self,
        label: str = "Memory",
        vector_property: str = "embedding",
        dimensions: int = 1536,
        index_name: str = "memory_vector_index",
    ) -> None:
        """
        Create property indexes. Community Edition has no native vector index,
        so embeddings live in the external vector DB; only the basic B-tree
        indexes (memory_type, created_at, updated_at, user_name) are created here.
        """
        self._create_basic_property_indexes()

    def add_node(
        self, id: str, memory: str, metadata: dict[str, Any], user_name: str | None = None
    ) -> None:
        user_name = user_name if user_name else self.config.user_name
        if not self.config.use_multi_db and (self.config.user_name or user_name):
            metadata["user_name"] = user_name

        # Safely process metadata
        metadata = _prepare_node_metadata(metadata)

        # Initialize delete_time and delete_record_id fields
        metadata.setdefault("delete_time", "")
        metadata.setdefault("delete_record_id", "")

        # Serialize sources so they can be stored as a flat string[] property
        if metadata.get("sources"):
            for idx in range(len(metadata["sources"])):
                metadata["sources"][idx] = json.dumps(metadata["sources"][idx])

        # Extract required fields
        embedding = metadata.pop("embedding", None)
        if embedding is None:
            raise ValueError(f"Missing 'embedding' in metadata for node {id}")

        # Merge node and set metadata
        created_at = metadata.pop("created_at")
        updated_at = metadata.pop("updated_at")
        vector_sync_status = "success"

        try:
            # Write to Vector DB
            item = VecDBItem(
                id=id,
                vector=embedding,
                payload={
                    "memory": memory,
                    "vector_sync": vector_sync_status,
                    **metadata,  # unpack all metadata keys to top-level
                },
            )
            self.vec_db.add([item])
        except Exception as e:
            logger.warning(f"[VecDB] Vector insert failed for node {id}: {e}")
            vector_sync_status = "failed"

        metadata["vector_sync"] = vector_sync_status
        query = """
        MERGE (n:Memory {id: $id})
        SET n.memory = $memory,
            n.created_at = datetime($created_at),
            n.updated_at = datetime($updated_at),
            n += $metadata
        """
        with self.driver.session(database=self.db_name) as session:
            session.run(
                query,
                id=id,
                memory=memory,
                created_at=created_at,
                updated_at=updated_at,
                metadata=metadata,
            )
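
    # Illustrative call for the dual write path above (not part of the original
    # file). 'embedding', 'created_at', and 'updated_at' are the keys add_node
    # itself requires; the remaining metadata fields are schema assumptions:
    #
    #     db.add_node(
    #         id="mem-001",
    #         memory="User prefers dark mode.",
    #         metadata={
    #             "embedding": [0.01, -0.02, ...],  # required; popped before the Cypher MERGE
    #             "created_at": "2025-01-01T00:00:00Z",
    #             "updated_at": "2025-01-01T00:00:00Z",
    #             "memory_type": "UserMemory",
    #             "status": "activated",
    #             "sources": [{"type": "chat", "id": "msg-42"}],
    #         },
    #     )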

    def add_nodes_batch(self, nodes: list[dict[str, Any]], user_name: str | None = None) -> None:
        if not nodes:
            logger.warning("[add_nodes_batch] Empty nodes list, skipping")
            return

        effective_user_name = user_name if user_name else self.config.user_name

        vec_items: list[VecDBItem] = []
        prepared_nodes: list[dict[str, Any]] = []

        for node_data in nodes:
            try:
                node_id = node_data.get("id")
                memory = node_data.get("memory")
                metadata = node_data.get("metadata", {})

                if node_id is None or memory is None:
                    logger.warning("[add_nodes_batch] Skip invalid node: missing id/memory")
                    continue

                if not self.config.use_multi_db and (
                    self.config.user_name or effective_user_name
                ):
                    metadata["user_name"] = effective_user_name

                metadata = _prepare_node_metadata(metadata)
                metadata = _flatten_info_fields(metadata)

                # Initialize delete_time and delete_record_id fields
                metadata.setdefault("delete_time", "")
                metadata.setdefault("delete_record_id", "")

                embedding = metadata.pop("embedding", None)
                if embedding is None:
                    raise ValueError(f"Missing 'embedding' in metadata for node {node_id}")

                # Pop the timestamps before building the vector payload, so the
                # payload matches add_node() and a missing timestamp cannot leave
                # an orphaned item in the vector DB
                created_at = metadata.pop("created_at")
                updated_at = metadata.pop("updated_at")

                vector_sync_status = "success"
                vec_items.append(
                    VecDBItem(
                        id=node_id,
                        vector=embedding,
                        payload={
                            "memory": memory,
                            "vector_sync": vector_sync_status,
                            **metadata,
                        },
                    )
                )

                metadata["vector_sync"] = vector_sync_status

                prepared_nodes.append(
                    {
                        "id": node_id,
                        "memory": memory,
                        "created_at": created_at,
                        "updated_at": updated_at,
                        "metadata": metadata,
                    }
                )
            except Exception as e:
                logger.error(
                    f"[add_nodes_batch] Failed to prepare node {node_data.get('id', 'unknown')}: {e}",
                    exc_info=True,
                )
                continue

        if not prepared_nodes:
            logger.warning("[add_nodes_batch] No valid nodes to insert after preparation")
            return

        try:
            self.vec_db.add(vec_items)
        except Exception as e:
            logger.warning(f"[VecDB] batch insert failed: {e}")
            for node in prepared_nodes:
                node["metadata"]["vector_sync"] = "failed"

        query = """
        UNWIND $nodes AS node
        MERGE (n:Memory {id: node.id})
        SET n.memory = node.memory,
            n.created_at = datetime(node.created_at),
            n.updated_at = datetime(node.updated_at),
            n += node.metadata
        """

        nodes_data = [
            {
                "id": node["id"],
                "memory": node["memory"],
                "created_at": node["created_at"],
                "updated_at": node["updated_at"],
                "metadata": node["metadata"],
            }
            for node in prepared_nodes
        ]

        try:
            with self.driver.session(database=self.db_name) as session:
                session.run(query, nodes=nodes_data)
            logger.info(f"[add_nodes_batch] Successfully inserted {len(prepared_nodes)} nodes")
        except Exception as e:
            logger.error(f"[add_nodes_batch] Failed to add nodes: {e}", exc_info=True)
            raise

    def get_children_with_embeddings(
        self, id: str, user_name: str | None = None
    ) -> list[dict[str, Any]]:
        user_name = user_name if user_name else self.config.user_name
        where_user = ""
        params = {"id": id}

        if not self.config.use_multi_db and (self.config.user_name or user_name):
            where_user = "AND p.user_name = $user_name AND c.user_name = $user_name"
            params["user_name"] = user_name

        query = f"""
        MATCH (p:Memory)-[:PARENT]->(c:Memory)
        WHERE p.id = $id {where_user}
        RETURN c.id AS id, c.memory AS memory
        """

        with self.driver.session(database=self.db_name) as session:
            result = session.run(query, params)
            child_nodes = [{"id": r["id"], "memory": r["memory"]} for r in result]

        # Get embeddings from vector DB
        ids = [n["id"] for n in child_nodes]
        vec_items = {v.id: v.vector for v in self.vec_db.get_by_ids(ids)}

        # Merge results
        for node in child_nodes:
            node["embedding"] = vec_items.get(node["id"])

        return child_nodes
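
    # Illustrative return shape for the method above (not part of the original
    # file; the parent/child IDs are hypothetical):
    #
    #     db.get_children_with_embeddings("mem-parent")
    #     # -> [{"id": "mem-child-1", "memory": "...", "embedding": [0.01, ...]}, ...]
    #     # 'embedding' is None for children with no entry in the vector DB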

    # Search / recall operations
    def search_by_embedding(
        self,
        vector: list[float],
        top_k: int = 5,
        scope: str | None = None,
        status: str | None = None,
        threshold: float | None = None,
        search_filter: dict | None = None,
        user_name: str | None = None,
        filter: dict | None = None,
        knowledgebase_ids: list[str] | None = None,
        **kwargs,
    ) -> list[dict]:
        """
        Retrieve node IDs based on vector similarity using the external vector DB.

        Args:
            vector (list[float]): The embedding vector representing query semantics.
            top_k (int): Number of top similar nodes to retrieve.
            scope (str, optional): Memory type filter (e.g., 'WorkingMemory', 'LongTermMemory').
            status (str, optional): Node status filter (e.g., 'activated', 'archived').
            threshold (float, optional): Minimum similarity score threshold (0 ~ 1).
            search_filter (dict, optional): Additional metadata filters to apply.
            filter (dict, optional): Filter conditions with 'and' or 'or' logic for search results.
                Example: {"and": [{"id": "xxx"}, {"A": "yyy"}]} or {"or": [{"id": "xxx"}, {"A": "yyy"}]}
            knowledgebase_ids (list[str], optional): List of knowledgebase IDs to filter by.

        Returns:
            list[dict]: A list of dicts with 'id' and 'score', ordered by similarity.

        Notes:
            - This method uses an external vector database (not Neo4j) to perform the search.
            - If 'scope' is provided, it restricts results to nodes with matching memory_type.
            - If 'status' is provided, it further filters nodes by status.
            - If 'threshold' is provided, only results with score >= threshold will be returned.
            - If 'search_filter' is provided, it applies additional metadata-based filtering.
            - If 'filter' is provided, it applies complex filter conditions with AND/OR logic.
            - The returned IDs can be used to fetch full node data from Neo4j if needed.
        """
        user_name = user_name if user_name else self.config.user_name

        # First, perform vector search in external vector DB
        vec_filter = {}
        if scope:
            vec_filter["memory_type"] = scope
        if status:
            vec_filter["status"] = status
        vec_filter["vector_sync"] = "success"
        if kwargs.get("cube_name"):
            vec_filter["user_name"] = kwargs["cube_name"]
        else:
            vec_filter["user_name"] = user_name

        # Add search_filter conditions
        if search_filter:
            vec_filter.update(search_filter)

        # Perform vector search
        vec_results = []
        if self.vec_db:
            try:
                vec_results = self.vec_db.search(
                    query_vector=vector, top_k=top_k, filter=vec_filter
                )
            except Exception as e:
                logger.warning(f"[VecDB] search failed: {e}")

        # Filter by threshold
        if threshold is not None:
            vec_results = [r for r in vec_results if r.score is None or r.score >= threshold]

        # If no filter or knowledgebase_ids provided, return vector search results directly
        if not filter and not knowledgebase_ids:
            return [{"id": r.id, "score": r.score} for r in vec_results]

        # Extract IDs from vector search results
        vec_ids = [r.id for r in vec_results]
        if not vec_ids:
            return []

        # Build WHERE clause for Neo4j filtering
        where_clauses = ["n.id IN $vec_ids"]
        params = {"vec_ids": vec_ids}

        # Build user_name filter with knowledgebase_ids support (OR relationship) using common method
        user_name_conditions, user_name_params = self._build_user_name_and_kb_ids_conditions_cypher(
            user_name=user_name,
            knowledgebase_ids=knowledgebase_ids,
            default_user_name=self.config.user_name,
            node_alias="n",
        )

        # Add user_name WHERE clause
        if user_name_conditions:
            if len(user_name_conditions) == 1:
                where_clauses.append(user_name_conditions[0])
            else:
                where_clauses.append(f"({' OR '.join(user_name_conditions)})")

        # Build filter conditions using common method
        filter_conditions, filter_params = self._build_filter_conditions_cypher(
            filter=filter,
            param_counter_start=0,
            node_alias="n",
        )
        where_clauses.extend(filter_conditions)

        where_clause = "WHERE " + " AND ".join(where_clauses)

        # Add user_name and knowledgebase_ids parameters
        params.update(user_name_params)

        # Add filter parameters
        if filter_params:
            params.update(filter_params)

        # Query Neo4j to filter results
        query = f"""
        MATCH (n:Memory)
        {where_clause}
        RETURN n.id AS id
        """
        logger.info(f"[search_by_embedding] query: {query}, params: {params}")

        with self.driver.session(database=self.db_name) as session:
            neo4j_results = session.run(query, params)
            filtered_ids = {record["id"] for record in neo4j_results}

        # Filter vector results by Neo4j filtered IDs and return with scores
        filtered_results = [
            {"id": r.id, "score": r.score} for r in vec_results if r.id in filtered_ids
        ]

        return filtered_results
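
    # Illustrative call (not part of the original file; field names follow the
    # docstring above):
    #
    #     hits = db.search_by_embedding(
    #         vector=query_embedding,
    #         top_k=5,
    #         scope="LongTermMemory",
    #         status="activated",
    #         threshold=0.5,
    #         filter={"and": [{"created_at": {"gte": "2025-01-01"}}]},
    #     )
    #     # -> [{"id": "...", "score": 0.83}, ...]; vector search runs first,
    #     #    then Cypher post-filtering because 'filter' is set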

    def _normalize_date_string(self, date_str: str) -> str:
        """
        Normalize date string to ISO 8601 format for Neo4j datetime() function.

        Args:
            date_str: Date string in various formats (e.g., "2025-09-19", "2025-09-19T00:00:00Z")

        Returns:
            ISO 8601 formatted date string (e.g., "2025-09-19T00:00:00Z")
        """
        if not isinstance(date_str, str):
            return date_str

        # Check the simple date format (YYYY-MM-DD) first; a test for timezone
        # markers would otherwise match the "-" in a plain date and return it
        # unconverted. Day-boundary semantics for gt/lt/gte/lte are handled by
        # the caller (_normalize_condition_dates); start of day is the default.
        if re.match(r"^(\d{4})-(\d{2})-(\d{2})$", date_str):
            return f"{date_str}T00:00:00Z"

        # If already in ISO 8601 format with time or timezone, return as is
        if "T" in date_str or date_str.endswith("Z") or "+" in date_str:
            return date_str

        # If it's already a datetime string, try to parse and reformat
        try:
            dt = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
            return dt.isoformat().replace("+00:00", "Z")
        except (ValueError, AttributeError):
            # If parsing fails, return as is
            return date_str
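
    # Worked examples for the normalization above (illustrative only):
    #
    #     "2025-09-19"           -> "2025-09-19T00:00:00Z"  (plain date, start of day)
    #     "2025-09-19T08:30:00Z" -> unchanged               (already ISO 8601)
    #     "not-a-date"           -> unchanged               (parse failure falls through)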

    def _build_filter_conditions_cypher(
        self,
        filter: dict | None,
        param_counter_start: int = 0,
        node_alias: str = "node",
    ) -> tuple[list[str], dict[str, Any]]:
        """
        Build filter conditions for Cypher queries with date normalization.

        This method extends the parent class method by normalizing date strings
        to ISO 8601 format before building conditions.

        Args:
            filter: Filter dictionary with "or" or "and" logic
            param_counter_start: Starting value for parameter counter (to avoid conflicts)
            node_alias: Node alias in Cypher query (default: "node" or "n")

        Returns:
            Tuple of (condition_strings_list, parameters_dict)
        """
        normalized_filter = self._normalize_filter_dates(filter) if filter else filter

        # Call parent method with normalized filter
        return super()._build_filter_conditions_cypher(
            filter=normalized_filter,
            param_counter_start=param_counter_start,
            node_alias=node_alias,
        )

    def _normalize_filter_dates(self, filter: dict) -> dict:
        """
        Recursively normalize date strings in filter dictionary.

        Args:
            filter: Filter dictionary that may contain date strings

        Returns:
            Filter dictionary with normalized date strings
        """
        if not isinstance(filter, dict):
            return filter

        normalized = {}

        if "and" in filter:
            normalized["and"] = [
                self._normalize_condition_dates(cond) if isinstance(cond, dict) else cond
                for cond in filter["and"]
            ]
        elif "or" in filter:
            normalized["or"] = [
                self._normalize_condition_dates(cond) if isinstance(cond, dict) else cond
                for cond in filter["or"]
            ]
        else:
            # Single condition
            normalized = self._normalize_condition_dates(filter)

        return normalized

    def _normalize_condition_dates(self, condition: dict) -> dict:
        """
        Normalize date strings in a single condition dictionary.

        Args:
            condition: A condition dict like {"created_at": {"gt": "2025-09-19"}}

        Returns:
            Condition dict with normalized date strings
        """
        normalized = {}

        for key, value in condition.items():
            # Check if this is a date field
            is_date_field = key in ("created_at", "updated_at") or key.endswith("_at")

            if isinstance(value, dict):
                # Handle comparison operators (gt, lt, gte, lte)
                normalized_value = {}
                for op, op_value in value.items():
                    if op in ("gt", "lt", "gte", "lte") and is_date_field and isinstance(op_value, str):
                        # Check if it's a simple date format (YYYY-MM-DD)
                        if re.match(r"^(\d{4})-(\d{2})-(\d{2})$", op_value):
                            try:
                                dt = datetime.fromisoformat(op_value + "T00:00:00")
                                if op in ("gt", "gte"):
                                    # "gt"/"gte": compare against the start of the day
                                    normalized_value[op] = dt.isoformat() + "Z"
                                elif op == "lt":
                                    # "lt": compare against the start of the next day,
                                    # so all times on the named day are still included
                                    dt_next = dt + timedelta(days=1)
                                    normalized_value[op] = dt_next.isoformat() + "Z"
                                elif op == "lte":
                                    # "lte": compare against the end of the named day
                                    dt_end = dt + timedelta(days=1) - timedelta(microseconds=1)
                                    normalized_value[op] = dt_end.isoformat() + "Z"
                            except ValueError:
                                # If parsing fails, fall back to the generic normalization
                                normalized_value[op] = self._normalize_date_string(op_value)
                        else:
                            # Already in a more complex format, just normalize it
                            normalized_value[op] = self._normalize_date_string(op_value)
                    else:
                        normalized_value[op] = op_value
                normalized[key] = normalized_value
            else:
                normalized[key] = value

        return normalized
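
    # Worked examples for the day-boundary logic above (illustrative only):
    #
    #     {"created_at": {"gt": "2025-09-19"}}  -> {"created_at": {"gt": "2025-09-19T00:00:00Z"}}
    #     {"created_at": {"lt": "2025-11-29"}}  -> {"created_at": {"lt": "2025-11-30T00:00:00Z"}}
    #     {"created_at": {"lte": "2025-11-29"}} -> {"created_at": {"lte": "2025-11-29T23:59:59.999999Z"}}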

    def get_all_memory_items(
        self,
        scope: str,
        filter: dict | None = None,
        knowledgebase_ids: list[str] | None = None,
        **kwargs,
    ) -> list[dict]:
        """
        Retrieve all memory items of a specific memory_type.

        Args:
            scope (str): Must be one of 'WorkingMemory', 'LongTermMemory', 'UserMemory', or 'OuterMemory'.
            filter (dict, optional): Filter conditions with 'and' or 'or' logic for search results.
                Example: {"and": [{"id": "xxx"}, {"A": "yyy"}]} or {"or": [{"id": "xxx"}, {"A": "yyy"}]}
            knowledgebase_ids (list[str], optional): List of knowledgebase IDs to filter by.

        Returns:
            list[dict]: Full list of memory items under this scope.
        """
        logger.info(
            f"[get_all_memory_items] scope: {scope}, filter: {filter}, knowledgebase_ids: {knowledgebase_ids}"
        )

        user_name = kwargs.get("user_name") if kwargs.get("user_name") else self.config.user_name
        if scope not in {"WorkingMemory", "LongTermMemory", "UserMemory", "OuterMemory"}:
            raise ValueError(f"Unsupported memory type scope: {scope}")

        where_clauses = ["n.memory_type = $scope"]
        params = {"scope": scope}

        # Build user_name filter with knowledgebase_ids support (OR relationship) using common method
        user_name_conditions, user_name_params = self._build_user_name_and_kb_ids_conditions_cypher(
            user_name=user_name,
            knowledgebase_ids=knowledgebase_ids,
            default_user_name=self.config.user_name,
            node_alias="n",
        )

        # Add user_name WHERE clause
        if user_name_conditions:
            if len(user_name_conditions) == 1:
                where_clauses.append(user_name_conditions[0])
            else:
                where_clauses.append(f"({' OR '.join(user_name_conditions)})")

        # Build filter conditions using common method
        filter_conditions, filter_params = self._build_filter_conditions_cypher(
            filter=filter,
            param_counter_start=0,
            node_alias="n",
        )
        where_clauses.extend(filter_conditions)

        where_clause = "WHERE " + " AND ".join(where_clauses)

        # Add user_name, knowledgebase_ids, and filter parameters
        params.update(user_name_params)
        if filter_params:
            params.update(filter_params)

        query = f"""
        MATCH (n:Memory)
        {where_clause}
        RETURN n
        """
        logger.info(f"[get_all_memory_items] query: {query}, params: {params}")

        with self.driver.session(database=self.db_name) as session:
            results = session.run(query, params)
            return [self._parse_node(dict(record["n"])) for record in results]

    def get_by_metadata(
        self,
        filters: list[dict[str, Any]],
        user_name: str | None = None,
        filter: dict | None = None,
        knowledgebase_ids: list[str] | None = None,
    ) -> list[str]:
        """
        Retrieve node IDs that match given metadata filters.
        Supports exact match.

        Args:
            filters: List of filter dicts like:
                [
                    {"field": "key", "op": "in", "value": ["A", "B"]},
                    {"field": "confidence", "op": ">=", "value": 80},
                    {"field": "tags", "op": "contains", "value": "AI"},
                    ...
                ]
            filter (dict, optional): Filter conditions with 'and' or 'or' logic for search results.
            knowledgebase_ids (list[str], optional): List of knowledgebase IDs to filter by user_name.

        Returns:
            list[str]: Node IDs whose metadata match the filter conditions (AND logic).

        Notes:
            - Supports structured querying such as tag/category/importance/time filtering.
            - Can be used for faceted recall or prefiltering before embedding rerank.
        """
        logger.info(
            f"[get_by_metadata] filters: {filters}, user_name: {user_name}, filter: {filter}, knowledgebase_ids: {knowledgebase_ids}"
        )
        user_name = user_name if user_name else self.config.user_name
        where_clauses = []
        params = {}

        for i, f in enumerate(filters):
            field = f["field"]
            op = f.get("op", "=")
            value = f["value"]
            param_key = f"val{i}"

            # Build WHERE clause
            if op == "=":
                where_clauses.append(f"n.{field} = ${param_key}")
                params[param_key] = value
            elif op == "in":
                where_clauses.append(f"n.{field} IN ${param_key}")
                params[param_key] = value
            elif op == "contains":
                where_clauses.append(f"ANY(x IN ${param_key} WHERE x IN n.{field})")
                params[param_key] = value
            elif op == "starts_with":
                where_clauses.append(f"n.{field} STARTS WITH ${param_key}")
                params[param_key] = value
            elif op == "ends_with":
                where_clauses.append(f"n.{field} ENDS WITH ${param_key}")
                params[param_key] = value
            elif op in [">", ">=", "<", "<="]:
                where_clauses.append(f"n.{field} {op} ${param_key}")
                params[param_key] = value
            else:
                raise ValueError(f"Unsupported operator: {op}")

        # Build user_name filter with knowledgebase_ids support (OR relationship)
        user_name_conditions = []
        if not self.config.use_multi_db and (self.config.user_name or user_name):
            user_name_conditions.append("n.user_name = $user_name")

        # Add knowledgebase_ids conditions (checking user_name field in the data)
        if knowledgebase_ids and isinstance(knowledgebase_ids, list) and len(knowledgebase_ids) > 0:
            for idx, kb_id in enumerate(knowledgebase_ids):
                if isinstance(kb_id, str):
                    param_name = f"kb_id_{idx}"
                    user_name_conditions.append(f"n.user_name = ${param_name}")

        # Add user_name WHERE clause
        if user_name_conditions:
            if len(user_name_conditions) == 1:
                where_clauses.append(user_name_conditions[0])
            else:
                where_clauses.append(f"({' OR '.join(user_name_conditions)})")

        # Add filter conditions (supports "or" and "and" logic)
        filter_params = {}
        if filter:
            # Helper function to build a single filter condition
            def build_filter_condition(
                condition_dict: dict, param_counter: list
            ) -> tuple[str, dict]:
                """Build a WHERE condition for a single filter item.

                Args:
                    condition_dict: A dict like {"id": "xxx"} or {"A": "xxx"} or {"created_at": {"gt": "2025-11-01"}}
                    param_counter: List to track parameter counter for unique param names

                Returns:
                    Tuple of (condition_string, parameters_dict)
                """
                condition_parts = []
                filter_params_inner = {}

                for key, value in condition_dict.items():
                    # Check if value is a dict with comparison operators (gt, lt, gte, lte)
                    if isinstance(value, dict):
                        for op, op_value in value.items():
                            if op in ("gt", "lt", "gte", "lte"):
                                # Map operator to Cypher operator
                                cypher_op_map = {"gt": ">", "lt": "<", "gte": ">=", "lte": "<="}
                                cypher_op = cypher_op_map[op]

                                # All fields are stored as flat properties in Neo4j
                                param_name = f"filter_meta_{key}_{op}_{param_counter[0]}"
                                param_counter[0] += 1
                                filter_params_inner[param_name] = op_value

                                # Date fields are compared through datetime()
                                if key in ("created_at", "updated_at") or key.endswith("_at"):
                                    condition_parts.append(
                                        f"n.{key} {cypher_op} datetime(${param_name})"
                                    )
                                else:
                                    condition_parts.append(f"n.{key} {cypher_op} ${param_name}")
                    else:
                        # All fields are stored as flat properties in Neo4j (simple equality)
                        param_name = f"filter_meta_{key}_{param_counter[0]}"
                        param_counter[0] += 1
                        filter_params_inner[param_name] = value
                        condition_parts.append(f"n.{key} = ${param_name}")

                return " AND ".join(condition_parts), filter_params_inner

            # Process filter structure. The counter starts at len(filters) to avoid
            # clashes with the $valN parameters built above, and is a list so the
            # nested function can mutate it.
            param_counter = [len(filters)]

            if isinstance(filter, dict):
                if "or" in filter:
                    # OR logic: at least one condition must match
                    or_conditions = []
                    for condition in filter["or"]:
                        if isinstance(condition, dict):
                            condition_str, filter_params_inner = build_filter_condition(
                                condition, param_counter
                            )
                            if condition_str:
                                or_conditions.append(f"({condition_str})")
                                filter_params.update(filter_params_inner)
                    if or_conditions:
                        where_clauses.append(f"({' OR '.join(or_conditions)})")

                elif "and" in filter:
                    # AND logic: all conditions must match
                    for condition in filter["and"]:
                        if isinstance(condition, dict):
                            condition_str, filter_params_inner = build_filter_condition(
                                condition, param_counter
                            )
                            if condition_str:
                                where_clauses.append(f"({condition_str})")
                                filter_params.update(filter_params_inner)

        where_str = " AND ".join(where_clauses) if where_clauses else ""
        if where_str:
            query = f"MATCH (n:Memory) WHERE {where_str} RETURN n.id AS id"
        else:
            query = "MATCH (n:Memory) RETURN n.id AS id"

        # Add user_name parameter
        if not self.config.use_multi_db and (self.config.user_name or user_name):
            params["user_name"] = user_name

        # Add knowledgebase_ids parameters
        if knowledgebase_ids and isinstance(knowledgebase_ids, list) and len(knowledgebase_ids) > 0:
            for idx, kb_id in enumerate(knowledgebase_ids):
                if isinstance(kb_id, str):
                    param_name = f"kb_id_{idx}"
                    params[param_name] = kb_id

        # Merge filter parameters
        if filter_params:
            params.update(filter_params)
        logger.info(f"[get_by_metadata] query: {query}, params: {params}")

        with self.driver.session(database=self.db_name) as session:
            result = session.run(query, params)
            return [record["id"] for record in result]
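
    # Illustrative query shape (not part of the original file): the filters list
    #
    #     [{"field": "tags", "op": "contains", "value": ["AI"]},
    #      {"field": "confidence", "op": ">=", "value": 80}]
    #
    # is compiled to parameterized Cypher along the lines of
    #
    #     MATCH (n:Memory)
    #     WHERE ANY(x IN $val0 WHERE x IN n.tags) AND n.confidence >= $val1
    #           AND n.user_name = $user_name
    #     RETURN n.id AS id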

    def delete_node_by_prams(
        self,
        writable_cube_ids: list[str],
        memory_ids: list[str] | None = None,
        file_ids: list[str] | None = None,
        filter: dict | None = None,
    ) -> int:
        """
        Delete nodes by memory_ids, file_ids, or filter.

        Args:
            writable_cube_ids (list[str]): List of cube IDs (user_name) to filter nodes. Required parameter.
            memory_ids (list[str], optional): List of memory node IDs to delete.
            file_ids (list[str], optional): List of file node IDs to delete.
            filter (dict, optional): Filter dictionary to query matching nodes for deletion.

        Returns:
            int: Number of nodes deleted.
        """
        logger.info(
            f"[delete_node_by_prams] memory_ids: {memory_ids}, file_ids: {file_ids}, filter: {filter}, writable_cube_ids: {writable_cube_ids}"
        )

        # Validate writable_cube_ids
        if not writable_cube_ids or len(writable_cube_ids) == 0:
            raise ValueError("writable_cube_ids is required and cannot be empty")

        # Build WHERE conditions separately for memory_ids and file_ids
        where_clauses = []
        params = {}

        # Build user_name condition from writable_cube_ids (OR relationship - match any cube_id)
        user_name_conditions = []
        for idx, cube_id in enumerate(writable_cube_ids):
            param_name = f"cube_id_{idx}"
            user_name_conditions.append(f"n.user_name = ${param_name}")
            params[param_name] = cube_id

        # Handle memory_ids: query n.id
        if memory_ids and len(memory_ids) > 0:
            where_clauses.append("n.id IN $memory_ids")
            params["memory_ids"] = memory_ids

        # Handle file_ids: query the n.file_ids field.
        # All file_ids must be present in the array field (AND relationship).
        if file_ids and len(file_ids) > 0:
            file_id_and_conditions = []
            for idx, file_id in enumerate(file_ids):
                param_name = f"file_id_{idx}"
                params[param_name] = file_id
                # Check if this file_id is in the file_ids array field
                file_id_and_conditions.append(f"${param_name} IN n.file_ids")
            if file_id_and_conditions:
                # Use AND to require all file_ids to be present
                where_clauses.append(f"({' AND '.join(file_id_and_conditions)})")

        # Query nodes by filter if provided
        filter_ids = []
        if filter:
            # Use get_by_metadata with an empty filters list and the filter dict
            filter_ids = self.get_by_metadata(
                filters=[],
                user_name=None,
                filter=filter,
                knowledgebase_ids=writable_cube_ids,
            )

            # If filter returned IDs, add condition for them
            if filter_ids:
                where_clauses.append("n.id IN $filter_ids")
                params["filter_ids"] = filter_ids

        # If no conditions (except user_name), return 0
        if not where_clauses:
            logger.warning(
                "[delete_node_by_prams] No nodes to delete (no memory_ids, file_ids, or filter provided)"
            )
            return 0

        # Build WHERE clause: combine the memory_ids, file_ids, and filter
        # conditions with OR (any of them can match), then require the
        # user_name condition with AND.
        data_conditions = " OR ".join([f"({clause})" for clause in where_clauses])
        user_name_where = " OR ".join(user_name_conditions)
        ids_where = f"({user_name_where}) AND ({data_conditions})"

        # First count matching nodes to get an accurate count, then delete
        count_query = f"MATCH (n:Memory) WHERE {ids_where} RETURN count(n) AS node_count"
        delete_query = f"MATCH (n:Memory) WHERE {ids_where} DETACH DELETE n"
        logger.info(f"[delete_node_by_prams] count_query: {count_query}")
        logger.info(f"[delete_node_by_prams] delete_query: {delete_query}")

        deleted_count = 0
        try:
            with self.driver.session(database=self.db_name) as session:
                # Count nodes before deletion
                count_result = session.run(count_query, **params)
                count_record = count_result.single()
                expected_count = 0
                if count_record:
                    expected_count = count_record["node_count"] or 0

                # Delete nodes
                session.run(delete_query, **params)
                # Use the count from before deletion as the actual deleted count
                deleted_count = expected_count

        except Exception as e:
            logger.error(f"[delete_node_by_prams] Failed to delete nodes: {e}", exc_info=True)
            raise

        logger.info(f"[delete_node_by_prams] Successfully deleted {deleted_count} nodes")
        return deleted_count
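
    # Illustrative predicate shape (not part of the original file): with one
    # cube ID, a list of memory IDs, and one file ID, the generated WHERE
    # clause is
    #
    #     (n.user_name = $cube_id_0)
    #     AND ((n.id IN $memory_ids) OR (($file_id_0 IN n.file_ids)))
    #
    # i.e. the cube scope is mandatory, while the id / file / filter
    # conditions are alternatives.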

    def clear(self, user_name: str | None = None) -> None:
        """
        Clear the entire graph if the target database exists.
        """
        # Step 1: clear the Neo4j part via parent logic
        user_name = user_name if user_name else self.config.user_name
        super().clear(user_name=user_name)

        # Step 2: clear the vector DB
        try:
            items = self.vec_db.get_by_filter({"user_name": user_name})
            if items:
                self.vec_db.delete([item.id for item in items])
                logger.info(f"Cleared {len(items)} vectors for user '{user_name}'.")
            else:
                logger.info(f"No vectors to clear for user '{user_name}'.")
        except Exception as e:
            logger.warning(f"Failed to clear vector DB for user '{user_name}': {e}")

    def drop_database(self) -> None:
        """
        Permanently delete the entire database this instance is using.
        WARNING: This operation is destructive and cannot be undone.
        """
        raise ValueError(
            f"Refusing to drop protected database: {self.db_name} in "
            "Shared Database Multi-Tenant mode"
        )

    # Avoid Enterprise-only behavior: Community Edition cannot create databases,
    # so the existence check is a no-op
    def _ensure_database_exists(self):
        pass

    def _create_basic_property_indexes(self) -> None:
        """
        Create standard B-tree indexes on the memory_type, created_at,
        and updated_at fields, plus user_name when running in Shared
        Database Multi-Tenant mode.
        """
        # Step 1: Neo4j indexes
        try:
            with self.driver.session(database=self.db_name) as session:
                session.run("""
                    CREATE INDEX memory_type_index IF NOT EXISTS
                    FOR (n:Memory) ON (n.memory_type)
                """)
                logger.debug("Index 'memory_type_index' ensured.")

                session.run("""
                    CREATE INDEX memory_created_at_index IF NOT EXISTS
                    FOR (n:Memory) ON (n.created_at)
                """)
                logger.debug("Index 'memory_created_at_index' ensured.")

                session.run("""
                    CREATE INDEX memory_updated_at_index IF NOT EXISTS
                    FOR (n:Memory) ON (n.updated_at)
                """)
                logger.debug("Index 'memory_updated_at_index' ensured.")

                if not self.config.use_multi_db and self.config.user_name:
                    session.run(
                        """
                        CREATE INDEX memory_user_name_index IF NOT EXISTS
                        FOR (n:Memory) ON (n.user_name)
                        """
                    )
                    logger.debug("Index 'memory_user_name_index' ensured.")
        except Exception as e:
            logger.warning(f"Failed to create basic property indexes: {e}")

        # Step 2: VectorDB indexes
        try:
            if hasattr(self.vec_db, "ensure_payload_indexes"):
                self.vec_db.ensure_payload_indexes(["user_name", "memory_type", "status"])
            else:
                logger.debug("VecDB does not support payload index creation; skipping.")
        except Exception as e:
            logger.warning(f"Failed to create VecDB payload indexes: {e}")

    def _parse_node(self, node_data: dict[str, Any]) -> dict[str, Any]:
        """Parse a Neo4j node and optionally fetch its embedding from the vector DB."""
        node = node_data.copy()

        # Convert Neo4j datetime to string
        for time_field in ("created_at", "updated_at"):
            if time_field in node and hasattr(node[time_field], "isoformat"):
                node[time_field] = node[time_field].isoformat()
        node.pop("user_name", None)

        # Deserialize sources (stored as JSON strings by add_node); only decode
        # entries that look like JSON objects
        sources = node.get("sources")
        if sources:
            for idx in range(len(sources)):
                if not (
                    isinstance(sources[idx], str)
                    and sources[idx].startswith("{")
                    and sources[idx].endswith("}")
                ):
                    break
                sources[idx] = json.loads(sources[idx])

        new_node = {"id": node.pop("id"), "memory": node.pop("memory", ""), "metadata": node}
        try:
            vec_item = self.vec_db.get_by_id(new_node["id"])
            if vec_item and vec_item.vector:
                new_node["metadata"]["embedding"] = vec_item.vector
        except Exception as e:
            logger.warning(f"Failed to fetch vector for node {new_node['id']}: {e}")
            new_node["metadata"]["embedding"] = None
        return new_node