MemoryOS 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Factory for creating internet retrievers."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, ClassVar
|
|
4
|
+
|
|
5
|
+
from memos.configs.internet_retriever import InternetRetrieverConfigFactory
|
|
6
|
+
from memos.embedders.base import BaseEmbedder
|
|
7
|
+
from memos.mem_reader.factory import MemReaderFactory
|
|
8
|
+
from memos.memories.textual.tree_text_memory.retrieve.bochasearch import BochaAISearchRetriever
|
|
9
|
+
from memos.memories.textual.tree_text_memory.retrieve.internet_retriever import (
|
|
10
|
+
InternetGoogleRetriever,
|
|
11
|
+
)
|
|
12
|
+
from memos.memories.textual.tree_text_memory.retrieve.xinyusearch import XinyuSearchRetriever
|
|
13
|
+
from memos.memos_tools.singleton import singleton_factory
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class InternetRetrieverFactory:
    """Factory class for creating internet retriever instances."""

    # Maps config backend names to retriever implementations.
    backend_to_class: ClassVar[dict[str, Any]] = {
        "google": InternetGoogleRetriever,
        "bing": InternetGoogleRetriever,  # TODO: Implement BingRetriever
        "xinyu": XinyuSearchRetriever,
        "bocha": BochaAISearchRetriever,
    }

    @classmethod
    @singleton_factory()
    def from_config(
        cls, config_factory: InternetRetrieverConfigFactory, embedder: BaseEmbedder
    ) -> InternetGoogleRetriever | XinyuSearchRetriever | BochaAISearchRetriever | None:
        """
        Create internet retriever from configuration.

        Args:
            config_factory: Internet retriever configuration
            embedder: Embedder instance for generating embeddings

        Returns:
            A retriever instance matching the configured backend, or None
            if no backend is configured.

        Raises:
            ValueError: If the configured backend is not supported.
        """
        if config_factory.backend is None:
            return None

        backend = config_factory.backend
        if backend not in cls.backend_to_class:
            raise ValueError(f"Invalid internet retriever backend: {backend}")

        retriever_class = cls.backend_to_class[backend]
        config = config_factory.config

        # Create retriever with appropriate parameters; each backend has a
        # slightly different constructor signature.
        if backend == "google":
            return retriever_class(
                api_key=config.api_key,
                search_engine_id=config.search_engine_id,
                embedder=embedder,
                max_results=config.max_results,
                num_per_request=config.num_per_request,
            )
        elif backend == "bing":
            # TODO: Implement a dedicated Bing retriever; reuses the Google
            # retriever class for now (see backend_to_class).
            return retriever_class(
                api_key=config.api_key,
                search_engine_id=None,  # Bing doesn't use search_engine_id
                embedder=embedder,
                max_results=config.max_results,
                num_per_request=config.num_per_request,
            )
        elif backend == "xinyu":
            return retriever_class(
                access_key=config.api_key,  # Use api_key as access_key for xinyu
                search_engine_id=config.search_engine_id,
                embedder=embedder,
                reader=MemReaderFactory.from_config(config.reader),
                max_results=config.max_results,
            )
        elif backend == "bocha":
            return retriever_class(
                access_key=config.api_key,  # Use api_key as access_key for bocha
                embedder=embedder,
                reader=MemReaderFactory.from_config(config.reader),
                max_results=config.max_results,
            )
        else:
            # Defensive: only reachable if backend_to_class gains an entry
            # without a matching construction branch above.
            raise ValueError(f"Unsupported backend: {backend}")

    @classmethod
    def create_google_retriever(
        cls, api_key: str, search_engine_id: str, embedder: BaseEmbedder
    ) -> InternetGoogleRetriever:
        """
        Create Google Custom Search retriever.

        Args:
            api_key: Google API key
            search_engine_id: Google Custom Search Engine ID
            embedder: Embedder instance

        Returns:
            InternetRetriever instance
        """
        return InternetGoogleRetriever(api_key, search_engine_id, embedder)
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
from string import Template
|
|
5
|
+
|
|
6
|
+
from memos.memories.textual.item import TextualMemoryItem
|
|
7
|
+
from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
|
|
8
|
+
from memos.memories.textual.tree_text_memory.retrieve.utils import REASON_PROMPT
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MemoryReasoner:
    """
    Memory reasoner that performs reasoning and knowledge synthesis
    over retrieved memory items using a language model.
    """

    def __init__(self, llm):
        # LLM client used to judge which memories answer the task.
        self.llm = llm

    def reason(
        self, query: str, ranked_memories: list, parsed_goal: ParsedTaskGoal
    ) -> list[TextualMemoryItem]:
        """
        Filter ranked memories down to the items the LLM selects for the query.

        Args:
            query (str): Original user query description.
            ranked_memories (list): Candidate memory items, best first.
            parsed_goal (ParsedTaskGoal): Structured goal from TaskGoalParser
                (not referenced by the prompt itself).

        Returns:
            list[TextualMemoryItem]: Subset of ranked_memories whose ids were
            selected by the model, preserving their original order.
        """
        detail_lines = [f"[{m.id}] {m.metadata.key}: {m.memory}" for m in ranked_memories]
        prompt = Template(REASON_PROMPT).substitute(
            task=query, detailed_memory_list="\n".join(detail_lines)
        )

        raw = self.llm.generate([{"role": "user", "content": prompt}])
        # Some LLM clients return a message object, others a bare string.
        text = raw.content if hasattr(raw, "content") else raw

        # Keep only the memories whose ids the model picked.
        chosen = set(self._parse_selected_ids(text))
        return [m for m in ranked_memories if m.id in chosen]

    def _parse_selected_ids(self, response_text: str) -> list[str]:
        """
        Extract memory ids from the model response.

        Accepts a JSON object carrying a "selected_ids" field; any other
        input falls back to scanning for 36-character UUID-like tokens.
        """
        try:
            decoded = json.loads(response_text)
        except json.JSONDecodeError:
            decoded = None
        if isinstance(decoded, dict) and "selected_ids" in decoded:
            return decoded["selected_ids"]
        return re.findall(r"[a-f0-9\-]{36}", response_text)  # UUID pattern fallback
|
|
@@ -0,0 +1,497 @@
|
|
|
1
|
+
import concurrent.futures
|
|
2
|
+
|
|
3
|
+
from memos.context.context import ContextThreadPoolExecutor
|
|
4
|
+
from memos.embedders.factory import OllamaEmbedder
|
|
5
|
+
from memos.graph_dbs.neo4j import Neo4jGraphDB
|
|
6
|
+
from memos.log import get_logger
|
|
7
|
+
from memos.memories.textual.item import TextualMemoryItem
|
|
8
|
+
from memos.memories.textual.tree_text_memory.retrieve.bm25_util import EnhancedBM25
|
|
9
|
+
from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
logger = get_logger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class GraphMemoryRetriever:
|
|
16
|
+
"""
|
|
17
|
+
Unified memory retriever that combines both graph-based and vector-based retrieval logic.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
def __init__(
|
|
21
|
+
self,
|
|
22
|
+
graph_store: Neo4jGraphDB,
|
|
23
|
+
embedder: OllamaEmbedder,
|
|
24
|
+
bm25_retriever: EnhancedBM25 | None = None,
|
|
25
|
+
include_embedding: bool = False,
|
|
26
|
+
):
|
|
27
|
+
self.graph_store = graph_store
|
|
28
|
+
self.embedder = embedder
|
|
29
|
+
self.bm25_retriever = bm25_retriever
|
|
30
|
+
self.max_workers = 10
|
|
31
|
+
self.filter_weight = 0.6
|
|
32
|
+
self.use_bm25 = bool(self.bm25_retriever)
|
|
33
|
+
self.include_embedding = include_embedding
|
|
34
|
+
|
|
35
|
+
    def retrieve(
        self,
        query: str,
        parsed_goal: ParsedTaskGoal,
        top_k: int,
        memory_scope: str,
        query_embedding: list[list[float]] | None = None,
        search_filter: dict | None = None,
        search_priority: dict | None = None,
        user_name: str | None = None,
        id_filter: dict | None = None,
        use_fast_graph: bool = False,
    ) -> list[TextualMemoryItem]:
        """
        Perform hybrid memory retrieval:
        - Run graph-based lookup from dispatch plan.
        - Run vector similarity search from embedded query.
        - Optionally run BM25 recall (when a BM25 retriever is configured)
          and full-text recall (when use_fast_graph is set) in parallel.
        - Merge and return combined result set, deduplicated by item id.

        Args:
            query (str): Original task query (used by the BM25 channel).
            parsed_goal (ParsedTaskGoal): parsed goal; its `keys` attribute
                feeds the full-text channel.
            top_k (int): Number of candidates requested per channel.
            memory_scope (str): One of ['WorkingMemory', 'LongTermMemory',
                'UserMemory', 'ToolSchemaMemory', 'ToolTrajectoryMemory'].
            query_embedding (list of embedding): list of embeddings of query.
            search_filter (dict, optional): Optional metadata filters for
                graph/vector/full-text results.
            search_priority (dict, optional): priority hints for vector and
                full-text search.
            user_name (str, optional): restrict recall to this user's data.
            id_filter (dict, optional): filter applied to the BM25 channel only.
            use_fast_graph (bool): enables the fast graph path and the
                full-text channel.
        Returns:
            list: Combined memory items (order is graph, then vector, then
            BM25, then full-text; later duplicates replace earlier ones).
        Raises:
            ValueError: if memory_scope is not a supported scope name.
        """
        if memory_scope not in [
            "WorkingMemory",
            "LongTermMemory",
            "UserMemory",
            "ToolSchemaMemory",
            "ToolTrajectoryMemory",
        ]:
            raise ValueError(f"Unsupported memory scope: {memory_scope}")

        if memory_scope == "WorkingMemory":
            # For working memory, retrieve all entries (no session-oriented filtering)
            working_memories = self.graph_store.get_all_memory_items(
                scope="WorkingMemory",
                include_embedding=self.include_embedding,
                user_name=user_name,
                filter=search_filter,
                status="activated",
            )
            # Truncate to top_k; no ranking is applied for working memory.
            return [TextualMemoryItem.from_dict(record) for record in working_memories[:top_k]]

        # NOTE: up to 4 tasks may be submitted to a 3-worker pool; the fourth
        # simply queues until a worker frees up.
        with ContextThreadPoolExecutor(max_workers=3) as executor:
            # Structured graph-based retrieval
            future_graph = executor.submit(
                self._graph_recall,
                parsed_goal,
                memory_scope,
                user_name,
                use_fast_graph=use_fast_graph,
            )
            # Vector similarity search
            future_vector = executor.submit(
                self._vector_recall,
                query_embedding or [],
                memory_scope,
                top_k,
                search_filter=search_filter,
                search_priority=search_priority,
                user_name=user_name,
            )
            if self.use_bm25:
                # Lexical recall; note it uses id_filter, not search_filter.
                future_bm25 = executor.submit(
                    self._bm25_recall,
                    query,
                    parsed_goal,
                    memory_scope,
                    top_k=top_k,
                    user_name=user_name,
                    search_filter=id_filter,
                )
            if use_fast_graph:
                # Full-text recall driven by the parsed goal's key terms.
                future_fulltext = executor.submit(
                    self._fulltext_recall,
                    query_words=parsed_goal.keys or [],
                    memory_scope=memory_scope,
                    top_k=top_k,
                    search_filter=search_filter,
                    search_priority=search_priority,
                    user_name=user_name,
                )

            # The conditional futures are only read under the same guards
            # that created them, so no unbound-name risk here.
            graph_results = future_graph.result()
            vector_results = future_vector.result()
            bm25_results = future_bm25.result() if self.use_bm25 else []
            fulltext_results = future_fulltext.result() if use_fast_graph else []

        # Merge and deduplicate by ID
        combined = {
            item.id: item
            for item in graph_results + vector_results + bm25_results + fulltext_results
        }

        return list(combined.values())
|
|
136
|
+
|
|
137
|
+
def retrieve_from_cube(
|
|
138
|
+
self,
|
|
139
|
+
top_k: int,
|
|
140
|
+
memory_scope: str,
|
|
141
|
+
query_embedding: list[list[float]] | None = None,
|
|
142
|
+
cube_name: str = "memos_cube01",
|
|
143
|
+
user_name: str | None = None,
|
|
144
|
+
) -> list[TextualMemoryItem]:
|
|
145
|
+
"""
|
|
146
|
+
Perform hybrid memory retrieval:
|
|
147
|
+
- Run graph-based lookup from dispatch plan.
|
|
148
|
+
- Run vector similarity search from embedded query.
|
|
149
|
+
- Merge and return combined result set.
|
|
150
|
+
|
|
151
|
+
Args:
|
|
152
|
+
top_k (int): Number of candidates to return.
|
|
153
|
+
memory_scope (str): One of ['working', 'long_term', 'user'].
|
|
154
|
+
query_embedding(list of embedding): list of embedding of query
|
|
155
|
+
cube_name: specify cube_name
|
|
156
|
+
|
|
157
|
+
Returns:
|
|
158
|
+
list: Combined memory items.
|
|
159
|
+
"""
|
|
160
|
+
if memory_scope not in ["WorkingMemory", "LongTermMemory", "UserMemory"]:
|
|
161
|
+
raise ValueError(f"Unsupported memory scope: {memory_scope}")
|
|
162
|
+
|
|
163
|
+
graph_results = self._vector_recall(
|
|
164
|
+
query_embedding, memory_scope, top_k, cube_name=cube_name, user_name=user_name
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
for result_i in graph_results:
|
|
168
|
+
result_i.metadata.memory_type = "OuterMemory"
|
|
169
|
+
# Merge and deduplicate by ID
|
|
170
|
+
combined = {item.id: item for item in graph_results}
|
|
171
|
+
|
|
172
|
+
return list(combined.values())
|
|
173
|
+
|
|
174
|
+
def retrieve_from_mixed(
|
|
175
|
+
self,
|
|
176
|
+
top_k: int,
|
|
177
|
+
memory_scope: str | None = None,
|
|
178
|
+
query_embedding: list[list[float]] | None = None,
|
|
179
|
+
search_filter: dict | None = None,
|
|
180
|
+
user_name: str | None = None,
|
|
181
|
+
) -> list[TextualMemoryItem]:
|
|
182
|
+
"""Retrieve from mixed and memory"""
|
|
183
|
+
vector_results = self._vector_recall(
|
|
184
|
+
query_embedding or [],
|
|
185
|
+
memory_scope,
|
|
186
|
+
top_k,
|
|
187
|
+
search_filter=search_filter,
|
|
188
|
+
user_name=user_name,
|
|
189
|
+
) # Merge and deduplicate by ID
|
|
190
|
+
combined = {item.id: item for item in vector_results}
|
|
191
|
+
return list(combined.values())
|
|
192
|
+
|
|
193
|
+
def _graph_recall(
|
|
194
|
+
self, parsed_goal: ParsedTaskGoal, memory_scope: str, user_name: str | None = None, **kwargs
|
|
195
|
+
) -> list[TextualMemoryItem]:
|
|
196
|
+
"""
|
|
197
|
+
Perform structured node-based retrieval from Neo4j.
|
|
198
|
+
- keys must match exactly (n.key IN keys)
|
|
199
|
+
- tags must overlap with at least 2 input tags
|
|
200
|
+
- scope filters by memory_type if provided
|
|
201
|
+
"""
|
|
202
|
+
use_fast_graph = kwargs.get("use_fast_graph", False)
|
|
203
|
+
|
|
204
|
+
def process_node(node):
|
|
205
|
+
meta = node.get("metadata", {})
|
|
206
|
+
node_key = meta.get("key")
|
|
207
|
+
node_tags = meta.get("tags", []) or []
|
|
208
|
+
|
|
209
|
+
keep = False
|
|
210
|
+
# key equals to node_key
|
|
211
|
+
if parsed_goal.keys and node_key in parsed_goal.keys:
|
|
212
|
+
keep = True
|
|
213
|
+
# overlap tags more than 2
|
|
214
|
+
elif parsed_goal.tags:
|
|
215
|
+
node_tags_list = [tag.lower() for tag in node_tags]
|
|
216
|
+
overlap = len(set(node_tags_list) & set(parsed_goal.tags))
|
|
217
|
+
if overlap >= 2:
|
|
218
|
+
keep = True
|
|
219
|
+
|
|
220
|
+
if keep:
|
|
221
|
+
return TextualMemoryItem.from_dict(node)
|
|
222
|
+
return None
|
|
223
|
+
|
|
224
|
+
if not use_fast_graph:
|
|
225
|
+
candidate_ids = set()
|
|
226
|
+
|
|
227
|
+
# 1) key-based OR branch
|
|
228
|
+
if parsed_goal.keys:
|
|
229
|
+
key_filters = [
|
|
230
|
+
{"field": "key", "op": "in", "value": parsed_goal.keys},
|
|
231
|
+
{"field": "memory_type", "op": "=", "value": memory_scope},
|
|
232
|
+
]
|
|
233
|
+
key_ids = self.graph_store.get_by_metadata(key_filters, user_name=user_name)
|
|
234
|
+
candidate_ids.update(key_ids)
|
|
235
|
+
|
|
236
|
+
# 2) tag-based OR branch
|
|
237
|
+
if parsed_goal.tags:
|
|
238
|
+
tag_filters = [
|
|
239
|
+
{"field": "tags", "op": "contains", "value": parsed_goal.tags},
|
|
240
|
+
{"field": "memory_type", "op": "=", "value": memory_scope},
|
|
241
|
+
]
|
|
242
|
+
tag_ids = self.graph_store.get_by_metadata(tag_filters, user_name=user_name)
|
|
243
|
+
candidate_ids.update(tag_ids)
|
|
244
|
+
|
|
245
|
+
# No matches → return empty
|
|
246
|
+
if not candidate_ids:
|
|
247
|
+
return []
|
|
248
|
+
|
|
249
|
+
# Load nodes and post-filter
|
|
250
|
+
node_dicts = self.graph_store.get_nodes(
|
|
251
|
+
list(candidate_ids), include_embedding=self.include_embedding, user_name=user_name
|
|
252
|
+
)
|
|
253
|
+
|
|
254
|
+
final_nodes = []
|
|
255
|
+
for node in node_dicts:
|
|
256
|
+
meta = node.get("metadata", {})
|
|
257
|
+
node_key = meta.get("key")
|
|
258
|
+
node_tags = meta.get("tags", []) or []
|
|
259
|
+
|
|
260
|
+
keep = False
|
|
261
|
+
# key equals to node_key
|
|
262
|
+
if parsed_goal.keys and node_key in parsed_goal.keys:
|
|
263
|
+
keep = True
|
|
264
|
+
# overlap tags more than 2
|
|
265
|
+
elif parsed_goal.tags:
|
|
266
|
+
overlap = len(set(node_tags) & set(parsed_goal.tags))
|
|
267
|
+
if overlap >= 2:
|
|
268
|
+
keep = True
|
|
269
|
+
if keep:
|
|
270
|
+
final_nodes.append(TextualMemoryItem.from_dict(node))
|
|
271
|
+
return final_nodes
|
|
272
|
+
else:
|
|
273
|
+
candidate_ids = set()
|
|
274
|
+
|
|
275
|
+
# 1) key-based OR branch
|
|
276
|
+
if parsed_goal.keys:
|
|
277
|
+
key_filters = [
|
|
278
|
+
{"field": "key", "op": "in", "value": parsed_goal.keys},
|
|
279
|
+
{"field": "memory_type", "op": "=", "value": memory_scope},
|
|
280
|
+
]
|
|
281
|
+
key_ids = self.graph_store.get_by_metadata(
|
|
282
|
+
key_filters, user_name=user_name, status="activated"
|
|
283
|
+
)
|
|
284
|
+
candidate_ids.update(key_ids)
|
|
285
|
+
|
|
286
|
+
# 2) tag-based OR branch
|
|
287
|
+
if parsed_goal.tags:
|
|
288
|
+
tag_filters = [
|
|
289
|
+
{"field": "tags", "op": "contains", "value": parsed_goal.tags},
|
|
290
|
+
{"field": "memory_type", "op": "=", "value": memory_scope},
|
|
291
|
+
]
|
|
292
|
+
tag_ids = self.graph_store.get_by_metadata(
|
|
293
|
+
tag_filters, user_name=user_name, status="activated"
|
|
294
|
+
)
|
|
295
|
+
candidate_ids.update(tag_ids)
|
|
296
|
+
|
|
297
|
+
# No matches → return empty
|
|
298
|
+
if not candidate_ids:
|
|
299
|
+
return []
|
|
300
|
+
|
|
301
|
+
# Load nodes and post-filter
|
|
302
|
+
node_dicts = self.graph_store.get_nodes(
|
|
303
|
+
list(candidate_ids), include_embedding=self.include_embedding, user_name=user_name
|
|
304
|
+
)
|
|
305
|
+
|
|
306
|
+
final_nodes = []
|
|
307
|
+
with ContextThreadPoolExecutor(max_workers=3) as executor:
|
|
308
|
+
futures = {
|
|
309
|
+
executor.submit(process_node, node): i for i, node in enumerate(node_dicts)
|
|
310
|
+
}
|
|
311
|
+
temp_results = [None] * len(node_dicts)
|
|
312
|
+
|
|
313
|
+
for future in concurrent.futures.as_completed(futures):
|
|
314
|
+
original_index = futures[future]
|
|
315
|
+
result = future.result()
|
|
316
|
+
temp_results[original_index] = result
|
|
317
|
+
|
|
318
|
+
final_nodes = [result for result in temp_results if result is not None]
|
|
319
|
+
return final_nodes
|
|
320
|
+
|
|
321
|
+
def _vector_recall(
|
|
322
|
+
self,
|
|
323
|
+
query_embedding: list[list[float]],
|
|
324
|
+
memory_scope: str,
|
|
325
|
+
top_k: int = 20,
|
|
326
|
+
max_num: int = 20,
|
|
327
|
+
status: str = "activated",
|
|
328
|
+
cube_name: str | None = None,
|
|
329
|
+
search_filter: dict | None = None,
|
|
330
|
+
search_priority: dict | None = None,
|
|
331
|
+
user_name: str | None = None,
|
|
332
|
+
) -> list[TextualMemoryItem]:
|
|
333
|
+
"""
|
|
334
|
+
Perform vector-based similarity retrieval using query embedding.
|
|
335
|
+
# TODO: tackle with post-filter and pre-filter(5.18+) better.
|
|
336
|
+
"""
|
|
337
|
+
if not query_embedding:
|
|
338
|
+
return []
|
|
339
|
+
|
|
340
|
+
def search_single(vec, search_priority=None, search_filter=None):
|
|
341
|
+
return (
|
|
342
|
+
self.graph_store.search_by_embedding(
|
|
343
|
+
vector=vec,
|
|
344
|
+
top_k=top_k,
|
|
345
|
+
status=status,
|
|
346
|
+
scope=memory_scope,
|
|
347
|
+
cube_name=cube_name,
|
|
348
|
+
search_filter=search_priority,
|
|
349
|
+
filter=search_filter,
|
|
350
|
+
user_name=user_name,
|
|
351
|
+
)
|
|
352
|
+
or []
|
|
353
|
+
)
|
|
354
|
+
|
|
355
|
+
def search_path_a():
|
|
356
|
+
"""Path A: search without priority"""
|
|
357
|
+
path_a_hits = []
|
|
358
|
+
with ContextThreadPoolExecutor() as executor:
|
|
359
|
+
futures = [
|
|
360
|
+
executor.submit(search_single, vec, None, search_filter)
|
|
361
|
+
for vec in query_embedding[:max_num]
|
|
362
|
+
]
|
|
363
|
+
for f in concurrent.futures.as_completed(futures):
|
|
364
|
+
path_a_hits.extend(f.result() or [])
|
|
365
|
+
return path_a_hits
|
|
366
|
+
|
|
367
|
+
def search_path_b():
|
|
368
|
+
"""Path B: search with priority"""
|
|
369
|
+
if not search_priority:
|
|
370
|
+
return []
|
|
371
|
+
path_b_hits = []
|
|
372
|
+
with ContextThreadPoolExecutor() as executor:
|
|
373
|
+
futures = [
|
|
374
|
+
executor.submit(search_single, vec, search_priority, search_filter)
|
|
375
|
+
for vec in query_embedding[:max_num]
|
|
376
|
+
]
|
|
377
|
+
for f in concurrent.futures.as_completed(futures):
|
|
378
|
+
path_b_hits.extend(f.result() or [])
|
|
379
|
+
return path_b_hits
|
|
380
|
+
|
|
381
|
+
# Execute both paths concurrently
|
|
382
|
+
all_hits = []
|
|
383
|
+
with ContextThreadPoolExecutor(max_workers=2) as executor:
|
|
384
|
+
path_a_future = executor.submit(search_path_a)
|
|
385
|
+
path_b_future = executor.submit(search_path_b)
|
|
386
|
+
|
|
387
|
+
all_hits.extend(path_a_future.result())
|
|
388
|
+
all_hits.extend(path_b_future.result())
|
|
389
|
+
|
|
390
|
+
if not all_hits:
|
|
391
|
+
return []
|
|
392
|
+
|
|
393
|
+
# merge and deduplicate
|
|
394
|
+
unique_ids = {r["id"] for r in all_hits if r.get("id")}
|
|
395
|
+
node_dicts = (
|
|
396
|
+
self.graph_store.get_nodes(
|
|
397
|
+
list(unique_ids),
|
|
398
|
+
include_embedding=self.include_embedding,
|
|
399
|
+
cube_name=cube_name,
|
|
400
|
+
user_name=user_name,
|
|
401
|
+
)
|
|
402
|
+
or []
|
|
403
|
+
)
|
|
404
|
+
return [TextualMemoryItem.from_dict(n) for n in node_dicts]
|
|
405
|
+
|
|
406
|
+
def _bm25_recall(
|
|
407
|
+
self,
|
|
408
|
+
query: str,
|
|
409
|
+
parsed_goal: ParsedTaskGoal,
|
|
410
|
+
memory_scope: str,
|
|
411
|
+
top_k: int = 20,
|
|
412
|
+
user_name: str | None = None,
|
|
413
|
+
search_filter: dict | None = None,
|
|
414
|
+
) -> list[TextualMemoryItem]:
|
|
415
|
+
"""
|
|
416
|
+
Perform BM25-based retrieval.
|
|
417
|
+
"""
|
|
418
|
+
if not self.bm25_retriever:
|
|
419
|
+
return []
|
|
420
|
+
key_filters = [
|
|
421
|
+
{"field": "memory_type", "op": "=", "value": memory_scope},
|
|
422
|
+
]
|
|
423
|
+
# corpus_name is user_name + user_id
|
|
424
|
+
corpus_name = f"{user_name}" if user_name else ""
|
|
425
|
+
if search_filter is not None:
|
|
426
|
+
for key in search_filter:
|
|
427
|
+
value = search_filter[key]
|
|
428
|
+
key_filters.append({"field": key, "op": "=", "value": value})
|
|
429
|
+
corpus_name += "".join(list(search_filter.values()))
|
|
430
|
+
candidate_ids = self.graph_store.get_by_metadata(
|
|
431
|
+
key_filters, user_name=user_name, status="activated"
|
|
432
|
+
)
|
|
433
|
+
node_dicts = self.graph_store.get_nodes(
|
|
434
|
+
list(candidate_ids), include_embedding=self.include_embedding, user_name=user_name
|
|
435
|
+
)
|
|
436
|
+
|
|
437
|
+
bm25_query = " ".join(list({query, *parsed_goal.keys}))
|
|
438
|
+
bm25_results = self.bm25_retriever.search(
|
|
439
|
+
bm25_query, node_dicts, top_k=top_k, corpus_name=corpus_name
|
|
440
|
+
)
|
|
441
|
+
|
|
442
|
+
return [TextualMemoryItem.from_dict(n) for n in bm25_results]
|
|
443
|
+
|
|
444
|
+
def _fulltext_recall(
|
|
445
|
+
self,
|
|
446
|
+
query_words: list[str],
|
|
447
|
+
memory_scope: str,
|
|
448
|
+
top_k: int = 20,
|
|
449
|
+
max_num: int = 5,
|
|
450
|
+
status: str = "activated",
|
|
451
|
+
cube_name: str | None = None,
|
|
452
|
+
search_filter: dict | None = None,
|
|
453
|
+
search_priority: dict | None = None,
|
|
454
|
+
user_name: str | None = None,
|
|
455
|
+
):
|
|
456
|
+
"""Perform fulltext-based retrieval.
|
|
457
|
+
Args:
|
|
458
|
+
query_words: list of query words
|
|
459
|
+
memory_scope: memory scope
|
|
460
|
+
top_k: top k results
|
|
461
|
+
max_num: max number of query words
|
|
462
|
+
status: status
|
|
463
|
+
cube_name: cube name
|
|
464
|
+
search_filter: search filter
|
|
465
|
+
search_priority: search priority
|
|
466
|
+
user_name: user name
|
|
467
|
+
Returns:
|
|
468
|
+
list of TextualMemoryItem
|
|
469
|
+
"""
|
|
470
|
+
if not query_words:
|
|
471
|
+
return []
|
|
472
|
+
logger.info(f"[FULLTEXT] query_words: {query_words}")
|
|
473
|
+
all_hits = self.graph_store.search_by_fulltext(
|
|
474
|
+
query_words=query_words,
|
|
475
|
+
top_k=top_k,
|
|
476
|
+
status=status,
|
|
477
|
+
scope=memory_scope,
|
|
478
|
+
cube_name=cube_name,
|
|
479
|
+
search_filter=search_priority,
|
|
480
|
+
filter=search_filter,
|
|
481
|
+
user_name=user_name,
|
|
482
|
+
)
|
|
483
|
+
if not all_hits:
|
|
484
|
+
return []
|
|
485
|
+
|
|
486
|
+
# merge and deduplicate
|
|
487
|
+
unique_ids = {r["id"] for r in all_hits if r.get("id")}
|
|
488
|
+
node_dicts = (
|
|
489
|
+
self.graph_store.get_nodes(
|
|
490
|
+
list(unique_ids),
|
|
491
|
+
include_embedding=self.include_embedding,
|
|
492
|
+
cube_name=cube_name,
|
|
493
|
+
user_name=user_name,
|
|
494
|
+
)
|
|
495
|
+
or []
|
|
496
|
+
)
|
|
497
|
+
return [TextualMemoryItem.from_dict(n) for n in node_dicts]
|