MemoryOS 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import traceback
|
|
2
|
+
|
|
3
|
+
from string import Template
|
|
4
|
+
|
|
5
|
+
from memos.llms.base import BaseLLM
|
|
6
|
+
from memos.log import get_logger
|
|
7
|
+
from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
|
|
8
|
+
from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import (
|
|
9
|
+
FastTokenizer,
|
|
10
|
+
parse_json_result,
|
|
11
|
+
)
|
|
12
|
+
from memos.memories.textual.tree_text_memory.retrieve.utils import TASK_PARSE_PROMPT
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
logger = get_logger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TaskGoalParser:
    """
    Unified TaskGoalParser:
    - mode == 'fast': directly use origin task_description
    - mode == 'fine': use LLM to parse structured keys/tags/memories
    """

    def __init__(self, llm: BaseLLM | None = None):
        """
        Args:
            llm: LLM used by 'fine' mode. May be omitted when only 'fast'
                mode is needed.
        """
        # The previous signature was `llm=BaseLLM`, which made the BaseLLM
        # *class* the default value. That default is always truthy, so the
        # "LLM not provided" guard in `parse` could never trigger.
        self.llm = llm
        self.tokenizer = FastTokenizer()
        # Number of attempts `_parse_response` makes on one LLM response.
        self.retries = 1

    def parse(
        self,
        task_description: str,
        context: str = "",
        conversation: list[dict] | None = None,
        mode: str = "fast",
        **kwargs,
    ) -> ParsedTaskGoal:
        """
        Parse user input into a ParsedTaskGoal.

        Args:
            task_description: The raw user task / query.
            context: Optional context string, stored on the result and (in
                'fine' mode) inserted into the LLM prompt.
            conversation: Optional former conversation as a list of dicts with
                'role' and 'content' keys; only used in 'fine' mode.
            mode: 'fast' (no LLM call) or 'fine' (LLM structured parse).
            **kwargs: Forwarded to the mode-specific parser. 'fast' mode reads
                `use_fast_graph` from here.

        Returns:
            ParsedTaskGoal: memories/keys/tags plus goal_type, rephrased_query,
            internet_search and context.

        Raises:
            ValueError: if `mode` is unknown, or if `mode == 'fine'` and no
                LLM was provided.
        """
        if mode == "fast":
            return self._parse_fast(task_description, context=context, **kwargs)
        if mode == "fine":
            if not self.llm:
                raise ValueError("LLM not provided for slow mode.")
            return self._parse_fine(task_description, context, conversation, **kwargs)
        raise ValueError(f"Unknown mode: {mode}")

    def _parse_fast(self, task_description: str, **kwargs) -> ParsedTaskGoal:
        """
        Fast mode: build the goal directly from the task description.

        When `use_fast_graph` is truthy in kwargs, the description is tokenized
        with `self.tokenizer.tokenize_mixed` and the tokens are used as both
        keys and tags. Otherwise the whole description is the only key and
        there are no tags. `context` (from kwargs) is stored on the result.
        """
        context = kwargs.get("context", "")
        if kwargs.get("use_fast_graph", False):
            desc_tokenized = self.tokenizer.tokenize_mixed(task_description)
            keys, tags = desc_tokenized, desc_tokenized
        else:
            keys, tags = [task_description], []

        return ParsedTaskGoal(
            memories=[task_description],
            keys=keys,
            tags=tags,
            goal_type="default",
            rephrased_query=task_description,
            internet_search=False,
            context=context,
        )

    def _parse_fine(
        self, query: str, context: str = "", conversation: list[dict] | None = None, **kwargs
    ) -> ParsedTaskGoal:
        """
        Slow mode: LLM structured parse.

        Renders TASK_PARSE_PROMPT with the query, context and conversation,
        sends it to the LLM as a single user message, and parses the reply.
        Any exception (prompt rendering, LLM call, response parsing) is logged
        and the method falls back to `_parse_fast(query, context=context)`.
        """
        try:
            if conversation:
                conversation_prompt = "\n".join(
                    f"{each['role']}: {each['content']}" for each in conversation
                )
            else:
                conversation_prompt = ""
            prompt = Template(TASK_PARSE_PROMPT).substitute(
                task=query.strip(), context=context, conversation=conversation_prompt
            )
            logger.info(f"Parsing Goal... LLM input is {prompt}")
            response = self.llm.generate(messages=[{"role": "user", "content": prompt}])
            logger.info(f"Parsing Goal... LLM Response is {response}")
            return self._parse_response(response, context=context)
        except Exception:
            # Best-effort: a failed fine parse must not break retrieval.
            # NOTE(review): the fallback does not forward **kwargs, so a
            # caller-supplied `use_fast_graph` is ignored here - confirm intended.
            logger.warning(f"Fail to fine-parse query {query}: {traceback.format_exc()}")
            return self._parse_fast(query, context=context)

    def _parse_response(self, response: str, **kwargs) -> ParsedTaskGoal:
        """
        Parse LLM JSON output safely.

        Args:
            response: Raw LLM reply, decoded with `parse_json_result`.
            **kwargs: `context` is read from here and stored on the result.

        Returns:
            ParsedTaskGoal built from the 'memories', 'keys', 'tags',
            'rephrased_instruction', 'internet_search' and 'goal_type' fields
            of the decoded JSON (missing fields get defaults).

        Raises:
            ValueError: if the response is empty or cannot be parsed after
                `self.retries` attempts (at least one attempt is made).
        """
        # Ensure at least one attempt
        attempts = max(1, getattr(self, "retries", 1))
        context = kwargs.get("context", "")

        for attempt_times in range(attempts):
            try:
                response_json = parse_json_result(response)
                if not response_json:
                    raise ValueError("Parsed JSON is empty")

                return ParsedTaskGoal(
                    memories=response_json.get("memories", []),
                    keys=response_json.get("keys", []),
                    tags=response_json.get("tags", []),
                    rephrased_query=response_json.get("rephrased_instruction", None),
                    internet_search=response_json.get("internet_search", False),
                    goal_type=response_json.get("goal_type", "default"),
                    context=context,
                )
            except Exception as e:
                # Only the final failed attempt is surfaced to the caller.
                if attempt_times == attempts - 1:
                    raise ValueError(
                        f"Failed to parse LLM output: {e}\nRaw response:\n{response} retried: {attempt_times + 1}/{attempts}"
                    ) from e
                continue
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# Prompt for task parsing
# Rendered with string.Template substitution; the placeholders are
# $conversation, $task and $context. The model is asked to reply with a JSON
# object holding: keys, tags, goal_type, rephrased_instruction,
# internet_search and memories.
TASK_PARSE_PROMPT = """
You are a task parsing expert. Given a user task instruction, optional former conversation and optional related memory context,extract the following structured information:
1. Keys: the high-level keywords directly relevant to the user’s task.
2. Tags: thematic tags to help categorize and retrieve related memories.
3. Goal Type: retrieval | qa | generation
4. Rephrased instruction: Give a rephrased task instruction based on the former conversation to make it less confusing to look alone. Make full use of information related to the query, including user's personal information, such as user's name, location, preferences, etc. If you think the task instruction is enough for search, or there is no former conversation, set "rephrased_instruction" to an empty string.
5. Need for internet search: If the user's task instruction only involves objective facts or can be completed without introducing external knowledge, set "internet_search" to False. Otherwise, set it to True.
6. Memories: Provide 2–5 short semantic expansions or rephrasings of the rephrased/original user task instruction. These are used for improved embedding search coverage. Each should be clear, concise, and meaningful for retrieval.

Former conversation (if any):
\"\"\"
$conversation
\"\"\"

Task description(User Question):
\"\"\"$task\"\"\"

Context (if any):
\"\"\"$context\"\"\"

Return strictly in this JSON format, note that the
keys/tags/rephrased_instruction/memories should use the same language as the
input query:
{
"keys": [...],
"tags": [...],
"goal_type": "retrieval | qa | generation",
"rephrased_instruction": "...", # return an empty string if the original instruction is easy enough to understand
"internet_search": true/false,
"memories": ["...", "...", ...]
}
"""


# Prompt asking the model to cluster memory cards and select the useful ones.
# Placeholders are ${task} and ${detailed_memory_list}; the code that renders
# this prompt is not visible here - presumably string.Template (TODO confirm).
# The model is asked to reply with JSON holding selected_ids and explanation.
REASON_PROMPT = """
You are a reasoning agent working with a memory system. You will synthesize knowledge from multiple memory cards to construct a meaningful response to the task below.

Task: ${task}

Memory cards (with metadata):
${detailed_memory_list}

Please perform:
1. Clustering by theme (topic/concept/fact)
2. Identify useful chains or connections
3. Return a curated list of memory card IDs with reasons.

Output in JSON:
{
"selected_ids": [...],
"explanation": "..."
}
"""
|
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
"""Xinyu Search API retriever for tree text memory."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import uuid
|
|
5
|
+
|
|
6
|
+
from concurrent.futures import as_completed
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
from memos.context.context import ContextThreadPoolExecutor
|
|
12
|
+
from memos.embedders.factory import OllamaEmbedder
|
|
13
|
+
from memos.log import get_logger
|
|
14
|
+
from memos.mem_reader.base import BaseMemReader
|
|
15
|
+
from memos.memories.textual.item import (
|
|
16
|
+
SearchedTreeNodeTextualMemoryMetadata,
|
|
17
|
+
SourceMessage,
|
|
18
|
+
TextualMemoryItem,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
logger = get_logger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class XinyuSearchAPI:
|
|
26
|
+
"""Xinyu Search API Client"""
|
|
27
|
+
|
|
28
|
+
def __init__(self, access_key: str, search_engine_id: str, max_results: int = 20):
|
|
29
|
+
"""
|
|
30
|
+
Initialize Xinyu Search API client
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
access_key: Xinyu API access key
|
|
34
|
+
max_results: Maximum number of results to retrieve
|
|
35
|
+
"""
|
|
36
|
+
self.access_key = access_key
|
|
37
|
+
self.max_results = max_results
|
|
38
|
+
|
|
39
|
+
# API configuration
|
|
40
|
+
self.config = {"url": search_engine_id}
|
|
41
|
+
|
|
42
|
+
self.headers = {
|
|
43
|
+
"User-Agent": "PostmanRuntime/7.39.0",
|
|
44
|
+
"Content-Type": "application/json",
|
|
45
|
+
"Accept": "*/*",
|
|
46
|
+
"Accept-Encoding": "gzip, deflate, br",
|
|
47
|
+
"Connection": "keep-alive",
|
|
48
|
+
"token": access_key,
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
def query_detail(self, body: dict | None = None, detail: bool = True) -> list[dict]:
|
|
52
|
+
"""
|
|
53
|
+
Query Xinyu search API for detailed results
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
body: Search parameters
|
|
57
|
+
detail: Whether to get detailed results
|
|
58
|
+
|
|
59
|
+
Returns:
|
|
60
|
+
List of search results
|
|
61
|
+
"""
|
|
62
|
+
res = []
|
|
63
|
+
try:
|
|
64
|
+
url = self.config["url"]
|
|
65
|
+
|
|
66
|
+
params = json.dumps(body)
|
|
67
|
+
resp = requests.request("POST", url, headers=self.headers, data=params)
|
|
68
|
+
res = json.loads(resp.text)["results"]
|
|
69
|
+
|
|
70
|
+
# If detail interface, return online part
|
|
71
|
+
if "search_type" in body:
|
|
72
|
+
res = res["online"]
|
|
73
|
+
|
|
74
|
+
if not detail:
|
|
75
|
+
for res_i in res:
|
|
76
|
+
res_i["summary"] = "「SUMMARY」" + res_i.get("summary", "")
|
|
77
|
+
|
|
78
|
+
except Exception:
|
|
79
|
+
import traceback
|
|
80
|
+
|
|
81
|
+
logger.error(f"xinyu search error: {traceback.format_exc()}")
|
|
82
|
+
return res
|
|
83
|
+
|
|
84
|
+
def search(self, query: str, max_results: int | None = None) -> list[dict]:
|
|
85
|
+
"""
|
|
86
|
+
Execute search request
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
query: Search query
|
|
90
|
+
max_results: Maximum number of results to return
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
List of search results
|
|
94
|
+
"""
|
|
95
|
+
if max_results is None:
|
|
96
|
+
max_results = self.max_results
|
|
97
|
+
|
|
98
|
+
body = {
|
|
99
|
+
"search_type": ["online"],
|
|
100
|
+
"online_search": {
|
|
101
|
+
"max_entries": max_results,
|
|
102
|
+
"cache_switch": False,
|
|
103
|
+
"baidu_field": {"switch": False, "mode": "relevance", "type": "page"},
|
|
104
|
+
"bing_field": {"switch": True, "mode": "relevance", "type": "page"},
|
|
105
|
+
"sogou_field": {"switch": False, "mode": "relevance", "type": "page"},
|
|
106
|
+
},
|
|
107
|
+
"request_id": "memos" + str(uuid.uuid4()),
|
|
108
|
+
"queries": query,
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
return self.query_detail(body)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class XinyuSearchRetriever:
|
|
115
|
+
"""Xinyu Search retriever that converts search results to TextualMemoryItem format"""
|
|
116
|
+
|
|
117
|
+
def __init__(
    self,
    access_key: str,
    search_engine_id: str,
    embedder: OllamaEmbedder,
    reader: BaseMemReader,
    max_results: int = 20,
):
    """
    Set up the retriever and its underlying Xinyu API client.

    Args:
        access_key: Xinyu API access key
        search_engine_id: Passed to XinyuSearchAPI together with the access key
        embedder: Embedder instance for generating embeddings
        reader: MemReader module to deal with internet contents
        max_results: Maximum number of results to retrieve
    """
    # Collaborators are stored as-is.
    self.embedder = embedder
    self.reader = reader
    # The API client performs the actual search requests.
    self.xinyu_api = XinyuSearchAPI(
        access_key,
        search_engine_id,
        max_results=max_results,
    )
|
|
137
|
+
|
|
138
|
+
def retrieve_from_internet(
    self, query: str, top_k: int = 10, parsed_goal=None, info=None, mode="fast"
) -> list[TextualMemoryItem]:
    """
    Search Xinyu for `query` and return the hits as de-duplicated memory items.

    Args:
        query: Search query
        top_k: Maximum number of raw search results requested from the API
        parsed_goal: Parsed task goal (optional), forwarded to result processing
        info (dict): Leave a record of memory consumption; forwarded to result
            processing.
        mode: Forwarded to result processing as the `mode` keyword.
    Returns:
        List of TextualMemoryItem, one per distinct `memory` text (the first
        item seen for each text is kept).
    """
    hits = self.xinyu_api.search(query, max_results=top_k)

    # Each hit is converted on a worker thread; a failing hit is logged and
    # skipped so the remaining hits are still returned.
    collected: list[TextualMemoryItem] = []
    with ContextThreadPoolExecutor(max_workers=8) as pool:
        pending = []
        for hit in hits:
            pending.append(
                pool.submit(self._process_result, hit, query, parsed_goal, info, mode=mode)
            )
        for done in as_completed(pending):
            try:
                collected.extend(done.result())
            except Exception as e:
                logger.error(f"Error processing search result: {e}")

    # De-duplicate on the memory text, keeping the first occurrence.
    first_by_memory = {}
    for item in collected:
        first_by_memory.setdefault(item.memory, item)

    return list(first_by_memory.values())
|
|
175
|
+
|
|
176
|
+
def _extract_entities(self, title: str, content: str, summary: str) -> list[str]:
|
|
177
|
+
"""
|
|
178
|
+
Extract entities from title, content and summary
|
|
179
|
+
|
|
180
|
+
Args:
|
|
181
|
+
title: Article title
|
|
182
|
+
content: Article content
|
|
183
|
+
summary: Article summary
|
|
184
|
+
|
|
185
|
+
Returns:
|
|
186
|
+
List of extracted entities
|
|
187
|
+
"""
|
|
188
|
+
# Simple entity extraction - can be enhanced with NER
|
|
189
|
+
text = f"{title} {content} {summary}"
|
|
190
|
+
entities = []
|
|
191
|
+
|
|
192
|
+
# Extract potential entities (simple approach)
|
|
193
|
+
# This can be enhanced with proper NER models
|
|
194
|
+
words = text.split()
|
|
195
|
+
for word in words:
|
|
196
|
+
if len(word) > 2 and word[0].isupper():
|
|
197
|
+
entities.append(word)
|
|
198
|
+
|
|
199
|
+
return list(set(entities))[:10] # Limit to 10 entities
|
|
200
|
+
|
|
201
|
+
def _extract_tags(self, title: str, content: str, summary: str, parsed_goal=None) -> list[str]:
    """
    Extract tags from title, content and summary.

    Tags are produced in a fixed order: the two source tags, then every
    keyword category that matches the text (in table order), then any tags
    carried by ``parsed_goal``.

    Args:
        title: Article title
        content: Article content
        summary: Article summary
        parsed_goal: Parsed task goal (optional). Only its ``tags``
            attribute is read, if present and non-empty.

    Returns:
        At most 15 distinct tags, in the order described above.
    """
    import re  # local import: only needed for acronym tokenisation below

    # Source-based tags always come first so the 15-item cap never drops them.
    tags = ["xinyu_search", "news"]

    raw_text = f"{title} {content} {summary}"
    text = raw_text.lower()
    # Runs of ASCII letters in their original case. Acronym keywords are
    # matched against these whole words; lower-casing them and substring
    # matching would make "ai" hit words such as "said" or "training".
    ascii_words = set(re.findall(r"[A-Za-z]+", raw_text))

    # Simple keyword-based tagging. Lower-case entries are matched as
    # case-insensitive substrings; upper-case entries (acronyms) are matched
    # as case-sensitive whole words.
    keywords = {
        "economy": [
            "economy",
            "GDP",
            "growth",
            "production",
            "industry",
            "investment",
            "consumption",
            "market",
            "trade",
            "finance",
        ],
        "politics": [
            "politics",
            "government",
            "policy",
            "meeting",
            "leader",
            "election",
            "parliament",
            "ministry",
        ],
        "technology": [
            "technology",
            "tech",
            "innovation",
            "digital",
            "internet",
            "AI",
            "artificial intelligence",
            "software",
            "hardware",
        ],
        "sports": [
            "sports",
            "game",
            "athlete",
            "olympic",
            "championship",
            "tournament",
            "team",
            "player",
        ],
        "culture": [
            "culture",
            "education",
            "art",
            "history",
            "literature",
            "music",
            "film",
            "museum",
        ],
        "health": [
            "health",
            "medical",
            "pandemic",
            "hospital",
            "doctor",
            "medicine",
            "disease",
            "treatment",
        ],
        "environment": [
            "environment",
            "ecology",
            "pollution",
            "green",
            "climate",
            "sustainability",
            "renewable",
        ],
    }

    for category, words in keywords.items():
        if any(
            (word in ascii_words) if word.isupper() else (word in text)
            for word in words
        ):
            tags.append(category)

    # Add goal-based tags if available; a missing or None ``tags`` attribute
    # is treated as "no tags" instead of raising.
    goal_tags = getattr(parsed_goal, "tags", None) if parsed_goal else None
    if goal_tags:
        tags.extend(goal_tags)

    # Order-preserving de-duplication keeps the result deterministic.
    return list(dict.fromkeys(tags))[:15]  # Limit to 15 tags
|
|
308
|
+
|
|
309
|
+
def _process_result(
    self, result: dict, query: str, parsed_goal: str, info: dict | None, mode="fast"
) -> list[TextualMemoryItem]:
    """
    Convert one raw search hit into textual memory items.

    Args:
        result: Search hit; the keys read are ``title``, ``content``,
            ``summary``, ``url`` and ``publish_time``.
        query: Original query (not used by this method).
        parsed_goal: Parsed task goal (not used by this method).
        info: Extra record; defaults to empty ``user_id`` / ``session_id``
            when missing or empty.
        mode: ``"fast"`` builds a single item directly from the hit; any
            other value passes the content through ``self.reader`` and
            decorates every item it returns.

    Returns:
        List of TextualMemoryItem (empty if the reader produced nothing).
    """
    if not info:
        info = {"user_id": "", "session_id": ""}

    # ``or ""`` also covers keys that are present but null, which would
    # otherwise be rendered as the text "None" and handed to the embedder.
    title = result.get("title") or ""
    content = result.get("content") or ""
    summary = result.get("summary") or ""
    url = result.get("url") or ""

    # Normalise the publish time to a plain date; fall back to today's date
    # when it is missing or not in the expected "%Y-%m-%d %H:%M:%S" format.
    publish_time = result.get("publish_time", "")
    if publish_time:
        try:
            publish_time = datetime.strptime(publish_time, "%Y-%m-%d %H:%M:%S").strftime(
                "%Y-%m-%d"
            )
        except (ValueError, TypeError) as e:
            # ValueError: wrong format; TypeError: value is not a string.
            logger.error(f"xinyu search error: {e}")
            publish_time = datetime.now().strftime("%Y-%m-%d")
    else:
        publish_time = datetime.now().strftime("%Y-%m-%d")

    if mode == "fast":
        # user_id / session_id become dedicated metadata fields; whatever is
        # left in the copy is stored as the free-form ``info`` payload.
        info_ = info.copy()
        user_id = info_.pop("user_id", "")
        session_id = info_.pop("session_id", "")
        return [
            TextualMemoryItem(
                memory=(
                    f"[Outer internet view] Title: {title}\nNewsTime:"
                    f" {publish_time}\nSummary:"
                    f" {summary}\n"
                ),
                metadata=SearchedTreeNodeTextualMemoryMetadata(
                    user_id=user_id,
                    session_id=session_id,
                    memory_type="OuterMemory",
                    status="activated",
                    type="fact",
                    source="web",
                    sources=[SourceMessage(type="web", url=url)] if url else [],
                    visibility="public",
                    tags=self._extract_tags(title, content, summary),
                    key=title,
                    info=info_,
                    background="",
                    confidence=0.99,
                    usage=[],
                    embedding=self.embedder.embed([content])[0],
                    internet_info={
                        "title": title,
                        "url": url,
                        "summary": summary,
                        "content": content,
                    },
                ),
            )
        ]

    read_items = self.reader.get_memory([content], type="doc", info=info)
    # A single document is submitted, so only the first entry is consumed
    # (presumably one entry per input document - confirm against the reader).
    # An empty reader result yields no items instead of an IndexError.
    if not read_items:
        return []

    memory_items = []
    for read_item_i in read_items[0]:
        read_item_i.memory = (
            f"Title: {title}\nNewsTime: {publish_time}\nSummary: {summary}\n"
            f"Content: {read_item_i.memory}"
        )
        read_item_i.metadata.source = "web"
        read_item_i.metadata.memory_type = "OuterMemory"
        read_item_i.metadata.sources = [SourceMessage(type="web", url=url)] if url else []
        read_item_i.metadata.visibility = "public"
        read_item_i.metadata.internet_info = {
            "title": title,
            "url": url,
            "summary": summary,
            "content": content,
        }

        memory_items.append(read_item_i)
    return memory_items
|