memoryos-2.0.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0

memos/mem_reader/read_multi_modal/string_parser.py
@@ -0,0 +1,139 @@
"""Parser for string format messages.

Handles simple string messages that need to be converted to memory items.
"""

from typing import Any

from memos.embedders.base import BaseEmbedder
from memos.llms.base import BaseLLM
from memos.log import get_logger
from memos.memories.textual.item import (
    SourceMessage,
    TextualMemoryItem,
    TreeNodeTextualMemoryMetadata,
)

from .base import BaseMessageParser, _add_lang_to_source, _derive_key


logger = get_logger(__name__)


class StringParser(BaseMessageParser):
    """Parser for string format messages.

    Handles simple string messages in both fast and fine modes.
    - Fast mode: Directly converts string to memory item
    - Fine mode: Uses LLM to extract structured memories from string
    """

    def __init__(self, embedder: BaseEmbedder, llm: BaseLLM | None = None):
        """
        Initialize StringParser.

        Args:
            embedder: Embedder for generating embeddings
            llm: Optional LLM for fine mode processing
        """
        super().__init__(embedder, llm)

    def create_source(
        self,
        message: str,
        info: dict[str, Any],
    ) -> SourceMessage:
        """Create SourceMessage from string message."""
        source = SourceMessage(
            type="doc",
            content=str(message),
        )
        return _add_lang_to_source(source, str(message))

    def rebuild_from_source(
        self,
        source: SourceMessage,
    ) -> str:
        """We only need rebuild from specific multimodal source"""

    def parse_fast(
        self,
        message: str,
        info: dict[str, Any],
        **kwargs,
    ) -> list[TextualMemoryItem]:
        """
        Parse string message in fast mode.

        Fast mode directly converts the string to a memory item without LLM processing.
        This is equivalent to simple_struct fast mode for string messages.

        Args:
            message: String message to parse
            info: Dictionary containing user_id and session_id
            **kwargs: Additional parameters

        Returns:
            List of TextualMemoryItem objects
        """
        if not isinstance(message, str):
            logger.warning(f"[StringParser] Expected str, got {type(message)}")
            return []

        content = message.strip()
        if not content:
            return []

        # Split parsed text into chunks
        content_chunks = self._split_text(content)

        # Extract info fields
        info_ = info.copy()
        user_id = info_.pop("user_id", "")
        session_id = info_.pop("session_id", "")

        # For string messages, default to LongTermMemory
        memory_type = "LongTermMemory"

        # Create memory items for each chunk
        memory_items = []
        for _chunk_idx, chunk_text in enumerate(content_chunks):
            if not chunk_text.strip():
                continue

            # Create source
            source = self.create_source(chunk_text, info)

            memory_item = TextualMemoryItem(
                memory=chunk_text,
                metadata=TreeNodeTextualMemoryMetadata(
                    user_id=user_id,
                    session_id=session_id,
                    memory_type=memory_type,
                    status="activated",
                    tags=["mode:fast"],
                    key=_derive_key(chunk_text),
                    embedding=self.embedder.embed([chunk_text])[0],
                    usage=[],
                    sources=[source],
                    background="",
                    confidence=0.99,
                    type="fact",
                    info=info_,
                ),
            )
            memory_items.append(memory_item)
        return memory_items

    def parse_fine(
        self,
        message: str,
        info: dict[str, Any],
        **kwargs,
    ) -> list[TextualMemoryItem]:
        logger.info(
            "str memory is inherently a "
            "text-only modality. No special multimodal handling"
            " is required in fine mode."
        )
        return []
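
For orientation, here is a minimal usage sketch of the StringParser fast path added above. It is not part of the diff: the stub embedder, the sample text, and the info keys are assumptions for illustration, and it presumes BaseMessageParser simply stores the embedder and provides the _split_text helper used by parse_fast.

from memos.mem_reader.read_multi_modal.string_parser import StringParser


class _StubEmbedder:
    """Illustrative duck-typed stand-in for a configured BaseEmbedder."""

    def embed(self, texts):
        # parse_fast calls self.embedder.embed([chunk_text])[0], so return
        # one vector per input string.
        return [[0.0, 0.0, 0.0] for _ in texts]


parser = StringParser(embedder=_StubEmbedder(), llm=None)
items = parser.parse_fast(
    "Alice moved to Berlin in 2021 and now works on graph databases.",
    info={"user_id": "u-1", "session_id": "s-1"},
)
for item in items:
    # Each chunk becomes a LongTermMemory item tagged "mode:fast".
    print(item.metadata.memory_type, item.metadata.tags, item.memory[:50])

Fast mode involves no LLM at all; parse_fine for plain strings deliberately returns an empty list, since a string carries no multimodal payload to extract.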

memos/mem_reader/read_multi_modal/system_parser.py
@@ -0,0 +1,327 @@
"""Parser for system messages."""

import ast
import hashlib
import json
import re
import uuid

from typing import Any

from memos.embedders.base import BaseEmbedder
from memos.llms.base import BaseLLM
from memos.log import get_logger
from memos.memories.textual.item import (
    SourceMessage,
    TextualMemoryItem,
    TreeNodeTextualMemoryMetadata,
)
from memos.types.openai_chat_completion_types import ChatCompletionSystemMessageParam

from .base import BaseMessageParser, _add_lang_to_source


logger = get_logger(__name__)


class SystemParser(BaseMessageParser):
    """Parser for system messages."""

    def __init__(self, embedder: BaseEmbedder, llm: BaseLLM | None = None):
        """
        Initialize SystemParser.

        Args:
            embedder: Embedder for generating embeddings
            llm: Optional LLM for fine mode processing
        """
        super().__init__(embedder, llm)

    def create_source(
        self,
        message: ChatCompletionSystemMessageParam,
        info: dict[str, Any],
    ) -> SourceMessage:
        """Create SourceMessage from system message."""

        content = message.get("content", "")
        if isinstance(content, dict):
            content = content.get("text", "")

        content_wo_tool_schema = re.sub(
            r"<tool_schema>(.*?)</tool_schema>",
            r"<tool_schema>omitted</tool_schema>",
            content,
            flags=re.DOTALL,
        )
        tool_schema_match = re.search(r"<tool_schema>(.*?)</tool_schema>", content, re.DOTALL)
        tool_schema_content = tool_schema_match.group(1) if tool_schema_match else ""

        source = SourceMessage(
            type="chat",
            role="system",
            chat_time=message.get("chat_time", None),
            message_id=message.get("message_id", None),
            content=content_wo_tool_schema,
            tool_schema=tool_schema_content,
        )
        return _add_lang_to_source(source, content_wo_tool_schema)

    def rebuild_from_source(
        self,
        source: SourceMessage,
    ) -> ChatCompletionSystemMessageParam:
        """Rebuild system message from SourceMessage."""
        # only rebuild tool schema content, content will be used in full chat content by llm
        return {
            "role": "system",
            "content": source.tool_schema or "",
            "chat_time": source.chat_time,
            "message_id": source.message_id,
        }

    def parse_fast(
        self,
        message: ChatCompletionSystemMessageParam,
        info: dict[str, Any],
        **kwargs,
    ) -> list[TextualMemoryItem]:
        content = message.get("content", "")
        if isinstance(content, dict):
            content = content.get("text", "")

        # Find first tool_schema block
        tool_schema_pattern = r"<tool_schema>(.*?)</tool_schema>"
        match = re.search(tool_schema_pattern, content, flags=re.DOTALL)

        if match:
            original_text = match.group(0)  # Complete <tool_schema>...</tool_schema> block
            schema_content = match.group(1)  # Content between the tags

            # Parse tool schema
            try:
                tool_schema = json.loads(schema_content)
                assert isinstance(tool_schema, list), "Tool schema must be a list[dict]"
            except json.JSONDecodeError:
                try:
                    tool_schema = ast.literal_eval(schema_content)
                    assert isinstance(tool_schema, list), "Tool schema must be a list[dict]"
                except (ValueError, SyntaxError, AssertionError):
                    logger.warning(
                        f"[SystemParser] Failed to parse tool schema with both JSON and ast.literal_eval: {schema_content[:100]}..."
                    )
                    tool_schema = None
            except AssertionError:
                logger.warning(
                    f"[SystemParser] Tool schema must be a list[dict]: {schema_content[:100]}..."
                )
                tool_schema = None

            # Process and replace
            if tool_schema is not None:

                def remove_descriptions(obj):
                    """Recursively remove all 'description' keys from a nested dict/list structure."""
                    if isinstance(obj, dict):
                        return {
                            k: remove_descriptions(v) for k, v in obj.items() if k != "description"
                        }
                    elif isinstance(obj, list):
                        return [remove_descriptions(item) for item in obj]
                    else:
                        return obj

                def keep_first_layer_params(obj):
                    """Only keep first layer parameter information, remove nested parameters."""
                    if isinstance(obj, list):
                        return [keep_first_layer_params(item) for item in obj]
                    elif isinstance(obj, dict):
                        result = {}
                        for k, v in obj.items():
                            if k == "properties" and isinstance(v, dict):
                                # For properties, only keep first layer parameter names and types
                                first_layer_props = {}
                                for param_name, param_info in v.items():
                                    if isinstance(param_info, dict):
                                        # Only keep type and basic info, remove nested properties
                                        first_layer_props[param_name] = {
                                            key: val
                                            for key, val in param_info.items()
                                            if key in ["type", "enum", "required"]
                                            and key != "properties"
                                        }
                                    else:
                                        first_layer_props[param_name] = param_info
                                result[k] = first_layer_props
                            elif k == "parameters" and isinstance(v, dict):
                                # Process parameters object but only keep first layer
                                result[k] = keep_first_layer_params(v)
                            elif isinstance(v, dict | list) and k != "properties":
                                result[k] = keep_first_layer_params(v)
                            else:
                                result[k] = v
                        return result
                    else:
                        return obj

                def format_tool_schema_readable(tool_schema):
                    """Convert tool schema to readable format: tool_name: [param1 (type1), ...](required: ...)"""
                    lines = []
                    for tool in tool_schema:
                        if not tool:
                            continue

                        # Handle both new format and old-style OpenAI function format
                        if tool.get("type") == "function" and "function" in tool:
                            tool_info = tool.get("function")
                            if not tool_info:
                                continue
                        else:
                            tool_info = tool

                        tool_name = tool_info.get("name", "unknown")
                        params_obj = tool_info.get("parameters", {})
                        properties = params_obj.get("properties", {})
                        required = params_obj.get("required", [])

                        # Format parameters
                        param_strs = []
                        for param_name, param_info in properties.items():
                            if isinstance(param_info, dict):
                                param_type = param_info.get("type", "any")
                                # Handle enum
                                if "enum" in param_info and param_info["enum"] is not None:
                                    # Ensure all enum values are strings
                                    enum_values = [str(v) for v in param_info["enum"]]
                                    param_type = f"{param_type}[{', '.join(enum_values)}]"
                                param_strs.append(f"{param_name} ({param_type})")
                            else:
                                param_strs.append(f"{param_name} (any)")

                        # Format required parameters
                        # Ensure all required parameter names are strings
                        required_strs = [str(r) for r in required] if required else []
                        required_str = (
                            f"(required: {', '.join(required_strs)})" if required_strs else ""
                        )

                        # Construct the line
                        params_part = f"[{', '.join(param_strs)}]" if param_strs else "[]"
                        line = f"{tool_name}: {params_part}{required_str}"
                        lines.append(line)

                    return "\n".join(lines)

                # Compression mode literal: ["compress", "omit"]. compress is core-information-preserving, omit is full omission.
                compression_mode = "compress"
                if compression_mode == "omit":
                    processed_text = "<tool_schema>omitted</tool_schema>"
                elif compression_mode == "compress":
                    # First keep only first layer params, then remove descriptions
                    simple_tool_schema = keep_first_layer_params(tool_schema)
                    simple_tool_schema = remove_descriptions(simple_tool_schema)
                    # change to readable format
                    readable_schema = format_tool_schema_readable(simple_tool_schema)

                    processed_text = f"<tool_schema>{readable_schema}</tool_schema>"
                else:
                    raise ValueError(f"Unknown compression mode: {compression_mode}")

                content = content.replace(original_text, processed_text, 1)

        parts = ["system: "]
        if message.get("chat_time"):
            parts.append(f"[{message.get('chat_time')}]: ")
        prefix = "".join(parts)
        msg_line = f"{prefix}{content}\n"

        source = self.create_source(message, info)

        # Extract info fields
        info_ = info.copy()
        user_id = info_.pop("user_id", "")
        session_id = info_.pop("session_id", "")

        # Split parsed text into chunks
        content_chunks = self._split_text(msg_line)

        memory_items = []
        for _chunk_idx, chunk_text in enumerate(content_chunks):
            if not chunk_text.strip():
                continue

            memory_item = TextualMemoryItem(
                memory=chunk_text,
                metadata=TreeNodeTextualMemoryMetadata(
                    user_id=user_id,
                    session_id=session_id,
                    memory_type="LongTermMemory",  # only choose long term memory for system messages as a placeholder
                    status="activated",
                    tags=["mode:fast"],
                    sources=[source],
                    info=info_,
                ),
            )
            memory_items.append(memory_item)
        return memory_items

    def parse_fine(
        self,
        message: ChatCompletionSystemMessageParam,
        info: dict[str, Any],
        **kwargs,
    ) -> list[TextualMemoryItem]:
        content = message.get("content", "")
        if isinstance(content, dict):
            content = content.get("text", "")
        try:
            tool_schema = json.loads(content)
            assert isinstance(tool_schema, list), "Tool schema must be a list[dict]"
        except json.JSONDecodeError:
            try:
                tool_schema = ast.literal_eval(content)
                assert isinstance(tool_schema, list), "Tool schema must be a list[dict]"
            except (ValueError, SyntaxError, AssertionError):
                logger.warning(
                    f"[SystemParser] Failed to parse tool schema with both JSON and ast.literal_eval: {content}"
                )
                return []
        except AssertionError:
            logger.warning(f"[SystemParser] Tool schema must be a list[dict]: {content}")
            return []

        info_ = info.copy()
        user_id = info_.pop("user_id", "")
        session_id = info_.pop("session_id", "")

        # Deduplicate tool schemas based on memory content
        # Use hash as key for efficiency, but store original string to handle collisions
        seen_memories = {}  # hash -> memory_str mapping
        unique_schemas = []
        for schema in tool_schema:
            memory_str = json.dumps(schema, ensure_ascii=False, sort_keys=True)
            # Use SHA-256 for better collision resistance
            memory_hash = hashlib.sha256(memory_str.encode("utf-8")).hexdigest()

            # Check if hash exists and verify the actual content (handle potential collision)
            if memory_hash not in seen_memories:
                seen_memories[memory_hash] = memory_str
                unique_schemas.append(schema)
            elif seen_memories[memory_hash] != memory_str:
                unique_schemas.append(schema)

        return [
            TextualMemoryItem(
                id=str(uuid.uuid4()),
                memory=json.dumps(schema, ensure_ascii=False),
                metadata=TreeNodeTextualMemoryMetadata(
                    user_id=user_id,
                    session_id=session_id,
                    memory_type="ToolSchemaMemory",
                    status="activated",
                    embedding=self.embedder.embed([json.dumps(schema, ensure_ascii=False)])[0],
                    info=info_,
                ),
            )
            for schema in unique_schemas
        ]
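
The most involved piece above is the tool-schema compression in SystemParser.parse_fast: an OpenAI-style <tool_schema> JSON block embedded in the system prompt is flattened to one line per tool before the prompt is stored. The sketch below illustrates the expected input and output; the sample message, the stub embedder, and the assumption that the short prompt survives _split_text as a single chunk are all illustrative and not part of the package.

from memos.mem_reader.read_multi_modal.system_parser import SystemParser


class _StubEmbedder:
    """Illustrative duck-typed embedder; the fast path does not embed, but __init__ stores it."""

    def embed(self, texts):
        return [[0.0, 0.0, 0.0] for _ in texts]


system_msg = {
    "role": "system",
    "content": (
        "You can call the tools below.\n"
        "<tool_schema>"
        '[{"type": "function", "function": {"name": "get_weather", "parameters": '
        '{"type": "object", "properties": {"city": {"type": "string"}, '
        '"unit": {"type": "string", "enum": ["C", "F"]}}, "required": ["city"]}}}]'
        "</tool_schema>"
    ),
}

parser = SystemParser(embedder=_StubEmbedder(), llm=None)
items = parser.parse_fast(system_msg, info={"user_id": "u-1", "session_id": "s-1"})
# With the hard-coded "compress" mode, the schema block inside the stored memory
# text is rewritten to roughly:
#   <tool_schema>get_weather: [city (string), unit (string[C, F])](required: city)</tool_schema>
print(items[0].memory)

In fine mode the parser goes the other way: it expects the content itself to be a tool-schema list, deduplicates the schemas by a SHA-256 hash of their canonical JSON, and stores each unique schema as a ToolSchemaMemory item with its own embedding.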

memos/mem_reader/read_multi_modal/text_content_parser.py
@@ -0,0 +1,131 @@
"""Parser for text content parts (RawMessageList).

Handles text content parts in multimodal messages.
Text content parts are typically used in user/assistant messages with multimodal content.
"""

from typing import Any

from memos.embedders.base import BaseEmbedder
from memos.llms.base import BaseLLM
from memos.log import get_logger
from memos.memories.textual.item import (
    SourceMessage,
    TextualMemoryItem,
    TreeNodeTextualMemoryMetadata,
)
from memos.types.openai_chat_completion_types import ChatCompletionContentPartTextParam

from .base import BaseMessageParser, _add_lang_to_source, _derive_key


logger = get_logger(__name__)


class TextContentParser(BaseMessageParser):
    """Parser for text content parts.

    Handles text content parts in both fast and fine modes.
    - Fast mode: Directly converts text content to memory item
    - Fine mode: Returns empty list (text content is handled at parent message level)
    """

    def __init__(self, embedder: BaseEmbedder, llm: BaseLLM | None = None):
        """
        Initialize TextContentParser.

        Args:
            embedder: Embedder for generating embeddings
            llm: Optional LLM for fine mode processing
        """
        super().__init__(embedder, llm)

    def create_source(
        self,
        message: ChatCompletionContentPartTextParam,
        info: dict[str, Any],
    ) -> SourceMessage:
        """Create SourceMessage from text content part."""
        if isinstance(message, dict):
            text = message.get("text", "")
            source = SourceMessage(
                type="text",
                content=text,
            )
            return _add_lang_to_source(source, text)
        source = SourceMessage(type="text", content=str(message))
        return _add_lang_to_source(source, str(message))

    def rebuild_from_source(
        self,
        source: SourceMessage,
    ) -> ChatCompletionContentPartTextParam:
        """We only need rebuild from specific multimodal source"""

    def parse_fast(
        self,
        message: ChatCompletionContentPartTextParam,
        info: dict[str, Any],
        **kwargs,
    ) -> list[TextualMemoryItem]:
        """
        Parse text content part in fast mode.
        """
        if not isinstance(message, dict):
            logger.warning(f"[TextContentParser] Expected dict, got {type(message)}")
            return []

        # Extract text content
        text = message.get("text", "")
        if not isinstance(text, str):
            text = str(text) if text is not None else ""

        content = text.strip()
        if not content:
            return []

        # Create source
        source = self.create_source(message, info)

        # Extract info fields
        info_ = info.copy()
        user_id = info_.pop("user_id", "")
        session_id = info_.pop("session_id", "")

        # For text content parts, default to LongTermMemory
        # (since we don't have role information at this level)
        memory_type = "LongTermMemory"

        # Create memory item
        memory_item = TextualMemoryItem(
            memory=content,
            metadata=TreeNodeTextualMemoryMetadata(
                user_id=user_id,
                session_id=session_id,
                memory_type=memory_type,
                status="activated",
                tags=["mode:fast"],
                key=_derive_key(content),
                embedding=self.embedder.embed([content])[0],
                usage=[],
                sources=[source],
                background="",
                confidence=0.99,
                type="fact",
                info=info_,
            ),
        )

        return [memory_item]

    def parse_fine(
        self,
        message: ChatCompletionContentPartTextParam,
        info: dict[str, Any],
        **kwargs,
    ) -> list[TextualMemoryItem]:
        logger.info(
            "Text content part is inherently a text-only modality. "
            "Fine mode processing is handled at the parent message level (user/assistant)."
        )
        return []
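
As with the string parser, a short illustrative sketch (not from the diff) of how a single text content part from a multimodal message might flow through TextContentParser.parse_fast; the content-part dict and the stub embedder are assumptions for the example.

from memos.mem_reader.read_multi_modal.text_content_parser import TextContentParser


class _StubEmbedder:
    """Illustrative duck-typed embedder; parse_fast embeds the whole text part once."""

    def embed(self, texts):
        return [[0.0, 0.0, 0.0] for _ in texts]


parser = TextContentParser(embedder=_StubEmbedder(), llm=None)
part = {"type": "text", "text": "Meeting notes: ship the retriever benchmark by Friday."}
items = parser.parse_fast(part, info={"user_id": "u-1", "session_id": "s-1"})
# Exactly one LongTermMemory item is produced; fine mode returns [] because the
# surrounding user/assistant parser owns fine-grained extraction.
print(len(items), items[0].metadata.memory_type)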