MemoryOS 0.2.1-py3-none-any.whl → 0.2.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic.
- {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/METADATA +2 -1
- {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/RECORD +72 -55
- memos/__init__.py +1 -1
- memos/api/config.py +156 -65
- memos/api/context/context.py +147 -0
- memos/api/context/dependencies.py +90 -0
- memos/api/product_models.py +5 -1
- memos/api/routers/product_router.py +54 -26
- memos/configs/graph_db.py +49 -1
- memos/configs/internet_retriever.py +6 -0
- memos/configs/mem_os.py +5 -0
- memos/configs/mem_reader.py +9 -0
- memos/configs/mem_scheduler.py +18 -4
- memos/configs/mem_user.py +58 -0
- memos/graph_dbs/base.py +9 -1
- memos/graph_dbs/factory.py +2 -0
- memos/graph_dbs/nebular.py +1364 -0
- memos/graph_dbs/neo4j.py +4 -4
- memos/log.py +1 -1
- memos/mem_cube/utils.py +13 -6
- memos/mem_os/core.py +140 -30
- memos/mem_os/main.py +1 -1
- memos/mem_os/product.py +266 -152
- memos/mem_os/utils/format_utils.py +314 -67
- memos/mem_reader/simple_struct.py +13 -5
- memos/mem_scheduler/base_scheduler.py +220 -250
- memos/mem_scheduler/general_scheduler.py +193 -73
- memos/mem_scheduler/modules/base.py +5 -5
- memos/mem_scheduler/modules/dispatcher.py +6 -9
- memos/mem_scheduler/modules/misc.py +81 -16
- memos/mem_scheduler/modules/monitor.py +52 -41
- memos/mem_scheduler/modules/rabbitmq_service.py +9 -7
- memos/mem_scheduler/modules/retriever.py +108 -191
- memos/mem_scheduler/modules/scheduler_logger.py +255 -0
- memos/mem_scheduler/mos_for_test_scheduler.py +16 -19
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/general_schemas.py +43 -0
- memos/mem_scheduler/schemas/message_schemas.py +148 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +329 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/misc_utils.py +61 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +500 -0
- memos/mem_user/persistent_factory.py +96 -0
- memos/mem_user/user_manager.py +4 -4
- memos/memories/activation/item.py +4 -0
- memos/memories/textual/base.py +1 -1
- memos/memories/textual/general.py +35 -91
- memos/memories/textual/item.py +5 -33
- memos/memories/textual/tree.py +13 -7
- memos/memories/textual/tree_text_memory/organize/conflict.py +4 -2
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +47 -43
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +8 -5
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +2 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +46 -23
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +42 -15
- memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
- memos/memos_tools/dinding_report_bot.py +422 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +96 -0
- memos/settings.py +3 -1
- memos/templates/mem_reader_prompts.py +2 -1
- memos/templates/mem_scheduler_prompts.py +41 -7
- memos/templates/mos_prompts.py +87 -0
- memos/mem_scheduler/modules/schemas.py +0 -328
- memos/mem_scheduler/utils.py +0 -75
- {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/LICENSE +0 -0
- {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/WHEEL +0 -0
- {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/entry_points.txt +0 -0
memos/templates/mos_prompts.py
CHANGED
@@ -61,3 +61,90 @@ Please synthesize these answers into a comprehensive response that:
 3. Provides clear reasoning and connections
 4. Is well-structured and easy to understand
 5. Maintains a natural conversational tone"""
+
+MEMOS_PRODUCT_BASE_PROMPT = (
+    "You are a knowledgeable and helpful AI assistant with access to user memories. "
+    "When responding to user queries, you should reference relevant memories using the provided memory IDs. "
+    "Use the reference format: [1-n:memoriesID] "
+    "where refid is a sequential number starting from 1 and increments for each reference in your response, "
+    "and memoriesID is the specific memory ID provided in the available memories list. "
+    "For example: [1:abc123], [2:def456], [3:ghi789], [4:jkl101], [5:mno112] "
+    "Do not use connect format like [1:abc123,2:def456]"
+    "Only reference memories that are directly relevant to the user's question. "
+    "Make your responses natural and conversational while incorporating memory references when appropriate."
+)
+
+MEMOS_PRODUCT_ENHANCE_PROMPT = """
+# Memory-Enhanced AI Assistant Prompt
+
+You are a knowledgeable and helpful AI assistant with access to two types of memory sources:
+
+## Memory Types
+- **PersonalMemory**: User-specific memories and information stored from previous interactions
+- **OuterMemory**: External information retrieved from the internet and other sources
+
+## Memory Reference Guidelines
+
+### Reference Format
+When citing memories in your responses, use the following format:
+- `[refid:memoriesID]` where:
+  - `refid` is a sequential number starting from 1 and incrementing for each reference
+  - `memoriesID` is the specific memory ID from the available memories list
+
+### Reference Examples
+- Correct: `[1:abc123]`, `[2:def456]`, `[3:ghi789]`, `[4:jkl101]`, `[5:mno112]`
+- Incorrect: `[1:abc123,2:def456]` (do not use connected format)
+
+## Response Guidelines
+
+### Memory Selection
+- Intelligently choose which memories (PersonalMemory or OuterMemory) are most relevant to the user's query
+- Only reference memories that are directly relevant to the user's question
+- Prioritize the most appropriate memory type based on the context and nature of the query
+
+### Response Style
+- Make your responses natural and conversational
+- Seamlessly incorporate memory references when appropriate
+- Ensure the flow of conversation remains smooth despite memory citations
+- Balance factual accuracy with engaging dialogue
+
+## Key Principles
+- Reference only relevant memories to avoid information overload
+- Maintain conversational tone while being informative
+- Use memory references to enhance, not disrupt, the user experience
+"""
+QUERY_REWRITING_PROMPT = """
+I'm in discussion with my friend about a question, and we have already talked about something before that. Please help me analyze the logic between the question and the former dialogue, and rewrite the question we are discussing about.
+
+Requirements:
+1. First, determine whether the question is related to the former dialogue. If so, set "former_dialogue_related" to True.
+2. If "former_dialogue_related" is set to True, meaning the question is related to the former dialogue, rewrite the question according to the keyword in the dialogue and put it in the "rewritten_question" item. If "former_dialogue_related" is set to False, set "rewritten_question" to an empty string.
+3. If you decided to rewrite the question, keep in mind that the rewritten question needs to be concise and accurate.
+4. You must return ONLY a valid JSON object. Do not include any other text, explanations, or formatting.
+
+Here are some examples:
+
+Former dialogue:
+————How's the weather in ShangHai today?
+————It's great. The weather in Shanghai is sunny right now. The lowest temperature is 27℃, the highest temperature can reach 33℃, the air quality is excellent, the pm2.5 index is 13, the humidity is 60%, and the northerly wind is at level 1.
+Current question: What should I wear today?
+Answer: {{"former_dialogue_related": True, "rewritten_question": "Considering the weather in Shanghai today, what should I wear?"}}
+
+Former dialogue:
+————I need a brief introduction to Oxford-Cambridge boat race.
+————The race originated from a challenge in 1829 between Charles Merivale of Cambridge University and Charles Wordsworth of Oxford University. Oxford won the first race. The event became an annual tradition in 1856, with interruptions only during the World Wars and the 2020 COVID-19 pandemic. The women's race was added in 1927. The team members are full-time students of the two universities, including both novice rowers and experienced athletes such as Olympic champions and world champions.
+————What is the international community's attitude towards the 2024 US election?
+————The international community approached the 2024 U.S. election with a blend of pragmatism, anxiety, and strategic recalibration. Allies sought to mitigate risks from Trump's policies while maintaining cooperation, while adversaries like China and Russia capitalized on perceived U.S. decline to advance their agendas. Developing nations increasingly resisted U.S. dominance, advocating for a multipolar world. Ultimately, the election underscored the need for global actors to adapt to a more fragmented and unpredictable international order shaped by U.S. domestic politics.
+Current question: In March 2025, after a magnitude 7.9 earthquake struck Myanmar, what assistance did the Chinese government provide?
+Answer: {{"former_dialogue_related": False, "rewritten_question": ""}}
+
+Former dialogue:
+————I am an entry-level learner of large language models. Please recommend me three papers suitable for reading.
+————For an entry-level learner of large language models (LLMs), here are three foundational papers that provide essential insights into the core concepts, architectures, and advancements in the field: "Attention Is All You Need", "Improving Language Understanding by Generative Pre-Training (GPT-1)", and "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding". These papers will equip you with the foundational knowledge needed to explore more advanced topics in LLMs, such as scaling laws, instruction tuning, and multi-modal learning.
+Current question: Of these three papers, which one do you recommend I start reading?
+Answer: {{"former_dialogue_related": True, "rewritten_question": "Among the three papers \"Attention Is All You Need\", \"Improving Language Understanding by Generative Pre-Training (GPT-1)\" and \"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding\", which one do you recommend I start reading?"}}
+
+Former dialogue:
+{dialogue}
+Current question: {query}
+Answer:"""
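The citation format these new prompts request, `[refid:memoriesID]` with one bracket per reference and never the connected `[1:a,2:b]` form, is straightforward to post-process. The sketch below is illustrative only and is not code from the package; the regex and the assumption that memory IDs are alphanumeric with `_`/`-` are ours:

```python
import re

# Matches citations in the "[refid:memoryID]" style described by the prompts above,
# e.g. "[1:abc123]" -> (1, "abc123"). The connected form "[1:abc123,2:def456]" does
# not match, mirroring the "do not use connected format" rule. The ID character set
# is an assumption, not something the prompts specify.
REFERENCE_PATTERN = re.compile(r"\[(\d+):([\w-]+)\]")


def extract_memory_references(response: str) -> list[tuple[int, str]]:
    """Return (refid, memory_id) pairs cited in a model response."""
    return [(int(refid), mem_id) for refid, mem_id in REFERENCE_PATTERN.findall(response)]


print(extract_memory_references("See your note [1:abc123] and this article [2:def456]."))
# -> [(1, 'abc123'), (2, 'def456')]
```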
memos/mem_scheduler/modules/schemas.py
DELETED

@@ -1,328 +0,0 @@
-import json
-
-from datetime import datetime
-from pathlib import Path
-from typing import ClassVar, NewType, TypeVar
-from uuid import uuid4
-
-from pydantic import BaseModel, Field, computed_field
-from typing_extensions import TypedDict
-
-from memos.log import get_logger
-from memos.mem_cube.general import GeneralMemCube
-
-
-logger = get_logger(__name__)
-
-
-FILE_PATH = Path(__file__).absolute()
-BASE_DIR = FILE_PATH.parent.parent.parent.parent.parent
-
-QUERY_LABEL = "query"
-ANSWER_LABEL = "answer"
-ADD_LABEL = "add"
-
-TreeTextMemory_SEARCH_METHOD = "tree_text_memory_search"
-TextMemory_SEARCH_METHOD = "text_memory_search"
-DIRECT_EXCHANGE_TYPE = "direct"
-FANOUT_EXCHANGE_TYPE = "fanout"
-DEFAULT_WORKING_MEM_MONITOR_SIZE_LIMIT = 20
-DEFAULT_ACTIVATION_MEM_MONITOR_SIZE_LIMIT = 5
-DEFAULT_ACT_MEM_DUMP_PATH = f"{BASE_DIR}/outputs/mem_scheduler/mem_cube_scheduler_test.kv_cache"
-DEFAULT_THREAD__POOL_MAX_WORKERS = 5
-DEFAULT_CONSUME_INTERVAL_SECONDS = 3
-NOT_INITIALIZED = -1
-BaseModelType = TypeVar("T", bound="BaseModel")
-
-# web log
-LONG_TERM_MEMORY_TYPE = "LongTermMemory"
-USER_MEMORY_TYPE = "UserMemory"
-WORKING_MEMORY_TYPE = "WorkingMemory"
-TEXT_MEMORY_TYPE = "TextMemory"
-ACTIVATION_MEMORY_TYPE = "ActivationMemory"
-PARAMETER_MEMORY_TYPE = "ParameterMemory"
-USER_INPUT_TYPE = "UserInput"
-NOT_APPLICABLE_TYPE = "NotApplicable"
-
-# monitors
-MONITOR_WORKING_MEMORY_TYPE = "MonitorWorkingMemoryType"
-MONITOR_ACTIVATION_MEMORY_TYPE = "MonitorActivationMemoryType"
-
-
-# new types
-UserID = NewType("UserID", str)
-MemCubeID = NewType("CubeID", str)
-
-
-# ************************* Public *************************
-class DictConversionMixin:
-    def to_dict(self) -> dict:
-        """Convert the instance to a dictionary."""
-        return {
-            **self.model_dump(),  # replaces self.dict()
-            "timestamp": self.timestamp.isoformat() if hasattr(self, "timestamp") else None,
-        }
-
-    @classmethod
-    def from_dict(cls: type[BaseModelType], data: dict) -> BaseModelType:
-        """Create an instance from a dictionary."""
-        if "timestamp" in data:
-            data["timestamp"] = datetime.fromisoformat(data["timestamp"])
-        return cls(**data)
-
-    def __str__(self) -> str:
-        """Convert the instance to a JSON string with indentation of 4 spaces.
-        This will be used when str() or print() is called on the instance.
-
-        Returns:
-            str: A JSON string representation of the instance with 4-space indentation.
-        """
-        return json.dumps(
-            self.to_dict(),
-            indent=4,
-            ensure_ascii=False,
-            default=str,  # handle objects that cannot be serialized
-        )
-
-    class Config:
-        json_encoders: ClassVar[dict[type, object]] = {datetime: lambda v: v.isoformat()}
-
-
-# ************************* Messages *************************
-class ScheduleMessageItem(BaseModel, DictConversionMixin):
-    item_id: str = Field(description="uuid", default_factory=lambda: str(uuid4()))
-    user_id: str = Field(..., description="user id")
-    mem_cube_id: str = Field(..., description="memcube id")
-    label: str = Field(..., description="Label of the schedule message")
-    mem_cube: GeneralMemCube | str = Field(..., description="memcube for schedule")
-    content: str = Field(..., description="Content of the schedule message")
-    timestamp: datetime = Field(
-        default_factory=datetime.now, description="submit time for schedule_messages"
-    )
-
-    class Config:
-        arbitrary_types_allowed = True
-        json_encoders: ClassVar[dict[type, object]] = {
-            datetime: lambda v: v.isoformat(),
-            GeneralMemCube: lambda v: f"<GeneralMemCube:{id(v)}>",
-        }
-
-    def to_dict(self) -> dict:
-        """Convert model to dictionary suitable for Redis Stream"""
-        return {
-            "item_id": self.item_id,
-            "user_id": self.user_id,
-            "cube_id": self.mem_cube_id,
-            "label": self.label,
-            "cube": "Not Applicable",  # Custom cube serialization
-            "content": self.content,
-            "timestamp": self.timestamp.isoformat(),
-        }
-
-    @classmethod
-    def from_dict(cls, data: dict) -> "ScheduleMessageItem":
-        """Create model from Redis Stream dictionary"""
-        return cls(
-            item_id=data.get("item_id", str(uuid4())),
-            user_id=data["user_id"],
-            cube_id=data["cube_id"],
-            label=data["label"],
-            cube="Not Applicable",  # Custom cube deserialization
-            content=data["content"],
-            timestamp=datetime.fromisoformat(data["timestamp"]),
-        )
-
-
-class MemorySizes(TypedDict):
-    long_term_memory_size: int
-    user_memory_size: int
-    working_memory_size: int
-    transformed_act_memory_size: int
-
-
-class MemoryCapacities(TypedDict):
-    long_term_memory_capacity: int
-    user_memory_capacity: int
-    working_memory_capacity: int
-    transformed_act_memory_capacity: int
-
-
-DEFAULT_MEMORY_SIZES = {
-    "long_term_memory_size": NOT_INITIALIZED,
-    "user_memory_size": NOT_INITIALIZED,
-    "working_memory_size": NOT_INITIALIZED,
-    "transformed_act_memory_size": NOT_INITIALIZED,
-    "parameter_memory_size": NOT_INITIALIZED,
-}
-
-DEFAULT_MEMORY_CAPACITIES = {
-    "long_term_memory_capacity": 10000,
-    "user_memory_capacity": 10000,
-    "working_memory_capacity": 20,
-    "transformed_act_memory_capacity": NOT_INITIALIZED,
-    "parameter_memory_capacity": NOT_INITIALIZED,
-}
-
-
-class ScheduleLogForWebItem(BaseModel, DictConversionMixin):
-    item_id: str = Field(
-        description="Unique identifier for the log entry", default_factory=lambda: str(uuid4())
-    )
-    user_id: str = Field(..., description="Identifier for the user associated with the log")
-    mem_cube_id: str = Field(
-        ..., description="Identifier for the memcube associated with this log entry"
-    )
-    label: str = Field(..., description="Label categorizing the type of log")
-    from_memory_type: str = Field(..., description="Source memory type")
-    to_memory_type: str = Field(..., description="Destination memory type")
-    log_content: str = Field(..., description="Detailed content of the log entry")
-    current_memory_sizes: MemorySizes = Field(
-        default_factory=lambda: dict(DEFAULT_MEMORY_SIZES),
-        description="Current utilization of memory partitions",
-    )
-    memory_capacities: MemoryCapacities = Field(
-        default_factory=lambda: dict(DEFAULT_MEMORY_CAPACITIES),
-        description="Maximum capacities of memory partitions",
-    )
-    timestamp: datetime = Field(
-        default_factory=datetime.now,
-        description="Timestamp indicating when the log entry was created",
-    )
-
-
-# ************************* Monitor *************************
-class MemoryMonitorItem(BaseModel, DictConversionMixin):
-    item_id: str = Field(
-        description="Unique identifier for the memory item", default_factory=lambda: str(uuid4())
-    )
-    memory_text: str = Field(
-        ...,
-        description="The actual content of the memory",
-        min_length=1,
-        max_length=10000,  # Prevent excessively large memory texts
-    )
-    importance_score: float = Field(
-        default=NOT_INITIALIZED,
-        description="Numerical score representing the memory's importance",
-        ge=NOT_INITIALIZED,  # Minimum value of 0
-    )
-    recording_count: int = Field(
-        default=1,
-        description="How many times this memory has been recorded",
-        ge=1,  # Greater than or equal to 1
-    )
-
-    def get_score(self) -> float:
-        """
-        Calculate the effective score for the memory item.
-
-        Returns:
-            float: The importance_score if it has been initialized (>=0),
-                otherwise the recording_count converted to float.
-
-        Note:
-            This method provides a unified way to retrieve a comparable score
-            for memory items, regardless of whether their importance has been explicitly set.
-        """
-        if self.importance_score == NOT_INITIALIZED:
-            # Return recording_count as float when importance_score is not initialized
-            return float(self.recording_count)
-        else:
-            # Return the initialized importance_score
-            return self.importance_score
-
-
-class MemoryMonitorManager(BaseModel, DictConversionMixin):
-    user_id: str = Field(..., description="Required user identifier", min_length=1)
-    mem_cube_id: str = Field(..., description="Required memory cube identifier", min_length=1)
-    memories: list[MemoryMonitorItem] = Field(
-        default_factory=list, description="Collection of memory items"
-    )
-    max_capacity: int | None = Field(
-        default=None, description="Maximum number of memories allowed (None for unlimited)", ge=1
-    )
-
-    @computed_field
-    @property
-    def memory_size(self) -> int:
-        """Automatically calculated count of memory items."""
-        return len(self.memories)
-
-    def update_memories(
-        self, text_working_memories: list[str], partial_retention_number: int
-    ) -> MemoryMonitorItem:
-        """
-        Update memories based on text_working_memories.
-
-        Args:
-            text_working_memories: List of memory texts to update
-            partial_retention_number: Number of top memories to keep by recording count
-
-        Returns:
-            List of added or updated MemoryMonitorItem instances
-        """
-
-        # Validate partial_retention_number
-        if partial_retention_number < 0:
-            raise ValueError("partial_retention_number must be non-negative")
-
-        # Create text lookup set
-        working_memory_set = set(text_working_memories)
-
-        # Step 1: Update existing memories or add new ones
-        added_or_updated = []
-        memory_text_map = {item.memory_text: item for item in self.memories}
-
-        for text in text_working_memories:
-            if text in memory_text_map:
-                # Update existing memory
-                memory = memory_text_map[text]
-                memory.recording_count += 1
-                added_or_updated.append(memory)
-            else:
-                # Add new memory
-                new_memory = MemoryMonitorItem(memory_text=text, recording_count=1)
-                self.memories.append(new_memory)
-                added_or_updated.append(new_memory)
-
-        # Step 2: Identify memories to remove
-        # Sort memories by recording_count in descending order
-        sorted_memories = sorted(self.memories, key=lambda item: item.recording_count, reverse=True)
-
-        # Keep the top N memories by recording_count
-        records_to_keep = {
-            memory.memory_text for memory in sorted_memories[:partial_retention_number]
-        }
-
-        # Collect memories to remove: not in current working memory and not in top N
-        memories_to_remove = [
-            memory
-            for memory in self.memories
-            if memory.memory_text not in working_memory_set
-            and memory.memory_text not in records_to_keep
-        ]
-
-        # Step 3: Remove identified memories
-        for memory in memories_to_remove:
-            self.memories.remove(memory)
-
-        # Step 4: Enforce max_capacity if set
-        if self.max_capacity is not None and len(self.memories) > self.max_capacity:
-            # Sort by importance and then recording count
-            sorted_memories = sorted(
-                self.memories,
-                key=lambda item: (item.importance_score, item.recording_count),
-                reverse=True,
-            )
-            # Keep only the top max_capacity memories
-            self.memories = sorted_memories[: self.max_capacity]
-
-        # Log the update result
-        logger.info(
-            f"Updated monitor manager for user {self.user_id}, mem_cube {self.mem_cube_id}: "
-            f"Total memories: {len(self.memories)}, "
-            f"Added/Updated: {len(added_or_updated)}, "
-            f"Removed: {len(memories_to_remove)} (excluding top {partial_retention_number} by recording_count)"
-        )
-
-        return added_or_updated
memos/mem_scheduler/utils.py
DELETED
@@ -1,75 +0,0 @@
-import json
-import re
-
-from pathlib import Path
-
-import yaml
-
-
-def extract_json_dict(text: str):
-    text = text.strip()
-    patterns_to_remove = ["json```", "```json", "latex```", "```latex", "```"]
-    for pattern in patterns_to_remove:
-        text = text.replace(pattern, "")
-    res = json.loads(text.strip())
-    return res
-
-
-def transform_name_to_key(name):
-    """
-    Normalize text by removing all punctuation marks, keeping only letters, numbers, and word characters.
-
-    Args:
-        name (str): Input text to be processed
-
-    Returns:
-        str: Processed text with all punctuation removed
-    """
-    # Match all characters that are NOT:
-    # \w - word characters (letters, digits, underscore)
-    # \u4e00-\u9fff - Chinese/Japanese/Korean characters
-    # \s - whitespace
-    pattern = r"[^\w\u4e00-\u9fff\s]"
-
-    # Substitute all matched punctuation marks with empty string
-    # re.UNICODE flag ensures proper handling of Unicode characters
-    normalized = re.sub(pattern, "", name, flags=re.UNICODE)
-
-    # Optional: Collapse multiple whitespaces into single space
-    normalized = "_".join(normalized.split())
-
-    normalized = normalized.lower()
-
-    return normalized
-
-
-def parse_yaml(yaml_file):
-    yaml_path = Path(yaml_file)
-    yaml_path = Path(yaml_file)
-    if not yaml_path.is_file():
-        raise FileNotFoundError(f"No such file: {yaml_file}")
-
-    with yaml_path.open("r", encoding="utf-8") as fr:
-        data = yaml.safe_load(fr)
-
-    return data
-
-
-def is_all_english(input_string: str) -> bool:
-    """Determine if the string consists entirely of English characters (including spaces)"""
-    return all(char.isascii() or char.isspace() for char in input_string)
-
-
-def is_all_chinese(input_string: str) -> bool:
-    """Determine if the string consists entirely of Chinese characters (including Chinese punctuation and spaces)"""
-    return all(
-        ("\u4e00" <= char <= "\u9fff")  # Basic Chinese characters
-        or ("\u3400" <= char <= "\u4dbf")  # Extension A
-        or ("\u20000" <= char <= "\u2a6df")  # Extension B
-        or ("\u2a700" <= char <= "\u2b73f")  # Extension C
-        or ("\u2b740" <= char <= "\u2b81f")  # Extension D
-        or ("\u2b820" <= char <= "\u2ceaf")  # Extension E
-        or ("\u2f800" <= char <= "\u2fa1f")  # Extension F
-        or char.isspace()  # Spaces
-        for char in input_string
-    )
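The helpers deleted here appear to move under memos/mem_scheduler/utils/ (filter_utils.py and misc_utils.py in the file list); their new locations and signatures are not shown in this diff. As a usage illustration only, extract_json_dict is re-declared below with the same body as the deleted module and applied to the kind of fenced JSON reply that QUERY_REWRITING_PROMPT expects the model to return:

```python
import json


def extract_json_dict(text: str):
    # Same body as the deleted memos/mem_scheduler/utils.py helper:
    # strip common code-fence markers, then parse whatever JSON remains.
    text = text.strip()
    patterns_to_remove = ["json```", "```json", "latex```", "```latex", "```"]
    for pattern in patterns_to_remove:
        text = text.replace(pattern, "")
    res = json.loads(text.strip())
    return res


# Hypothetical sample reply: models often wrap JSON in a code fence even when
# told not to, which is exactly what this helper tolerates.
reply = '```json\n{"former_dialogue_related": false, "rewritten_question": ""}\n```'
print(extract_json_dict(reply))
# -> {'former_dialogue_related': False, 'rewritten_question': ''}
```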