MemoryOS 0.2.1-py3-none-any.whl → 1.0.0-py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release. This version of MemoryOS might be problematic.
- {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/METADATA +7 -1
- {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/RECORD +87 -64
- memos/__init__.py +1 -1
- memos/api/config.py +158 -69
- memos/api/context/context.py +147 -0
- memos/api/context/dependencies.py +101 -0
- memos/api/product_models.py +5 -1
- memos/api/routers/product_router.py +54 -26
- memos/configs/graph_db.py +49 -1
- memos/configs/internet_retriever.py +19 -0
- memos/configs/mem_os.py +5 -0
- memos/configs/mem_reader.py +9 -0
- memos/configs/mem_scheduler.py +54 -18
- memos/configs/mem_user.py +58 -0
- memos/graph_dbs/base.py +38 -3
- memos/graph_dbs/factory.py +2 -0
- memos/graph_dbs/nebular.py +1612 -0
- memos/graph_dbs/neo4j.py +18 -9
- memos/log.py +6 -1
- memos/mem_cube/utils.py +13 -6
- memos/mem_os/core.py +157 -37
- memos/mem_os/main.py +2 -2
- memos/mem_os/product.py +252 -201
- memos/mem_os/utils/default_config.py +1 -1
- memos/mem_os/utils/format_utils.py +281 -70
- memos/mem_os/utils/reference_utils.py +133 -0
- memos/mem_reader/simple_struct.py +13 -5
- memos/mem_scheduler/base_scheduler.py +239 -266
- memos/mem_scheduler/{modules → general_modules}/base.py +4 -5
- memos/mem_scheduler/{modules → general_modules}/dispatcher.py +57 -21
- memos/mem_scheduler/general_modules/misc.py +104 -0
- memos/mem_scheduler/{modules → general_modules}/rabbitmq_service.py +12 -10
- memos/mem_scheduler/{modules → general_modules}/redis_service.py +1 -1
- memos/mem_scheduler/general_modules/retriever.py +199 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +261 -0
- memos/mem_scheduler/general_scheduler.py +243 -80
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +305 -0
- memos/mem_scheduler/{modules/monitor.py → monitors/general_monitor.py} +106 -57
- memos/mem_scheduler/mos_for_test_scheduler.py +23 -20
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/general_schemas.py +44 -0
- memos/mem_scheduler/schemas/message_schemas.py +149 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +337 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/misc_utils.py +102 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +500 -0
- memos/mem_user/persistent_factory.py +96 -0
- memos/mem_user/user_manager.py +4 -4
- memos/memories/activation/item.py +5 -1
- memos/memories/activation/kv.py +20 -8
- memos/memories/textual/base.py +2 -2
- memos/memories/textual/general.py +36 -92
- memos/memories/textual/item.py +5 -33
- memos/memories/textual/tree.py +13 -7
- memos/memories/textual/tree_text_memory/organize/{conflict.py → handler.py} +34 -50
- memos/memories/textual/tree_text_memory/organize/manager.py +8 -96
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +49 -43
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +107 -142
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +229 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +11 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +15 -8
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +1 -1
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +191 -116
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +47 -15
- memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
- memos/memos_tools/dinding_report_bot.py +422 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +96 -0
- memos/memos_tools/thread_safe_dict.py +288 -0
- memos/settings.py +3 -1
- memos/templates/mem_reader_prompts.py +4 -1
- memos/templates/mem_scheduler_prompts.py +62 -15
- memos/templates/mos_prompts.py +116 -0
- memos/templates/tree_reorganize_prompts.py +24 -17
- memos/utils.py +19 -0
- memos/mem_scheduler/modules/misc.py +0 -39
- memos/mem_scheduler/modules/retriever.py +0 -268
- memos/mem_scheduler/modules/schemas.py +0 -328
- memos/mem_scheduler/utils.py +0 -75
- memos/memories/textual/tree_text_memory/organize/redundancy.py +0 -193
- {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/LICENSE +0 -0
- {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/WHEEL +0 -0
- {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/entry_points.txt +0 -0
- /memos/mem_scheduler/{modules → general_modules}/__init__.py +0 -0
```diff
--- /dev/null
+++ b/memos/mem_scheduler/schemas/general_schemas.py
@@ -0,0 +1,44 @@
+from pathlib import Path
+from typing import NewType
+
+
+FILE_PATH = Path(__file__).absolute()
+BASE_DIR = FILE_PATH.parent.parent.parent.parent.parent
+
+QUERY_LABEL = "query"
+ANSWER_LABEL = "answer"
+ADD_LABEL = "add"
+
+TreeTextMemory_SEARCH_METHOD = "tree_text_memory_search"
+TreeTextMemory_FINE_SEARCH_METHOD = "tree_text_memory_fine_search"
+TextMemory_SEARCH_METHOD = "text_memory_search"
+DIRECT_EXCHANGE_TYPE = "direct"
+FANOUT_EXCHANGE_TYPE = "fanout"
+DEFAULT_WORKING_MEM_MONITOR_SIZE_LIMIT = 30
+DEFAULT_ACTIVATION_MEM_MONITOR_SIZE_LIMIT = 20
+DEFAULT_ACT_MEM_DUMP_PATH = f"{BASE_DIR}/outputs/mem_scheduler/mem_cube_scheduler_test.kv_cache"
+DEFAULT_THREAD__POOL_MAX_WORKERS = 5
+DEFAULT_CONSUME_INTERVAL_SECONDS = 3
+NOT_INITIALIZED = -1
+
+
+# web log
+LONG_TERM_MEMORY_TYPE = "LongTermMemory"
+USER_MEMORY_TYPE = "UserMemory"
+WORKING_MEMORY_TYPE = "WorkingMemory"
+TEXT_MEMORY_TYPE = "TextMemory"
+ACTIVATION_MEMORY_TYPE = "ActivationMemory"
+PARAMETER_MEMORY_TYPE = "ParameterMemory"
+USER_INPUT_TYPE = "UserInput"
+NOT_APPLICABLE_TYPE = "NotApplicable"
+
+# monitors
+MONITOR_WORKING_MEMORY_TYPE = "MonitorWorkingMemoryType"
+MONITOR_ACTIVATION_MEMORY_TYPE = "MonitorActivationMemoryType"
+DEFAULT_MAX_QUERY_KEY_WORDS = 1000
+DEFAULT_WEIGHT_VECTOR_FOR_RANKING = [0.9, 0.05, 0.05]
+
+
+# new types
+UserID = NewType("UserID", str)
+MemCubeID = NewType("CubeID", str)
```
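To illustrate how these constants are meant to be consumed, here is a minimal sketch of a dispatcher built on the label constants and `NewType` aliases; `handle_message` is hypothetical and not part of the package:

```python
from memos.mem_scheduler.schemas.general_schemas import (
    ADD_LABEL,
    ANSWER_LABEL,
    QUERY_LABEL,
    MemCubeID,
    UserID,
)


# Hypothetical dispatcher. NewType aliases are plain str at runtime, but a
# static type checker will flag a swapped user/cube argument.
def handle_message(user_id: UserID, cube_id: MemCubeID, label: str) -> None:
    if label not in {QUERY_LABEL, ANSWER_LABEL, ADD_LABEL}:
        raise ValueError(f"Unknown schedule label: {label}")
    print(f"user={user_id} cube={cube_id} label={label}")


handle_message(UserID("user123"), MemCubeID("cube456"), QUERY_LABEL)
```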
```diff
--- /dev/null
+++ b/memos/mem_scheduler/schemas/message_schemas.py
@@ -0,0 +1,149 @@
+from datetime import datetime
+from typing import Any
+from uuid import uuid4
+
+from pydantic import BaseModel, ConfigDict, Field, field_serializer
+from typing_extensions import TypedDict
+
+from memos.log import get_logger
+from memos.mem_cube.general import GeneralMemCube
+from memos.mem_scheduler.general_modules.misc import DictConversionMixin
+
+from .general_schemas import NOT_INITIALIZED
+
+
+logger = get_logger(__name__)
+
+DEFAULT_MEMORY_SIZES = {
+    "long_term_memory_size": NOT_INITIALIZED,
+    "user_memory_size": NOT_INITIALIZED,
+    "working_memory_size": NOT_INITIALIZED,
+    "transformed_act_memory_size": NOT_INITIALIZED,
+    "parameter_memory_size": NOT_INITIALIZED,
+}
+
+DEFAULT_MEMORY_CAPACITIES = {
+    "long_term_memory_capacity": 10000,
+    "user_memory_capacity": 10000,
+    "working_memory_capacity": 20,
+    "transformed_act_memory_capacity": NOT_INITIALIZED,
+    "parameter_memory_capacity": NOT_INITIALIZED,
+}
+
+
+class ScheduleMessageItem(BaseModel, DictConversionMixin):
+    item_id: str = Field(description="uuid", default_factory=lambda: str(uuid4()))
+    user_id: str = Field(..., description="user id")
+    mem_cube_id: str = Field(..., description="memcube id")
+    label: str = Field(..., description="Label of the schedule message")
+    mem_cube: GeneralMemCube | str = Field(..., description="memcube for schedule")
+    content: str = Field(..., description="Content of the schedule message")
+    timestamp: datetime = Field(
+        default_factory=lambda: datetime.utcnow(), description="submit time for schedule_messages"
+    )
+
+    # Pydantic V2 model configuration
+    model_config = ConfigDict(
+        # Allows arbitrary Python types as model fields without validation
+        # Required when using custom types like GeneralMemCube that aren't Pydantic models
+        arbitrary_types_allowed=True,
+        # Additional metadata for JSON Schema generation
+        json_schema_extra={
+            # Example payload demonstrating the expected structure and sample values
+            # Used for API documentation, testing, and developer reference
+            "example": {
+                "item_id": "123e4567-e89b-12d3-a456-426614174000",  # Sample UUID
+                "user_id": "user123",  # Example user identifier
+                "mem_cube_id": "cube456",  # Sample memory cube ID
+                "label": "sample_label",  # Demonstration label value
+                "mem_cube": "obj of GeneralMemCube",  # Added mem_cube example
+                "content": "sample content",  # Example message content
+                "timestamp": "2024-07-22T12:00:00Z",  # Added timestamp example
+            }
+        },
+    )
+
+    @field_serializer("mem_cube")
+    def serialize_mem_cube(self, cube: GeneralMemCube | str, _info) -> str:
+        """Custom serializer for GeneralMemCube objects to string representation"""
+        if isinstance(cube, str):
+            return cube
+        return f"<GeneralMemCube:{id(cube)}>"
+
+    def to_dict(self) -> dict:
+        """Convert model to dictionary suitable for Redis Stream"""
+        return {
+            "item_id": self.item_id,
+            "user_id": self.user_id,
+            "cube_id": self.mem_cube_id,
+            "label": self.label,
+            "cube": "Not Applicable",  # Custom cube serialization
+            "content": self.content,
+            "timestamp": self.timestamp.isoformat(),
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict) -> "ScheduleMessageItem":
+        """Create model from Redis Stream dictionary"""
+        return cls(
+            item_id=data.get("item_id", str(uuid4())),
+            user_id=data["user_id"],
+            cube_id=data["cube_id"],
+            label=data["label"],
+            cube="Not Applicable",  # Custom cube deserialization
+            content=data["content"],
+            timestamp=datetime.fromisoformat(data["timestamp"]),
+        )
+
+
+class MemorySizes(TypedDict):
+    long_term_memory_size: int
+    user_memory_size: int
+    working_memory_size: int
+    transformed_act_memory_size: int
+
+
+class MemoryCapacities(TypedDict):
+    long_term_memory_capacity: int
+    user_memory_capacity: int
+    working_memory_capacity: int
+    transformed_act_memory_capacity: int
+
+
+class ScheduleLogForWebItem(BaseModel, DictConversionMixin):
+    item_id: str = Field(
+        description="Unique identifier for the log entry", default_factory=lambda: str(uuid4())
+    )
+    user_id: str = Field(..., description="Identifier for the user associated with the log")
+    mem_cube_id: str = Field(
+        ..., description="Identifier for the memcube associated with this log entry"
+    )
+    label: str = Field(..., description="Label categorizing the type of log")
+    from_memory_type: str = Field(..., description="Source memory type")
+    to_memory_type: str = Field(..., description="Destination memory type")
+    log_content: str = Field(..., description="Detailed content of the log entry")
+    current_memory_sizes: MemorySizes = Field(
+        default_factory=lambda: dict(DEFAULT_MEMORY_SIZES),
+        description="Current utilization of memory partitions",
+    )
+    memory_capacities: MemoryCapacities = Field(
+        default_factory=lambda: dict(DEFAULT_MEMORY_CAPACITIES),
+        description="Maximum capacities of memory partitions",
+    )
+    timestamp: datetime = Field(
+        default_factory=lambda: datetime.utcnow(),
+        description="Timestamp indicating when the log entry was created",
+    )
+
+    def debug_info(self) -> dict[str, Any]:
+        """Return structured debug information for logging purposes."""
+        return {
+            "content_preview:": self.log_content[:50],
+            "log_id": self.item_id,
+            "user_id": self.user_id,
+            "mem_cube_id": self.mem_cube_id,
+            "operation": f"{self.from_memory_type} → {self.to_memory_type}",
+            "label": self.label,
+            "content_length": len(self.log_content),
+            "timestamp": self.timestamp.isoformat(),
+        }
```
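As a quick orientation to the new message schema, the sketch below builds a `ScheduleMessageItem` and flattens it with `to_dict` (values are illustrative). Note that `from_dict` as published passes `cube_id`/`cube` keywords that do not match the declared `mem_cube_id`/`mem_cube` field names, so the round trip is not exercised here:

```python
from memos.mem_scheduler.schemas.message_schemas import ScheduleMessageItem

# Illustrative values; mem_cube is typed GeneralMemCube | str, so a plain
# string stands in for a real memory cube object.
msg = ScheduleMessageItem(
    user_id="user123",
    mem_cube_id="cube456",
    label="query",
    mem_cube="stub-cube",
    content="What did I say about project X?",
)

flat = msg.to_dict()  # keys: item_id, user_id, cube_id, label, cube, content, timestamp
print(flat["cube_id"], flat["timestamp"])
```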
```diff
--- /dev/null
+++ b/memos/mem_scheduler/schemas/monitor_schemas.py
@@ -0,0 +1,337 @@
+import threading
+
+from collections import Counter
+from datetime import datetime
+from pathlib import Path
+from typing import ClassVar
+from uuid import uuid4
+
+from pydantic import BaseModel, Field, computed_field, field_validator
+
+from memos.log import get_logger
+from memos.mem_scheduler.general_modules.misc import AutoDroppingQueue, DictConversionMixin
+from memos.mem_scheduler.schemas.general_schemas import (
+    DEFAULT_MAX_QUERY_KEY_WORDS,
+    DEFAULT_WEIGHT_VECTOR_FOR_RANKING,
+    NOT_INITIALIZED,
+)
+from memos.mem_scheduler.utils.filter_utils import transform_name_to_key
+from memos.memories.textual.tree import TextualMemoryItem
+
+
+logger = get_logger(__name__)
+
+FILE_PATH = Path(__file__).absolute()
+BASE_DIR = FILE_PATH.parent.parent.parent.parent.parent
+
+
+# ============== Queries ==============
+class QueryMonitorItem(BaseModel, DictConversionMixin):
+    item_id: str = Field(
+        description="Unique identifier for the query item", default_factory=lambda: str(uuid4())
+    )
+    query_text: str = Field(
+        ...,
+        description="The actual user query text content",
+        min_length=1,
+    )
+    keywords: list[str] | None = Field(
+        default=None,
+        min_length=1,  # If provided, shouldn't be empty
+        description="Semantic keywords extracted from the query text",
+    )
+    max_keywords: ClassVar[int] = DEFAULT_MAX_QUERY_KEY_WORDS
+
+    timestamp: datetime = Field(
+        default_factory=datetime.now, description="Timestamp indicating when query was submitted"
+    )
+
+    @field_validator("keywords", mode="before")
+    @classmethod
+    def validate_keywords(cls, v, values):
+        if v is None:
+            return None
+
+        if not isinstance(v, list):
+            raise ValueError("Keywords must be a list")
+
+        if len(v) > cls.max_keywords:
+            logger.warning(
+                f"Keywords list truncated from {len(v)} to {cls.max_keywords} items. "
+                f"Configure max_keywords class attribute to adjust this limit."
+            )
+            return v[: cls.max_keywords]
+        return v
+
+    @classmethod
+    def with_max_keywords(cls, limit: int):
+        """Create a new class with custom keywords limit."""
+        if not isinstance(limit, int) or limit <= 0:
+            raise ValueError("Max keywords limit must be positive integer")
+
+        return type(f"{cls.__name__}_MaxKeywords{limit}", (cls,), {"max_keywords": limit})
+
+
+class QueryMonitorQueue(AutoDroppingQueue[QueryMonitorItem]):
+    """
+    A thread-safe queue for monitoring queries with timestamp and keyword tracking.
+    Each item is expected to be a dictionary containing:
+    """
+
+    def put(self, item: QueryMonitorItem, block: bool = True, timeout: float | None = 5.0) -> None:
+        """
+        Add a query item to the queue. Ensures the item is of correct type.
+
+        Args:
+            item: A QueryMonitorItem instance
+        """
+        if not isinstance(item, QueryMonitorItem):
+            raise ValueError("Item must be an instance of QueryMonitorItem")
+        logger.debug(
+            f"Thread {threading.get_ident()} acquired mutex. Timeout is set to {timeout} seconds"
+        )
+        super().put(item, block, timeout)
+
+    def get_queries_by_timestamp(
+        self, start_time: datetime, end_time: datetime
+    ) -> list[QueryMonitorItem]:
+        """
+        Retrieve queries added between the specified time range.
+        """
+        with self.mutex:
+            logger.debug(f"Thread {threading.get_ident()} acquired mutex.")
+            return [item for item in self.queue if start_time <= item.timestamp <= end_time]
+
+    def get_keywords_collections(self) -> Counter:
+        """
+        Generate a Counter containing keyword frequencies across all queries.
+
+        Returns:
+            Counter object with keyword counts
+        """
+        with self.mutex:
+            logger.debug(f"Thread {threading.get_ident()} acquired mutex.")
+            all_keywords = [kw for item in self.queue for kw in item.keywords]
+            return Counter(all_keywords)
+
+    def get_queries_with_timesort(self, reverse: bool = True) -> list[str]:
+        """
+        Retrieve all queries sorted by timestamp.
+
+        Args:
+            reverse: If True, sort in descending order (newest first),
+                otherwise sort in ascending order (oldest first)
+
+        Returns:
+            List of query items sorted by timestamp
+        """
+        with self.mutex:
+            logger.debug(f"Thread {threading.get_ident()} acquired mutex.")
+            return [
+                monitor.query_text
+                for monitor in sorted(self.queue, key=lambda x: x.timestamp, reverse=reverse)
+            ]
+
+
+# ============== Memories ==============
+class MemoryMonitorItem(BaseModel, DictConversionMixin):
+    item_id: str = Field(
+        description="Unique identifier for the memory item", default_factory=lambda: str(uuid4())
+    )
+    memory_text: str = Field(
+        ...,
+        description="The actual content of the memory",
+        min_length=1,
+    )
+    tree_memory_item: TextualMemoryItem | None = Field(
+        default=None, description="Optional textual memory item"
+    )
+    tree_memory_item_mapping_key: str = Field(
+        description="Key generated from memory_text using transform_name_to_key",
+    )
+    keywords_score: float = Field(
+        default=NOT_INITIALIZED,
+        description="The score generate by counting keywords in queries",
+        ge=NOT_INITIALIZED,  # Minimum value of 0
+    )
+    sorting_score: float = Field(
+        default=NOT_INITIALIZED,
+        description="The score generate from rerank process",
+        ge=NOT_INITIALIZED,  # Minimum value of 0
+    )
+    importance_score: float = Field(
+        default=NOT_INITIALIZED,
+        description="Numerical score representing the memory's importance",
+        ge=NOT_INITIALIZED,  # Minimum value of 0
+    )
+    recording_count: int = Field(
+        default=1,
+        description="How many times this memory has been recorded",
+        ge=1,  # Greater than or equal to 1
+    )
+
+    @field_validator("tree_memory_item_mapping_key", mode="before")
+    def generate_mapping_key(cls, v, values):  # noqa: N805
+        if v is None and "memory_text" in values:
+            return transform_name_to_key(values["memory_text"])
+        return v
+
+    def get_importance_score(self, weight_vector: list[float] | None = None) -> float:
+        """
+        Calculate the effective score for the memory item.
+
+        Returns:
+            float: The importance_score if it has been initialized (>=0),
+                otherwise the recording_count converted to float.
+
+        Note:
+            This method provides a unified way to retrieve a comparable score
+            for memory items, regardless of whether their importance has been explicitly set.
+        """
+        if weight_vector is None:
+            logger.warning("weight_vector of get_importance_score is None.")
+            weight_vector = DEFAULT_WEIGHT_VECTOR_FOR_RANKING
+        assert sum(weight_vector) == 1
+        normalized_keywords_score = min(self.keywords_score * weight_vector[1], 5)
+        normalized_recording_count_score = min(self.recording_count * weight_vector[2], 2)
+        self.importance_score = (
+            self.sorting_score * weight_vector[0]
+            + normalized_keywords_score
+            + normalized_recording_count_score
+        )
+        return self.importance_score
+
+
+class MemoryMonitorManager(BaseModel, DictConversionMixin):
+    user_id: str = Field(..., description="Required user identifier", min_length=1)
+    mem_cube_id: str = Field(..., description="Required memory cube identifier", min_length=1)
+    memories: list[MemoryMonitorItem] = Field(
+        default_factory=list, description="Collection of memory items"
+    )
+    max_capacity: int | None = Field(
+        default=None, description="Maximum number of memories allowed (None for unlimited)", ge=1
+    )
+
+    @computed_field
+    @property
+    def memory_size(self) -> int:
+        """Automatically calculated count of memory items."""
+        return len(self.memories)
+
+    @property
+    def memories_mapping_dict(self) -> dict[str, MemoryMonitorItem]:
+        """
+        Generate a mapping dictionary for the memories in MemoryMonitorManager,
+        using tree_memory_item_mapping_key as the key and MemoryMonitorItem as the value.
+
+        Returns:
+            Dict[str, MemoryMonitorItem]: A dictionary where keys are
+                tree_memory_item_mapping_key values from MemoryMonitorItem,
+                and values are the corresponding MemoryMonitorItem objects.
+        """
+        mapping_dict = {
+            mem_item.tree_memory_item_mapping_key: mem_item for mem_item in self.memories
+        }
+
+        logger.debug(
+            f"Generated memories mapping dict for user_id={self.user_id}, "
+            f"mem_cube_id={self.mem_cube_id}, "
+            f"total_items={len(mapping_dict)}, "
+            f"source_memory_count={len(self.memories)}"
+        )
+        return mapping_dict
+
+    def get_sorted_mem_monitors(self, reverse=True) -> list[MemoryMonitorItem]:
+        """
+        Retrieve memory monitors sorted by their ranking score in descending order.
+
+        Returns:
+            list[MemoryMonitorItem]: Sorted list of memory monitor items.
+        """
+        return sorted(
+            self.memories,
+            key=lambda item: item.get_importance_score(
+                weight_vector=DEFAULT_WEIGHT_VECTOR_FOR_RANKING
+            ),
+            reverse=reverse,
+        )
+
+    def update_memories(
+        self, new_memory_monitors: list[MemoryMonitorItem], partial_retention_number: int
+    ) -> MemoryMonitorItem:
+        """
+        Update memories based on monitor_working_memories.
+        """
+
+        # Validate partial_retention_number
+        if partial_retention_number < 0:
+            raise ValueError("partial_retention_number must be non-negative")
+
+        # Step 1: Update existing memories or add new ones
+        added_count = 0
+        memories_mapping_dict = self.memories_mapping_dict
+        new_mem_set = set()
+        for memory_monitor in new_memory_monitors:
+            if memory_monitor.tree_memory_item_mapping_key in memories_mapping_dict:
+                # Update existing memory
+                item: MemoryMonitorItem = memories_mapping_dict[
+                    memory_monitor.tree_memory_item_mapping_key
+                ]
+                item.recording_count += 1
+                item.keywords_score = memory_monitor.keywords_score
+                item.sorting_score = memory_monitor.sorting_score
+            else:
+                # Add new memory
+                self.memories.append(memory_monitor)
+                added_count += 1
+
+            new_mem_set.add(memory_monitor.tree_memory_item_mapping_key)
+
+        # Step 2: Identify memories to remove
+        old_mem_monitor_list = []
+        for mem_monitor in self.memories:
+            if mem_monitor.tree_memory_item_mapping_key not in new_mem_set:
+                old_mem_monitor_list.append(mem_monitor)
+
+        # Sort memories by recording_count in descending order
+        sorted_old_mem_monitors = sorted(
+            old_mem_monitor_list,
+            key=lambda item: item.get_importance_score(
+                weight_vector=DEFAULT_WEIGHT_VECTOR_FOR_RANKING
+            ),
+            reverse=True,
+        )
+
+        # Keep the top N old memories
+        memories_to_remove = sorted_old_mem_monitors[partial_retention_number:]
+        memories_to_change_score = sorted_old_mem_monitors[:partial_retention_number]
+
+        # Step 3: Remove identified memories and change the scores of left old memories
+        for memory in memories_to_remove:
+            self.memories.remove(memory)
+
+        for memory in memories_to_change_score:
+            memory.sorting_score = 0
+            memory.recording_count = 0
+            memory.keywords_score = 0
+
+        # Step 4: Enforce max_capacity if set
+        sorted_memories = sorted(
+            self.memories,
+            key=lambda item: item.get_importance_score(
+                weight_vector=DEFAULT_WEIGHT_VECTOR_FOR_RANKING
+            ),
+            reverse=True,
+        )
+        # Keep only the top max_capacity memories
+        self.memories = sorted_memories[: self.max_capacity]
+
+        # Log the update result
+        logger.info(
+            f"Updated monitor manager for user {self.user_id}, mem_cube {self.mem_cube_id}: "
+            f"Total memories: {len(self.memories)}, "
+            f"Added/Updated: {added_count}, "
+            f"Removed: {len(memories_to_remove)} (excluding top {partial_retention_number} by recording_count)"
+        )
+
+        return self.memories
```
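The ranking logic above combines three signals with the `[0.9, 0.05, 0.05]` default weight vector: `importance = 0.9 * sorting_score + min(0.05 * keywords_score, 5) + min(0.05 * recording_count, 2)`. Below is a minimal sketch with illustrative values; the mapping key is supplied explicitly here (in a hypothetical format), since the field has no default:

```python
from memos.mem_scheduler.schemas.monitor_schemas import MemoryMonitorItem

item = MemoryMonitorItem(
    memory_text="User prefers concise answers",
    tree_memory_item_mapping_key="user_prefers_concise_answers",  # hypothetical key format
    sorting_score=0.8,
    keywords_score=3.0,
    recording_count=4,
)

# 0.9 * 0.8 + min(0.05 * 3.0, 5) + min(0.05 * 4, 2) = 0.72 + 0.15 + 0.20 = 1.07
score = item.get_importance_score(weight_vector=[0.9, 0.05, 0.05])
print(round(score, 3))  # 1.07
```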