MemoryOS 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic. Click here for more details.
- {memoryos-0.2.0.dist-info → memoryos-0.2.1.dist-info}/METADATA +66 -26
- {memoryos-0.2.0.dist-info → memoryos-0.2.1.dist-info}/RECORD +80 -56
- memoryos-0.2.1.dist-info/entry_points.txt +3 -0
- memos/__init__.py +1 -1
- memos/api/config.py +471 -0
- memos/api/exceptions.py +28 -0
- memos/api/mcp_serve.py +502 -0
- memos/api/product_api.py +35 -0
- memos/api/product_models.py +159 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +358 -0
- memos/chunkers/sentence_chunker.py +8 -2
- memos/cli.py +113 -0
- memos/configs/embedder.py +27 -0
- memos/configs/graph_db.py +83 -2
- memos/configs/llm.py +47 -0
- memos/configs/mem_cube.py +1 -1
- memos/configs/mem_scheduler.py +91 -5
- memos/configs/memory.py +5 -4
- memos/dependency.py +52 -0
- memos/embedders/ark.py +92 -0
- memos/embedders/factory.py +4 -0
- memos/embedders/sentence_transformer.py +8 -2
- memos/embedders/universal_api.py +32 -0
- memos/graph_dbs/base.py +2 -2
- memos/graph_dbs/factory.py +2 -0
- memos/graph_dbs/neo4j.py +331 -122
- memos/graph_dbs/neo4j_community.py +300 -0
- memos/llms/base.py +9 -0
- memos/llms/deepseek.py +54 -0
- memos/llms/factory.py +10 -1
- memos/llms/hf.py +170 -13
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +4 -0
- memos/llms/openai.py +67 -1
- memos/llms/qwen.py +63 -0
- memos/llms/vllm.py +153 -0
- memos/mem_cube/general.py +77 -16
- memos/mem_cube/utils.py +102 -0
- memos/mem_os/core.py +131 -41
- memos/mem_os/main.py +93 -11
- memos/mem_os/product.py +1098 -35
- memos/mem_os/utils/default_config.py +352 -0
- memos/mem_os/utils/format_utils.py +1154 -0
- memos/mem_reader/simple_struct.py +5 -5
- memos/mem_scheduler/base_scheduler.py +467 -36
- memos/mem_scheduler/general_scheduler.py +125 -244
- memos/mem_scheduler/modules/base.py +9 -0
- memos/mem_scheduler/modules/dispatcher.py +68 -2
- memos/mem_scheduler/modules/misc.py +39 -0
- memos/mem_scheduler/modules/monitor.py +228 -49
- memos/mem_scheduler/modules/rabbitmq_service.py +317 -0
- memos/mem_scheduler/modules/redis_service.py +32 -22
- memos/mem_scheduler/modules/retriever.py +250 -23
- memos/mem_scheduler/modules/schemas.py +189 -7
- memos/mem_scheduler/mos_for_test_scheduler.py +143 -0
- memos/mem_scheduler/utils.py +51 -2
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/memories/activation/item.py +25 -0
- memos/memories/activation/kv.py +10 -3
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/factory.py +2 -0
- memos/memories/textual/general.py +7 -5
- memos/memories/textual/tree.py +9 -5
- memos/memories/textual/tree_text_memory/organize/conflict.py +5 -3
- memos/memories/textual/tree_text_memory/organize/manager.py +26 -18
- memos/memories/textual/tree_text_memory/organize/redundancy.py +25 -44
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +11 -13
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +73 -51
- memos/memories/textual/tree_text_memory/retrieve/recall.py +0 -1
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +2 -2
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +6 -5
- memos/parsers/markitdown.py +8 -2
- memos/templates/mem_reader_prompts.py +65 -23
- memos/templates/mem_scheduler_prompts.py +96 -47
- memos/templates/tree_reorganize_prompts.py +85 -30
- memos/vec_dbs/base.py +12 -0
- memos/vec_dbs/qdrant.py +46 -20
- {memoryos-0.2.0.dist-info → memoryos-0.2.1.dist-info}/LICENSE +0 -0
- {memoryos-0.2.0.dist-info → memoryos-0.2.1.dist-info}/WHEEL +0 -0
memos/mem_os/product.py
CHANGED
|
@@ -1,33 +1,682 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import os
|
|
3
|
+
import random
|
|
4
|
+
import time
|
|
2
5
|
|
|
3
6
|
from collections.abc import Generator
|
|
4
|
-
from
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from typing import Any, Literal
|
|
5
9
|
|
|
10
|
+
from dotenv import load_dotenv
|
|
11
|
+
from transformers import AutoTokenizer
|
|
12
|
+
|
|
13
|
+
from memos.configs.mem_cube import GeneralMemCubeConfig
|
|
6
14
|
from memos.configs.mem_os import MOSConfig
|
|
15
|
+
from memos.log import get_logger
|
|
16
|
+
from memos.mem_cube.general import GeneralMemCube
|
|
7
17
|
from memos.mem_os.core import MOSCore
|
|
8
|
-
from memos.
|
|
9
|
-
|
|
10
|
-
|
|
18
|
+
from memos.mem_os.utils.format_utils import (
|
|
19
|
+
convert_graph_to_tree_forworkmem,
|
|
20
|
+
filter_nodes_by_tree_ids,
|
|
21
|
+
remove_embedding_recursive,
|
|
22
|
+
sort_children_by_memory_type,
|
|
23
|
+
)
|
|
24
|
+
from memos.mem_scheduler.modules.schemas import ANSWER_LABEL, QUERY_LABEL, ScheduleMessageItem
|
|
25
|
+
from memos.mem_user.persistent_user_manager import PersistentUserManager
|
|
26
|
+
from memos.mem_user.user_manager import UserRole
|
|
27
|
+
from memos.memories.textual.item import (
|
|
28
|
+
TextualMemoryItem,
|
|
29
|
+
)
|
|
11
30
|
from memos.types import MessageList
|
|
12
31
|
|
|
13
32
|
|
|
33
|
+
logger = get_logger(__name__)
|
|
34
|
+
|
|
35
|
+
load_dotenv()
|
|
36
|
+
|
|
37
|
+
CUBE_PATH = os.getenv("MOS_CUBE_PATH", "/tmp/data/")
|
|
38
|
+
|
|
39
|
+
|
|
14
40
|
class MOSProduct(MOSCore):
|
|
15
41
|
"""
|
|
16
|
-
The MOSProduct class inherits from MOSCore
|
|
42
|
+
The MOSProduct class inherits from MOSCore and manages multiple users.
|
|
43
|
+
Each user has their own configuration and cube access, but shares the same model instances.
|
|
17
44
|
"""
|
|
18
45
|
|
|
19
|
-
def __init__(
|
|
20
|
-
|
|
46
|
+
def __init__(
|
|
47
|
+
self,
|
|
48
|
+
default_config: MOSConfig | None = None,
|
|
49
|
+
max_user_instances: int = 100,
|
|
50
|
+
default_cube_config: GeneralMemCubeConfig | None = None,
|
|
51
|
+
):
|
|
52
|
+
"""
|
|
53
|
+
Initialize MOSProduct with an optional default configuration.
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
default_config (MOSConfig | None): Default configuration for new users
|
|
57
|
+
max_user_instances (int): Maximum number of user instances to keep in memory
|
|
58
|
+
default_cube_config (GeneralMemCubeConfig | None): Default cube configuration for loading cubes
|
|
59
|
+
"""
|
|
60
|
+
# Initialize with a root config for shared resources
|
|
61
|
+
if default_config is None:
|
|
62
|
+
# Create a minimal config for root user
|
|
63
|
+
root_config = MOSConfig(
|
|
64
|
+
user_id="root",
|
|
65
|
+
session_id="root_session",
|
|
66
|
+
chat_model=default_config.chat_model if default_config else None,
|
|
67
|
+
mem_reader=default_config.mem_reader if default_config else None,
|
|
68
|
+
enable_mem_scheduler=default_config.enable_mem_scheduler
|
|
69
|
+
if default_config
|
|
70
|
+
else False,
|
|
71
|
+
mem_scheduler=default_config.mem_scheduler if default_config else None,
|
|
72
|
+
)
|
|
73
|
+
else:
|
|
74
|
+
root_config = default_config.model_copy(deep=True)
|
|
75
|
+
root_config.user_id = "root"
|
|
76
|
+
root_config.session_id = "root_session"
|
|
77
|
+
|
|
78
|
+
# Initialize parent MOSCore with root config
|
|
79
|
+
super().__init__(root_config)
|
|
80
|
+
|
|
81
|
+
# Product-specific attributes
|
|
82
|
+
self.default_config = default_config
|
|
83
|
+
self.default_cube_config = default_cube_config
|
|
84
|
+
self.max_user_instances = max_user_instances
|
|
85
|
+
|
|
86
|
+
# User-specific data structures
|
|
87
|
+
self.user_configs: dict[str, MOSConfig] = {}
|
|
88
|
+
self.user_cube_access: dict[str, set[str]] = {} # user_id -> set of cube_ids
|
|
89
|
+
self.user_chat_histories: dict[str, dict] = {}
|
|
90
|
+
|
|
91
|
+
# Use PersistentUserManager for user management
|
|
92
|
+
self.global_user_manager = PersistentUserManager(user_id="root")
|
|
93
|
+
|
|
94
|
+
# Initialize tiktoken for streaming
|
|
95
|
+
try:
|
|
96
|
+
# Use gpt2 encoding which is more stable and widely compatible
|
|
97
|
+
self.tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")
|
|
98
|
+
logger.info("tokenizer initialized successfully for streaming")
|
|
99
|
+
except Exception as e:
|
|
100
|
+
logger.warning(
|
|
101
|
+
f"Failed to initialize tokenizer, will use character-based chunking: {e}"
|
|
102
|
+
)
|
|
103
|
+
self.tokenizer = None
|
|
104
|
+
|
|
105
|
+
# Restore user instances from persistent storage
|
|
106
|
+
self._restore_user_instances(default_cube_config=default_cube_config)
|
|
107
|
+
logger.info(f"User instances restored successfully, now user is {self.mem_cubes.keys()}")
|
|
108
|
+
|
|
109
|
+
def _restore_user_instances(
|
|
110
|
+
self, default_cube_config: GeneralMemCubeConfig | None = None
|
|
111
|
+
) -> None:
|
|
112
|
+
"""Restore user instances from persistent storage after service restart.
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
default_cube_config (GeneralMemCubeConfig | None, optional): Default cube configuration. Defaults to None.
|
|
116
|
+
"""
|
|
117
|
+
try:
|
|
118
|
+
# Get all user configurations from persistent storage
|
|
119
|
+
user_configs = self.global_user_manager.list_user_configs()
|
|
120
|
+
|
|
121
|
+
# Get the raw database records for sorting by updated_at
|
|
122
|
+
session = self.global_user_manager._get_session()
|
|
123
|
+
try:
|
|
124
|
+
from memos.mem_user.persistent_user_manager import UserConfig
|
|
125
|
+
|
|
126
|
+
db_configs = session.query(UserConfig).all()
|
|
127
|
+
# Create a mapping of user_id to updated_at timestamp
|
|
128
|
+
updated_at_map = {config.user_id: config.updated_at for config in db_configs}
|
|
129
|
+
|
|
130
|
+
# Sort by updated_at timestamp (most recent first) and limit by max_instances
|
|
131
|
+
sorted_configs = sorted(
|
|
132
|
+
user_configs.items(), key=lambda x: updated_at_map.get(x[0], ""), reverse=True
|
|
133
|
+
)[: self.max_user_instances]
|
|
134
|
+
finally:
|
|
135
|
+
session.close()
|
|
136
|
+
|
|
137
|
+
for user_id, config in sorted_configs:
|
|
138
|
+
if user_id != "root": # Skip root user
|
|
139
|
+
try:
|
|
140
|
+
# Store user config and cube access
|
|
141
|
+
self.user_configs[user_id] = config
|
|
142
|
+
self._load_user_cube_access(user_id)
|
|
143
|
+
|
|
144
|
+
# Pre-load all cubes for this user with default config
|
|
145
|
+
self._preload_user_cubes(user_id, default_cube_config)
|
|
146
|
+
|
|
147
|
+
logger.info(
|
|
148
|
+
f"Restored user configuration and pre-loaded cubes for {user_id}"
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
except Exception as e:
|
|
152
|
+
logger.error(f"Failed to restore user configuration for {user_id}: {e}")
|
|
153
|
+
|
|
154
|
+
except Exception as e:
|
|
155
|
+
logger.error(f"Error during user instance restoration: {e}")
|
|
156
|
+
|
|
157
|
+
def _preload_user_cubes(
|
|
158
|
+
self, user_id: str, default_cube_config: GeneralMemCubeConfig | None = None
|
|
159
|
+
) -> None:
|
|
160
|
+
"""Pre-load all cubes for a user into memory.
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
user_id (str): The user ID to pre-load cubes for.
|
|
164
|
+
default_cube_config (GeneralMemCubeConfig | None, optional): Default cube configuration. Defaults to None.
|
|
165
|
+
"""
|
|
166
|
+
try:
|
|
167
|
+
# Get user's accessible cubes from persistent storage
|
|
168
|
+
accessible_cubes = self.global_user_manager.get_user_cubes(user_id)
|
|
169
|
+
|
|
170
|
+
for cube in accessible_cubes:
|
|
171
|
+
if cube.cube_id not in self.mem_cubes:
|
|
172
|
+
try:
|
|
173
|
+
if cube.cube_path and os.path.exists(cube.cube_path):
|
|
174
|
+
# Pre-load cube with all memory types and default config
|
|
175
|
+
self.register_mem_cube(
|
|
176
|
+
cube.cube_path,
|
|
177
|
+
cube.cube_id,
|
|
178
|
+
user_id,
|
|
179
|
+
memory_types=["act_mem"]
|
|
180
|
+
if self.config.enable_activation_memory
|
|
181
|
+
else [],
|
|
182
|
+
default_config=default_cube_config,
|
|
183
|
+
)
|
|
184
|
+
logger.info(f"Pre-loaded cube {cube.cube_id} for user {user_id}")
|
|
185
|
+
else:
|
|
186
|
+
logger.warning(
|
|
187
|
+
f"Cube path {cube.cube_path} does not exist for cube {cube.cube_id}, skipping pre-load"
|
|
188
|
+
)
|
|
189
|
+
except Exception as e:
|
|
190
|
+
logger.error(
|
|
191
|
+
f"Failed to pre-load cube {cube.cube_id} for user {user_id}: {e}"
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
except Exception as e:
|
|
195
|
+
logger.error(f"Error pre-loading cubes for user {user_id}: {e}")
|
|
196
|
+
|
|
197
|
+
def _load_user_cubes(
|
|
198
|
+
self, user_id: str, default_cube_config: GeneralMemCubeConfig | None = None
|
|
199
|
+
) -> None:
|
|
200
|
+
"""Load all cubes for a user into memory.
|
|
201
|
+
|
|
202
|
+
Args:
|
|
203
|
+
user_id (str): The user ID to load cubes for.
|
|
204
|
+
default_cube_config (GeneralMemCubeConfig | None, optional): Default cube configuration. Defaults to None.
|
|
205
|
+
"""
|
|
206
|
+
# Get user's accessible cubes from persistent storage
|
|
207
|
+
accessible_cubes = self.global_user_manager.get_user_cubes(user_id)
|
|
208
|
+
|
|
209
|
+
for cube in accessible_cubes[:1]:
|
|
210
|
+
if cube.cube_id not in self.mem_cubes:
|
|
211
|
+
try:
|
|
212
|
+
if cube.cube_path and os.path.exists(cube.cube_path):
|
|
213
|
+
# Use MOSCore's register_mem_cube method directly with default config
|
|
214
|
+
# Only load act_mem since text_mem is stored in database
|
|
215
|
+
self.register_mem_cube(
|
|
216
|
+
cube.cube_path,
|
|
217
|
+
cube.cube_id,
|
|
218
|
+
user_id,
|
|
219
|
+
memory_types=["act_mem"],
|
|
220
|
+
default_config=default_cube_config,
|
|
221
|
+
)
|
|
222
|
+
else:
|
|
223
|
+
logger.warning(
|
|
224
|
+
f"Cube path {cube.cube_path} does not exist for cube {cube.cube_id}"
|
|
225
|
+
)
|
|
226
|
+
except Exception as e:
|
|
227
|
+
logger.error(f"Failed to load cube {cube.cube_id} for user {user_id}: {e}")
|
|
228
|
+
|
|
229
|
+
def _ensure_user_instance(self, user_id: str, max_instances: int | None = None) -> None:
|
|
230
|
+
"""
|
|
231
|
+
Ensure user configuration exists, creating it if necessary.
|
|
232
|
+
|
|
233
|
+
Args:
|
|
234
|
+
user_id (str): The user ID
|
|
235
|
+
max_instances (int): Maximum instances to keep in memory (overrides class default)
|
|
236
|
+
"""
|
|
237
|
+
if user_id in self.user_configs:
|
|
238
|
+
return
|
|
239
|
+
|
|
240
|
+
# Try to get config from persistent storage first
|
|
241
|
+
stored_config = self.global_user_manager.get_user_config(user_id)
|
|
242
|
+
if stored_config:
|
|
243
|
+
self.user_configs[user_id] = stored_config
|
|
244
|
+
self._load_user_cube_access(user_id)
|
|
245
|
+
else:
|
|
246
|
+
# Use default config
|
|
247
|
+
if not self.default_config:
|
|
248
|
+
raise ValueError(f"No configuration available for user {user_id}")
|
|
249
|
+
user_config = self.default_config.model_copy(deep=True)
|
|
250
|
+
user_config.user_id = user_id
|
|
251
|
+
user_config.session_id = f"{user_id}_session"
|
|
252
|
+
self.user_configs[user_id] = user_config
|
|
253
|
+
self._load_user_cube_access(user_id)
|
|
254
|
+
|
|
255
|
+
# Apply LRU eviction if needed
|
|
256
|
+
max_instances = max_instances or self.max_user_instances
|
|
257
|
+
if len(self.user_configs) > max_instances:
|
|
258
|
+
# Remove least recently used instance (excluding root)
|
|
259
|
+
user_ids = [uid for uid in self.user_configs if uid != "root"]
|
|
260
|
+
if user_ids:
|
|
261
|
+
oldest_user_id = user_ids[0]
|
|
262
|
+
del self.user_configs[oldest_user_id]
|
|
263
|
+
if oldest_user_id in self.user_cube_access:
|
|
264
|
+
del self.user_cube_access[oldest_user_id]
|
|
265
|
+
logger.info(f"Removed least recently used user configuration: {oldest_user_id}")
|
|
266
|
+
|
|
267
|
+
def _load_user_cube_access(self, user_id: str) -> None:
|
|
268
|
+
"""Load user's cube access permissions."""
|
|
269
|
+
try:
|
|
270
|
+
# Get user's accessible cubes from persistent storage
|
|
271
|
+
accessible_cubes = self.global_user_manager.get_user_cube_access(user_id)
|
|
272
|
+
self.user_cube_access[user_id] = set(accessible_cubes)
|
|
273
|
+
except Exception as e:
|
|
274
|
+
logger.warning(f"Failed to load cube access for user {user_id}: {e}")
|
|
275
|
+
self.user_cube_access[user_id] = set()
|
|
276
|
+
|
|
277
|
+
def _get_user_config(self, user_id: str) -> MOSConfig:
|
|
278
|
+
"""Get user configuration."""
|
|
279
|
+
if user_id not in self.user_configs:
|
|
280
|
+
self._ensure_user_instance(user_id)
|
|
281
|
+
return self.user_configs[user_id]
|
|
282
|
+
|
|
283
|
+
def _validate_user_cube_access(self, user_id: str, cube_id: str) -> None:
|
|
284
|
+
"""Validate user has access to the cube."""
|
|
285
|
+
if user_id not in self.user_cube_access:
|
|
286
|
+
self._load_user_cube_access(user_id)
|
|
287
|
+
|
|
288
|
+
if cube_id not in self.user_cube_access.get(user_id, set()):
|
|
289
|
+
raise ValueError(f"User '{user_id}' does not have access to cube '{cube_id}'")
|
|
290
|
+
|
|
291
|
+
def _validate_user_access(self, user_id: str, cube_id: str | None = None) -> None:
|
|
292
|
+
"""Validate user access using MOSCore's built-in validation."""
|
|
293
|
+
# Use MOSCore's built-in user validation
|
|
294
|
+
if cube_id:
|
|
295
|
+
self._validate_cube_access(user_id, cube_id)
|
|
296
|
+
else:
|
|
297
|
+
self._validate_user_exists(user_id)
|
|
298
|
+
|
|
299
|
+
def _create_user_config(self, user_id: str, config: MOSConfig) -> MOSConfig:
|
|
300
|
+
"""Create a new user configuration."""
|
|
301
|
+
# Create a copy of config with the specific user_id
|
|
302
|
+
user_config = config.model_copy(deep=True)
|
|
303
|
+
user_config.user_id = user_id
|
|
304
|
+
user_config.session_id = f"{user_id}_session"
|
|
305
|
+
|
|
306
|
+
# Save configuration to persistent storage
|
|
307
|
+
self.global_user_manager.save_user_config(user_id, user_config)
|
|
308
|
+
|
|
309
|
+
return user_config
|
|
310
|
+
|
|
311
|
+
def _get_or_create_user_config(
|
|
312
|
+
self, user_id: str, config: MOSConfig | None = None
|
|
313
|
+
) -> MOSConfig:
|
|
314
|
+
"""Get existing user config or create a new one."""
|
|
315
|
+
if user_id in self.user_configs:
|
|
316
|
+
return self.user_configs[user_id]
|
|
317
|
+
|
|
318
|
+
# Try to get config from persistent storage first
|
|
319
|
+
stored_config = self.global_user_manager.get_user_config(user_id)
|
|
320
|
+
if stored_config:
|
|
321
|
+
return self._create_user_config(user_id, stored_config)
|
|
322
|
+
|
|
323
|
+
# Use provided config or default config
|
|
324
|
+
user_config = config or self.default_config
|
|
325
|
+
if not user_config:
|
|
326
|
+
raise ValueError(f"No configuration provided for user {user_id}")
|
|
327
|
+
|
|
328
|
+
return self._create_user_config(user_id, user_config)
|
|
329
|
+
|
|
330
|
+
def _build_system_prompt(self, user_id: str, memories_all: list[TextualMemoryItem]) -> str:
|
|
331
|
+
"""
|
|
332
|
+
Build custom system prompt for the user with memory references.
|
|
333
|
+
|
|
334
|
+
Args:
|
|
335
|
+
user_id (str): The user ID.
|
|
336
|
+
memories (list[TextualMemoryItem]): The memories to build the system prompt.
|
|
337
|
+
|
|
338
|
+
Returns:
|
|
339
|
+
str: The custom system prompt.
|
|
340
|
+
"""
|
|
341
|
+
|
|
342
|
+
# Build base prompt
|
|
343
|
+
base_prompt = (
|
|
344
|
+
"You are a knowledgeable and helpful AI assistant with access to user memories. "
|
|
345
|
+
"When responding to user queries, you should reference relevant memories using the provided memory IDs. "
|
|
346
|
+
"Use the reference format: [1-n:memoriesID] "
|
|
347
|
+
"where refid is a sequential number starting from 1 and increments for each reference in your response, "
|
|
348
|
+
"and memoriesID is the specific memory ID provided in the available memories list. "
|
|
349
|
+
"For example: [1:abc123], [2:def456], [3:ghi789], [4:jkl101], [5:mno112] "
|
|
350
|
+
"Only reference memories that are directly relevant to the user's question. "
|
|
351
|
+
"Make your responses natural and conversational while incorporating memory references when appropriate."
|
|
352
|
+
)
|
|
353
|
+
|
|
354
|
+
# Add memory context if available
|
|
355
|
+
if memories_all:
|
|
356
|
+
memory_context = "\n\n## Available ID Memories:\n"
|
|
357
|
+
for i, memory in enumerate(memories_all, 1):
|
|
358
|
+
# Format: [memory_id]: memory_content
|
|
359
|
+
memory_id = f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
|
|
360
|
+
memory_content = memory.memory if hasattr(memory, "memory") else str(memory)
|
|
361
|
+
memory_context += f"{memory_id}: {memory_content}\n"
|
|
362
|
+
return base_prompt + memory_context
|
|
363
|
+
|
|
364
|
+
return base_prompt
|
|
365
|
+
|
|
366
|
+
def _process_streaming_references_complete(self, text_buffer: str) -> tuple[str, str]:
|
|
367
|
+
"""
|
|
368
|
+
Complete streaming reference processing to ensure reference tags are never split.
|
|
369
|
+
|
|
370
|
+
Args:
|
|
371
|
+
text_buffer (str): The accumulated text buffer.
|
|
372
|
+
|
|
373
|
+
Returns:
|
|
374
|
+
tuple[str, str]: (processed_text, remaining_buffer)
|
|
375
|
+
"""
|
|
376
|
+
import re
|
|
377
|
+
|
|
378
|
+
# Pattern to match complete reference tags: [refid:memoriesID]
|
|
379
|
+
complete_pattern = r"\[\d+:[^\]]+\]"
|
|
380
|
+
|
|
381
|
+
# Find all complete reference tags
|
|
382
|
+
complete_matches = list(re.finditer(complete_pattern, text_buffer))
|
|
383
|
+
|
|
384
|
+
if complete_matches:
|
|
385
|
+
# Find the last complete tag
|
|
386
|
+
last_match = complete_matches[-1]
|
|
387
|
+
end_pos = last_match.end()
|
|
388
|
+
|
|
389
|
+
# Return text up to the end of the last complete tag
|
|
390
|
+
processed_text = text_buffer[:end_pos]
|
|
391
|
+
remaining_buffer = text_buffer[end_pos:]
|
|
392
|
+
return processed_text, remaining_buffer
|
|
393
|
+
|
|
394
|
+
# Check for incomplete reference tags
|
|
395
|
+
# Look for opening bracket with number and colon
|
|
396
|
+
opening_pattern = r"\[\d+:"
|
|
397
|
+
opening_matches = list(re.finditer(opening_pattern, text_buffer))
|
|
398
|
+
|
|
399
|
+
if opening_matches:
|
|
400
|
+
# Find the last opening tag
|
|
401
|
+
last_opening = opening_matches[-1]
|
|
402
|
+
opening_start = last_opening.start()
|
|
403
|
+
|
|
404
|
+
# Check if we have a complete opening pattern
|
|
405
|
+
if last_opening.end() <= len(text_buffer):
|
|
406
|
+
# We have a complete opening pattern, keep everything in buffer
|
|
407
|
+
return "", text_buffer
|
|
408
|
+
else:
|
|
409
|
+
# Incomplete opening pattern, return text before it
|
|
410
|
+
return text_buffer[:opening_start], text_buffer[opening_start:]
|
|
411
|
+
|
|
412
|
+
# Check for partial opening pattern (starts with [ but not complete)
|
|
413
|
+
if "[" in text_buffer:
|
|
414
|
+
ref_start = text_buffer.find("[")
|
|
415
|
+
return text_buffer[:ref_start], text_buffer[ref_start:]
|
|
416
|
+
|
|
417
|
+
# No reference tags found, return all text
|
|
418
|
+
return text_buffer, ""
|
|
419
|
+
|
|
420
|
+
def _extract_references_from_response(self, response: str) -> list[dict]:
|
|
421
|
+
"""
|
|
422
|
+
Extract reference information from the response.
|
|
423
|
+
|
|
424
|
+
Args:
|
|
425
|
+
response (str): The complete response text.
|
|
426
|
+
|
|
427
|
+
Returns:
|
|
428
|
+
list[dict]: List of reference information.
|
|
429
|
+
"""
|
|
430
|
+
import re
|
|
431
|
+
|
|
432
|
+
references = []
|
|
433
|
+
# Pattern to match [refid:memoriesID]
|
|
434
|
+
pattern = r"\[(\d+):([^\]]+)\]"
|
|
435
|
+
|
|
436
|
+
matches = re.findall(pattern, response)
|
|
437
|
+
for ref_number, memory_id in matches:
|
|
438
|
+
references.append({"memory_id": memory_id, "reference_number": int(ref_number)})
|
|
439
|
+
|
|
440
|
+
return references
|
|
441
|
+
|
|
442
|
+
def _chunk_response_with_tiktoken(
|
|
443
|
+
self, response: str, chunk_size: int = 5
|
|
444
|
+
) -> Generator[str, None, None]:
|
|
445
|
+
"""
|
|
446
|
+
Chunk response using tiktoken for proper token-based streaming.
|
|
447
|
+
|
|
448
|
+
Args:
|
|
449
|
+
response (str): The response text to chunk.
|
|
450
|
+
chunk_size (int): Number of tokens per chunk.
|
|
451
|
+
|
|
452
|
+
Yields:
|
|
453
|
+
str: Chunked text pieces.
|
|
454
|
+
"""
|
|
455
|
+
if self.tokenizer:
|
|
456
|
+
# Use tiktoken for proper token-based chunking
|
|
457
|
+
tokens = self.tokenizer.encode(response)
|
|
458
|
+
|
|
459
|
+
for i in range(0, len(tokens), chunk_size):
|
|
460
|
+
token_chunk = tokens[i : i + chunk_size]
|
|
461
|
+
chunk_text = self.tokenizer.decode(token_chunk)
|
|
462
|
+
yield chunk_text
|
|
463
|
+
else:
|
|
464
|
+
# Fallback to character-based chunking
|
|
465
|
+
char_chunk_size = chunk_size * 4 # Approximate character to token ratio
|
|
466
|
+
for i in range(0, len(response), char_chunk_size):
|
|
467
|
+
yield response[i : i + char_chunk_size]
|
|
468
|
+
|
|
469
|
+
def _send_message_to_scheduler(
|
|
470
|
+
self,
|
|
471
|
+
user_id: str,
|
|
472
|
+
mem_cube_id: str,
|
|
473
|
+
query: str,
|
|
474
|
+
label: str,
|
|
475
|
+
):
|
|
476
|
+
"""
|
|
477
|
+
Send message to scheduler.
|
|
478
|
+
args:
|
|
479
|
+
user_id: str,
|
|
480
|
+
mem_cube_id: str,
|
|
481
|
+
query: str,
|
|
482
|
+
"""
|
|
483
|
+
|
|
484
|
+
if self.enable_mem_scheduler and (self.mem_scheduler is not None):
|
|
485
|
+
message_item = ScheduleMessageItem(
|
|
486
|
+
user_id=user_id,
|
|
487
|
+
mem_cube_id=mem_cube_id,
|
|
488
|
+
mem_cube=self.mem_cubes[mem_cube_id],
|
|
489
|
+
label=label,
|
|
490
|
+
content=query,
|
|
491
|
+
timestamp=datetime.now(),
|
|
492
|
+
)
|
|
493
|
+
self.mem_scheduler.submit_messages(messages=[message_item])
|
|
494
|
+
|
|
495
|
+
def register_mem_cube(
|
|
496
|
+
self,
|
|
497
|
+
mem_cube_name_or_path_or_object: str | GeneralMemCube,
|
|
498
|
+
mem_cube_id: str | None = None,
|
|
499
|
+
user_id: str | None = None,
|
|
500
|
+
memory_types: list[Literal["text_mem", "act_mem", "para_mem"]] | None = None,
|
|
501
|
+
default_config: GeneralMemCubeConfig | None = None,
|
|
502
|
+
) -> None:
|
|
503
|
+
"""
|
|
504
|
+
Register a MemCube with the MOS.
|
|
505
|
+
|
|
506
|
+
Args:
|
|
507
|
+
mem_cube_name_or_path_or_object (str | GeneralMemCube): The name, path, or GeneralMemCube object to register.
|
|
508
|
+
mem_cube_id (str, optional): The identifier for the MemCube. If not provided, a default ID is used.
|
|
509
|
+
user_id (str, optional): The user ID to register the cube for.
|
|
510
|
+
memory_types (list[str], optional): List of memory types to load.
|
|
511
|
+
If None, loads all available memory types.
|
|
512
|
+
Options: ["text_mem", "act_mem", "para_mem"]
|
|
513
|
+
default_config (GeneralMemCubeConfig, optional): Default configuration for the cube.
|
|
514
|
+
"""
|
|
515
|
+
# Handle different input types
|
|
516
|
+
if isinstance(mem_cube_name_or_path_or_object, GeneralMemCube):
|
|
517
|
+
# Direct GeneralMemCube object provided
|
|
518
|
+
mem_cube = mem_cube_name_or_path_or_object
|
|
519
|
+
if mem_cube_id is None:
|
|
520
|
+
mem_cube_id = f"cube_{id(mem_cube)}" # Generate a unique ID
|
|
521
|
+
else:
|
|
522
|
+
# String path provided
|
|
523
|
+
mem_cube_name_or_path = mem_cube_name_or_path_or_object
|
|
524
|
+
if mem_cube_id is None:
|
|
525
|
+
mem_cube_id = mem_cube_name_or_path
|
|
526
|
+
|
|
527
|
+
if mem_cube_id in self.mem_cubes:
|
|
528
|
+
logger.info(f"MemCube with ID {mem_cube_id} already in MOS, skip install.")
|
|
529
|
+
return
|
|
530
|
+
|
|
531
|
+
# Create MemCube from path
|
|
532
|
+
if os.path.exists(mem_cube_name_or_path):
|
|
533
|
+
mem_cube = GeneralMemCube.init_from_dir(
|
|
534
|
+
mem_cube_name_or_path, memory_types, default_config
|
|
535
|
+
)
|
|
536
|
+
else:
|
|
537
|
+
logger.warning(
|
|
538
|
+
f"MemCube {mem_cube_name_or_path} does not exist, try to init from remote repo."
|
|
539
|
+
)
|
|
540
|
+
mem_cube = GeneralMemCube.init_from_remote_repo(
|
|
541
|
+
mem_cube_name_or_path, memory_types=memory_types, default_config=default_config
|
|
542
|
+
)
|
|
543
|
+
|
|
544
|
+
# Register the MemCube
|
|
545
|
+
logger.info(
|
|
546
|
+
f"Registering MemCube {mem_cube_id} with cube config {mem_cube.config.model_dump(mode='json')}"
|
|
547
|
+
)
|
|
548
|
+
self.mem_cubes[mem_cube_id] = mem_cube
|
|
549
|
+
|
|
550
|
+
def user_register(
|
|
551
|
+
self,
|
|
552
|
+
user_id: str,
|
|
553
|
+
user_name: str | None = None,
|
|
554
|
+
config: MOSConfig | None = None,
|
|
555
|
+
interests: str | None = None,
|
|
556
|
+
default_mem_cube: GeneralMemCube | None = None,
|
|
557
|
+
default_cube_config: GeneralMemCubeConfig | None = None,
|
|
558
|
+
) -> dict[str, str]:
|
|
559
|
+
"""Register a new user with configuration and default cube.
|
|
560
|
+
|
|
561
|
+
Args:
|
|
562
|
+
user_id (str): The user ID for registration.
|
|
563
|
+
user_name (str): The user name for registration.
|
|
564
|
+
config (MOSConfig | None, optional): User-specific configuration. Defaults to None.
|
|
565
|
+
interests (str | None, optional): User interests as string. Defaults to None.
|
|
566
|
+
default_mem_cube (GeneralMemCube | None, optional): Default memory cube. Defaults to None.
|
|
567
|
+
default_cube_config (GeneralMemCubeConfig | None, optional): Default cube configuration. Defaults to None.
|
|
568
|
+
|
|
569
|
+
Returns:
|
|
570
|
+
dict[str, str]: Registration result with status and message.
|
|
571
|
+
"""
|
|
572
|
+
try:
|
|
573
|
+
# Use provided config or default config
|
|
574
|
+
user_config = config or self.default_config
|
|
575
|
+
if not user_config:
|
|
576
|
+
return {
|
|
577
|
+
"status": "error",
|
|
578
|
+
"message": "No configuration provided for user registration",
|
|
579
|
+
}
|
|
580
|
+
if not user_name:
|
|
581
|
+
user_name = user_id
|
|
582
|
+
|
|
583
|
+
# Create user with configuration using persistent user manager
|
|
584
|
+
self.global_user_manager.create_user_with_config(
|
|
585
|
+
user_id, user_config, UserRole.USER, user_id
|
|
586
|
+
)
|
|
587
|
+
|
|
588
|
+
# Create user configuration
|
|
589
|
+
user_config = self._create_user_config(user_id, user_config)
|
|
590
|
+
|
|
591
|
+
# Create a default cube for the user using MOSCore's methods
|
|
592
|
+
default_cube_name = f"{user_name}_{user_id}_default_cube"
|
|
593
|
+
mem_cube_name_or_path = f"{CUBE_PATH}/{default_cube_name}"
|
|
594
|
+
default_cube_id = self.create_cube_for_user(
|
|
595
|
+
cube_name=default_cube_name, owner_id=user_id, cube_path=mem_cube_name_or_path
|
|
596
|
+
)
|
|
21
597
|
|
|
22
|
-
|
|
598
|
+
if default_mem_cube:
|
|
599
|
+
try:
|
|
600
|
+
default_mem_cube.dump(mem_cube_name_or_path)
|
|
601
|
+
except Exception as e:
|
|
602
|
+
print(e)
|
|
603
|
+
|
|
604
|
+
# Register the default cube with MOS
|
|
605
|
+
self.register_mem_cube(
|
|
606
|
+
mem_cube_name_or_path_or_object=default_mem_cube,
|
|
607
|
+
mem_cube_id=default_cube_id,
|
|
608
|
+
user_id=user_id,
|
|
609
|
+
memory_types=["act_mem"] if self.config.enable_activation_memory else [],
|
|
610
|
+
default_config=default_cube_config, # use default cube config
|
|
611
|
+
)
|
|
612
|
+
|
|
613
|
+
# Add interests to the default cube if provided
|
|
614
|
+
if interests:
|
|
615
|
+
self.add(memory_content=interests, mem_cube_id=default_cube_id, user_id=user_id)
|
|
616
|
+
|
|
617
|
+
return {
|
|
618
|
+
"status": "success",
|
|
619
|
+
"message": f"User {user_name} registered successfully with default cube {default_cube_id}",
|
|
620
|
+
"user_id": user_id,
|
|
621
|
+
"default_cube_id": default_cube_id,
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
except Exception as e:
|
|
625
|
+
return {"status": "error", "message": f"Failed to register user: {e!s}"}
|
|
626
|
+
|
|
627
|
+
def get_suggestion_query(self, user_id: str, language: str = "zh") -> list[str]:
|
|
23
628
|
"""Get suggestion query from LLM.
|
|
24
629
|
Args:
|
|
25
|
-
user_id (str
|
|
630
|
+
user_id (str): User ID.
|
|
631
|
+
language (str): Language for suggestions ("zh" or "en").
|
|
26
632
|
|
|
27
633
|
Returns:
|
|
28
634
|
list[str]: The suggestion query list.
|
|
29
635
|
"""
|
|
30
636
|
|
|
637
|
+
if language == "zh":
|
|
638
|
+
suggestion_prompt = """
|
|
639
|
+
你是一个有用的助手,可以帮助用户生成建议查询。
|
|
640
|
+
我将获取用户最近的一些记忆,
|
|
641
|
+
你应该生成一些建议查询,这些查询应该是用户想要查询的内容,
|
|
642
|
+
用户最近的记忆是:
|
|
643
|
+
{memories}
|
|
644
|
+
请生成3个建议查询用中文,
|
|
645
|
+
输出应该是json格式,键是"query",值是一个建议查询列表。
|
|
646
|
+
|
|
647
|
+
示例:
|
|
648
|
+
{{
|
|
649
|
+
"query": ["查询1", "查询2", "查询3"]
|
|
650
|
+
}}
|
|
651
|
+
"""
|
|
652
|
+
else: # English
|
|
653
|
+
suggestion_prompt = """
|
|
654
|
+
You are a helpful assistant that can help users to generate suggestion query.
|
|
655
|
+
I will get some user recently memories,
|
|
656
|
+
you should generate some suggestion query, the query should be user what to query,
|
|
657
|
+
user recently memories is:
|
|
658
|
+
{memories}
|
|
659
|
+
please generate 3 suggestion query in English,
|
|
660
|
+
output should be a json format, the key is "query", the value is a list of suggestion query.
|
|
661
|
+
|
|
662
|
+
example:
|
|
663
|
+
{{
|
|
664
|
+
"query": ["query1", "query2", "query3"]
|
|
665
|
+
}}
|
|
666
|
+
"""
|
|
667
|
+
text_mem_result = super().search("my recently memories", user_id=user_id, top_k=10)[
|
|
668
|
+
"text_mem"
|
|
669
|
+
]
|
|
670
|
+
if text_mem_result:
|
|
671
|
+
memories = "\n".join([m.memory for m in text_mem_result[0]["memories"]])
|
|
672
|
+
else:
|
|
673
|
+
memories = ""
|
|
674
|
+
message_list = [{"role": "system", "content": suggestion_prompt.format(memories=memories)}]
|
|
675
|
+
response = self.chat_llm.generate(message_list)
|
|
676
|
+
response_json = json.loads(response)
|
|
677
|
+
|
|
678
|
+
return response_json["query"]
|
|
679
|
+
|
|
31
680
|
def chat(
|
|
32
681
|
self,
|
|
33
682
|
query: str,
|
|
@@ -38,43 +687,211 @@ class MOSProduct(MOSCore):
|
|
|
38
687
|
"""Chat with LLM SSE Type.
|
|
39
688
|
Args:
|
|
40
689
|
query (str): Query string.
|
|
41
|
-
user_id (str
|
|
690
|
+
user_id (str): User ID.
|
|
42
691
|
cube_id (str, optional): Custom cube ID for user.
|
|
43
692
|
history (list[dict], optional): Chat history.
|
|
44
693
|
|
|
45
694
|
Returns:
|
|
46
695
|
Generator[str, None, None]: The response string generator.
|
|
47
696
|
"""
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
697
|
+
# Use MOSCore's built-in validation
|
|
698
|
+
if cube_id:
|
|
699
|
+
self._validate_cube_access(user_id, cube_id)
|
|
700
|
+
else:
|
|
701
|
+
self._validate_user_exists(user_id)
|
|
702
|
+
|
|
703
|
+
# Load user cubes if not already loaded
|
|
704
|
+
self._load_user_cubes(user_id, self.default_cube_config)
|
|
705
|
+
time_start = time.time()
|
|
706
|
+
memories_list = super().search(query, user_id)["text_mem"]
|
|
707
|
+
# Get response from parent MOSCore (returns string, not generator)
|
|
708
|
+
response = super().chat(query, user_id)
|
|
709
|
+
time_end = time.time()
|
|
710
|
+
|
|
711
|
+
# Use tiktoken for proper token-based chunking
|
|
712
|
+
for chunk in self._chunk_response_with_tiktoken(response, chunk_size=5):
|
|
713
|
+
chunk_data = f"data: {json.dumps({'type': 'text', 'content': chunk})}\n\n"
|
|
56
714
|
yield chunk_data
|
|
57
|
-
|
|
715
|
+
|
|
716
|
+
# Prepare reference data
|
|
717
|
+
reference = []
|
|
718
|
+
for memories in memories_list:
|
|
719
|
+
memories_json = memories.model_dump()
|
|
720
|
+
memories_json["metadata"]["ref_id"] = f"[{memories.id.split('-')[0]}]"
|
|
721
|
+
memories_json["metadata"]["embedding"] = []
|
|
722
|
+
memories_json["metadata"]["sources"] = []
|
|
723
|
+
reference.append(memories_json)
|
|
724
|
+
|
|
58
725
|
yield f"data: {json.dumps({'type': 'reference', 'content': reference})}\n\n"
|
|
726
|
+
total_time = round(float(time_end - time_start), 1)
|
|
727
|
+
|
|
728
|
+
yield f"data: {json.dumps({'type': 'time', 'content': {'total_time': total_time, 'speed_improvement': '23%'}})}\n\n"
|
|
59
729
|
yield f"data: {json.dumps({'type': 'end'})}\n\n"
|
|
60
730
|
|
|
61
|
-
def
|
|
731
|
+
    def chat_with_references(
        self,
        query: str,
        user_id: str,
        cube_id: str | None = None,
        history: MessageList | None = None,
    ) -> Generator[str, None, None]:
        """
        Chat with LLM with memory references and streaming output.

        Yields SSE-style strings ("data: <json>" followed by a blank line).
        Event order: zero or more {"type": "text"} chunks, one
        {"type": "reference"}, one {"type": "time"}, one {"type": "end"}.
        Side effects after streaming: the exchange is appended to the user's
        chat history, sent to the scheduler (query + answer), and persisted
        as new memories via self.add().

        Args:
            query (str): Query string.
            user_id (str): User ID.
            cube_id (str, optional): Custom cube ID for user.
            history (MessageList, optional): Chat history. NOTE(review): this
                parameter is currently unused — the per-user
                chat_history_manager is the only history source.

        Returns:
            Generator[str, None, None]: The response string generator with reference processing.
        """

        self._load_user_cubes(user_id, self.default_cube_config)

        time_start = time.time()
        memories_list = []
        # Retrieve the top text memories relevant to the query, scoped to
        # cube_id when one was given.
        memories_result = super().search(
            query, user_id, install_cube_ids=[cube_id] if cube_id else None, top_k=10
        )["text_mem"]
        if memories_result:
            memories_list = memories_result[0]["memories"]

        # Build custom system prompt with relevant memories
        system_prompt = self._build_system_prompt(user_id, memories_list)

        # Get chat history (registered lazily per user)
        target_user_id = user_id if user_id is not None else self.user_id
        if target_user_id not in self.chat_history_manager:
            self._register_chat_history(target_user_id)

        chat_history = self.chat_history_manager[target_user_id]
        current_messages = [
            {"role": "system", "content": system_prompt},
            *chat_history.chat_history,
            {"role": "user", "content": query},
        ]

        # Generate response with custom prompt
        past_key_values = None
        response_stream = None
        if self.config.enable_activation_memory:
            # Reuse the target cube's cached KV activation memory when
            # available (same logic as MOSCore.chat).
            for mem_cube_id, mem_cube in self.mem_cubes.items():
                if mem_cube.act_mem and mem_cube_id == cube_id:
                    kv_cache = next(iter(mem_cube.act_mem.get_all()), None)
                    past_key_values = (
                        kv_cache.memory if (kv_cache and hasattr(kv_cache, "memory")) else None
                    )
                    if past_key_values is not None:
                        logger.info("past_key_values is not None will apply to chat")
                    else:
                        logger.info("past_key_values is None will not apply to chat")
                    break
            # NOTE(review): if activation memory is enabled but the backend is
            # neither "huggingface" nor "vllm", response_stream stays None and
            # the iteration below raises TypeError — confirm supported backends.
            if self.config.chat_model.backend == "huggingface":
                response_stream = self.chat_llm.generate_stream(
                    current_messages, past_key_values=past_key_values
                )
            elif self.config.chat_model.backend == "vllm":
                response_stream = self.chat_llm.generate_stream(current_messages)
        else:
            if self.config.chat_model.backend in ["huggingface", "vllm"]:
                response_stream = self.chat_llm.generate_stream(current_messages)
            else:
                response_stream = self.chat_llm.generate(current_messages)

        # NOTE(review): time_end is taken before the stream is consumed, so for
        # streaming backends total_time excludes the actual generation time.
        time_end = time.time()

        # Simulate streaming output with proper reference handling using tiktoken

        # Initialize buffer for streaming
        buffer = ""
        full_response = ""

        # Use tiktoken for proper token-based chunking
        if self.config.chat_model.backend not in ["huggingface", "vllm"]:
            # Non-streaming backends return a complete string: collect it,
            # then re-chunk into ~5-token pieces for uniform streaming.
            full_response_text = ""
            for chunk in response_stream:
                if chunk in ["<think>", "</think>"]:
                    continue
                full_response_text += chunk
            response_stream = self._chunk_response_with_tiktoken(full_response_text, chunk_size=5)
        for chunk in response_stream:
            # Drop reasoning-model sentinel tokens from the visible output.
            if chunk in ["<think>", "</think>"]:
                continue
            buffer += chunk
            full_response += chunk

            # Process buffer to ensure complete reference tags are never split
            # across emitted chunks.
            processed_chunk, remaining_buffer = self._process_streaming_references_complete(buffer)

            if processed_chunk:
                chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
                yield chunk_data
                buffer = remaining_buffer

        # Process any remaining buffer
        if buffer:
            processed_chunk, remaining_buffer = self._process_streaming_references_complete(buffer)
            if processed_chunk:
                chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
                yield chunk_data

        # Prepare reference data: strip bulky fields (embedding/sources) and
        # attach a short ref_id derived from the memory id prefix.
        reference = []
        for memories in memories_list:
            memories_json = memories.model_dump()
            memories_json["metadata"]["ref_id"] = f"{memories.id.split('-')[0]}"
            memories_json["metadata"]["embedding"] = []
            memories_json["metadata"]["sources"] = []
            memories_json["metadata"]["memory"] = memories.memory
            reference.append({"metadata": memories_json["metadata"]})

        yield f"data: {json.dumps({'type': 'reference', 'data': reference})}\n\n"
        total_time = round(float(time_end - time_start), 1)
        # NOTE(review): speed_improvement is a hard-coded display value.
        yield f"data: {json.dumps({'type': 'time', 'data': {'total_time': total_time, 'speed_improvement': '23%'}})}\n\n"
        chat_history.chat_history.append({"role": "user", "content": query})
        chat_history.chat_history.append({"role": "assistant", "content": full_response})
        # Notify the scheduler of both the query and the final answer.
        self._send_message_to_scheduler(
            user_id=user_id, mem_cube_id=cube_id, query=query, label=QUERY_LABEL
        )
        self._send_message_to_scheduler(
            user_id=user_id, mem_cube_id=cube_id, query=full_response, label=ANSWER_LABEL
        )
        self.chat_history_manager[user_id] = chat_history

        yield f"data: {json.dumps({'type': 'end'})}\n\n"
        # Persist the finished exchange as new memories in the user's cube.
        self.add(
            user_id=user_id,
            messages=[
                {
                    "role": "user",
                    "content": query,
                    "chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
                },
                {
                    "role": "assistant",
                    "content": full_response,
                    "chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
                },
            ],
            mem_cube_id=cube_id,
        )
        # Cap the in-memory history: once it exceeds 10 entries, drop the
        # oldest user/assistant pair. (A previous comment said "under 30
        # messages" — the code checks > 10.)
        if len(self.chat_history_manager[user_id].chat_history) > 10:
            self.chat_history_manager[user_id].chat_history.pop(0)  # Remove oldest user message
            self.chat_history_manager[user_id].chat_history.pop(
                0
            )  # Remove oldest assistant response
|
|
888
|
+
|
|
889
|
+
def get_all(
|
|
890
|
+
self,
|
|
891
|
+
user_id: str,
|
|
892
|
+
memory_type: Literal["text_mem", "act_mem", "param_mem", "para_mem"],
|
|
893
|
+
mem_cube_ids: list[str] | None = None,
|
|
894
|
+
) -> list[dict[str, Any]]:
|
|
78
895
|
"""Get all memory items for a user.
|
|
79
896
|
|
|
80
897
|
Args:
|
|
@@ -83,7 +900,253 @@ class MOSProduct(MOSCore):
|
|
|
83
900
|
memory_type (Literal["text_mem", "act_mem", "param_mem"]): The type of memory to get.
|
|
84
901
|
|
|
85
902
|
Returns:
|
|
86
|
-
list[
|
|
903
|
+
list[dict[str, Any]]: A list of memory items with cube_id and memories structure.
|
|
87
904
|
"""
|
|
88
|
-
|
|
89
|
-
|
|
905
|
+
|
|
906
|
+
# Load user cubes if not already loaded
|
|
907
|
+
self._load_user_cubes(user_id, self.default_cube_config)
|
|
908
|
+
memory_list = super().get_all(
|
|
909
|
+
mem_cube_id=mem_cube_ids[0] if mem_cube_ids else None, user_id=user_id
|
|
910
|
+
)[memory_type]
|
|
911
|
+
reformat_memory_list = []
|
|
912
|
+
if memory_type == "text_mem":
|
|
913
|
+
for memory in memory_list:
|
|
914
|
+
memories = remove_embedding_recursive(memory["memories"])
|
|
915
|
+
custom_type_ratios = {
|
|
916
|
+
"WorkingMemory": 0.20,
|
|
917
|
+
"LongTermMemory": 0.40,
|
|
918
|
+
"UserMemory": 0.40,
|
|
919
|
+
}
|
|
920
|
+
tree_result, node_type_count = convert_graph_to_tree_forworkmem(
|
|
921
|
+
memories, target_node_count=150, type_ratios=custom_type_ratios
|
|
922
|
+
)
|
|
923
|
+
memories_filtered = filter_nodes_by_tree_ids(tree_result, memories)
|
|
924
|
+
children = tree_result["children"]
|
|
925
|
+
children_sort = sort_children_by_memory_type(children)
|
|
926
|
+
tree_result["children"] = children_sort
|
|
927
|
+
memories_filtered["tree_structure"] = tree_result
|
|
928
|
+
reformat_memory_list.append(
|
|
929
|
+
{
|
|
930
|
+
"cube_id": memory["cube_id"],
|
|
931
|
+
"memories": [memories_filtered],
|
|
932
|
+
"memory_statistics": node_type_count,
|
|
933
|
+
}
|
|
934
|
+
)
|
|
935
|
+
elif memory_type == "act_mem":
|
|
936
|
+
memories_list = []
|
|
937
|
+
act_mem_params = self.mem_cubes[mem_cube_ids[0]].act_mem.get_all()
|
|
938
|
+
if act_mem_params:
|
|
939
|
+
memories_data = act_mem_params[0].model_dump()
|
|
940
|
+
records = memories_data.get("records", [])
|
|
941
|
+
for record in records["text_memories"]:
|
|
942
|
+
memories_list.append(
|
|
943
|
+
{
|
|
944
|
+
"id": memories_data["id"],
|
|
945
|
+
"text": record,
|
|
946
|
+
"create_time": records["timestamp"],
|
|
947
|
+
"size": random.randint(1, 20),
|
|
948
|
+
"modify_times": 1,
|
|
949
|
+
}
|
|
950
|
+
)
|
|
951
|
+
reformat_memory_list.append(
|
|
952
|
+
{
|
|
953
|
+
"cube_id": "xxxxxxxxxxxxxxxx" if not mem_cube_ids else mem_cube_ids[0],
|
|
954
|
+
"memories": memories_list,
|
|
955
|
+
}
|
|
956
|
+
)
|
|
957
|
+
elif memory_type == "para_mem":
|
|
958
|
+
act_mem_params = self.mem_cubes[mem_cube_ids[0]].act_mem.get_all()
|
|
959
|
+
logger.info(f"act_mem_params: {act_mem_params}")
|
|
960
|
+
reformat_memory_list.append(
|
|
961
|
+
{
|
|
962
|
+
"cube_id": "xxxxxxxxxxxxxxxx" if not mem_cube_ids else mem_cube_ids[0],
|
|
963
|
+
"memories": act_mem_params[0].model_dump(),
|
|
964
|
+
}
|
|
965
|
+
)
|
|
966
|
+
return reformat_memory_list
|
|
967
|
+
|
|
968
|
+
def _get_subgraph(
|
|
969
|
+
self, query: str, mem_cube_id: str, user_id: str | None = None, top_k: int = 5
|
|
970
|
+
) -> list[dict[str, Any]]:
|
|
971
|
+
result = {"para_mem": [], "act_mem": [], "text_mem": []}
|
|
972
|
+
if self.config.enable_textual_memory and self.mem_cubes[mem_cube_id].text_mem:
|
|
973
|
+
result["text_mem"].append(
|
|
974
|
+
{
|
|
975
|
+
"cube_id": mem_cube_id,
|
|
976
|
+
"memories": self.mem_cubes[mem_cube_id].text_mem.get_relevant_subgraph(
|
|
977
|
+
query, top_k=top_k
|
|
978
|
+
),
|
|
979
|
+
}
|
|
980
|
+
)
|
|
981
|
+
return result
|
|
982
|
+
|
|
983
|
+
def get_subgraph(
|
|
984
|
+
self,
|
|
985
|
+
user_id: str,
|
|
986
|
+
query: str,
|
|
987
|
+
mem_cube_ids: list[str] | None = None,
|
|
988
|
+
) -> list[dict[str, Any]]:
|
|
989
|
+
"""Get all memory items for a user.
|
|
990
|
+
|
|
991
|
+
Args:
|
|
992
|
+
user_id (str): The ID of the user.
|
|
993
|
+
cube_id (str | None, optional): The ID of the cube. Defaults to None.
|
|
994
|
+
mem_cube_ids (list[str], optional): The IDs of the cubes. Defaults to None.
|
|
995
|
+
|
|
996
|
+
Returns:
|
|
997
|
+
list[dict[str, Any]]: A list of memory items with cube_id and memories structure.
|
|
998
|
+
"""
|
|
999
|
+
|
|
1000
|
+
# Load user cubes if not already loaded
|
|
1001
|
+
self._load_user_cubes(user_id, self.default_cube_config)
|
|
1002
|
+
memory_list = self._get_subgraph(
|
|
1003
|
+
query=query, mem_cube_id=mem_cube_ids[0], user_id=user_id, top_k=20
|
|
1004
|
+
)["text_mem"]
|
|
1005
|
+
reformat_memory_list = []
|
|
1006
|
+
for memory in memory_list:
|
|
1007
|
+
memories = remove_embedding_recursive(memory["memories"])
|
|
1008
|
+
custom_type_ratios = {"WorkingMemory": 0.20, "LongTermMemory": 0.40, "UserMemory": 0.4}
|
|
1009
|
+
tree_result, node_type_count = convert_graph_to_tree_forworkmem(
|
|
1010
|
+
memories, target_node_count=150, type_ratios=custom_type_ratios
|
|
1011
|
+
)
|
|
1012
|
+
memories_filtered = filter_nodes_by_tree_ids(tree_result, memories)
|
|
1013
|
+
children = tree_result["children"]
|
|
1014
|
+
children_sort = sort_children_by_memory_type(children)
|
|
1015
|
+
tree_result["children"] = children_sort
|
|
1016
|
+
memories_filtered["tree_structure"] = tree_result
|
|
1017
|
+
reformat_memory_list.append(
|
|
1018
|
+
{
|
|
1019
|
+
"cube_id": memory["cube_id"],
|
|
1020
|
+
"memories": [memories_filtered],
|
|
1021
|
+
"memory_statistics": node_type_count,
|
|
1022
|
+
}
|
|
1023
|
+
)
|
|
1024
|
+
|
|
1025
|
+
return reformat_memory_list
|
|
1026
|
+
|
|
1027
|
+
def search(
|
|
1028
|
+
self, query: str, user_id: str, install_cube_ids: list[str] | None = None, top_k: int = 20
|
|
1029
|
+
):
|
|
1030
|
+
"""Search memories for a specific user."""
|
|
1031
|
+
# Validate user access
|
|
1032
|
+
self._validate_user_access(user_id)
|
|
1033
|
+
|
|
1034
|
+
# Load user cubes if not already loaded
|
|
1035
|
+
self._load_user_cubes(user_id, self.default_cube_config)
|
|
1036
|
+
search_result = super().search(query, user_id, install_cube_ids, top_k)
|
|
1037
|
+
text_memory_list = search_result["text_mem"]
|
|
1038
|
+
reformat_memory_list = []
|
|
1039
|
+
for memory in text_memory_list:
|
|
1040
|
+
memories_list = []
|
|
1041
|
+
for data in memory["memories"]:
|
|
1042
|
+
memories = data.model_dump()
|
|
1043
|
+
memories["ref_id"] = f"[{memories['id'].split('-')[0]}]"
|
|
1044
|
+
memories["metadata"]["embedding"] = []
|
|
1045
|
+
memories["metadata"]["sources"] = []
|
|
1046
|
+
memories["metadata"]["ref_id"] = f"[{memories['id'].split('-')[0]}]"
|
|
1047
|
+
memories["metadata"]["id"] = memories["id"]
|
|
1048
|
+
memories["metadata"]["memory"] = memories["memory"]
|
|
1049
|
+
memories_list.append(memories)
|
|
1050
|
+
reformat_memory_list.append({"cube_id": memory["cube_id"], "memories": memories_list})
|
|
1051
|
+
search_result["text_mem"] = reformat_memory_list
|
|
1052
|
+
|
|
1053
|
+
return search_result
|
|
1054
|
+
|
|
1055
|
+
def add(
|
|
1056
|
+
self,
|
|
1057
|
+
user_id: str,
|
|
1058
|
+
messages: MessageList | None = None,
|
|
1059
|
+
memory_content: str | None = None,
|
|
1060
|
+
doc_path: str | None = None,
|
|
1061
|
+
mem_cube_id: str | None = None,
|
|
1062
|
+
):
|
|
1063
|
+
"""Add memory for a specific user."""
|
|
1064
|
+
# Use MOSCore's built-in user/cube validation
|
|
1065
|
+
if mem_cube_id:
|
|
1066
|
+
self._validate_cube_access(user_id, mem_cube_id)
|
|
1067
|
+
else:
|
|
1068
|
+
self._validate_user_exists(user_id)
|
|
1069
|
+
|
|
1070
|
+
# Load user cubes if not already loaded
|
|
1071
|
+
self._load_user_cubes(user_id, self.default_cube_config)
|
|
1072
|
+
|
|
1073
|
+
result = super().add(messages, memory_content, doc_path, mem_cube_id, user_id)
|
|
1074
|
+
|
|
1075
|
+
return result
|
|
1076
|
+
|
|
1077
|
+
def list_users(self) -> list:
|
|
1078
|
+
"""List all registered users."""
|
|
1079
|
+
return self.global_user_manager.list_users()
|
|
1080
|
+
|
|
1081
|
+
def get_user_info(self, user_id: str) -> dict:
|
|
1082
|
+
"""Get user information including accessible cubes."""
|
|
1083
|
+
# Use MOSCore's built-in user validation
|
|
1084
|
+
# Validate user access
|
|
1085
|
+
self._validate_user_access(user_id)
|
|
1086
|
+
|
|
1087
|
+
result = super().get_user_info()
|
|
1088
|
+
|
|
1089
|
+
return result
|
|
1090
|
+
|
|
1091
|
+
def share_cube_with_user(self, cube_id: str, owner_user_id: str, target_user_id: str) -> bool:
|
|
1092
|
+
"""Share a cube with another user."""
|
|
1093
|
+
# Use MOSCore's built-in cube access validation
|
|
1094
|
+
self._validate_cube_access(owner_user_id, cube_id)
|
|
1095
|
+
|
|
1096
|
+
result = super().share_cube_with_user(cube_id, target_user_id)
|
|
1097
|
+
|
|
1098
|
+
return result
|
|
1099
|
+
|
|
1100
|
+
def clear_user_chat_history(self, user_id: str) -> None:
|
|
1101
|
+
"""Clear chat history for a specific user."""
|
|
1102
|
+
# Validate user access
|
|
1103
|
+
self._validate_user_access(user_id)
|
|
1104
|
+
|
|
1105
|
+
super().clear_messages(user_id)
|
|
1106
|
+
|
|
1107
|
+
def update_user_config(self, user_id: str, config: MOSConfig) -> bool:
|
|
1108
|
+
"""Update user configuration.
|
|
1109
|
+
|
|
1110
|
+
Args:
|
|
1111
|
+
user_id (str): The user ID.
|
|
1112
|
+
config (MOSConfig): The new configuration.
|
|
1113
|
+
|
|
1114
|
+
Returns:
|
|
1115
|
+
bool: True if successful, False otherwise.
|
|
1116
|
+
"""
|
|
1117
|
+
try:
|
|
1118
|
+
# Save to persistent storage
|
|
1119
|
+
success = self.global_user_manager.save_user_config(user_id, config)
|
|
1120
|
+
if success:
|
|
1121
|
+
# Update in-memory config
|
|
1122
|
+
self.user_configs[user_id] = config
|
|
1123
|
+
logger.info(f"Updated configuration for user {user_id}")
|
|
1124
|
+
|
|
1125
|
+
return success
|
|
1126
|
+
except Exception as e:
|
|
1127
|
+
logger.error(f"Failed to update user config for {user_id}: {e}")
|
|
1128
|
+
return False
|
|
1129
|
+
|
|
1130
|
+
def get_user_config(self, user_id: str) -> MOSConfig | None:
|
|
1131
|
+
"""Get user configuration.
|
|
1132
|
+
|
|
1133
|
+
Args:
|
|
1134
|
+
user_id (str): The user ID.
|
|
1135
|
+
|
|
1136
|
+
Returns:
|
|
1137
|
+
MOSConfig | None: The user's configuration or None if not found.
|
|
1138
|
+
"""
|
|
1139
|
+
return self.global_user_manager.get_user_config(user_id)
|
|
1140
|
+
|
|
1141
|
+
def get_active_user_count(self) -> int:
|
|
1142
|
+
"""Get the number of active user configurations in memory."""
|
|
1143
|
+
return len(self.user_configs)
|
|
1144
|
+
|
|
1145
|
+
def get_user_instance_info(self) -> dict[str, Any]:
|
|
1146
|
+
"""Get information about user configurations in memory."""
|
|
1147
|
+
return {
|
|
1148
|
+
"active_instances": len(self.user_configs),
|
|
1149
|
+
"max_instances": self.max_user_instances,
|
|
1150
|
+
"user_ids": list(self.user_configs.keys()),
|
|
1151
|
+
"lru_order": list(self.user_configs.keys()), # OrderedDict maintains insertion order
|
|
1152
|
+
}
|