mem1 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mem1/__init__.py +5 -4
- mem1/config.py +14 -5
- mem1/llm.py +54 -5
- mem1/{memory_es.py → memory.py} +225 -382
- mem1/prompts.py +22 -0
- mem1/storage.py +399 -0
- mem1-0.0.8.dist-info/METADATA +290 -0
- mem1-0.0.8.dist-info/RECORD +12 -0
- mem1-0.0.6.dist-info/METADATA +0 -191
- mem1-0.0.6.dist-info/RECORD +0 -11
- {mem1-0.0.6.dist-info → mem1-0.0.8.dist-info}/WHEEL +0 -0
mem1/{memory_es.py → memory.py}
RENAMED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""基于可插拔存储层的记忆管理系统"""
|
|
2
2
|
import re
|
|
3
3
|
import shutil
|
|
4
4
|
import base64
|
|
@@ -6,26 +6,20 @@ import logging
|
|
|
6
6
|
from datetime import datetime, timedelta
|
|
7
7
|
from typing import List, Dict, Any, Optional
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
|
|
9
|
+
|
|
10
10
|
from mem1.config import Mem1Config
|
|
11
11
|
from mem1.llm import LLMClient, VLClient
|
|
12
|
-
from mem1.prompts import ProfileTemplate, RECALL_DECISION_PROMPT, IMAGE_SEARCH_PROMPT, ASSISTANT_SUMMARY_PROMPT
|
|
12
|
+
from mem1.prompts import ProfileTemplate, RECALL_DECISION_PROMPT, IMAGE_SEARCH_PROMPT, ASSISTANT_SUMMARY_PROMPT, CONTEXT_SUFFICIENT_PROMPT
|
|
13
|
+
from mem1.storage import StorageBackend, ESStorage
|
|
13
14
|
|
|
14
15
|
logger = logging.getLogger(__name__)
|
|
15
16
|
|
|
16
|
-
# 用户状态索引名
|
|
17
|
-
USER_STATE_INDEX = "mem1_user_state"
|
|
18
|
-
# 用户画像索引名
|
|
19
|
-
USER_PROFILE_INDEX = "mem1_user_profile"
|
|
20
|
-
|
|
21
17
|
|
|
22
18
|
class Mem1Memory:
|
|
23
|
-
"""
|
|
19
|
+
"""用户记忆系统(支持可插拔存储后端)
|
|
24
20
|
|
|
25
|
-
|
|
26
|
-
-
|
|
27
|
-
- ES 索引 mem1_user_state: 用户更新状态(轮数、上次更新时间)
|
|
28
|
-
- ES 索引 mem1_user_profile: 用户画像(按 user_id 共享,跨话题)
|
|
21
|
+
数据存储:
|
|
22
|
+
- 存储后端: 历史对话记录 + 用户状态 + 用户画像
|
|
29
23
|
- 本地文件: 图片文件存储
|
|
30
24
|
"""
|
|
31
25
|
|
|
@@ -35,9 +29,10 @@ class Mem1Memory:
|
|
|
35
29
|
user_id: str,
|
|
36
30
|
topic_id: str = "default",
|
|
37
31
|
memory_dir: Optional[str] = None,
|
|
38
|
-
profile_template: Optional[ProfileTemplate] = None
|
|
32
|
+
profile_template: Optional[ProfileTemplate] = None,
|
|
33
|
+
storage: Optional[StorageBackend] = None
|
|
39
34
|
):
|
|
40
|
-
"""
|
|
35
|
+
"""初始化记忆系统
|
|
41
36
|
|
|
42
37
|
Args:
|
|
43
38
|
config: 配置对象
|
|
@@ -45,6 +40,7 @@ class Mem1Memory:
|
|
|
45
40
|
topic_id: 话题ID(默认 "default"),同一用户可有多个话题
|
|
46
41
|
memory_dir: 记忆文件存储目录
|
|
47
42
|
profile_template: 用户画像模板
|
|
43
|
+
storage: 存储后端(可选,默认使用 ESStorage)
|
|
48
44
|
"""
|
|
49
45
|
self.config = config
|
|
50
46
|
self.user_id = user_id
|
|
@@ -52,13 +48,15 @@ class Mem1Memory:
|
|
|
52
48
|
self.memory_dir = Path(memory_dir or config.memory.memory_dir)
|
|
53
49
|
self.memory_dir.mkdir(parents=True, exist_ok=True)
|
|
54
50
|
|
|
55
|
-
#
|
|
51
|
+
# 图片存储目录
|
|
56
52
|
self.images_dir = Path(config.images.images_dir)
|
|
57
53
|
self.images_dir.mkdir(parents=True, exist_ok=True)
|
|
58
54
|
|
|
59
|
-
#
|
|
60
|
-
|
|
61
|
-
|
|
55
|
+
# 存储后端(可插拔)
|
|
56
|
+
if storage:
|
|
57
|
+
self.storage = storage
|
|
58
|
+
else:
|
|
59
|
+
self.storage = ESStorage(config.es.hosts, config.es.index_name)
|
|
62
60
|
|
|
63
61
|
# LLM 客户端
|
|
64
62
|
self.llm = LLMClient(config.llm)
|
|
@@ -76,9 +74,8 @@ class Mem1Memory:
|
|
|
76
74
|
self.update_interval_minutes = config.memory.update_interval_minutes
|
|
77
75
|
self.save_assistant_messages = config.memory.save_assistant_messages
|
|
78
76
|
self.max_assistant_chars = config.memory.max_assistant_chars
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
self._ensure_state_index()
|
|
77
|
+
|
|
78
|
+
# ========== 图片处理 ==========
|
|
82
79
|
|
|
83
80
|
def _get_user_images_dir(self, user_id: str) -> Path:
|
|
84
81
|
"""获取用户图片目录"""
|
|
@@ -88,28 +85,16 @@ class Mem1Memory:
|
|
|
88
85
|
|
|
89
86
|
def _load_images_index(self, user_id: str) -> List[Dict[str, str]]:
|
|
90
87
|
"""从对话记录中提取用户所有图片"""
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
},
|
|
102
|
-
size=1000,
|
|
103
|
-
sort=[{"timestamp": {"order": "asc"}}]
|
|
104
|
-
)
|
|
105
|
-
|
|
106
|
-
images = []
|
|
107
|
-
for hit in response["hits"]["hits"]:
|
|
108
|
-
conv_images = hit["_source"].get("images", [])
|
|
109
|
-
images.extend(conv_images)
|
|
110
|
-
return images
|
|
111
|
-
except Exception:
|
|
112
|
-
return []
|
|
88
|
+
if hasattr(self.storage, 'get_conversations_with_images'):
|
|
89
|
+
conversations = self.storage.get_conversations_with_images(user_id)
|
|
90
|
+
else:
|
|
91
|
+
conversations = self.storage.get_conversations(user_id)
|
|
92
|
+
|
|
93
|
+
images = []
|
|
94
|
+
for conv in conversations:
|
|
95
|
+
conv_images = conv.get("images", [])
|
|
96
|
+
images.extend(conv_images)
|
|
97
|
+
return images
|
|
113
98
|
|
|
114
99
|
def _save_image_to_conversation(self, conversation_entry: Dict, image_doc: Dict[str, str]) -> None:
|
|
115
100
|
"""将图片信息添加到对话记录"""
|
|
@@ -117,29 +102,19 @@ class Mem1Memory:
|
|
|
117
102
|
conversation_entry["images"] = []
|
|
118
103
|
conversation_entry["images"].append(image_doc)
|
|
119
104
|
|
|
105
|
+
# ========== 用户画像 ==========
|
|
106
|
+
|
|
120
107
|
def _get_profile(self, user_id: str) -> Optional[str]:
|
|
121
|
-
"""
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
return response["_source"]["content"]
|
|
125
|
-
except Exception:
|
|
126
|
-
return None
|
|
108
|
+
"""获取用户画像"""
|
|
109
|
+
result = self.storage.get_profile(user_id)
|
|
110
|
+
return result["content"] if result else None
|
|
127
111
|
|
|
128
112
|
def _save_profile(self, user_id: str, content: str) -> None:
|
|
129
|
-
"""
|
|
130
|
-
self.
|
|
131
|
-
index=USER_PROFILE_INDEX,
|
|
132
|
-
id=user_id,
|
|
133
|
-
document={
|
|
134
|
-
"user_id": user_id,
|
|
135
|
-
"content": content,
|
|
136
|
-
"updated_at": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
|
137
|
-
},
|
|
138
|
-
refresh=True
|
|
139
|
-
)
|
|
113
|
+
"""保存用户画像"""
|
|
114
|
+
self.storage.save_profile(user_id, content)
|
|
140
115
|
|
|
141
116
|
def _init_profile(self, user_id: str) -> str:
|
|
142
|
-
"""
|
|
117
|
+
"""初始化用户画像(不存在则创建)"""
|
|
143
118
|
content = self._get_profile(user_id)
|
|
144
119
|
if content is None:
|
|
145
120
|
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
|
|
@@ -148,89 +123,21 @@ class Mem1Memory:
|
|
|
148
123
|
logger.info(f"✓ 创建用户画像: {user_id}")
|
|
149
124
|
return content
|
|
150
125
|
|
|
151
|
-
|
|
152
|
-
"""确保所有索引存在"""
|
|
153
|
-
# 确保对话记录索引存在
|
|
154
|
-
if not self.es.indices.exists(index=self.index_name):
|
|
155
|
-
self.es.indices.create(
|
|
156
|
-
index=self.index_name,
|
|
157
|
-
body={
|
|
158
|
-
"mappings": {
|
|
159
|
-
"properties": {
|
|
160
|
-
"user_id": {"type": "keyword"},
|
|
161
|
-
"topic_id": {"type": "keyword"},
|
|
162
|
-
"timestamp": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss||epoch_millis"},
|
|
163
|
-
"messages": {"type": "nested"},
|
|
164
|
-
"metadata": {"type": "object"}
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
}
|
|
168
|
-
)
|
|
169
|
-
logger.info(f"✓ 创建对话记录索引: {self.index_name}")
|
|
170
|
-
|
|
171
|
-
# 确保用户状态索引存在
|
|
172
|
-
if not self.es.indices.exists(index=USER_STATE_INDEX):
|
|
173
|
-
self.es.indices.create(
|
|
174
|
-
index=USER_STATE_INDEX,
|
|
175
|
-
body={
|
|
176
|
-
"mappings": {
|
|
177
|
-
"properties": {
|
|
178
|
-
"user_id": {"type": "keyword"},
|
|
179
|
-
"rounds": {"type": "integer"},
|
|
180
|
-
"last_update": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss||epoch_millis"}
|
|
181
|
-
}
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
)
|
|
185
|
-
logger.info(f"✓ 创建用户状态索引: {USER_STATE_INDEX}")
|
|
186
|
-
|
|
187
|
-
# 确保画像索引存在
|
|
188
|
-
if not self.es.indices.exists(index=USER_PROFILE_INDEX):
|
|
189
|
-
self.es.indices.create(
|
|
190
|
-
index=USER_PROFILE_INDEX,
|
|
191
|
-
body={
|
|
192
|
-
"mappings": {
|
|
193
|
-
"properties": {
|
|
194
|
-
"user_id": {"type": "keyword"},
|
|
195
|
-
"content": {"type": "text"},
|
|
196
|
-
"updated_at": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss||epoch_millis"}
|
|
197
|
-
}
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
|
-
)
|
|
201
|
-
logger.info(f"✓ 创建用户画像索引: {USER_PROFILE_INDEX}")
|
|
126
|
+
# ========== 用户状态 ==========
|
|
202
127
|
|
|
203
128
|
def _get_user_state(self, user_id: str) -> Dict[str, Any]:
|
|
204
|
-
"""
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
return response["_source"]
|
|
208
|
-
except Exception:
|
|
209
|
-
# 用户状态不存在,返回初始状态
|
|
129
|
+
"""获取用户更新状态"""
|
|
130
|
+
state = self.storage.get_user_state(user_id)
|
|
131
|
+
if state is None:
|
|
210
132
|
return {"user_id": user_id, "rounds": 0, "last_update": None}
|
|
133
|
+
return state
|
|
211
134
|
|
|
212
135
|
def _update_user_state(self, user_id: str, rounds: int, last_update: Optional[str] = None) -> None:
|
|
213
|
-
"""
|
|
214
|
-
|
|
215
|
-
if last_update:
|
|
216
|
-
doc["last_update"] = last_update
|
|
217
|
-
|
|
218
|
-
self.es.index(
|
|
219
|
-
index=USER_STATE_INDEX,
|
|
220
|
-
id=user_id,
|
|
221
|
-
document=doc,
|
|
222
|
-
refresh=True
|
|
223
|
-
)
|
|
136
|
+
"""更新用户状态"""
|
|
137
|
+
self.storage.save_user_state(user_id, rounds, last_update)
|
|
224
138
|
|
|
225
139
|
def _should_trigger_update(self, user_id: str) -> bool:
|
|
226
|
-
"""
|
|
227
|
-
判断是否应该触发画像更新(基于 ES 存储的状态)
|
|
228
|
-
|
|
229
|
-
触发条件(满足任一即触发):
|
|
230
|
-
1. 累积对话轮数 >= update_interval_rounds
|
|
231
|
-
2. 距上次更新时间 >= update_interval_minutes
|
|
232
|
-
3. 首次(无 last_update)
|
|
233
|
-
"""
|
|
140
|
+
"""判断是否应该触发画像更新"""
|
|
234
141
|
state = self._get_user_state(user_id)
|
|
235
142
|
rounds = state.get("rounds", 0) + 1
|
|
236
143
|
last_update_str = state.get("last_update")
|
|
@@ -238,12 +145,10 @@ class Mem1Memory:
|
|
|
238
145
|
should_update = False
|
|
239
146
|
reason = ""
|
|
240
147
|
|
|
241
|
-
# 条件1:累积轮数达到阈值
|
|
242
148
|
if rounds >= self.update_interval_rounds:
|
|
243
149
|
should_update = True
|
|
244
150
|
reason = f"轮数={rounds} >= {self.update_interval_rounds}"
|
|
245
151
|
|
|
246
|
-
# 条件2:距上次更新超过时间阈值
|
|
247
152
|
if not should_update and last_update_str:
|
|
248
153
|
try:
|
|
249
154
|
last_update = datetime.strptime(last_update_str, '%Y-%m-%d %H:%M:%S')
|
|
@@ -254,21 +159,21 @@ class Mem1Memory:
|
|
|
254
159
|
except ValueError:
|
|
255
160
|
pass
|
|
256
161
|
|
|
257
|
-
# 条件3:首次更新
|
|
258
162
|
if not should_update and last_update_str is None:
|
|
259
163
|
should_update = True
|
|
260
164
|
reason = "首次创建画像"
|
|
261
165
|
|
|
262
166
|
if should_update:
|
|
263
167
|
logger.info(f"📊 触发画像更新({reason}): {user_id}")
|
|
264
|
-
# 重置轮数
|
|
265
168
|
self._update_user_state(user_id, 0, datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
|
|
266
169
|
else:
|
|
267
|
-
# 增加轮数
|
|
268
170
|
self._update_user_state(user_id, rounds, last_update_str)
|
|
269
171
|
logger.debug(f"📊 暂不更新(轮数={rounds}/{self.update_interval_rounds}): {user_id}")
|
|
270
172
|
|
|
271
173
|
return should_update
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# ========== 对话管理 ==========
|
|
272
177
|
|
|
273
178
|
def add_conversation(
|
|
274
179
|
self,
|
|
@@ -277,12 +182,11 @@ class Mem1Memory:
|
|
|
277
182
|
metadata: Optional[Dict[str, Any]] = None,
|
|
278
183
|
timestamp: Optional[str] = None
|
|
279
184
|
) -> Dict[str, Any]:
|
|
280
|
-
"""
|
|
185
|
+
"""添加对话"""
|
|
281
186
|
ts = timestamp or datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
|
282
187
|
user_id = self.user_id
|
|
283
188
|
topic_id = self.topic_id
|
|
284
189
|
|
|
285
|
-
# 构建对话记录
|
|
286
190
|
conversation_entry = {
|
|
287
191
|
"user_id": user_id,
|
|
288
192
|
"topic_id": topic_id,
|
|
@@ -309,21 +213,16 @@ class Mem1Memory:
|
|
|
309
213
|
|
|
310
214
|
image_refs.append(filename)
|
|
311
215
|
|
|
312
|
-
# 生成图片描述(用户描述 + VL 理解)
|
|
313
216
|
user_desc = ""
|
|
314
217
|
for msg in messages:
|
|
315
218
|
if msg["role"] == "user":
|
|
316
219
|
user_desc = msg["content"]
|
|
317
220
|
break
|
|
318
221
|
|
|
319
|
-
# 如果启用了 VL 模型,调用视觉理解
|
|
320
222
|
if self.vl:
|
|
321
223
|
try:
|
|
322
224
|
vl_result = self.vl.understand_image(str(img_path), user_desc)
|
|
323
|
-
if user_desc
|
|
324
|
-
description = f"【用户描述】{user_desc}\n\n{vl_result}"
|
|
325
|
-
else:
|
|
326
|
-
description = vl_result
|
|
225
|
+
description = f"【用户描述】{user_desc}\n\n{vl_result}" if user_desc else vl_result
|
|
327
226
|
logger.info(f"🖼️ VL 图片理解完成: {filename}")
|
|
328
227
|
except Exception as e:
|
|
329
228
|
logger.warning(f"⚠️ VL 图片理解失败: {e}, 使用用户描述")
|
|
@@ -331,7 +230,6 @@ class Mem1Memory:
|
|
|
331
230
|
else:
|
|
332
231
|
description = user_desc or img['filename']
|
|
333
232
|
|
|
334
|
-
# 图片信息存入对话记录
|
|
335
233
|
self._save_image_to_conversation(conversation_entry, {
|
|
336
234
|
"filename": filename,
|
|
337
235
|
"description": description,
|
|
@@ -352,28 +250,20 @@ class Mem1Memory:
|
|
|
352
250
|
content = msg["content"]
|
|
353
251
|
if len(content) > self.max_assistant_chars:
|
|
354
252
|
content = self._summarize_assistant_response(content)
|
|
355
|
-
conversation_entry["messages"].append({
|
|
356
|
-
"role": "assistant",
|
|
357
|
-
"content": content
|
|
358
|
-
})
|
|
253
|
+
conversation_entry["messages"].append({"role": "assistant", "content": content})
|
|
359
254
|
|
|
360
|
-
#
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
document=conversation_entry,
|
|
364
|
-
refresh=True
|
|
365
|
-
)
|
|
255
|
+
# 保存到存储后端
|
|
256
|
+
record_id = self.storage.save_conversation(conversation_entry)
|
|
257
|
+
logger.info(f"✓ 对话已存储: user={user_id}, topic={topic_id}, timestamp={ts}, id={record_id}")
|
|
366
258
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
# 自动更新画像(基于 ES 状态判断)
|
|
259
|
+
# 自动更新画像
|
|
370
260
|
if self.auto_update_profile and self._should_trigger_update(user_id):
|
|
371
261
|
try:
|
|
372
262
|
self.update_profile()
|
|
373
263
|
except Exception as e:
|
|
374
264
|
logger.error(f"❌ 画像更新失败: {user_id}, error={e}")
|
|
375
265
|
|
|
376
|
-
return {"status": "success", "
|
|
266
|
+
return {"status": "success", "id": record_id}
|
|
377
267
|
|
|
378
268
|
def get_conversations(
|
|
379
269
|
self,
|
|
@@ -381,46 +271,19 @@ class Mem1Memory:
|
|
|
381
271
|
metadata_filter: Optional[Dict[str, Any]] = None,
|
|
382
272
|
size: int = 1000
|
|
383
273
|
) -> List[Dict[str, Any]]:
|
|
384
|
-
"""
|
|
385
|
-
|
|
386
|
-
topic_id = self.topic_id
|
|
387
|
-
|
|
388
|
-
query = {
|
|
389
|
-
"bool": {
|
|
390
|
-
"must": [
|
|
391
|
-
{"term": {"user_id": user_id}},
|
|
392
|
-
{"term": {"topic_id": topic_id}}
|
|
393
|
-
]
|
|
394
|
-
}
|
|
395
|
-
}
|
|
396
|
-
|
|
397
|
-
# 时间过滤
|
|
274
|
+
"""获取当前话题的对话记录"""
|
|
275
|
+
start_time = None
|
|
398
276
|
if days_limit:
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
if metadata_filter:
|
|
408
|
-
for k, v in metadata_filter.items():
|
|
409
|
-
query["bool"]["must"].append({
|
|
410
|
-
"term": {f"metadata.{k}": v}
|
|
411
|
-
})
|
|
412
|
-
|
|
413
|
-
# 查询 ES
|
|
414
|
-
response = self.es.search(
|
|
415
|
-
index=self.index_name,
|
|
416
|
-
query=query,
|
|
417
|
-
size=size,
|
|
418
|
-
sort=[{"timestamp": {"order": "asc"}}]
|
|
277
|
+
start_time = datetime.now() - timedelta(days=days_limit)
|
|
278
|
+
|
|
279
|
+
conversations = self.storage.get_conversations(
|
|
280
|
+
user_id=self.user_id,
|
|
281
|
+
topic_id=self.topic_id,
|
|
282
|
+
start_time=start_time,
|
|
283
|
+
metadata_filter=metadata_filter,
|
|
284
|
+
limit=size
|
|
419
285
|
)
|
|
420
|
-
|
|
421
|
-
conversations = [hit["_source"] for hit in response["hits"]["hits"]]
|
|
422
|
-
logger.info(f"📖 从 ES 读取对话: user={user_id}, topic={topic_id}, count={len(conversations)}")
|
|
423
|
-
|
|
286
|
+
logger.info(f"📖 读取对话: user={self.user_id}, topic={self.topic_id}, count={len(conversations)}")
|
|
424
287
|
return conversations
|
|
425
288
|
|
|
426
289
|
def get_all_conversations(
|
|
@@ -428,53 +291,52 @@ class Mem1Memory:
|
|
|
428
291
|
days_limit: Optional[int] = None,
|
|
429
292
|
size: int = 1000
|
|
430
293
|
) -> List[Dict[str, Any]]:
|
|
431
|
-
"""
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
query = {
|
|
435
|
-
"bool": {
|
|
436
|
-
"must": [
|
|
437
|
-
{"term": {"user_id": user_id}}
|
|
438
|
-
]
|
|
439
|
-
}
|
|
440
|
-
}
|
|
441
|
-
|
|
294
|
+
"""获取用户所有话题的对话记录"""
|
|
295
|
+
start_time = None
|
|
442
296
|
if days_limit:
|
|
443
|
-
|
|
444
|
-
query["bool"]["must"].append({
|
|
445
|
-
"range": {
|
|
446
|
-
"timestamp": {"gte": cutoff_date}
|
|
447
|
-
}
|
|
448
|
-
})
|
|
449
|
-
|
|
450
|
-
response = self.es.search(
|
|
451
|
-
index=self.index_name,
|
|
452
|
-
query=query,
|
|
453
|
-
size=size,
|
|
454
|
-
sort=[{"timestamp": {"order": "asc"}}]
|
|
455
|
-
)
|
|
456
|
-
|
|
457
|
-
conversations = [hit["_source"] for hit in response["hits"]["hits"]]
|
|
458
|
-
logger.info(f"📖 从 ES 读取所有对话: user={user_id}, count={len(conversations)}")
|
|
297
|
+
start_time = datetime.now() - timedelta(days=days_limit)
|
|
459
298
|
|
|
299
|
+
conversations = self.storage.get_conversations(
|
|
300
|
+
user_id=self.user_id,
|
|
301
|
+
topic_id=None,
|
|
302
|
+
start_time=start_time,
|
|
303
|
+
limit=size
|
|
304
|
+
)
|
|
305
|
+
logger.info(f"📖 读取所有对话: user={self.user_id}, count={len(conversations)}")
|
|
460
306
|
return conversations
|
|
461
307
|
|
|
308
|
+
def _get_conversations_range(self, start_days_ago: int, end_days_ago: int) -> List[Dict[str, Any]]:
|
|
309
|
+
"""获取指定天数范围内的对话"""
|
|
310
|
+
now = datetime.now()
|
|
311
|
+
start_time = now - timedelta(days=end_days_ago)
|
|
312
|
+
end_time = now - timedelta(days=start_days_ago)
|
|
313
|
+
|
|
314
|
+
return self.storage.get_conversations(
|
|
315
|
+
user_id=self.user_id,
|
|
316
|
+
topic_id=self.topic_id,
|
|
317
|
+
start_time=start_time,
|
|
318
|
+
end_time=end_time
|
|
319
|
+
)
|
|
320
|
+
|
|
321
|
+
def search_conversations(self, start_days: int, end_days: int) -> List[Dict[str, Any]]:
|
|
322
|
+
"""按时间范围检索对话(供外部 LLM 作为 tool 调用)"""
|
|
323
|
+
return self._get_conversations_range(start_days, end_days)
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
# ========== 画像更新 ==========
|
|
327
|
+
|
|
462
328
|
def update_profile(self) -> Dict[str, Any]:
|
|
463
|
-
"""
|
|
329
|
+
"""更新用户画像"""
|
|
464
330
|
user_id = self.user_id
|
|
465
331
|
self._init_profile(user_id)
|
|
466
332
|
|
|
467
|
-
# 从 ES 读取所有话题的对话
|
|
468
333
|
conversations = self.get_all_conversations()
|
|
469
334
|
if not conversations:
|
|
470
335
|
return {"status": "success", "updated": False, "reason": "no_conversation"}
|
|
471
336
|
|
|
472
337
|
history_content = self._format_conversations_for_llm(conversations)
|
|
473
|
-
|
|
474
|
-
# 从 ES 读取现有画像
|
|
475
338
|
profile_content = self._get_profile(user_id)
|
|
476
339
|
|
|
477
|
-
# LLM 更新画像
|
|
478
340
|
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
|
|
479
341
|
prompt = self.profile_template.get_update_prompt().format(
|
|
480
342
|
user_id=user_id,
|
|
@@ -490,29 +352,37 @@ class Mem1Memory:
|
|
|
490
352
|
|
|
491
353
|
response = self.llm.generate(messages, response_format="text")
|
|
492
354
|
|
|
493
|
-
# 检查是否需要压缩
|
|
494
355
|
if len(response) > self.max_profile_chars:
|
|
495
356
|
logger.info(f"📦 用户画像超长({len(response)}>{self.max_profile_chars}),触发压缩...")
|
|
496
357
|
response = self._compress_profile(user_id, response)
|
|
497
358
|
logger.info(f"📦 压缩后长度: {len(response)}")
|
|
498
359
|
|
|
499
|
-
# 保存到 ES
|
|
500
360
|
self._save_profile(user_id, response)
|
|
501
|
-
logger.info(f"✓
|
|
361
|
+
logger.info(f"✓ 画像已更新: {user_id}")
|
|
502
362
|
|
|
503
363
|
return {"status": "success", "updated": True, "length": len(response)}
|
|
504
364
|
|
|
505
|
-
def
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
365
|
+
def _compress_profile(self, user_id: str, profile_content: str) -> str:
|
|
366
|
+
"""压缩用户画像"""
|
|
367
|
+
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
|
|
368
|
+
prompt = self.profile_template.get_compress_prompt().format(
|
|
369
|
+
user_id=user_id,
|
|
370
|
+
profile_content=profile_content,
|
|
371
|
+
max_chars=self.max_profile_chars,
|
|
372
|
+
timestamp=timestamp
|
|
373
|
+
)
|
|
511
374
|
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
375
|
+
messages = [
|
|
376
|
+
{"role": "system", "content": prompt},
|
|
377
|
+
{"role": "user", "content": "请压缩用户画像"}
|
|
378
|
+
]
|
|
379
|
+
|
|
380
|
+
return self.llm.generate(messages, response_format="text")
|
|
381
|
+
|
|
382
|
+
# ========== 上下文获取 ==========
|
|
383
|
+
|
|
384
|
+
def get_context(self, query: str = "", days_limit: Optional[int] = None) -> Dict[str, Any]:
|
|
385
|
+
"""获取记忆上下文"""
|
|
516
386
|
user_id = self.user_id
|
|
517
387
|
profile_content = self._init_profile(user_id)
|
|
518
388
|
|
|
@@ -520,15 +390,11 @@ class Mem1Memory:
|
|
|
520
390
|
weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
|
|
521
391
|
current_time = f"{now.strftime('%Y-%m-%d %H:%M')} {weekdays[now.weekday()]}"
|
|
522
392
|
|
|
523
|
-
# 从 ES 获取画像更新时间
|
|
524
393
|
profile_last_updated = "未更新"
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
profile_last_updated =
|
|
528
|
-
except Exception:
|
|
529
|
-
pass
|
|
394
|
+
profile_data = self.storage.get_profile(user_id)
|
|
395
|
+
if profile_data:
|
|
396
|
+
profile_last_updated = profile_data.get("updated_at", "未更新")
|
|
530
397
|
|
|
531
|
-
# 强制检索最近 days_limit 天的对话
|
|
532
398
|
if days_limit is None:
|
|
533
399
|
days_limit = self.config.memory.context_days_limit
|
|
534
400
|
conversations = self.get_conversations(days_limit=days_limit)
|
|
@@ -544,57 +410,72 @@ class Mem1Memory:
|
|
|
544
410
|
"profile_last_updated": profile_last_updated
|
|
545
411
|
}
|
|
546
412
|
|
|
547
|
-
def
|
|
548
|
-
"""
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
user_id=user_id,
|
|
552
|
-
profile_content=profile_content,
|
|
553
|
-
max_chars=self.max_profile_chars,
|
|
554
|
-
timestamp=timestamp
|
|
555
|
-
)
|
|
556
|
-
|
|
557
|
-
messages = [
|
|
558
|
-
{"role": "system", "content": prompt},
|
|
559
|
-
{"role": "user", "content": "请压缩用户画像"}
|
|
560
|
-
]
|
|
561
|
-
|
|
562
|
-
response = self.llm.generate(messages, response_format="text")
|
|
563
|
-
return response
|
|
564
|
-
|
|
565
|
-
def _should_include_history(self, query: str) -> tuple[bool, str]:
|
|
566
|
-
"""LLM 判断是否需要加载历史记录"""
|
|
567
|
-
prompt = RECALL_DECISION_PROMPT.format(query=query)
|
|
413
|
+
def get_context_progressive(self, query: str, max_days: int = 31, step: int = 7) -> Dict[str, Any]:
|
|
414
|
+
"""渐进式检索:每次多查一周,直到 LLM 认为信息足够"""
|
|
415
|
+
user_id = self.user_id
|
|
416
|
+
profile_content = self._init_profile(user_id)
|
|
568
417
|
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
]
|
|
418
|
+
now = datetime.now()
|
|
419
|
+
weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
|
|
420
|
+
current_time = f"{now.strftime('%Y-%m-%d %H:%M')} {weekdays[now.weekday()]}"
|
|
573
421
|
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
422
|
+
profile_last_updated = "未更新"
|
|
423
|
+
profile_data = self.storage.get_profile(user_id)
|
|
424
|
+
if profile_data:
|
|
425
|
+
profile_last_updated = profile_data.get("updated_at", "未更新")
|
|
426
|
+
|
|
427
|
+
all_conversations = []
|
|
428
|
+
searched_days = 0
|
|
429
|
+
|
|
430
|
+
for end_day in range(step, max_days + step, step):
|
|
431
|
+
end_day = min(end_day, max_days)
|
|
432
|
+
new_conversations = self._get_conversations_range(searched_days, end_day)
|
|
433
|
+
all_conversations.extend(new_conversations)
|
|
434
|
+
searched_days = end_day
|
|
435
|
+
|
|
436
|
+
if not all_conversations:
|
|
437
|
+
logger.info(f"📖 渐进检索: 0-{end_day}天 无对话,继续...")
|
|
438
|
+
continue
|
|
439
|
+
|
|
440
|
+
normal_content = self._format_conversations_for_llm(all_conversations)
|
|
441
|
+
if self._is_context_sufficient(query, profile_content, normal_content, end_day):
|
|
442
|
+
logger.info(f"✓ 渐进检索完成: 0-{end_day}天,{len(all_conversations)}条对话")
|
|
443
|
+
break
|
|
444
|
+
|
|
445
|
+
logger.info(f"📖 渐进检索: 0-{end_day}天 信息不足,继续...")
|
|
577
446
|
|
|
578
|
-
|
|
447
|
+
normal_content = self._format_conversations_for_llm(all_conversations) if all_conversations else ""
|
|
579
448
|
|
|
580
|
-
return
|
|
449
|
+
return {
|
|
450
|
+
"current_time": current_time,
|
|
451
|
+
"user_id": user_id,
|
|
452
|
+
"topic_id": self.topic_id,
|
|
453
|
+
"import_content": profile_content,
|
|
454
|
+
"normal_content": normal_content,
|
|
455
|
+
"conversations_count": len(all_conversations),
|
|
456
|
+
"profile_last_updated": profile_last_updated,
|
|
457
|
+
"searched_days": searched_days
|
|
458
|
+
}
|
|
581
459
|
|
|
582
|
-
def
|
|
583
|
-
"""
|
|
584
|
-
prompt =
|
|
585
|
-
|
|
586
|
-
|
|
460
|
+
def _is_context_sufficient(self, query: str, profile: str, conversations: str, days: int) -> bool:
|
|
461
|
+
"""LLM 判断当前上下文是否足够"""
|
|
462
|
+
prompt = CONTEXT_SUFFICIENT_PROMPT.format(
|
|
463
|
+
query=query,
|
|
464
|
+
profile=profile,
|
|
465
|
+
conversations=conversations or "(无对话记录)",
|
|
466
|
+
days=days
|
|
587
467
|
)
|
|
588
468
|
|
|
589
469
|
messages = [
|
|
590
470
|
{"role": "system", "content": prompt},
|
|
591
|
-
{"role": "user", "content":
|
|
471
|
+
{"role": "user", "content": query}
|
|
592
472
|
]
|
|
593
473
|
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
474
|
+
response = self.llm.generate(messages, response_format="text")
|
|
475
|
+
return "true" in response.strip().lower()
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
# ========== 图片搜索 ==========
|
|
598
479
|
|
|
599
480
|
def search_images(self, query: str) -> List[Dict[str, str]]:
|
|
600
481
|
"""搜索用户图片"""
|
|
@@ -618,20 +499,52 @@ class Mem1Memory:
|
|
|
618
499
|
response = self.llm.generate(messages, response_format="text")
|
|
619
500
|
|
|
620
501
|
results = []
|
|
621
|
-
# 提取所有数字(支持多种格式:纯数字、[0]、0. 等)
|
|
622
|
-
import re
|
|
623
502
|
numbers = re.findall(r'\b(\d+)\b', response)
|
|
624
503
|
for num_str in numbers:
|
|
625
504
|
idx = int(num_str)
|
|
626
505
|
if 0 <= idx < len(images_index):
|
|
627
506
|
img = images_index[idx].copy()
|
|
628
507
|
img['abs_path'] = str((self._get_user_images_dir(user_id) / img['filename']).resolve())
|
|
629
|
-
if img not in results:
|
|
508
|
+
if img not in results:
|
|
630
509
|
results.append(img)
|
|
631
510
|
|
|
632
511
|
logger.info(f"🖼️ 图片搜索: query='{query}', 找到 {len(results)} 张")
|
|
633
512
|
return results
|
|
634
513
|
|
|
514
|
+
# ========== LLM 辅助方法 ==========
|
|
515
|
+
|
|
516
|
+
def _should_include_history(self, query: str) -> tuple[bool, str]:
|
|
517
|
+
"""LLM 判断是否需要加载历史记录"""
|
|
518
|
+
prompt = RECALL_DECISION_PROMPT.format(query=query)
|
|
519
|
+
|
|
520
|
+
messages = [
|
|
521
|
+
{"role": "system", "content": prompt},
|
|
522
|
+
{"role": "user", "content": query}
|
|
523
|
+
]
|
|
524
|
+
|
|
525
|
+
response = self.llm.generate(messages, response_format="text")
|
|
526
|
+
response_lower = response.strip().lower()
|
|
527
|
+
need_history = "true" in response_lower or "是" in response_lower or "需要" in response_lower
|
|
528
|
+
|
|
529
|
+
logger.info(f"🔍 回忆判断: query='{query[:50]}...', need_history={need_history}")
|
|
530
|
+
return need_history, response.strip()
|
|
531
|
+
|
|
532
|
+
def _summarize_assistant_response(self, content: str) -> str:
|
|
533
|
+
"""对超长的助手回复生成摘要"""
|
|
534
|
+
prompt = ASSISTANT_SUMMARY_PROMPT.format(
|
|
535
|
+
content=content,
|
|
536
|
+
max_chars=self.max_assistant_chars
|
|
537
|
+
)
|
|
538
|
+
|
|
539
|
+
messages = [
|
|
540
|
+
{"role": "system", "content": prompt},
|
|
541
|
+
{"role": "user", "content": "请生成摘要"}
|
|
542
|
+
]
|
|
543
|
+
|
|
544
|
+
summary = self.llm.generate(messages, response_format="text")
|
|
545
|
+
logger.info(f"📝 助手回复摘要: {len(content)} -> {len(summary)} 字符")
|
|
546
|
+
return summary
|
|
547
|
+
|
|
635
548
|
def _format_conversations_for_llm(self, conversations: List[Dict[str, Any]]) -> str:
|
|
636
549
|
"""格式化对话记录为文本"""
|
|
637
550
|
output = []
|
|
@@ -661,48 +574,23 @@ class Mem1Memory:
|
|
|
661
574
|
|
|
662
575
|
return "\n".join(output)
|
|
663
576
|
|
|
577
|
+
# ========== 用户/话题管理 ==========
|
|
578
|
+
|
|
664
579
|
def get_user_list(self) -> List[str]:
|
|
665
|
-
"""获取所有用户ID
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
"users": {
|
|
672
|
-
"terms": {"field": "user_id", "size": 10000}
|
|
673
|
-
}
|
|
674
|
-
}
|
|
675
|
-
}
|
|
676
|
-
)
|
|
677
|
-
|
|
678
|
-
users = [bucket["key"] for bucket in response["aggregations"]["users"]["buckets"]]
|
|
679
|
-
return users
|
|
580
|
+
"""获取所有用户ID列表"""
|
|
581
|
+
return self.storage.get_user_list()
|
|
582
|
+
|
|
583
|
+
def list_topics(self) -> List[Dict[str, Any]]:
|
|
584
|
+
"""列出用户的所有话题"""
|
|
585
|
+
return self.storage.get_topic_list(self.user_id)
|
|
680
586
|
|
|
681
587
|
def delete_user(self) -> Dict[str, Any]:
|
|
682
|
-
"""
|
|
588
|
+
"""删除用户所有记忆"""
|
|
683
589
|
user_id = self.user_id
|
|
684
590
|
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
index=self.index_name,
|
|
689
|
-
query={"term": {"user_id": user_id}},
|
|
690
|
-
refresh=True
|
|
691
|
-
)
|
|
692
|
-
except Exception:
|
|
693
|
-
pass # 索引可能不存在
|
|
694
|
-
|
|
695
|
-
# 删除 ES 中的用户状态
|
|
696
|
-
try:
|
|
697
|
-
self.es.delete(index=USER_STATE_INDEX, id=user_id, refresh=True)
|
|
698
|
-
except Exception:
|
|
699
|
-
pass # 状态可能不存在
|
|
700
|
-
|
|
701
|
-
# 删除 ES 中的用户画像
|
|
702
|
-
try:
|
|
703
|
-
self.es.delete(index=USER_PROFILE_INDEX, id=user_id, refresh=True)
|
|
704
|
-
except Exception:
|
|
705
|
-
pass # 画像可能不存在
|
|
591
|
+
self.storage.delete_conversations(user_id)
|
|
592
|
+
self.storage.delete_user_state(user_id)
|
|
593
|
+
self.storage.delete_profile(user_id)
|
|
706
594
|
|
|
707
595
|
# 删除本地图片文件
|
|
708
596
|
user_images_dir = self.images_dir / user_id
|
|
@@ -717,52 +605,7 @@ class Mem1Memory:
|
|
|
717
605
|
user_id = self.user_id
|
|
718
606
|
topic_id = self.topic_id
|
|
719
607
|
|
|
720
|
-
|
|
721
|
-
self.es.delete_by_query(
|
|
722
|
-
index=self.index_name,
|
|
723
|
-
query={
|
|
724
|
-
"bool": {
|
|
725
|
-
"must": [
|
|
726
|
-
{"term": {"user_id": user_id}},
|
|
727
|
-
{"term": {"topic_id": topic_id}}
|
|
728
|
-
]
|
|
729
|
-
}
|
|
730
|
-
},
|
|
731
|
-
refresh=True
|
|
732
|
-
)
|
|
733
|
-
except Exception:
|
|
734
|
-
pass # 索引可能不存在
|
|
608
|
+
self.storage.delete_conversations(user_id, topic_id)
|
|
735
609
|
|
|
736
610
|
logger.info(f"✓ 已删除话题: user={user_id}, topic={topic_id}")
|
|
737
611
|
return {"status": "success", "deleted_topic": topic_id}
|
|
738
|
-
|
|
739
|
-
def list_topics(self) -> List[Dict[str, Any]]:
|
|
740
|
-
"""列出用户的所有话题"""
|
|
741
|
-
user_id = self.user_id
|
|
742
|
-
|
|
743
|
-
response = self.es.search(
|
|
744
|
-
index=self.index_name,
|
|
745
|
-
body={
|
|
746
|
-
"size": 0,
|
|
747
|
-
"query": {"term": {"user_id": user_id}},
|
|
748
|
-
"aggs": {
|
|
749
|
-
"topics": {
|
|
750
|
-
"terms": {"field": "topic_id", "size": 1000},
|
|
751
|
-
"aggs": {
|
|
752
|
-
"latest": {"max": {"field": "timestamp"}},
|
|
753
|
-
"count": {"value_count": {"field": "timestamp"}}
|
|
754
|
-
}
|
|
755
|
-
}
|
|
756
|
-
}
|
|
757
|
-
}
|
|
758
|
-
)
|
|
759
|
-
|
|
760
|
-
topics = []
|
|
761
|
-
for bucket in response["aggregations"]["topics"]["buckets"]:
|
|
762
|
-
topics.append({
|
|
763
|
-
"topic_id": bucket["key"],
|
|
764
|
-
"conversation_count": bucket["doc_count"],
|
|
765
|
-
"last_active": bucket["latest"]["value_as_string"] if bucket["latest"]["value"] else None
|
|
766
|
-
})
|
|
767
|
-
|
|
768
|
-
return topics
|