mem1 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- """基于 Elasticsearch 的记忆管理系统"""
1
+ """基于可插拔存储层的记忆管理系统"""
2
2
  import re
3
3
  import shutil
4
4
  import base64
@@ -6,26 +6,20 @@ import logging
6
6
  from datetime import datetime, timedelta
7
7
  from typing import List, Dict, Any, Optional
8
8
  from pathlib import Path
9
- from elasticsearch import Elasticsearch
9
+
10
10
  from mem1.config import Mem1Config
11
11
  from mem1.llm import LLMClient, VLClient
12
- from mem1.prompts import ProfileTemplate, RECALL_DECISION_PROMPT, IMAGE_SEARCH_PROMPT, ASSISTANT_SUMMARY_PROMPT
12
+ from mem1.prompts import ProfileTemplate, RECALL_DECISION_PROMPT, IMAGE_SEARCH_PROMPT, ASSISTANT_SUMMARY_PROMPT, CONTEXT_SUFFICIENT_PROMPT
13
+ from mem1.storage import StorageBackend, ESStorage
13
14
 
14
15
  logger = logging.getLogger(__name__)
15
16
 
16
- # 用户状态索引名
17
- USER_STATE_INDEX = "mem1_user_state"
18
- # 用户画像索引名
19
- USER_PROFILE_INDEX = "mem1_user_profile"
20
-
21
17
 
22
18
  class Mem1Memory:
23
- """基于 Elasticsearch 的用户记忆系统
19
+ """用户记忆系统(支持可插拔存储后端)
24
20
 
25
- 数据存储(全部在 ES):
26
- - ES 索引 {index_name}: 历史对话记录 + 图片索引(按 user_id + topic_id 隔离)
27
- - ES 索引 mem1_user_state: 用户更新状态(轮数、上次更新时间)
28
- - ES 索引 mem1_user_profile: 用户画像(按 user_id 共享,跨话题)
21
+ 数据存储:
22
+ - 存储后端: 历史对话记录 + 用户状态 + 用户画像
29
23
  - 本地文件: 图片文件存储
30
24
  """
31
25
 
@@ -35,9 +29,10 @@ class Mem1Memory:
35
29
  user_id: str,
36
30
  topic_id: str = "default",
37
31
  memory_dir: Optional[str] = None,
38
- profile_template: Optional[ProfileTemplate] = None
32
+ profile_template: Optional[ProfileTemplate] = None,
33
+ storage: Optional[StorageBackend] = None
39
34
  ):
40
- """初始化 ES 记忆系统
35
+ """初始化记忆系统
41
36
 
42
37
  Args:
43
38
  config: 配置对象
@@ -45,6 +40,7 @@ class Mem1Memory:
45
40
  topic_id: 话题ID(默认 "default"),同一用户可有多个话题
46
41
  memory_dir: 记忆文件存储目录
47
42
  profile_template: 用户画像模板
43
+ storage: 存储后端(可选,默认使用 ESStorage)
48
44
  """
49
45
  self.config = config
50
46
  self.user_id = user_id
@@ -52,13 +48,15 @@ class Mem1Memory:
52
48
  self.memory_dir = Path(memory_dir or config.memory.memory_dir)
53
49
  self.memory_dir.mkdir(parents=True, exist_ok=True)
54
50
 
55
- # 图片存储目录(独立配置)
51
+ # 图片存储目录
56
52
  self.images_dir = Path(config.images.images_dir)
57
53
  self.images_dir.mkdir(parents=True, exist_ok=True)
58
54
 
59
- # ES 连接(从配置读取)
60
- self.es = Elasticsearch(config.es.hosts)
61
- self.index_name = config.es.index_name
55
+ # 存储后端(可插拔)
56
+ if storage:
57
+ self.storage = storage
58
+ else:
59
+ self.storage = ESStorage(config.es.hosts, config.es.index_name)
62
60
 
63
61
  # LLM 客户端
64
62
  self.llm = LLMClient(config.llm)
@@ -76,9 +74,8 @@ class Mem1Memory:
76
74
  self.update_interval_minutes = config.memory.update_interval_minutes
77
75
  self.save_assistant_messages = config.memory.save_assistant_messages
78
76
  self.max_assistant_chars = config.memory.max_assistant_chars
79
-
80
- # 确保索引存在
81
- self._ensure_state_index()
77
+
78
+ # ========== 图片处理 ==========
82
79
 
83
80
  def _get_user_images_dir(self, user_id: str) -> Path:
84
81
  """获取用户图片目录"""
@@ -88,28 +85,16 @@ class Mem1Memory:
88
85
 
89
86
  def _load_images_index(self, user_id: str) -> List[Dict[str, str]]:
90
87
  """从对话记录中提取用户所有图片"""
91
- try:
92
- response = self.es.search(
93
- index=self.index_name,
94
- query={
95
- "bool": {
96
- "must": [
97
- {"term": {"user_id": user_id}},
98
- {"exists": {"field": "images"}}
99
- ]
100
- }
101
- },
102
- size=1000,
103
- sort=[{"timestamp": {"order": "asc"}}]
104
- )
105
-
106
- images = []
107
- for hit in response["hits"]["hits"]:
108
- conv_images = hit["_source"].get("images", [])
109
- images.extend(conv_images)
110
- return images
111
- except Exception:
112
- return []
88
+ if hasattr(self.storage, 'get_conversations_with_images'):
89
+ conversations = self.storage.get_conversations_with_images(user_id)
90
+ else:
91
+ conversations = self.storage.get_conversations(user_id)
92
+
93
+ images = []
94
+ for conv in conversations:
95
+ conv_images = conv.get("images", [])
96
+ images.extend(conv_images)
97
+ return images
113
98
 
114
99
  def _save_image_to_conversation(self, conversation_entry: Dict, image_doc: Dict[str, str]) -> None:
115
100
  """将图片信息添加到对话记录"""
@@ -117,29 +102,19 @@ class Mem1Memory:
117
102
  conversation_entry["images"] = []
118
103
  conversation_entry["images"].append(image_doc)
119
104
 
105
+ # ========== 用户画像 ==========
106
+
120
107
  def _get_profile(self, user_id: str) -> Optional[str]:
121
- """从 ES 获取用户画像"""
122
- try:
123
- response = self.es.get(index=USER_PROFILE_INDEX, id=user_id)
124
- return response["_source"]["content"]
125
- except Exception:
126
- return None
108
+ """获取用户画像"""
109
+ result = self.storage.get_profile(user_id)
110
+ return result["content"] if result else None
127
111
 
128
112
  def _save_profile(self, user_id: str, content: str) -> None:
129
- """保存用户画像到 ES"""
130
- self.es.index(
131
- index=USER_PROFILE_INDEX,
132
- id=user_id,
133
- document={
134
- "user_id": user_id,
135
- "content": content,
136
- "updated_at": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
137
- },
138
- refresh=True
139
- )
113
+ """保存用户画像"""
114
+ self.storage.save_profile(user_id, content)
140
115
 
141
116
  def _init_profile(self, user_id: str) -> str:
142
- """初始化用户画像(从 ES 读取,不存在则创建)"""
117
+ """初始化用户画像(不存在则创建)"""
143
118
  content = self._get_profile(user_id)
144
119
  if content is None:
145
120
  timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
@@ -148,89 +123,21 @@ class Mem1Memory:
148
123
  logger.info(f"✓ 创建用户画像: {user_id}")
149
124
  return content
150
125
 
151
- def _ensure_state_index(self) -> None:
152
- """确保所有索引存在"""
153
- # 确保对话记录索引存在
154
- if not self.es.indices.exists(index=self.index_name):
155
- self.es.indices.create(
156
- index=self.index_name,
157
- body={
158
- "mappings": {
159
- "properties": {
160
- "user_id": {"type": "keyword"},
161
- "topic_id": {"type": "keyword"},
162
- "timestamp": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss||epoch_millis"},
163
- "messages": {"type": "nested"},
164
- "metadata": {"type": "object"}
165
- }
166
- }
167
- }
168
- )
169
- logger.info(f"✓ 创建对话记录索引: {self.index_name}")
170
-
171
- # 确保用户状态索引存在
172
- if not self.es.indices.exists(index=USER_STATE_INDEX):
173
- self.es.indices.create(
174
- index=USER_STATE_INDEX,
175
- body={
176
- "mappings": {
177
- "properties": {
178
- "user_id": {"type": "keyword"},
179
- "rounds": {"type": "integer"},
180
- "last_update": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss||epoch_millis"}
181
- }
182
- }
183
- }
184
- )
185
- logger.info(f"✓ 创建用户状态索引: {USER_STATE_INDEX}")
186
-
187
- # 确保画像索引存在
188
- if not self.es.indices.exists(index=USER_PROFILE_INDEX):
189
- self.es.indices.create(
190
- index=USER_PROFILE_INDEX,
191
- body={
192
- "mappings": {
193
- "properties": {
194
- "user_id": {"type": "keyword"},
195
- "content": {"type": "text"},
196
- "updated_at": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss||epoch_millis"}
197
- }
198
- }
199
- }
200
- )
201
- logger.info(f"✓ 创建用户画像索引: {USER_PROFILE_INDEX}")
126
+ # ========== 用户状态 ==========
202
127
 
203
128
  def _get_user_state(self, user_id: str) -> Dict[str, Any]:
204
- """从 ES 获取用户更新状态"""
205
- try:
206
- response = self.es.get(index=USER_STATE_INDEX, id=user_id)
207
- return response["_source"]
208
- except Exception:
209
- # 用户状态不存在,返回初始状态
129
+ """获取用户更新状态"""
130
+ state = self.storage.get_user_state(user_id)
131
+ if state is None:
210
132
  return {"user_id": user_id, "rounds": 0, "last_update": None}
133
+ return state
211
134
 
212
135
  def _update_user_state(self, user_id: str, rounds: int, last_update: Optional[str] = None) -> None:
213
- """更新 ES 中的用户状态"""
214
- doc = {"user_id": user_id, "rounds": rounds}
215
- if last_update:
216
- doc["last_update"] = last_update
217
-
218
- self.es.index(
219
- index=USER_STATE_INDEX,
220
- id=user_id,
221
- document=doc,
222
- refresh=True
223
- )
136
+ """更新用户状态"""
137
+ self.storage.save_user_state(user_id, rounds, last_update)
224
138
 
225
139
  def _should_trigger_update(self, user_id: str) -> bool:
226
- """
227
- 判断是否应该触发画像更新(基于 ES 存储的状态)
228
-
229
- 触发条件(满足任一即触发):
230
- 1. 累积对话轮数 >= update_interval_rounds
231
- 2. 距上次更新时间 >= update_interval_minutes
232
- 3. 首次(无 last_update)
233
- """
140
+ """判断是否应该触发画像更新"""
234
141
  state = self._get_user_state(user_id)
235
142
  rounds = state.get("rounds", 0) + 1
236
143
  last_update_str = state.get("last_update")
@@ -238,12 +145,10 @@ class Mem1Memory:
238
145
  should_update = False
239
146
  reason = ""
240
147
 
241
- # 条件1:累积轮数达到阈值
242
148
  if rounds >= self.update_interval_rounds:
243
149
  should_update = True
244
150
  reason = f"轮数={rounds} >= {self.update_interval_rounds}"
245
151
 
246
- # 条件2:距上次更新超过时间阈值
247
152
  if not should_update and last_update_str:
248
153
  try:
249
154
  last_update = datetime.strptime(last_update_str, '%Y-%m-%d %H:%M:%S')
@@ -254,21 +159,21 @@ class Mem1Memory:
254
159
  except ValueError:
255
160
  pass
256
161
 
257
- # 条件3:首次更新
258
162
  if not should_update and last_update_str is None:
259
163
  should_update = True
260
164
  reason = "首次创建画像"
261
165
 
262
166
  if should_update:
263
167
  logger.info(f"📊 触发画像更新({reason}): {user_id}")
264
- # 重置轮数
265
168
  self._update_user_state(user_id, 0, datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
266
169
  else:
267
- # 增加轮数
268
170
  self._update_user_state(user_id, rounds, last_update_str)
269
171
  logger.debug(f"📊 暂不更新(轮数={rounds}/{self.update_interval_rounds}): {user_id}")
270
172
 
271
173
  return should_update
174
+
175
+
176
+ # ========== 对话管理 ==========
272
177
 
273
178
  def add_conversation(
274
179
  self,
@@ -277,12 +182,11 @@ class Mem1Memory:
277
182
  metadata: Optional[Dict[str, Any]] = None,
278
183
  timestamp: Optional[str] = None
279
184
  ) -> Dict[str, Any]:
280
- """添加对话到 ES(使用实例绑定的 user_id 和 topic_id)"""
185
+ """添加对话"""
281
186
  ts = timestamp or datetime.now().strftime('%Y-%m-%d %H:%M:%S')
282
187
  user_id = self.user_id
283
188
  topic_id = self.topic_id
284
189
 
285
- # 构建对话记录
286
190
  conversation_entry = {
287
191
  "user_id": user_id,
288
192
  "topic_id": topic_id,
@@ -309,21 +213,16 @@ class Mem1Memory:
309
213
 
310
214
  image_refs.append(filename)
311
215
 
312
- # 生成图片描述(用户描述 + VL 理解)
313
216
  user_desc = ""
314
217
  for msg in messages:
315
218
  if msg["role"] == "user":
316
219
  user_desc = msg["content"]
317
220
  break
318
221
 
319
- # 如果启用了 VL 模型,调用视觉理解
320
222
  if self.vl:
321
223
  try:
322
224
  vl_result = self.vl.understand_image(str(img_path), user_desc)
323
- if user_desc:
324
- description = f"【用户描述】{user_desc}\n\n{vl_result}"
325
- else:
326
- description = vl_result
225
+ description = f"【用户描述】{user_desc}\n\n{vl_result}" if user_desc else vl_result
327
226
  logger.info(f"🖼️ VL 图片理解完成: {filename}")
328
227
  except Exception as e:
329
228
  logger.warning(f"⚠️ VL 图片理解失败: {e}, 使用用户描述")
@@ -331,7 +230,6 @@ class Mem1Memory:
331
230
  else:
332
231
  description = user_desc or img['filename']
333
232
 
334
- # 图片信息存入对话记录
335
233
  self._save_image_to_conversation(conversation_entry, {
336
234
  "filename": filename,
337
235
  "description": description,
@@ -352,28 +250,20 @@ class Mem1Memory:
352
250
  content = msg["content"]
353
251
  if len(content) > self.max_assistant_chars:
354
252
  content = self._summarize_assistant_response(content)
355
- conversation_entry["messages"].append({
356
- "role": "assistant",
357
- "content": content
358
- })
253
+ conversation_entry["messages"].append({"role": "assistant", "content": content})
359
254
 
360
- # 写入 ES(refresh=True 确保立即可搜索)
361
- response = self.es.index(
362
- index=self.index_name,
363
- document=conversation_entry,
364
- refresh=True
365
- )
255
+ # 保存到存储后端
256
+ record_id = self.storage.save_conversation(conversation_entry)
257
+ logger.info(f"✓ 对话已存储: user={user_id}, topic={topic_id}, timestamp={ts}, id={record_id}")
366
258
 
367
- logger.info(f"✓ 对话已存入 ES: user={user_id}, topic={topic_id}, timestamp={ts}, id={response['_id']}")
368
-
369
- # 自动更新画像(基于 ES 状态判断)
259
+ # 自动更新画像
370
260
  if self.auto_update_profile and self._should_trigger_update(user_id):
371
261
  try:
372
262
  self.update_profile()
373
263
  except Exception as e:
374
264
  logger.error(f"❌ 画像更新失败: {user_id}, error={e}")
375
265
 
376
- return {"status": "success", "es_id": response['_id']}
266
+ return {"status": "success", "id": record_id}
377
267
 
378
268
  def get_conversations(
379
269
  self,
@@ -381,46 +271,19 @@ class Mem1Memory:
381
271
  metadata_filter: Optional[Dict[str, Any]] = None,
382
272
  size: int = 1000
383
273
  ) -> List[Dict[str, Any]]:
384
- """从 ES 获取当前话题的对话记录"""
385
- user_id = self.user_id
386
- topic_id = self.topic_id
387
-
388
- query = {
389
- "bool": {
390
- "must": [
391
- {"term": {"user_id": user_id}},
392
- {"term": {"topic_id": topic_id}}
393
- ]
394
- }
395
- }
396
-
397
- # 时间过滤
274
+ """获取当前话题的对话记录"""
275
+ start_time = None
398
276
  if days_limit:
399
- cutoff_date = (datetime.now() - timedelta(days=days_limit)).strftime('%Y-%m-%d %H:%M:%S')
400
- query["bool"]["must"].append({
401
- "range": {
402
- "timestamp": {"gte": cutoff_date}
403
- }
404
- })
405
-
406
- # 元数据过滤
407
- if metadata_filter:
408
- for k, v in metadata_filter.items():
409
- query["bool"]["must"].append({
410
- "term": {f"metadata.{k}": v}
411
- })
412
-
413
- # 查询 ES
414
- response = self.es.search(
415
- index=self.index_name,
416
- query=query,
417
- size=size,
418
- sort=[{"timestamp": {"order": "asc"}}]
277
+ start_time = datetime.now() - timedelta(days=days_limit)
278
+
279
+ conversations = self.storage.get_conversations(
280
+ user_id=self.user_id,
281
+ topic_id=self.topic_id,
282
+ start_time=start_time,
283
+ metadata_filter=metadata_filter,
284
+ limit=size
419
285
  )
420
-
421
- conversations = [hit["_source"] for hit in response["hits"]["hits"]]
422
- logger.info(f"📖 从 ES 读取对话: user={user_id}, topic={topic_id}, count={len(conversations)}")
423
-
286
+ logger.info(f"📖 读取对话: user={self.user_id}, topic={self.topic_id}, count={len(conversations)}")
424
287
  return conversations
425
288
 
426
289
  def get_all_conversations(
@@ -428,53 +291,52 @@ class Mem1Memory:
428
291
  days_limit: Optional[int] = None,
429
292
  size: int = 1000
430
293
  ) -> List[Dict[str, Any]]:
431
- """从 ES 获取用户所有话题的对话记录(用于更新画像)"""
432
- user_id = self.user_id
433
-
434
- query = {
435
- "bool": {
436
- "must": [
437
- {"term": {"user_id": user_id}}
438
- ]
439
- }
440
- }
441
-
294
+ """获取用户所有话题的对话记录"""
295
+ start_time = None
442
296
  if days_limit:
443
- cutoff_date = (datetime.now() - timedelta(days=days_limit)).strftime('%Y-%m-%d %H:%M:%S')
444
- query["bool"]["must"].append({
445
- "range": {
446
- "timestamp": {"gte": cutoff_date}
447
- }
448
- })
449
-
450
- response = self.es.search(
451
- index=self.index_name,
452
- query=query,
453
- size=size,
454
- sort=[{"timestamp": {"order": "asc"}}]
455
- )
456
-
457
- conversations = [hit["_source"] for hit in response["hits"]["hits"]]
458
- logger.info(f"📖 从 ES 读取所有对话: user={user_id}, count={len(conversations)}")
297
+ start_time = datetime.now() - timedelta(days=days_limit)
459
298
 
299
+ conversations = self.storage.get_conversations(
300
+ user_id=self.user_id,
301
+ topic_id=None,
302
+ start_time=start_time,
303
+ limit=size
304
+ )
305
+ logger.info(f"📖 读取所有对话: user={self.user_id}, count={len(conversations)}")
460
306
  return conversations
461
307
 
308
+ def _get_conversations_range(self, start_days_ago: int, end_days_ago: int) -> List[Dict[str, Any]]:
309
+ """获取指定天数范围内的对话"""
310
+ now = datetime.now()
311
+ start_time = now - timedelta(days=end_days_ago)
312
+ end_time = now - timedelta(days=start_days_ago)
313
+
314
+ return self.storage.get_conversations(
315
+ user_id=self.user_id,
316
+ topic_id=self.topic_id,
317
+ start_time=start_time,
318
+ end_time=end_time
319
+ )
320
+
321
def search_conversations(self, start_days: int, end_days: int) -> List[Dict[str, Any]]:
    """Retrieve conversations by day range (meant to be called by an external LLM as a tool).

    Delegates to ``_get_conversations_range`` with the arguments unchanged.
    """
    matches = self._get_conversations_range(start_days, end_days)
    return matches
324
+
325
+
326
+ # ========== 画像更新 ==========
327
+
462
328
  def update_profile(self) -> Dict[str, Any]:
463
- """更新用户画像(基于所有话题的对话)"""
329
+ """更新用户画像"""
464
330
  user_id = self.user_id
465
331
  self._init_profile(user_id)
466
332
 
467
- # 从 ES 读取所有话题的对话
468
333
  conversations = self.get_all_conversations()
469
334
  if not conversations:
470
335
  return {"status": "success", "updated": False, "reason": "no_conversation"}
471
336
 
472
337
  history_content = self._format_conversations_for_llm(conversations)
473
-
474
- # 从 ES 读取现有画像
475
338
  profile_content = self._get_profile(user_id)
476
339
 
477
- # LLM 更新画像
478
340
  timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
479
341
  prompt = self.profile_template.get_update_prompt().format(
480
342
  user_id=user_id,
@@ -490,29 +352,37 @@ class Mem1Memory:
490
352
 
491
353
  response = self.llm.generate(messages, response_format="text")
492
354
 
493
- # 检查是否需要压缩
494
355
  if len(response) > self.max_profile_chars:
495
356
  logger.info(f"📦 用户画像超长({len(response)}>{self.max_profile_chars}),触发压缩...")
496
357
  response = self._compress_profile(user_id, response)
497
358
  logger.info(f"📦 压缩后长度: {len(response)}")
498
359
 
499
- # 保存到 ES
500
360
  self._save_profile(user_id, response)
501
- logger.info(f"✓ 画像已更新到 ES: {user_id}")
361
+ logger.info(f"✓ 画像已更新: {user_id}")
502
362
 
503
363
  return {"status": "success", "updated": True, "length": len(response)}
504
364
 
505
- def get_context(
506
- self,
507
- query: str = "",
508
- days_limit: Optional[int] = None
509
- ) -> Dict[str, Any]:
510
- """获取记忆上下文(当前话题)
365
+ def _compress_profile(self, user_id: str, profile_content: str) -> str:
366
+ """压缩用户画像"""
367
+ timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
368
+ prompt = self.profile_template.get_compress_prompt().format(
369
+ user_id=user_id,
370
+ profile_content=profile_content,
371
+ max_chars=self.max_profile_chars,
372
+ timestamp=timestamp
373
+ )
511
374
 
512
- Args:
513
- query: 用户问题(保留参数,暂未使用)
514
- days_limit: 检索最近几天的对话,默认使用配置值
515
- """
375
+ messages = [
376
+ {"role": "system", "content": prompt},
377
+ {"role": "user", "content": "请压缩用户画像"}
378
+ ]
379
+
380
+ return self.llm.generate(messages, response_format="text")
381
+
382
+ # ========== 上下文获取 ==========
383
+
384
+ def get_context(self, query: str = "", days_limit: Optional[int] = None) -> Dict[str, Any]:
385
+ """获取记忆上下文"""
516
386
  user_id = self.user_id
517
387
  profile_content = self._init_profile(user_id)
518
388
 
@@ -520,15 +390,11 @@ class Mem1Memory:
520
390
  weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
521
391
  current_time = f"{now.strftime('%Y-%m-%d %H:%M')} {weekdays[now.weekday()]}"
522
392
 
523
- # 从 ES 获取画像更新时间
524
393
  profile_last_updated = "未更新"
525
- try:
526
- response = self.es.get(index=USER_PROFILE_INDEX, id=user_id)
527
- profile_last_updated = response["_source"].get("updated_at", "未更新")
528
- except Exception:
529
- pass
394
+ profile_data = self.storage.get_profile(user_id)
395
+ if profile_data:
396
+ profile_last_updated = profile_data.get("updated_at", "未更新")
530
397
 
531
- # 强制检索最近 days_limit 天的对话
532
398
  if days_limit is None:
533
399
  days_limit = self.config.memory.context_days_limit
534
400
  conversations = self.get_conversations(days_limit=days_limit)
@@ -544,57 +410,72 @@ class Mem1Memory:
544
410
  "profile_last_updated": profile_last_updated
545
411
  }
546
412
 
547
- def _compress_profile(self, user_id: str, profile_content: str) -> str:
548
- """压缩用户画像"""
549
- timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
550
- prompt = self.profile_template.get_compress_prompt().format(
551
- user_id=user_id,
552
- profile_content=profile_content,
553
- max_chars=self.max_profile_chars,
554
- timestamp=timestamp
555
- )
556
-
557
- messages = [
558
- {"role": "system", "content": prompt},
559
- {"role": "user", "content": "请压缩用户画像"}
560
- ]
561
-
562
- response = self.llm.generate(messages, response_format="text")
563
- return response
564
-
565
- def _should_include_history(self, query: str) -> tuple[bool, str]:
566
- """LLM 判断是否需要加载历史记录"""
567
- prompt = RECALL_DECISION_PROMPT.format(query=query)
413
def get_context_progressive(self, query: str, max_days: int = 31, step: int = 7) -> Dict[str, Any]:
    """Build the memory context by widening the lookback window until the LLM deems it sufficient.

    Starting from the most recent ``step`` days, each round fetches the next
    older slice of this topic's conversations, formats everything gathered
    so far, and asks the LLM whether that is enough to answer ``query``.
    The search stops at the first "sufficient" verdict or once ``max_days``
    has been covered.

    Args:
        query: The user question the context is being assembled for.
        max_days: Furthest lookback, in days.
        step: Number of days added per round. Must be positive; a zero
            step makes ``range()`` raise ``ValueError``.

    Returns:
        A dict with the current time, user/topic IDs, the profile text
        (``import_content``), the formatted conversations
        (``normal_content``), the number of conversations gathered, the
        profile's last-update marker and the number of days searched.
    """
    user_id = self.user_id
    profile_content = self._init_profile(user_id)

    now = datetime.now()
    weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
    current_time = f"{now.strftime('%Y-%m-%d %H:%M')} {weekdays[now.weekday()]}"

    profile_last_updated = "未更新"
    profile_data = self.storage.get_profile(user_id)
    if profile_data:
        profile_last_updated = profile_data.get("updated_at", "未更新")

    all_conversations: List[Dict[str, Any]] = []
    normal_content = ""
    searched_days = 0

    for raw_end in range(step, max_days + step, step):
        # The last window is clipped so the search never reaches past max_days.
        end_day = min(raw_end, max_days)
        older = self._get_conversations_range(searched_days, end_day)
        # Each round reaches further into the past, so the new slice goes in
        # FRONT of what was already gathered; appending it would make the
        # transcript jump backwards in time.
        # NOTE(review): assumes the backend returns each slice oldest-first — confirm.
        all_conversations = [*older, *all_conversations]
        searched_days = end_day

        if not all_conversations:
            logger.info(f"📖 渐进检索: 0-{end_day}天 无对话,继续...")
            continue

        # Kept across iterations so the final transcript is not rebuilt
        # a second time after the loop.
        normal_content = self._format_conversations_for_llm(all_conversations)
        if self._is_context_sufficient(query, profile_content, normal_content, end_day):
            logger.info(f"✓ 渐进检索完成: 0-{end_day}天,{len(all_conversations)}条对话")
            break

        logger.info(f"📖 渐进检索: 0-{end_day}天 信息不足,继续...")

    return {
        "current_time": current_time,
        "user_id": user_id,
        "topic_id": self.topic_id,
        "import_content": profile_content,
        "normal_content": normal_content,
        "conversations_count": len(all_conversations),
        "profile_last_updated": profile_last_updated,
        "searched_days": searched_days,
    }
581
459
 
582
- def _summarize_assistant_response(self, content: str) -> str:
583
- """对超长的助手回复生成摘要"""
584
- prompt = ASSISTANT_SUMMARY_PROMPT.format(
585
- content=content,
586
- max_chars=self.max_assistant_chars
460
def _is_context_sufficient(self, query: str, profile: str, conversations: str, days: int) -> bool:
    """Ask the LLM whether the gathered context is enough to answer ``query``.

    Args:
        query: The user question; also sent as the user message.
        profile: Profile text inserted into the prompt.
        conversations: Formatted conversation history; a placeholder is
            substituted when it is empty.
        days: Number of days searched so far, inserted into the prompt.

    Returns:
        ``True`` when the LLM reply contains "true" (case-insensitive).
    """
    history_text = conversations if conversations else "(无对话记录)"
    system_prompt = CONTEXT_SUFFICIENT_PROMPT.format(
        query=query,
        profile=profile,
        conversations=history_text,
        days=days,
    )
    chat = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]
    verdict = self.llm.generate(chat, response_format="text")
    return "true" in verdict.strip().lower()
476
+
477
+
478
+ # ========== 图片搜索 ==========
598
479
 
599
480
  def search_images(self, query: str) -> List[Dict[str, str]]:
600
481
  """搜索用户图片"""
@@ -618,20 +499,52 @@ class Mem1Memory:
618
499
  response = self.llm.generate(messages, response_format="text")
619
500
 
620
501
  results = []
621
- # 提取所有数字(支持多种格式:纯数字、[0]、0. 等)
622
- import re
623
502
  numbers = re.findall(r'\b(\d+)\b', response)
624
503
  for num_str in numbers:
625
504
  idx = int(num_str)
626
505
  if 0 <= idx < len(images_index):
627
506
  img = images_index[idx].copy()
628
507
  img['abs_path'] = str((self._get_user_images_dir(user_id) / img['filename']).resolve())
629
- if img not in results: # 去重
508
+ if img not in results:
630
509
  results.append(img)
631
510
 
632
511
  logger.info(f"🖼️ 图片搜索: query='{query}', 找到 {len(results)} 张")
633
512
  return results
634
513
 
514
+ # ========== LLM 辅助方法 ==========
515
+
516
def _should_include_history(self, query: str) -> tuple[bool, str]:
    """Ask the LLM whether answering ``query`` needs the conversation history.

    Args:
        query: The user question; inserted into the decision prompt and
            also sent as the user message.

    Returns:
        A pair ``(need_history, raw_reply)`` where ``raw_reply`` is the
        stripped LLM reply the decision was derived from.
    """
    prompt = RECALL_DECISION_PROMPT.format(query=query)

    messages = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": query}
    ]

    response = self.llm.generate(messages, response_format="text")
    raw_reply = response.strip()
    answer = raw_reply.lower()

    if "true" in answer:
        need_history = True
    elif "false" in answer:
        # An explicit "false" wins over incidental Chinese characters in an
        # accompanying explanation (e.g. the "是" inside "只是").
        need_history = False
    else:
        # Drop negated forms first; otherwise "不需要" / "不是" would match
        # the affirmative markers "需要" / "是" and flip the decision.
        for negated in ("不需要", "不是"):
            answer = answer.replace(negated, "")
        need_history = "是" in answer or "需要" in answer

    logger.info(f"🔍 回忆判断: query='{query[:50]}...', need_history={need_history}")
    return need_history, raw_reply
531
+
532
def _summarize_assistant_response(self, content: str) -> str:
    """Have the LLM summarise an over-long assistant reply.

    The summary prompt is filled with the reply text and the configured
    ``max_assistant_chars`` limit.

    Returns:
        The summary text returned by the LLM client.
    """
    instructions = ASSISTANT_SUMMARY_PROMPT.format(
        content=content,
        max_chars=self.max_assistant_chars,
    )
    chat = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": "请生成摘要"},
    ]
    summary = self.llm.generate(chat, response_format="text")
    logger.info(f"📝 助手回复摘要: {len(content)} -> {len(summary)} 字符")
    return summary
547
+
635
548
  def _format_conversations_for_llm(self, conversations: List[Dict[str, Any]]) -> str:
636
549
  """格式化对话记录为文本"""
637
550
  output = []
@@ -661,48 +574,23 @@ class Mem1Memory:
661
574
 
662
575
  return "\n".join(output)
663
576
 
577
+ # ========== 用户/话题管理 ==========
578
+
664
579
def get_user_list(self) -> List[str]:
    """Return the list of user IDs reported by the storage backend."""
    user_ids = self.storage.get_user_list()
    return user_ids
582
+
583
def list_topics(self) -> List[Dict[str, Any]]:
    """Return the storage backend's topic list for the bound user."""
    topics = self.storage.get_topic_list(self.user_id)
    return topics
680
586
 
681
587
  def delete_user(self) -> Dict[str, Any]:
682
- """删除用户所有记忆(所有话题)"""
588
+ """删除用户所有记忆"""
683
589
  user_id = self.user_id
684
590
 
685
- # 删除 ES 中的对话记录
686
- try:
687
- self.es.delete_by_query(
688
- index=self.index_name,
689
- query={"term": {"user_id": user_id}},
690
- refresh=True
691
- )
692
- except Exception:
693
- pass # 索引可能不存在
694
-
695
- # 删除 ES 中的用户状态
696
- try:
697
- self.es.delete(index=USER_STATE_INDEX, id=user_id, refresh=True)
698
- except Exception:
699
- pass # 状态可能不存在
700
-
701
- # 删除 ES 中的用户画像
702
- try:
703
- self.es.delete(index=USER_PROFILE_INDEX, id=user_id, refresh=True)
704
- except Exception:
705
- pass # 画像可能不存在
591
+ self.storage.delete_conversations(user_id)
592
+ self.storage.delete_user_state(user_id)
593
+ self.storage.delete_profile(user_id)
706
594
 
707
595
  # 删除本地图片文件
708
596
  user_images_dir = self.images_dir / user_id
@@ -717,52 +605,7 @@ class Mem1Memory:
717
605
  user_id = self.user_id
718
606
  topic_id = self.topic_id
719
607
 
720
- try:
721
- self.es.delete_by_query(
722
- index=self.index_name,
723
- query={
724
- "bool": {
725
- "must": [
726
- {"term": {"user_id": user_id}},
727
- {"term": {"topic_id": topic_id}}
728
- ]
729
- }
730
- },
731
- refresh=True
732
- )
733
- except Exception:
734
- pass # 索引可能不存在
608
+ self.storage.delete_conversations(user_id, topic_id)
735
609
 
736
610
  logger.info(f"✓ 已删除话题: user={user_id}, topic={topic_id}")
737
611
  return {"status": "success", "deleted_topic": topic_id}
738
-
739
- def list_topics(self) -> List[Dict[str, Any]]:
740
- """列出用户的所有话题"""
741
- user_id = self.user_id
742
-
743
- response = self.es.search(
744
- index=self.index_name,
745
- body={
746
- "size": 0,
747
- "query": {"term": {"user_id": user_id}},
748
- "aggs": {
749
- "topics": {
750
- "terms": {"field": "topic_id", "size": 1000},
751
- "aggs": {
752
- "latest": {"max": {"field": "timestamp"}},
753
- "count": {"value_count": {"field": "timestamp"}}
754
- }
755
- }
756
- }
757
- }
758
- )
759
-
760
- topics = []
761
- for bucket in response["aggregations"]["topics"]["buckets"]:
762
- topics.append({
763
- "topic_id": bucket["key"],
764
- "conversation_count": bucket["doc_count"],
765
- "last_active": bucket["latest"]["value_as_string"] if bucket["latest"]["value"] else None
766
- })
767
-
768
- return topics