mem1 0.0.6__py3-none-any.whl → 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mem1/config.py +14 -5
- mem1/llm.py +54 -5
- mem1/memory_es.py +143 -1
- mem1/prompts.py +22 -0
- mem1-0.0.7.dist-info/METADATA +249 -0
- mem1-0.0.7.dist-info/RECORD +11 -0
- mem1-0.0.6.dist-info/METADATA +0 -191
- mem1-0.0.6.dist-info/RECORD +0 -11
- {mem1-0.0.6.dist-info → mem1-0.0.7.dist-info}/WHEEL +0 -0
mem1/config.py
CHANGED
|
@@ -13,14 +13,21 @@ class LLMConfig(BaseModel):
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class VLConfig(BaseModel):
|
|
16
|
-
"""视觉语言模型配置(可选,配置了
|
|
16
|
+
"""视觉语言模型配置(可选,配置了 provider 即启用)
|
|
17
|
+
|
|
18
|
+
支持的 provider:
|
|
19
|
+
- qwen: 使用 dashscope SDK 调用 Qwen-VL
|
|
20
|
+
- doubao: 使用 OpenAI 兼容接口调用豆包视觉模型
|
|
21
|
+
"""
|
|
22
|
+
provider: str = "" # qwen / doubao
|
|
17
23
|
model: str = ""
|
|
18
24
|
api_key: str = ""
|
|
25
|
+
base_url: str = "" # doubao 需要
|
|
19
26
|
|
|
20
27
|
@property
|
|
21
28
|
def enabled(self) -> bool:
|
|
22
|
-
"""
|
|
23
|
-
return bool(self.
|
|
29
|
+
"""配置了 provider 就启用"""
|
|
30
|
+
return bool(self.provider)
|
|
24
31
|
|
|
25
32
|
|
|
26
33
|
class MemoryConfig(BaseModel):
|
|
@@ -101,10 +108,12 @@ class Mem1Config(BaseModel):
|
|
|
101
108
|
memory_dir = required_vars["MEM1_MEMORY_DIR"]
|
|
102
109
|
images_dir = f"{memory_dir}/images"
|
|
103
110
|
|
|
104
|
-
# VL 模型配置(可选,配置了
|
|
111
|
+
# VL 模型配置(可选,配置了 provider 即启用)
|
|
105
112
|
vl_config = VLConfig(
|
|
113
|
+
provider=os.getenv("MEM1_VL_PROVIDER", ""),
|
|
106
114
|
model=os.getenv("MEM1_VL_MODEL", ""),
|
|
107
|
-
api_key=os.getenv("MEM1_VL_API_KEY", "")
|
|
115
|
+
api_key=os.getenv("MEM1_VL_API_KEY", ""),
|
|
116
|
+
base_url=os.getenv("MEM1_VL_BASE_URL", "")
|
|
108
117
|
)
|
|
109
118
|
|
|
110
119
|
return cls(
|
mem1/llm.py
CHANGED
|
@@ -44,12 +44,20 @@ class LLMClient:
|
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
class VLClient:
|
|
47
|
-
"""
|
|
47
|
+
"""视觉语言模型客户端(支持 qwen/doubao)"""
|
|
48
48
|
|
|
49
49
|
def __init__(self, config: VLConfig):
|
|
50
50
|
self.config = config
|
|
51
|
-
|
|
52
|
-
|
|
51
|
+
self.provider = config.provider.lower()
|
|
52
|
+
|
|
53
|
+
if self.provider == "qwen":
|
|
54
|
+
import dashscope
|
|
55
|
+
dashscope.api_key = config.api_key
|
|
56
|
+
elif self.provider == "doubao":
|
|
57
|
+
self.client = OpenAI(
|
|
58
|
+
api_key=config.api_key,
|
|
59
|
+
base_url=config.base_url
|
|
60
|
+
)
|
|
53
61
|
|
|
54
62
|
def understand_image(
|
|
55
63
|
self,
|
|
@@ -65,13 +73,22 @@ class VLClient:
|
|
|
65
73
|
Returns:
|
|
66
74
|
图片理解结果(包含 OCR 文字和内容描述)
|
|
67
75
|
"""
|
|
68
|
-
import dashscope
|
|
69
|
-
|
|
70
76
|
prompt = "请分析这张图片,完成以下任务:\n1. OCR识别:提取图片中的所有文字\n2. 内容理解:描述图片的主要内容和关键信息\n\n请用简洁的中文回答,格式如下:\n【文字内容】...\n【图片描述】..."
|
|
71
77
|
|
|
72
78
|
if user_description:
|
|
73
79
|
prompt += f"\n\n用户补充说明:{user_description}"
|
|
74
80
|
|
|
81
|
+
if self.provider == "qwen":
|
|
82
|
+
return self._call_qwen(image_path, prompt)
|
|
83
|
+
elif self.provider == "doubao":
|
|
84
|
+
return self._call_doubao(image_path, prompt)
|
|
85
|
+
else:
|
|
86
|
+
raise ValueError(f"不支持的 VL provider: {self.provider}")
|
|
87
|
+
|
|
88
|
+
def _call_qwen(self, image_path: str, prompt: str) -> str:
|
|
89
|
+
"""调用 Qwen-VL(dashscope SDK)"""
|
|
90
|
+
import dashscope
|
|
91
|
+
|
|
75
92
|
messages = [
|
|
76
93
|
{
|
|
77
94
|
"role": "user",
|
|
@@ -88,3 +105,35 @@ class VLClient:
|
|
|
88
105
|
)
|
|
89
106
|
|
|
90
107
|
return response.output.choices[0].message.content[0]["text"]
|
|
108
|
+
|
|
109
|
+
def _call_doubao(self, image_path: str, prompt: str) -> str:
|
|
110
|
+
"""调用豆包视觉模型(OpenAI 兼容接口)"""
|
|
111
|
+
import base64
|
|
112
|
+
|
|
113
|
+
# 读取图片并转为 base64
|
|
114
|
+
with open(image_path, "rb") as f:
|
|
115
|
+
image_data = base64.b64encode(f.read()).decode("utf-8")
|
|
116
|
+
|
|
117
|
+
# 获取图片格式
|
|
118
|
+
ext = image_path.lower().split(".")[-1]
|
|
119
|
+
mime_type = {"png": "image/png", "jpg": "image/jpeg", "jpeg": "image/jpeg", "gif": "image/gif", "webp": "image/webp"}.get(ext, "image/png")
|
|
120
|
+
|
|
121
|
+
messages = [
|
|
122
|
+
{
|
|
123
|
+
"role": "user",
|
|
124
|
+
"content": [
|
|
125
|
+
{
|
|
126
|
+
"type": "image_url",
|
|
127
|
+
"image_url": {"url": f"data:{mime_type};base64,{image_data}"}
|
|
128
|
+
},
|
|
129
|
+
{"type": "text", "text": prompt}
|
|
130
|
+
]
|
|
131
|
+
}
|
|
132
|
+
]
|
|
133
|
+
|
|
134
|
+
response = self.client.chat.completions.create(
|
|
135
|
+
model=self.config.model,
|
|
136
|
+
messages=messages
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
return response.choices[0].message.content
|
mem1/memory_es.py
CHANGED
|
@@ -9,7 +9,7 @@ from pathlib import Path
|
|
|
9
9
|
from elasticsearch import Elasticsearch
|
|
10
10
|
from mem1.config import Mem1Config
|
|
11
11
|
from mem1.llm import LLMClient, VLClient
|
|
12
|
-
from mem1.prompts import ProfileTemplate, RECALL_DECISION_PROMPT, IMAGE_SEARCH_PROMPT, ASSISTANT_SUMMARY_PROMPT
|
|
12
|
+
from mem1.prompts import ProfileTemplate, RECALL_DECISION_PROMPT, IMAGE_SEARCH_PROMPT, ASSISTANT_SUMMARY_PROMPT, CONTEXT_SUFFICIENT_PROMPT
|
|
13
13
|
|
|
14
14
|
logger = logging.getLogger(__name__)
|
|
15
15
|
|
|
@@ -544,6 +544,148 @@ class Mem1Memory:
|
|
|
544
544
|
"profile_last_updated": profile_last_updated
|
|
545
545
|
}
|
|
546
546
|
|
|
547
|
+
def get_context_progressive(
    self,
    query: str,
    max_days: int = 31,
    step: int = 7
) -> Dict[str, Any]:
    """Build the memory context progressively, widening the look-back window.

    Conversations are fetched one window of ``step`` days at a time, starting
    with the most recent window. After each window the LLM is asked whether
    the profile plus the conversations gathered so far are enough to answer
    ``query``; retrieval stops at the first positive verdict or once
    ``max_days`` has been covered.

    Args:
        query: The user's question.
        max_days: Maximum number of days to look back (default 31).
        step: Number of days added per retrieval round (default 7).

    Returns:
        A dict with the keys ``current_time``, ``user_id``, ``topic_id``,
        ``import_content`` (profile), ``normal_content`` (formatted
        conversations), ``conversations_count``, ``profile_last_updated``
        and ``searched_days``.

    Raises:
        ValueError: If ``step`` is not a positive integer.
    """
    if step <= 0:
        raise ValueError(f"step must be a positive integer, got {step}")

    user_id = self.user_id
    profile_content = self._init_profile(user_id)

    now = datetime.now()
    weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
    current_time = f"{now.strftime('%Y-%m-%d %H:%M')} {weekdays[now.weekday()]}"

    # Best-effort lookup of the profile's last-update time in ES; a failure
    # keeps the placeholder but is now logged instead of silently dropped.
    profile_last_updated = "未更新"
    try:
        response = self.es.get(index=USER_PROFILE_INDEX, id=user_id)
        profile_last_updated = response["_source"].get("updated_at", "未更新")
    except Exception:
        logger.debug("profile update time lookup failed for user %s", user_id, exc_info=True)

    # Progressive retrieval.
    all_conversations: List[Dict[str, Any]] = []
    normal_content = ""
    searched_days = 0

    for end_day in range(step, max_days + step, step):
        end_day = min(end_day, max_days)

        # Each new window is older than everything gathered so far, so it is
        # placed in front to keep the accumulated list in chronological order.
        # NOTE(review): the range helper takes its own "now" on every call, so
        # window edges can drift by the time spent in the LLM check — confirm
        # whether boundary duplicates matter.
        older = self._get_conversations_range(searched_days, end_day)
        all_conversations = [*older, *all_conversations]
        searched_days = end_day

        if not all_conversations:
            logger.info(f"📖 渐进检索: 0-{end_day}天 无对话,继续...")
            continue

        normal_content = self._format_conversations_for_llm(all_conversations)

        # Nothing older can be fetched after the final window, so the LLM
        # verdict could not change the result; skip that call.
        if end_day >= max_days:
            logger.info(f"✓ 渐进检索完成: 0-{end_day}天,{len(all_conversations)}条对话")
            break

        # Ask the LLM whether the gathered context is sufficient.
        if self._is_context_sufficient(query, profile_content, normal_content, end_day):
            logger.info(f"✓ 渐进检索完成: 0-{end_day}天,{len(all_conversations)}条对话")
            break

        logger.info(f"📖 渐进检索: 0-{end_day}天 信息不足,继续...")

    return {
        "current_time": current_time,
        "user_id": user_id,
        "topic_id": self.topic_id,
        "import_content": profile_content,
        "normal_content": normal_content,
        "conversations_count": len(all_conversations),
        "profile_last_updated": profile_last_updated,
        "searched_days": searched_days
    }
|
|
611
|
+
|
|
612
|
+
def _get_conversations_range(
|
|
613
|
+
self,
|
|
614
|
+
start_days_ago: int,
|
|
615
|
+
end_days_ago: int
|
|
616
|
+
) -> List[Dict[str, Any]]:
|
|
617
|
+
"""获取指定天数范围内的对话(start_days_ago 到 end_days_ago 天前)"""
|
|
618
|
+
user_id = self.user_id
|
|
619
|
+
topic_id = self.topic_id
|
|
620
|
+
|
|
621
|
+
now = datetime.now()
|
|
622
|
+
start_date = (now - timedelta(days=end_days_ago)).strftime('%Y-%m-%d %H:%M:%S')
|
|
623
|
+
end_date = (now - timedelta(days=start_days_ago)).strftime('%Y-%m-%d %H:%M:%S')
|
|
624
|
+
|
|
625
|
+
query = {
|
|
626
|
+
"bool": {
|
|
627
|
+
"must": [
|
|
628
|
+
{"term": {"user_id": user_id}},
|
|
629
|
+
{"term": {"topic_id": topic_id}},
|
|
630
|
+
{"range": {"timestamp": {"gte": start_date, "lt": end_date}}}
|
|
631
|
+
]
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
response = self.es.search(
|
|
636
|
+
index=self.index_name,
|
|
637
|
+
query=query,
|
|
638
|
+
size=1000,
|
|
639
|
+
sort=[{"timestamp": {"order": "asc"}}]
|
|
640
|
+
)
|
|
641
|
+
|
|
642
|
+
return [hit["_source"] for hit in response["hits"]["hits"]]
|
|
643
|
+
|
|
644
|
+
def _is_context_sufficient(
|
|
645
|
+
self,
|
|
646
|
+
query: str,
|
|
647
|
+
profile: str,
|
|
648
|
+
conversations: str,
|
|
649
|
+
days: int
|
|
650
|
+
) -> bool:
|
|
651
|
+
"""LLM 判断当前上下文是否足够回答问题"""
|
|
652
|
+
prompt = CONTEXT_SUFFICIENT_PROMPT.format(
|
|
653
|
+
query=query,
|
|
654
|
+
profile=profile,
|
|
655
|
+
conversations=conversations or "(无对话记录)",
|
|
656
|
+
days=days
|
|
657
|
+
)
|
|
658
|
+
|
|
659
|
+
messages = [
|
|
660
|
+
{"role": "system", "content": prompt},
|
|
661
|
+
{"role": "user", "content": query}
|
|
662
|
+
]
|
|
663
|
+
|
|
664
|
+
response = self.llm.generate(messages, response_format="text")
|
|
665
|
+
is_sufficient = "true" in response.strip().lower()
|
|
666
|
+
|
|
667
|
+
return is_sufficient
|
|
668
|
+
|
|
669
|
+
def search_conversations(
    self,
    start_days: int,
    end_days: int
) -> List[Dict[str, Any]]:
    """Retrieve conversations in a days-ago range (meant to be exposed as an LLM tool).

    Args:
        start_days: Recent edge of the range, in days before now.
        end_days: Older edge of the range, in days before now.

    Returns:
        The list of conversation records in that range.

    Examples:
        search_conversations(0, 7)      # the last 7 days
        search_conversations(170, 180)  # a 10-day span about half a year ago
    """
    # Callers (often an external LLM) may pass the two edges in either order;
    # an inverted range would silently match nothing, so put the smaller
    # (more recent) value first.
    if start_days > end_days:
        start_days, end_days = end_days, start_days
    return self._get_conversations_range(start_days, end_days)
|
|
688
|
+
|
|
547
689
|
def _compress_profile(self, user_id: str, profile_content: str) -> str:
|
|
548
690
|
"""压缩用户画像"""
|
|
549
691
|
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
|
mem1/prompts.py
CHANGED
|
@@ -207,6 +207,28 @@ ASSISTANT_SUMMARY_PROMPT = """你是对话摘要专家。将助手的长回复
|
|
|
207
207
|
直接输出摘要内容,开头标注 [摘要]"""
|
|
208
208
|
|
|
209
209
|
|
|
210
|
+
# ============ 渐进式检索判断提示词 ============
|
|
211
|
+
|
|
212
|
+
CONTEXT_SUFFICIENT_PROMPT = """判断当前信息是否足够回答用户问题。
|
|
213
|
+
|
|
214
|
+
## 用户问题
|
|
215
|
+
{query}
|
|
216
|
+
|
|
217
|
+
## 用户画像
|
|
218
|
+
{profile}
|
|
219
|
+
|
|
220
|
+
## 已检索的对话记录(最近 {days} 天)
|
|
221
|
+
{conversations}
|
|
222
|
+
|
|
223
|
+
## 判断标准
|
|
224
|
+
- 如果画像或对话中包含回答问题所需的信息,输出 `true`
|
|
225
|
+
- 如果问题涉及的时间、事件、数据在已有信息中找不到,输出 `false`
|
|
226
|
+
- 如果是通用问题(不依赖历史记录),输出 `true`
|
|
227
|
+
|
|
228
|
+
## 输出
|
|
229
|
+
只输出:`true`(信息足够)或 `false`(需要检索更早的记录)"""
|
|
230
|
+
|
|
231
|
+
|
|
210
232
|
# ============ 图片搜索提示词(通用) ============
|
|
211
233
|
|
|
212
234
|
IMAGE_SEARCH_PROMPT = """根据用户查询,从图片列表中找出匹配的图片。
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mem1
|
|
3
|
+
Version: 0.0.7
|
|
4
|
+
Summary: 基于云服务的用户记忆系统
|
|
5
|
+
Project-URL: Homepage, https://github.com/sougannkyou/mem1
|
|
6
|
+
Project-URL: Repository, https://github.com/sougannkyou/mem1
|
|
7
|
+
Author: Song
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: langchain,llm,memory,user-profile
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Requires-Python: >=3.12
|
|
17
|
+
Requires-Dist: dashscope>=1.14.0
|
|
18
|
+
Requires-Dist: elasticsearch>=8.0.0
|
|
19
|
+
Requires-Dist: openai>=1.0.0
|
|
20
|
+
Requires-Dist: pydantic>=2.0.0
|
|
21
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: ipython>=8.0.0; extra == 'dev'
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
# mem1 - 用户记忆系统
|
|
27
|
+
|
|
28
|
+
让 AI 真正"记住"用户:三层记忆架构 + 图片记忆 + 话题隔离 + 业务场景解耦。
|
|
29
|
+
|
|
30
|
+
## 核心特性
|
|
31
|
+
|
|
32
|
+
- **三层记忆架构**:短期会话 → 用户画像 → 长期记录
|
|
33
|
+
- **话题隔离**:同一用户可有多个话题,对话按话题隔离,画像跨话题共享
|
|
34
|
+
- **图片记忆**:存储图片时自动调用 VL 模型生成描述(OCR + 内容理解),搜索时基于文字描述召回
|
|
35
|
+
- **业务解耦**:通过 ProfileTemplate 适配不同场景
|
|
36
|
+
- **画像自动更新**:基于对话轮数/时间自动触发 LLM 更新用户画像
|
|
37
|
+
|
|
38
|
+
## 安装
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install mem1
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## 快速开始
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from mem1 import Mem1Memory, Mem1Config
|
|
48
|
+
|
|
49
|
+
# 从环境变量加载配置
|
|
50
|
+
config = Mem1Config.from_env()
|
|
51
|
+
|
|
52
|
+
# 创建记忆实例(绑定用户和话题)
|
|
53
|
+
memory = Mem1Memory(config, user_id="user001", topic_id="project_a")
|
|
54
|
+
|
|
55
|
+
# 添加对话
|
|
56
|
+
memory.add_conversation(
|
|
57
|
+
messages=[
|
|
58
|
+
{"role": "user", "content": "你好,我是张明"},
|
|
59
|
+
{"role": "assistant", "content": "你好张明!"}
|
|
60
|
+
]
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
# 获取上下文(含用户画像 + 最近对话)
|
|
64
|
+
ctx = memory.get_context()
|
|
65
|
+
print(ctx['import_content']) # 用户画像
|
|
66
|
+
print(ctx['normal_content']) # 最近对话记录
|
|
67
|
+
print(ctx['current_time']) # 当前时间
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## 环境变量配置
|
|
71
|
+
|
|
72
|
+
复制 `.env.example` 为 `.env` 并填写配置:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
cp .env.example .env
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## 图片记忆
|
|
79
|
+
|
|
80
|
+
存储图片时自动调用 VL 模型(如 Qwen-VL)生成描述:
|
|
81
|
+
- 【用户描述】用户发送图片时的文字说明
|
|
82
|
+
- 【文字内容】OCR 识别图片中的文字
|
|
83
|
+
- 【图片描述】VL 模型对图片内容的理解
|
|
84
|
+
|
|
85
|
+
搜索时基于描述文本进行关键词匹配,返回图片路径。
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
# 添加带图片的对话
|
|
89
|
+
memory.add_conversation(
|
|
90
|
+
messages=[{"role": "user", "content": "这是今天的报表"}],
|
|
91
|
+
images=[{"path": "./report.png", "filename": "report.png"}]
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
# 搜索图片(基于 VL 生成的描述)
|
|
95
|
+
results = memory.search_images(query="报表")
|
|
96
|
+
# 返回: [{"filename": "...", "description": "...", "abs_path": "..."}]
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## LangChain 集成
|
|
100
|
+
|
|
101
|
+
完整示例(记忆存储 + 召回):
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
from langchain_openai import ChatOpenAI
|
|
105
|
+
from langchain_core.messages import SystemMessage, HumanMessage
|
|
106
|
+
from mem1 import Mem1Memory, Mem1Config
|
|
107
|
+
|
|
108
|
+
config = Mem1Config.from_env()
|
|
109
|
+
memory = Mem1Memory(config, user_id="user001", topic_id="project_a")
|
|
110
|
+
llm = ChatOpenAI(model=config.llm.model, api_key=config.llm.api_key, base_url=config.llm.base_url)
|
|
111
|
+
|
|
112
|
+
# ========== 第一次对话:存储记忆 ==========
|
|
113
|
+
memory.add_conversation(messages=[
|
|
114
|
+
{"role": "user", "content": "我是李明,市网信办的,每周一要交周报"},
|
|
115
|
+
{"role": "assistant", "content": "李明您好!已记录:周一交周报。"}
|
|
116
|
+
])
|
|
117
|
+
memory.add_conversation(messages=[
|
|
118
|
+
{"role": "user", "content": "本月处理了97起舆情,重大舆情11起"},
|
|
119
|
+
{"role": "assistant", "content": "已记录本月数据。"}
|
|
120
|
+
])
|
|
121
|
+
|
|
122
|
+
# ========== 第二次对话:召回记忆 ==========
|
|
123
|
+
user_question = "帮我写个本月舆情简报"
|
|
124
|
+
|
|
125
|
+
# 1. 获取记忆上下文
|
|
126
|
+
ctx = memory.get_context(query=user_question, days_limit=7)
|
|
127
|
+
|
|
128
|
+
# 2. 构建 system prompt(注入画像 + 历史对话)
|
|
129
|
+
system_prompt = f"""你是舆情助手。
|
|
130
|
+
|
|
131
|
+
## 用户画像
|
|
132
|
+
{ctx['import_content']}
|
|
133
|
+
|
|
134
|
+
## 最近对话记录
|
|
135
|
+
{ctx['normal_content']}
|
|
136
|
+
|
|
137
|
+
## 当前时间
|
|
138
|
+
{ctx['current_time']}
|
|
139
|
+
|
|
140
|
+
## 重要规则
|
|
141
|
+
- 回答必须基于对话记录,不要编造
|
|
142
|
+
- 数字必须从记录中原样提取
|
|
143
|
+
"""
|
|
144
|
+
|
|
145
|
+
# 3. 调用 LLM(记忆已注入)
|
|
146
|
+
messages = [SystemMessage(content=system_prompt), HumanMessage(content=user_question)]
|
|
147
|
+
response = llm.invoke(messages)
|
|
148
|
+
print(response.content) # AI 会基于记忆中的 97起、11起 来回答
|
|
149
|
+
|
|
150
|
+
# 4. 保存本次对话
|
|
151
|
+
memory.add_conversation(messages=[
|
|
152
|
+
{"role": "user", "content": user_question},
|
|
153
|
+
{"role": "assistant", "content": response.content}
|
|
154
|
+
])
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## 核心接口
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
memory = Mem1Memory(config, user_id="user001", topic_id="project_a")
|
|
161
|
+
|
|
162
|
+
# 添加对话
|
|
163
|
+
memory.add_conversation(messages=[...], images=[...], metadata={...})
|
|
164
|
+
|
|
165
|
+
# 获取上下文(画像 + 最近 N 天对话)
|
|
166
|
+
ctx = memory.get_context(days_limit=31)
|
|
167
|
+
|
|
168
|
+
# 渐进式检索(先查近期,不够再扩展)
|
|
169
|
+
ctx = memory.get_context_progressive(query="帮我写周报", max_days=31, step=7)
|
|
170
|
+
|
|
171
|
+
# 按时间范围检索(供外部 LLM 作为 tool 调用)
|
|
172
|
+
convs = memory.search_conversations(start_days=170, end_days=180) # 查半年前
|
|
173
|
+
|
|
174
|
+
# 查询对话
|
|
175
|
+
convs = memory.get_conversations(days_limit=7)
|
|
176
|
+
all_convs = memory.get_all_conversations(days_limit=7)
|
|
177
|
+
|
|
178
|
+
# 图片搜索
|
|
179
|
+
results = memory.search_images(query="麻花")
|
|
180
|
+
|
|
181
|
+
# 话题管理
|
|
182
|
+
topics = memory.list_topics()
|
|
183
|
+
memory.delete_topic()
|
|
184
|
+
memory.delete_user()
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## 远期记忆检索
|
|
188
|
+
|
|
189
|
+
mem1 定位是**记忆存储层**,不内置时间意图解析。当用户问"半年前的XX事"时,建议:
|
|
190
|
+
|
|
191
|
+
1. **外部 LLM 判断时间范围**:通过 function calling 让 LLM 提取时间意图
|
|
192
|
+
2. **调用 `search_conversations(start_days, end_days)`**:定向检索指定时间段
|
|
193
|
+
|
|
194
|
+
```python
|
|
195
|
+
# 示例:作为 LangChain Tool 暴露给 LLM
|
|
196
|
+
from langchain.tools import tool
|
|
197
|
+
|
|
198
|
+
@tool
|
|
199
|
+
def search_memory(start_days: int, end_days: int) -> str:
|
|
200
|
+
"""搜索用户历史对话。start_days 和 end_days 表示距今多少天。"""
|
|
201
|
+
convs = memory.search_conversations(start_days=start_days, end_days=end_days)
|
|
202
|
+
return memory._format_conversations_for_llm(convs)
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
这样设计的原因:
|
|
206
|
+
- 外部 LLM 有完整对话上下文,判断时间范围更准确
|
|
207
|
+
- 避免 mem1 内部嵌套 LLM 调用,架构更清晰
|
|
208
|
+
- 符合 Agent / function calling 的设计模式
|
|
209
|
+
|
|
210
|
+
## ES 索引
|
|
211
|
+
|
|
212
|
+
| 索引 | 用途 |
|
|
213
|
+
|------|------|
|
|
214
|
+
| `conversation_history` | 对话记录(含图片索引) |
|
|
215
|
+
| `mem1_user_state` | 用户状态 |
|
|
216
|
+
| `mem1_user_profile` | 用户画像 |
|
|
217
|
+
|
|
218
|
+
## LLM 提示词建议
|
|
219
|
+
|
|
220
|
+
使用 `get_context()` 获取上下文后,建议在 system prompt 中加入以下规则,避免 LLM 编造信息:
|
|
221
|
+
|
|
222
|
+
```
|
|
223
|
+
## 重要规则
|
|
224
|
+
1. 回答必须基于上述对话记录中的实际内容,严禁编造任何信息
|
|
225
|
+
2. 涉及数字(金额、数量、百分比、日期等)时,必须从对话记录中原样提取,不得估算或编造
|
|
226
|
+
3. 需要汇总累加时,必须列出计算过程(如:23+31+18+25=97)
|
|
227
|
+
4. 涉及人名、公司名、账号名等实体时,必须使用对话中的原始名称
|
|
228
|
+
5. 如果对话记录中没有相关信息,请明确说"对话记录中未提及",不要猜测
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
## 设计决策:为什么不用 Context Caching
|
|
232
|
+
|
|
233
|
+
豆包等大模型提供了 Context Caching 功能(缓存命中可省 86% token 费),但 mem1 选择不使用:
|
|
234
|
+
|
|
235
|
+
| 对比 | Context Caching | mem1 架构 |
|
|
236
|
+
|------|-----------------|-----------|
|
|
237
|
+
| 原理 | 缓存整个对话历史,按 session 复用 | 画像压缩 + 按需检索 |
|
|
238
|
+
| 适用场景 | 单 session 内反复分析同一长文档 | 跨 session 持久化记忆 |
|
|
239
|
+
| 多模态 | Responses API 支持图片/视频缓存 | 图片转描述文本存储 |
|
|
240
|
+
| 过期 | 72h 自动过期需重建 | ES 永久存储 |
|
|
241
|
+
| 灵活性 | 固定缓存内容 | 动态组装 prompt |
|
|
242
|
+
|
|
243
|
+
mem1 的记忆是动态组装的(画像 + 检索到的相关对话),每次 prompt 内容不同,Context Caching 的"相同前缀复用"优势无法发挥。
|
|
244
|
+
|
|
245
|
+
如果担心 token 消耗,建议调小 `MEM1_CONTEXT_DAYS_LIMIT`(如 3-7 天),让远期记忆靠画像覆盖。
|
|
246
|
+
|
|
247
|
+
## License
|
|
248
|
+
|
|
249
|
+
MIT
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
mem1/__init__.py,sha256=tNsBrO4d7fujDIPpvl6pweVcg5kHr_EYRgslR8nWWEI,494
|
|
2
|
+
mem1/config.py,sha256=9U-dJD6JsQ2CyDy-rwLqE3_kWwG3sPo7jTbkV_Tl4SE,5422
|
|
3
|
+
mem1/langchain_middleware.py,sha256=h2mG7K2Tq1N7IovXMvCyvOhsAwTWOR1NAqivF4db2AE,6648
|
|
4
|
+
mem1/llm.py,sha256=S23OA2OpZVb6A36iQ2YY_7Q7rRtnqC7xUbVW-bZSjsI,4419
|
|
5
|
+
mem1/memory_es.py,sha256=keLiTq2yw2QC_tmIvCYSdOVnSJNpq981ko3GrQpsyjE,34301
|
|
6
|
+
mem1/memory_md.py,sha256=uu_TvdBoUpAncT1eissOSe1Y3vCy3iWMcuvCy3vCjEA,26258
|
|
7
|
+
mem1/memory_tools.py,sha256=b1YBiRNet0gXnW-KGIZ2KQclluB9Q6dli_DbWLS571k,3646
|
|
8
|
+
mem1/prompts.py,sha256=5HUG-yvTD7iBUzzXwO-WnRomDLkz0UJWox3z3zcT0kI,10599
|
|
9
|
+
mem1-0.0.7.dist-info/METADATA,sha256=VnJlMl-3KoG9zw59qA7xDKKoxo9Bzy1EeChu9EpjGhE,8300
|
|
10
|
+
mem1-0.0.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
11
|
+
mem1-0.0.7.dist-info/RECORD,,
|
mem1-0.0.6.dist-info/METADATA
DELETED
|
@@ -1,191 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: mem1
|
|
3
|
-
Version: 0.0.6
|
|
4
|
-
Summary: 基于云服务的用户记忆系统
|
|
5
|
-
Project-URL: Homepage, https://github.com/sougannkyou/mem1
|
|
6
|
-
Project-URL: Repository, https://github.com/sougannkyou/mem1
|
|
7
|
-
Author: Song
|
|
8
|
-
License: MIT
|
|
9
|
-
Keywords: langchain,llm,memory,user-profile
|
|
10
|
-
Classifier: Development Status :: 3 - Alpha
|
|
11
|
-
Classifier: Intended Audience :: Developers
|
|
12
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
-
Classifier: Programming Language :: Python :: 3
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
-
Requires-Python: >=3.12
|
|
17
|
-
Requires-Dist: dashscope>=1.14.0
|
|
18
|
-
Requires-Dist: elasticsearch>=8.0.0
|
|
19
|
-
Requires-Dist: openai>=1.0.0
|
|
20
|
-
Requires-Dist: pydantic>=2.0.0
|
|
21
|
-
Requires-Dist: python-dotenv>=1.0.0
|
|
22
|
-
Provides-Extra: dev
|
|
23
|
-
Requires-Dist: ipython>=8.0.0; extra == 'dev'
|
|
24
|
-
Description-Content-Type: text/markdown
|
|
25
|
-
|
|
26
|
-
# mem1 - 用户记忆系统
|
|
27
|
-
|
|
28
|
-
让 AI 真正"记住"用户:三层记忆架构 + 图片记忆 + 话题隔离 + 业务场景解耦。
|
|
29
|
-
|
|
30
|
-
## 核心特性
|
|
31
|
-
|
|
32
|
-
- **三层记忆架构**:短期会话 → 用户画像 → 长期记录
|
|
33
|
-
- **话题隔离**:同一用户可有多个话题,对话按话题隔离,画像跨话题共享
|
|
34
|
-
- **图片记忆**:存储图片时自动调用 VL 模型生成描述(OCR + 内容理解),搜索时基于文字描述召回
|
|
35
|
-
- **业务解耦**:通过 ProfileTemplate 适配不同场景
|
|
36
|
-
- **画像自动更新**:基于对话轮数/时间自动触发 LLM 更新用户画像
|
|
37
|
-
|
|
38
|
-
## 安装
|
|
39
|
-
|
|
40
|
-
```bash
|
|
41
|
-
pip install mem1
|
|
42
|
-
```
|
|
43
|
-
|
|
44
|
-
## 快速开始
|
|
45
|
-
|
|
46
|
-
```python
|
|
47
|
-
from mem1 import Mem1Memory, Mem1Config
|
|
48
|
-
|
|
49
|
-
# 从环境变量加载配置
|
|
50
|
-
config = Mem1Config.from_env()
|
|
51
|
-
|
|
52
|
-
# 创建记忆实例(绑定用户和话题)
|
|
53
|
-
memory = Mem1Memory(config, user_id="user001", topic_id="project_a")
|
|
54
|
-
|
|
55
|
-
# 添加对话
|
|
56
|
-
memory.add_conversation(
|
|
57
|
-
messages=[
|
|
58
|
-
{"role": "user", "content": "你好,我是张明"},
|
|
59
|
-
{"role": "assistant", "content": "你好张明!"}
|
|
60
|
-
]
|
|
61
|
-
)
|
|
62
|
-
|
|
63
|
-
# 获取上下文(含用户画像 + 最近对话)
|
|
64
|
-
ctx = memory.get_context()
|
|
65
|
-
print(ctx['import_content']) # 用户画像
|
|
66
|
-
print(ctx['normal_content']) # 最近对话记录
|
|
67
|
-
print(ctx['current_time']) # 当前时间
|
|
68
|
-
```
|
|
69
|
-
|
|
70
|
-
## 环境变量配置
|
|
71
|
-
|
|
72
|
-
```bash
|
|
73
|
-
# LLM 配置
|
|
74
|
-
MEM1_LLM_API_KEY=your-api-key
|
|
75
|
-
MEM1_LLM_BASE_URL=https://api.deepseek.com
|
|
76
|
-
MEM1_LLM_MODEL=deepseek-chat
|
|
77
|
-
|
|
78
|
-
# ES 配置
|
|
79
|
-
MEM1_ES_HOSTS=http://localhost:9200
|
|
80
|
-
MEM1_ES_INDEX=conversation_history
|
|
81
|
-
|
|
82
|
-
# 记忆配置
|
|
83
|
-
MEM1_MEMORY_DIR=./memories
|
|
84
|
-
MEM1_AUTO_UPDATE_PROFILE=true
|
|
85
|
-
MEM1_MAX_PROFILE_CHARS=3000
|
|
86
|
-
MEM1_UPDATE_INTERVAL_ROUNDS=5
|
|
87
|
-
MEM1_UPDATE_INTERVAL_MINUTES=3
|
|
88
|
-
MEM1_SAVE_ASSISTANT_MESSAGES=true
|
|
89
|
-
MEM1_MAX_ASSISTANT_CHARS=500
|
|
90
|
-
MEM1_CONTEXT_DAYS_LIMIT=31
|
|
91
|
-
|
|
92
|
-
# VL 视觉模型(可选,配置 MODEL 即启用,使用 dashscope SDK)
|
|
93
|
-
MEM1_VL_MODEL=qwen-vl-max
|
|
94
|
-
MEM1_VL_API_KEY=your-dashscope-key
|
|
95
|
-
```
|
|
96
|
-
|
|
97
|
-
## 图片记忆
|
|
98
|
-
|
|
99
|
-
### 实现机制
|
|
100
|
-
|
|
101
|
-
1. **存储阶段**:`add_conversation()` 遇到图片时自动调用 VL 模型(如 Qwen-VL)
|
|
102
|
-
2. **VL 处理**:生成包含三部分的描述文本
|
|
103
|
-
- 【用户描述】用户发送图片时的文字说明
|
|
104
|
-
- 【文字内容】OCR 识别图片中的文字
|
|
105
|
-
- 【图片描述】VL 模型对图片内容的理解
|
|
106
|
-
3. **数据存储**:图片信息存入 ES 对话记录的 `images` 字段,图片文件存本地
|
|
107
|
-
4. **搜索召回**:`search_images()` 基于描述文本进行关键词匹配,返回图片路径
|
|
108
|
-
|
|
109
|
-
### ES 数据结构
|
|
110
|
-
|
|
111
|
-
```json
|
|
112
|
-
{
|
|
113
|
-
"user_id": "user001",
|
|
114
|
-
"topic_id": "default",
|
|
115
|
-
"timestamp": "2026-01-06 16:46:03",
|
|
116
|
-
"messages": [
|
|
117
|
-
{"role": "user", "content": "...", "image_refs": ["20260106_164603_report.png"]}
|
|
118
|
-
],
|
|
119
|
-
"images": [
|
|
120
|
-
{
|
|
121
|
-
"filename": "20260106_164603_report.png",
|
|
122
|
-
"description": "【用户描述】...\n\n【文字内容】...\n\n【图片描述】...",
|
|
123
|
-
"timestamp": "2026-01-06 16:46:16",
|
|
124
|
-
"original_name": "report.png"
|
|
125
|
-
}
|
|
126
|
-
]
|
|
127
|
-
}
|
|
128
|
-
```
|
|
129
|
-
|
|
130
|
-
### 使用示例
|
|
131
|
-
|
|
132
|
-
```python
|
|
133
|
-
# 添加带图片的对话
|
|
134
|
-
memory.add_conversation(
|
|
135
|
-
messages=[{"role": "user", "content": "这是今天的报表"}],
|
|
136
|
-
images=[{"path": "./report.png", "filename": "report.png"}]
|
|
137
|
-
)
|
|
138
|
-
|
|
139
|
-
# 搜索图片(基于 VL 生成的描述)
|
|
140
|
-
results = memory.search_images(query="报表")
|
|
141
|
-
# 返回: [{"filename": "...", "description": "...", "abs_path": "..."}]
|
|
142
|
-
```
|
|
143
|
-
|
|
144
|
-
## 核心接口
|
|
145
|
-
|
|
146
|
-
```python
|
|
147
|
-
memory = Mem1Memory(config, user_id="user001", topic_id="project_a")
|
|
148
|
-
|
|
149
|
-
# 添加对话
|
|
150
|
-
memory.add_conversation(messages=[...], images=[...], metadata={...})
|
|
151
|
-
|
|
152
|
-
# 获取上下文(画像 + 最近 N 天对话)
|
|
153
|
-
ctx = memory.get_context(days_limit=31)
|
|
154
|
-
|
|
155
|
-
# 查询对话
|
|
156
|
-
convs = memory.get_conversations(days_limit=7)
|
|
157
|
-
all_convs = memory.get_all_conversations(days_limit=7)
|
|
158
|
-
|
|
159
|
-
# 图片搜索
|
|
160
|
-
results = memory.search_images(query="麻花")
|
|
161
|
-
|
|
162
|
-
# 话题管理
|
|
163
|
-
topics = memory.list_topics()
|
|
164
|
-
memory.delete_topic()
|
|
165
|
-
memory.delete_user()
|
|
166
|
-
```
|
|
167
|
-
|
|
168
|
-
## ES 索引
|
|
169
|
-
|
|
170
|
-
| 索引 | 用途 |
|
|
171
|
-
|------|------|
|
|
172
|
-
| `conversation_history` | 对话记录(含图片索引) |
|
|
173
|
-
| `mem1_user_state` | 用户状态 |
|
|
174
|
-
| `mem1_user_profile` | 用户画像 |
|
|
175
|
-
|
|
176
|
-
## LLM 提示词建议
|
|
177
|
-
|
|
178
|
-
使用 `get_context()` 获取上下文后,建议在 system prompt 中加入以下规则,避免 LLM 编造信息:
|
|
179
|
-
|
|
180
|
-
```
|
|
181
|
-
## 重要规则
|
|
182
|
-
1. 回答必须基于上述对话记录中的实际内容,严禁编造任何信息
|
|
183
|
-
2. 涉及数字(金额、数量、百分比、日期等)时,必须从对话记录中原样提取,不得估算或编造
|
|
184
|
-
3. 需要汇总累加时,必须列出计算过程(如:23+31+18+25=97)
|
|
185
|
-
4. 涉及人名、公司名、账号名等实体时,必须使用对话中的原始名称
|
|
186
|
-
5. 如果对话记录中没有相关信息,请明确说"对话记录中未提及",不要猜测
|
|
187
|
-
```
|
|
188
|
-
|
|
189
|
-
## License
|
|
190
|
-
|
|
191
|
-
MIT
|
mem1-0.0.6.dist-info/RECORD
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
mem1/__init__.py,sha256=tNsBrO4d7fujDIPpvl6pweVcg5kHr_EYRgslR8nWWEI,494
|
|
2
|
-
mem1/config.py,sha256=YWMzO3AIRp0PEa37fBzScRuY0TsoVJmHMi1xzDpmLsk,5123
|
|
3
|
-
mem1/langchain_middleware.py,sha256=h2mG7K2Tq1N7IovXMvCyvOhsAwTWOR1NAqivF4db2AE,6648
|
|
4
|
-
mem1/llm.py,sha256=Mq5a-3RMXeIXjVyv_W2-1JGzCSZ1PJ8hKGyXpQF8r6M,2632
|
|
5
|
-
mem1/memory_es.py,sha256=P836RJjA4TcOEqrA3ja9kQMD092mBokMRjllgsk0ki0,29198
|
|
6
|
-
mem1/memory_md.py,sha256=uu_TvdBoUpAncT1eissOSe1Y3vCy3iWMcuvCy3vCjEA,26258
|
|
7
|
-
mem1/memory_tools.py,sha256=b1YBiRNet0gXnW-KGIZ2KQclluB9Q6dli_DbWLS571k,3646
|
|
8
|
-
mem1/prompts.py,sha256=ISVnCnrZ1QJGcQOO6bK30ZPJPCpasd2Hs9n_MsVY_R4,9987
|
|
9
|
-
mem1-0.0.6.dist-info/METADATA,sha256=oaqbpU3W2JjdAyBUwb-3tyMOFeIlf9BNwMicShInxoQ,5642
|
|
10
|
-
mem1-0.0.6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
11
|
-
mem1-0.0.6.dist-info/RECORD,,
|
|
File without changes
|