mem1 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mem1/storage.py ADDED
@@ -0,0 +1,399 @@
1
+ """可插拔存储层抽象
2
+
3
+ 设计目标:
4
+ - 将存储操作从 Mem1Memory 中解耦
5
+ - 支持 ES/SQLite/MySQL 等多种后端
6
+ - 保持接口简洁,只抽象必要操作
7
+
8
+ 使用方式:
9
+ from mem1.storage import ESStorage
10
+ storage = ESStorage(hosts=["http://localhost:9200"], index_name="my_index")
11
+
12
+ # 或未来实现
13
+ from mem1.storage import SQLiteStorage
14
+ storage = SQLiteStorage(db_path="mem1.db")
15
+ """
16
+ from abc import ABC, abstractmethod
17
+ from datetime import datetime
18
+ from typing import List, Dict, Any, Optional
19
+
20
+
21
class StorageBackend(ABC):
    """Abstract base class that every storage backend must implement.

    Required operations, grouped by concern:
    - Conversations: save_conversation, get_conversations, delete_conversations
    - User profile: get_profile, save_profile, delete_profile
    - User state: get_user_state, save_user_state, delete_user_state
    - Aggregations: get_user_list, get_topic_list
    - Setup: ensure_schema
    """

    # ========== Conversations ==========

    @abstractmethod
    def save_conversation(self, conversation: Dict[str, Any]) -> str:
        """Persist a single conversation record.

        Args:
            conversation: Mapping with the keys
                "user_id" (str),
                "topic_id" (str),
                "timestamp" (str, formatted as '%Y-%m-%d %H:%M:%S'),
                "messages" (List[Dict]),
                "metadata" (Dict) and, optionally,
                "images" (List[Dict]).

        Returns:
            The ID of the stored record.
        """

    @abstractmethod
    def get_conversations(
        self,
        user_id: str,
        topic_id: Optional[str] = None,
        start_time: Optional[datetime] = None,
        end_time: Optional[datetime] = None,
        metadata_filter: Optional[Dict[str, Any]] = None,
        limit: int = 1000,
    ) -> List[Dict[str, Any]]:
        """Query conversation records.

        Args:
            user_id: User ID.
            topic_id: Topic ID; None means all topics.
            start_time: Start of the time window.
            end_time: End of the time window.
            metadata_filter: Metadata key/value pairs to filter on.
            limit: Maximum number of records to return.

        Returns:
            Conversation records in ascending time order.
        """

    @abstractmethod
    def delete_conversations(
        self,
        user_id: str,
        topic_id: Optional[str] = None,
    ) -> int:
        """Delete conversation records.

        Args:
            user_id: User ID.
            topic_id: Topic ID; None means delete across all topics.

        Returns:
            Number of records deleted.
        """

    # ========== User profile ==========

    @abstractmethod
    def get_profile(self, user_id: str) -> Optional[Dict[str, Any]]:
        """Fetch the profile of a user.

        Returns:
            {"content": str, "updated_at": str}, or None.
        """

    @abstractmethod
    def save_profile(self, user_id: str, content: str) -> None:
        """Store the profile of a user."""

    @abstractmethod
    def delete_profile(self, user_id: str) -> bool:
        """Delete the profile of a user."""

    # ========== User state ==========

    @abstractmethod
    def get_user_state(self, user_id: str) -> Optional[Dict[str, Any]]:
        """Fetch the state of a user.

        Returns:
            {"rounds": int, "last_update": str}, or None.
        """

    @abstractmethod
    def save_user_state(self, user_id: str, rounds: int, last_update: Optional[str] = None) -> None:
        """Store the state of a user."""

    @abstractmethod
    def delete_user_state(self, user_id: str) -> bool:
        """Delete the state of a user."""

    # ========== Aggregations ==========

    @abstractmethod
    def get_user_list(self) -> List[str]:
        """Return the IDs of all users."""

    @abstractmethod
    def get_topic_list(self, user_id: str) -> List[Dict[str, Any]]:
        """Return the topics of a user.

        Returns:
            [{"topic_id": str, "conversation_count": int, "last_active": str}, ...]
        """

    # ========== Setup ==========

    @abstractmethod
    def ensure_schema(self) -> None:
        """Make sure the storage structures (indices/tables) exist."""
158
+
159
+
160
+
161
class ESStorage(StorageBackend):
    """Elasticsearch implementation of ``StorageBackend``.

    Conversations are stored in the index named by ``index_name``. User
    state and user profiles are stored in two fixed indices, using the
    ``user_id`` as the document ID.
    """

    # Fixed index names for the per-user documents.
    USER_STATE_INDEX = "mem1_user_state"
    USER_PROFILE_INDEX = "mem1_user_profile"

    # Mapping format shared by every date field created by ensure_schema().
    _ES_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss||epoch_millis"
    # strftime pattern used whenever this class renders a datetime.
    _TIME_FORMAT = '%Y-%m-%d %H:%M:%S'

    def __init__(self, hosts: List[str], index_name: str):
        """Connect to Elasticsearch and create any missing index.

        Args:
            hosts: Elasticsearch host addresses.
            index_name: Name of the conversation index.
        """
        from elasticsearch import Elasticsearch
        self.es = Elasticsearch(hosts)
        self.index_name = index_name
        self.ensure_schema()

    # ========== Setup ==========

    def ensure_schema(self) -> None:
        """Create the conversation, user-state and user-profile indices if absent."""
        self._ensure_index(self.index_name, {
            "user_id": {"type": "keyword"},
            "topic_id": {"type": "keyword"},
            "timestamp": {"type": "date", "format": self._ES_DATE_FORMAT},
            "messages": {"type": "nested"},
            "metadata": {"type": "object"},
            "images": {"type": "nested"},
        })
        self._ensure_index(self.USER_STATE_INDEX, {
            "user_id": {"type": "keyword"},
            "rounds": {"type": "integer"},
            "last_update": {"type": "date", "format": self._ES_DATE_FORMAT},
        })
        self._ensure_index(self.USER_PROFILE_INDEX, {
            "user_id": {"type": "keyword"},
            "content": {"type": "text"},
            "updated_at": {"type": "date", "format": self._ES_DATE_FORMAT},
        })

    def _ensure_index(self, index: str, properties: Dict[str, Any]) -> None:
        """Create ``index`` with the given field mappings unless it already exists."""
        if not self.es.indices.exists(index=index):
            self.es.indices.create(index=index, mappings={"properties": properties})

    # ========== Internal helpers ==========

    @staticmethod
    def _user_clauses(user_id: str, topic_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """Build the term clauses that restrict a query to a user (and topic)."""
        clauses: List[Dict[str, Any]] = [{"term": {"user_id": user_id}}]
        if topic_id:
            clauses.append({"term": {"topic_id": topic_id}})
        return clauses

    @staticmethod
    def _is_not_found(exc: Exception) -> bool:
        """Return True when ``exc`` carries an HTTP 404 ``status_code``."""
        # NOTE(review): relies on the client's exceptions exposing
        # `status_code`; exceptions without it are treated as real failures.
        return getattr(exc, "status_code", None) == 404

    @staticmethod
    def _log_failure(action: str, exc: Exception) -> None:
        """Log a swallowed failure so it is not mistaken for an empty result."""
        # Imported locally so this class does not depend on a module-level
        # logging import.
        import logging
        logging.getLogger(__name__).warning("ESStorage %s failed: %s", action, exc)

    def _get_source(self, index: str, doc_id: str) -> Optional[Dict[str, Any]]:
        """Return the ``_source`` of a document, or None if it cannot be read."""
        try:
            response = self.es.get(index=index, id=doc_id)
            return response["_source"]
        except Exception as exc:
            # A missing document is the expected "no data yet" case; anything
            # else is logged but still reported as None, as before.
            if not self._is_not_found(exc):
                self._log_failure(f"get {index}/{doc_id}", exc)
            return None

    def _delete_doc(self, index: str, doc_id: str) -> bool:
        """Delete a document by ID; return False if the delete did not succeed."""
        try:
            self.es.delete(index=index, id=doc_id, refresh=True)
            return True
        except Exception as exc:
            if not self._is_not_found(exc):
                self._log_failure(f"delete {index}/{doc_id}", exc)
            return False

    # ========== Conversations ==========

    def save_conversation(self, conversation: Dict[str, Any]) -> str:
        """Index one conversation document and return its generated ID."""
        response = self.es.index(
            index=self.index_name,
            document=conversation,
            refresh=True,
        )
        return response["_id"]

    def get_conversations(
        self,
        user_id: str,
        topic_id: Optional[str] = None,
        start_time: Optional[datetime] = None,
        end_time: Optional[datetime] = None,
        metadata_filter: Optional[Dict[str, Any]] = None,
        limit: int = 1000,
    ) -> List[Dict[str, Any]]:
        """Return a user's conversations sorted by ascending timestamp.

        Args:
            user_id: User ID.
            topic_id: Topic ID; a falsy value means all topics.
            start_time: Inclusive lower bound on ``timestamp``.
            end_time: Exclusive upper bound on ``timestamp``.
            metadata_filter: Exact-match filters applied to ``metadata.<key>``.
            limit: Maximum number of documents to return.

        Returns:
            The ``_source`` of each matching document.
        """
        clauses = self._user_clauses(user_id, topic_id)

        time_range: Dict[str, str] = {}
        if start_time:
            time_range["gte"] = start_time.strftime(self._TIME_FORMAT)
        if end_time:
            time_range["lt"] = end_time.strftime(self._TIME_FORMAT)
        if time_range:
            clauses.append({"range": {"timestamp": time_range}})

        # NOTE(review): `metadata` sub-fields are mapped dynamically; confirm
        # that a term query matches the way string values end up indexed.
        for key, value in (metadata_filter or {}).items():
            clauses.append({"term": {f"metadata.{key}": value}})

        response = self.es.search(
            index=self.index_name,
            query={"bool": {"must": clauses}},
            size=limit,
            sort=[{"timestamp": {"order": "asc"}}],
        )
        return [hit["_source"] for hit in response["hits"]["hits"]]

    def delete_conversations(self, user_id: str, topic_id: Optional[str] = None) -> int:
        """Delete a user's conversations, optionally limited to one topic.

        Returns:
            Number of deleted documents; 0 if the request failed (the failure
            is logged).
        """
        query = {"bool": {"must": self._user_clauses(user_id, topic_id)}}
        try:
            response = self.es.delete_by_query(
                index=self.index_name,
                query=query,
                refresh=True,
            )
            return response.get("deleted", 0)
        except Exception as exc:
            self._log_failure(f"delete_conversations for user {user_id}", exc)
            return 0

    # ========== User profile ==========

    def get_profile(self, user_id: str) -> Optional[Dict[str, Any]]:
        """Return the stored profile document of a user, or None."""
        return self._get_source(self.USER_PROFILE_INDEX, user_id)

    def save_profile(self, user_id: str, content: str) -> None:
        """Create or replace the profile of a user, stamping the current time."""
        self.es.index(
            index=self.USER_PROFILE_INDEX,
            id=user_id,
            document={
                "user_id": user_id,
                "content": content,
                "updated_at": datetime.now().strftime(self._TIME_FORMAT),
            },
            refresh=True,
        )

    def delete_profile(self, user_id: str) -> bool:
        """Delete the profile of a user; return True on success."""
        return self._delete_doc(self.USER_PROFILE_INDEX, user_id)

    # ========== User state ==========

    def get_user_state(self, user_id: str) -> Optional[Dict[str, Any]]:
        """Return the stored state document of a user, or None."""
        return self._get_source(self.USER_STATE_INDEX, user_id)

    def save_user_state(self, user_id: str, rounds: int, last_update: Optional[str] = None) -> None:
        """Create or replace the state of a user.

        ``last_update`` is written only when it is truthy; because the whole
        document is re-indexed, omitting it drops any previously stored value.
        """
        doc: Dict[str, Any] = {"user_id": user_id, "rounds": rounds}
        if last_update:
            doc["last_update"] = last_update

        self.es.index(
            index=self.USER_STATE_INDEX,
            id=user_id,
            document=doc,
            refresh=True,
        )

    def delete_user_state(self, user_id: str) -> bool:
        """Delete the state of a user; return True on success."""
        return self._delete_doc(self.USER_STATE_INDEX, user_id)

    # ========== Aggregations ==========

    def get_user_list(self) -> List[str]:
        """Return the distinct user IDs found in the conversation index."""
        # The terms aggregation is capped at 10000 buckets, so at most that
        # many user IDs are returned.
        response = self.es.search(
            index=self.index_name,
            size=0,
            aggs={"users": {"terms": {"field": "user_id", "size": 10000}}},
        )
        return [bucket["key"] for bucket in response["aggregations"]["users"]["buckets"]]

    def get_topic_list(self, user_id: str) -> List[Dict[str, Any]]:
        """Return the topics of a user with conversation counts and last activity.

        Returns:
            [{"topic_id": str, "conversation_count": int, "last_active": str | None}, ...]
        """
        response = self.es.search(
            index=self.index_name,
            size=0,
            query={"term": {"user_id": user_id}},
            aggs={
                "topics": {
                    "terms": {"field": "topic_id", "size": 1000},
                    "aggs": {"latest": {"max": {"field": "timestamp"}}},
                }
            },
        )

        topics = []
        for bucket in response["aggregations"]["topics"]["buckets"]:
            latest = bucket["latest"]
            # Compare against None instead of relying on truthiness so that a
            # timestamp of epoch 0 is not reported as "no activity".
            has_value = latest.get("value") is not None
            topics.append({
                "topic_id": bucket["key"],
                "conversation_count": bucket["doc_count"],
                "last_active": latest.get("value_as_string") if has_value else None,
            })
        return topics

    def get_conversations_with_images(self, user_id: str) -> List[Dict[str, Any]]:
        """Return up to 1000 of a user's conversations that have images.

        Results are sorted by ascending timestamp (used for image indexing).
        """
        # NOTE(review): `images` is mapped as `nested`; confirm that a
        # top-level `exists` clause matches such documents, or wrap it in a
        # `nested` query.
        clauses = self._user_clauses(user_id)
        clauses.append({"exists": {"field": "images"}})
        response = self.es.search(
            index=self.index_name,
            query={"bool": {"must": clauses}},
            size=1000,
            sort=[{"timestamp": {"order": "asc"}}],
        )
        return [hit["_source"] for hit in response["hits"]["hits"]]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mem1
3
- Version: 0.0.7
3
+ Version: 0.0.8
4
4
  Summary: 基于云服务的用户记忆系统
5
5
  Project-URL: Homepage, https://github.com/sougannkyou/mem1
6
6
  Project-URL: Repository, https://github.com/sougannkyou/mem1
@@ -34,6 +34,7 @@ Description-Content-Type: text/markdown
34
34
  - **图片记忆**:存储图片时自动调用 VL 模型生成描述(OCR + 内容理解),搜索时基于文字描述召回
35
35
  - **业务解耦**:通过 ProfileTemplate 适配不同场景
36
36
  - **画像自动更新**:基于对话轮数/时间自动触发 LLM 更新用户画像
37
+ - **可插拔存储**:支持 ES 后端,预留 SQLite/MySQL 扩展接口
37
38
 
38
39
  ## 安装
39
40
 
@@ -184,6 +185,24 @@ memory.delete_topic()
184
185
  memory.delete_user()
185
186
  ```
186
187
 
188
+ ## 可插拔存储层
189
+
190
+ v0.0.8 引入了可插拔存储层架构,支持自定义存储后端:
191
+
192
+ ```python
193
+ from mem1 import Mem1Memory, Mem1Config, StorageBackend, ESStorage
194
+
195
+ # 默认使用 ES
196
+ memory = Mem1Memory(config, user_id="user001", topic_id="default")
197
+
198
+ # 或显式指定存储后端
199
+ storage = ESStorage(hosts=["http://localhost:9200"], index_name="my_index")
200
+ memory = Mem1Memory(config, user_id="user001", storage=storage)
201
+
202
+ # 未来可扩展 SQLite/MySQL 后端
203
+ # storage = SQLiteStorage(db_path="mem1.db")
204
+ ```
205
+
187
206
  ## 远期记忆检索
188
207
 
189
208
  mem1 定位是**记忆存储层**,不内置时间意图解析。当用户问"半年前的XX事"时,建议:
@@ -228,6 +247,28 @@ def search_memory(start_days: int, end_days: int) -> str:
228
247
  5. 如果对话记录中没有相关信息,请明确说"对话记录中未提及",不要猜测
229
248
  ```
230
249
 
250
+ ## 设计决策:为什么不用向量数据库
251
+
252
+ mem1 选择 ES 时间范围检索而非 Milvus/Pinecone 等向量数据库,核心原因是**对话记忆需要上下文连续性**:
253
+
254
+ | 对比 | 向量检索(Milvus) | mem1 时间范围检索(ES) |
255
+ |------|-------------------|------------------------|
256
+ | 召回方式 | 单条 Embedding → Top-K 相似 | 时间范围 → 整体拼接 |
257
+ | 上下文 | 碎片化,语义割裂 | 连续对话流,因果关系完整 |
258
+ | 适用场景 | 知识库问答、独立文档 | 对话记忆、需要理解对话序列 |
259
+
260
+ 举例说明:
261
+ ```
262
+ 用户: 我是李明,市网信办的
263
+ 用户: 本月处理了97起舆情
264
+ 用户: 帮我写周报
265
+ ```
266
+
267
+ - **向量检索**:"帮我写周报" 可能只召回包含"周报"的那一条,丢失"97起舆情"
268
+ - **时间范围检索**:LLM 看到完整对话流,理解"周报"要包含"97起舆情"
269
+
270
+ 向量检索更适合:长期记忆中的独立事实召回(如半年前提过的偏好)。但 mem1 通过**画像压缩**解决这个问题——重要信息会被 LLM 提取到用户画像中持久保存。
271
+
231
272
  ## 设计决策:为什么不用 Context Caching
232
273
 
233
274
  豆包等大模型提供了 Context Caching 功能(缓存命中可省 86% token 费),但 mem1 选择不使用:
@@ -1,11 +1,12 @@
1
- mem1/__init__.py,sha256=tNsBrO4d7fujDIPpvl6pweVcg5kHr_EYRgslR8nWWEI,494
1
+ mem1/__init__.py,sha256=8Dv6_SUtTh78wadGFMkDG5DaCRrNlYZkdR5MyFEbXN0,582
2
2
  mem1/config.py,sha256=9U-dJD6JsQ2CyDy-rwLqE3_kWwG3sPo7jTbkV_Tl4SE,5422
3
3
  mem1/langchain_middleware.py,sha256=h2mG7K2Tq1N7IovXMvCyvOhsAwTWOR1NAqivF4db2AE,6648
4
4
  mem1/llm.py,sha256=S23OA2OpZVb6A36iQ2YY_7Q7rRtnqC7xUbVW-bZSjsI,4419
5
- mem1/memory_es.py,sha256=keLiTq2yw2QC_tmIvCYSdOVnSJNpq981ko3GrQpsyjE,34301
5
+ mem1/memory.py,sha256=RRAzAXDvHAGCtejhBhM8ab7wEb2O9bDScZOOLF1VoxY,24398
6
6
  mem1/memory_md.py,sha256=uu_TvdBoUpAncT1eissOSe1Y3vCy3iWMcuvCy3vCjEA,26258
7
7
  mem1/memory_tools.py,sha256=b1YBiRNet0gXnW-KGIZ2KQclluB9Q6dli_DbWLS571k,3646
8
8
  mem1/prompts.py,sha256=5HUG-yvTD7iBUzzXwO-WnRomDLkz0UJWox3z3zcT0kI,10599
9
- mem1-0.0.7.dist-info/METADATA,sha256=VnJlMl-3KoG9zw59qA7xDKKoxo9Bzy1EeChu9EpjGhE,8300
10
- mem1-0.0.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
11
- mem1-0.0.7.dist-info/RECORD,,
9
+ mem1/storage.py,sha256=J2JUTjPEXe3dO21LVoj3sl8_78qKOECad2Ol5R9kvCU,12774
10
+ mem1-0.0.8.dist-info/METADATA,sha256=YuSnf9DbnYFPbbXAddreia5ZlJVQFydHvtfWsgu9EE8,10002
11
+ mem1-0.0.8.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
12
+ mem1-0.0.8.dist-info/RECORD,,
File without changes