PyPI - mem1 - Versions diffs - 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl - Mend

mem1 0.0.7py3-none-any.whl → 0.0.8py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

mem1/__init__.py +5 -4
mem1/{memory_es.py → memory.py} +172 -471
mem1/storage.py +399 -0
{mem1-0.0.7.dist-info → mem1-0.0.8.dist-info}/METADATA +42 -1
{mem1-0.0.7.dist-info → mem1-0.0.8.dist-info}/RECORD +6 -5
{mem1-0.0.7.dist-info → mem1-0.0.8.dist-info}/WHEEL +0 -0

mem1/storage.py ADDED Viewed

@@ -0,0 +1,399 @@
+"""可插拔存储层抽象
+设计目标：
+- 将存储操作从 Mem1Memory 中解耦
+- 支持 ES/SQLite/MySQL 等多种后端
+- 保持接口简洁，只抽象必要操作
+使用方式：
+    from mem1.storage import ESStorage
+    storage = ESStorage(hosts=["http://localhost:9200"], index_name="my_index")
+    # 或未来实现
+    from mem1.storage import SQLiteStorage
+    storage = SQLiteStorage(db_path="mem1.db")
+"""
+from abc import ABC, abstractmethod
+from datetime import datetime
+from typing import List, Dict, Any, Optional
+class StorageBackend(ABC):
+    """Abstract base class for storage backends.
+    Every concrete backend must implement the abstract methods below:
+    - Conversation records: save_conversation, get_conversations, delete_conversations
+    - User profile: get_profile, save_profile, delete_profile
+    - User state: get_user_state, save_user_state, delete_user_state
+    - Aggregate queries: get_user_list, get_topic_list
+    - Initialization: ensure_schema
+    """
+    # ========== Conversation records ==========
+    @abstractmethod
+    def save_conversation(self, conversation: Dict[str, Any]) -> str:
+        """Persist one conversation record.
+        Args:
+            conversation: {
+                "user_id": str,
+                "topic_id": str,
+                "timestamp": str,  # format: '%Y-%m-%d %H:%M:%S'
+                "messages": List[Dict],
+                "metadata": Dict,
+                "images": List[Dict] (optional)
+            }
+        Returns:
+            The ID of the stored record.
+        """
+        pass
+    @abstractmethod
+    def get_conversations(
+        self,
+        user_id: str,
+        topic_id: Optional[str] = None,
+        start_time: Optional[datetime] = None,
+        end_time: Optional[datetime] = None,
+        metadata_filter: Optional[Dict[str, Any]] = None,
+        limit: int = 1000
+    ) -> List[Dict[str, Any]]:
+        """Query conversation records.
+        Args:
+            user_id: User ID.
+            topic_id: Topic ID; None means all topics.
+            start_time: Start of the time window.
+            end_time: End of the time window.
+            metadata_filter: Metadata key/value pairs to filter on.
+            limit: Maximum number of records to return.
+        Returns:
+            A list of conversation records, sorted by time in ascending order.
+        NOTE(review): whether the window bounds are inclusive is not specified
+        here; the ES implementation in this module treats start_time as
+        inclusive and end_time as exclusive.
+        """
+        pass
+    @abstractmethod
+    def delete_conversations(
+        self,
+        user_id: str,
+        topic_id: Optional[str] = None
+    ) -> int:
+        """Delete conversation records.
+        Args:
+            user_id: User ID.
+            topic_id: Topic ID; None means delete across all topics.
+        Returns:
+            The number of records deleted.
+        """
+        pass
+    # ========== User profile ==========
+    @abstractmethod
+    def get_profile(self, user_id: str) -> Optional[Dict[str, Any]]:
+        """Fetch the profile of a user.
+        Returns:
+            {"content": str, "updated_at": str}, or None.
+        """
+        pass
+    @abstractmethod
+    def save_profile(self, user_id: str, content: str) -> None:
+        """Save the profile of a user."""
+        pass
+    @abstractmethod
+    def delete_profile(self, user_id: str) -> bool:
+        """Delete the profile of a user.
+        NOTE(review): the meaning of the bool is not specified here; the ES
+        implementation in this module returns True when the delete call
+        succeeds and False when it raises.
+        """
+        pass
+    # ========== User state ==========
+    @abstractmethod
+    def get_user_state(self, user_id: str) -> Optional[Dict[str, Any]]:
+        """Fetch the state of a user.
+        Returns:
+            {"rounds": int, "last_update": str}, or None.
+        """
+        pass
+    @abstractmethod
+    def save_user_state(self, user_id: str, rounds: int, last_update: Optional[str] = None) -> None:
+        """Save the state of a user."""
+        pass
+    @abstractmethod
+    def delete_user_state(self, user_id: str) -> bool:
+        """Delete the state of a user.
+        NOTE(review): the meaning of the bool is not specified here; see the
+        concrete backend for what True/False signify.
+        """
+        pass
+    # ========== Aggregate queries ==========
+    @abstractmethod
+    def get_user_list(self) -> List[str]:
+        """Return the list of all user IDs."""
+        pass
+    @abstractmethod
+    def get_topic_list(self, user_id: str) -> List[Dict[str, Any]]:
+        """Return the topics of a user.
+        Returns:
+            [{"topic_id": str, "conversation_count": int, "last_active": str}, ...]
+        """
+        pass
+    # ========== Initialization ==========
+    @abstractmethod
+    def ensure_schema(self) -> None:
+        """Make sure the storage structures (indices/tables) exist."""
+        pass
+class ESStorage(StorageBackend):
+    """Elasticsearch storage backend.
+    Conversations live in a caller-chosen index; user state and user profile
+    live in the two fixed indices named by the class constants below, keyed by
+    user_id as the document ID.
+    """
+    # Index name constants (not configurable per instance)
+    USER_STATE_INDEX = "mem1_user_state"
+    USER_PROFILE_INDEX = "mem1_user_profile"
+    def __init__(self, hosts: List[str], index_name: str):
+        """Create the client and make sure the indices exist.
+        Args:
+            hosts: List of Elasticsearch addresses.
+            index_name: Name of the conversation-record index.
+        """
+        # Imported here rather than at module level, so importing this module
+        # does not by itself require the elasticsearch package.
+        from elasticsearch import Elasticsearch
+        self.es = Elasticsearch(hosts)
+        self.index_name = index_name
+        # Constructing the backend already talks to the cluster.
+        self.ensure_schema()
+    def ensure_schema(self) -> None:
+        """Create the conversation, user-state and user-profile indices if missing.
+        NOTE(review): each index is checked with exists() and then created, as
+        two separate calls; confirm whether concurrent start-up of several
+        processes needs handling. The calls here pass `body=` while the
+        document/search calls elsewhere in this class pass `document=`/`query=`;
+        verify both styles are accepted by the client version in use.
+        """
+        # Conversation-record index
+        if not self.es.indices.exists(index=self.index_name):
+            self.es.indices.create(
+                index=self.index_name,
+                body={
+                    "mappings": {
+                        "properties": {
+                            "user_id": {"type": "keyword"},
+                            "topic_id": {"type": "keyword"},
+                            "timestamp": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss||epoch_millis"},
+                            "messages": {"type": "nested"},
+                            "metadata": {"type": "object"},
+                            "images": {"type": "nested"}
+                        }
+                    }
+                }
+            )
+        # User-state index
+        if not self.es.indices.exists(index=self.USER_STATE_INDEX):
+            self.es.indices.create(
+                index=self.USER_STATE_INDEX,
+                body={
+                    "mappings": {
+                        "properties": {
+                            "user_id": {"type": "keyword"},
+                            "rounds": {"type": "integer"},
+                            "last_update": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss||epoch_millis"}
+                        }
+                    }
+                }
+            )
+        # User-profile index
+        if not self.es.indices.exists(index=self.USER_PROFILE_INDEX):
+            self.es.indices.create(
+                index=self.USER_PROFILE_INDEX,
+                body={
+                    "mappings": {
+                        "properties": {
+                            "user_id": {"type": "keyword"},
+                            "content": {"type": "text"},
+                            "updated_at": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss||epoch_millis"}
+                        }
+                    }
+                }
+            )
+    # ========== Conversation records ==========
+    def save_conversation(self, conversation: Dict[str, Any]) -> str:
+        """Index one conversation document and return the ID assigned to it.
+        The dict is indexed as given; no validation is done here.
+        NOTE(review): refresh=True is passed on every write; confirm the cost
+        is acceptable for the expected write rate.
+        """
+        response = self.es.index(
+            index=self.index_name,
+            document=conversation,
+            refresh=True
+        )
+        return response["_id"]
+    def get_conversations(
+        self,
+        user_id: str,
+        topic_id: Optional[str] = None,
+        start_time: Optional[datetime] = None,
+        end_time: Optional[datetime] = None,
+        metadata_filter: Optional[Dict[str, Any]] = None,
+        limit: int = 1000
+    ) -> List[Dict[str, Any]]:
+        """Return the `_source` of matching conversations, oldest first.
+        Args:
+            user_id: Required exact-match filter.
+            topic_id: Optional exact-match filter; skipped when falsy.
+            start_time: Inclusive lower bound on `timestamp` (gte).
+            end_time: Exclusive upper bound on `timestamp` (lt).
+            metadata_filter: Each key/value becomes a term filter on `metadata.<key>`.
+            limit: Passed as the search `size`.
+        """
+        query = {"bool": {"must": [{"term": {"user_id": user_id}}]}}
+        # Truthiness test: an empty-string topic_id is treated like None (all topics).
+        if topic_id:
+            query["bool"]["must"].append({"term": {"topic_id": topic_id}})
+        if start_time or end_time:
+            range_query = {}
+            # The format string carries no UTC offset, so any tzinfo on the
+            # datetime is not sent to Elasticsearch.
+            if start_time:
+                range_query["gte"] = start_time.strftime('%Y-%m-%d %H:%M:%S')
+            if end_time:
+                range_query["lt"] = end_time.strftime('%Y-%m-%d %H:%M:%S')
+            query["bool"]["must"].append({"range": {"timestamp": range_query}})
+        if metadata_filter:
+            # NOTE(review): `metadata` is mapped as a plain object with no
+            # sub-field mappings, so its fields are mapped dynamically; if
+            # string values end up as analyzed text, an exact `term` match may
+            # miss them - verify against the actual index mapping.
+            for k, v in metadata_filter.items():
+                query["bool"]["must"].append({"term": {f"metadata.{k}": v}})
+        response = self.es.search(
+            index=self.index_name,
+            query=query,
+            size=limit,
+            sort=[{"timestamp": {"order": "asc"}}]
+        )
+        return [hit["_source"] for hit in response["hits"]["hits"]]
+    def delete_conversations(self, user_id: str, topic_id: Optional[str] = None) -> int:
+        """Delete a user's conversations, optionally limited to one topic.
+        Returns:
+            The `deleted` count from the response, or 0 when the call raises.
+        NOTE(review): any exception is swallowed and reported as 0, so a failed
+        delete is indistinguishable from "nothing matched".
+        """
+        query = {"bool": {"must": [{"term": {"user_id": user_id}}]}}
+        # Truthiness test: an empty-string topic_id removes the topic filter,
+        # i.e. the delete then covers every topic of the user.
+        if topic_id:
+            query["bool"]["must"].append({"term": {"topic_id": topic_id}})
+        try:
+            response = self.es.delete_by_query(
+                index=self.index_name,
+                query=query,
+                refresh=True
+            )
+            return response.get("deleted", 0)
+        except Exception:
+            return 0
+    # ========== User profile ==========
+    def get_profile(self, user_id: str) -> Optional[Dict[str, Any]]:
+        """Return the stored profile document for user_id, or None.
+        NOTE(review): None is returned for any exception, not only for a
+        missing document, so connection or server errors look like "no profile".
+        """
+        try:
+            response = self.es.get(index=self.USER_PROFILE_INDEX, id=user_id)
+            return response["_source"]
+        except Exception:
+            return None
+    def save_profile(self, user_id: str, content: str) -> None:
+        """Write (or overwrite) the profile document whose ID is user_id.
+        `updated_at` is set to the current time from datetime.now(), which is
+        a naive local-time value.
+        """
+        self.es.index(
+            index=self.USER_PROFILE_INDEX,
+            id=user_id,
+            document={
+                "user_id": user_id,
+                "content": content,
+                "updated_at": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+            },
+            refresh=True
+        )
+    def delete_profile(self, user_id: str) -> bool:
+        """Delete the profile document; True if the call succeeded, False if it raised.
+        NOTE(review): False covers every exception, so "not found" and
+        "cluster unreachable" are not distinguished.
+        """
+        try:
+            self.es.delete(index=self.USER_PROFILE_INDEX, id=user_id, refresh=True)
+            return True
+        except Exception:
+            return False
+    # ========== User state ==========
+    def get_user_state(self, user_id: str) -> Optional[Dict[str, Any]]:
+        """Return the stored state document for user_id, or None.
+        NOTE(review): None is returned for any exception, not only for a
+        missing document.
+        """
+        try:
+            response = self.es.get(index=self.USER_STATE_INDEX, id=user_id)
+            return response["_source"]
+        except Exception:
+            return None
+    def save_user_state(self, user_id: str, rounds: int, last_update: Optional[str] = None) -> None:
+        """Write (or overwrite) the state document whose ID is user_id.
+        The whole document is replaced; when last_update is falsy the new
+        document has no `last_update` field at all.
+        """
+        doc = {"user_id": user_id, "rounds": rounds}
+        if last_update:
+            doc["last_update"] = last_update
+        self.es.index(
+            index=self.USER_STATE_INDEX,
+            id=user_id,
+            document=doc,
+            refresh=True
+        )
+    def delete_user_state(self, user_id: str) -> bool:
+        """Delete the state document; True if the call succeeded, False if it raised."""
+        try:
+            self.es.delete(index=self.USER_STATE_INDEX, id=user_id, refresh=True)
+            return True
+        except Exception:
+            return False
+    # ========== Aggregate queries ==========
+    def get_user_list(self) -> List[str]:
+        """Return the distinct user_id values found in the conversation index.
+        Only the conversation index is consulted, so a user with a profile or
+        state document but no conversations is not listed. The terms
+        aggregation asks for at most 10000 buckets.
+        """
+        response = self.es.search(
+            index=self.index_name,
+            body={
+                "size": 0,
+                "aggs": {"users": {"terms": {"field": "user_id", "size": 10000}}}
+            }
+        )
+        return [bucket["key"] for bucket in response["aggregations"]["users"]["buckets"]]
+    def get_topic_list(self, user_id: str) -> List[Dict[str, Any]]:
+        """Return one summary dict per topic of the user.
+        Returns:
+            [{"topic_id": ..., "conversation_count": ..., "last_active": ...}, ...]
+            with at most 1000 topics (the terms aggregation size).
+        """
+        response = self.es.search(
+            index=self.index_name,
+            body={
+                "size": 0,
+                "query": {"term": {"user_id": user_id}},
+                "aggs": {
+                    "topics": {
+                        "terms": {"field": "topic_id", "size": 1000},
+                        "aggs": {
+                            "latest": {"max": {"field": "timestamp"}},
+                            # Requested but never read below; doc_count is used instead.
+                            "count": {"value_count": {"field": "timestamp"}}
+                        }
+                    }
+                }
+            }
+        )
+        topics = []
+        for bucket in response["aggregations"]["topics"]["buckets"]:
+            topics.append({
+                "topic_id": bucket["key"],
+                "conversation_count": bucket["doc_count"],
+                # last_active is None when the max aggregation has no (truthy) value.
+                "last_active": bucket["latest"]["value_as_string"] if bucket["latest"]["value"] else None
+            })
+        return topics
+    def get_conversations_with_images(self, user_id: str) -> List[Dict[str, Any]]:
+        """Return the user's conversations that have images (used for image indexing).
+        This method is specific to ESStorage; it is not declared on
+        StorageBackend. At most 1000 documents are returned, oldest first.
+        NOTE(review): `images` is mapped as a nested field in ensure_schema;
+        verify that an `exists` query on a nested field matches as intended, or
+        whether it needs to be wrapped in a `nested` query.
+        """
+        response = self.es.search(
+            index=self.index_name,
+            query={
+                "bool": {
+                    "must": [
+                        {"term": {"user_id": user_id}},
+                        {"exists": {"field": "images"}}
+                    ]
+                }
+            },
+            size=1000,
+            sort=[{"timestamp": {"order": "asc"}}]
+        )
+        return [hit["_source"] for hit in response["hits"]["hits"]]

{mem1-0.0.7.dist-info → mem1-0.0.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mem1
-Version: 0.0.7
+Version: 0.0.8
 Summary: 基于云服务的用户记忆系统
 Project-URL: Homepage, https://github.com/sougannkyou/mem1
 Project-URL: Repository, https://github.com/sougannkyou/mem1
@@ -34,6 +34,7 @@ Description-Content-Type: text/markdown
 - **图片记忆**：存储图片时自动调用 VL 模型生成描述（OCR + 内容理解），搜索时基于文字描述召回
 - **业务解耦**：通过 ProfileTemplate 适配不同场景
 - **画像自动更新**：基于对话轮数/时间自动触发 LLM 更新用户画像
+- **可插拔存储**：支持 ES 后端，预留 SQLite/MySQL 扩展接口
 ## 安装
@@ -184,6 +185,24 @@ memory.delete_topic()
 memory.delete_user()
 ```
+## 可插拔存储层
+v0.0.8 引入了可插拔存储层架构，支持自定义存储后端：
+```python
+from mem1 import Mem1Memory, Mem1Config, StorageBackend, ESStorage
+# 默认使用 ES
+memory = Mem1Memory(config, user_id="user001", topic_id="default")
+# 或显式指定存储后端
+storage = ESStorage(hosts=["http://localhost:9200"], index_name="my_index")
+memory = Mem1Memory(config, user_id="user001", storage=storage)
+# 未来可扩展 SQLite/MySQL 后端
+# storage = SQLiteStorage(db_path="mem1.db")
+```
 ## 远期记忆检索
 mem1 定位是**记忆存储层**，不内置时间意图解析。当用户问"半年前的XX事"时，建议：
@@ -228,6 +247,28 @@ def search_memory(start_days: int, end_days: int) -> str:
 5. 如果对话记录中没有相关信息，请明确说"对话记录中未提及"，不要猜测
 ```
+## 设计决策：为什么不用向量数据库
+mem1 选择 ES 时间范围检索而非 Milvus/Pinecone 等向量数据库，核心原因是**对话记忆需要上下文连续性**：
+| 对比 | 向量检索（Milvus） | mem1 时间范围检索（ES） |
+|------|-------------------|------------------------|
+| 召回方式 | 单条 Embedding → Top-K 相似 | 时间范围 → 整体拼接 |
+| 上下文 | 碎片化，语义割裂 | 连续对话流，因果关系完整 |
+| 适用场景 | 知识库问答、独立文档 | 对话记忆、需要理解对话序列 |
+举例说明：
+```
+用户: 我是李明，市网信办的
+用户: 本月处理了97起舆情
+用户: 帮我写周报
+```
+- **向量检索**："帮我写周报" 可能只召回包含"周报"的那一条，丢失"97起舆情"
+- **时间范围检索**：LLM 看到完整对话流，理解"周报"要包含"97起舆情"
+向量检索更适合：长期记忆中的独立事实召回（如半年前提过的偏好）。但 mem1 通过**画像压缩**解决这个问题——重要信息会被 LLM 提取到用户画像中持久保存。
 ## 设计决策：为什么不用 Context Caching
 豆包等大模型提供了 Context Caching 功能（缓存命中可省 86% token 费），但 mem1 选择不使用：

{mem1-0.0.7.dist-info → mem1-0.0.8.dist-info}/RECORD RENAMED Viewed

@@ -1,11 +1,12 @@
-mem1/__init__.py,sha256=tNsBrO4d7fujDIPpvl6pweVcg5kHr_EYRgslR8nWWEI,494
+mem1/__init__.py,sha256=8Dv6_SUtTh78wadGFMkDG5DaCRrNlYZkdR5MyFEbXN0,582
 mem1/config.py,sha256=9U-dJD6JsQ2CyDy-rwLqE3_kWwG3sPo7jTbkV_Tl4SE,5422
 mem1/langchain_middleware.py,sha256=h2mG7K2Tq1N7IovXMvCyvOhsAwTWOR1NAqivF4db2AE,6648
 mem1/llm.py,sha256=S23OA2OpZVb6A36iQ2YY_7Q7rRtnqC7xUbVW-bZSjsI,4419
-mem1/memory_es.py,sha256=keLiTq2yw2QC_tmIvCYSdOVnSJNpq981ko3GrQpsyjE,34301
+mem1/memory.py,sha256=RRAzAXDvHAGCtejhBhM8ab7wEb2O9bDScZOOLF1VoxY,24398
 mem1/memory_md.py,sha256=uu_TvdBoUpAncT1eissOSe1Y3vCy3iWMcuvCy3vCjEA,26258
 mem1/memory_tools.py,sha256=b1YBiRNet0gXnW-KGIZ2KQclluB9Q6dli_DbWLS571k,3646
 mem1/prompts.py,sha256=5HUG-yvTD7iBUzzXwO-WnRomDLkz0UJWox3z3zcT0kI,10599
-mem1-0.0.7.dist-info/METADATA,sha256=VnJlMl-3KoG9zw59qA7xDKKoxo9Bzy1EeChu9EpjGhE,8300
-mem1-0.0.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-mem1-0.0.7.dist-info/RECORD,,
+mem1/storage.py,sha256=J2JUTjPEXe3dO21LVoj3sl8_78qKOECad2Ol5R9kvCU,12774
+mem1-0.0.8.dist-info/METADATA,sha256=YuSnf9DbnYFPbbXAddreia5ZlJVQFydHvtfWsgu9EE8,10002
+mem1-0.0.8.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+mem1-0.0.8.dist-info/RECORD,,

{mem1-0.0.7.dist-info → mem1-0.0.8.dist-info}/WHEEL RENAMED Viewed

File without changes

mem1 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl

mem1 0.0.7py3-none-any.whl → 0.0.8py3-none-any.whl