memex-agent-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
memex/SKILL.md ADDED
@@ -0,0 +1,178 @@
1
+ # Memex Skill — 通用 Agent 记忆系统
2
+
3
+ ## 简介
4
+
5
+ Memex 是 Agent 的长期记忆系统,让 Agent 能在跨会话中积累知识和偏好。
6
+
7
+ **通用设计**:任何 Agent 框架都可以接入(OpenClaw、Claude Code、Cursor、Codex 等)。
8
+
9
+ **核心理念**:
10
+ - 原文存储 — 原文保留,不依赖 LLM 提取
11
+ - 向量语义搜索 — 支持自然语言查询
12
+ - 隐私可控 — 用户可随时查看、删除记忆
13
+
14
+ ---
15
+
16
+ ## 何时使用
17
+
18
+ | 时机 | 命令 | 说明 |
19
+ |------|------|------|
20
+ | 对话结束 | `memex save` | 提炼关键信息存入记忆 |
21
+ | 新对话开始 | `memex search` | 检索相关记忆 |
22
+ | 遇到决策点 | `memex search` | 查询约束/策略 |
23
+ | 发现新 pattern | `memex save --type system_pattern` | 存入系统模式 |
24
+ | 用户明确偏好 | `memex save --type user_model` | 存入用户偏好 |
25
+
26
+ ---
27
+
28
+ ## 记忆类型
29
+
30
+ | 类型 | 何时用 | importance | 示例 |
31
+ |------|--------|-----------|------|
32
+ | `constraint` | 用户强制的规则 | 0.9 | "用户不喜欢废话" |
33
+ | `user_model` | 用户偏好/习惯 | 0.85 | "用户喜欢简洁回复" |
34
+ | `strategy` | 验证过的方法论 | 0.8 | "用 divide and conquer 解决复杂问题" |
35
+ | `system_pattern` | 系统设计模式 | 0.75 | "项目采用前后端分离架构" |
36
+ | `belief` | 验证过的结论 | 0.7 | "Python 适合快速原型" |
37
+ | `journal` | 工作日志 | 0.4 | "今天完成了 xxx" |
38
+
39
+ ---
40
+
41
+ ## 命令参考
42
+
43
+ ### 存入记忆
44
+
45
+ ```bash
46
+ memex save --type <type> --content "<内容>" --repo <namespace>
47
+ ```
48
+
49
+ **示例**:
50
+ ```bash
51
+ memex save --type user_model --content "用户喜欢简洁的回复风格,不喜欢废话" --repo user
52
+ memex save --type belief --content "这个项目使用 Python FastAPI" --repo myproject
53
+ ```
54
+
55
+ ### 语义搜索
56
+
57
+ ```bash
58
+ memex search "<查询内容>" --repo <namespace> --limit <数量>
59
+ ```
60
+
61
+ **示例**:
62
+ ```bash
63
+ memex search "用户的偏好是什么" --repo user --limit 5
64
+ ```
65
+
66
+ **返回格式**(JSON,Agent 自行解析):
67
+ ```json
68
+ {
69
+ "query": "用户的偏好是什么",
70
+ "results": [
71
+ {
72
+ "id": "uuid",
73
+ "type": "user_model",
74
+ "content": "用户喜欢简洁的回复风格,不喜欢废话",
75
+ "importance": 0.85,
76
+ "confidence": 0.8,
77
+ "stability": "medium",
78
+ "score": 0.92
79
+ }
80
+ ],
81
+ "total": 1
82
+ }
83
+ ```
84
+
85
+ ### 其他命令
86
+
87
+ ```bash
88
+ # 列出记忆
89
+ memex list --repo <namespace> --limit 20
90
+
91
+ # 获取单条记忆
92
+ memex get <memory-id>
93
+
94
+ # 删除记忆
95
+ memex delete <memory-id>
96
+
97
+ # 查看统计
98
+ memex stats --repo <namespace>
99
+ ```
100
+
101
+ ---
102
+
103
+ ## 隐私控制
104
+
105
+ 用户可以随时查看和删除自己的记忆:
106
+
107
+ ```bash
108
+ # 查看被记住的内容
109
+ memex recall --repo user
110
+
111
+ # 删除单条记忆
112
+ memex purge <memory-id>
113
+
114
+ # 清空所有记忆
115
+ memex purge --all
116
+ ```
117
+
118
+ ---
119
+
120
+ ## 混合评分
121
+
122
+ 搜索结果按以下公式排序:
123
+
124
+ ```
125
+ final_score = 0.55×similarity + 0.20×importance + 0.15×freshness + 0.10×frequency
126
+ ```
127
+
128
+ - `similarity`:向量余弦相似度
129
+ - `importance`:记忆类型预设重要性
130
+ - `freshness`:最近访问时间
131
+ - `frequency`:访问频率
132
+
133
+ ---
134
+
135
+ ## 多 Agent 支持
136
+
137
+ 每个 Agent 用不同的 `--repo` 命名空间:
138
+
139
+ | Repo | 用途 |
140
+ |------|------|
141
+ | `user` | 跨 Agent 共享的用户偏好 |
142
+ | `<agent-name>` | 特定 Agent 的记忆 |
143
+ | `default` | 通用记忆 |
144
+
145
+ ---
146
+
147
+ ## 安装
148
+
149
+ ```bash
150
+ # 安装 memex
151
+ pip install memex-agent-memory
152
+
153
+ # 初始化
154
+ memex init
155
+
156
+ # 验证
157
+ memex stats
158
+ ```
159
+
160
+ ### OpenClaw Agent 接入
161
+
162
+ 将 skill 复制到 `~/.openclaw/skills/memex/`,Agent 在合适的时机调用 `memex search` / `memex save`。
163
+
164
+ ### 其他 Agent 框架
165
+
166
+ 直接调用 CLI 命令即可:
167
+ ```bash
168
+ memex search "用户偏好" --repo user --limit 5
169
+ ```
170
+
171
+ ---
172
+
173
+ ## 注意事项
174
+
175
+ 1. **模型升级兼容**:`raw_text` 字段保存原始文本,换 embedding 模型后自动重建索引
176
+ 2. **向量维度**:BGE-base-zh-v1.5 = 768 维
177
+ 3. **存储位置**:`~/.memex/memory/`
178
+ 4. **网络需求**:首次下载 BGE 模型约 400MB,之后离线可用
memex/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # Memex — Agent 记忆系统
memex/_config.py ADDED
@@ -0,0 +1,88 @@
1
+ """
2
+ Config — 配置管理
3
+ ~/.memex/config.toml
4
+ """
5
+ import os
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from pydantic import BaseModel
10
+
11
+
12
class MemoryConfig(BaseModel):
    """Memory persistence settings."""
    # Directory where memories are stored; "~" is expanded by get_storage_path().
    storage_path: str = "~/.memex/memory"
15
+
16
+
17
class VectorStoreConfig(BaseModel):
    """Vector store settings."""
    provider: str = "lancedb"  # lancedb | chroma | memory
20
+
21
+
22
class EmbeddingConfig(BaseModel):
    """Embedding model settings."""
    # Name of the embedding model to load.
    model: str = "BAAI/bge-base-zh-v1.5"
    # Vector size; presumably must match the output size of `model` —
    # TODO confirm when switching models.
    dimension: int = 768
26
+
27
+
28
class RetrievalConfig(BaseModel):
    """Retrieval (search) settings."""
    # Default number of results to return.
    default_limit: int = 10
    # Presumably the lowest similarity a hit may have to be returned —
    # TODO confirm against the search implementation.
    min_similarity: float = 0.4
32
+
33
+
34
class Config(BaseModel):
    """Complete configuration, one section per sub-model."""
    memory: MemoryConfig = MemoryConfig()
    vector_store: VectorStoreConfig = VectorStoreConfig()
    embedding: EmbeddingConfig = EmbeddingConfig()
    retrieval: RetrievalConfig = RetrievalConfig()
40
+
41
+
42
# Module-level cached configuration instance; None until load_config() or
# save_config() assigns it.
_config: Optional[Config] = None
44
+
45
+
46
def get_config_path() -> Path:
    """Return the path of the config file: <home>/.memex/config.toml."""
    return Path.home().joinpath(".memex", "config.toml")
49
+
50
+
51
def load_config() -> Config:
    """Return the configuration, loading it on first use.

    The result is cached in the module-level ``_config``; later calls return
    the cached instance without touching the disk. When the config file does
    not exist, a ``Config`` built entirely from defaults is used.
    """
    global _config

    if _config is None:
        path = get_config_path()
        if not path.exists():
            _config = Config()
        else:
            # Imported lazily so toml is only needed when a file is present.
            import toml
            _config = Config(**toml.load(path))

    return _config
68
+
69
+
70
def save_config(config: Config) -> None:
    """Write *config* to the config file and make it the cached instance.

    The parent directory is created if it does not exist yet.

    Args:
        config: The configuration to persist.
    """
    global _config

    config_path = get_config_path()
    config_path.parent.mkdir(parents=True, exist_ok=True)

    import toml
    # TOML documents are UTF-8 by definition. Writing with the platform
    # default encoding would corrupt non-ASCII values (e.g. a storage path)
    # on systems whose locale encoding is not UTF-8.
    with open(config_path, "w", encoding="utf-8") as f:
        toml.dump(config.model_dump(), f)

    _config = config
81
+
82
+
83
def get_storage_path() -> Path:
    """Return the memory storage directory, creating it if necessary.

    The directory comes from ``memory.storage_path`` in the loaded
    configuration, with a leading ``~`` expanded to the user's home.
    """
    storage_dir = Path(load_config().memory.storage_path).expanduser()
    storage_dir.mkdir(parents=True, exist_ok=True)
    return storage_dir
@@ -0,0 +1,142 @@
1
+ """
2
+ Consolidation — 记忆合并
3
+
4
+ 当两条记忆相似度 ≥ 阈值时,合并为一条,保留信息更丰富的内容。
5
+ 阈值:0.85(向量相似度)
6
+ """
7
+
8
+ from typing import List, Tuple, Optional
9
+ import math
10
+
11
+ from ._types import MemoryRecord, MemoryType
12
+
13
+
14
# Default merge threshold: pairs whose cosine similarity is at or above this
# value are treated as similar by find_similar_pairs().
SIMILARITY_THRESHOLD = 0.85
16
+
17
+
18
def cosine_similarity(vec_a: List[float], vec_b: List[float]) -> float:
    """Return the cosine similarity of two vectors.

    Returns 0.0 when either vector has zero magnitude (including an empty
    vector). The dot product pairs elements with ``zip``, so it covers only
    the common prefix when the lengths differ.
    """
    magnitude_a = math.sqrt(sum(x * x for x in vec_a))
    magnitude_b = math.sqrt(sum(y * y for y in vec_b))

    # Guard against division by zero for degenerate vectors.
    if not (magnitude_a and magnitude_b):
        return 0.0

    inner = sum(x * y for x, y in zip(vec_a, vec_b))
    return inner / (magnitude_a * magnitude_b)
28
+
29
+
30
def find_similar_pairs(
    records: List[MemoryRecord],
    vectors: List[List[float]],
    threshold: float = SIMILARITY_THRESHOLD,
) -> List[Tuple[int, int, float]]:
    """Find every pair of memories whose similarity is at least *threshold*.

    Args:
        records: The memories; only their count is used here.
        vectors: One embedding per record, in the same order.
        threshold: Minimum cosine similarity (inclusive) for a pair to match.

    Returns:
        ``[(idx_a, idx_b, similarity), ...]`` with ``idx_a < idx_b``, ordered
        by similarity from highest to lowest.

    Raises:
        ValueError: If *records* and *vectors* differ in length.
    """
    count = len(records)
    if count != len(vectors):
        raise ValueError("records and vectors must have same length")

    # Score each unordered pair exactly once (left < right).
    scored = (
        (left, right, cosine_similarity(vectors[left], vectors[right]))
        for left in range(count)
        for right in range(left + 1, count)
    )
    matches = [triple for triple in scored if triple[2] >= threshold]

    # Most similar pairs first; ties keep their discovery order.
    return sorted(matches, key=lambda triple: triple[2], reverse=True)
54
+
55
+
56
def consolidate_pair(
    record_a: MemoryRecord,
    record_b: MemoryRecord,
) -> MemoryRecord:
    """Merge two memories into a new record.

    Strategy:
    - The record with the strictly longer ``content`` is the primary and
      supplies the base fields; on a tie, ``record_a`` is the primary.
    - ``importance`` is the higher of the two.
    - ``confidence`` is the average of the two.
    - ``revision_count`` is the larger of the two, plus one for this merge.
    - ``last_updated`` and ``metadata["consolidated_at"]`` are set to the same
      current timestamp in milliseconds.
    - ``metadata["consolidated_from"]`` lists the primary and secondary ids.

    Args:
        record_a: First memory.
        record_b: Second memory.

    Returns:
        The merged record, built via ``MemoryRecord.from_dict(primary.to_dict())``.
    """
    import time

    # Prefer the longer (assumed more detailed) content.
    if len(record_b.content) > len(record_a.content):
        primary, secondary = record_b, record_a
    else:
        primary, secondary = record_a, record_b

    # Sample the clock once so both timestamps written below always agree.
    now_ms = int(time.time() * 1000)

    merged = MemoryRecord.from_dict(primary.to_dict())
    merged.importance = max(primary.importance, secondary.importance)
    merged.confidence = (primary.confidence + secondary.confidence) / 2
    merged.revision_count = max(primary.revision_count, secondary.revision_count) + 1
    merged.last_updated = now_ms
    merged.metadata["consolidated"] = True
    merged.metadata["consolidated_from"] = [primary.id, secondary.id]
    merged.metadata["consolidated_at"] = now_ms

    return merged
89
+
90
+
91
def consolidate_all(
    records: List[MemoryRecord],
    vectors: List[List[float]],
    threshold: float = SIMILARITY_THRESHOLD,
) -> List[Tuple[List[MemoryRecord], List[List[float]]]]:
    """Repeatedly merge the most similar pair until no pair reaches *threshold*.

    Each round merges the single highest-similarity pair. The merged record
    replaces the lower-indexed member of the pair, and its vector is the
    element-wise average of the two source vectors.

    Args:
        records: The memories to consolidate (not modified).
        vectors: One embedding per record, in the same order (not modified).
        threshold: Minimum cosine similarity for a pair to be merged.

    Returns:
        One ``(remaining_records, remaining_vectors)`` snapshot per merge
        round, taken after that round's merge. The last entry is therefore
        the final consolidated state. The list is empty when nothing was
        merged.
    """
    remaining_records = list(records)
    remaining_vectors = list(vectors)
    history = []

    while True:
        pairs = find_similar_pairs(remaining_records, remaining_vectors, threshold)
        if not pairs:
            break

        # Merge the first pair, which has the highest similarity.
        idx_a, idx_b, _ = pairs[0]
        low, high = min(idx_a, idx_b), max(idx_a, idx_b)

        merged = consolidate_pair(remaining_records[idx_a], remaining_records[idx_b])
        avg_vector = [
            (v1 + v2) / 2
            for v1, v2 in zip(remaining_vectors[idx_a], remaining_vectors[idx_b])
        ]

        # Drop the higher index first so the lower index stays valid, then
        # put the merged entry where the lower-indexed member was.
        del remaining_records[high]
        del remaining_vectors[high]
        remaining_records[low] = merged
        remaining_vectors[low] = avg_vector

        # Snapshot the state after the merge. Recording the pre-merge state
        # would leave the final merged result out of the returned history.
        history.append((list(remaining_records), list(remaining_vectors)))

    return history
@@ -0,0 +1,206 @@
1
+ """
2
+ Contradiction Detection — 矛盾检测
3
+
4
+ 检测两条记忆之间的语义矛盾。
5
+ 当检测到矛盾时,降低双方的 importance。
6
+
7
+ 矛盾模式:
8
+ 1. 否定模式:A 说"X 是 Y",B 说"X 不是 Y"
9
+ 2. 行为冲突模式:A 说"做 X",B 说"不做 X"
10
+ """
11
+
12
+ from dataclasses import dataclass
13
+ from typing import Optional, List, Tuple
14
+
15
+ from ._types import MemoryRecord
16
+
17
+
18
@dataclass
class ContradictionResult:
    """Result of checking a pair of memories for contradiction."""
    has_contradiction: bool
    confidence: float  # 0-1, confidence that the pair contradicts
    pattern: str  # name of the contradiction pattern that matched
    details: str  # detailed description
25
+
26
+
27
def detect_record_pair_contradiction(
    record_a: MemoryRecord,
    record_b: MemoryRecord,
) -> ContradictionResult:
    """Check two memories for contradiction.

    The lower-cased contents are run through the negation detector first and
    the action-conflict detector second; the first detector that reports a
    match determines the result.

    Returns:
        A ContradictionResult. When nothing matches, ``has_contradiction`` is
        False, ``confidence`` is 0.0, ``pattern`` is "none" and ``details``
        is empty.
    """
    text_a = record_a.content.lower()
    text_b = record_b.content.lower()

    # (detector, confidence, pattern label), tried in priority order.
    detectors = (
        (_detect_negation_pattern, 0.8, "negation"),
        (_detect_action_pattern, 0.7, "action_conflict"),
    )

    for detect, score, label in detectors:
        description = detect(text_a, text_b)
        if description:
            return ContradictionResult(
                has_contradiction=True,
                confidence=score,
                pattern=label,
                details=description,
            )

    return ContradictionResult(
        has_contradiction=False,
        confidence=0.0,
        pattern="none",
        details="",
    )
66
+
67
+
68
+ def _detect_negation_pattern(a: str, b: str) -> Optional[str]:
69
+ """检测否定模式矛盾"""
70
+ import re
71
+
72
+ # 英文单词集合
73
+ def get_english_words(text: str) -> set:
74
+ return set(re.findall(r'[a-z]+', text))
75
+
76
+ english_words_a = get_english_words(a)
77
+ english_words_b = get_english_words(b)
78
+
79
+ # 英文否定词
80
+ en_neg = {"no", "not", "never", "dont", "doesnt", "cant", "shouldnt", "wont", "noone"}
81
+
82
+ # 检测英文否定
83
+ a_has_en_neg = bool(en_neg & english_words_a)
84
+ b_has_en_neg = bool(en_neg & english_words_b)
85
+
86
+ # 检测中文否定(直接搜索否定词)
87
+ def has_cn_negation(text: str) -> bool:
88
+ cn_negations = [
89
+ "不用", "不是", "不支持", "不可以", "不应", "不推荐", "不建议", "不愿意",
90
+ "不使用", "不应该", "不建议", "不鼓励", "不允许",
91
+ ]
92
+ return any(neg in text for neg in cn_negations)
93
+
94
+ a_has_cn_neg = has_cn_negation(a)
95
+ b_has_cn_neg = has_cn_negation(b)
96
+
97
+ # 英文肯定词
98
+ en_pos = {"use", "uses", "is", "are", "was", "were", "do", "does", "can", "should", "recommend", "like"}
99
+
100
+ # 检测英文矛盾:A 无否定,B 有否定,且有共同肯定词
101
+ if not a_has_en_neg and b_has_en_neg:
102
+ common = en_pos & english_words_a & english_words_b
103
+ if common:
104
+ return f"A 说\"{a[:50]}\",B 说\"{b[:50]}\"(英文否定矛盾)"
105
+ if not b_has_en_neg and a_has_en_neg:
106
+ common = en_pos & english_words_a & english_words_b
107
+ if common:
108
+ return f"A 说\"{a[:50]}\",B 说\"{b[:50]}\"(英文否定矛盾)"
109
+
110
+ # 检测中文矛盾:当一方有否定词时,必须有共同关键词
111
+ def has_shared_content(text_a: str, text_b: str) -> bool:
112
+ """检查两句话是否有共同的内容词(排除停用词)"""
113
+ stop_words = {"的", "是", "在", "有", "了", "和", "与", "很", "也", "都", "the", "a", "an", "is", "are", "was", "were", "this", "that", "it"}
114
+ # 提取中文词(简单按字符,不精确但够用)
115
+ def get_cn_words(text):
116
+ import re
117
+ # 提取连续中文字符序列
118
+ cn_phrases = re.findall(r'[\u4e00-\u9fff]+', text)
119
+ words = set()
120
+ for phrase in cn_phrases:
121
+ for char in phrase:
122
+ words.add(char)
123
+ return words
124
+
125
+ cn_a = get_cn_words(text_a) - stop_words
126
+ cn_b = get_cn_words(text_b) - stop_words
127
+
128
+ # 英文词
129
+ en_a = set(re.findall(r'[a-z]+', text_a))
130
+ en_b = set(re.findall(r'[a-z]+', text_b))
131
+
132
+ return bool((cn_a & cn_b) or (en_a & en_b))
133
+
134
+ if a_has_cn_neg and not b_has_cn_neg:
135
+ if has_shared_content(a, b):
136
+ return f"A 说\"{a[:50]}\",B 说\"{b[:50]}\"(中文否定矛盾)"
137
+ if b_has_cn_neg and not a_has_cn_neg:
138
+ if has_shared_content(a, b):
139
+ return f"A 说\"{a[:50]}\",B 说\"{b[:50]}\"(中文否定矛盾)"
140
+ if a_has_cn_neg and b_has_cn_neg:
141
+ if has_shared_content(a, b):
142
+ return f"A 说\"{a[:50]}\",B 说\"{b[:50]}\"(中文双重否定矛盾)"
143
+
144
+ return None
145
+
146
+
147
+ def _detect_action_pattern(a: str, b: str) -> Optional[str]:
148
+ """检测行为冲突(做 vs 不做)"""
149
+ # 英文行为词对
150
+ action_pairs = [
151
+ ("use", "avoid"),
152
+ ("recommend", "don't recommend"),
153
+ ("should", "shouldn't"),
154
+ ]
155
+
156
+ for pos, neg in action_pairs:
157
+ if pos in a and neg in b:
158
+ return f"行为冲突:A 推荐\"{pos}\",B 反对\"{neg}\""
159
+ if neg in a and pos in b:
160
+ return f"行为冲突:A 反对\"{neg}\",B 推荐\"{pos}\""
161
+
162
+ return None
163
+
164
+
165
def find_contradictions(
    records: List[MemoryRecord],
) -> List[Tuple[MemoryRecord, MemoryRecord, ContradictionResult]]:
    """Return every contradicting pair in *records*.

    Each unordered pair is checked once, with the earlier record first.

    Returns:
        ``[(earlier_record, later_record, result), ...]`` for the pairs whose
        result has ``has_contradiction`` set.
    """
    found = []

    for position, first in enumerate(records):
        # Only compare against later records so each pair is visited once.
        for second in records[position + 1:]:
            verdict = detect_record_pair_contradiction(first, second)
            if verdict.has_contradiction:
                found.append((first, second, verdict))

    return found
180
+
181
+
182
def apply_contradiction_penalty(
    record_a: MemoryRecord,
    record_b: MemoryRecord,
) -> Tuple[MemoryRecord, MemoryRecord]:
    """Lower the importance of both memories when they contradict.

    When no contradiction is detected, the two input records are returned
    unchanged. Otherwise each record is rebuilt via ``to_dict``/``from_dict``
    and the copy's importance is scaled by 0.7 (detection confidence >= 0.8)
    or 0.85 (lower confidence), never dropping below 0.1. The copies are also
    tagged with ``contradiction_penalty`` and ``contradiction_at`` metadata.

    Returns:
        ``(record_a_result, record_b_result)``.
    """
    import time

    verdict = detect_record_pair_contradiction(record_a, record_b)
    if not verdict.has_contradiction:
        return record_a, record_b

    # Stronger detections are penalized more heavily.
    scale = 0.7 if verdict.confidence >= 0.8 else 0.85

    penalized = []
    for source in (record_a, record_b):
        clone = MemoryRecord.from_dict(source.to_dict())
        clone.importance = max(0.1, clone.importance * scale)
        clone.metadata["contradiction_penalty"] = True
        clone.metadata["contradiction_at"] = int(time.time() * 1000)
        penalized.append(clone)

    return penalized[0], penalized[1]