memex-agent-memory 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memex/SKILL.md +178 -0
- memex/__init__.py +1 -0
- memex/_config.py +88 -0
- memex/_consolidation.py +142 -0
- memex/_contradiction.py +206 -0
- memex/_decay.py +114 -0
- memex/_distill.py +250 -0
- memex/_embed.py +44 -0
- memex/_hybrid.py +128 -0
- memex/_revision.py +167 -0
- memex/_types.py +92 -0
- memex/cli.py +228 -0
- memex/store/__init__.py +5 -0
- memex/store/factory.py +30 -0
- memex/store/interface.py +79 -0
- memex/store/lancedb.py +190 -0
- memex/store/memory.py +99 -0
- memex_agent_memory-0.1.0.dist-info/METADATA +285 -0
- memex_agent_memory-0.1.0.dist-info/RECORD +22 -0
- memex_agent_memory-0.1.0.dist-info/WHEEL +5 -0
- memex_agent_memory-0.1.0.dist-info/entry_points.txt +2 -0
- memex_agent_memory-0.1.0.dist-info/top_level.txt +1 -0
memex/SKILL.md
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# Memex Skill — 通用 Agent 记忆系统
|
|
2
|
+
|
|
3
|
+
## 简介
|
|
4
|
+
|
|
5
|
+
Memex 是 Agent 的长期记忆系统,让 Agent 能在跨会话中积累知识和偏好。
|
|
6
|
+
|
|
7
|
+
**通用设计**:任何 Agent 框架都可以接入(OpenClaw、Claude Code、Cursor、Codex 等)。
|
|
8
|
+
|
|
9
|
+
**核心理念**:
|
|
10
|
+
- 原文存储 — 原文保留,不依赖 LLM 提取
|
|
11
|
+
- 向量语义搜索 — 支持自然语言查询
|
|
12
|
+
- 隐私可控 — 用户可随时查看、删除记忆
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## 何时使用
|
|
17
|
+
|
|
18
|
+
| 时机 | 命令 | 说明 |
|
|
19
|
+
|------|------|------|
|
|
20
|
+
| 对话结束 | `memex save` | 提炼关键信息存入记忆 |
|
|
21
|
+
| 新对话开始 | `memex search` | 检索相关记忆 |
|
|
22
|
+
| 遇到决策点 | `memex search` | 查询约束/策略 |
|
|
23
|
+
| 发现新 pattern | `memex save --type system_pattern` | 存入系统模式 |
|
|
24
|
+
| 用户明确偏好 | `memex save --type user_model` | 存入用户偏好 |
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## 记忆类型
|
|
29
|
+
|
|
30
|
+
| 类型 | 何时用 | importance | 示例 |
|
|
31
|
+
|------|--------|-----------|------|
|
|
32
|
+
| `constraint` | 用户强制的规则 | 0.9 | "用户不喜欢废话" |
|
|
33
|
+
| `user_model` | 用户偏好/习惯 | 0.85 | "用户喜欢简洁回复" |
|
|
34
|
+
| `strategy` | 验证过的方法论 | 0.8 | "用 divide and conquer 解决复杂问题" |
|
|
35
|
+
| `system_pattern` | 系统设计模式 | 0.75 | "项目采用前后端分离架构" |
|
|
36
|
+
| `belief` | 验证过的结论 | 0.7 | "Python 适合快速原型" |
|
|
37
|
+
| `journal` | 工作日志 | 0.4 | "今天完成了 xxx" |
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## 命令参考
|
|
42
|
+
|
|
43
|
+
### 存入记忆
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
memex save --type <type> --content "<内容>" --repo <namespace>
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**示例**:
|
|
50
|
+
```bash
|
|
51
|
+
memex save --type user_model --content "用户喜欢简洁的回复风格,不喜欢废话" --repo user
|
|
52
|
+
memex save --type belief --content "这个项目使用 Python FastAPI" --repo myproject
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### 语义搜索
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
memex search "<查询内容>" --repo <namespace> --limit <数量>
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**示例**:
|
|
62
|
+
```bash
|
|
63
|
+
memex search "用户的偏好是什么" --repo user --limit 5
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
**返回格式**(JSON,Agent 自行解析):
|
|
67
|
+
```json
|
|
68
|
+
{
|
|
69
|
+
"query": "用户的偏好是什么",
|
|
70
|
+
"results": [
|
|
71
|
+
{
|
|
72
|
+
"id": "uuid",
|
|
73
|
+
"type": "user_model",
|
|
74
|
+
"content": "用户喜欢简洁的回复风格,不喜欢废话",
|
|
75
|
+
"importance": 0.85,
|
|
76
|
+
"confidence": 0.8,
|
|
77
|
+
"stability": "medium",
|
|
78
|
+
"score": 0.92
|
|
79
|
+
}
|
|
80
|
+
],
|
|
81
|
+
"total": 1
|
|
82
|
+
}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### 其他命令
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
# 列出记忆
|
|
89
|
+
memex list --repo <namespace> --limit 20
|
|
90
|
+
|
|
91
|
+
# 获取单条记忆
|
|
92
|
+
memex get <memory-id>
|
|
93
|
+
|
|
94
|
+
# 删除记忆
|
|
95
|
+
memex delete <memory-id>
|
|
96
|
+
|
|
97
|
+
# 查看统计
|
|
98
|
+
memex stats --repo <namespace>
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## 隐私控制
|
|
104
|
+
|
|
105
|
+
用户可以随时查看和删除自己的记忆:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
# 查看被记住的内容
|
|
109
|
+
memex recall --repo user
|
|
110
|
+
|
|
111
|
+
# 删除单条记忆
|
|
112
|
+
memex purge <memory-id>
|
|
113
|
+
|
|
114
|
+
# 清空所有记忆
|
|
115
|
+
memex purge --all
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## 混合评分
|
|
121
|
+
|
|
122
|
+
搜索结果按以下公式排序:
|
|
123
|
+
|
|
124
|
+
```
|
|
125
|
+
final_score = 0.55×similarity + 0.20×importance + 0.15×freshness + 0.10×frequency
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
- `similarity`:向量余弦相似度
|
|
129
|
+
- `importance`:记忆类型预设重要性
|
|
130
|
+
- `freshness`:最近访问时间
|
|
131
|
+
- `frequency`:访问频率
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## 多 Agent 支持
|
|
136
|
+
|
|
137
|
+
每个 Agent 用不同的 `--repo` 命名空间:
|
|
138
|
+
|
|
139
|
+
| Repo | 用途 |
|
|
140
|
+
|------|------|
|
|
141
|
+
| `user` | 跨 Agent 共享的用户偏好 |
|
|
142
|
+
| `<agent-name>` | 特定 Agent 的记忆 |
|
|
143
|
+
| `default` | 通用记忆 |
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
## 安装
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
# 安装 memex
|
|
151
|
+
pip install memex-agent-memory
|
|
152
|
+
|
|
153
|
+
# 初始化
|
|
154
|
+
memex init
|
|
155
|
+
|
|
156
|
+
# 验证
|
|
157
|
+
memex stats
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### OpenClaw Agent 接入
|
|
161
|
+
|
|
162
|
+
将 skill 复制到 `~/.openclaw/skills/memex/`,Agent 在合适的时机调用 `memex search` / `memex save`。
|
|
163
|
+
|
|
164
|
+
### 其他 Agent 框架
|
|
165
|
+
|
|
166
|
+
直接调用 CLI 命令即可:
|
|
167
|
+
```bash
|
|
168
|
+
memex search "用户偏好" --repo user --limit 5
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
## 注意事项
|
|
174
|
+
|
|
175
|
+
1. **模型升级兼容**:`raw_text` 字段保存原始文本,换 embedding 模型后自动重建索引
|
|
176
|
+
2. **向量维度**:BGE-base-zh-v1.5 = 768 维
|
|
177
|
+
3. **存储位置**:`~/.memex/memory/`
|
|
178
|
+
4. **网络需求**:首次下载 BGE 模型约 400MB,之后离线可用
|
memex/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Memex — Agent 记忆系统
|
memex/_config.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Config — 配置管理
|
|
3
|
+
~/.memex/config.toml
|
|
4
|
+
"""
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MemoryConfig(BaseModel):
    """Memory storage settings."""
    # Directory that holds stored memories; the leading "~" is expanded by
    # get_storage_path() before the directory is created.
    storage_path: str = "~/.memex/memory"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class VectorStoreConfig(BaseModel):
    """Vector store settings."""
    # Name of the vector store backend. Listed options:
    provider: str = "lancedb" # lancedb | chroma | memory
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class EmbeddingConfig(BaseModel):
    """Embedding model settings."""
    # Identifier of the embedding model.
    model: str = "BAAI/bge-base-zh-v1.5"
    # Length of the embedding vectors; presumably must match the model above
    # (the packaged skill doc lists 768 for BGE-base-zh-v1.5) - confirm when
    # changing `model`.
    dimension: int = 768
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class RetrievalConfig(BaseModel):
    """Retrieval settings."""
    # Presumably the number of results returned when the caller gives no
    # limit - TODO confirm against the CLI / store code.
    default_limit: int = 10
    # Presumably the lowest similarity a result may have to be returned -
    # TODO confirm against the store code.
    min_similarity: float = 0.4
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Config(BaseModel):
    """Complete configuration.

    load_config() builds this from the top-level tables of config.toml, so
    each field name below is also the name of a TOML section.
    """
    memory: MemoryConfig = MemoryConfig()
    vector_store: VectorStoreConfig = VectorStoreConfig()
    embedding: EmbeddingConfig = EmbeddingConfig()
    retrieval: RetrievalConfig = RetrievalConfig()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# 全局配置实例
|
|
43
|
+
_config: Optional[Config] = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_config_path() -> Path:
    """Return the path of the user's config file, ``~/.memex/config.toml``."""
    home_dir = Path.home()
    return home_dir.joinpath(".memex", "config.toml")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def load_config() -> Config:
    """Return the process-wide configuration, loading it on first use.

    The result is cached in the module-level ``_config``; later calls return
    the cached object without touching the disk. When the config file does
    not exist, a ``Config`` with all defaults is used.
    """
    global _config

    if _config is None:
        toml_file = get_config_path()
        if not toml_file.exists():
            _config = Config()
        else:
            # Imported lazily so the dependency is only needed when a config
            # file is actually present.
            import toml
            _config = Config(**toml.load(toml_file))

    return _config
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def save_config(config: Config) -> None:
    """Write *config* to the config file and make it the cached configuration.

    The parent directory is created when missing. After a successful write
    the module-level cache used by load_config() is replaced with *config*.

    Args:
        config: The configuration to persist.
    """
    global _config

    import toml

    config_path = get_config_path()
    config_path.parent.mkdir(parents=True, exist_ok=True)

    # A TOML document must be UTF-8. Without an explicit encoding, open()
    # falls back to the platform default (e.g. a legacy code page on
    # Windows), which can corrupt or reject non-ASCII values such as a
    # storage path containing non-Latin characters.
    with open(config_path, "w", encoding="utf-8") as f:
        toml.dump(config.model_dump(), f)

    _config = config
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def get_storage_path() -> Path:
    """Return the memory storage directory, creating it when missing.

    The location comes from ``memory.storage_path`` in the loaded
    configuration, with a leading ``~`` expanded to the user's home.
    """
    configured = load_config().memory.storage_path
    storage_dir = Path(configured).expanduser()
    storage_dir.mkdir(parents=True, exist_ok=True)
    return storage_dir
|
memex/_consolidation.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Consolidation — 记忆合并
|
|
3
|
+
|
|
4
|
+
当两条记忆相似度 > 阈值时,合并为一条,保留信息更丰富的内容。
|
|
5
|
+
阈值:0.85(向量相似度)
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import List, Tuple, Optional
|
|
9
|
+
import math
|
|
10
|
+
|
|
11
|
+
from ._types import MemoryRecord, MemoryType
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# 合并阈值
|
|
15
|
+
SIMILARITY_THRESHOLD = 0.85
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def cosine_similarity(vec_a: List[float], vec_b: List[float]) -> float:
    """Return the cosine similarity of two vectors.

    The dot product pairs elements with ``zip``, so when the vectors differ
    in length the extra elements of the longer one are left out of the dot
    product (but still count towards that vector's norm). Returns ``0.0``
    when either vector has zero norm.
    """
    def magnitude(vec):
        return math.sqrt(sum(x * x for x in vec))

    inner = sum(x * y for x, y in zip(vec_a, vec_b))
    magnitude_a = magnitude(vec_a)
    magnitude_b = magnitude(vec_b)

    # A zero-length vector has no direction; report "no similarity"
    # instead of dividing by zero.
    if not (magnitude_a and magnitude_b):
        return 0.0

    return inner / (magnitude_a * magnitude_b)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def find_similar_pairs(
    records: List[MemoryRecord],
    vectors: List[List[float]],
    threshold: float = SIMILARITY_THRESHOLD,
) -> List[Tuple[int, int, float]]:
    """Find every pair of memories whose vectors are similar enough.

    A pair qualifies when its cosine similarity is greater than or equal to
    *threshold*. ``vectors[i]`` is the embedding of ``records[i]``.

    Returns:
        ``[(idx_a, idx_b, similarity), ...]`` with ``idx_a < idx_b``, ordered
        from most to least similar.

    Raises:
        ValueError: If *records* and *vectors* differ in length.
    """
    count = len(records)
    if count != len(vectors):
        raise ValueError("records and vectors must have same length")

    scored = []
    for left, left_vec in enumerate(vectors):
        for right in range(left + 1, count):
            score = cosine_similarity(left_vec, vectors[right])
            if score >= threshold:
                scored.append((left, right, score))

    # Highest similarity first.
    return sorted(scored, key=lambda item: item[2], reverse=True)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def consolidate_pair(
    record_a: MemoryRecord,
    record_b: MemoryRecord,
) -> MemoryRecord:
    """Merge two memories into a new record.

    Strategy:
    - The record with the longer ``content`` is the primary (``record_a``
      wins a tie); the merged record starts as a copy of it.
    - ``importance`` is the higher of the two.
    - ``confidence`` is the average of the two.
    - ``revision_count`` is the larger of the two, plus one for this merge.
    - ``last_updated`` is set to the current time in milliseconds.
    - ``metadata`` records that a merge happened, the ids of both sources
      (primary first), and the merge time.

    Returns:
        The merged record, built via ``MemoryRecord.from_dict``.
    """
    import time

    # Keep the longer (assumed more detailed) content.
    if len(record_b.content) > len(record_a.content):
        primary, secondary = record_b, record_a
    else:
        primary, secondary = record_a, record_b

    # Read the clock once so last_updated and consolidated_at always agree.
    now_ms = int(time.time() * 1000)

    merged = MemoryRecord.from_dict(primary.to_dict())
    # NOTE(review): to_dict()/from_dict() are not visible here and may hand
    # back the primary's own metadata dict; copy it so the writes below can
    # never leak into the source record.
    merged.metadata = dict(merged.metadata)

    merged.importance = max(primary.importance, secondary.importance)
    merged.confidence = (primary.confidence + secondary.confidence) / 2
    merged.revision_count = max(primary.revision_count, secondary.revision_count) + 1
    merged.last_updated = now_ms
    merged.metadata["consolidated"] = True
    merged.metadata["consolidated_from"] = [primary.id, secondary.id]
    merged.metadata["consolidated_at"] = now_ms

    return merged
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def consolidate_all(
    records: List[MemoryRecord],
    vectors: List[List[float]],
    threshold: float = SIMILARITY_THRESHOLD,
) -> List[Tuple[List[MemoryRecord], List[List[float]]]]:
    """Repeatedly merge the most similar pair until none reach *threshold*.

    Each round merges the single most similar pair. The merged record takes
    the position of the lower-indexed member, and its vector is the
    element-wise mean of the two merged vectors. The inputs are not
    modified.

    Returns:
        One ``(remaining_records, remaining_vectors)`` snapshot per merge
        round, taken after that round's merge, so the last entry is the
        fully consolidated state. The list is empty when nothing was merged.
    """
    remaining_records = list(records)
    remaining_vectors = list(vectors)
    history = []

    while True:
        pairs = find_similar_pairs(remaining_records, remaining_vectors, threshold)
        if not pairs:
            break

        # Pairs are sorted by similarity, so the first is the best match.
        idx_a, idx_b, _similarity = pairs[0]
        low, high = min(idx_a, idx_b), max(idx_a, idx_b)

        merged = consolidate_pair(remaining_records[idx_a], remaining_records[idx_b])
        # The merged memory's vector is the mean of its two sources.
        avg_vector = [
            (v1 + v2) / 2
            for v1, v2 in zip(remaining_vectors[idx_a], remaining_vectors[idx_b])
        ]

        next_records = list(remaining_records)
        next_vectors = list(remaining_vectors)

        # Drop the higher index first so the lower index stays valid, then
        # put the merged entry in the lower slot.
        del next_records[high]
        del next_vectors[high]
        next_records[low] = merged
        next_vectors[low] = avg_vector

        # Record the state AFTER this merge. Recording the pre-merge state
        # would mean the final consolidated result never reaches the caller.
        history.append((list(next_records), list(next_vectors)))

        remaining_records = next_records
        remaining_vectors = next_vectors

    return history
|
memex/_contradiction.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Contradiction Detection — 矛盾检测
|
|
3
|
+
|
|
4
|
+
检测两条记忆之间的语义矛盾。
|
|
5
|
+
当检测到矛盾时,降低双方的 importance。
|
|
6
|
+
|
|
7
|
+
矛盾模式:
|
|
8
|
+
1. 否定模式:A 说"X 是 Y",B 说"X 不是 Y"
|
|
9
|
+
2. 行为冲突模式:A 说"做 X",B 说"不做 X"
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from typing import Optional, List, Tuple
|
|
14
|
+
|
|
15
|
+
from ._types import MemoryRecord
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class ContradictionResult:
    """Result of checking two memories for a contradiction."""
    has_contradiction: bool
    confidence: float # 0-1, confidence that the pair is contradictory
    pattern: str # which contradiction pattern matched
    details: str # human-readable description
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def detect_record_pair_contradiction(
    record_a: MemoryRecord,
    record_b: MemoryRecord,
) -> ContradictionResult:
    """Check two memories for a contradiction.

    Both contents are lower-cased, then the detectors run in order and the
    first one that reports a match decides the result:

    1. negation pattern  -> confidence 0.8, pattern "negation"
    2. action conflict   -> confidence 0.7, pattern "action_conflict"

    Returns:
        A ``ContradictionResult``; when no detector matches it has
        ``has_contradiction=False``, confidence 0.0 and pattern "none".
    """
    text_a = record_a.content.lower()
    text_b = record_b.content.lower()

    detectors = (
        (_detect_negation_pattern, 0.8, "negation"),
        (_detect_action_pattern, 0.7, "action_conflict"),
    )

    for detector, score, label in detectors:
        description = detector(text_a, text_b)
        if description:
            return ContradictionResult(
                has_contradiction=True,
                confidence=score,
                pattern=label,
                details=description,
            )

    return ContradictionResult(
        has_contradiction=False,
        confidence=0.0,
        pattern="none",
        details="",
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _detect_negation_pattern(a: str, b: str) -> Optional[str]:
|
|
69
|
+
"""检测否定模式矛盾"""
|
|
70
|
+
import re
|
|
71
|
+
|
|
72
|
+
# 英文单词集合
|
|
73
|
+
def get_english_words(text: str) -> set:
|
|
74
|
+
return set(re.findall(r'[a-z]+', text))
|
|
75
|
+
|
|
76
|
+
english_words_a = get_english_words(a)
|
|
77
|
+
english_words_b = get_english_words(b)
|
|
78
|
+
|
|
79
|
+
# 英文否定词
|
|
80
|
+
en_neg = {"no", "not", "never", "dont", "doesnt", "cant", "shouldnt", "wont", "noone"}
|
|
81
|
+
|
|
82
|
+
# 检测英文否定
|
|
83
|
+
a_has_en_neg = bool(en_neg & english_words_a)
|
|
84
|
+
b_has_en_neg = bool(en_neg & english_words_b)
|
|
85
|
+
|
|
86
|
+
# 检测中文否定(直接搜索否定词)
|
|
87
|
+
def has_cn_negation(text: str) -> bool:
|
|
88
|
+
cn_negations = [
|
|
89
|
+
"不用", "不是", "不支持", "不可以", "不应", "不推荐", "不建议", "不愿意",
|
|
90
|
+
"不使用", "不应该", "不建议", "不鼓励", "不允许",
|
|
91
|
+
]
|
|
92
|
+
return any(neg in text for neg in cn_negations)
|
|
93
|
+
|
|
94
|
+
a_has_cn_neg = has_cn_negation(a)
|
|
95
|
+
b_has_cn_neg = has_cn_negation(b)
|
|
96
|
+
|
|
97
|
+
# 英文肯定词
|
|
98
|
+
en_pos = {"use", "uses", "is", "are", "was", "were", "do", "does", "can", "should", "recommend", "like"}
|
|
99
|
+
|
|
100
|
+
# 检测英文矛盾:A 无否定,B 有否定,且有共同肯定词
|
|
101
|
+
if not a_has_en_neg and b_has_en_neg:
|
|
102
|
+
common = en_pos & english_words_a & english_words_b
|
|
103
|
+
if common:
|
|
104
|
+
return f"A 说\"{a[:50]}\",B 说\"{b[:50]}\"(英文否定矛盾)"
|
|
105
|
+
if not b_has_en_neg and a_has_en_neg:
|
|
106
|
+
common = en_pos & english_words_a & english_words_b
|
|
107
|
+
if common:
|
|
108
|
+
return f"A 说\"{a[:50]}\",B 说\"{b[:50]}\"(英文否定矛盾)"
|
|
109
|
+
|
|
110
|
+
# 检测中文矛盾:当一方有否定词时,必须有共同关键词
|
|
111
|
+
def has_shared_content(text_a: str, text_b: str) -> bool:
|
|
112
|
+
"""检查两句话是否有共同的内容词(排除停用词)"""
|
|
113
|
+
stop_words = {"的", "是", "在", "有", "了", "和", "与", "很", "也", "都", "the", "a", "an", "is", "are", "was", "were", "this", "that", "it"}
|
|
114
|
+
# 提取中文词(简单按字符,不精确但够用)
|
|
115
|
+
def get_cn_words(text):
|
|
116
|
+
import re
|
|
117
|
+
# 提取连续中文字符序列
|
|
118
|
+
cn_phrases = re.findall(r'[\u4e00-\u9fff]+', text)
|
|
119
|
+
words = set()
|
|
120
|
+
for phrase in cn_phrases:
|
|
121
|
+
for char in phrase:
|
|
122
|
+
words.add(char)
|
|
123
|
+
return words
|
|
124
|
+
|
|
125
|
+
cn_a = get_cn_words(text_a) - stop_words
|
|
126
|
+
cn_b = get_cn_words(text_b) - stop_words
|
|
127
|
+
|
|
128
|
+
# 英文词
|
|
129
|
+
en_a = set(re.findall(r'[a-z]+', text_a))
|
|
130
|
+
en_b = set(re.findall(r'[a-z]+', text_b))
|
|
131
|
+
|
|
132
|
+
return bool((cn_a & cn_b) or (en_a & en_b))
|
|
133
|
+
|
|
134
|
+
if a_has_cn_neg and not b_has_cn_neg:
|
|
135
|
+
if has_shared_content(a, b):
|
|
136
|
+
return f"A 说\"{a[:50]}\",B 说\"{b[:50]}\"(中文否定矛盾)"
|
|
137
|
+
if b_has_cn_neg and not a_has_cn_neg:
|
|
138
|
+
if has_shared_content(a, b):
|
|
139
|
+
return f"A 说\"{a[:50]}\",B 说\"{b[:50]}\"(中文否定矛盾)"
|
|
140
|
+
if a_has_cn_neg and b_has_cn_neg:
|
|
141
|
+
if has_shared_content(a, b):
|
|
142
|
+
return f"A 说\"{a[:50]}\",B 说\"{b[:50]}\"(中文双重否定矛盾)"
|
|
143
|
+
|
|
144
|
+
return None
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _detect_action_pattern(a: str, b: str) -> Optional[str]:
|
|
148
|
+
"""检测行为冲突(做 vs 不做)"""
|
|
149
|
+
# 英文行为词对
|
|
150
|
+
action_pairs = [
|
|
151
|
+
("use", "avoid"),
|
|
152
|
+
("recommend", "don't recommend"),
|
|
153
|
+
("should", "shouldn't"),
|
|
154
|
+
]
|
|
155
|
+
|
|
156
|
+
for pos, neg in action_pairs:
|
|
157
|
+
if pos in a and neg in b:
|
|
158
|
+
return f"行为冲突:A 推荐\"{pos}\",B 反对\"{neg}\""
|
|
159
|
+
if neg in a and pos in b:
|
|
160
|
+
return f"行为冲突:A 反对\"{neg}\",B 推荐\"{pos}\""
|
|
161
|
+
|
|
162
|
+
return None
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def find_contradictions(
    records: List[MemoryRecord],
) -> List[Tuple[MemoryRecord, MemoryRecord, ContradictionResult]]:
    """Find every pair of mutually contradictory memories in *records*.

    Each unordered pair is checked once, with the earlier record first.

    Returns:
        ``[(record_a, record_b, result), ...]`` for the pairs whose check
        reported a contradiction, in the order the pairs were visited.
    """
    found = []

    for position, first in enumerate(records):
        for second in records[position + 1:]:
            verdict = detect_record_pair_contradiction(first, second)
            if verdict.has_contradiction:
                found.append((first, second, verdict))

    return found
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def apply_contradiction_penalty(
    record_a: MemoryRecord,
    record_b: MemoryRecord,
) -> Tuple[MemoryRecord, MemoryRecord]:
    """Lower the importance of both memories when they contradict each other.

    When no contradiction is detected the two input records are returned
    unchanged. Otherwise each record is rebuilt via ``to_dict`` /
    ``from_dict`` and the rebuilt record gets:

    - ``importance`` multiplied by 0.7 (contradiction confidence >= 0.8) or
      0.85 (lower confidence), never dropping below 0.1;
    - ``metadata["contradiction_penalty"] = True`` and
      ``metadata["contradiction_at"]`` set to the current time in ms.

    Returns:
        ``(record_a, record_b)`` or their penalized replacements.
    """
    import time

    verdict = detect_record_pair_contradiction(record_a, record_b)
    if not verdict.has_contradiction:
        return record_a, record_b

    # A more confident detection earns the harsher penalty.
    scale = 0.7 if verdict.confidence >= 0.8 else 0.85

    penalized = []
    for source in (record_a, record_b):
        clone = MemoryRecord.from_dict(source.to_dict())
        clone.importance = max(0.1, clone.importance * scale)
        clone.metadata["contradiction_penalty"] = True
        clone.metadata["contradiction_at"] = int(time.time() * 1000)
        penalized.append(clone)

    return penalized[0], penalized[1]
|