ultra-memory 3.1.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,351 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ ultra-memory: 自动遗忘引擎 (Evolution Engine Phase 3)
4
+ 基于时间衰减 + 访问频率 + 重要性评分计算每条事实的 decay_score。
5
+ decay_score < 0.05 的事实标记为遗忘(soft-delete,不删除原始记录)。
6
+
7
+ 衰减公式:
8
+ decay_score = importance_score × recency_weight × access_weight
9
+
10
+ recency_weight = 0.5 ^ (age_days / half_life_days) # half_life=30天
11
+ access_weight = min(1.0, log2(access_count + 1) / log2(11))
12
+ # access_count=0 → ~0, access_count=10 → ~0.95
13
+
14
+ 衰减等级:
15
+ ≥ 0.6 → "none" (健康)
16
+ 0.4–0.6 → "mild" (轻度衰减)
17
+ 0.2–0.4 → "moderate" (中度衰减)
18
+ 0.05–0.2 → "severe" (即将遗忘)
19
+ < 0.05 → "forgotten" (触发遗忘)
20
+
21
+ 被 cleanup.py --run-decay 调用,或每日定时触发。
22
+ """
23
+
24
+ import os
25
+ import sys
26
+ import json
27
+ import argparse
28
+ import math
29
+ from datetime import datetime, timezone
30
+ from pathlib import Path
31
+
32
# Force UTF-8 on stdio so the Chinese log output survives legacy consoles
# (e.g. cp936 on Windows).  Fixes vs. previous revision:
# - compare encodings case-insensitively ("UTF-8" vs "utf-8"),
# - guard for replaced streams (pytest/IDE wrappers) that lack reconfigure(),
#   which used to raise AttributeError at import time.
for _stream in (sys.stdout, sys.stderr):
    if hasattr(_stream, "reconfigure") and (_stream.encoding or "").lower() != "utf-8":
        _stream.reconfigure(encoding="utf-8")
36
+
37
+ ULTRA_MEMORY_HOME = Path(os.environ.get("ULTRA_MEMORY_HOME", Path.home() / ".ultra-memory"))
38
+
39
+ # 默认衰减参数(可通过 config.json 覆盖)
40
+ DEFAULT_HALF_LIFE_DAYS = 30
41
+ DEFAULT_FORGET_THRESHOLD = 0.05
42
+
43
+ # 衰减等级边界
44
+ DECAY_LEVELS = [
45
+ (0.6, "none"),
46
+ (0.4, "mild"),
47
+ (0.2, "moderate"),
48
+ (0.05, "severe"),
49
+ ]
50
+
51
+
52
+ # ── 工具函数 ───────────────────────────────────────────────────────────────
53
+
54
+
55
def _now_iso() -> str:
    """Current UTC time as an ISO-8601 string with a trailing 'Z'."""
    stamp = datetime.now(timezone.utc).isoformat()
    return stamp.replace("+00:00", "Z")
57
+
58
+
59
def _parse_ts(ts_str: str) -> datetime:
    """Parse an ISO-8601 timestamp string into an aware UTC datetime.

    Empty or malformed input falls back to "now" so the decay math never
    crashes on bad metadata.  Bug fix: naive timestamps (no offset) are now
    assumed UTC and made aware — callers subtract the result from an aware
    datetime, and naive minus aware raises TypeError.
    """
    if not ts_str:
        return datetime.now(timezone.utc)
    try:
        # Normalize the trailing 'Z' that fromisoformat (pre-3.11) rejects.
        parsed = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
    except ValueError:
        return datetime.now(timezone.utc)
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
69
+
70
+
71
def _load_config() -> dict:
    """Load config.json from the ultra-memory home; {} when absent or invalid."""
    path = ULTRA_MEMORY_HOME / "config.json"
    if not path.exists():
        return {}
    try:
        with open(path, encoding="utf-8") as fh:
            return json.load(fh)
    except (json.JSONDecodeError, IOError):
        # Best-effort: a broken config must not abort the decay pass.
        return {}
81
+
82
+
83
+ # ── 衰减计算 ───────────────────────────────────────────────────────────────
84
+
85
+
86
def compute_decay_score(
    fact_metadata: dict,
    now: datetime,
) -> float:
    """Score one fact's retention strength, clamped to [0.0, 1.0].

    decay_score = importance_score * recency_weight * access_weight
      recency_weight = 0.5 ** (age_days / half_life_days)
      access_weight  = min(1, log2(access_count + 1) / log2(11))

    Facts that were never accessed and are older than one half-life take an
    extra 50% penalty so stale, unused facts fade faster.
    """
    last_updated = _parse_ts(fact_metadata.get("last_updated", ""))
    age_days = (now - last_updated).days
    half_life = fact_metadata.get("ttl_days", DEFAULT_HALF_LIFE_DAYS)

    # Exponential time decay; a non-positive half-life means immediate decay.
    recency = 0.5 ** (age_days / half_life) if half_life > 0 else 0.0

    # Logarithmic access-frequency weight: 0 hits ~ 0, 10 hits ~ 0.95.
    hits = fact_metadata.get("access_count", 0)
    access = min(1.0, math.log2(hits + 1) / math.log2(11))

    importance = fact_metadata.get("importance_score", 0.5)

    score = importance * recency * access
    if hits == 0 and age_days > half_life:
        score *= 0.5
    return max(0.0, min(1.0, score))
115
+
116
+
117
def compute_decay_level(score: float) -> str:
    """Map a decay score onto its named level ("none" … "forgotten").

    Scores below every DECAY_LEVELS cutoff (i.e. < 0.05) are "forgotten".
    """
    matched = next(
        (level for cutoff, level in DECAY_LEVELS if score >= cutoff),
        None,
    )
    return matched if matched is not None else "forgotten"
123
+
124
+
125
def compute_importance_score(fact: dict, meta: dict) -> float:
    """Importance of one fact, clamped to [0.1, 1.0].

    Weighted combination of source type, contradiction resistance and
    explicit user confirmation.  (Previous revision assigned a dead
    ``base = 0.5`` that was immediately overwritten — removed.)
    """
    # Source-type weight; unknown types default to 0.6.
    source_weights = {
        "milestone": 0.9,
        "decision": 0.85,
        "user_instruction": 0.8,
        "file_write": 0.7,
        "tool_call": 0.6,
        "reasoning": 0.6,
        "bash_exec": 0.55,
        "error": 0.5,
        "file_read": 0.4,
    }
    base = source_weights.get(fact.get("source_type", ""), 0.6)

    # Contradictions reduce stability: -0.1 each, capped at -0.3, floor 0.1.
    contradiction_penalty = min(0.3, meta.get("contradiction_count", 0) * 0.1)
    base = max(0.1, base - contradiction_penalty)

    # Any manual correction is an explicit user confirmation: +0.15.
    has_manual = any(
        c.get("source") == "manual"
        for c in meta.get("correction_history", [])
    )
    if has_manual:
        base = min(1.0, base + 0.15)

    return max(0.1, min(1.0, base))
161
+
162
+
163
+ # ── 遗忘处理 ───────────────────────────────────────────────────────────────
164
+
165
+
166
def _load_facts() -> list[dict]:
    """Read every fact from evolution/facts.jsonl; [] when the file is missing."""
    facts_file = ULTRA_MEMORY_HOME / "evolution" / "facts.jsonl"
    if not facts_file.exists():
        return []
    loaded: list[dict] = []
    with open(facts_file, encoding="utf-8") as fh:
        for raw in fh:
            raw = raw.strip()
            if not raw:
                continue
            try:
                loaded.append(json.loads(raw))
            except json.JSONDecodeError:
                # Tolerate corrupt lines: skip rather than abort the scan.
                continue
    return loaded
182
+
183
+
184
def _load_metadata() -> dict:
    """Load evolution/fact_metadata.json, or a fresh skeleton on any failure."""

    def _fresh() -> dict:
        # Empty metadata document in the on-disk schema.
        return {"version": 1, "updated_at": _now_iso(), "facts": {}}

    meta_file = ULTRA_MEMORY_HOME / "evolution" / "fact_metadata.json"
    if not meta_file.exists():
        return _fresh()
    try:
        with open(meta_file, encoding="utf-8") as fh:
            return json.load(fh)
    except (json.JSONDecodeError, IOError):
        return _fresh()
194
+
195
+
196
def _save_metadata(meta: dict):
    """Atomically persist metadata: write a .tmp sibling, then rename over it."""
    evolution_dir = ULTRA_MEMORY_HOME / "evolution"
    evolution_dir.mkdir(parents=True, exist_ok=True)
    meta["updated_at"] = _now_iso()

    target = evolution_dir / "fact_metadata.json"
    scratch = target.with_suffix(".tmp")
    with open(scratch, "w", encoding="utf-8") as fh:
        json.dump(meta, fh, ensure_ascii=False, indent=2)
    # rename() is atomic on the same filesystem, so readers never see a
    # half-written file.
    scratch.replace(target)
207
+
208
+
209
def append_decay_log(entry: dict):
    """Append one JSON object as a single line to evolution/decay_log.jsonl."""
    log_dir = ULTRA_MEMORY_HOME / "evolution"
    log_dir.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(entry, ensure_ascii=False)
    with open(log_dir / "decay_log.jsonl", "a", encoding="utf-8") as fh:
        fh.write(payload + "\n")
217
+
218
+
219
+ # ── 全量衰减扫描 ───────────────────────────────────────────────────────────
220
+
221
+
222
def run_decay_pass(session_id: str | None = None):
    """Run a full decay sweep over all facts.

    When ``session_id`` is given, only facts belonging to that session are
    scanned.  Returns {"forgotten": [...], "severe": [...], "updated": [...]}.

    Fixes vs. previous revision:
    - TTL expiry now actually forces the "forgotten" level; previously the
      forced level was immediately overwritten by the recomputed score level.
    - The configurable ``decay.forget_threshold`` is honoured; previously it
      was loaded but never used (the hard-coded 0.05 boundary always won).
    - The ``reason`` logged is accurate: "ttl_expired" only when the TTL has
      really elapsed, not merely when an ``expires_at`` field exists.
    - Fact lookups use a dict instead of repeated linear scans.
    """
    now = datetime.now(timezone.utc)
    facts = _load_facts()
    meta = _load_metadata()

    config = _load_config()
    decay_cfg = config.get("decay")
    if not isinstance(decay_cfg, dict):
        decay_cfg = {}
    forget_threshold = decay_cfg.get("forget_threshold", DEFAULT_FORGET_THRESHOLD)

    # Index facts by id; first occurrence wins (matches the old behaviour of
    # `next(f for f in facts ...)` and `if fid not in meta["facts"]`).
    facts_by_id: dict = {}
    for fact in facts:
        fid = fact.get("fact_id")
        if fid:
            facts_by_id.setdefault(fid, fact)

    # Seed metadata for facts seen for the first time.
    for fid, fact in facts_by_id.items():
        if fid not in meta["facts"]:
            meta["facts"][fid] = {
                "confidence": fact.get("confidence", 0.7),
                "access_count": fact.get("access_count", 1),
                "last_accessed": fact.get("last_accessed", _now_iso()),
                "last_updated": fact.get("ts", _now_iso()),
                "importance_score": compute_importance_score(fact, {}),
                "decay_level": "none",
                "ttl_days": DEFAULT_HALF_LIFE_DAYS,
                "expires_at": None,
                "status": "active",
                "contradiction_count": fact.get("contradiction_count", 0),
                "correction_history": [],
            }

    forgotten_ids: list[str] = []
    severe_ids: list[str] = []
    updated_ids: list[str] = []

    for fid, fact_meta in meta["facts"].items():
        if fact_meta.get("status") in ("forgotten", "superseded"):
            continue

        matching_fact = facts_by_id.get(fid)

        # Optional session filter: only facts of the requested session.
        if session_id:
            if not matching_fact or matching_fact.get("session_id") != session_id:
                continue

        # Refresh importance from the latest fact data, when available.
        if matching_fact:
            fact_meta["importance_score"] = compute_importance_score(
                matching_fact, fact_meta
            )

        decay_score = compute_decay_score(fact_meta, now)
        old_level = fact_meta.get("decay_level", "none")

        # Hard TTL expiry always wins; otherwise compare the score to the
        # configured forget threshold.
        ttl_expired = bool(fact_meta.get("expires_at")) and now >= _parse_ts(
            fact_meta["expires_at"]
        )
        if ttl_expired or decay_score < forget_threshold:
            new_level = "forgotten"
        else:
            new_level = compute_decay_level(decay_score)
            if new_level == "forgotten":
                # Score sits between a custom (lower) threshold and the
                # built-in 0.05 boundary: keep the fact, but mark it severe.
                new_level = "severe"
        fact_meta["decay_level"] = new_level

        if new_level == "forgotten" and old_level != "forgotten":
            # Soft delete: status change only, the original record survives.
            fact_meta["status"] = "forgotten"
            fact_meta["forgotten_at"] = _now_iso()
            forgotten_ids.append(fid)
            append_decay_log({
                "ts": _now_iso(),
                "fact_id": fid,
                "reason": "ttl_expired" if ttl_expired else "importance_decay",
                "decay_level_before": old_level,
                "action": "marked_forgotten",
                "decay_score": round(decay_score, 3),
                "session_id": session_id or "system",
            })
        elif new_level == "severe" and old_level in ("none", "mild", "moderate"):
            severe_ids.append(fid)

        if new_level != old_level:
            updated_ids.append(fid)

    _save_metadata(meta)

    print(f"[ultra-memory] ✅ 衰减扫描完成 (session: {session_id or 'all'})")
    print(f" 遗忘: {len(forgotten_ids)} 条")
    print(f" 严重衰减: {len(severe_ids)} 条")
    print(f" 等级变化: {len(updated_ids)} 条")

    return {
        "forgotten": forgotten_ids,
        "severe": severe_ids,
        "updated": updated_ids,
    }
333
+
334
+
335
+ # ── CLI ─────────────────────────────────────────────────────────────────────
336
+
337
+
338
if __name__ == "__main__":
    # CLI entry point: run one decay pass, optionally scoped to a session.
    arg_parser = argparse.ArgumentParser(description="执行事实衰减扫描")
    arg_parser.add_argument(
        "--session", default=None,
        help="会话 ID(省略则扫描所有事实)"
    )
    arg_parser.add_argument(
        "--run", action="store_true",
        help="执行衰减扫描(配合 cleanup.py 使用)"
    )
    cli_args = arg_parser.parse_args()

    result = run_decay_pass(cli_args.session)
    sys.exit(0)
@@ -152,5 +152,26 @@ if __name__ == "__main__":
152
152
  parser.add_argument("--archive-only", action="store_true", help="只归档到 archive/ 目录,不删除")
153
153
  parser.add_argument("--dry-run", action="store_true", help="演习模式,只打印不执行")
154
154
  parser.add_argument("--project", default=None, help="只清理指定项目(默认所有项目)")
155
+ parser.add_argument(
156
+ "--run-decay", action="store_true",
157
+ help="执行事实衰减扫描(auto_decay.py),在清理前运行"
158
+ )
155
159
  args = parser.parse_args()
160
+
161
+ if args.run_decay:
162
+ import subprocess
163
+ scripts_dir = Path(__file__).parent
164
+ python = sys.executable
165
+ startupinfo = subprocess.STARTUPINFO()
166
+ startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
167
+ print("[ultra-memory] 运行事实衰减扫描...")
168
+ result = subprocess.run(
169
+ [python, str(scripts_dir / "auto_decay.py"), "--session", args.project or ""],
170
+ capture_output=True, text=True, startupinfo=startupinfo,
171
+ )
172
+ if result.stdout:
173
+ print(result.stdout)
174
+ if result.stderr:
175
+ print(result.stderr, file=sys.stderr)
176
+
156
177
  cleanup(args.days, args.archive_only, args.dry_run, args.project)
@@ -0,0 +1,319 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ ultra-memory: 矛盾检测核心模块
4
+ 检测 user_profile.json 和 knowledge_base.jsonl 中的时序矛盾,
5
+ 将旧记录标记为 superseded: true,召回时跳过失效记录。
6
+
7
+ 只使用 Python 标准库,无外部依赖。
8
+ """
9
+
10
+ import os
11
+ import re
12
+ import json
13
+ from datetime import datetime, timezone
14
+ from pathlib import Path
15
+
16
+ # ── 停用词表 ────────────────────────────────────────────────────────────────
17
+
18
+ STOPWORDS = {
19
+ "的", "了", "是", "在", "和", "与", "或", "以及",
20
+ "a", "an", "the", "is", "was", "are", "were",
21
+ "to", "of", "for", "with", "by", "from",
22
+ }
23
+
24
+ # ── 否定词表 ────────────────────────────────────────────────────────────────
25
+
26
+ NEGATION_WORDS = {
27
+ "不", "没有", "无法", "不能", "不是", "别", "莫", "永不", "绝不",
28
+ "not", "no", "never", "none", "cannot", "don't", "doesn't",
29
+ "isn't", "aren't", "wasn't", "weren't", "won't", "wouldn't",
30
+ }
31
+
32
+ # ── 对立词对 ───────────────────────────────────────────────────────────────
33
+
34
+ CONTRADICTORY_PAIRS = [
35
+ frozenset(["成功", "失败"]),
36
+ frozenset(["可以", "不能"]),
37
+ frozenset(["推荐", "不推荐"]),
38
+ frozenset(["启用", "禁用"]),
39
+ frozenset(["enable", "disable"]),
40
+ frozenset(["success", "failure"]),
41
+ frozenset(["yes", "no"]),
42
+ frozenset(["true", "false"]),
43
+ ]
44
+
45
+
46
+ # ── 工具函数 ───────────────────────────────────────────────────────────────
47
+
48
+
49
def _now_iso() -> str:
    """Current UTC time, ISO-8601 formatted with a trailing 'Z'."""
    raw = datetime.now(timezone.utc).isoformat()
    return raw.replace("+00:00", "Z")
51
+
52
+
53
def _stopword_filter(words: list[str]) -> list[str]:
    """Drop stopwords and single-character tokens, preserving order."""
    kept = []
    for word in words:
        if len(word) > 1 and word.lower() not in STOPWORDS:
            kept.append(word)
    return kept
55
+
56
+
57
+ # ── 画像冲突检测 ───────────────────────────────────────────────────────────
58
+
59
+
60
def detect_profile_conflict(new_data: dict, home: Path) -> list[dict]:
    """Detect field conflicts between ``new_data`` and user_profile.json.

    Returns one record per conflict:
        {"field": ..., "old_value": ..., "new_value": ..., "superseded_at": ...}

    Rules:
    - strings: any value change is a conflict;
    - lists: only element *removal* is a conflict (pure additions are fine);
    - dicts: one level of nesting is compared key by key (string values).
    """
    profile_file = home / "semantic" / "user_profile.json"
    if not profile_file.exists():
        return []

    try:
        with open(profile_file, encoding="utf-8") as f:
            profile = json.load(f)
    except (json.JSONDecodeError, IOError):
        return []

    conflicts = []

    for field, new_value in new_data.items():
        # observed_patterns is append-only: never a conflict.
        if field == "observed_patterns":
            continue
        # *_superseded entries are bookkeeping, not real fields.
        if field.endswith("_superseded"):
            continue
        if field not in profile:
            continue

        old_value = profile[field]

        if isinstance(old_value, str) and isinstance(new_value, str):
            if old_value != new_value:
                conflicts.append({
                    "field": field,
                    "old_value": old_value,
                    "new_value": new_value,
                    "superseded_at": _now_iso(),
                })

        elif isinstance(old_value, list) and isinstance(new_value, list):
            old_set = {str(v) for v in old_value}
            new_set = {str(v) for v in new_value}
            # Conflict only when something was removed from the old list.
            if old_set and new_set and old_set - new_set:
                conflicts.append({
                    "field": field,
                    "old_value": old_value,
                    "new_value": new_value,
                    "superseded_at": _now_iso(),
                })

        elif isinstance(old_value, dict) and isinstance(new_value, dict):
            for sub_key, sub_old in old_value.items():
                if sub_key not in new_value:
                    continue
                sub_new = new_value[sub_key]
                if isinstance(sub_old, str) and isinstance(sub_new, str):
                    if sub_old != sub_new:
                        conflicts.append({
                            "field": f"{field}.{sub_key}",
                            "old_value": sub_old,
                            # Bug fix: record the actual new value.  The old
                            # code substituted the placeholder "(已更新)" for
                            # any truthy sub_new, and mark_profile_superseded
                            # then wrote that placeholder into the profile.
                            "new_value": sub_new,
                            "superseded_at": _now_iso(),
                        })

    return conflicts
135
+
136
+
137
def mark_profile_superseded(home: Path, conflicts: list[dict]):
    """Apply conflict resolutions to user_profile.json (atomic replace).

    For every conflict, the old value is preserved under a top-level
    "<field>_superseded" key and the new value is written into the field
    itself (dotted field names address one level of nesting).
    """
    profile_file = home / "semantic" / "user_profile.json"
    if not conflicts or not profile_file.exists():
        return

    try:
        profile = json.loads(profile_file.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, IOError):
        return

    for item in conflicts:
        field = item["field"]
        # Keep the superseded value for audit purposes.
        profile[f"{field}_superseded"] = {
            "old_value": item["old_value"],
            "superseded_at": item["superseded_at"],
        }
        # Write the new value; dotted names walk into nested dicts
        # (e.g. "work_style.confirm_before_implement").
        keys = field.split(".")
        node = profile
        for key in keys[:-1]:
            node = node.setdefault(key, {})
        node[keys[-1]] = item["new_value"]

    # Atomic write: tmp sibling first, then rename over the original.
    tmp_path = profile_file.with_suffix(".tmp")
    tmp_path.write_text(
        json.dumps(profile, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    tmp_path.replace(profile_file)
176
+
177
+
178
+ # ── 知识库冲突检测 ────────────────────────────────────────────────────────
179
+
180
+
181
def _extract_keywords(text: str) -> set[str]:
    """Lowercase, split on whitespace/punctuation, and drop stopwords."""
    tokens = re.split(r'[\s,;.()\[\]{}]+', text.lower())
    filtered = _stopword_filter(tokens)
    return set(filtered)
185
+
186
+
187
def _title_similarity(title_a: str, title_b: str) -> float:
    """Keyword-overlap similarity: |A ∩ B| / max(|A|, |B|).

    Returns 0.0 when either title yields no keywords at all.
    """
    kw_a = _extract_keywords(title_a)
    kw_b = _extract_keywords(title_b)
    if kw_a and kw_b:
        return len(kw_a & kw_b) / max(len(kw_a), len(kw_b))
    return 0.0
196
+
197
+
198
def _has_negation(text: str) -> bool:
    """Return True when ``text`` contains a negation word.

    Bug fix: ASCII negation words are now matched as whole tokens, so "no"
    no longer fires inside "normal"/"note" and "not" no longer fires inside
    "nothing".  CJK words carry no whitespace segmentation, so they are
    still matched by substring.
    """
    text_lower = text.lower()
    # Whole ASCII tokens (keep apostrophes so "don't"/"isn't" survive).
    ascii_tokens = set(re.findall(r"[a-z']+", text_lower))
    for word in NEGATION_WORDS:
        if word.isascii():
            if word in ascii_tokens:
                return True
        elif word in text_lower:
            return True
    return False
202
+
203
+
204
def _has_number_change(text_old: str, text_new: str) -> bool:
    """True when both texts contain numbers and the number sets differ."""
    number_re = r'\d+\.?\d*'
    nums_old = set(re.findall(number_re, text_old))
    nums_new = set(re.findall(number_re, text_new))
    return bool(nums_old and nums_new and nums_old != nums_new)
211
+
212
+
213
def _has_contradictory_pair(text_old: str, text_new: str) -> bool:
    """True when the two texts hit *different* members of an antonym pair.

    Bug fix: the old implementation intersected each pair with
    ``text.split()``, but Chinese text has no spaces, so Chinese pairs such
    as 成功/失败 could never match.  ASCII members are still matched as
    whitespace tokens (avoids substring false positives); CJK members are
    matched by substring.
    """
    old_lower = text_old.lower()
    new_lower = text_new.lower()
    old_tokens = set(old_lower.split())
    new_tokens = set(new_lower.split())

    def _members_in(tokens: set[str], raw: str, pair) -> set[str]:
        # Which members of the antonym pair appear in this text?
        found = set()
        for word in pair:
            hit = word in tokens if word.isascii() else word in raw
            if hit:
                found.add(word)
        return found

    for pair in CONTRADICTORY_PAIRS:
        hits_old = _members_in(old_tokens, old_lower, pair)
        hits_new = _members_in(new_tokens, new_lower, pair)
        if hits_old and hits_new and hits_old != hits_new:
            return True
    return False
223
+
224
+
225
def detect_knowledge_conflict(new_entry: dict, home: Path) -> list[dict]:
    """Scan knowledge_base.jsonl for entries that contradict ``new_entry``.

    new_entry: {"title": ..., "content": ..., ...}
    Returns [{"seq": <1-based file line number>, "title": ..., "reason": "规则1|规则2|规则3"}].
    Only entries whose title keyword-overlap with the new title is >= 0.5
    are compared; entries already flagged ``superseded`` are skipped.
    """
    kb_file = home / "semantic" / "knowledge_base.jsonl"
    if not kb_file.exists():
        return []

    new_title = new_entry.get("title", "")
    new_content = new_entry.get("content", "")

    # Collect (line number, entry) pairs for every live, parseable record.
    candidates = []
    with open(kb_file, encoding="utf-8") as fh:
        for seq, raw in enumerate(fh, start=1):  # line numbers are 1-based
            if not raw.strip():
                continue
            try:
                parsed = json.loads(raw)
            except json.JSONDecodeError:
                continue
            if parsed.get("superseded"):
                continue
            candidates.append((seq, parsed))

    conflicts = []
    for seq, old_entry in candidates:
        old_title = old_entry.get("title", "")
        old_content = old_entry.get("content", "")

        if _title_similarity(new_title, old_title) < 0.5:
            continue

        # Rule 1: negation mismatch; Rule 2: numeric change; Rule 3: antonyms.
        if _has_negation(old_content) != _has_negation(new_content):
            reason = "规则1"
        elif _has_number_change(old_content, new_content):
            reason = "规则2"
        elif _has_contradictory_pair(old_content, new_content):
            reason = "规则3"
        else:
            continue

        conflicts.append({
            "seq": seq,
            "title": old_title,
            "reason": reason,
        })

    return conflicts
287
+
288
+
289
def mark_superseded(home: Path, jsonl_path: Path, seq_list: list[int]):
    """Mark the given 1-based line numbers of a JSONL file as superseded.

    Writes a .tmp sibling, then atomically renames it over the original.
    ``home`` is unused and kept only for call-site compatibility.

    Fixes vs. previous revision:
    - ``tmp_file.write(...)`` was called on a ``Path`` object (Path has no
      ``write`` method), so the function always raised AttributeError.
    - Blank and unparseable lines are now copied through verbatim instead of
      being silently deleted — this also keeps line numbering stable for
      future detect_knowledge_conflict() passes.
    - Missing input file is a no-op instead of raising FileNotFoundError.
    """
    if not seq_list or not jsonl_path.exists():
        return

    seq_set = set(seq_list)
    tmp_file = jsonl_path.with_suffix(".tmp")
    stamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    with open(jsonl_path, encoding="utf-8") as src, \
            open(tmp_file, "w", encoding="utf-8") as dst:
        for line_num, raw in enumerate(src, start=1):
            stripped = raw.strip()
            if not stripped:
                dst.write(raw)  # preserve blank lines
                continue
            try:
                entry = json.loads(stripped)
            except json.JSONDecodeError:
                dst.write(raw)  # never drop lines we cannot parse
                continue
            if line_num in seq_set:
                entry["superseded"] = True
                entry["superseded_at"] = stamp
            dst.write(json.dumps(entry, ensure_ascii=False) + "\n")

    tmp_file.replace(jsonl_path)