ultra-memory 3.0.5 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,414 @@
1
+ #!/usr/bin/env python3
2
+ """
3
ultra-memory: User Profile Evolution Engine (Evolution Engine Phase 4)
Evolves user_profile.json dynamically through Bayesian confidence updates.

Bayesian update formula (Beta distribution):
    alpha_post = max(0.5, old_conf × old_count) + new_evidence_conf
    beta_post  = max(0.5, (1 - old_conf) × old_count) + (1 - new_evidence_conf)
    new_conf   = alpha_post / (alpha_post + beta_post)

When a new fact contradicts the profile:
    1. Append an entry to correction_history.
    2. The new value wins; the old value is demoted and kept in the history record.
    3. If the old confidence > 0.85 → mark as pending manual resolution.
       (NOTE: no code in this module implements step 3 yet.)

Triggered by extract_facts.py or by SKILL.md Step 7B.
17
+ """
18
+
19
+ import os
20
+ import sys
21
+ import json
22
+ import argparse
23
+ import math
24
+ from datetime import datetime, timezone
25
+ from pathlib import Path
26
+
27
# Force UTF-8 on the standard streams so the non-ASCII status messages printed
# by this module do not fail on consoles that use a legacy code page.
# A stream can be None (e.g. under pythonw) or a replacement object without
# ``reconfigure`` (e.g. a redirected StringIO), so probe before calling.
for _stream in (sys.stdout, sys.stderr):
    if getattr(_stream, "encoding", None) != "utf-8" and hasattr(_stream, "reconfigure"):
        _stream.reconfigure(encoding="utf-8")
del _stream

# Root directory of the memory store. The ULTRA_MEMORY_HOME environment
# variable overrides the default ``~/.ultra-memory``. An empty value is treated
# as unset so the store never silently lands in the current working directory,
# and Path.home() is only consulted when the default is actually needed.
ULTRA_MEMORY_HOME = Path(os.environ.get("ULTRA_MEMORY_HOME") or Path.home() / ".ultra-memory")
33
+
34
+ # ── 贝叶斯更新 ─────────────────────────────────────────────────────────────
35
+
36
+
37
+ def _now_iso() -> str:
38
+ return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
39
+
40
+
41
+ def _parse_ts(ts_str: str) -> datetime:
42
+ if not ts_str:
43
+ return datetime.now(timezone.utc)
44
+ try:
45
+ ts_str = ts_str.replace("Z", "+00:00")
46
+ return datetime.fromisoformat(ts_str)
47
+ except ValueError:
48
+ return datetime.now(timezone.utc)
49
+
50
+
51
def bayesian_update(
    existing_confidence: float,
    new_evidence_confidence: float,
    existing_count: int,
) -> tuple[float, int]:
    """Fold one new observation into a confidence value (Beta-distribution update).

    The existing confidence and evidence count are converted into Beta
    pseudo-counts, each floored at 0.5 (the Jeffreys prior Beta(0.5, 0.5)), so
    a field with little or one-sided evidence never collapses to 0 or 1:

        alpha_prior = max(0.5, existing_confidence * existing_count)
        beta_prior  = max(0.5, (1 - existing_confidence) * existing_count)
        alpha_post  = alpha_prior + new_evidence_confidence
        beta_post   = beta_prior + (1 - new_evidence_confidence)
        new_confidence = alpha_post / (alpha_post + beta_post)

    Args:
        existing_confidence: Current confidence; clamped to [0, 1].
        new_evidence_confidence: Confidence of the new observation; clamped to
            [0, 1]. Without the clamp an out-of-range value could drive the
            result outside [0, 1].
        existing_count: Number of observations seen so far; negative values
            are treated as 0.

    Returns:
        ``(new_confidence, new_count)`` where ``new_confidence`` is rounded to
        three decimals and ``new_count`` is the clamped count plus one.
    """
    prior_conf = min(1.0, max(0.0, existing_confidence))
    evidence = min(1.0, max(0.0, new_evidence_confidence))
    count = max(0, existing_count)

    # Jeffreys-prior floor on both pseudo-counts.
    alpha_prior = max(0.5, prior_conf * count)
    beta_prior = max(0.5, (1 - prior_conf) * count)

    alpha_post = alpha_prior + evidence
    beta_post = beta_prior + (1 - evidence)

    new_confidence = alpha_post / (alpha_post + beta_post)
    return round(new_confidence, 3), count + 1
77
+
78
+
79
+ # ── 画像加载/保存 ─────────────────────────────────────────────────────────
80
+
81
+
82
def _load_profile() -> dict:
    """Load ``semantic/user_profile.json``, migrating the v1 layout to v2.

    Returns:
        A v2 profile dict. A file whose ``version`` is 2 is returned as parsed.
        Any other dict is treated as v1: its ``tech_stack``, ``language``,
        ``work_style`` and ``observed_patterns`` entries are converted into v2
        field records with fixed starting confidences, and ``last_reflection``
        / ``last_distillation`` are carried over. A missing, unreadable,
        undecodable, or non-object file yields an empty v2 profile.

    Note:
        An unreadable file is replaced by an empty profile in memory only;
        the file on disk is overwritten the next time the profile is saved.
    """

    def _empty() -> dict:
        # Fresh dict on every call so callers can mutate the result freely.
        return {"version": 2, "fields": {}, "last_reflection": None, "last_distillation": None}

    profile_file = ULTRA_MEMORY_HOME / "semantic" / "user_profile.json"
    if not profile_file.exists():
        return _empty()

    try:
        with open(profile_file, encoding="utf-8") as f:
            profile = json.load(f)
    except (ValueError, OSError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return _empty()

    if not isinstance(profile, dict):
        # Valid JSON but not an object (e.g. a list): nothing usable to migrate.
        return _empty()

    if profile.get("version") == 2:
        return profile

    # ── v1 → v2 migration ──
    migrated = _empty()
    fields = migrated["fields"]
    # One timestamp shared by every migrated field.
    stamp = profile.get("last_updated", _now_iso())

    def _field(value, confidence: float) -> dict:
        return {
            "value": value,
            "confidence": confidence,
            "evidence_count": 1,
            "last_updated": stamp,
            "sources": [],
            "corrected_at": None,
        }

    if "tech_stack" in profile:
        tech = profile["tech_stack"]
        fields["tech_stack"] = _field(tech if isinstance(tech, list) else [tech], 0.6)

    if "language" in profile:
        fields["language"] = _field(profile["language"], 0.7)

    if "work_style" in profile:
        work_style = profile["work_style"]
        if isinstance(work_style, dict):
            for k, v in work_style.items():
                fields[f"work_style.{k}"] = _field(v, 0.5)
        else:
            # A scalar work_style cannot be split into sub-fields; keep it
            # whole instead of failing the whole load.
            fields["work_style"] = _field(work_style, 0.5)

    if "observed_patterns" in profile:
        fields["observed_patterns"] = _field(profile["observed_patterns"], 0.5)

    migrated["last_reflection"] = profile.get("last_reflection")
    migrated["last_distillation"] = profile.get("last_distillation")
    return migrated
149
+
150
+
151
def _save_profile(profile: dict):
    """Write ``profile`` to ``semantic/user_profile.json`` via a temp file.

    The directory is created if needed and ``profile["version"]`` is set to 2
    on the passed-in dict. The JSON is written to ``user_profile.tmp`` and then
    moved over the real file, so readers never see a half-written profile.

    Args:
        profile: The profile dict to persist; mutated to carry ``version`` 2.

    Raises:
        TypeError / ValueError: if ``profile`` is not JSON-serializable; raised
            before any file is touched.
        OSError: if writing or replacing the file fails; the temp file is
            removed before the error propagates.
    """
    semantic_dir = ULTRA_MEMORY_HOME / "semantic"
    semantic_dir.mkdir(parents=True, exist_ok=True)
    profile_file = semantic_dir / "user_profile.json"
    profile["version"] = 2

    # Serialize up front so a bad value fails before the temp file exists.
    payload = json.dumps(profile, ensure_ascii=False, indent=2)

    tmp_file = profile_file.with_suffix(".tmp")
    try:
        with open(tmp_file, "w", encoding="utf-8") as f:
            f.write(payload)
        tmp_file.replace(profile_file)
    except Exception:
        # Do not leave a partial temp file behind on a failed write/replace.
        tmp_file.unlink(missing_ok=True)
        raise
162
+
163
+
164
+ # ── 字段更新 ───────────────────────────────────────────────────────────────
165
+
166
+
167
# Preference-type predicates → profile field group they map to.
PREFERENCE_MAPPINGS = dict.fromkeys(
    ("user_prefers", "user_avoids", "adopted", "chose"),
    "preferences",
)
174
+
175
+
176
+ def update_profile_from_fact(fact: dict, session_id: str):
177
+ """
178
+ 基于提取到的事实更新用户画像。
179
+ 判断事实是否涉及用户偏好/行为,并更新对应字段。
180
+ """
181
+ predicate = fact.get("predicate", "")
182
+ subject = fact.get("subject", "")
183
+ obj = fact.get("object", "")
184
+ confidence = fact.get("confidence", 0.7)
185
+
186
+ profile = _load_profile()
187
+ fields = profile.setdefault("fields", {})
188
+
189
+ changed = False
190
+
191
+ # 1. 处理用户偏好类谓词
192
+ if predicate in PREFERENCE_MAPPINGS:
193
+ pref_key = f"preferences.{subject}"
194
+
195
+ existing = fields.get(pref_key)
196
+ if existing:
197
+ new_conf, new_count = bayesian_update(
198
+ existing.get("confidence", 0.5),
199
+ confidence,
200
+ existing.get("evidence_count", 0),
201
+ )
202
+ existing["confidence"] = new_conf
203
+ existing["evidence_count"] = new_count
204
+ existing["last_updated"] = _now_iso()
205
+ existing["sources"] = existing.get("sources", []) + [session_id]
206
+ existing["sources"] = existing["sources"][-10:] # 最多保留10个来源
207
+
208
+ # 如果新值与旧值不同 → 追加修正历史
209
+ if existing.get("value") != obj:
210
+ existing.setdefault("correction_history", []).append({
211
+ "corrected_at": _now_iso(),
212
+ "old_value": existing.get("value"),
213
+ "new_value": obj,
214
+ "confidence_delta": round(new_conf - existing.get("confidence", 0.5), 3),
215
+ "source": "auto",
216
+ })
217
+ existing["value"] = obj
218
+ else:
219
+ fields[pref_key] = {
220
+ "value": obj,
221
+ "confidence": confidence,
222
+ "evidence_count": 1,
223
+ "last_updated": _now_iso(),
224
+ "sources": [session_id],
225
+ "corrected_at": None,
226
+ "correction_history": [],
227
+ }
228
+ changed = True
229
+
230
+ # 2. 处理技术栈推断(从 depends_on / requires / uses 谓词推断)
231
+ elif predicate in ("depends_on", "requires", "uses", "installed_as"):
232
+ tech_key = "tech_stack"
233
+ existing = fields.get(tech_key)
234
+
235
+ if existing:
236
+ new_conf, new_count = bayesian_update(
237
+ existing.get("confidence", 0.5),
238
+ confidence,
239
+ existing.get("evidence_count", 0),
240
+ )
241
+ existing["confidence"] = new_conf
242
+ existing["evidence_count"] = new_count
243
+ existing["last_updated"] = _now_iso()
244
+
245
+ # 如果技术不在列表中,追加
246
+ if isinstance(existing["value"], list) and obj not in existing["value"]:
247
+ existing["value"] = existing["value"] + [obj]
248
+ else:
249
+ fields[tech_key] = {
250
+ "value": [obj],
251
+ "confidence": confidence,
252
+ "evidence_count": 1,
253
+ "last_updated": _now_iso(),
254
+ "sources": [session_id],
255
+ "corrected_at": None,
256
+ "correction_history": [],
257
+ }
258
+ changed = True
259
+
260
+ # 3. 处理行为模式(从 skip / fail_on / blocks 谓词推断)
261
+ elif predicate in ("skips", "fails_on", "blocks"):
262
+ pattern_key = f"work_style.behavior.{subject}"
263
+ existing = fields.get(pattern_key)
264
+
265
+ if existing:
266
+ new_conf, new_count = bayesian_update(
267
+ existing.get("confidence", 0.5),
268
+ confidence,
269
+ existing.get("evidence_count", 0),
270
+ )
271
+ existing["confidence"] = new_conf
272
+ existing["evidence_count"] = new_count
273
+ existing["last_updated"] = _now_iso()
274
+ if existing.get("value") != obj:
275
+ existing.setdefault("correction_history", []).append({
276
+ "corrected_at": _now_iso(),
277
+ "old_value": existing.get("value"),
278
+ "new_value": obj,
279
+ "confidence_delta": round(new_conf - existing.get("confidence", 0.5), 3),
280
+ "source": "auto",
281
+ })
282
+ existing["value"] = obj
283
+ else:
284
+ fields[pattern_key] = {
285
+ "value": obj,
286
+ "confidence": confidence,
287
+ "evidence_count": 1,
288
+ "last_updated": _now_iso(),
289
+ "sources": [session_id],
290
+ "corrected_at": None,
291
+ "correction_history": [],
292
+ }
293
+ changed = True
294
+
295
+ if changed:
296
+ _save_profile(profile)
297
+ print(f"[ultra-memory] ✅ 画像更新: {predicate} / {subject} = {obj}")
298
+
299
+
300
+ # ── 手动修正(SKILL.md Step 7B)────────────────────────────────────────────
301
+
302
+
303
def correct_profile_field(
    field_path: str,
    new_value,
    evidence_confidence: float = 1.0,
    session_id: str | None = None,
):
    """Manually correct a profile field (Step 7B: error correction).

    The field record is replaced outright: the new value and confidence
    overwrite the old ones, the evidence count is incremented, ``sources`` is
    reset to the given session (or emptied), and a ``"manual"`` entry holding
    the old and new value/confidence is appended to ``correction_history``.
    The profile is saved and a summary is printed.

    Args:
        field_path: Key of the field inside ``profile["fields"]``.
        new_value: Value to store.
        evidence_confidence: Confidence of the correction; clamped to [0, 1].
            The clamped value is what gets stored, recorded and printed.
        session_id: Optional session recorded as the only source.
    """
    profile = _load_profile()
    fields = profile.setdefault("fields", {})

    # ``or {}`` / ``or []`` also cover entries stored as JSON null.
    previous = fields.get(field_path) or {}
    old_value = previous.get("value")
    old_confidence = previous.get("confidence", 0.5)
    old_count = previous.get("evidence_count", 0)
    history = previous.get("correction_history") or []

    new_confidence = max(0.0, min(1.0, evidence_confidence))
    # One timestamp so last_updated, corrected_at and the history entry agree.
    stamp = _now_iso()

    fields[field_path] = {
        "value": new_value,
        "confidence": new_confidence,
        "evidence_count": old_count + 1,
        "last_updated": stamp,
        "sources": [session_id] if session_id else [],
        "corrected_at": stamp,
        "correction_history": history + [
            {
                "corrected_at": stamp,
                "old_value": old_value,
                "new_value": new_value,
                "old_confidence": old_confidence,
                "new_confidence": new_confidence,
                "source": "manual",
            }
        ],
    }

    _save_profile(profile)
    print(f"[ultra-memory] ✅ 画像修正: {field_path}")
    print(f" 旧值: {old_value} (conf={old_confidence:.2f})")
    print(f" 新值: {new_value} (conf={new_confidence:.2f})")
349
+
350
+
351
+ # ── 时间戳更新 ─────────────────────────────────────────────────────────────
352
+
353
+
354
def update_reflection_timestamp():
    """Stamp ``last_reflection`` with the current time (after Step 7A/7C completes)."""
    current = _load_profile()
    current.update(last_reflection=_now_iso())
    _save_profile(current)
359
+
360
+
361
def update_distillation_timestamp():
    """Stamp ``last_distillation`` with the current time (after Step 7C completes)."""
    current = _load_profile()
    current.update(last_distillation=_now_iso())
    _save_profile(current)
366
+
367
+
368
+ # ── CLI ─────────────────────────────────────────────────────────────────────
369
+
370
+
371
+ if __name__ == "__main__":
372
+ parser = argparse.ArgumentParser(description="更新用户画像")
373
+ parser.add_argument("--field", help="字段路径(dot-notation,如 tech_stack)")
374
+ parser.add_argument("--value", help="新值")
375
+ parser.add_argument("--evidence", type=float, default=0.8,
376
+ help="证据置信度 (0.0-1.0)")
377
+ parser.add_argument("--source-session", default=None,
378
+ help="来源 session ID")
379
+ parser.add_argument(
380
+ "--correct", action="store_true",
381
+ help="手动修正模式(强制覆盖旧值)"
382
+ )
383
+ parser.add_argument(
384
+ "--update-reflection", action="store_true",
385
+ help="仅更新时间戳"
386
+ )
387
+ args = parser.parse_args()
388
+
389
+ if args.update_reflection:
390
+ update_reflection_timestamp()
391
+ print("[ultra-memory] ✅ last_reflection 已更新")
392
+ elif args.correct and args.field and args.value is not None:
393
+ correct_profile_field(
394
+ args.field, args.value, args.evidence, args.source_session
395
+ )
396
+ elif args.field and args.value is not None:
397
+ # 模拟 fact 结构
398
+ fact = {
399
+ "predicate": args.field,
400
+ "subject": "",
401
+ "object": args.value,
402
+ "confidence": args.evidence,
403
+ }
404
+ update_profile_from_fact(fact, args.source_session or "cli")
405
+ else:
406
+ profile = _load_profile()
407
+ print(f"[ultra-memory] 当前画像版本: {profile.get('version', '?')}")
408
+ print(f"字段数: {len(profile.get('fields', {}))}")
409
+ print(f"last_reflection: {profile.get('last_reflection')}")
410
+ print(f"last_distillation: {profile.get('last_distillation')}")
411
+ for path, data in profile.get("fields", {}).items():
412
+ print(f" {path}: {data.get('value')} "
413
+ f"(conf={data.get('confidence', 0):.2f}, "
414
+ f"n={data.get('evidence_count', 0)})")