ultra-memory 3.0.5 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +141 -0
- package/integrations/__init__.py +1 -0
- package/integrations/langchain_memory.py +118 -0
- package/integrations/langgraph_checkpointer.py +76 -0
- package/integrations/n8n_nodes.py +150 -0
- package/package.json +121 -108
- package/scripts/auto_decay.py +351 -0
- package/scripts/cleanup.py +21 -0
- package/scripts/detect_contradictions.py +537 -0
- package/scripts/evolve_profile.py +414 -0
- package/scripts/extract_facts.py +471 -0
- package/scripts/log_op.py +42 -0
- package/scripts/multimodal/__init__.py +2 -0
- package/scripts/multimodal/extract_from_image.py +138 -0
- package/scripts/multimodal/extract_from_pdf.py +182 -0
- package/scripts/multimodal/transcribe_video.py +157 -0
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
#!/usr/bin/env python3
"""
ultra-memory: user profile evolution engine (Evolution Engine Phase 4)

Dynamically evolves user_profile.json via Bayesian confidence updates.

Bayesian update formula (Beta distribution):
    alpha_post = max(0.5, old_conf × old_count) + new_evidence_conf
    beta_post  = max(0.5, (1-old_conf) × old_count) + (1 - new_evidence_conf)
    new_conf   = alpha_post / (alpha_post + beta_post)

When a new fact contradicts the profile:
    1. Append to correction_history
    2. The new value wins; the old value is recorded with reduced weight
    3. If the old confidence > 0.85 → mark as pending manual resolution

Triggered by extract_facts.py or SKILL.md Step 7B.
"""
|
|
18
|
+
|
|
19
|
+
import os
|
|
20
|
+
import sys
|
|
21
|
+
import json
|
|
22
|
+
import argparse
|
|
23
|
+
import math
|
|
24
|
+
from datetime import datetime, timezone
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
|
|
27
|
+
# Force UTF-8 on both standard streams so non-ASCII log output survives
# redirection on platforms whose default console encoding is not UTF-8.
for _stream in (sys.stdout, sys.stderr):
    if _stream.encoding != "utf-8":
        _stream.reconfigure(encoding="utf-8")
|
|
31
|
+
|
|
32
|
+
# Root directory of the memory store; overridable via the ULTRA_MEMORY_HOME env var.
ULTRA_MEMORY_HOME = Path(os.environ.get("ULTRA_MEMORY_HOME", Path.home() / ".ultra-memory"))
|
|
33
|
+
|
|
34
|
+
# ── 贝叶斯更新 ─────────────────────────────────────────────────────────────
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string with a trailing 'Z'."""
    stamp = datetime.now(timezone.utc).isoformat()
    return stamp.replace("+00:00", "Z")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _parse_ts(ts_str: str) -> datetime:
    """Parse an ISO-8601 timestamp (accepting a 'Z' suffix).

    Falls back to the current UTC time for an empty or unparsable string.
    """
    if ts_str:
        try:
            return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
        except ValueError:
            pass
    return datetime.now(timezone.utc)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def bayesian_update(
    existing_confidence: float,
    new_evidence_confidence: float,
    existing_count: int,
) -> tuple[float, int]:
    """
    Beta-distribution Bayesian update of a field's confidence.

    Jeffreys prior: Beta(0.5, 0.5)
    posterior alpha' = prior_alpha + new_weight
    posterior beta'  = prior_beta + (1 - new_weight)
    new_confidence   = alpha' / (alpha' + beta')

    Returns: (new_confidence, new_count)
    """
    # Effective pseudo-counts from past evidence, floored at the
    # Jeffreys prior so zero-count fields still get a proper prior.
    successes = max(0.5, existing_confidence * existing_count)
    failures = max(0.5, (1 - existing_confidence) * existing_count)

    # Fold in the new piece of evidence as a fractional observation.
    successes += new_evidence_confidence
    failures += 1 - new_evidence_confidence

    posterior_mean = successes / (successes + failures)
    return round(posterior_mean, 3), existing_count + 1
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# ── 画像加载/保存 ─────────────────────────────────────────────────────────
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _load_profile() -> dict:
    """Load user_profile.json, supporting both v1 and v2 formats.

    Returns an empty v2 skeleton when the file is missing or unreadable.
    A v1 profile is migrated in memory to the v2 confidence-tracked field
    layout (it is persisted later by the next _save_profile call).
    """
    profile_file = ULTRA_MEMORY_HOME / "semantic" / "user_profile.json"
    empty = {"version": 2, "fields": {}, "last_reflection": None, "last_distillation": None}
    if not profile_file.exists():
        return empty

    try:
        with open(profile_file, encoding="utf-8") as f:
            profile = json.load(f)
    except (json.JSONDecodeError, IOError):
        return empty

    if profile.get("version") == 2:
        return profile

    # ── v1 → v2 migration ──────────────────────────────────────────────
    def _legacy_field(value, confidence: float) -> dict:
        """Wrap a legacy v1 value in a v2 confidence-tracked field entry."""
        return {
            "value": value,
            "confidence": confidence,
            "evidence_count": 1,
            "last_updated": profile.get("last_updated", _now_iso()),
            "sources": [],
            "corrected_at": None,
        }

    migrated = {"version": 2, "fields": {}, "last_reflection": None, "last_distillation": None}

    # tech_stack: normalize scalar values into a list.
    if "tech_stack" in profile:
        stack = profile["tech_stack"]
        migrated["fields"]["tech_stack"] = _legacy_field(
            stack if isinstance(stack, list) else [stack], 0.6
        )

    if "language" in profile:
        migrated["fields"]["language"] = _legacy_field(profile["language"], 0.7)

    # work_style: flatten each sub-key into a dotted field path.
    if "work_style" in profile:
        for k, v in profile["work_style"].items():
            migrated["fields"][f"work_style.{k}"] = _legacy_field(v, 0.5)

    if "observed_patterns" in profile:
        migrated["fields"]["observed_patterns"] = _legacy_field(profile["observed_patterns"], 0.5)

    migrated["last_reflection"] = profile.get("last_reflection")
    migrated["last_distillation"] = profile.get("last_distillation")

    return migrated
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _save_profile(profile: dict):
    """Atomically write the profile to semantic/user_profile.json."""
    target_dir = ULTRA_MEMORY_HOME / "semantic"
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / "user_profile.json"
    profile["version"] = 2  # always stamp the current schema version

    # Write to a sibling temp file first, then rename over the target so
    # readers never observe a half-written JSON file.
    scratch = target.with_suffix(".tmp")
    with open(scratch, "w", encoding="utf-8") as fh:
        json.dump(profile, fh, ensure_ascii=False, indent=2)
    scratch.replace(target)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
# ── 字段更新 ───────────────────────────────────────────────────────────────
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# Preference-style predicates → profile field mapping.
# NOTE(review): in the code visible in this file only the keys are consulted
# (membership test in update_profile_from_fact); the mapped value
# "preferences" does not appear to be read anywhere — confirm before relying on it.
PREFERENCE_MAPPINGS = {
    "user_prefers": "preferences",
    "user_avoids": "preferences",
    "adopted": "preferences",
    "chose": "preferences",
}
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def update_profile_from_fact(fact: dict, session_id: str):
    """
    Update the user profile from an extracted fact.

    Routes the fact by predicate:
      1. preference predicates (PREFERENCE_MAPPINGS keys) → preferences.<subject>
      2. dependency predicates → tech_stack (list-valued, append-only)
      3. behaviour predicates → work_style.behavior.<subject>

    Saves the profile only when a field was created or updated.

    BUGFIX: correction_history's confidence_delta was previously computed
    *after* the entry's confidence had been overwritten with the new value,
    so it was always 0.0. The pre-update confidence is now captured first.
    """
    predicate = fact.get("predicate", "")
    subject = fact.get("subject", "")
    obj = fact.get("object", "")
    confidence = fact.get("confidence", 0.7)

    profile = _load_profile()
    fields = profile.setdefault("fields", {})

    changed = False

    # 1. Preference-style predicates.
    if predicate in PREFERENCE_MAPPINGS:
        pref_key = f"preferences.{subject}"
        existing = fields.get(pref_key)
        if existing:
            old_conf = _apply_evidence(existing, confidence)
            # Keep at most the 10 most recent evidence sources.
            existing["sources"] = (existing.get("sources", []) + [session_id])[-10:]
            _record_value_change(existing, obj, old_conf)
        else:
            fields[pref_key] = _fresh_field(obj, confidence, session_id)
        changed = True

    # 2. Tech-stack inference (from dependency-like predicates).
    elif predicate in ("depends_on", "requires", "uses", "installed_as"):
        existing = fields.get("tech_stack")
        if existing:
            _apply_evidence(existing, confidence)
            # Append the technology if it is not already tracked.
            if isinstance(existing["value"], list) and obj not in existing["value"]:
                existing["value"] = existing["value"] + [obj]
        else:
            fields["tech_stack"] = _fresh_field([obj], confidence, session_id)
        changed = True

    # 3. Behaviour patterns (from skip/fail/block predicates).
    elif predicate in ("skips", "fails_on", "blocks"):
        pattern_key = f"work_style.behavior.{subject}"
        existing = fields.get(pattern_key)
        if existing:
            old_conf = _apply_evidence(existing, confidence)
            _record_value_change(existing, obj, old_conf)
        else:
            fields[pattern_key] = _fresh_field(obj, confidence, session_id)
        changed = True

    if changed:
        _save_profile(profile)
        print(f"[ultra-memory] ✅ 画像更新: {predicate} / {subject} = {obj}")


def _apply_evidence(entry: dict, evidence_confidence: float) -> float:
    """Bayesian-update *entry* in place; return the pre-update confidence."""
    old_conf = entry.get("confidence", 0.5)
    new_conf, new_count = bayesian_update(
        old_conf, evidence_confidence, entry.get("evidence_count", 0)
    )
    entry["confidence"] = new_conf
    entry["evidence_count"] = new_count
    entry["last_updated"] = _now_iso()
    return old_conf


def _record_value_change(entry: dict, new_value, old_confidence: float):
    """If the value changed, log it to correction_history and adopt the new value."""
    if entry.get("value") != new_value:
        entry.setdefault("correction_history", []).append({
            "corrected_at": _now_iso(),
            "old_value": entry.get("value"),
            "new_value": new_value,
            # Delta of the Bayesian update that accompanied this correction.
            "confidence_delta": round(entry.get("confidence", 0.5) - old_confidence, 3),
            "source": "auto",
        })
        entry["value"] = new_value


def _fresh_field(value, confidence: float, session_id: str) -> dict:
    """Build a brand-new profile field entry with a single piece of evidence."""
    return {
        "value": value,
        "confidence": confidence,
        "evidence_count": 1,
        "last_updated": _now_iso(),
        "sources": [session_id],
        "corrected_at": None,
        "correction_history": [],
    }
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
# ── 手动修正(SKILL.md Step 7B)────────────────────────────────────────────
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def correct_profile_field(
    field_path: str,
    new_value,
    evidence_confidence: float = 1.0,
    session_id: str | None = None,
):
    """
    Manually correct a profile field (Step 7B: error correction).

    Force-overwrites the previous value and appends an entry to
    correction_history recording the old and new state.
    """
    profile = _load_profile()
    fields = profile.setdefault("fields", {})

    previous = fields.get(field_path, {})
    old_value = previous.get("value")
    old_confidence = previous.get("confidence", 0.5)
    old_count = previous.get("evidence_count", 0)

    history = previous.get("correction_history", []) + [
        {
            "corrected_at": _now_iso(),
            "old_value": old_value,
            "new_value": new_value,
            "old_confidence": old_confidence,
            "new_confidence": evidence_confidence,
            "source": "manual",
        }
    ]

    # Forced overwrite with the manually supplied value.
    fields[field_path] = {
        "value": new_value,
        "confidence": min(1.0, evidence_confidence),
        "evidence_count": old_count + 1,
        "last_updated": _now_iso(),
        "sources": [session_id] if session_id else [],
        "corrected_at": _now_iso(),
        "correction_history": history,
    }

    _save_profile(profile)
    print(f"[ultra-memory] ✅ 画像修正: {field_path}")
    print(f" 旧值: {old_value} (conf={old_confidence:.2f})")
    print(f" 新值: {new_value} (conf={evidence_confidence:.2f})")
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
# ── 时间戳更新 ─────────────────────────────────────────────────────────────
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def update_reflection_timestamp():
    """Record that a reflection pass (Step 7A/7C) just finished."""
    snapshot = _load_profile()
    snapshot["last_reflection"] = _now_iso()
    _save_profile(snapshot)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def update_distillation_timestamp():
    """Record that a distillation pass (Step 7C) just finished."""
    snapshot = _load_profile()
    snapshot["last_distillation"] = _now_iso()
    _save_profile(snapshot)
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
# ── CLI ─────────────────────────────────────────────────────────────────────
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="更新用户画像")
    parser.add_argument("--field", help="字段路径(dot-notation,如 tech_stack)")
    parser.add_argument("--value", help="新值")
    parser.add_argument("--evidence", type=float, default=0.8,
                        help="证据置信度 (0.0-1.0)")
    parser.add_argument("--source-session", default=None,
                        help="来源 session ID")
    parser.add_argument(
        "--correct", action="store_true",
        help="手动修正模式(强制覆盖旧值)"
    )
    parser.add_argument(
        "--update-reflection", action="store_true",
        help="仅更新时间戳"
    )
    # FIX: update_distillation_timestamp() existed but was unreachable
    # from the CLI; expose it with its own flag (backward-compatible).
    parser.add_argument(
        "--update-distillation", action="store_true",
        help="仅更新 last_distillation 时间戳"
    )
    args = parser.parse_args()

    if args.update_reflection:
        update_reflection_timestamp()
        print("[ultra-memory] ✅ last_reflection 已更新")
    elif args.update_distillation:
        update_distillation_timestamp()
        print("[ultra-memory] ✅ last_distillation 已更新")
    elif args.correct and args.field and args.value is not None:
        # Manual correction mode: forced overwrite of the field.
        correct_profile_field(
            args.field, args.value, args.evidence, args.source_session
        )
    elif args.field and args.value is not None:
        # Wrap the CLI arguments into a fact-shaped dict.
        fact = {
            "predicate": args.field,
            "subject": "",
            "object": args.value,
            "confidence": args.evidence,
        }
        update_profile_from_fact(fact, args.source_session or "cli")
    else:
        # No action requested: print a summary of the current profile.
        profile = _load_profile()
        print(f"[ultra-memory] 当前画像版本: {profile.get('version', '?')}")
        print(f"字段数: {len(profile.get('fields', {}))}")
        print(f"last_reflection: {profile.get('last_reflection')}")
        print(f"last_distillation: {profile.get('last_distillation')}")
        for path, data in profile.get("fields", {}).items():
            print(f" {path}: {data.get('value')} "
                  f"(conf={data.get('confidence', 0):.2f}, "
                  f"n={data.get('evidence_count', 0)})")
|