ultra-memory 4.0.0 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -32
- package/SKILL.md +80 -1
- package/package.json +13 -6
- package/platform/server.py +23 -1
- package/scripts/__pycache__/auto_decay.cpython-313.pyc +0 -0
- package/scripts/__pycache__/conflict_detector.cpython-313.pyc +0 -0
- package/scripts/__pycache__/log_op.cpython-313.pyc +0 -0
- package/scripts/__pycache__/recall.cpython-313.pyc +0 -0
- package/scripts/__pycache__/summarize.cpython-313.pyc +0 -0
- package/scripts/auto_decay.py +75 -3
- package/scripts/conflict_detector.py +4 -3
- package/scripts/log_op.py +93 -21
- package/scripts/manage.py +426 -0
- package/scripts/multimodal/extract_from_docx.py +167 -0
- package/scripts/recall.py +593 -102
- package/scripts/summarize.py +38 -1
package/scripts/log_op.py
CHANGED
|
@@ -8,10 +8,45 @@ import os
|
|
|
8
8
|
import sys
|
|
9
9
|
import json
|
|
10
10
|
import re
|
|
11
|
+
import time
|
|
12
|
+
import logging
|
|
11
13
|
import argparse
|
|
14
|
+
import contextlib
|
|
12
15
|
from datetime import datetime, timezone
|
|
13
16
|
from pathlib import Path
|
|
14
17
|
|
|
18
|
+
logging.basicConfig(
|
|
19
|
+
level=logging.WARNING,
|
|
20
|
+
format="[ultra-memory] %(levelname)s %(message)s",
|
|
21
|
+
)
|
|
22
|
+
_log = logging.getLogger("ultra-memory.log_op")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@contextlib.contextmanager
|
|
26
|
+
def _advisory_lock(lock_path: Path, timeout: float = 5.0):
|
|
27
|
+
"""跨平台建议性文件锁(.lock 哨兵文件)"""
|
|
28
|
+
deadline = time.monotonic() + timeout
|
|
29
|
+
acquired = False
|
|
30
|
+
while True:
|
|
31
|
+
try:
|
|
32
|
+
fd = os.open(str(lock_path), os.O_CREAT | os.O_EXCL | os.O_WRONLY)
|
|
33
|
+
os.close(fd)
|
|
34
|
+
acquired = True
|
|
35
|
+
break
|
|
36
|
+
except FileExistsError:
|
|
37
|
+
if time.monotonic() >= deadline:
|
|
38
|
+
_log.warning("文件锁等待超时 %s,直接继续写入", lock_path)
|
|
39
|
+
break
|
|
40
|
+
time.sleep(0.05)
|
|
41
|
+
try:
|
|
42
|
+
yield
|
|
43
|
+
finally:
|
|
44
|
+
if acquired:
|
|
45
|
+
try:
|
|
46
|
+
lock_path.unlink(missing_ok=True)
|
|
47
|
+
except Exception:
|
|
48
|
+
pass
|
|
49
|
+
|
|
15
50
|
if sys.stdout.encoding != "utf-8":
|
|
16
51
|
sys.stdout.reconfigure(encoding="utf-8")
|
|
17
52
|
if sys.stderr.encoding != "utf-8":
|
|
@@ -19,6 +54,20 @@ if sys.stderr.encoding != "utf-8":
|
|
|
19
54
|
|
|
20
55
|
ULTRA_MEMORY_HOME = Path(os.environ.get("ULTRA_MEMORY_HOME", Path.home() / ".ultra-memory"))
|
|
21
56
|
|
|
57
|
+
# 记忆注入标记(防止反馈环:AI 把自己的记忆输出又记录进去导致自引用积累)
|
|
58
|
+
MEMORY_INJECTION_PATTERNS = [
|
|
59
|
+
r'\[ultra-memory\][^\n]*', # 脚本自身的打印输出
|
|
60
|
+
r'MEMORY_READY[^\n]*', # 初始化信号
|
|
61
|
+
r'COMPRESS_SUGGESTED[^\n]*', # 压缩建议信号
|
|
62
|
+
r'SESSION_ID=sess_[A-Za-z0-9_]+', # 会话 ID 注入
|
|
63
|
+
r'session_id:\s*sess_[A-Za-z0-9_]+',
|
|
64
|
+
r'\[RECALL\][^\n]*', # recall.py 的输出头
|
|
65
|
+
r'\[ops #\d+[^\]]*\][^\n]*', # format_result 的 ops 格式
|
|
66
|
+
r'\[知识库[^\]]*\][^\n]*', # format_result 的知识库格式
|
|
67
|
+
r'\[实体/[^\]]*\][^\n]*', # format_result 的实体格式
|
|
68
|
+
r'\[摘要\][^\n]*', # format_result 的摘要格式
|
|
69
|
+
]
|
|
70
|
+
|
|
22
71
|
# 敏感词正则(防止记录密码/密钥)
|
|
23
72
|
SENSITIVE_PATTERNS = [
|
|
24
73
|
r'(?i)(password|passwd|pwd)\s*[=:]\s*\S+',
|
|
@@ -192,10 +241,20 @@ FILE_EXT_TAG_MAP = {
|
|
|
192
241
|
}
|
|
193
242
|
|
|
194
243
|
|
|
244
|
+
def filter_memory_markers(text: str) -> str:
    """Strip memory-injection markers to prevent a feedback loop.

    Without this, the AI could record its own memory output again,
    accumulating self-referential noise over time.
    """
    if not text:
        return text
    cleaned = text
    for marker in MEMORY_INJECTION_PATTERNS:
        cleaned = re.sub(marker, "", cleaned, flags=re.IGNORECASE)
    return cleaned.strip()
|
|
251
|
+
|
|
252
|
+
|
|
195
253
|
def sanitize(text: str) -> str:
    """Redact sensitive information and strip feedback-loop memory markers."""
    if not text:
        return text
    # Strip memory-injection markers first, then redact secrets.
    text = filter_memory_markers(text)
    for pattern in SENSITIVE_PATTERNS:
        text = re.sub(pattern, "[REDACTED]", text)
    return text
|
|
@@ -284,18 +343,26 @@ def log_op(
|
|
|
284
343
|
# 2A:画像冲突检测(user_instruction/decision + profile_update)
|
|
285
344
|
if op_type in ("user_instruction", "decision") and detail.get("profile_update"):
|
|
286
345
|
try:
|
|
346
|
+
import sys as _sys
|
|
347
|
+
_scripts_dir_cd = Path(__file__).parent
|
|
348
|
+
if str(_scripts_dir_cd) not in _sys.path:
|
|
349
|
+
_sys.path.insert(0, str(_scripts_dir_cd))
|
|
287
350
|
from conflict_detector import detect_profile_conflict, mark_profile_superseded
|
|
288
351
|
conflicts = detect_profile_conflict(detail["profile_update"], ULTRA_MEMORY_HOME)
|
|
289
352
|
if conflicts:
|
|
290
353
|
entry["detail"]["profile_conflicts"] = conflicts
|
|
291
354
|
mark_profile_superseded(ULTRA_MEMORY_HOME, conflicts)
|
|
292
355
|
print(f"[ultra-memory] ⚡ 检测到 {len(conflicts)} 处画像矛盾,旧记录已标记失效")
|
|
293
|
-
except Exception:
|
|
294
|
-
|
|
356
|
+
except Exception as _e:
|
|
357
|
+
_log.debug("画像冲突检测失败(不影响主流程): %s", _e)
|
|
295
358
|
|
|
296
359
|
# 2B:知识库冲突检测(milestone/decision + knowledge_entry)
|
|
297
360
|
if op_type in ("milestone", "decision") and detail.get("knowledge_entry"):
|
|
298
361
|
try:
|
|
362
|
+
import sys as _sys
|
|
363
|
+
_scripts_dir_cd = Path(__file__).parent
|
|
364
|
+
if str(_scripts_dir_cd) not in _sys.path:
|
|
365
|
+
_sys.path.insert(0, str(_scripts_dir_cd))
|
|
299
366
|
from conflict_detector import detect_knowledge_conflict, mark_superseded
|
|
300
367
|
conflicts = detect_knowledge_conflict(detail["knowledge_entry"], ULTRA_MEMORY_HOME)
|
|
301
368
|
if conflicts:
|
|
@@ -304,20 +371,24 @@ def log_op(
|
|
|
304
371
|
mark_superseded(ULTRA_MEMORY_HOME, kb_path, seq_list)
|
|
305
372
|
entry["detail"]["knowledge_conflicts"] = conflicts
|
|
306
373
|
print(f"[ultra-memory] ⚡ 检测到 {len(conflicts)} 条知识库矛盾,旧记录已标记失效")
|
|
307
|
-
except Exception:
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
# 追加写入(append-only
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
meta["
|
|
319
|
-
|
|
320
|
-
|
|
374
|
+
except Exception as _e:
|
|
375
|
+
_log.debug("知识库冲突检测失败(不影响主流程): %s", _e)
|
|
376
|
+
|
|
377
|
+
# 追加写入(append-only,永不覆盖);文件锁保护并发写入
|
|
378
|
+
_lock_file = ops_file.with_suffix(".lock")
|
|
379
|
+
with _advisory_lock(_lock_file):
|
|
380
|
+
with open(ops_file, "a", encoding="utf-8") as f:
|
|
381
|
+
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
|
382
|
+
|
|
383
|
+
# 更新 meta(在锁内,保证 op_count 单调递增)
|
|
384
|
+
meta["op_count"] = seq
|
|
385
|
+
meta["last_op_at"] = entry["ts"]
|
|
386
|
+
if op_type == "milestone":
|
|
387
|
+
meta["last_milestone"] = summary
|
|
388
|
+
_tmp_meta = meta_file.with_suffix(".tmp")
|
|
389
|
+
with open(_tmp_meta, "w", encoding="utf-8") as f:
|
|
390
|
+
json.dump(meta, f, ensure_ascii=False, indent=2)
|
|
391
|
+
_tmp_meta.replace(meta_file)
|
|
321
392
|
|
|
322
393
|
# 自动提取结构化实体(写入 semantic/entities.jsonl)
|
|
323
394
|
try:
|
|
@@ -327,8 +398,8 @@ def log_op(
|
|
|
327
398
|
_sys.path.insert(0, str(_scripts_dir))
|
|
328
399
|
from extract_entities import extract_and_store
|
|
329
400
|
extract_and_store(session_id, dict(entry))
|
|
330
|
-
except Exception:
|
|
331
|
-
|
|
401
|
+
except Exception as _e:
|
|
402
|
+
_log.debug("实体提取失败(不影响主流程): %s", _e)
|
|
332
403
|
|
|
333
404
|
# 自动提取结构化事实(写入 evolution/facts.jsonl,异步不阻塞)
|
|
334
405
|
try:
|
|
@@ -350,8 +421,9 @@ def log_op(
|
|
|
350
421
|
|
|
351
422
|
# 多模态处理:检测媒体文件并后台提取
|
|
352
423
|
try:
|
|
353
|
-
_media_exts = {".pdf": "extract_from_pdf.py", ".
|
|
354
|
-
".
|
|
424
|
+
_media_exts = {".pdf": "extract_from_pdf.py", ".docx": "extract_from_docx.py",
|
|
425
|
+
".png": "extract_from_image.py", ".jpg": "extract_from_image.py",
|
|
426
|
+
".jpeg": "extract_from_image.py",
|
|
355
427
|
".mp4": "transcribe_video.py", ".avi": "transcribe_video.py",
|
|
356
428
|
".mov": "transcribe_video.py"}
|
|
357
429
|
_file_path = detail.get("path", "")
|
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
ultra-memory 管理命令行工具
|
|
4
|
+
提供 list / search / stats / export / gc / tier 六个子命令。
|
|
5
|
+
|
|
6
|
+
用法示例:
|
|
7
|
+
python3 scripts/manage.py list
|
|
8
|
+
python3 scripts/manage.py search "pandas 数据清洗"
|
|
9
|
+
python3 scripts/manage.py stats
|
|
10
|
+
python3 scripts/manage.py export --format json --output backup.json
|
|
11
|
+
python3 scripts/manage.py gc --dry-run
|
|
12
|
+
python3 scripts/manage.py tier --session sess_xxx
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import sys
|
|
17
|
+
import json
|
|
18
|
+
import argparse
|
|
19
|
+
from datetime import datetime, timezone, timedelta
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from collections import Counter
|
|
22
|
+
|
|
23
|
+
if sys.stdout.encoding != "utf-8":
|
|
24
|
+
sys.stdout.reconfigure(encoding="utf-8")
|
|
25
|
+
if sys.stderr.encoding != "utf-8":
|
|
26
|
+
sys.stderr.reconfigure(encoding="utf-8")
|
|
27
|
+
|
|
28
|
+
ULTRA_MEMORY_HOME = Path(os.environ.get("ULTRA_MEMORY_HOME", Path.home() / ".ultra-memory"))
|
|
29
|
+
SCRIPTS_DIR = Path(__file__).parent
|
|
30
|
+
if str(SCRIPTS_DIR) not in sys.path:
|
|
31
|
+
sys.path.insert(0, str(SCRIPTS_DIR))
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# ── 公共工具 ────────────────────────────────────────────────────────────────
|
|
35
|
+
|
|
36
|
+
def _load_meta(session_dir: Path) -> dict:
|
|
37
|
+
meta_file = session_dir / "meta.json"
|
|
38
|
+
if not meta_file.exists():
|
|
39
|
+
return {}
|
|
40
|
+
try:
|
|
41
|
+
return json.loads(meta_file.read_text(encoding="utf-8"))
|
|
42
|
+
except Exception:
|
|
43
|
+
return {}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _load_ops(session_dir: Path) -> list[dict]:
|
|
47
|
+
ops_file = session_dir / "ops.jsonl"
|
|
48
|
+
if not ops_file.exists():
|
|
49
|
+
return []
|
|
50
|
+
ops = []
|
|
51
|
+
for line in ops_file.read_text(encoding="utf-8").splitlines():
|
|
52
|
+
line = line.strip()
|
|
53
|
+
if line:
|
|
54
|
+
try:
|
|
55
|
+
ops.append(json.loads(line))
|
|
56
|
+
except json.JSONDecodeError:
|
|
57
|
+
pass
|
|
58
|
+
return ops
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _all_sessions() -> list[tuple[Path, dict]]:
    """Return ``[(session_dir, meta), ...]`` sorted by last activity, newest first."""
    root = ULTRA_MEMORY_HOME / "sessions"
    if not root.exists():
        return []
    # Keep only directories that have readable, non-empty metadata.
    found = [
        (child, meta)
        for child in root.iterdir()
        if child.is_dir() and (meta := _load_meta(child))
    ]
    return sorted(found, key=lambda pair: pair[1].get("last_op_at", ""), reverse=True)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# ── list 子命令 ──────────────────────────────────────────────────────────────
|
|
77
|
+
|
|
78
|
+
def cmd_list(args):
    """List every session: project name, op count, last milestone and last-active time."""
    all_found = _all_sessions()
    if not all_found:
        print("无会话记录。运行 init.py 创建第一个会话。")
        return

    wanted = getattr(args, "project", None)
    # Fixed-width header followed by one row per (matching) session.
    print(f"{'会话 ID':<28} {'项目':<20} {'操作数':>6} {'最后里程碑':<35} {'最后活跃'}")
    print("-" * 110)
    for session_dir, meta in all_found:
        project = meta.get("project", "default")
        if wanted and wanted not in project:
            continue
        row = (
            meta.get("session_id", session_dir.name),
            project,
            meta.get("op_count", 0),
            (meta.get("last_milestone") or "—")[:34],
            (meta.get("last_op_at") or "")[:16].replace("T", " "),
        )
        print(f"{row[0]:<28} {row[1]:<20} {row[2]:>6} {row[3]:<35} {row[4]}")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# ── search 子命令 ──────────────────────────────────────────────────────────
|
|
100
|
+
|
|
101
|
+
def cmd_search(args):
    """Full-text keyword search over the op logs of every session."""
    needle = args.query.lower()
    limit = getattr(args, "limit", 20)

    hits = []
    for session_dir, meta in _all_sessions():
        for op in _load_ops(session_dir):
            # Match against the summary plus the serialized detail payload.
            haystack = op.get("summary", "") + " " + json.dumps(op.get("detail", {}), ensure_ascii=False)
            if needle not in haystack.lower():
                continue
            hits.append({
                "session": meta.get("session_id", session_dir.name),
                "project": meta.get("project", ""),
                "seq": op.get("seq", 0),
                "ts": op.get("ts", "")[:16].replace("T", " "),
                "type": op.get("type", ""),
                "summary": op.get("summary", ""),
                "tier": op.get("tier", ""),
            })

    if not hits:
        print(f"未找到包含「{args.query}」的记录")
        return

    print(f"找到 {len(hits)} 条记录(显示前 {limit} 条):\n")
    for h in hits[:limit]:
        tier_tag = f" [{h['tier']}]" if h["tier"] else ""
        print(f"  [{h['ts']}] {h['project']}/{h['session']} #{h['seq']} {h['type']}{tier_tag}")
        print(f"    {h['summary'][:100]}\n")
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# ── stats 子命令 ──────────────────────────────────────────────────────────
|
|
134
|
+
|
|
135
|
+
def cmd_stats(args):
    """Print global statistics for the whole memory store (sessions, ops, KB, entities, profile)."""
    sessions = _all_sessions()
    total_ops = 0
    type_dist: Counter = Counter()   # op-type -> count, across all sessions
    tier_dist: Counter = Counter()   # memory tier -> count
    proj_dist: Counter = Counter()   # project name -> number of sessions
    kb_count = 0
    ent_count = 0

    # Aggregate per-op distributions; proj_dist counts sessions, not ops.
    for d, meta in sessions:
        ops = _load_ops(d)
        total_ops += len(ops)
        for op in ops:
            type_dist[op.get("type", "unknown")] += 1
            tier_dist[op.get("tier", "unclassified")] += 1
        proj_dist[meta.get("project", "default")] += 1

    # Count non-blank lines in the semantic-layer JSONL stores.
    kb_file = ULTRA_MEMORY_HOME / "semantic" / "knowledge_base.jsonl"
    ent_file = ULTRA_MEMORY_HOME / "semantic" / "entities.jsonl"
    if kb_file.exists():
        kb_count = sum(1 for l in kb_file.read_text(encoding="utf-8").splitlines() if l.strip())
    if ent_file.exists():
        ent_count = sum(1 for l in ent_file.read_text(encoding="utf-8").splitlines() if l.strip())

    profile_file = ULTRA_MEMORY_HOME / "semantic" / "user_profile.json"
    has_profile = profile_file.exists()

    print(f"\n{'='*50}")
    print(f"  ultra-memory 记忆库统计")
    print(f"{'='*50}")
    print(f"  会话总数   : {len(sessions)}")
    print(f"  操作总数   : {total_ops}")
    print(f"  知识库条目 : {kb_count}")
    print(f"  实体索引   : {ent_count}")
    print(f"  用户画像   : {'已建立' if has_profile else '未建立'}")
    print(f"  存储路径   : {ULTRA_MEMORY_HOME}")
    print()

    if proj_dist:
        print(f"  项目分布(操作数):")
        for proj, cnt in proj_dist.most_common(10):
            print(f"    {proj:<25} {cnt} 个会话")
        print()

    if type_dist:
        print(f"  操作类型分布:")
        for t, c in type_dist.most_common():
            # ASCII-art bar scaled so the busiest types stay within 20 chars.
            bar = "█" * min(c // max(1, total_ops // 30), 20)
            print(f"    {t:<20} {c:>5} {bar}")
        print()

    if tier_dist:
        print(f"  记忆分层分布:")
        for tier in ("core", "working", "peripheral", "unclassified"):
            c = tier_dist[tier]
            if c:
                pct = c * 100 // max(total_ops, 1)
                print(f"    {tier:<15} {c:>5} ({pct}%)")
        print()
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# ── export 子命令 ──────────────────────────────────────────────────────────
|
|
198
|
+
|
|
199
|
+
def cmd_export(args):
    """Export the entire memory store as JSON or Markdown (to a file or stdout)."""
    fmt = getattr(args, "format", "json")
    output = getattr(args, "output", None)
    sessions = _all_sessions()

    if fmt == "json":
        data = {
            "exported_at": datetime.now(timezone.utc).isoformat(),
            "sessions": [],
            "knowledge_base": [],
            "entities": [],
            "user_profile": {},
        }
        # Each session contributes its meta, full op log, and summary text.
        for d, meta in sessions:
            data["sessions"].append({
                "meta": meta,
                "ops": _load_ops(d),
                "summary": (d / "summary.md").read_text(encoding="utf-8")
                           if (d / "summary.md").exists() else "",
            })

        # Knowledge base / entities: best-effort line-by-line JSONL parse.
        kb_file = ULTRA_MEMORY_HOME / "semantic" / "knowledge_base.jsonl"
        if kb_file.exists():
            for line in kb_file.read_text(encoding="utf-8").splitlines():
                if line.strip():
                    try:
                        data["knowledge_base"].append(json.loads(line))
                    except json.JSONDecodeError:
                        pass

        ent_file = ULTRA_MEMORY_HOME / "semantic" / "entities.jsonl"
        if ent_file.exists():
            for line in ent_file.read_text(encoding="utf-8").splitlines():
                if line.strip():
                    try:
                        data["entities"].append(json.loads(line))
                    except json.JSONDecodeError:
                        pass

        profile_file = ULTRA_MEMORY_HOME / "semantic" / "user_profile.json"
        if profile_file.exists():
            try:
                data["user_profile"] = json.loads(profile_file.read_text(encoding="utf-8"))
            except Exception:
                pass

        content = json.dumps(data, ensure_ascii=False, indent=2)

    else:  # markdown: one section per session, containing its summary.md
        lines = [f"# ultra-memory 导出\n导出时间: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M')} UTC\n"]
        for d, meta in sessions:
            sid = meta.get("session_id", d.name)
            lines.append(f"\n## 会话: {sid}({meta.get('project', 'default')})\n")
            summary_file = d / "summary.md"
            if summary_file.exists():
                lines.append(summary_file.read_text(encoding="utf-8"))
        content = "\n".join(lines)

    if output:
        Path(output).write_text(content, encoding="utf-8")
        print(f"✅ 已导出到 {output}({len(content)} 字符)")
    else:
        print(content)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
# ── gc 子命令 ──────────────────────────────────────────────────────────────
|
|
266
|
+
|
|
267
|
+
def cmd_gc(args):
    """Garbage-collect sessions inactive for N days that hold no core memory.

    A session is a candidate only when its last op is older than ``--days``
    AND it contains neither core-tier ops nor any milestone/decision.
    Dry-run by default; ``--no-dry-run`` performs the actual deletion.
    """
    import shutil  # hoisted: previously re-imported on every loop iteration

    days = getattr(args, "days", 90)
    dry_run = getattr(args, "dry_run", True)
    cutoff = datetime.now(timezone.utc) - timedelta(days=days)
    sessions = _all_sessions()

    candidates = []
    for d, meta in sessions:
        last_op_str = meta.get("last_op_at", "")
        if not last_op_str:
            continue
        try:
            last_op = datetime.fromisoformat(last_op_str.replace("Z", "+00:00"))
        except ValueError:
            continue
        if last_op.tzinfo is None:
            # A naive timestamp would make the aware comparison below raise
            # TypeError (uncaught). Assume UTC for naive values.
            last_op = last_op.replace(tzinfo=timezone.utc)
        if last_op >= cutoff:
            continue
        # Only reclaim sessions holding no core memory.
        ops = _load_ops(d)
        core = sum(1 for op in ops if op.get("tier") == "core"
                   or op.get("type") in ("milestone", "decision"))
        if core == 0:
            candidates.append((d, meta, last_op))

    if not candidates:
        print(f"✅ 无符合清理条件的会话({days}天未活跃 且 无核心操作)")
        return

    mode = "(预演,未实际删除)" if dry_run else ""
    print(f"发现 {len(candidates)} 个可清理会话 {mode}:\n")
    freed = 0
    for d, meta, last_op in candidates:
        # Measure the reclaimable size before (possibly) deleting the tree.
        size = sum(f.stat().st_size for f in d.rglob("*") if f.is_file())
        freed += size
        print(f"  {meta.get('session_id', d.name):<30} {meta.get('project', ''):<20}"
              f" 最后活跃: {str(last_op)[:10]} 大小: {size//1024}KB")
        if not dry_run:
            shutil.rmtree(d)

    print(f"\n{'预计' if dry_run else ''}释放空间: {freed//1024}KB")
    if dry_run:
        print("加 --no-dry-run 参数执行实际清理")
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
# ── tier 子命令 ──────────────────────────────────────────────────────────────
|
|
314
|
+
|
|
315
|
+
def cmd_tier(args):
    """Backfill the ``tier`` classification field on ops of one (or every) session."""
    # Deferred import: summarize.py lives in the same scripts directory
    # (added to sys.path at module import time).
    from summarize import classify_tier

    session_id = getattr(args, "session", None)
    sessions = _all_sessions()

    if session_id:
        sessions = [(d, m) for d, m in sessions if m.get("session_id") == session_id]
        if not sessions:
            print(f"❌ 会话不存在: {session_id}")
            return

    total_updated = 0
    for d, meta in sessions:
        ops_file = d / "ops.jsonl"
        if not ops_file.exists():
            continue

        ops = _load_ops(d)
        updated = 0
        tmp = ops_file.with_suffix(".tmp")

        # Rewrite into a temp file, then atomically swap only if anything changed.
        with open(tmp, "w", encoding="utf-8") as fout:
            for op in ops:
                if "tier" not in op:
                    op["tier"] = classify_tier(op)
                    updated += 1
                fout.write(json.dumps(op, ensure_ascii=False) + "\n")

        if updated > 0:
            tmp.replace(ops_file)
            print(f"  {meta.get('session_id', d.name)}: 已补写 {updated} 条 tier 标记")
            total_updated += updated
        else:
            tmp.unlink(missing_ok=True)

    print(f"\n✅ 共更新 {total_updated} 条操作的 tier 分级")
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
# ── 主入口 ───────────────────────────────────────────────────────────────────
|
|
356
|
+
|
|
357
|
+
def main():
    """CLI entry point: parse arguments and dispatch to the chosen subcommand."""
    parser = argparse.ArgumentParser(
        description="ultra-memory 记忆库管理工具",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
子命令:
  list     列出所有会话
  search   跨会话全文搜索
  stats    显示全局统计
  export   导出为 JSON 或 Markdown
  gc       垃圾回收旧会话
  tier     补写记忆分层标记
""",
    )
    parser.add_argument("--storage", default=None, help="覆盖 ULTRA_MEMORY_HOME 路径")

    sub = parser.add_subparsers(dest="command")

    # list
    p_list = sub.add_parser("list", help="列出所有会话")
    p_list.add_argument("--project", default=None, help="按项目名过滤")

    # search
    p_search = sub.add_parser("search", help="跨会话全文搜索")
    p_search.add_argument("query", help="搜索关键词")
    p_search.add_argument("--limit", type=int, default=20, help="最多返回条数(默认20)")

    # stats
    sub.add_parser("stats", help="全局统计信息")

    # export
    p_export = sub.add_parser("export", help="导出记忆为 JSON 或 Markdown")
    p_export.add_argument("--format", choices=["json", "markdown"], default="json")
    p_export.add_argument("--output", default=None, help="输出文件路径(默认标准输出)")

    # gc
    p_gc = sub.add_parser("gc", help="垃圾回收旧会话")
    p_gc.add_argument("--days", type=int, default=90, help="超过多少天未活跃(默认90)")
    p_gc.add_argument("--no-dry-run", dest="dry_run", action="store_false", help="实际执行删除")
    p_gc.set_defaults(dry_run=True)

    # tier
    p_tier = sub.add_parser("tier", help="补写记忆分层标记")
    p_tier.add_argument("--session", default=None, help="指定会话 ID(默认所有会话)")

    args = parser.parse_args()

    if args.storage:
        # Rebind the module global AND the env var so helper modules imported
        # later (e.g. summarize) see the overridden storage root.
        global ULTRA_MEMORY_HOME
        ULTRA_MEMORY_HOME = Path(args.storage)
        os.environ["ULTRA_MEMORY_HOME"] = str(ULTRA_MEMORY_HOME)

    # Table-driven dispatch: subcommand name -> handler.
    dispatch = {
        "list": cmd_list,
        "search": cmd_search,
        "stats": cmd_stats,
        "export": cmd_export,
        "gc": cmd_gc,
        "tier": cmd_tier,
    }

    # No (or unknown) subcommand: show usage instead of failing.
    if args.command not in dispatch:
        parser.print_help()
        return

    dispatch[args.command](args)


if __name__ == "__main__":
    main()
|