@morningljn/mnemo 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/retriever.js +30 -42
- package/dist/retriever.js.map +1 -1
- package/dist/schema.d.ts +1 -1
- package/dist/schema.js +21 -10
- package/dist/schema.js.map +1 -1
- package/dist/server.js +34 -1
- package/dist/server.js.map +1 -1
- package/dist/store.d.ts +37 -0
- package/dist/store.js +166 -9
- package/dist/store.js.map +1 -1
- package/dist/types.d.ts +4 -1
- package/docs/superpowers/plans/2026-05-16-memory-self-learning.md +932 -0
- package/openspec/changes/memory-self-learning/.openspec.yaml +2 -0
- package/openspec/changes/memory-self-learning/design.md +174 -0
- package/openspec/changes/memory-self-learning/proposal.md +35 -0
- package/openspec/changes/memory-self-learning/specs/fact-retrieval/spec.md +35 -0
- package/openspec/changes/memory-self-learning/specs/fact-summary/spec.md +45 -0
- package/openspec/changes/memory-self-learning/specs/length-penalty/spec.md +27 -0
- package/openspec/changes/memory-self-learning/specs/retrieval-log/spec.md +41 -0
- package/openspec/changes/memory-self-learning/specs/self-learning/spec.md +68 -0
- package/openspec/changes/memory-self-learning/tasks.md +56 -0
- package/package.json +1 -1
- package/src/retriever.ts +32 -44
- package/src/schema.ts +21 -10
- package/src/server.ts +36 -1
- package/src/store.ts +215 -9
- package/src/types.ts +4 -1
- package/tests/retriever.test.ts +53 -0
- package/tests/store.test.ts +112 -0
package/src/retriever.ts
CHANGED
|
@@ -106,38 +106,32 @@ export class FactRetriever {
|
|
|
106
106
|
const inferred = this.categoryInferFallback(searchQuery, minTrust, limit)
|
|
107
107
|
if (inferred.length > 0) return inferred
|
|
108
108
|
}
|
|
109
|
-
// 个人/身份相关的短查询触发 trust fallback
|
|
110
|
-
if (this.isPersonalQuery(
|
|
109
|
+
// 个人/身份相关的短查询触发 trust fallback(用原始 query,避免 refineQuery 拆词导致正则失配)
|
|
110
|
+
if (this.isPersonalQuery(query)) {
|
|
111
111
|
return this.trustFallback(category, minTrust, limit)
|
|
112
112
|
}
|
|
113
113
|
return []
|
|
114
114
|
}
|
|
115
115
|
|
|
116
|
-
// Stage 2-4: Jaccard 重排序 + 信任评分 + 时间衰减
|
|
117
|
-
// 动态权重:短查询偏 FTS,长查询偏 Jaccard
|
|
116
|
+
// Stage 2-4: Jaccard 重排序 + 信任评分 + 时间衰减 + length penalty
|
|
118
117
|
const queryTokens = this.tokenize(searchQuery)
|
|
119
|
-
const tokenCount = queryTokens.size
|
|
120
|
-
const ftsWeight = tokenCount <= 3 ? 0.7 : 0.3
|
|
121
|
-
const jaccardWeight = tokenCount <= 3 ? 0.3 : 0.7
|
|
122
118
|
|
|
123
119
|
const scored: ScoredFact[] = []
|
|
124
120
|
|
|
125
121
|
for (const fact of candidates) {
|
|
126
|
-
|
|
122
|
+
// summary 优先用于匹配
|
|
123
|
+
const matchText = fact.summary ?? fact.content
|
|
124
|
+
const matchTokens = this.tokenize(matchText)
|
|
127
125
|
const tagTokens = this.tokenize(fact.tags)
|
|
128
|
-
const allTokens = new Set([...
|
|
126
|
+
const allTokens = new Set([...matchTokens, ...tagTokens])
|
|
129
127
|
|
|
130
128
|
const jaccard = this.jaccardSimilarity(queryTokens, allTokens)
|
|
131
|
-
// Containment: 查询 token 在事实 token 中的覆盖率
|
|
132
129
|
const qInF = this.containmentScore(queryTokens, allTokens)
|
|
133
|
-
|
|
134
|
-
// 混合相似度:Jaccard + Containment(简化版,移除 keywordScore)
|
|
135
130
|
const similarity = 0.3 * jaccard + 0.7 * qInF
|
|
136
131
|
const ftsScore = fact.ftsRank
|
|
137
132
|
|
|
138
|
-
//
|
|
139
|
-
const relevance =
|
|
140
|
-
|
|
133
|
+
// 静态权重 0.5/0.5(回退 v3 动态权重)
|
|
134
|
+
const relevance = 0.5 * ftsScore + 0.5 * similarity
|
|
141
135
|
let score = relevance * fact.trustScore
|
|
142
136
|
|
|
143
137
|
// 时间衰减
|
|
@@ -145,37 +139,22 @@ export class FactRetriever {
|
|
|
145
139
|
score *= this.temporalDecay(fact.updatedAt || fact.createdAt)
|
|
146
140
|
}
|
|
147
141
|
|
|
142
|
+
// Length penalty:基于 matchText 长度
|
|
143
|
+
score *= Math.min(1.0, 300 / matchText.length)
|
|
144
|
+
|
|
148
145
|
scored.push({ ...fact, score })
|
|
149
146
|
}
|
|
150
147
|
|
|
151
148
|
scored.sort((a, b) => b.score - a.score)
|
|
152
149
|
|
|
153
|
-
//
|
|
154
|
-
const
|
|
155
|
-
const gated = scored.filter(s => s.score >= RELEVANCE_THRESHOLD)
|
|
156
|
-
const pool = gated.length > 0 ? gated : scored
|
|
157
|
-
|
|
158
|
-
// 内容去重:Jaccard > 0.7 的只保留高分
|
|
159
|
-
const results: ScoredFact[] = []
|
|
160
|
-
for (const candidate of pool) {
|
|
161
|
-
let isDuplicate = false
|
|
162
|
-
const candidateTokens = this.tokenize(candidate.content)
|
|
163
|
-
for (const kept of results) {
|
|
164
|
-
const keptTokens = this.tokenize(kept.content)
|
|
165
|
-
if (this.jaccardSimilarity(candidateTokens, keptTokens) > 0.7) {
|
|
166
|
-
isDuplicate = true
|
|
167
|
-
break
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
if (!isDuplicate) {
|
|
171
|
-
results.push(candidate)
|
|
172
|
-
if (results.length >= limit) break
|
|
173
|
-
}
|
|
174
|
-
}
|
|
150
|
+
// 取 limit 条(不再做 relevance gate 和 content dedup)
|
|
151
|
+
const results = scored.slice(0, limit)
|
|
175
152
|
|
|
176
153
|
// 检索追踪:递增 retrieval_count + top3 信任刷新
|
|
177
154
|
if (results.length > 0) {
|
|
178
155
|
this.trackRetrieval(results)
|
|
156
|
+
// 记录检索日志
|
|
157
|
+
this.store.logRetrieval(searchQuery, results.map(r => ({ id: r.factId, score: Math.round(r.score * 1000) / 1000 })))
|
|
179
158
|
}
|
|
180
159
|
|
|
181
160
|
// 缓存存储 + 指标记录
|
|
@@ -295,9 +274,11 @@ export class FactRetriever {
|
|
|
295
274
|
category: r.category as FactCategory,
|
|
296
275
|
tags: r.tags,
|
|
297
276
|
keywords: r.keywords ?? '[]',
|
|
277
|
+
summary: (r as any).summary ?? null,
|
|
298
278
|
trustScore: r.trust_score,
|
|
299
279
|
retrievalCount: r.retrieval_count,
|
|
300
280
|
helpfulCount: r.helpful_count,
|
|
281
|
+
lastRetrievedAt: (r as any).last_retrieved_at ?? null,
|
|
301
282
|
createdAt: r.created_at,
|
|
302
283
|
updatedAt: r.updated_at,
|
|
303
284
|
score: r.trust_score * (1 - i * 0.05),
|
|
@@ -409,9 +390,11 @@ export class FactRetriever {
|
|
|
409
390
|
category: r.category as FactCategory,
|
|
410
391
|
tags: r.tags,
|
|
411
392
|
keywords: r.keywords ?? '[]',
|
|
393
|
+
summary: (r as any).summary ?? null,
|
|
412
394
|
trustScore: r.trust_score,
|
|
413
395
|
retrievalCount: 0,
|
|
414
396
|
helpfulCount: 0,
|
|
397
|
+
lastRetrievedAt: (r as any).last_retrieved_at ?? null,
|
|
415
398
|
createdAt: r.created_at,
|
|
416
399
|
updatedAt: r.updated_at,
|
|
417
400
|
})
|
|
@@ -509,9 +492,11 @@ export class FactRetriever {
|
|
|
509
492
|
category: String(row.category) as FactCategory,
|
|
510
493
|
tags: String(row.tags),
|
|
511
494
|
keywords: String(row.keywords ?? '[]'),
|
|
495
|
+
summary: row.summary != null ? String(row.summary) : null,
|
|
512
496
|
trustScore: Number(row.trust_score),
|
|
513
497
|
retrievalCount: Number(row.retrieval_count),
|
|
514
498
|
helpfulCount: Number(row.helpful_count),
|
|
499
|
+
lastRetrievedAt: row.last_retrieved_at != null ? String(row.last_retrieved_at) : null,
|
|
515
500
|
createdAt: String(row.created_at),
|
|
516
501
|
updatedAt: String(row.updated_at),
|
|
517
502
|
ftsRank: rawRanks[i] / maxRank,
|
|
@@ -619,8 +604,8 @@ export class FactRetriever {
|
|
|
619
604
|
const conditions: string[] = []
|
|
620
605
|
const params: unknown[] = []
|
|
621
606
|
for (const word of words) {
|
|
622
|
-
conditions.push('(f.content LIKE ? OR f.tags LIKE ?)')
|
|
623
|
-
params.push(`%${word}%`, `%${word}%`)
|
|
607
|
+
conditions.push('(f.content LIKE ? OR f.tags LIKE ? OR f.summary LIKE ?)')
|
|
608
|
+
params.push(`%${word}%`, `%${word}%`, `%${word}%`)
|
|
624
609
|
}
|
|
625
610
|
|
|
626
611
|
// 中文子串分解:将中文查询拆为 2~3 字滑动窗口,追加 LIKE 条件
|
|
@@ -632,14 +617,14 @@ export class FactRetriever {
|
|
|
632
617
|
// 2-gram
|
|
633
618
|
for (let i = 0; i < seg.length - 1; i++) {
|
|
634
619
|
const bigram = seg.slice(i, i + 2)
|
|
635
|
-
conditions.push('(f.content LIKE ? OR f.tags LIKE ?)')
|
|
636
|
-
params.push(`%${bigram}%`, `%${bigram}%`)
|
|
620
|
+
conditions.push('(f.content LIKE ? OR f.tags LIKE ? OR f.summary LIKE ?)')
|
|
621
|
+
params.push(`%${bigram}%`, `%${bigram}%`, `%${bigram}%`)
|
|
637
622
|
}
|
|
638
623
|
// 3-gram(覆盖更长的短语匹配)
|
|
639
624
|
for (let i = 0; i < seg.length - 2; i++) {
|
|
640
625
|
const trigram = seg.slice(i, i + 3)
|
|
641
|
-
conditions.push('(f.content LIKE ? OR f.tags LIKE ?)')
|
|
642
|
-
params.push(`%${trigram}%`, `%${trigram}%`)
|
|
626
|
+
conditions.push('(f.content LIKE ? OR f.tags LIKE ? OR f.summary LIKE ?)')
|
|
627
|
+
params.push(`%${trigram}%`, `%${trigram}%`, `%${trigram}%`)
|
|
643
628
|
}
|
|
644
629
|
}
|
|
645
630
|
}
|
|
@@ -656,7 +641,7 @@ export class FactRetriever {
|
|
|
656
641
|
|
|
657
642
|
const sql = `
|
|
658
643
|
SELECT f.fact_id, f.content, f.category, f.tags, f.keywords,
|
|
659
|
-
f.trust_score, f.retrieval_count, f.helpful_count,
|
|
644
|
+
f.summary, f.trust_score, f.retrieval_count, f.helpful_count,
|
|
660
645
|
f.created_at, f.updated_at
|
|
661
646
|
FROM facts f
|
|
662
647
|
WHERE (${conditionsSql})
|
|
@@ -668,6 +653,7 @@ export class FactRetriever {
|
|
|
668
653
|
|
|
669
654
|
const rows = this.db.prepare(sql).all(...params) as Array<{
|
|
670
655
|
fact_id: number; content: string; category: string; tags: string; keywords: string;
|
|
656
|
+
summary: string | null;
|
|
671
657
|
trust_score: number; retrieval_count: number; helpful_count: number;
|
|
672
658
|
created_at: string; updated_at: string;
|
|
673
659
|
}>
|
|
@@ -679,9 +665,11 @@ export class FactRetriever {
|
|
|
679
665
|
category: r.category as FactCategory,
|
|
680
666
|
tags: r.tags,
|
|
681
667
|
keywords: r.keywords ?? '[]',
|
|
668
|
+
summary: r.summary ?? null,
|
|
682
669
|
trustScore: r.trust_score,
|
|
683
670
|
retrievalCount: r.retrieval_count,
|
|
684
671
|
helpfulCount: r.helpful_count,
|
|
672
|
+
lastRetrievedAt: (r as any).last_retrieved_at ?? null,
|
|
685
673
|
createdAt: r.created_at,
|
|
686
674
|
updatedAt: r.updated_at,
|
|
687
675
|
ftsRank: 0.5,
|
package/src/schema.ts
CHANGED
|
@@ -6,9 +6,11 @@ CREATE TABLE IF NOT EXISTS facts (
|
|
|
6
6
|
category TEXT DEFAULT 'general',
|
|
7
7
|
tags TEXT DEFAULT '',
|
|
8
8
|
keywords TEXT DEFAULT '[]',
|
|
9
|
+
summary TEXT DEFAULT NULL,
|
|
9
10
|
trust_score REAL DEFAULT 0.5,
|
|
10
11
|
retrieval_count INTEGER DEFAULT 0,
|
|
11
12
|
helpful_count INTEGER DEFAULT 0,
|
|
13
|
+
last_retrieved_at TEXT DEFAULT NULL,
|
|
12
14
|
created_at TEXT DEFAULT (datetime('now', 'localtime')),
|
|
13
15
|
updated_at TEXT DEFAULT (datetime('now', 'localtime'))
|
|
14
16
|
);
|
|
@@ -29,33 +31,42 @@ CREATE TABLE IF NOT EXISTS fact_entities (
|
|
|
29
31
|
PRIMARY KEY (fact_id, entity_id)
|
|
30
32
|
);
|
|
31
33
|
|
|
34
|
+
-- 检索日志表
|
|
35
|
+
CREATE TABLE IF NOT EXISTS retrieval_log (
|
|
36
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
37
|
+
query TEXT NOT NULL,
|
|
38
|
+
results TEXT DEFAULT NULL,
|
|
39
|
+
timestamp TEXT DEFAULT (datetime('now', 'localtime'))
|
|
40
|
+
);
|
|
41
|
+
|
|
32
42
|
-- 索引
|
|
33
43
|
CREATE INDEX IF NOT EXISTS idx_facts_trust ON facts(trust_score DESC);
|
|
34
44
|
CREATE INDEX IF NOT EXISTS idx_facts_category ON facts(category);
|
|
35
45
|
CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(name);
|
|
36
46
|
CREATE INDEX IF NOT EXISTS idx_fact_entities_entity ON fact_entities(entity_id);
|
|
47
|
+
CREATE INDEX IF NOT EXISTS idx_retrieval_log_ts ON retrieval_log(timestamp);
|
|
37
48
|
|
|
38
|
-
-- FTS5
|
|
49
|
+
-- FTS5 全文索引(含 summary 列)
|
|
39
50
|
CREATE VIRTUAL TABLE IF NOT EXISTS facts_fts
|
|
40
|
-
USING fts5(content, tags, content=facts, content_rowid=fact_id);
|
|
51
|
+
USING fts5(content, tags, summary, content=facts, content_rowid=fact_id);
|
|
41
52
|
|
|
42
53
|
-- FTS5 同步触发器:插入
|
|
43
54
|
CREATE TRIGGER IF NOT EXISTS facts_ai AFTER INSERT ON facts BEGIN
|
|
44
|
-
INSERT INTO facts_fts(rowid, content, tags)
|
|
45
|
-
VALUES (new.fact_id, new.content, new.tags);
|
|
55
|
+
INSERT INTO facts_fts(rowid, content, tags, summary)
|
|
56
|
+
VALUES (new.fact_id, new.content, new.tags, COALESCE(new.summary, ''));
|
|
46
57
|
END;
|
|
47
58
|
|
|
48
59
|
-- FTS5 同步触发器:删除
|
|
49
60
|
CREATE TRIGGER IF NOT EXISTS facts_ad AFTER DELETE ON facts BEGIN
|
|
50
|
-
INSERT INTO facts_fts(facts_fts, rowid, content, tags)
|
|
51
|
-
VALUES ('delete', old.fact_id, old.content, old.tags);
|
|
61
|
+
INSERT INTO facts_fts(facts_fts, rowid, content, tags, summary)
|
|
62
|
+
VALUES ('delete', old.fact_id, old.content, old.tags, COALESCE(old.summary, ''));
|
|
52
63
|
END;
|
|
53
64
|
|
|
54
65
|
-- FTS5 同步触发器:更新
|
|
55
66
|
CREATE TRIGGER IF NOT EXISTS facts_au AFTER UPDATE ON facts BEGIN
|
|
56
|
-
INSERT INTO facts_fts(facts_fts, rowid, content, tags)
|
|
57
|
-
VALUES ('delete', old.fact_id, old.content, old.tags);
|
|
58
|
-
INSERT INTO facts_fts(rowid, content, tags)
|
|
59
|
-
VALUES (new.fact_id, new.content, new.tags);
|
|
67
|
+
INSERT INTO facts_fts(facts_fts, rowid, content, tags, summary)
|
|
68
|
+
VALUES ('delete', old.fact_id, old.content, old.tags, COALESCE(old.summary, ''));
|
|
69
|
+
INSERT INTO facts_fts(rowid, content, tags, summary)
|
|
70
|
+
VALUES (new.fact_id, new.content, new.tags, COALESCE(new.summary, ''));
|
|
60
71
|
END;
|
|
61
72
|
`
|
package/src/server.ts
CHANGED
|
@@ -27,8 +27,9 @@ const FACT_STORE_DESCRIPTION = `结构化事实记忆系统(SQLite+FTS5 索引
|
|
|
27
27
|
写入时先 search 检查是否已存在相似事实。identity/coding_style/tool_pref/workflow/general → 全局库,project → 项目库。`
|
|
28
28
|
|
|
29
29
|
const factStoreSchema = {
|
|
30
|
-
action: z.enum(['add', 'search', 'probe', 'related', 'reason', 'contradict', 'update', 'remove', 'list']),
|
|
30
|
+
action: z.enum(['add', 'search', 'probe', 'related', 'reason', 'contradict', 'update', 'remove', 'list', 'learn', 'audit']),
|
|
31
31
|
content: z.union([z.string(), z.array(z.string())]).optional().describe("事实内容('add' 必需,支持批量)"),
|
|
32
|
+
summary: z.string().optional().describe('超长事实的摘要(检索用 summary 匹配)'),
|
|
32
33
|
query: z.string().optional().describe("搜索查询('search' 必需)"),
|
|
33
34
|
entity: z.string().optional().describe("实体名('probe'/'related' 使用)"),
|
|
34
35
|
entities: z.array(z.string()).optional().describe("实体列表('reason' 使用)"),
|
|
@@ -62,6 +63,18 @@ const retriever = new FactRetriever(store, { temporalDecayHalfLife: 30 })
|
|
|
62
63
|
store.decayTrustScores()
|
|
63
64
|
store.auditContradictions()
|
|
64
65
|
|
|
66
|
+
// Auto-learn on startup. process.nextTick only defers the call until the current
// synchronous start-up code has finished; store.runLearning() itself is synchronous,
// so it still occupies the event loop while it runs.
process.nextTick(() => {
  try {
    const result = store.runLearning()
    // Report whenever the run did anything, including promotions: the message prints
    // `promoted`, so a promotion-only run must not be silently skipped.
    const didSomething =
      result.promoted > 0 ||
      result.demoted > 0 ||
      result.aged > 0 ||
      result.long_facts.length > 0
    if (didSomething) {
      console.error(`[mnemo:auto-learn] promoted=${result.promoted} demoted=${result.demoted} aged=${result.aged} long_facts=${result.long_facts.length}`)
    }
  } catch (err) {
    // A failed learning pass is logged and otherwise ignored so start-up continues.
    console.error('[mnemo:auto-learn] error:', err)
  }
})
|
|
77
|
+
|
|
65
78
|
// -- MCP Server --
|
|
66
79
|
const server = new McpServer({ name: 'mnemo-mcp', version: '0.1.0' })
|
|
67
80
|
|
|
@@ -93,13 +106,22 @@ server.tool(
|
|
|
93
106
|
let warnings: string[] | undefined
|
|
94
107
|
const scan = fullSecurityScan(content)
|
|
95
108
|
if (scan.warnings.length > 0 || scan.hasPii) warnings = [...scan.warnings]
|
|
109
|
+
if (content.length > 500 && !a.summary) {
|
|
110
|
+
warnings = [...(warnings ?? []), 'content 超过 500 字,建议提供 summary 或拆分为多条 fact']
|
|
111
|
+
}
|
|
96
112
|
|
|
97
113
|
if (similar) {
|
|
98
114
|
store.updateFact(similar.factId, { content, tags: a.tags, trustDelta: 0.05 })
|
|
115
|
+
if (a.summary) {
|
|
116
|
+
store.connection.prepare('UPDATE facts SET summary = ? WHERE fact_id = ?').run(a.summary, similar.factId)
|
|
117
|
+
}
|
|
99
118
|
const demoted = store.demoteContradictingFacts(similar.factId, content, category)
|
|
100
119
|
results.push({ fact_id: similar.factId, status: 'updated', reason: 'similar_fact_merged', ...(demoted > 0 ? { contradicted_demoted: demoted } : {}), ...(warnings ? { warnings } : {}) })
|
|
101
120
|
} else {
|
|
102
121
|
const factId = store.addFact(content, category, a.tags ?? '')
|
|
122
|
+
if (a.summary) {
|
|
123
|
+
store.connection.prepare('UPDATE facts SET summary = ? WHERE fact_id = ?').run(a.summary, factId)
|
|
124
|
+
}
|
|
103
125
|
const demoted = store.demoteContradictingFacts(factId, content, category)
|
|
104
126
|
results.push({ fact_id: factId, status: 'added', category, ...(demoted > 0 ? { contradicted_demoted: demoted } : {}), ...(warnings ? { warnings } : {}) })
|
|
105
127
|
}
|
|
@@ -144,6 +166,9 @@ server.tool(
|
|
|
144
166
|
case 'update': {
|
|
145
167
|
if (!a.fact_id) return { content: [{ type: 'text' as const, text: JSON.stringify({ error: 'Missing required argument: fact_id' }) }] }
|
|
146
168
|
const updated = store.updateFact(a.fact_id as number, { content: a.content as string | undefined, tags: a.tags, category, trustDelta: a.trust_delta })
|
|
169
|
+
if (a.summary !== undefined) {
|
|
170
|
+
store.connection.prepare('UPDATE facts SET summary = ? WHERE fact_id = ?').run(a.summary, a.fact_id as number)
|
|
171
|
+
}
|
|
147
172
|
retriever.getCache().clear()
|
|
148
173
|
resourceManager.invalidate()
|
|
149
174
|
return { content: [{ type: 'text' as const, text: JSON.stringify({ updated }) }] }
|
|
@@ -159,6 +184,16 @@ server.tool(
|
|
|
159
184
|
return { content: [{ type: 'text' as const, text: JSON.stringify(response) }] }
|
|
160
185
|
}
|
|
161
186
|
|
|
187
|
+
case 'learn': {
|
|
188
|
+
const result = store.runLearning()
|
|
189
|
+
return { content: [{ type: 'text' as const, text: JSON.stringify(result) }] }
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
case 'audit': {
|
|
193
|
+
const report = store.runAudit()
|
|
194
|
+
return { content: [{ type: 'text' as const, text: JSON.stringify(report) }] }
|
|
195
|
+
}
|
|
196
|
+
|
|
162
197
|
case 'list': {
|
|
163
198
|
const facts = store.listFacts(category, a.min_trust ?? 0.0, a.limit ?? 10)
|
|
164
199
|
return { content: [{ type: 'text' as const, text: JSON.stringify({ facts, count: facts.length }) }] }
|
package/src/store.ts
CHANGED
|
@@ -63,9 +63,11 @@ interface FactRow {
|
|
|
63
63
|
category: string
|
|
64
64
|
tags: string
|
|
65
65
|
keywords: string
|
|
66
|
+
summary?: string | null
|
|
66
67
|
trust_score: number
|
|
67
68
|
retrieval_count: number
|
|
68
69
|
helpful_count: number
|
|
70
|
+
last_retrieved_at?: string | null
|
|
69
71
|
created_at: string
|
|
70
72
|
updated_at: string
|
|
71
73
|
}
|
|
@@ -110,10 +112,58 @@ export class MemoryStore {
|
|
|
110
112
|
|
|
111
113
|
/**
 * Incremental schema migration.
 *
 * 1. Adds the `keywords`, `summary` and `last_retrieved_at` columns to `facts`
 *    when they are missing.
 * 2. If the `facts_fts` index has no `summary` column, drops it together with its
 *    three sync triggers and recreates index, contents and triggers with `summary`.
 *
 * @throws Any SQLite error other than "duplicate column name" raised while adding a
 *         column, and any error raised while rebuilding the FTS5 index (the rebuild
 *         is rolled back in that case).
 */
private migrateSchema(): void {
  // Adds one column; only the "column already exists" failure is treated as success.
  // `table`, `column` and `def` are interpolated into SQL, so they must stay
  // hard-coded constants (as they are below) and never come from user input.
  const addColumn = (table: string, column: string, def: string): void => {
    try {
      this.db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${def}`)
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err)
      if (!/duplicate column name/i.test(message)) throw err
    }
  }

  // keywords column (v2)
  addColumn('facts', 'keywords', "TEXT DEFAULT '[]'")
  // summary column
  addColumn('facts', 'summary', 'TEXT DEFAULT NULL')
  // last_retrieved_at column
  addColumn('facts', 'last_retrieved_at', 'TEXT DEFAULT NULL')

  // FTS5 rebuild: only needed when the existing index has no summary column.
  const ftsCols = this.db.pragma('table_info(facts_fts)') as Array<{ name: string }>
  if (ftsCols.some(c => c.name === 'summary')) return

  // Drop, recreate, refill and re-trigger in ONE transaction so an interruption
  // cannot leave the database without an index or without its sync triggers.
  const rebuildFts = this.db.transaction(() => {
    this.db.exec(`
      DROP TABLE IF EXISTS facts_fts;
      DROP TRIGGER IF EXISTS facts_ai;
      DROP TRIGGER IF EXISTS facts_ad;
      DROP TRIGGER IF EXISTS facts_au;
    `)
    // Recreate the FTS5 virtual table, now including summary.
    this.db.exec(`
      CREATE VIRTUAL TABLE facts_fts
        USING fts5(content, tags, summary, content=facts, content_rowid=fact_id);
    `)
    // Refill the index from the facts table (NULL summary is indexed as '').
    this.db.exec(`
      INSERT INTO facts_fts(rowid, content, tags, summary)
        SELECT fact_id, content, tags, COALESCE(summary, '') FROM facts;
    `)
    // Recreate the insert / delete / update sync triggers.
    this.db.exec(`
      CREATE TRIGGER facts_ai AFTER INSERT ON facts BEGIN
        INSERT INTO facts_fts(rowid, content, tags, summary)
          VALUES (new.fact_id, new.content, new.tags, COALESCE(new.summary, ''));
      END;
      CREATE TRIGGER facts_ad AFTER DELETE ON facts BEGIN
        INSERT INTO facts_fts(facts_fts, rowid, content, tags, summary)
          VALUES ('delete', old.fact_id, old.content, old.tags, COALESCE(old.summary, ''));
      END;
      CREATE TRIGGER facts_au AFTER UPDATE ON facts BEGIN
        INSERT INTO facts_fts(facts_fts, rowid, content, tags, summary)
          VALUES ('delete', old.fact_id, old.content, old.tags, COALESCE(old.summary, ''));
        INSERT INTO facts_fts(rowid, content, tags, summary)
          VALUES (new.fact_id, new.content, new.tags, COALESCE(new.summary, ''));
      END;
    `)
  })
  rebuildFts()
}
|
|
118
168
|
|
|
119
169
|
private prepareStatements(): void {
|
|
@@ -361,8 +411,8 @@ export class MemoryStore {
|
|
|
361
411
|
params.push(limit)
|
|
362
412
|
|
|
363
413
|
const sql = `
|
|
364
|
-
SELECT fact_id, content, category, tags, keywords, trust_score,
|
|
365
|
-
retrieval_count, helpful_count, created_at, updated_at
|
|
414
|
+
SELECT fact_id, content, category, tags, keywords, summary, trust_score,
|
|
415
|
+
retrieval_count, helpful_count, last_retrieved_at, created_at, updated_at
|
|
366
416
|
FROM facts
|
|
367
417
|
WHERE trust_score >= ?
|
|
368
418
|
${categoryClause}
|
|
@@ -410,8 +460,8 @@ export class MemoryStore {
|
|
|
410
460
|
params.push(limit)
|
|
411
461
|
|
|
412
462
|
const sql = `
|
|
413
|
-
SELECT f.fact_id, f.content, f.category, f.tags, f.keywords, f.trust_score,
|
|
414
|
-
f.retrieval_count, f.helpful_count, f.created_at, f.updated_at
|
|
463
|
+
SELECT f.fact_id, f.content, f.category, f.tags, f.keywords, f.summary, f.trust_score,
|
|
464
|
+
f.retrieval_count, f.helpful_count, f.last_retrieved_at, f.created_at, f.updated_at
|
|
415
465
|
FROM facts f
|
|
416
466
|
JOIN fact_entities fe ON f.fact_id = fe.fact_id
|
|
417
467
|
JOIN entities e ON fe.entity_id = e.entity_id
|
|
@@ -445,8 +495,8 @@ export class MemoryStore {
|
|
|
445
495
|
params.push(limit)
|
|
446
496
|
|
|
447
497
|
const sql = `
|
|
448
|
-
SELECT f.fact_id, f.content, f.category, f.tags, f.keywords, f.trust_score,
|
|
449
|
-
f.retrieval_count, f.helpful_count, f.created_at, f.updated_at
|
|
498
|
+
SELECT f.fact_id, f.content, f.category, f.tags, f.keywords, f.summary, f.trust_score,
|
|
499
|
+
f.retrieval_count, f.helpful_count, f.last_retrieved_at, f.created_at, f.updated_at
|
|
450
500
|
FROM facts f
|
|
451
501
|
WHERE f.fact_id IN (${intersects})
|
|
452
502
|
${categoryClause}
|
|
@@ -635,6 +685,160 @@ export class MemoryStore {
|
|
|
635
685
|
return row.count
|
|
636
686
|
}
|
|
637
687
|
|
|
688
|
+
/**
 * Records one retrieval in `retrieval_log` and stamps `last_retrieved_at` on every
 * returned fact, then trims the log to its 5000 most recent entries.
 *
 * The three writes run in a single transaction so they commit or fail together.
 *
 * @param query   The query text to store in the log row.
 * @param results Returned facts as `{ id, score }`; stored as JSON in the log row.
 */
logRetrieval(query: string, results: Array<{ id: number; score: number }>): void {
  const resultsJson = JSON.stringify(results)
  const ids = results.map(r => r.id)

  const writeLog = this.db.transaction(() => {
    this.db.prepare(
      "INSERT INTO retrieval_log (query, results) VALUES (?, ?)"
    ).run(query, resultsJson)

    // Stamp last_retrieved_at on the returned facts.
    // NOTE(review): the facts_au trigger is declared AFTER UPDATE ON facts (any column),
    // so this UPDATE also re-indexes each returned fact in facts_fts — consider
    // narrowing the trigger to UPDATE OF content, tags, summary.
    if (ids.length > 0) {
      const placeholders = ids.map(() => '?').join(',')
      this.db.prepare(
        `UPDATE facts SET last_retrieved_at = datetime('now', 'localtime') WHERE fact_id IN (${placeholders})`
      ).run(...ids)
    }

    // Automatic log cleanup.
    this.pruneRetrievalLog(5000)
  })
  writeLog()
}
|
|
707
|
+
|
|
708
|
+
/**
 * Trims `retrieval_log`, keeping only the `maxEntries` rows with the highest ids.
 *
 * @param maxEntries Number of newest rows to keep (default 5000). A negative value
 *                   deletes nothing.
 */
pruneRetrievalLog(maxEntries = 5000): void {
  // A negative LIMIT means "no limit" in SQLite, i.e. keep everything.
  if (maxEntries < 0) return

  // Locate the newest id that falls outside the retained window and delete from there
  // downwards. This uses the primary key instead of testing every row against a
  // NOT IN list. With fewer than maxEntries + 1 rows the subquery yields NULL and
  // no row matches.
  this.db.prepare(
    `DELETE FROM retrieval_log WHERE id <= (
      SELECT id FROM retrieval_log ORDER BY id DESC LIMIT 1 OFFSET ?
    )`
  ).run(maxEntries)
}
|
|
716
|
+
|
|
717
|
+
/**
 * Self-learning pass: adjusts `trust_score` of every fact from its retrieval statistics.
 *
 * Per fact:
 * - Rate-based (only when `retrieval_count > 30`): helpful rate below 5% multiplies
 *   trust by 0.9 (demoted); helpful rate above 30% adds 0.05 (promoted).
 * - Aging: when `last_retrieved_at` is set and more than 60 days old, trust is
 *   multiplied by 0.95 (aged). Facts that were never retrieved are not aged.
 * - Every adjusted value goes through `clampTrust`.
 *
 * A fact can be both demoted/promoted and aged in one pass; it is then counted in both
 * counters. All updates are written inside one transaction.
 *
 * NOTE(review): nothing records when a fact was last adjusted, so each call applies the
 * adjustments again — confirm this is intended given the pass also runs at start-up.
 *
 * @returns Counters for promoted / demoted / aged / unchanged facts, plus `long_facts`:
 *          facts whose match text (summary if present, otherwise content) exceeds 300
 *          characters, with the resulting `min(1, 300 / length)` penalty.
 */
runLearning(): {
  promoted: number
  demoted: number
  aged: number
  unchanged: number
  long_facts: Array<{ id: number; content_length: number; penalty: number; has_summary: boolean }>
} {
  const rows = this.db.prepare(
    'SELECT fact_id, content, summary, retrieval_count, helpful_count, trust_score, last_retrieved_at FROM facts'
  ).all() as Array<{
    fact_id: number; content: string; summary: string | null;
    retrieval_count: number; helpful_count: number; trust_score: number; last_retrieved_at: string | null
  }>

  // Prepared once and reused for every row.
  const updateTrust = this.db.prepare('UPDATE facts SET trust_score = ? WHERE fact_id = ?')

  let promoted = 0
  let demoted = 0
  let aged = 0
  let unchanged = 0
  const longFacts: Array<{ id: number; content_length: number; penalty: number; has_summary: boolean }> = []

  const now = Date.now()

  const applyAdjustments = this.db.transaction(() => {
    for (const row of rows) {
      // Running trust value for this fact; written back once at the end.
      let trust = row.trust_score
      let changed = false

      // Rate-based adjustment (requires more than 30 retrievals).
      if (row.retrieval_count > 30) {
        const rate = row.helpful_count / row.retrieval_count
        if (rate < 0.05) {
          trust = clampTrust(trust * 0.9)
          demoted++
          changed = true
        } else if (rate > 0.3) {
          trust = clampTrust(trust + 0.05)
          promoted++
          changed = true
        }
      }

      // Aging (not retrieved for more than 60 days). A NULL last_retrieved_at means
      // the fact was never retrieved and is not aged.
      if (row.last_retrieved_at) {
        // The column is written with datetime('now', 'localtime'), so it must be
        // parsed as LOCAL time. An ISO date-time without an offset is local time
        // in JavaScript; appending 'Z' would shift it by the UTC offset.
        const lastRetrieved = new Date(row.last_retrieved_at.replace(' ', 'T')).getTime()
        const daysSinceRetrieval = (now - lastRetrieved) / 86_400_000
        if (daysSinceRetrieval > 60) {
          trust = clampTrust(trust * 0.95)
          aged++
          changed = true
        }
      }

      if (changed) {
        updateTrust.run(trust, row.fact_id)
      } else {
        unchanged++
      }

      // Long-facts report: match text (summary if present, else content) over 300 chars.
      const matchLength = row.summary ? row.summary.length : row.content.length
      if (matchLength > 300) {
        longFacts.push({
          id: row.fact_id,
          content_length: row.content.length,
          penalty: Math.min(1.0, 300 / matchLength),
          has_summary: !!row.summary,
        })
      }
    }
  })
  applyAdjustments()

  return { promoted, demoted, aged, unchanged, long_facts: longFacts }
}
|
|
789
|
+
|
|
790
|
+
/**
 * Data-quality audit. Read-only: issues a single SELECT and modifies nothing.
 *
 * @returns
 * - `total_facts`: number of rows in `facts`.
 * - `long_without_summary`: facts whose content exceeds 500 characters and have no summary.
 * - `low_helpful_rate`: facts retrieved more than 30 times with a helpful rate below 5%
 *   (rate rounded to three decimals).
 * - `aging_candidates`: facts whose `last_retrieved_at` is more than 60 days old;
 *   facts that were never retrieved are not listed.
 */
runAudit(): {
  total_facts: number
  long_without_summary: Array<{ id: number; content_length: number }>
  low_helpful_rate: Array<{ id: number; rate: number; retrieval_count: number }>
  aging_candidates: Array<{ id: number; last_retrieved_at: string | null }>
} {
  const rows = this.db.prepare(
    'SELECT fact_id, content, summary, retrieval_count, helpful_count, last_retrieved_at FROM facts'
  ).all() as Array<{
    fact_id: number; content: string; summary: string | null;
    retrieval_count: number; helpful_count: number; last_retrieved_at: string | null
  }>

  const longWithoutSummary: Array<{ id: number; content_length: number }> = []
  const lowHelpfulRate: Array<{ id: number; rate: number; retrieval_count: number }> = []
  const agingCandidates: Array<{ id: number; last_retrieved_at: string | null }> = []

  const now = Date.now()

  for (const row of rows) {
    // Content over 500 characters with no summary.
    if (row.content.length > 500 && !row.summary) {
      longWithoutSummary.push({ id: row.fact_id, content_length: row.content.length })
    }

    // Low helpful rate (more than 30 retrievals, rate below 5%).
    if (row.retrieval_count > 30) {
      const rate = row.helpful_count / row.retrieval_count
      if (rate < 0.05) {
        lowHelpfulRate.push({ id: row.fact_id, rate: Math.round(rate * 1000) / 1000, retrieval_count: row.retrieval_count })
      }
    }

    // Aging candidates (not retrieved for more than 60 days).
    if (row.last_retrieved_at) {
      // The column is written with datetime('now', 'localtime'); parse it as local
      // time (ISO date-time without offset) rather than appending 'Z', which would
      // treat it as UTC.
      const lastRetrieved = new Date(row.last_retrieved_at.replace(' ', 'T')).getTime()
      const daysSince = (now - lastRetrieved) / 86_400_000
      if (daysSince > 60) {
        agingCandidates.push({ id: row.fact_id, last_retrieved_at: row.last_retrieved_at })
      }
    }
  }

  return {
    total_facts: rows.length,
    long_without_summary: longWithoutSummary,
    low_helpful_rate: lowHelpfulRate,
    aging_candidates: agingCandidates,
  }
}
|
|
841
|
+
|
|
638
842
|
/** 获取数据库连接(供 FactRetriever 直接使用) */
|
|
639
843
|
get connection(): Database.Database {
|
|
640
844
|
return this.db
|
|
@@ -795,9 +999,11 @@ export class MemoryStore {
|
|
|
795
999
|
category: row.category as FactCategory,
|
|
796
1000
|
tags: row.tags,
|
|
797
1001
|
keywords: row.keywords,
|
|
1002
|
+
summary: (row as any).summary ?? null,
|
|
798
1003
|
trustScore: row.trust_score,
|
|
799
1004
|
retrievalCount: row.retrieval_count,
|
|
800
1005
|
helpfulCount: row.helpful_count,
|
|
1006
|
+
lastRetrievedAt: (row as any).last_retrieved_at ?? null,
|
|
801
1007
|
createdAt: row.created_at,
|
|
802
1008
|
updatedAt: row.updated_at,
|
|
803
1009
|
}
|
package/src/types.ts
CHANGED
|
@@ -8,9 +8,11 @@ export interface Fact {
|
|
|
8
8
|
category: FactCategory
|
|
9
9
|
tags: string
|
|
10
10
|
keywords: string
|
|
11
|
+
summary: string | null
|
|
11
12
|
trustScore: number
|
|
12
13
|
retrievalCount: number
|
|
13
14
|
helpfulCount: number
|
|
15
|
+
lastRetrievedAt: string | null
|
|
14
16
|
createdAt: string
|
|
15
17
|
updatedAt: string
|
|
16
18
|
}
|
|
@@ -53,7 +55,7 @@ export interface RetrieverOptions {
|
|
|
53
55
|
|
|
54
56
|
/** fact_store 工具调用参数 */
|
|
55
57
|
export interface FactStoreArgs {
|
|
56
|
-
action: 'add' | 'search' | 'probe' | 'related' | 'reason' | 'contradict' | 'update' | 'remove' | 'list'
|
|
58
|
+
action: 'add' | 'search' | 'probe' | 'related' | 'reason' | 'contradict' | 'update' | 'remove' | 'list' | 'learn' | 'audit'
|
|
57
59
|
content?: string | string[]
|
|
58
60
|
query?: string
|
|
59
61
|
entity?: string
|
|
@@ -61,6 +63,7 @@ export interface FactStoreArgs {
|
|
|
61
63
|
fact_id?: number | number[]
|
|
62
64
|
category?: string
|
|
63
65
|
tags?: string
|
|
66
|
+
summary?: string
|
|
64
67
|
trust_delta?: number
|
|
65
68
|
min_trust?: number
|
|
66
69
|
limit?: number
|