@morningljn/mnemo 0.1.4 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dream.d.ts +2 -0
- package/dist/dream.js +20 -0
- package/dist/dream.js.map +1 -0
- package/dist/init.js +4 -24
- package/dist/init.js.map +1 -1
- package/dist/resources.d.ts +22 -8
- package/dist/resources.js +66 -20
- package/dist/resources.js.map +1 -1
- package/dist/retriever.js +42 -47
- package/dist/retriever.js.map +1 -1
- package/dist/schema.d.ts +1 -1
- package/dist/schema.js +21 -10
- package/dist/schema.js.map +1 -1
- package/dist/server.js +73 -6
- package/dist/server.js.map +1 -1
- package/dist/store.d.ts +59 -1
- package/dist/store.js +308 -10
- package/dist/store.js.map +1 -1
- package/dist/types.d.ts +30 -1
- package/docs/superpowers/plans/2026-05-16-memory-dreaming.md +626 -0
- package/docs/superpowers/plans/2026-05-16-memory-self-learning.md +932 -0
- package/openspec/changes/archive/2026-05-16-memory-dreaming/.openspec.yaml +2 -0
- package/openspec/changes/archive/2026-05-16-memory-dreaming/design.md +71 -0
- package/openspec/changes/archive/2026-05-16-memory-dreaming/proposal.md +32 -0
- package/openspec/changes/archive/2026-05-16-memory-dreaming/specs/compact-search/spec.md +16 -0
- package/openspec/changes/archive/2026-05-16-memory-dreaming/specs/dream-cycle/spec.md +38 -0
- package/openspec/changes/archive/2026-05-16-memory-dreaming/tasks.md +27 -0
- package/openspec/changes/memory-self-learning/.openspec.yaml +2 -0
- package/openspec/changes/memory-self-learning/design.md +174 -0
- package/openspec/changes/memory-self-learning/proposal.md +35 -0
- package/openspec/changes/memory-self-learning/specs/fact-retrieval/spec.md +35 -0
- package/openspec/changes/memory-self-learning/specs/fact-summary/spec.md +45 -0
- package/openspec/changes/memory-self-learning/specs/length-penalty/spec.md +27 -0
- package/openspec/changes/memory-self-learning/specs/retrieval-log/spec.md +41 -0
- package/openspec/changes/memory-self-learning/specs/self-learning/spec.md +68 -0
- package/openspec/changes/memory-self-learning/tasks.md +56 -0
- package/openspec/specs/compact-search/spec.md +16 -0
- package/openspec/specs/dream-cycle/spec.md +38 -0
- package/package.json +3 -2
- package/src/dream.ts +20 -0
- package/src/init.ts +4 -24
- package/src/resources.ts +77 -21
- package/src/retriever.ts +41 -49
- package/src/schema.ts +21 -10
- package/src/server.ts +81 -7
- package/src/store.ts +378 -11
- package/src/types.ts +28 -1
- package/tests/resource.test.ts +25 -23
- package/tests/retriever.test.ts +53 -0
- package/tests/store.test.ts +239 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
## 1. 存储层:schema 迁移
|
|
2
|
+
|
|
3
|
+
- [ ] 1.1 `src/store.ts` — 新增 `retrieval_log` 表创建(id, query, results JSON, timestamp)
|
|
4
|
+
- [ ] 1.2 `src/store.ts` — `facts` 表 `ALTER TABLE ADD COLUMN summary TEXT DEFAULT NULL`(兼容已有库)
|
|
5
|
+
- [ ] 1.3 `src/store.ts` — `facts` 表 `ALTER TABLE ADD COLUMN last_retrieved_at TEXT DEFAULT NULL`
|
|
6
|
+
- [ ] 1.4 `src/store.ts` — 重建 FTS5 虚拟表以包含 summary 列
|
|
7
|
+
- [ ] 1.5 `src/store.ts` — 新增 `logRetrieval(query, results: [{id, score}])` 方法,写入 retrieval_log + 更新各 fact 的 last_retrieved_at
|
|
8
|
+
- [ ] 1.6 `src/store.ts` — 新增 `pruneRetrievalLog(maxEntries=5000)` 方法,超出上限删除最旧记录
|
|
9
|
+
- [ ] 1.7 `src/types.ts` — 新增 `RetrievalLogEntry` 类型、`FactStoreArgs` 增加 `summary` 字段、Fact 类型增加 `summary` 和 `last_retrieved_at`
|
|
10
|
+
|
|
11
|
+
## 2. 检索层:回退 + length penalty + summary
|
|
12
|
+
|
|
13
|
+
- [ ] 2.1 `src/retriever.ts` — 回退动态权重为静态 `ftsWeight=0.5, jaccardWeight=0.5`
|
|
14
|
+
- [ ] 2.2 `src/retriever.ts` — 移除 relevance gate(`RELEVANCE_THRESHOLD` 相关代码)
|
|
15
|
+
- [ ] 2.3 `src/retriever.ts` — 评分公式末尾新增 length penalty:`score *= min(1.0, 300 / matchText.length)`,matchText = summary(非空时)或 content
|
|
16
|
+
- [ ] 2.4 `src/retriever.ts` — FTS5 候选查询改为优先匹配 summary(非空时用 summary,空时用 content)
|
|
17
|
+
- [ ] 2.5 `src/retriever.ts` — Jaccard tokenization 同样优先使用 summary
|
|
18
|
+
- [ ] 2.6 `src/retriever.ts` — search() 方法末尾调用 `store.logRetrieval(query, results)` 记录检索日志
|
|
19
|
+
- [ ] 2.7 保留 `src/refine.ts` 的 refineQuery(过滤纯操作指令的功能与权重策略无关)
|
|
20
|
+
|
|
21
|
+
## 3. 自学习层:learn + audit
|
|
22
|
+
|
|
23
|
+
- [ ] 3.1 `src/store.ts` — 新增 `runLearning()` 方法:遍历所有 fact,按 rate 规则调整 trust_score
|
|
24
|
+
- `retrieval_count > 30 && rate < 0.05` → `trust_score *= 0.9`
|
|
25
|
+
- `retrieval_count > 30 && rate > 0.3` → `trust_score = min(1.0, trust_score + 0.05)`
|
|
26
|
+
- `last_retrieved_at` 超过 60 天 → `trust_score *= 0.95`
|
|
27
|
+
- `last_retrieved_at` 为 NULL(新 fact)→ 不老化
|
|
28
|
+
- [ ] 3.2 `src/store.ts` — `runLearning()` 返回 `{promoted, demoted, aged, unchanged, long_facts: [{id, content_length, penalty, has_summary}]}`
|
|
29
|
+
- [ ] 3.3 `src/store.ts` — 新增 `runAudit()` 方法:返回数据质量报告(超长无 summary、低 helpful 率、老化候选),不修改数据
|
|
30
|
+
- [ ] 3.4 `src/server.ts` — 新增 `fact_store(action="learn")` handler,调用 `store.runLearning()`
|
|
31
|
+
- [ ] 3.5 `src/server.ts` — 新增 `fact_store(action="audit")` handler,调用 `store.runAudit()`
|
|
32
|
+
- [ ] 3.6 `src/server.ts` — server 启动时通过 `process.nextTick()` 延迟调用 `store.runLearning()`,输出摘要到 stderr
|
|
33
|
+
|
|
34
|
+
## 4. 写入端:质量控制
|
|
35
|
+
|
|
36
|
+
- [ ] 4.1 `src/server.ts` — add handler 支持 `summary` 参数,存入 summary 列
|
|
37
|
+
- [ ] 4.2 `src/server.ts` — add/update 时 content 长度 > 500 且无 summary → 返回 warnings 提示
|
|
38
|
+
- [ ] 4.3 `src/server.ts` — add/update 写操作后调用 `store.pruneRetrievalLog()` 保持日志上限
|
|
39
|
+
- [ ] 4.4 `src/server.ts` — update handler 支持 `summary` 参数更新
|
|
40
|
+
|
|
41
|
+
## 5. 清理:移除 v3 遗留代码
|
|
42
|
+
|
|
43
|
+
- [ ] 5.1 `src/retriever.ts` — 移除动态权重计算逻辑(`tokenCount <= 3` 判断分支)
|
|
44
|
+
- [ ] 5.2 `src/retriever.ts` — 移除 content dedup(Jaccard > 0.7 去重),改为仅 score 排序
|
|
45
|
+
- [ ] 5.3 `src/refine.ts` — 保留 refineQuery(过滤纯操作指令),但移除与动态权重的耦合
|
|
46
|
+
|
|
47
|
+
## 6. 测试 + 验证
|
|
48
|
+
|
|
49
|
+
- [ ] 6.1 `tests/store.test.ts` — 新增 retrieval_log CRUD 测试(写入、查询、自动清理)
|
|
50
|
+
- [ ] 6.2 `tests/store.test.ts` — 新增 summary 列读写测试
|
|
51
|
+
- [ ] 6.3 `tests/store.test.ts` — 新增 `runLearning()` 信任调整测试(promote/demote/aging/新 fact 保护)
|
|
52
|
+
- [ ] 6.4 `tests/store.test.ts` — 新增 `runAudit()` 测试(返回报告不修改数据)
|
|
53
|
+
- [ ] 6.5 `tests/retriever.test.ts` — 新增 length penalty 测试(有/无 summary 两种场景)
|
|
54
|
+
- [ ] 6.6 `tests/retriever.test.ts` — 新增 summary 匹配测试(FTS5 + Jaccard 用 summary)
|
|
55
|
+
- [ ] 6.7 `tests/retriever.test.ts` — 验证静态权重(不再随查询长度变化)
|
|
56
|
+
- [ ] 6.8 端到端验证:`npm run build && npx vitest run`
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
## ADDED Requirements
|
|
2
|
+
|
|
3
|
+
### Requirement: 搜索结果精简格式
|
|
4
|
+
搜索返回结果 SHALL 优先返回 summary 而非完整 content,减少 token 消耗。
|
|
5
|
+
|
|
6
|
+
#### Scenario: 有 summary 的 fact
|
|
7
|
+
- **WHEN** 搜索结果中的 fact 有 summary 字段且非空
|
|
8
|
+
- **THEN** 返回 summary 作为 display 字段,不返回完整 content
|
|
9
|
+
|
|
10
|
+
#### Scenario: 无 summary 的 fact
|
|
11
|
+
- **WHEN** 搜索结果中的 fact 的 summary 为 NULL
|
|
12
|
+
- **THEN** 返回 content 前 100 字 + "..." 作为 display 字段
|
|
13
|
+
|
|
14
|
+
#### Scenario: 返回字段精简
|
|
15
|
+
- **WHEN** 搜索结果返回给调用方
|
|
16
|
+
- **THEN** 每条结果包含 factId、display(精简内容)、category、trustScore、score,不包含完整 content、keywords、tags 等冗余字段
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
## ADDED Requirements
|
|
2
|
+
|
|
3
|
+
### Requirement: Dream action 整理记忆库
|
|
4
|
+
系统 SHALL 提供 `fact_store(action="dream")` 操作,执行三阶段整理:Collect → Consolidate → Evaluate。
|
|
5
|
+
|
|
6
|
+
#### Scenario: 合并重叠 fact
|
|
7
|
+
- **WHEN** 同 category 内两条 fact 的 Jaccard 相似度 > 0.6
|
|
8
|
+
- **THEN** 系统保留 content 更长的 fact,将另一条标记删除,并在 dream report 中记录合并对
|
|
9
|
+
|
|
10
|
+
#### Scenario: 压缩长 fact
|
|
11
|
+
- **WHEN** fact 的 content 长度 > 200 字且 summary 为 NULL
|
|
12
|
+
- **THEN** 系统从 content 提取前 2 个完整句子(总长 ≤ 150 字)写入 summary 字段
|
|
13
|
+
|
|
14
|
+
#### Scenario: 分类修正
|
|
15
|
+
- **WHEN** fact 的 category 与内容不匹配(如 identity 类 fact 内容包含"编码规范")
|
|
16
|
+
- **THEN** 系统根据关键词规则表将 fact 挪到正确 category
|
|
17
|
+
|
|
18
|
+
#### Scenario: Dream 前备份
|
|
19
|
+
- **WHEN** dream action 被触发
|
|
20
|
+
- **THEN** 系统在执行任何修改前,自动将数据库备份到 `~/.mnemo/backup/dream-<timestamp>.db`
|
|
21
|
+
|
|
22
|
+
#### Scenario: 输出 dream report
|
|
23
|
+
- **WHEN** dream 整理完成
|
|
24
|
+
- **THEN** 系统返回 JSON 报告,包含 merged、compressed、reclassified、deleted 计数和 health 统计
|
|
25
|
+
|
|
26
|
+
### Requirement: CLI dream 命令
|
|
27
|
+
系统 SHALL 提供 `mnemo dream` CLI 命令,手动触发 dream 整理。
|
|
28
|
+
|
|
29
|
+
#### Scenario: 手动执行 dream
|
|
30
|
+
- **WHEN** 用户运行 `npx mnemo dream` 或 `mnemo dream`
|
|
31
|
+
- **THEN** 系统执行完整 dream cycle 并输出 report 到 stdout
|
|
32
|
+
|
|
33
|
+
### Requirement: 高频 fact 保护
|
|
34
|
+
Dream 整理 SHALL 保护检索次数 > 100 的 fact 不被删除。
|
|
35
|
+
|
|
36
|
+
#### Scenario: 高频 fact 不被合并删除
|
|
37
|
+
- **WHEN** 两条 fact 满足合并条件,但其中一条 retrieval_count > 100
|
|
38
|
+
- **THEN** 系统保留高频 fact,仅删除另一条低频 fact(即使低频 fact 的 content 更长;本规则优先于"保留 content 更长的 fact"的合并规则)
|
package/package.json
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@morningljn/mnemo",
|
|
3
|
-
"version": "0.1.4",
|
|
3
|
+
"version": "0.2.1",
|
|
4
4
|
"description": "Structured fact memory MCP server — SQLite + FTS5, trust scoring, entity graph, bilingual retrieval for Claude Code & Codex",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/server.js",
|
|
7
7
|
"bin": {
|
|
8
8
|
"mnemo": "dist/server.js",
|
|
9
|
-
"mnemo-init": "dist/init.js"
|
|
9
|
+
"mnemo-init": "dist/init.js",
|
|
10
|
+
"mnemo-dream": "dist/dream.js"
|
|
10
11
|
},
|
|
11
12
|
"publishConfig": {
|
|
12
13
|
"access": "public"
|
package/src/dream.ts
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { MemoryStore } from './store.js'
|
|
4
|
+
import { join } from 'node:path'
|
|
5
|
+
import { homedir } from 'node:os'
|
|
6
|
+
|
|
7
|
+
const dbPath = join(homedir(), '.mnemo', 'facts.db')
|
|
8
|
+
const store = new MemoryStore(dbPath)
|
|
9
|
+
|
|
10
|
+
// Run one dream (memory-store tidy-up) cycle via the store and print the
// resulting report to stdout. On failure the error is logged to stderr and
// the process exit code is set to 1. The store is closed in every case.
try {
  console.log('[mnemo dream] 开始整理记忆库...\n')
  const report = await store.runDream()
  // Full report as pretty-printed JSON, followed by a one-line counter summary.
  console.log(JSON.stringify(report, null, 2))
  console.log(`\n[mnemo dream] 完成: merged=${report.merged} compressed=${report.compressed} reclassified=${report.reclassified} deleted=${report.deleted}`)
} catch (err) {
  console.error('[mnemo dream] error:', err)
  // Set exitCode instead of calling process.exit(1): process.exit() terminates
  // the process synchronously, so the `finally` block below would never run
  // and the store would be left unclosed on the error path.
  process.exitCode = 1
} finally {
  store.close()
}
|
package/src/init.ts
CHANGED
|
@@ -19,32 +19,12 @@ const CLAUDE_MD_PATH = join(CLAUDE_DIR, 'CLAUDE.md')
|
|
|
19
19
|
const SETTINGS_PATH = join(CLAUDE_DIR, 'settings.json')
|
|
20
20
|
|
|
21
21
|
const MEMORY_RULES = `
|
|
22
|
-
# 记忆系统使用规则
|
|
23
22
|
|
|
24
|
-
|
|
23
|
+
# mnemo 记忆系统
|
|
25
24
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
## 规则 2:按需补充查询
|
|
31
|
-
仅在以下情况调用 \`fact_store(action="search")\`:
|
|
32
|
-
- 用户消息涉及个人偏好/习惯/工具选择且预热中未覆盖
|
|
33
|
-
- 用户明确查询记忆("我之前说过什么""按我的习惯")
|
|
34
|
-
- 技术选型时需要确认用户偏好
|
|
35
|
-
|
|
36
|
-
不触发查询的情况:
|
|
37
|
-
- 纯操作指令("运行测试""git commit")
|
|
38
|
-
- 通用技术问题("Promise 怎么用")
|
|
39
|
-
- 代码审查/解释请求
|
|
40
|
-
|
|
41
|
-
## 规则 3:写入记忆
|
|
42
|
-
用户说"记住"、"记下来"时,调用 \`fact_store(action="add", content="...", category="...")\`。
|
|
43
|
-
- 先 search 检查是否已有相似事实,有则 update
|
|
44
|
-
- category:identity / coding_style / tool_pref / workflow / general
|
|
45
|
-
|
|
46
|
-
## 规则 4:反馈强化
|
|
47
|
-
成功使用某条记忆时,调用 \`fact_feedback(action="helpful", fact_id=...)\`。
|
|
25
|
+
- 身份问题("你是谁"等)→ 先 fact_store(search, query="角色设定"),按设定回答
|
|
26
|
+
- 用户说"记住"→ fact_store(add),先 search 去重
|
|
27
|
+
- 成功使用记忆 → fact_feedback(helpful, fact_id)
|
|
48
28
|
`
|
|
49
29
|
|
|
50
30
|
const MCP_TOOLS = [
|
package/src/resources.ts
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* MCP Resource manager for mnemo-mcp.
|
|
3
|
-
* Exposes per-category memory
|
|
3
|
+
* Exposes per-category memory as MCP Resources for session warmup injection.
|
|
4
|
+
*
|
|
5
|
+
* identity → 指令格式(Claude 应遵循的行为设定)
|
|
6
|
+
* 其他 → 参考格式(供 Claude 查阅的用户偏好)
|
|
4
7
|
*/
|
|
5
8
|
|
|
6
9
|
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
|
|
@@ -8,7 +11,7 @@ import type { MemoryStore } from './store.js'
|
|
|
8
11
|
import type { FactCategory } from './types.js'
|
|
9
12
|
|
|
10
13
|
const CATEGORIES: FactCategory[] = ['identity', 'coding_style', 'tool_pref', 'workflow', 'general']
|
|
11
|
-
const RESOURCE_LIMIT =
|
|
14
|
+
const RESOURCE_LIMIT = 15
|
|
12
15
|
|
|
13
16
|
export interface ResourceFact {
|
|
14
17
|
fact_id: number
|
|
@@ -17,13 +20,12 @@ export interface ResourceFact {
|
|
|
17
20
|
}
|
|
18
21
|
|
|
19
22
|
export class ResourceManager {
|
|
20
|
-
private cache = new Map<FactCategory,
|
|
23
|
+
private cache = new Map<FactCategory, string>()
|
|
21
24
|
|
|
22
25
|
constructor(
|
|
23
26
|
private store: MemoryStore,
|
|
24
27
|
) {}
|
|
25
28
|
|
|
26
|
-
/** Register all category resources with the MCP server */
|
|
27
29
|
registerResources(server: McpServer): void {
|
|
28
30
|
for (const category of CATEGORIES) {
|
|
29
31
|
const uri = `mnemo://global/${category}`
|
|
@@ -32,46 +34,100 @@ export class ResourceManager {
|
|
|
32
34
|
uri,
|
|
33
35
|
{
|
|
34
36
|
description: `${category} category global facts (top ${RESOURCE_LIMIT} by trust)`,
|
|
35
|
-
mimeType: '
|
|
37
|
+
mimeType: 'text/markdown',
|
|
36
38
|
},
|
|
37
39
|
async () => this.readCategory(category),
|
|
38
40
|
)
|
|
39
41
|
}
|
|
40
42
|
}
|
|
41
43
|
|
|
42
|
-
/** Read handler for a specific category */
|
|
43
|
-
|
|
44
|
-
const
|
|
44
|
+
/** Read handler for a specific category (public for server instructions) */
|
|
45
|
+
readCategory(category: FactCategory): { contents: Array<{ uri: string; mimeType: string; text: string }> } {
|
|
46
|
+
const text = this.getFormattedFacts(category)
|
|
45
47
|
return {
|
|
46
48
|
contents: [{
|
|
47
49
|
uri: `mnemo://global/${category}`,
|
|
48
|
-
mimeType: '
|
|
49
|
-
text
|
|
50
|
+
mimeType: 'text/markdown',
|
|
51
|
+
text,
|
|
50
52
|
}],
|
|
51
53
|
}
|
|
52
54
|
}
|
|
53
55
|
|
|
54
|
-
|
|
55
|
-
getFacts(category: FactCategory): ResourceFact[] {
|
|
56
|
+
private getFormattedFacts(category: FactCategory): string {
|
|
56
57
|
const cached = this.cache.get(category)
|
|
57
58
|
if (cached) return cached
|
|
58
59
|
|
|
59
|
-
const facts = this.store.listFacts(category, 0.0, RESOURCE_LIMIT)
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
}))
|
|
60
|
+
const facts = this.store.listFacts(category, 0.0, RESOURCE_LIMIT)
|
|
61
|
+
const text = category === 'identity'
|
|
62
|
+
? this.formatAsInstructions(facts)
|
|
63
|
+
: this.formatAsReference(facts, category)
|
|
64
64
|
|
|
65
|
-
this.cache.set(category,
|
|
66
|
-
return
|
|
65
|
+
this.cache.set(category, text)
|
|
66
|
+
return text
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/**
 * Format identity-category facts as instructions for the model to follow.
 *
 * Facts whose content contains one of the role/identity markers are listed
 * first under "## 你的身份", with a leading "AI角色设定:" label and a leading
 * "你是" removed. All remaining facts follow under "## 用户信息" unchanged.
 * The input order is preserved inside each group.
 *
 * @param facts Facts as returned by `MemoryStore.listFacts`; only `content` is read.
 * @returns Markdown text with lines joined by "\n".
 */
private formatAsInstructions(facts: ReturnType<MemoryStore['listFacts']>): string {
  const roleMarkers = ['角色设定', '你是', '身份是', '女朋友']
  const isRoleFact = (f: { content: string }): boolean =>
    roleMarkers.some(marker => f.content.includes(marker))

  const lines: string[] = ['# 身份与行为设定', '', '以下是你的身份设定和用户偏好,请直接遵循:', '']

  // Role/identity facts go first; everything else is listed afterwards.
  const roleFacts = facts.filter(isRoleFact)
  const otherFacts = facts.filter(f => !isRoleFact(f))

  if (roleFacts.length > 0) {
    lines.push('## 你的身份')
    for (const f of roleFacts) {
      // Turn the descriptive statement into an instruction by dropping the
      // label and the "你是" prefix. Whitespace around either prefix is
      // tolerated so that "AI角色设定: 你是…" (space after the colon) and
      // content with leading whitespace are stripped as well.
      const content = f.content
        .replace(/^\s*AI角色设定[::]\s*/, '')
        .replace(/^\s*你是/, '')
        .trim()
      lines.push(`- ${content}`)
    }
    lines.push('')
  }

  if (otherFacts.length > 0) {
    lines.push('## 用户信息')
    for (const f of otherFacts) {
      lines.push(`- ${f.content}`)
    }
  }

  return lines.join('\n')
}
|
|
107
|
+
|
|
108
|
+
/**
 * Format non-identity facts as a reference list for the model to consult.
 *
 * The heading is the display title mapped from `category`, falling back to
 * the raw category name when no title is defined. Each fact becomes one
 * "- <content>" bullet, in input order.
 *
 * @param facts Facts as returned by `MemoryStore.listFacts`; only `content` is read.
 * @param category Category name used to pick the heading.
 * @returns Markdown text with lines joined by "\n".
 */
private formatAsReference(facts: ReturnType<MemoryStore['listFacts']>, category: string): string {
  const headings: Record<string, string> = {
    coding_style: '编码风格偏好',
    tool_pref: '工具偏好',
    workflow: '工作流偏好',
    general: '通用知识',
  }
  const heading = headings[category] ?? category
  const bullets = facts.map(f => `- ${f.content}`)

  // Heading, one blank line, then the bullets.
  return [`# ${heading}`, '', ...bullets].join('\n')
}
|
|
68
126
|
|
|
69
|
-
/** Invalidate all caches — call after any write operation */
|
|
70
127
|
invalidate(): void {
|
|
71
128
|
this.cache.clear()
|
|
72
129
|
}
|
|
73
130
|
|
|
74
|
-
/** Get cache entry count for debugging */
|
|
75
131
|
cacheSize(): number {
|
|
76
132
|
return this.cache.size
|
|
77
133
|
}
|
package/src/retriever.ts
CHANGED
|
@@ -106,38 +106,32 @@ export class FactRetriever {
|
|
|
106
106
|
const inferred = this.categoryInferFallback(searchQuery, minTrust, limit)
|
|
107
107
|
if (inferred.length > 0) return inferred
|
|
108
108
|
}
|
|
109
|
-
// 个人/身份相关的短查询触发 trust fallback
|
|
110
|
-
if (this.isPersonalQuery(
|
|
109
|
+
// 个人/身份相关的短查询触发 trust fallback(用原始 query,避免 refineQuery 拆词导致正则失配)
|
|
110
|
+
if (this.isPersonalQuery(query)) {
|
|
111
111
|
return this.trustFallback(category, minTrust, limit)
|
|
112
112
|
}
|
|
113
113
|
return []
|
|
114
114
|
}
|
|
115
115
|
|
|
116
|
-
// Stage 2-4: Jaccard 重排序 + 信任评分 + 时间衰减
|
|
117
|
-
// 动态权重:短查询偏 FTS,长查询偏 Jaccard
|
|
116
|
+
// Stage 2-4: Jaccard 重排序 + 信任评分 + 时间衰减 + length penalty
|
|
118
117
|
const queryTokens = this.tokenize(searchQuery)
|
|
119
|
-
const tokenCount = queryTokens.size
|
|
120
|
-
const ftsWeight = tokenCount <= 3 ? 0.7 : 0.3
|
|
121
|
-
const jaccardWeight = tokenCount <= 3 ? 0.3 : 0.7
|
|
122
118
|
|
|
123
119
|
const scored: ScoredFact[] = []
|
|
124
120
|
|
|
125
121
|
for (const fact of candidates) {
|
|
126
|
-
|
|
122
|
+
// summary 优先用于匹配
|
|
123
|
+
const matchText = fact.summary ?? fact.content
|
|
124
|
+
const matchTokens = this.tokenize(matchText)
|
|
127
125
|
const tagTokens = this.tokenize(fact.tags)
|
|
128
|
-
const allTokens = new Set([...
|
|
126
|
+
const allTokens = new Set([...matchTokens, ...tagTokens])
|
|
129
127
|
|
|
130
128
|
const jaccard = this.jaccardSimilarity(queryTokens, allTokens)
|
|
131
|
-
// Containment: 查询 token 在事实 token 中的覆盖率
|
|
132
129
|
const qInF = this.containmentScore(queryTokens, allTokens)
|
|
133
|
-
|
|
134
|
-
// 混合相似度:Jaccard + Containment(简化版,移除 keywordScore)
|
|
135
130
|
const similarity = 0.3 * jaccard + 0.7 * qInF
|
|
136
131
|
const ftsScore = fact.ftsRank
|
|
137
132
|
|
|
138
|
-
//
|
|
139
|
-
const relevance =
|
|
140
|
-
|
|
133
|
+
// 静态权重 0.5/0.5(回退 v3 动态权重)
|
|
134
|
+
const relevance = 0.5 * ftsScore + 0.5 * similarity
|
|
141
135
|
let score = relevance * fact.trustScore
|
|
142
136
|
|
|
143
137
|
// 时间衰减
|
|
@@ -145,37 +139,22 @@ export class FactRetriever {
|
|
|
145
139
|
score *= this.temporalDecay(fact.updatedAt || fact.createdAt)
|
|
146
140
|
}
|
|
147
141
|
|
|
142
|
+
// Length penalty:基于 matchText 长度
|
|
143
|
+
score *= Math.min(1.0, 300 / matchText.length)
|
|
144
|
+
|
|
148
145
|
scored.push({ ...fact, score })
|
|
149
146
|
}
|
|
150
147
|
|
|
151
148
|
scored.sort((a, b) => b.score - a.score)
|
|
152
149
|
|
|
153
|
-
//
|
|
154
|
-
const
|
|
155
|
-
const gated = scored.filter(s => s.score >= RELEVANCE_THRESHOLD)
|
|
156
|
-
const pool = gated.length > 0 ? gated : scored
|
|
157
|
-
|
|
158
|
-
// 内容去重:Jaccard > 0.7 的只保留高分
|
|
159
|
-
const results: ScoredFact[] = []
|
|
160
|
-
for (const candidate of pool) {
|
|
161
|
-
let isDuplicate = false
|
|
162
|
-
const candidateTokens = this.tokenize(candidate.content)
|
|
163
|
-
for (const kept of results) {
|
|
164
|
-
const keptTokens = this.tokenize(kept.content)
|
|
165
|
-
if (this.jaccardSimilarity(candidateTokens, keptTokens) > 0.7) {
|
|
166
|
-
isDuplicate = true
|
|
167
|
-
break
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
if (!isDuplicate) {
|
|
171
|
-
results.push(candidate)
|
|
172
|
-
if (results.length >= limit) break
|
|
173
|
-
}
|
|
174
|
-
}
|
|
150
|
+
// 取 limit 条(不再做 relevance gate 和 content dedup)
|
|
151
|
+
const results = scored.slice(0, limit)
|
|
175
152
|
|
|
176
153
|
// 检索追踪:递增 retrieval_count + top3 信任刷新
|
|
177
154
|
if (results.length > 0) {
|
|
178
155
|
this.trackRetrieval(results)
|
|
156
|
+
// 记录检索日志
|
|
157
|
+
this.store.logRetrieval(searchQuery, results.map(r => ({ id: r.factId, score: Math.round(r.score * 1000) / 1000 })))
|
|
179
158
|
}
|
|
180
159
|
|
|
181
160
|
// 缓存存储 + 指标记录
|
|
@@ -295,9 +274,11 @@ export class FactRetriever {
|
|
|
295
274
|
category: r.category as FactCategory,
|
|
296
275
|
tags: r.tags,
|
|
297
276
|
keywords: r.keywords ?? '[]',
|
|
277
|
+
summary: (r as any).summary ?? null,
|
|
298
278
|
trustScore: r.trust_score,
|
|
299
279
|
retrievalCount: r.retrieval_count,
|
|
300
280
|
helpfulCount: r.helpful_count,
|
|
281
|
+
lastRetrievedAt: (r as any).last_retrieved_at ?? null,
|
|
301
282
|
createdAt: r.created_at,
|
|
302
283
|
updatedAt: r.updated_at,
|
|
303
284
|
score: r.trust_score * (1 - i * 0.05),
|
|
@@ -409,9 +390,11 @@ export class FactRetriever {
|
|
|
409
390
|
category: r.category as FactCategory,
|
|
410
391
|
tags: r.tags,
|
|
411
392
|
keywords: r.keywords ?? '[]',
|
|
393
|
+
summary: (r as any).summary ?? null,
|
|
412
394
|
trustScore: r.trust_score,
|
|
413
395
|
retrievalCount: 0,
|
|
414
396
|
helpfulCount: 0,
|
|
397
|
+
lastRetrievedAt: (r as any).last_retrieved_at ?? null,
|
|
415
398
|
createdAt: r.created_at,
|
|
416
399
|
updatedAt: r.updated_at,
|
|
417
400
|
})
|
|
@@ -454,15 +437,19 @@ export class FactRetriever {
|
|
|
454
437
|
const ftsParts: string[] = []
|
|
455
438
|
|
|
456
439
|
for (const word of parts) {
|
|
457
|
-
|
|
458
|
-
// 对中文部分追加 bigram
|
|
459
|
-
const cnChars = word.match(/[\u4e00-\u9fff]+/g)
|
|
440
|
+
const cnChars = word.match(/[一-鿿]+/g)
|
|
460
441
|
if (cnChars) {
|
|
442
|
+
// 中文部分:trigram tokenizer 需要至少 3 字符
|
|
461
443
|
for (const seg of cnChars) {
|
|
462
|
-
|
|
463
|
-
|
|
444
|
+
if (seg.length >= 3) ftsParts.push(seg)
|
|
445
|
+
// 提取 trigram(3 字符子串)
|
|
446
|
+
for (let i = 0; i <= seg.length - 3; i++) {
|
|
447
|
+
ftsParts.push(seg.slice(i, i + 3))
|
|
464
448
|
}
|
|
465
449
|
}
|
|
450
|
+
} else {
|
|
451
|
+
// 非中文部分:用引号包裹(短语匹配),至少 1 字符
|
|
452
|
+
if (word.length >= 1) ftsParts.push(`"${word}"`)
|
|
466
453
|
}
|
|
467
454
|
}
|
|
468
455
|
|
|
@@ -509,9 +496,11 @@ export class FactRetriever {
|
|
|
509
496
|
category: String(row.category) as FactCategory,
|
|
510
497
|
tags: String(row.tags),
|
|
511
498
|
keywords: String(row.keywords ?? '[]'),
|
|
499
|
+
summary: row.summary != null ? String(row.summary) : null,
|
|
512
500
|
trustScore: Number(row.trust_score),
|
|
513
501
|
retrievalCount: Number(row.retrieval_count),
|
|
514
502
|
helpfulCount: Number(row.helpful_count),
|
|
503
|
+
lastRetrievedAt: row.last_retrieved_at != null ? String(row.last_retrieved_at) : null,
|
|
515
504
|
createdAt: String(row.created_at),
|
|
516
505
|
updatedAt: String(row.updated_at),
|
|
517
506
|
ftsRank: rawRanks[i] / maxRank,
|
|
@@ -619,8 +608,8 @@ export class FactRetriever {
|
|
|
619
608
|
const conditions: string[] = []
|
|
620
609
|
const params: unknown[] = []
|
|
621
610
|
for (const word of words) {
|
|
622
|
-
conditions.push('(f.content LIKE ? OR f.tags LIKE ?)')
|
|
623
|
-
params.push(`%${word}%`, `%${word}%`)
|
|
611
|
+
conditions.push('(f.content LIKE ? OR f.tags LIKE ? OR f.summary LIKE ?)')
|
|
612
|
+
params.push(`%${word}%`, `%${word}%`, `%${word}%`)
|
|
624
613
|
}
|
|
625
614
|
|
|
626
615
|
// 中文子串分解:将中文查询拆为 2~3 字滑动窗口,追加 LIKE 条件
|
|
@@ -632,14 +621,14 @@ export class FactRetriever {
|
|
|
632
621
|
// 2-gram
|
|
633
622
|
for (let i = 0; i < seg.length - 1; i++) {
|
|
634
623
|
const bigram = seg.slice(i, i + 2)
|
|
635
|
-
conditions.push('(f.content LIKE ? OR f.tags LIKE ?)')
|
|
636
|
-
params.push(`%${bigram}%`, `%${bigram}%`)
|
|
624
|
+
conditions.push('(f.content LIKE ? OR f.tags LIKE ? OR f.summary LIKE ?)')
|
|
625
|
+
params.push(`%${bigram}%`, `%${bigram}%`, `%${bigram}%`)
|
|
637
626
|
}
|
|
638
627
|
// 3-gram(覆盖更长的短语匹配)
|
|
639
628
|
for (let i = 0; i < seg.length - 2; i++) {
|
|
640
629
|
const trigram = seg.slice(i, i + 3)
|
|
641
|
-
conditions.push('(f.content LIKE ? OR f.tags LIKE ?)')
|
|
642
|
-
params.push(`%${trigram}%`, `%${trigram}%`)
|
|
630
|
+
conditions.push('(f.content LIKE ? OR f.tags LIKE ? OR f.summary LIKE ?)')
|
|
631
|
+
params.push(`%${trigram}%`, `%${trigram}%`, `%${trigram}%`)
|
|
643
632
|
}
|
|
644
633
|
}
|
|
645
634
|
}
|
|
@@ -656,7 +645,7 @@ export class FactRetriever {
|
|
|
656
645
|
|
|
657
646
|
const sql = `
|
|
658
647
|
SELECT f.fact_id, f.content, f.category, f.tags, f.keywords,
|
|
659
|
-
f.trust_score, f.retrieval_count, f.helpful_count,
|
|
648
|
+
f.summary, f.trust_score, f.retrieval_count, f.helpful_count,
|
|
660
649
|
f.created_at, f.updated_at
|
|
661
650
|
FROM facts f
|
|
662
651
|
WHERE (${conditionsSql})
|
|
@@ -668,6 +657,7 @@ export class FactRetriever {
|
|
|
668
657
|
|
|
669
658
|
const rows = this.db.prepare(sql).all(...params) as Array<{
|
|
670
659
|
fact_id: number; content: string; category: string; tags: string; keywords: string;
|
|
660
|
+
summary: string | null;
|
|
671
661
|
trust_score: number; retrieval_count: number; helpful_count: number;
|
|
672
662
|
created_at: string; updated_at: string;
|
|
673
663
|
}>
|
|
@@ -679,9 +669,11 @@ export class FactRetriever {
|
|
|
679
669
|
category: r.category as FactCategory,
|
|
680
670
|
tags: r.tags,
|
|
681
671
|
keywords: r.keywords ?? '[]',
|
|
672
|
+
summary: r.summary ?? null,
|
|
682
673
|
trustScore: r.trust_score,
|
|
683
674
|
retrievalCount: r.retrieval_count,
|
|
684
675
|
helpfulCount: r.helpful_count,
|
|
676
|
+
lastRetrievedAt: (r as any).last_retrieved_at ?? null,
|
|
685
677
|
createdAt: r.created_at,
|
|
686
678
|
updatedAt: r.updated_at,
|
|
687
679
|
ftsRank: 0.5,
|
package/src/schema.ts
CHANGED
|
@@ -6,9 +6,11 @@ CREATE TABLE IF NOT EXISTS facts (
|
|
|
6
6
|
category TEXT DEFAULT 'general',
|
|
7
7
|
tags TEXT DEFAULT '',
|
|
8
8
|
keywords TEXT DEFAULT '[]',
|
|
9
|
+
summary TEXT DEFAULT NULL,
|
|
9
10
|
trust_score REAL DEFAULT 0.5,
|
|
10
11
|
retrieval_count INTEGER DEFAULT 0,
|
|
11
12
|
helpful_count INTEGER DEFAULT 0,
|
|
13
|
+
last_retrieved_at TEXT DEFAULT NULL,
|
|
12
14
|
created_at TEXT DEFAULT (datetime('now', 'localtime')),
|
|
13
15
|
updated_at TEXT DEFAULT (datetime('now', 'localtime'))
|
|
14
16
|
);
|
|
@@ -29,33 +31,42 @@ CREATE TABLE IF NOT EXISTS fact_entities (
|
|
|
29
31
|
PRIMARY KEY (fact_id, entity_id)
|
|
30
32
|
);
|
|
31
33
|
|
|
34
|
+
-- 检索日志表
|
|
35
|
+
CREATE TABLE IF NOT EXISTS retrieval_log (
|
|
36
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
37
|
+
query TEXT NOT NULL,
|
|
38
|
+
results TEXT DEFAULT NULL,
|
|
39
|
+
timestamp TEXT DEFAULT (datetime('now', 'localtime'))
|
|
40
|
+
);
|
|
41
|
+
|
|
32
42
|
-- 索引
|
|
33
43
|
CREATE INDEX IF NOT EXISTS idx_facts_trust ON facts(trust_score DESC);
|
|
34
44
|
CREATE INDEX IF NOT EXISTS idx_facts_category ON facts(category);
|
|
35
45
|
CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(name);
|
|
36
46
|
CREATE INDEX IF NOT EXISTS idx_fact_entities_entity ON fact_entities(entity_id);
|
|
47
|
+
CREATE INDEX IF NOT EXISTS idx_retrieval_log_ts ON retrieval_log(timestamp);
|
|
37
48
|
|
|
38
|
-
-- FTS5
|
|
49
|
+
-- FTS5 全文索引(trigram tokenizer 支持中文子串匹配)
|
|
39
50
|
CREATE VIRTUAL TABLE IF NOT EXISTS facts_fts
|
|
40
|
-
USING fts5(content, tags, content=facts, content_rowid=fact_id);
|
|
51
|
+
USING fts5(content, tags, summary, content=facts, content_rowid=fact_id, tokenize='trigram');
|
|
41
52
|
|
|
42
53
|
-- FTS5 同步触发器:插入
|
|
43
54
|
CREATE TRIGGER IF NOT EXISTS facts_ai AFTER INSERT ON facts BEGIN
|
|
44
|
-
INSERT INTO facts_fts(rowid, content, tags)
|
|
45
|
-
VALUES (new.fact_id, new.content, new.tags);
|
|
55
|
+
INSERT INTO facts_fts(rowid, content, tags, summary)
|
|
56
|
+
VALUES (new.fact_id, new.content, new.tags, COALESCE(new.summary, ''));
|
|
46
57
|
END;
|
|
47
58
|
|
|
48
59
|
-- FTS5 同步触发器:删除
|
|
49
60
|
CREATE TRIGGER IF NOT EXISTS facts_ad AFTER DELETE ON facts BEGIN
|
|
50
|
-
INSERT INTO facts_fts(facts_fts, rowid, content, tags)
|
|
51
|
-
VALUES ('delete', old.fact_id, old.content, old.tags);
|
|
61
|
+
INSERT INTO facts_fts(facts_fts, rowid, content, tags, summary)
|
|
62
|
+
VALUES ('delete', old.fact_id, old.content, old.tags, COALESCE(old.summary, ''));
|
|
52
63
|
END;
|
|
53
64
|
|
|
54
65
|
-- FTS5 同步触发器:更新
|
|
55
66
|
CREATE TRIGGER IF NOT EXISTS facts_au AFTER UPDATE ON facts BEGIN
|
|
56
|
-
INSERT INTO facts_fts(facts_fts, rowid, content, tags)
|
|
57
|
-
VALUES ('delete', old.fact_id, old.content, old.tags);
|
|
58
|
-
INSERT INTO facts_fts(rowid, content, tags)
|
|
59
|
-
VALUES (new.fact_id, new.content, new.tags);
|
|
67
|
+
INSERT INTO facts_fts(facts_fts, rowid, content, tags, summary)
|
|
68
|
+
VALUES ('delete', old.fact_id, old.content, old.tags, COALESCE(old.summary, ''));
|
|
69
|
+
INSERT INTO facts_fts(rowid, content, tags, summary)
|
|
70
|
+
VALUES (new.fact_id, new.content, new.tags, COALESCE(new.summary, ''));
|
|
60
71
|
END;
|
|
61
72
|
`
|