@morningljn/mnemo 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -15
- package/README_zh.md +1 -1
- package/dist/cache.d.ts +23 -0
- package/dist/cache.js +44 -0
- package/dist/cache.js.map +1 -0
- package/dist/init.js +16 -8
- package/dist/init.js.map +1 -1
- package/dist/metrics.d.ts +31 -0
- package/dist/metrics.js +57 -0
- package/dist/metrics.js.map +1 -0
- package/dist/refine.d.ts +14 -0
- package/dist/refine.js +115 -0
- package/dist/refine.js.map +1 -0
- package/dist/resources.d.ts +27 -0
- package/dist/resources.js +56 -0
- package/dist/resources.js.map +1 -0
- package/dist/retriever.d.ts +14 -2
- package/dist/retriever.js +126 -36
- package/dist/retriever.js.map +1 -1
- package/dist/server.js +40 -16
- package/dist/server.js.map +1 -1
- package/dist/types.d.ts +2 -2
- package/docs/superpowers/plans/2026-05-15-mnemo-mcp.md +1154 -0
- package/docs/superpowers/plans/2026-05-16-mnemo-query-cache.md +613 -0
- package/docs/superpowers/plans/2026-05-16-retrieval-and-injection-optimization.md +770 -0
- package/openspec/changes/archive/2026-05-15-mnemo-mcp/.openspec.yaml +2 -0
- package/openspec/changes/archive/2026-05-15-mnemo-mcp/design.md +83 -0
- package/openspec/changes/archive/2026-05-15-mnemo-mcp/proposal.md +32 -0
- package/openspec/changes/archive/2026-05-15-mnemo-mcp/specs/fact-retrieval/spec.md +75 -0
- package/openspec/changes/archive/2026-05-15-mnemo-mcp/specs/fact-store/spec.md +83 -0
- package/openspec/changes/archive/2026-05-15-mnemo-mcp/specs/mcp-server/spec.md +34 -0
- package/openspec/changes/archive/2026-05-15-mnemo-mcp/specs/security/spec.md +37 -0
- package/openspec/changes/archive/2026-05-15-mnemo-mcp/tasks.md +44 -0
- package/openspec/changes/archive/2026-05-16-mnemo-query-cache/.openspec.yaml +2 -0
- package/openspec/changes/archive/2026-05-16-mnemo-query-cache/design.md +96 -0
- package/openspec/changes/archive/2026-05-16-mnemo-query-cache/proposal.md +29 -0
- package/openspec/changes/archive/2026-05-16-mnemo-query-cache/specs/batch-operations/spec.md +42 -0
- package/openspec/changes/archive/2026-05-16-mnemo-query-cache/specs/perf-metrics/spec.md +55 -0
- package/openspec/changes/archive/2026-05-16-mnemo-query-cache/specs/query-cache/spec.md +65 -0
- package/openspec/changes/archive/2026-05-16-mnemo-query-cache/tasks.md +45 -0
- package/openspec/changes/retrieval-and-injection-optimization/.openspec.yaml +2 -0
- package/openspec/changes/retrieval-and-injection-optimization/design.md +117 -0
- package/openspec/changes/retrieval-and-injection-optimization/proposal.md +30 -0
- package/openspec/changes/retrieval-and-injection-optimization/specs/adaptive-scoring/spec.md +43 -0
- package/openspec/changes/retrieval-and-injection-optimization/specs/injection-protocol/spec.md +48 -0
- package/openspec/changes/retrieval-and-injection-optimization/specs/mcp-resources/spec.md +39 -0
- package/openspec/changes/retrieval-and-injection-optimization/specs/query-refinement/spec.md +39 -0
- package/openspec/changes/retrieval-and-injection-optimization/tasks.md +33 -0
- package/openspec/config.yaml +20 -0
- package/package.json +1 -1
- package/src/cache.ts +65 -0
- package/src/init.ts +17 -9
- package/src/metrics.ts +81 -0
- package/src/refine.ts +127 -0
- package/src/resources.ts +78 -0
- package/src/retriever.ts +141 -34
- package/src/server.ts +42 -17
- package/src/types.ts +2 -2
- package/tests/refine.test.ts +52 -0
- package/tests/resource.test.ts +62 -0
package/src/metrics.ts
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Performance metrics for mnemo-mcp.
|
|
3
|
+
* Tracks query timing, cache hit/miss, and retrieval paths.
|
|
4
|
+
* Only active when MNEMO_DEBUG=1.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/** One measured retrieval call, as passed to {@link PerfMetrics.record}. */
export interface QueryMetrics {
  /** Name of the operation that was measured; printed verbatim in the debug line. */
  action: string
  /** Duration of the call in milliseconds. */
  durationMs: number
  /** Number of results the call produced. */
  resultCount: number
  /** True when the result came from the cache, false when it had to be computed. */
  cacheHit: boolean
  /** Optional label for the retrieval path taken; appended to the debug line in brackets. */
  retrievalPath?: string
}

/**
 * Running counters for query timing and cache effectiveness.
 *
 * When disabled, `record` and `logStats` are no-ops and every counter stays at zero.
 * All output goes to stderr via `console.error`.
 */
export class PerfMetrics {
  private readonly enabled: boolean
  private totalQueries = 0
  private cacheHits = 0
  private cacheMisses = 0
  private totalMissTimeMs = 0

  /**
   * @param enabled Whether metrics are collected. Defaults to `MNEMO_DEBUG === '1'`,
   *   so existing `new PerfMetrics()` callers keep the environment-driven behaviour.
   */
  constructor(enabled: boolean = process.env.MNEMO_DEBUG === '1') {
    this.enabled = enabled
  }

  /** Whether this instance is collecting metrics. */
  isEnabled(): boolean {
    return this.enabled
  }

  /**
   * Count one query and print a one-line debug summary for it.
   * Does nothing when metrics are disabled.
   */
  record(metrics: QueryMetrics): void {
    if (!this.enabled) return

    this.totalQueries++
    if (metrics.cacheHit) {
      this.cacheHits++
    } else {
      this.cacheMisses++
      // A non-finite duration would turn the running total (and every later
      // average) into NaN for good, so such a sample contributes 0 ms instead.
      if (Number.isFinite(metrics.durationMs)) {
        this.totalMissTimeMs += metrics.durationMs
      }
    }

    const outcome = metrics.cacheHit ? 'HIT' : 'MISS'
    const path = metrics.retrievalPath ? ` [${metrics.retrievalPath}]` : ''
    console.error(
      `[mnemo:debug] ${metrics.action} | ${outcome} | ` +
      `${metrics.durationMs.toFixed(2)}ms | ${metrics.resultCount} results | ` +
      `hit_ratio=${this.formatPercent(this.currentHitRatio())}%${path}`
    )
  }

  /**
   * Snapshot of the counters.
   *
   * - `hitRatio` is a fraction in [0, 1] (not a percentage).
   * - `avgQueryTime` is the mean duration of cache *misses* only, in milliseconds.
   * - `totalTimeSaved` is an estimate: number of hits times `avgQueryTime`.
   */
  getStats(): {
    totalQueries: number
    cacheHits: number
    cacheMisses: number
    hitRatio: number
    avgQueryTime: number
    totalTimeSaved: number
  } {
    const avgQueryTime = this.cacheMisses > 0 ? this.totalMissTimeMs / this.cacheMisses : 0

    return {
      totalQueries: this.totalQueries,
      cacheHits: this.cacheHits,
      cacheMisses: this.cacheMisses,
      hitRatio: this.currentHitRatio(),
      avgQueryTime,
      totalTimeSaved: this.cacheHits * avgQueryTime,
    }
  }

  /** Print the aggregate counters as one debug line. Does nothing when disabled. */
  logStats(): void {
    if (!this.enabled) return
    const stats = this.getStats()
    console.error(
      `[mnemo:debug] stats | total=${stats.totalQueries} hits=${stats.cacheHits} ` +
      `misses=${stats.cacheMisses} hit_ratio=${this.formatPercent(stats.hitRatio)}% ` +
      `avg_time=${stats.avgQueryTime.toFixed(2)}ms saved=${stats.totalTimeSaved.toFixed(2)}ms`
    )
  }

  /** Cache hits divided by total queries, or 0 before the first query. */
  private currentHitRatio(): number {
    return this.totalQueries > 0 ? this.cacheHits / this.totalQueries : 0
  }

  /** Render a [0, 1] fraction as a percentage with one decimal place (no `%` sign). */
  private formatPercent(fraction: number): string {
    return (fraction * 100).toFixed(1)
  }
}
|
package/src/refine.ts
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Query refinement: strip noise tokens from user messages before memory search.
|
|
3
|
+
* Pure function — no side effects, no DB access.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import type { FactCategory } from './types.js'
|
|
7
|
+
|
|
8
|
+
// Action words / helper phrases to strip (Chinese)
const ACTION_WORDS = [
  '帮我看看', '能不能帮我', '给我看看',
  '帮我', '看看', '看一下', '做一下', '能不能', '为什么', '怎么',
  '是什么', '如何', '请', '麻烦', '可以', '给我',
  '给我做', '给我写', '给我查', '给我找', '给我说', '给我讲',
  '告诉我', '跟我说', '跟我讲', '给我解释', '给我说明', '给我介绍',
  '运行', '执行', '启动', '停止', '创建', '删除', '修改', '更新', '查看',
  '检查', '测试', '提交', '推送', '拉取', '合并', '切换', '重置', '重构',
  '运行测试', '创建文件',
]

// Common CLI commands / low-signal English tokens to filter (compared lower-cased)
const NOISE_WORDS = new Set([
  'git', 'npm', 'npx', 'yarn', 'pnpm', 'status', 'log', 'diff', 'add',
  'commit', 'push', 'pull', 'merge', 'checkout', 'branch', 'stash',
  'install', 'build', 'run', 'start', 'stop', 'test', 'lint', 'format',
])

// Longest phrases first, so a long phrase is stripped before any shorter phrase it contains
const ACTION_WORDS_SORTED = [...ACTION_WORDS].sort((a, b) => b.length - a.length)
const ACTION_WORDS_SET = new Set(ACTION_WORDS)

// Single-character Chinese stop words (a local list, kept in this module).
// refineQuery never emits single characters, so this set only acts as a
// safety net in the final token filter.
const CN_STOP_WORDS = new Set([
  '的', '了', '是', '在', '有', '和', '就', '不', '人', '都',
  '一', '个', '上', '也', '很', '到', '说', '要', '去', '你',
  '会', '着', '没', '看', '好', '自', '这', '他', '她', '它',
  '那', '些', '用', '对', '下', '为', '从', '被', '把', '能',
  '可', '以', '所', '而', '又', '与', '但', '或', '等', '中',
  '大', '小', '多', '少', '其', '之', '做', '让', '给', '已',
  '还', '来', '地', '得', '过', '时', '里', '后', '前', '当',
])

/** Outcome of {@link refineQuery}. */
export interface RefineResult {
  /** Space-joined search string: entity tokens first, then the remaining tokens. */
  query: string | null
  /** Filtered, de-duplicated English words and Chinese bigrams, in order of appearance. */
  tokens: string[]
  /** High-signal phrases: quoted text, 《book titles》, multi-word capitalized phrases. */
  entityTokens: string[]
}

/**
 * Collect high-signal phrases from the message, de-duplicated in first-seen order.
 * A phrase can match more than one pattern (e.g. a quoted, capitalized product
 * name); it is reported once.
 */
function extractEntityTokens(text: string): string[] {
  const patterns = [
    // Quoted content, 2-20 characters: 「…」, straight "…" / '…', or curly “…” / ‘…’
    /[「"'\u201c\u201d\u2018\u2019]([^「」"'\u201c\u201d\u2018\u2019]{2,20})[」"'\u201c\u201d\u2018\u2019]/g,
    // Book titles: 《记忆系统》
    /《([^》]+)》/g,
    // Capitalized English phrases of two or more words, e.g. "Visual Studio Code".
    // A single capitalized word is deliberately not treated as an entity.
    /\b([A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)+)\b/g,
  ]

  const seen = new Set<string>()
  const entities: string[] = []
  for (const pattern of patterns) {
    for (const match of text.matchAll(pattern)) {
      const phrase = match[1]
      if (phrase !== undefined && !seen.has(phrase)) {
        seen.add(phrase)
        entities.push(phrase)
      }
    }
  }
  return entities
}

/**
 * Remove every action phrase from a run of Chinese characters and return the
 * pieces that remain. Each removed phrase acts as a boundary, so text on either
 * side of it is never glued together.
 */
function splitOnActionWords(segment: string): string[] {
  let rest = segment
  for (const phrase of ACTION_WORDS_SORTED) {
    if (rest.includes(phrase)) rest = rest.split(phrase).join(' ')
  }
  return rest.split(' ').filter(run => run.length > 0)
}

/**
 * Refine a raw user message into memory-searchable keywords.
 *
 * English words (length >= 2, trailing `.`/`-` trimmed) and Chinese 2-grams are
 * extracted, then action phrases, stop words and CLI noise words are dropped.
 * Bigrams are only built from characters that were adjacent in the message.
 *
 * @param raw The user message as typed.
 * @returns The refined query, or `null` when the message is empty or nothing
 *   searchable is left (a pure operation command). When a result is returned,
 *   `query` is always a non-empty string.
 */
export function refineQuery(raw: string): RefineResult | null {
  const trimmed = raw.trim()
  if (!trimmed) return null

  // High-signal phrases are pulled from the whole message before tokenizing
  const entityTokens = extractEntityTokens(trimmed)

  const tokens: string[] = []
  for (const part of trimmed.split(/\s+/)) {
    // English words: drop sentence punctuation stuck to the end ("TypeScript.")
    // and skip runs with no letter or digit at all ("...", "--").
    for (const candidate of part.match(/[a-zA-Z0-9_\-.]+/g) ?? []) {
      const word = candidate.replace(/[.\-]+$/, '')
      if (word.length >= 2 && /[a-zA-Z0-9]/.test(word)) tokens.push(word)
    }

    // Chinese: strip action phrases, then emit 2-grams per contiguous run
    for (const segment of part.match(/[\u4e00-\u9fff]+/g) ?? []) {
      for (const run of splitOnActionWords(segment)) {
        for (let i = 0; i + 1 < run.length; i++) {
          tokens.push(run.slice(i, i + 2))
        }
      }
    }
  }

  // Filter stop words, noise and short tokens; de-duplicate while preserving order
  const seen = new Set<string>()
  const deduped: string[] = []
  for (const token of tokens) {
    if (token.length < 2) continue
    if (ACTION_WORDS_SET.has(token) || CN_STOP_WORDS.has(token)) continue
    if (NOISE_WORDS.has(token.toLowerCase())) continue
    if (seen.has(token)) continue
    seen.add(token)
    deduped.push(token)
  }

  // Nothing searchable left: treat the message as a pure command
  if (deduped.length === 0 && entityTokens.length === 0) {
    return null
  }

  // Entity tokens first (higher signal), then tokens not already listed as entities
  const entitySet = new Set(entityTokens)
  const query = [...entityTokens, ...deduped.filter(t => !entitySet.has(t))].join(' ')

  return { query, tokens: deduped, entityTokens }
}
|
package/src/resources.ts
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP Resource manager for mnemo-mcp.
|
|
3
|
+
* Exposes per-category memory summaries as MCP Resources for session warmup injection.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
|
|
7
|
+
import type { MemoryStore } from './store.js'
|
|
8
|
+
import type { FactCategory } from './types.js'
|
|
9
|
+
|
|
10
|
+
/** Categories that are each exposed as one MCP resource. */
const CATEGORIES: FactCategory[] = ['identity', 'coding_style', 'tool_pref', 'workflow', 'general']
/** Maximum number of facts requested from the store per category. */
const RESOURCE_LIMIT = 10

/** Single source of truth for a category's resource URI. */
function categoryResourceUri(category: FactCategory): string {
  return `mnemo://global/${category}`
}

/** Shape of one fact as serialized into a resource payload. */
export interface ResourceFact {
  fact_id: number
  content: string
  trust_score: number
}

/**
 * Serves per-category fact lists as MCP resources, with an in-memory cache
 * that lives until {@link ResourceManager.invalidate} is called.
 */
export class ResourceManager {
  private cache = new Map<FactCategory, ResourceFact[]>()

  constructor(
    private store: MemoryStore,
  ) {}

  /** Register all category resources with the MCP server */
  registerResources(server: McpServer): void {
    for (const category of CATEGORIES) {
      server.registerResource(
        `mnemo-global-${category}`,
        categoryResourceUri(category),
        {
          description: `${category} category global facts (top ${RESOURCE_LIMIT} by trust)`,
          mimeType: 'application/json',
        },
        async () => this.readCategory(category),
      )
    }
  }

  /** Read handler for a specific category: the facts as pretty-printed JSON text. */
  private readCategory(category: FactCategory): { contents: Array<{ uri: string; mimeType: string; text: string }> } {
    const facts = this.getFacts(category)
    return {
      contents: [{
        uri: categoryResourceUri(category),
        mimeType: 'application/json',
        text: JSON.stringify(facts, null, 2),
      }],
    }
  }

  /**
   * Get facts for a category — with caching.
   *
   * On a cache miss the store is asked for up to RESOURCE_LIMIT facts with a
   * minimum trust of 0.0 (ordering is whatever `listFacts` returns — presumably
   * by trust; confirm against the store). The returned array and its objects
   * are fresh copies, so callers may mutate them without corrupting the cache.
   */
  getFacts(category: FactCategory): ResourceFact[] {
    let facts = this.cache.get(category)
    if (!facts) {
      facts = this.store.listFacts(category, 0.0, RESOURCE_LIMIT).map(f => ({
        fact_id: f.factId,
        content: f.content,
        trust_score: f.trustScore,
      }))
      this.cache.set(category, facts)
    }

    return facts.map(fact => ({ ...fact }))
  }

  /** Invalidate all caches — call after any write operation */
  invalidate(): void {
    this.cache.clear()
  }

  /** Get cache entry count for debugging */
  cacheSize(): number {
    return this.cache.size
  }
}
|
package/src/retriever.ts
CHANGED
|
@@ -10,6 +10,9 @@
|
|
|
10
10
|
import type Database from 'better-sqlite3'
|
|
11
11
|
import type { Fact, FactCategory, ScoredFact, Contradiction, SearchOptions, ContradictOptions, RetrieverOptions } from './types.js'
|
|
12
12
|
import { MemoryStore } from './store.js'
|
|
13
|
+
import { QueryCache } from './cache.js'
|
|
14
|
+
import { PerfMetrics } from './metrics.js'
|
|
15
|
+
import { refineQuery } from './refine.js'
|
|
13
16
|
|
|
14
17
|
// 中文字符级匹配的虚词集合(这些单字太常见,不参与字符交叉匹配)
|
|
15
18
|
const CN_OVERLAP_STOP = new Set([
|
|
@@ -31,6 +34,10 @@ export class FactRetriever {
|
|
|
31
34
|
private ftsWeight: number
|
|
32
35
|
private jaccardWeight: number
|
|
33
36
|
private halfLifeDays: number
|
|
37
|
+
/** 查询缓存(60s TTL,进程内 Map) */
|
|
38
|
+
private cache: QueryCache
|
|
39
|
+
/** 性能指标(MNEMO_DEBUG=1 时生效) */
|
|
40
|
+
private metrics: PerfMetrics
|
|
34
41
|
/** category → 高频 tag 集合(从事实库自动学习,惰性初始化) */
|
|
35
42
|
private _categoryTagMap: Map<FactCategory, Set<string>> | null = null
|
|
36
43
|
/** 中英术语对列表(从事实库自动学习,惰性初始化) */
|
|
@@ -44,16 +51,46 @@ export class FactRetriever {
|
|
|
44
51
|
this.ftsWeight = options?.ftsWeight ?? 0.5
|
|
45
52
|
this.jaccardWeight = options?.jaccardWeight ?? 0.5
|
|
46
53
|
this.halfLifeDays = options?.temporalDecayHalfLife ?? 0
|
|
54
|
+
this.cache = new QueryCache()
|
|
55
|
+
this.metrics = new PerfMetrics()
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/** 获取缓存实例(供 server.ts 写操作时调用 cache.clear()) */
|
|
59
|
+
getCache(): QueryCache {
|
|
60
|
+
return this.cache
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/** 获取性能指标实例(供调试接口使用) */
|
|
64
|
+
getMetrics(): PerfMetrics {
|
|
65
|
+
return this.metrics
|
|
47
66
|
}
|
|
48
67
|
|
|
49
68
|
/** 主搜索:FTS5 → LIKE → 字符交叉 → 分类推断 → Jaccard → 信任评分 → 时间衰减 */
|
|
50
|
-
search(query: string, options?: SearchOptions): ScoredFact[] {
|
|
69
|
+
search(query: string, options?: SearchOptions & { skipRefine?: boolean }): ScoredFact[] {
|
|
70
|
+
const startTime = performance.now()
|
|
51
71
|
const minTrust = options?.minTrust ?? 0.3
|
|
52
72
|
const limit = options?.limit ?? 10
|
|
53
73
|
const category = options?.category
|
|
54
74
|
|
|
75
|
+
// 查询提炼(除非显式跳过)
|
|
76
|
+
let searchQuery = query
|
|
77
|
+
if (!options?.skipRefine) {
|
|
78
|
+
const refined = refineQuery(query)
|
|
79
|
+
if (refined?.query) {
|
|
80
|
+
searchQuery = refined.query
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// 缓存检查
|
|
85
|
+
const cacheKey = this.cache.makeKey({ action: 'search', query: searchQuery, category, minTrust, limit })
|
|
86
|
+
const cached = this.cache.get(cacheKey)
|
|
87
|
+
if (cached) {
|
|
88
|
+
this.metrics.record({ action: 'search', durationMs: performance.now() - startTime, resultCount: cached.length, cacheHit: true })
|
|
89
|
+
return cached
|
|
90
|
+
}
|
|
91
|
+
|
|
55
92
|
// 查询双语扩展:中文术语追加英文,英文术语追加中文
|
|
56
|
-
const expandedQuery = this.expandQueryBilingually(
|
|
93
|
+
const expandedQuery = this.expandQueryBilingually(searchQuery)
|
|
57
94
|
|
|
58
95
|
// Stage 1: FTS5 候选集,空时逐级 fallback(使用双语扩展后的查询)
|
|
59
96
|
let candidates = this.ftsCandidates(expandedQuery, category, minTrust, limit * 3)
|
|
@@ -66,18 +103,22 @@ export class FactRetriever {
|
|
|
66
103
|
if (candidates.length === 0) {
|
|
67
104
|
// 分类推断 fallback(仅无 category 过滤时生效)
|
|
68
105
|
if (!category) {
|
|
69
|
-
const inferred = this.categoryInferFallback(
|
|
106
|
+
const inferred = this.categoryInferFallback(searchQuery, minTrust, limit)
|
|
70
107
|
if (inferred.length > 0) return inferred
|
|
71
108
|
}
|
|
72
109
|
// 个人/身份相关的短查询触发 trust fallback
|
|
73
|
-
if (this.isPersonalQuery(
|
|
110
|
+
if (this.isPersonalQuery(searchQuery)) {
|
|
74
111
|
return this.trustFallback(category, minTrust, limit)
|
|
75
112
|
}
|
|
76
113
|
return []
|
|
77
114
|
}
|
|
78
115
|
|
|
79
116
|
// Stage 2-4: Jaccard 重排序 + 信任评分 + 时间衰减
|
|
80
|
-
|
|
117
|
+
// 动态权重:短查询偏 FTS,长查询偏 Jaccard
|
|
118
|
+
const queryTokens = this.tokenize(searchQuery)
|
|
119
|
+
const tokenCount = queryTokens.size
|
|
120
|
+
const ftsWeight = tokenCount <= 3 ? 0.7 : 0.3
|
|
121
|
+
const jaccardWeight = tokenCount <= 3 ? 0.3 : 0.7
|
|
81
122
|
|
|
82
123
|
const scored: ScoredFact[] = []
|
|
83
124
|
|
|
@@ -95,7 +136,7 @@ export class FactRetriever {
|
|
|
95
136
|
const ftsScore = fact.ftsRank
|
|
96
137
|
|
|
97
138
|
// 综合评分
|
|
98
|
-
const relevance =
|
|
139
|
+
const relevance = ftsWeight * ftsScore + jaccardWeight * similarity
|
|
99
140
|
|
|
100
141
|
let score = relevance * fact.trustScore
|
|
101
142
|
|
|
@@ -109,52 +150,80 @@ export class FactRetriever {
|
|
|
109
150
|
|
|
110
151
|
scored.sort((a, b) => b.score - a.score)
|
|
111
152
|
|
|
112
|
-
//
|
|
113
|
-
const
|
|
114
|
-
const
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
diverse.push(s)
|
|
128
|
-
if (diverse.length >= limit) break
|
|
153
|
+
// 相关性门控:过滤低相关性结果
|
|
154
|
+
const RELEVANCE_THRESHOLD = 0.15
|
|
155
|
+
const gated = scored.filter(s => s.score >= RELEVANCE_THRESHOLD)
|
|
156
|
+
const pool = gated.length > 0 ? gated : scored
|
|
157
|
+
|
|
158
|
+
// 内容去重:Jaccard > 0.7 的只保留高分
|
|
159
|
+
const results: ScoredFact[] = []
|
|
160
|
+
for (const candidate of pool) {
|
|
161
|
+
let isDuplicate = false
|
|
162
|
+
const candidateTokens = this.tokenize(candidate.content)
|
|
163
|
+
for (const kept of results) {
|
|
164
|
+
const keptTokens = this.tokenize(kept.content)
|
|
165
|
+
if (this.jaccardSimilarity(candidateTokens, keptTokens) > 0.7) {
|
|
166
|
+
isDuplicate = true
|
|
167
|
+
break
|
|
129
168
|
}
|
|
130
169
|
}
|
|
170
|
+
if (!isDuplicate) {
|
|
171
|
+
results.push(candidate)
|
|
172
|
+
if (results.length >= limit) break
|
|
173
|
+
}
|
|
131
174
|
}
|
|
132
175
|
|
|
133
|
-
const results = diverse
|
|
134
|
-
|
|
135
176
|
// 检索追踪:递增 retrieval_count + top3 信任刷新
|
|
136
177
|
if (results.length > 0) {
|
|
137
178
|
this.trackRetrieval(results)
|
|
138
179
|
}
|
|
139
180
|
|
|
181
|
+
// 缓存存储 + 指标记录
|
|
182
|
+
this.cache.set(cacheKey, results)
|
|
183
|
+
this.metrics.record({ action: 'search', durationMs: performance.now() - startTime, resultCount: results.length, cacheHit: false, retrievalPath: 'FTS5' })
|
|
140
184
|
return results
|
|
141
185
|
}
|
|
142
186
|
|
|
143
187
|
/** 实体探测:查询某实体关联的所有事实 */
|
|
144
188
|
probe(entity: string, options?: SearchOptions): ScoredFact[] {
|
|
189
|
+
const startTime = performance.now()
|
|
145
190
|
const limit = options?.limit ?? 10
|
|
146
|
-
const
|
|
147
|
-
|
|
191
|
+
const category = options?.category
|
|
192
|
+
|
|
193
|
+
// 缓存检查
|
|
194
|
+
const cacheKey = this.cache.makeKey({ action: 'probe', entity, category, limit })
|
|
195
|
+
const cached = this.cache.get(cacheKey)
|
|
196
|
+
if (cached) {
|
|
197
|
+
this.metrics.record({ action: 'probe', durationMs: performance.now() - startTime, resultCount: cached.length, cacheHit: true })
|
|
198
|
+
return cached
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
const facts = this.store.getFactsByEntity(entity, category, limit)
|
|
202
|
+
const results = facts.map((f, i) => ({
|
|
148
203
|
...f,
|
|
149
204
|
score: f.trustScore * (1 - i * 0.05), // 按信任评分排序并给微小梯度
|
|
150
205
|
}))
|
|
206
|
+
|
|
207
|
+
// 缓存存储 + 指标记录
|
|
208
|
+
this.cache.set(cacheKey, results)
|
|
209
|
+
this.metrics.record({ action: 'probe', durationMs: performance.now() - startTime, resultCount: results.length, cacheHit: false, retrievalPath: 'entity' })
|
|
210
|
+
return results
|
|
151
211
|
}
|
|
152
212
|
|
|
153
213
|
/** 实体关联:查找与某实体共享上下文的其他事实 */
|
|
154
214
|
related(entity: string, options?: SearchOptions): ScoredFact[] {
|
|
215
|
+
const startTime = performance.now()
|
|
155
216
|
const limit = options?.limit ?? 10
|
|
156
217
|
const category = options?.category
|
|
157
218
|
|
|
219
|
+
// 缓存检查
|
|
220
|
+
const cacheKey = this.cache.makeKey({ action: 'related', entity, category, limit })
|
|
221
|
+
const cached = this.cache.get(cacheKey)
|
|
222
|
+
if (cached) {
|
|
223
|
+
this.metrics.record({ action: 'related', durationMs: performance.now() - startTime, resultCount: cached.length, cacheHit: true })
|
|
224
|
+
return cached
|
|
225
|
+
}
|
|
226
|
+
|
|
158
227
|
// Step 1: 获取实体关联的 fact_id 列表
|
|
159
228
|
const entityFactsSql = `
|
|
160
229
|
SELECT fe.fact_id FROM fact_entities fe
|
|
@@ -162,7 +231,12 @@ export class FactRetriever {
|
|
|
162
231
|
WHERE e.name LIKE ?
|
|
163
232
|
`
|
|
164
233
|
const entityFactRows = this.db.prepare(entityFactsSql).all(entity) as Array<{ fact_id: number }>
|
|
165
|
-
if (entityFactRows.length === 0)
|
|
234
|
+
if (entityFactRows.length === 0) {
|
|
235
|
+
const emptyResults: ScoredFact[] = []
|
|
236
|
+
this.cache.set(cacheKey, emptyResults)
|
|
237
|
+
this.metrics.record({ action: 'related', durationMs: performance.now() - startTime, resultCount: 0, cacheHit: false, retrievalPath: 'entity' })
|
|
238
|
+
return emptyResults
|
|
239
|
+
}
|
|
166
240
|
|
|
167
241
|
const factIds = entityFactRows.map(r => r.fact_id)
|
|
168
242
|
|
|
@@ -175,7 +249,12 @@ export class FactRetriever {
|
|
|
175
249
|
AND e.name NOT LIKE ?
|
|
176
250
|
`).all(...factIds, entity) as Array<{ name: string }>
|
|
177
251
|
|
|
178
|
-
if (otherEntityRows.length === 0)
|
|
252
|
+
if (otherEntityRows.length === 0) {
|
|
253
|
+
const emptyResults: ScoredFact[] = []
|
|
254
|
+
this.cache.set(cacheKey, emptyResults)
|
|
255
|
+
this.metrics.record({ action: 'related', durationMs: performance.now() - startTime, resultCount: 0, cacheHit: false, retrievalPath: 'entity' })
|
|
256
|
+
return emptyResults
|
|
257
|
+
}
|
|
179
258
|
|
|
180
259
|
// Step 3: 获取关联这些其他实体但不包含原始事实的 facts
|
|
181
260
|
const otherEntities = otherEntityRows.map(r => r.name)
|
|
@@ -210,7 +289,7 @@ export class FactRetriever {
|
|
|
210
289
|
created_at: string; updated_at: string;
|
|
211
290
|
}>
|
|
212
291
|
|
|
213
|
-
|
|
292
|
+
const results = rows.map((r, i) => ({
|
|
214
293
|
factId: r.fact_id,
|
|
215
294
|
content: r.content,
|
|
216
295
|
category: r.category as FactCategory,
|
|
@@ -223,20 +302,44 @@ export class FactRetriever {
|
|
|
223
302
|
updatedAt: r.updated_at,
|
|
224
303
|
score: r.trust_score * (1 - i * 0.05),
|
|
225
304
|
}))
|
|
305
|
+
|
|
306
|
+
// 缓存存储 + 指标记录
|
|
307
|
+
this.cache.set(cacheKey, results)
|
|
308
|
+
this.metrics.record({ action: 'related', durationMs: performance.now() - startTime, resultCount: results.length, cacheHit: false, retrievalPath: 'entity' })
|
|
309
|
+
return results
|
|
226
310
|
}
|
|
227
311
|
|
|
228
312
|
/** 多实体推理:查找同时关联多个实体的事实 */
|
|
229
313
|
reason(entities: string[], options?: SearchOptions): ScoredFact[] {
|
|
314
|
+
const startTime = performance.now()
|
|
230
315
|
if (entities.length === 0) return []
|
|
231
|
-
|
|
232
|
-
|
|
316
|
+
|
|
317
|
+
const category = options?.category
|
|
318
|
+
const limit = options?.limit ?? 10
|
|
319
|
+
|
|
320
|
+
// 缓存检查
|
|
321
|
+
const cacheKey = this.cache.makeKey({ action: 'reason', entities, category, limit })
|
|
322
|
+
const cached = this.cache.get(cacheKey)
|
|
323
|
+
if (cached) {
|
|
324
|
+
this.metrics.record({ action: 'reason', durationMs: performance.now() - startTime, resultCount: cached.length, cacheHit: true })
|
|
325
|
+
return cached
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
const facts = this.store.getFactsByEntities(entities, category, limit)
|
|
329
|
+
const results = facts.map((f, i) => ({
|
|
233
330
|
...f,
|
|
234
331
|
score: f.trustScore * (1 - i * 0.05),
|
|
235
332
|
}))
|
|
333
|
+
|
|
334
|
+
// 缓存存储 + 指标记录
|
|
335
|
+
this.cache.set(cacheKey, results)
|
|
336
|
+
this.metrics.record({ action: 'reason', durationMs: performance.now() - startTime, resultCount: results.length, cacheHit: false, retrievalPath: 'entity' })
|
|
337
|
+
return results
|
|
236
338
|
}
|
|
237
339
|
|
|
238
|
-
/** 矛盾检测:实体重叠 +
|
|
340
|
+
/** 矛盾检测:实体重叠 + 内容差异(仅指标,不缓存 — 返回类型不同) */
|
|
239
341
|
contradict(options?: ContradictOptions): Contradiction[] {
|
|
342
|
+
const startTime = performance.now()
|
|
240
343
|
const threshold = options?.threshold ?? 0.3
|
|
241
344
|
const limit = options?.limit ?? 10
|
|
242
345
|
const category = options?.category
|
|
@@ -326,7 +429,11 @@ export class FactRetriever {
|
|
|
326
429
|
}
|
|
327
430
|
|
|
328
431
|
contradictions.sort((a, b) => b.contradictionScore - a.contradictionScore)
|
|
329
|
-
|
|
432
|
+
const results = contradictions.slice(0, limit)
|
|
433
|
+
|
|
434
|
+
// 指标记录(无缓存 — Contradiction[] 不适用于 ScoredFact 缓存)
|
|
435
|
+
this.metrics.record({ action: 'contradict', durationMs: performance.now() - startTime, resultCount: results.length, cacheHit: false, retrievalPath: 'O(n²)' })
|
|
436
|
+
return results
|
|
330
437
|
}
|
|
331
438
|
|
|
332
439
|
// ------------------------------------------------------------------
|