claude-brain 0.30.2 → 0.30.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +241 -191
- package/VERSION +1 -1
- package/assets/CLAUDE-unified.md +11 -11
- package/assets/CLAUDE.md +29 -29
- package/package.json +7 -3
- package/packs/backend/node.json +173 -173
- package/packs/core/javascript.json +176 -176
- package/packs/core/typescript.json +222 -222
- package/packs/frontend/react.json +254 -254
- package/packs/meta/testing.json +172 -172
- package/scripts/postinstall.mjs +531 -531
- package/src/automation/decision-detector.ts +452 -452
- package/src/automation/phase12-manager.ts +456 -456
- package/src/automation/proactive-recall.ts +373 -373
- package/src/automation/project-detector.ts +310 -310
- package/src/automation/repo-scanner.ts +210 -205
- package/src/cli/auto-setup.ts +75 -75
- package/src/cli/auto-start.ts +266 -266
- package/src/cli/bin.ts +264 -264
- package/src/cli/commands/autostart.ts +90 -90
- package/src/cli/commands/chroma.ts +578 -577
- package/src/cli/commands/export-training.ts +70 -70
- package/src/cli/commands/export.ts +130 -130
- package/src/cli/commands/git-hook.ts +183 -183
- package/src/cli/commands/hooks.ts +217 -217
- package/src/cli/commands/init.ts +123 -123
- package/src/cli/commands/install-mcp.ts +122 -111
- package/src/cli/commands/models.ts +979 -979
- package/src/cli/commands/pack.ts +200 -200
- package/src/cli/commands/refresh.ts +344 -339
- package/src/cli/commands/reindex.ts +120 -120
- package/src/cli/commands/serve.ts +466 -463
- package/src/cli/commands/start.ts +44 -44
- package/src/cli/commands/status.ts +220 -203
- package/src/cli/commands/uninstall-mcp.ts +45 -41
- package/src/cli/commands/update.ts +130 -124
- package/src/cli/migrate-chroma.ts +106 -106
- package/src/cli/ui/animations.ts +80 -80
- package/src/cli/ui/components.ts +82 -82
- package/src/cli/ui/index.ts +4 -4
- package/src/cli/ui/logo.ts +36 -36
- package/src/cli/ui/theme.ts +55 -55
- package/src/code-intelligence/indexer.ts +352 -352
- package/src/code-intelligence/linker.ts +178 -178
- package/src/code-intelligence/parser.ts +484 -484
- package/src/code-intelligence/query.ts +291 -291
- package/src/code-intelligence/schema.ts +83 -83
- package/src/code-intelligence/types.ts +95 -95
- package/src/config/defaults.ts +52 -52
- package/src/config/home.ts +56 -56
- package/src/config/index.ts +5 -5
- package/src/config/loader.ts +192 -192
- package/src/config/schema.ts +446 -415
- package/src/config/validator.ts +182 -182
- package/src/context/assembler.ts +407 -400
- package/src/context/index.ts +79 -79
- package/src/context/progress-tracker.ts +174 -174
- package/src/context/standards-manager.ts +287 -287
- package/src/context/validator.ts +58 -58
- package/src/diagnostics/index.ts +122 -121
- package/src/health/index.ts +233 -232
- package/src/hooks/brain-hook.ts +134 -131
- package/src/hooks/capture.ts +168 -168
- package/src/hooks/claude-code-mastery.md +112 -112
- package/src/hooks/context-hook.ts +260 -245
- package/src/hooks/deduplicator.ts +72 -72
- package/src/hooks/git-capture.ts +109 -109
- package/src/hooks/git-hook-installer.ts +211 -207
- package/src/hooks/index.ts +20 -20
- package/src/hooks/installer.ts +306 -288
- package/src/hooks/interceptor-hook.ts +204 -201
- package/src/hooks/passive-classifier.ts +397 -397
- package/src/hooks/queue.ts +160 -129
- package/src/hooks/session-tracker.ts +312 -312
- package/src/hooks/types.ts +52 -52
- package/src/index.ts +7 -7
- package/src/intelligence/cross-project/generalizer.ts +283 -283
- package/src/intelligence/cross-project/index.ts +7 -7
- package/src/intelligence/hf-downloader.ts +222 -222
- package/src/intelligence/hf-manifest.json +78 -78
- package/src/intelligence/index.ts +24 -24
- package/src/intelligence/inference-router.ts +762 -762
- package/src/intelligence/model-manager.ts +263 -245
- package/src/intelligence/optimization/index.ts +10 -10
- package/src/intelligence/optimization/precompute.ts +202 -202
- package/src/intelligence/optimization/semantic-cache.ts +213 -207
- package/src/intelligence/prediction/index.ts +7 -7
- package/src/intelligence/prediction/recommender.ts +276 -268
- package/src/intelligence/reasoning/chain-retrieval.ts +243 -247
- package/src/intelligence/reasoning/index.ts +7 -7
- package/src/intelligence/temporal/evolution.ts +193 -197
- package/src/intelligence/temporal/index.ts +16 -16
- package/src/intelligence/temporal/query-processor.ts +190 -190
- package/src/intelligence/temporal/timeline.ts +272 -259
- package/src/intelligence/temporal/trends.ts +263 -263
- package/src/intelligence/tokenizer.ts +118 -118
- package/src/knowledge/entity-extractor.ts +447 -443
- package/src/knowledge/graph/builder.ts +185 -185
- package/src/knowledge/graph/linker.ts +201 -201
- package/src/knowledge/graph/memory-graph.ts +359 -359
- package/src/knowledge/graph/schema.ts +99 -99
- package/src/knowledge/graph/search.ts +166 -166
- package/src/knowledge/relationship-extractor.ts +108 -108
- package/src/memory/chroma/client.ts +211 -192
- package/src/memory/chroma/collection-manager.ts +92 -92
- package/src/memory/chroma/config.ts +57 -57
- package/src/memory/chroma/embeddings.ts +177 -175
- package/src/memory/chroma/index.ts +82 -82
- package/src/memory/chroma/migration.ts +270 -270
- package/src/memory/chroma/schemas.ts +69 -69
- package/src/memory/chroma/search.ts +319 -315
- package/src/memory/chroma/store.ts +755 -747
- package/src/memory/compression.ts +121 -121
- package/src/memory/consolidation/archiver.ts +162 -165
- package/src/memory/consolidation/merger.ts +182 -186
- package/src/memory/consolidation/scorer.ts +136 -136
- package/src/memory/database.ts +9 -0
- package/src/memory/dual-write.ts +145 -0
- package/src/memory/embeddings.ts +226 -226
- package/src/memory/episodic/detector.ts +108 -108
- package/src/memory/episodic/manager.ts +347 -351
- package/src/memory/episodic/summarizer.ts +179 -179
- package/src/memory/episodic/types.ts +52 -52
- package/src/memory/fts5-search.ts +692 -633
- package/src/memory/index.ts +943 -1060
- package/src/memory/migrations/add-fts5.ts +118 -108
- package/src/memory/patterns.ts +438 -438
- package/src/memory/pruning.ts +60 -60
- package/src/memory/schema.ts +88 -88
- package/src/memory/store.ts +911 -787
- package/src/orchestrator/handlers/decision-handler.ts +204 -204
- package/src/packs/index.ts +9 -9
- package/src/packs/loader.ts +134 -134
- package/src/packs/manager.ts +204 -204
- package/src/packs/ranker.ts +78 -78
- package/src/packs/types.ts +81 -81
- package/src/phase12/index.ts +5 -5
- package/src/retrieval/bm25/index.ts +300 -297
- package/src/retrieval/bm25/tokenizer.ts +184 -184
- package/src/retrieval/feedback/adaptive.ts +221 -221
- package/src/retrieval/feedback/index.ts +16 -16
- package/src/retrieval/feedback/metrics.ts +221 -221
- package/src/retrieval/feedback/store.ts +283 -283
- package/src/retrieval/fusion/index.ts +194 -194
- package/src/retrieval/fusion/rrf.ts +165 -165
- package/src/retrieval/index.ts +12 -12
- package/src/retrieval/pipeline.ts +375 -375
- package/src/retrieval/query/expander.ts +203 -203
- package/src/retrieval/query/index.ts +27 -27
- package/src/retrieval/query/intent-classifier.ts +252 -252
- package/src/retrieval/query/temporal-parser.ts +295 -295
- package/src/retrieval/reranker/index.ts +189 -188
- package/src/retrieval/reranker/model.ts +99 -95
- package/src/retrieval/service.ts +125 -125
- package/src/retrieval/types.ts +162 -162
- package/src/routing/entity-extractor.ts +454 -454
- package/src/routing/handlers/exploration-handler.ts +369 -0
- package/src/routing/handlers/index.ts +19 -0
- package/src/routing/handlers/memory-handler.ts +273 -0
- package/src/routing/handlers/mutation-handler.ts +241 -0
- package/src/routing/handlers/recall-handler.ts +642 -0
- package/src/routing/handlers/shared.ts +515 -0
- package/src/routing/handlers/types.ts +48 -0
- package/src/routing/intent-classifier.ts +552 -552
- package/src/routing/response-filter.ts +399 -391
- package/src/routing/router.ts +245 -2193
- package/src/routing/search-engine.ts +521 -514
- package/src/routing/types.ts +104 -94
- package/src/scripts/health-check.ts +118 -118
- package/src/scripts/setup.ts +122 -122
- package/src/server/auto-updater.ts +283 -276
- package/src/server/handlers/call-tool.ts +159 -159
- package/src/server/handlers/list-tools.ts +35 -35
- package/src/server/handlers/tools/auto-remember.ts +165 -165
- package/src/server/handlers/tools/brain.ts +86 -86
- package/src/server/handlers/tools/create-project.ts +135 -135
- package/src/server/handlers/tools/get-code-standards.ts +123 -123
- package/src/server/handlers/tools/get-corrections.ts +152 -152
- package/src/server/handlers/tools/get-patterns.ts +156 -156
- package/src/server/handlers/tools/get-project-context.ts +75 -75
- package/src/server/handlers/tools/index.ts +30 -30
- package/src/server/handlers/tools/init-project.ts +756 -756
- package/src/server/handlers/tools/list-projects.ts +126 -126
- package/src/server/handlers/tools/recall-similar.ts +87 -87
- package/src/server/handlers/tools/recognize-pattern.ts +132 -132
- package/src/server/handlers/tools/record-correction.ts +131 -131
- package/src/server/handlers/tools/remember-decision.ts +168 -168
- package/src/server/handlers/tools/schemas.ts +179 -179
- package/src/server/handlers/tools/search-code.ts +122 -122
- package/src/server/handlers/tools/smart-context.ts +146 -146
- package/src/server/handlers/tools/update-progress.ts +131 -131
- package/src/server/http-api.ts +215 -1229
- package/src/server/mcp-proxy.ts +85 -84
- package/src/server/mcp-server.ts +285 -284
- package/src/server/middleware/auth.ts +39 -0
- package/src/server/middleware/error-handler.ts +37 -0
- package/src/server/middleware/rate-limit.ts +53 -0
- package/src/server/middleware/validate.ts +42 -0
- package/src/server/pid-manager.ts +137 -136
- package/src/server/providers/resources.ts +581 -581
- package/src/server/routes/code.ts +228 -0
- package/src/server/routes/context.ts +26 -0
- package/src/server/routes/health.ts +19 -0
- package/src/server/routes/helpers.ts +100 -0
- package/src/server/routes/hooks.ts +197 -0
- package/src/server/routes/mcp.ts +47 -0
- package/src/server/routes/memory.ts +397 -0
- package/src/server/routes/models.ts +96 -0
- package/src/server/routes/projects.ts +89 -0
- package/src/server/routes/types.ts +21 -0
- package/src/server/schemas/api-schemas.ts +202 -0
- package/src/server/services.ts +720 -720
- package/src/server/utils/memory-indicator.ts +84 -84
- package/src/server/utils/response-formatter.ts +129 -129
- package/src/server/web-viewer.ts +1145 -1115
- package/src/setup/index.ts +38 -38
- package/src/tools/registry.ts +115 -115
- package/src/tools/schemas.ts +666 -666
- package/src/tools/types.ts +412 -412
- package/src/training/data-store.ts +320 -298
- package/src/training/retrain-pipeline.ts +399 -394
- package/src/utils/error-handler.ts +136 -136
- package/src/utils/index.ts +58 -58
- package/src/utils/kill-port.ts +55 -53
- package/src/utils/phase12-helper.ts +56 -56
- package/src/utils/safe-path.ts +43 -0
- package/src/utils/timing.ts +47 -47
- package/src/utils/transaction.ts +63 -63
- package/src/vault/index.ts +4 -3
- package/src/vault/paths.ts +106 -106
- package/src/vault/query.ts +4 -1
- package/src/vault/reader.ts +44 -1
- package/src/vault/watcher.ts +24 -1
- package/src/vault/writer.ts +487 -413
- package/skills/persistent-memory/SKILL.md +0 -148
- package/skills/persistent-memory/references/tool-reference.md +0 -90
|
@@ -1,297 +1,300 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* BM25 Sparse Search Engine
|
|
3
|
-
* Uses MiniSearch for fast keyword-based retrieval
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import type { Logger } from 'pino'
|
|
7
|
-
import MiniSearch from 'minisearch'
|
|
8
|
-
import { tokenize, type TokenizerOptions } from './tokenizer'
|
|
9
|
-
|
|
10
|
-
export interface BM25Document {
|
|
11
|
-
id: string
|
|
12
|
-
content: string
|
|
13
|
-
metadata: Record<string, unknown>
|
|
14
|
-
collection: string
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
export interface BM25SearchResult {
|
|
18
|
-
id: string
|
|
19
|
-
content: string
|
|
20
|
-
metadata: Record<string, unknown>
|
|
21
|
-
collection: string
|
|
22
|
-
score: number
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
export interface BM25Config {
|
|
26
|
-
/** Fields to index */
|
|
27
|
-
fields: string[]
|
|
28
|
-
/** Fields to store */
|
|
29
|
-
storeFields: string[]
|
|
30
|
-
/** BM25 k1 parameter (term frequency saturation) */
|
|
31
|
-
k1?: number
|
|
32
|
-
/** BM25 b parameter (document length normalization) */
|
|
33
|
-
b?: number
|
|
34
|
-
/** Boost for exact matches */
|
|
35
|
-
boostExact?: number
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
const DEFAULT_CONFIG: BM25Config = {
|
|
39
|
-
fields: ['content'],
|
|
40
|
-
storeFields: ['content', 'metadata', 'collection'],
|
|
41
|
-
k1: 1.2,
|
|
42
|
-
b: 0.75,
|
|
43
|
-
boostExact: 2.0
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
export class BM25Engine {
|
|
47
|
-
private logger: Logger
|
|
48
|
-
private config: BM25Config
|
|
49
|
-
private index: MiniSearch<BM25Document>
|
|
50
|
-
private documentCount: number = 0
|
|
51
|
-
private tokenizerOptions: TokenizerOptions
|
|
52
|
-
|
|
53
|
-
constructor(logger: Logger, config: Partial<BM25Config> = {}) {
|
|
54
|
-
this.logger = logger.child({ component: 'bm25-engine' })
|
|
55
|
-
this.config = { ...DEFAULT_CONFIG, ...config }
|
|
56
|
-
this.tokenizerOptions = {
|
|
57
|
-
minLength: 2,
|
|
58
|
-
removeStopwords: true,
|
|
59
|
-
stemming: true,
|
|
60
|
-
splitCamelCase: true
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
this.index = this.createIndex()
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
/**
|
|
67
|
-
* Create a new MiniSearch index
|
|
68
|
-
*/
|
|
69
|
-
private createIndex(): MiniSearch<BM25Document> {
|
|
70
|
-
return new MiniSearch<BM25Document>({
|
|
71
|
-
fields: this.config.fields,
|
|
72
|
-
storeFields: this.config.storeFields,
|
|
73
|
-
idField: 'id',
|
|
74
|
-
tokenize: (text: string) => tokenize(text, this.tokenizerOptions),
|
|
75
|
-
processTerm: (term: string) => term.toLowerCase()
|
|
76
|
-
})
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
/**
|
|
80
|
-
* Build index from documents
|
|
81
|
-
*/
|
|
82
|
-
async buildIndex(documents: BM25Document[]): Promise<void> {
|
|
83
|
-
this.logger.info({ count: documents.length }, 'Building BM25 index')
|
|
84
|
-
|
|
85
|
-
// Create fresh index
|
|
86
|
-
this.index = this.createIndex()
|
|
87
|
-
this.documentCount = 0
|
|
88
|
-
|
|
89
|
-
// Add documents in batches
|
|
90
|
-
const batchSize = 1000
|
|
91
|
-
for (let i = 0; i < documents.length; i += batchSize) {
|
|
92
|
-
const batch = documents.slice(i, i + batchSize)
|
|
93
|
-
await this.addDocuments(batch)
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
this.logger.info({
|
|
97
|
-
indexed: this.documentCount,
|
|
98
|
-
terms: this.index.termCount
|
|
99
|
-
}, 'BM25 index built')
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
/**
|
|
103
|
-
* Add documents to index
|
|
104
|
-
*/
|
|
105
|
-
async addDocuments(documents: BM25Document[]): Promise<void> {
|
|
106
|
-
try {
|
|
107
|
-
this.index.addAll(documents)
|
|
108
|
-
this.documentCount += documents.length
|
|
109
|
-
} catch (error) {
|
|
110
|
-
this.logger.error({ error }, 'Failed to add documents to BM25 index')
|
|
111
|
-
throw error
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
/**
|
|
116
|
-
* Add single document
|
|
117
|
-
*/
|
|
118
|
-
addDocument(document: BM25Document): void {
|
|
119
|
-
try {
|
|
120
|
-
this.index.add(document)
|
|
121
|
-
this.documentCount++
|
|
122
|
-
} catch (error) {
|
|
123
|
-
this.logger.error({ error, id: document.id }, 'Failed to add document to BM25 index')
|
|
124
|
-
throw error
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
/**
|
|
129
|
-
* Remove document from index
|
|
130
|
-
*/
|
|
131
|
-
removeDocument(document: BM25Document): void {
|
|
132
|
-
try {
|
|
133
|
-
this.index.remove(document)
|
|
134
|
-
this.documentCount--
|
|
135
|
-
} catch (error) {
|
|
136
|
-
this.logger.error({ error, id: document.id }, 'Failed to remove document from BM25 index')
|
|
137
|
-
// Don't throw - document might not exist
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
/**
|
|
142
|
-
* Update document in index
|
|
143
|
-
*/
|
|
144
|
-
updateDocument(document: BM25Document): void {
|
|
145
|
-
this.removeDocument(document)
|
|
146
|
-
this.addDocument(document)
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
/**
|
|
150
|
-
* Search the index
|
|
151
|
-
*/
|
|
152
|
-
search(
|
|
153
|
-
query: string,
|
|
154
|
-
options: {
|
|
155
|
-
limit?: number
|
|
156
|
-
filter?: (result: BM25SearchResult) => boolean
|
|
157
|
-
collection?: string
|
|
158
|
-
} = {}
|
|
159
|
-
): BM25SearchResult[] {
|
|
160
|
-
const { limit = 20, filter, collection } = options
|
|
161
|
-
|
|
162
|
-
if (!query.trim()) {
|
|
163
|
-
return []
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
this.logger.debug({
|
|
167
|
-
query: query.slice(0, 50),
|
|
168
|
-
limit
|
|
169
|
-
}, 'BM25 search')
|
|
170
|
-
|
|
171
|
-
try {
|
|
172
|
-
// Tokenize query
|
|
173
|
-
// Search with MiniSearch options
|
|
174
|
-
const results = this.index.search(query, {
|
|
175
|
-
prefix: true, // Allow prefix matching
|
|
176
|
-
fuzzy: 0.2, // Allow minor typos
|
|
177
|
-
combineWith: 'OR', // Match any term
|
|
178
|
-
boost: {
|
|
179
|
-
content: this.config.boostExact!
|
|
180
|
-
}
|
|
181
|
-
})
|
|
182
|
-
|
|
183
|
-
// Transform and filter results
|
|
184
|
-
let searchResults: BM25SearchResult[] = results.map(result =>
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
//
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
this.
|
|
243
|
-
return
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
this.
|
|
291
|
-
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
1
|
+
/**
|
|
2
|
+
* BM25 Sparse Search Engine
|
|
3
|
+
* Uses MiniSearch for fast keyword-based retrieval
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import type { Logger } from 'pino'
|
|
7
|
+
import MiniSearch from 'minisearch'
|
|
8
|
+
import { tokenize, type TokenizerOptions } from './tokenizer'
|
|
9
|
+
|
|
10
|
+
export interface BM25Document {
|
|
11
|
+
id: string
|
|
12
|
+
content: string
|
|
13
|
+
metadata: Record<string, unknown>
|
|
14
|
+
collection: string
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
export interface BM25SearchResult {
|
|
18
|
+
id: string
|
|
19
|
+
content: string
|
|
20
|
+
metadata: Record<string, unknown>
|
|
21
|
+
collection: string
|
|
22
|
+
score: number
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
export interface BM25Config {
|
|
26
|
+
/** Fields to index */
|
|
27
|
+
fields: string[]
|
|
28
|
+
/** Fields to store */
|
|
29
|
+
storeFields: string[]
|
|
30
|
+
/** BM25 k1 parameter (term frequency saturation) */
|
|
31
|
+
k1?: number
|
|
32
|
+
/** BM25 b parameter (document length normalization) */
|
|
33
|
+
b?: number
|
|
34
|
+
/** Boost for exact matches */
|
|
35
|
+
boostExact?: number
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
const DEFAULT_CONFIG: BM25Config = {
|
|
39
|
+
fields: ['content'],
|
|
40
|
+
storeFields: ['content', 'metadata', 'collection'],
|
|
41
|
+
k1: 1.2,
|
|
42
|
+
b: 0.75,
|
|
43
|
+
boostExact: 2.0
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
export class BM25Engine {
|
|
47
|
+
private logger: Logger
|
|
48
|
+
private config: BM25Config
|
|
49
|
+
private index: MiniSearch<BM25Document>
|
|
50
|
+
private documentCount: number = 0
|
|
51
|
+
private tokenizerOptions: TokenizerOptions
|
|
52
|
+
|
|
53
|
+
constructor(logger: Logger, config: Partial<BM25Config> = {}) {
|
|
54
|
+
this.logger = logger.child({ component: 'bm25-engine' })
|
|
55
|
+
this.config = { ...DEFAULT_CONFIG, ...config }
|
|
56
|
+
this.tokenizerOptions = {
|
|
57
|
+
minLength: 2,
|
|
58
|
+
removeStopwords: true,
|
|
59
|
+
stemming: true,
|
|
60
|
+
splitCamelCase: true
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
this.index = this.createIndex()
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Create a new MiniSearch index
|
|
68
|
+
*/
|
|
69
|
+
private createIndex(): MiniSearch<BM25Document> {
|
|
70
|
+
return new MiniSearch<BM25Document>({
|
|
71
|
+
fields: this.config.fields,
|
|
72
|
+
storeFields: this.config.storeFields,
|
|
73
|
+
idField: 'id',
|
|
74
|
+
tokenize: (text: string) => tokenize(text, this.tokenizerOptions),
|
|
75
|
+
processTerm: (term: string) => term.toLowerCase()
|
|
76
|
+
})
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
/**
|
|
80
|
+
* Build index from documents
|
|
81
|
+
*/
|
|
82
|
+
async buildIndex(documents: BM25Document[]): Promise<void> {
|
|
83
|
+
this.logger.info({ count: documents.length }, 'Building BM25 index')
|
|
84
|
+
|
|
85
|
+
// Create fresh index
|
|
86
|
+
this.index = this.createIndex()
|
|
87
|
+
this.documentCount = 0
|
|
88
|
+
|
|
89
|
+
// Add documents in batches
|
|
90
|
+
const batchSize = 1000
|
|
91
|
+
for (let i = 0; i < documents.length; i += batchSize) {
|
|
92
|
+
const batch = documents.slice(i, i + batchSize)
|
|
93
|
+
await this.addDocuments(batch)
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
this.logger.info({
|
|
97
|
+
indexed: this.documentCount,
|
|
98
|
+
terms: this.index.termCount
|
|
99
|
+
}, 'BM25 index built')
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* Add documents to index
|
|
104
|
+
*/
|
|
105
|
+
async addDocuments(documents: BM25Document[]): Promise<void> {
|
|
106
|
+
try {
|
|
107
|
+
this.index.addAll(documents)
|
|
108
|
+
this.documentCount += documents.length
|
|
109
|
+
} catch (error) {
|
|
110
|
+
this.logger.error({ error }, 'Failed to add documents to BM25 index')
|
|
111
|
+
throw error
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Add single document
|
|
117
|
+
*/
|
|
118
|
+
addDocument(document: BM25Document): void {
|
|
119
|
+
try {
|
|
120
|
+
this.index.add(document)
|
|
121
|
+
this.documentCount++
|
|
122
|
+
} catch (error) {
|
|
123
|
+
this.logger.error({ error, id: document.id }, 'Failed to add document to BM25 index')
|
|
124
|
+
throw error
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
/**
|
|
129
|
+
* Remove document from index
|
|
130
|
+
*/
|
|
131
|
+
removeDocument(document: BM25Document): void {
|
|
132
|
+
try {
|
|
133
|
+
this.index.remove(document)
|
|
134
|
+
this.documentCount--
|
|
135
|
+
} catch (error) {
|
|
136
|
+
this.logger.error({ error, id: document.id }, 'Failed to remove document from BM25 index')
|
|
137
|
+
// Don't throw - document might not exist
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
/**
|
|
142
|
+
* Update document in index
|
|
143
|
+
*/
|
|
144
|
+
updateDocument(document: BM25Document): void {
|
|
145
|
+
this.removeDocument(document)
|
|
146
|
+
this.addDocument(document)
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Search the index
|
|
151
|
+
*/
|
|
152
|
+
search(
|
|
153
|
+
query: string,
|
|
154
|
+
options: {
|
|
155
|
+
limit?: number
|
|
156
|
+
filter?: (result: BM25SearchResult) => boolean
|
|
157
|
+
collection?: string
|
|
158
|
+
} = {}
|
|
159
|
+
): BM25SearchResult[] {
|
|
160
|
+
const { limit = 20, filter, collection } = options
|
|
161
|
+
|
|
162
|
+
if (!query.trim()) {
|
|
163
|
+
return []
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
this.logger.debug({
|
|
167
|
+
query: query.slice(0, 50),
|
|
168
|
+
limit
|
|
169
|
+
}, 'BM25 search')
|
|
170
|
+
|
|
171
|
+
try {
|
|
172
|
+
// Tokenize query
|
|
173
|
+
// Search with MiniSearch options
|
|
174
|
+
const results = this.index.search(query, {
|
|
175
|
+
prefix: true, // Allow prefix matching
|
|
176
|
+
fuzzy: 0.2, // Allow minor typos
|
|
177
|
+
combineWith: 'OR', // Match any term
|
|
178
|
+
boost: {
|
|
179
|
+
content: this.config.boostExact!
|
|
180
|
+
}
|
|
181
|
+
})
|
|
182
|
+
|
|
183
|
+
// Transform and filter results
|
|
184
|
+
let searchResults: BM25SearchResult[] = results.map(result => {
|
|
185
|
+
const r = result as Record<string, unknown>
|
|
186
|
+
return {
|
|
187
|
+
id: result.id,
|
|
188
|
+
content: (r.content as string) || '',
|
|
189
|
+
metadata: (r.metadata as Record<string, unknown>) || {},
|
|
190
|
+
collection: (r.collection as string) || '',
|
|
191
|
+
score: result.score
|
|
192
|
+
}
|
|
193
|
+
})
|
|
194
|
+
|
|
195
|
+
// Filter by collection if specified
|
|
196
|
+
if (collection) {
|
|
197
|
+
searchResults = searchResults.filter(r => r.collection === collection)
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
// Apply custom filter
|
|
201
|
+
if (filter) {
|
|
202
|
+
searchResults = searchResults.filter(filter)
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
// Normalize scores to 0-1 range
|
|
206
|
+
searchResults = this.normalizeScores(searchResults)
|
|
207
|
+
|
|
208
|
+
// Apply limit
|
|
209
|
+
return searchResults.slice(0, limit)
|
|
210
|
+
|
|
211
|
+
} catch (error) {
|
|
212
|
+
this.logger.error({ error, query }, 'BM25 search failed')
|
|
213
|
+
return []
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
/**
|
|
218
|
+
* Normalize scores to 0-1 range
|
|
219
|
+
*/
|
|
220
|
+
private normalizeScores(results: BM25SearchResult[]): BM25SearchResult[] {
|
|
221
|
+
if (results.length === 0) return []
|
|
222
|
+
if (results.length === 1) {
|
|
223
|
+
return [{ ...results[0]!, score: 1.0 }]
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
const maxScore = Math.max(...results.map(r => r.score))
|
|
227
|
+
if (maxScore === 0) {
|
|
228
|
+
return results.map(r => ({ ...r, score: 0 }))
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
return results.map(r => ({
|
|
232
|
+
...r,
|
|
233
|
+
score: r.score / maxScore
|
|
234
|
+
}))
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
/**
|
|
238
|
+
* Get suggestions for autocomplete
|
|
239
|
+
*/
|
|
240
|
+
suggest(query: string, limit: number = 5): string[] {
|
|
241
|
+
try {
|
|
242
|
+
const results = this.index.autoSuggest(query)
|
|
243
|
+
return results.slice(0, limit).map(r => r.suggestion)
|
|
244
|
+
} catch (error) {
|
|
245
|
+
this.logger.error({ error, query }, 'BM25 suggest failed')
|
|
246
|
+
return []
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
/**
|
|
251
|
+
* Get index statistics
|
|
252
|
+
*/
|
|
253
|
+
getStats(): {
|
|
254
|
+
documentCount: number
|
|
255
|
+
termCount: number
|
|
256
|
+
} {
|
|
257
|
+
return {
|
|
258
|
+
documentCount: this.documentCount,
|
|
259
|
+
termCount: this.index.termCount
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
/**
|
|
264
|
+
* Clear the index
|
|
265
|
+
*/
|
|
266
|
+
clear(): void {
|
|
267
|
+
this.index = this.createIndex()
|
|
268
|
+
this.documentCount = 0
|
|
269
|
+
this.logger.info('BM25 index cleared')
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
/**
|
|
273
|
+
* Export index for persistence
|
|
274
|
+
*/
|
|
275
|
+
exportIndex(): string {
|
|
276
|
+
return JSON.stringify(this.index.toJSON())
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
/**
|
|
280
|
+
* Import index from persisted data
|
|
281
|
+
*/
|
|
282
|
+
importIndex(data: string): void {
|
|
283
|
+
try {
|
|
284
|
+
this.index = MiniSearch.loadJSON(data, {
|
|
285
|
+
fields: this.config.fields,
|
|
286
|
+
storeFields: this.config.storeFields,
|
|
287
|
+
idField: 'id'
|
|
288
|
+
})
|
|
289
|
+
// Count documents after import
|
|
290
|
+
this.documentCount = this.index.documentCount
|
|
291
|
+
this.logger.info({ documentCount: this.documentCount }, 'BM25 index imported')
|
|
292
|
+
} catch (error) {
|
|
293
|
+
this.logger.error({ error }, 'Failed to import BM25 index')
|
|
294
|
+
throw error
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
export { tokenize, tokenizeForSearch, getNGrams } from './tokenizer'
|
|
300
|
+
export type { TokenizerOptions } from './tokenizer'
|