claude-brain 0.15.2 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250)
  1. package/README.md +191 -191
  2. package/VERSION +1 -1
  3. package/assets/CLAUDE-unified.md +11 -11
  4. package/assets/CLAUDE.md +29 -11
  5. package/bunfig.toml +8 -8
  6. package/package.json +82 -82
  7. package/packs/backend/node.json +173 -173
  8. package/packs/core/javascript.json +176 -176
  9. package/packs/core/typescript.json +222 -222
  10. package/packs/frontend/react.json +254 -254
  11. package/packs/meta/testing.json +172 -172
  12. package/scripts/postinstall.mjs +341 -341
  13. package/src/automation/auto-context.ts +240 -240
  14. package/src/automation/decision-detector.ts +452 -452
  15. package/src/automation/index.ts +11 -11
  16. package/src/automation/phase12-manager.ts +456 -456
  17. package/src/automation/proactive-recall.ts +373 -373
  18. package/src/automation/project-detector.ts +310 -310
  19. package/src/automation/repo-scanner.ts +205 -205
  20. package/src/cli/auto-setup.ts +82 -82
  21. package/src/cli/bin.ts +209 -202
  22. package/src/cli/commands/chroma.ts +573 -573
  23. package/src/cli/commands/git-hook.ts +189 -189
  24. package/src/cli/commands/hooks.ts +213 -213
  25. package/src/cli/commands/init.ts +122 -122
  26. package/src/cli/commands/install-mcp.ts +92 -92
  27. package/src/cli/commands/pack.ts +197 -197
  28. package/src/cli/commands/refresh.ts +323 -0
  29. package/src/cli/commands/serve.ts +167 -173
  30. package/src/cli/commands/start.ts +42 -42
  31. package/src/cli/commands/uninstall-mcp.ts +41 -41
  32. package/src/cli/commands/update.ts +124 -121
  33. package/src/cli/diagnose.ts +4 -4
  34. package/src/cli/health-check.ts +4 -4
  35. package/src/cli/migrate-chroma.ts +106 -106
  36. package/src/cli/setup.ts +4 -4
  37. package/src/cli/ui/animations.ts +80 -80
  38. package/src/cli/ui/components.ts +82 -82
  39. package/src/cli/ui/index.ts +4 -4
  40. package/src/cli/ui/logo.ts +36 -36
  41. package/src/cli/ui/theme.ts +55 -55
  42. package/src/config/defaults.ts +50 -50
  43. package/src/config/home.ts +55 -55
  44. package/src/config/index.ts +7 -7
  45. package/src/config/loader.ts +166 -166
  46. package/src/config/migration.ts +76 -76
  47. package/src/config/schema.ts +360 -360
  48. package/src/config/validator.ts +184 -184
  49. package/src/config/watcher.ts +86 -86
  50. package/src/context/assembler.ts +398 -398
  51. package/src/context/cache-manager.ts +101 -101
  52. package/src/context/formatter.ts +84 -84
  53. package/src/context/hierarchy.ts +85 -85
  54. package/src/context/index.ts +83 -83
  55. package/src/context/progress-tracker.ts +174 -174
  56. package/src/context/standards-manager.ts +287 -287
  57. package/src/context/types.ts +252 -252
  58. package/src/context/validator.ts +58 -58
  59. package/src/diagnostics/index.ts +123 -123
  60. package/src/health/index.ts +229 -229
  61. package/src/hooks/brain-hook.ts +128 -112
  62. package/src/hooks/capture.ts +168 -205
  63. package/src/hooks/context-hook.ts +137 -0
  64. package/src/hooks/deduplicator.ts +72 -72
  65. package/src/hooks/git-capture.ts +109 -109
  66. package/src/hooks/git-hook-installer.ts +207 -207
  67. package/src/hooks/index.ts +20 -20
  68. package/src/hooks/installer.ts +244 -194
  69. package/src/hooks/passive-classifier.ts +404 -723
  70. package/src/hooks/queue.ts +129 -129
  71. package/src/hooks/session-tracker.ts +312 -275
  72. package/src/hooks/types.ts +52 -47
  73. package/src/index.ts +7 -7
  74. package/src/intelligence/cross-project/affinity.ts +162 -162
  75. package/src/intelligence/cross-project/generalizer.ts +283 -283
  76. package/src/intelligence/cross-project/index.ts +13 -13
  77. package/src/intelligence/cross-project/transfer.ts +201 -201
  78. package/src/intelligence/index.ts +24 -24
  79. package/src/intelligence/optimization/index.ts +10 -10
  80. package/src/intelligence/optimization/precompute.ts +202 -202
  81. package/src/intelligence/optimization/semantic-cache.ts +207 -207
  82. package/src/intelligence/prediction/context-anticipator.ts +198 -198
  83. package/src/intelligence/prediction/decision-predictor.ts +184 -184
  84. package/src/intelligence/prediction/index.ts +13 -13
  85. package/src/intelligence/prediction/recommender.ts +268 -268
  86. package/src/intelligence/reasoning/chain-retrieval.ts +247 -247
  87. package/src/intelligence/reasoning/counterfactual.ts +248 -248
  88. package/src/intelligence/reasoning/index.ts +13 -13
  89. package/src/intelligence/reasoning/synthesizer.ts +169 -169
  90. package/src/intelligence/temporal/evolution.ts +197 -197
  91. package/src/intelligence/temporal/index.ts +16 -16
  92. package/src/intelligence/temporal/query-processor.ts +190 -190
  93. package/src/intelligence/temporal/timeline.ts +259 -259
  94. package/src/intelligence/temporal/trends.ts +263 -263
  95. package/src/knowledge/entity-extractor.ts +416 -416
  96. package/src/knowledge/graph/builder.ts +185 -185
  97. package/src/knowledge/graph/linker.ts +201 -201
  98. package/src/knowledge/graph/memory-graph.ts +359 -359
  99. package/src/knowledge/graph/schema.ts +99 -99
  100. package/src/knowledge/graph/search.ts +168 -168
  101. package/src/knowledge/relationship-extractor.ts +108 -108
  102. package/src/memory/chroma/client.ts +174 -174
  103. package/src/memory/chroma/collection-manager.ts +94 -94
  104. package/src/memory/chroma/config.ts +57 -57
  105. package/src/memory/chroma/embeddings.ts +155 -155
  106. package/src/memory/chroma/index.ts +82 -82
  107. package/src/memory/chroma/migration.ts +270 -270
  108. package/src/memory/chroma/schemas.ts +69 -69
  109. package/src/memory/chroma/search.ts +315 -315
  110. package/src/memory/chroma/store.ts +741 -741
  111. package/src/memory/consolidation/archiver.ts +164 -164
  112. package/src/memory/consolidation/merger.ts +186 -186
  113. package/src/memory/consolidation/scorer.ts +138 -138
  114. package/src/memory/context-builder.ts +236 -236
  115. package/src/memory/database.ts +169 -169
  116. package/src/memory/embedding-utils.ts +156 -156
  117. package/src/memory/embeddings.ts +226 -226
  118. package/src/memory/episodic/detector.ts +108 -108
  119. package/src/memory/episodic/manager.ts +351 -351
  120. package/src/memory/episodic/summarizer.ts +179 -179
  121. package/src/memory/episodic/types.ts +52 -52
  122. package/src/memory/index.ts +582 -582
  123. package/src/memory/knowledge-extractor.ts +455 -455
  124. package/src/memory/learning.ts +378 -378
  125. package/src/memory/patterns.ts +396 -396
  126. package/src/memory/schema.ts +88 -88
  127. package/src/memory/search.ts +309 -309
  128. package/src/memory/store.ts +787 -787
  129. package/src/memory/types.ts +121 -121
  130. package/src/orchestrator/coordinator.ts +272 -272
  131. package/src/orchestrator/decision-logger.ts +228 -228
  132. package/src/orchestrator/event-emitter.ts +198 -198
  133. package/src/orchestrator/event-queue.ts +184 -184
  134. package/src/orchestrator/handlers/base-handler.ts +70 -70
  135. package/src/orchestrator/handlers/context-handler.ts +73 -73
  136. package/src/orchestrator/handlers/decision-handler.ts +204 -204
  137. package/src/orchestrator/handlers/index.ts +10 -10
  138. package/src/orchestrator/handlers/status-handler.ts +131 -131
  139. package/src/orchestrator/handlers/task-handler.ts +171 -171
  140. package/src/orchestrator/index.ts +275 -275
  141. package/src/orchestrator/task-parser.ts +284 -284
  142. package/src/orchestrator/types.ts +98 -98
  143. package/src/packs/index.ts +9 -9
  144. package/src/packs/loader.ts +134 -134
  145. package/src/packs/manager.ts +204 -204
  146. package/src/packs/ranker.ts +78 -78
  147. package/src/packs/types.ts +81 -81
  148. package/src/phase12/index.ts +5 -5
  149. package/src/retrieval/bm25/index.ts +300 -300
  150. package/src/retrieval/bm25/tokenizer.ts +184 -184
  151. package/src/retrieval/feedback/adaptive.ts +223 -223
  152. package/src/retrieval/feedback/index.ts +16 -16
  153. package/src/retrieval/feedback/metrics.ts +223 -223
  154. package/src/retrieval/feedback/store.ts +283 -283
  155. package/src/retrieval/fusion/index.ts +194 -194
  156. package/src/retrieval/fusion/rrf.ts +163 -163
  157. package/src/retrieval/index.ts +12 -12
  158. package/src/retrieval/pipeline.ts +375 -375
  159. package/src/retrieval/query/expander.ts +198 -198
  160. package/src/retrieval/query/index.ts +27 -27
  161. package/src/retrieval/query/intent-classifier.ts +236 -236
  162. package/src/retrieval/query/temporal-parser.ts +295 -295
  163. package/src/retrieval/reranker/index.ts +188 -188
  164. package/src/retrieval/reranker/model.ts +95 -95
  165. package/src/retrieval/service.ts +125 -125
  166. package/src/retrieval/types.ts +162 -162
  167. package/src/routing/entity-extractor.ts +428 -428
  168. package/src/routing/intent-classifier.ts +450 -436
  169. package/src/routing/response-filter.ts +261 -258
  170. package/src/routing/router.ts +1441 -1322
  171. package/src/routing/search-engine.ts +515 -475
  172. package/src/routing/types.ts +94 -94
  173. package/src/scripts/health-check.ts +118 -118
  174. package/src/scripts/setup.ts +122 -122
  175. package/src/server/handlers/call-tool.ts +156 -156
  176. package/src/server/handlers/index.ts +9 -9
  177. package/src/server/handlers/list-tools.ts +35 -35
  178. package/src/server/handlers/tools/analyze-decision-evolution.ts +151 -151
  179. package/src/server/handlers/tools/auto-remember.ts +200 -200
  180. package/src/server/handlers/tools/brain.ts +85 -85
  181. package/src/server/handlers/tools/create-project.ts +135 -135
  182. package/src/server/handlers/tools/detect-trends.ts +144 -144
  183. package/src/server/handlers/tools/find-cross-project-patterns.ts +168 -168
  184. package/src/server/handlers/tools/get-activity-log.ts +194 -194
  185. package/src/server/handlers/tools/get-code-standards.ts +124 -124
  186. package/src/server/handlers/tools/get-corrections.ts +154 -154
  187. package/src/server/handlers/tools/get-decision-timeline.ts +172 -172
  188. package/src/server/handlers/tools/get-episode.ts +103 -103
  189. package/src/server/handlers/tools/get-patterns.ts +158 -158
  190. package/src/server/handlers/tools/get-phase12-status.ts +63 -63
  191. package/src/server/handlers/tools/get-project-context.ts +75 -75
  192. package/src/server/handlers/tools/get-recommendations.ts +145 -145
  193. package/src/server/handlers/tools/index.ts +31 -31
  194. package/src/server/handlers/tools/init-project.ts +757 -757
  195. package/src/server/handlers/tools/list-episodes.ts +90 -90
  196. package/src/server/handlers/tools/list-projects.ts +125 -125
  197. package/src/server/handlers/tools/rate-memory.ts +101 -101
  198. package/src/server/handlers/tools/recall-similar.ts +87 -87
  199. package/src/server/handlers/tools/recognize-pattern.ts +126 -126
  200. package/src/server/handlers/tools/record-correction.ts +125 -125
  201. package/src/server/handlers/tools/remember-decision.ts +153 -153
  202. package/src/server/handlers/tools/schemas.ts +253 -253
  203. package/src/server/handlers/tools/search-knowledge-graph.ts +102 -102
  204. package/src/server/handlers/tools/smart-context.ts +146 -146
  205. package/src/server/handlers/tools/update-progress.ts +131 -131
  206. package/src/server/handlers/tools/what-if-analysis.ts +135 -135
  207. package/src/server/http-api.ts +761 -693
  208. package/src/server/index.ts +40 -40
  209. package/src/server/mcp-server.ts +283 -283
  210. package/src/server/providers/index.ts +7 -7
  211. package/src/server/providers/prompts.ts +327 -327
  212. package/src/server/providers/resources.ts +622 -622
  213. package/src/server/services.ts +468 -468
  214. package/src/server/types.ts +39 -39
  215. package/src/server/utils/error-handler.ts +155 -155
  216. package/src/server/utils/index.ts +13 -13
  217. package/src/server/utils/memory-indicator.ts +83 -83
  218. package/src/server/utils/request-context.ts +122 -122
  219. package/src/server/utils/response-formatter.ts +129 -129
  220. package/src/server/utils/validators.ts +210 -210
  221. package/src/setup/index.ts +48 -48
  222. package/src/setup/wizard.ts +461 -461
  223. package/src/tools/index.ts +24 -24
  224. package/src/tools/registry.ts +115 -115
  225. package/src/tools/schemas.test.ts +30 -30
  226. package/src/tools/schemas.ts +617 -617
  227. package/src/tools/types.ts +412 -412
  228. package/src/utils/circuit-breaker.ts +130 -130
  229. package/src/utils/cleanup.ts +34 -34
  230. package/src/utils/error-handler.ts +132 -132
  231. package/src/utils/error-messages.ts +60 -60
  232. package/src/utils/fallback.ts +45 -45
  233. package/src/utils/index.ts +54 -54
  234. package/src/utils/logger-utils.ts +80 -80
  235. package/src/utils/logger.ts +88 -88
  236. package/src/utils/phase12-helper.ts +56 -56
  237. package/src/utils/retry.ts +94 -94
  238. package/src/utils/timing.ts +47 -47
  239. package/src/utils/transaction.ts +63 -63
  240. package/src/vault/frontmatter.ts +264 -264
  241. package/src/vault/index.ts +318 -318
  242. package/src/vault/paths.ts +106 -106
  243. package/src/vault/query.ts +422 -422
  244. package/src/vault/reader.ts +264 -264
  245. package/src/vault/templates.ts +186 -186
  246. package/src/vault/types.ts +73 -73
  247. package/src/vault/watcher.ts +277 -277
  248. package/src/vault/writer.ts +413 -413
  249. package/tsconfig.json +30 -30
  250. package/src/cli/auto-update.ts +0 -157
--- package/src/retrieval/bm25/index.ts (0.15.2)
+++ package/src/retrieval/bm25/index.ts (0.17.0)
@@ -1,300 +1,300 @@
1
- /**
2
- * BM25 Sparse Search Engine
3
- * Uses MiniSearch for fast keyword-based retrieval
4
- */
5
-
6
- import type { Logger } from 'pino'
7
- import MiniSearch from 'minisearch'
8
- import { tokenize, tokenizeForSearch, type TokenizerOptions } from './tokenizer'
9
-
10
- export interface BM25Document {
11
- id: string
12
- content: string
13
- metadata: Record<string, unknown>
14
- collection: string
15
- }
16
-
17
- export interface BM25SearchResult {
18
- id: string
19
- content: string
20
- metadata: Record<string, unknown>
21
- collection: string
22
- score: number
23
- }
24
-
25
- export interface BM25Config {
26
- /** Fields to index */
27
- fields: string[]
28
- /** Fields to store */
29
- storeFields: string[]
30
- /** BM25 k1 parameter (term frequency saturation) */
31
- k1?: number
32
- /** BM25 b parameter (document length normalization) */
33
- b?: number
34
- /** Boost for exact matches */
35
- boostExact?: number
36
- }
37
-
38
- const DEFAULT_CONFIG: BM25Config = {
39
- fields: ['content'],
40
- storeFields: ['content', 'metadata', 'collection'],
41
- k1: 1.2,
42
- b: 0.75,
43
- boostExact: 2.0
44
- }
45
-
46
- export class BM25Engine {
47
- private logger: Logger
48
- private config: BM25Config
49
- private index: MiniSearch<BM25Document>
50
- private documentCount: number = 0
51
- private tokenizerOptions: TokenizerOptions
52
-
53
- constructor(logger: Logger, config: Partial<BM25Config> = {}) {
54
- this.logger = logger.child({ component: 'bm25-engine' })
55
- this.config = { ...DEFAULT_CONFIG, ...config }
56
- this.tokenizerOptions = {
57
- minLength: 2,
58
- removeStopwords: true,
59
- stemming: true,
60
- splitCamelCase: true
61
- }
62
-
63
- this.index = this.createIndex()
64
- }
65
-
66
- /**
67
- * Create a new MiniSearch index
68
- */
69
- private createIndex(): MiniSearch<BM25Document> {
70
- return new MiniSearch<BM25Document>({
71
- fields: this.config.fields,
72
- storeFields: this.config.storeFields,
73
- idField: 'id',
74
- tokenize: (text: string) => tokenize(text, this.tokenizerOptions),
75
- processTerm: (term: string) => term.toLowerCase()
76
- })
77
- }
78
-
79
- /**
80
- * Build index from documents
81
- */
82
- async buildIndex(documents: BM25Document[]): Promise<void> {
83
- this.logger.info({ count: documents.length }, 'Building BM25 index')
84
-
85
- // Create fresh index
86
- this.index = this.createIndex()
87
- this.documentCount = 0
88
-
89
- // Add documents in batches
90
- const batchSize = 1000
91
- for (let i = 0; i < documents.length; i += batchSize) {
92
- const batch = documents.slice(i, i + batchSize)
93
- await this.addDocuments(batch)
94
- }
95
-
96
- this.logger.info({
97
- indexed: this.documentCount,
98
- terms: this.index.termCount
99
- }, 'BM25 index built')
100
- }
101
-
102
- /**
103
- * Add documents to index
104
- */
105
- async addDocuments(documents: BM25Document[]): Promise<void> {
106
- try {
107
- this.index.addAll(documents)
108
- this.documentCount += documents.length
109
- } catch (error) {
110
- this.logger.error({ error }, 'Failed to add documents to BM25 index')
111
- throw error
112
- }
113
- }
114
-
115
- /**
116
- * Add single document
117
- */
118
- addDocument(document: BM25Document): void {
119
- try {
120
- this.index.add(document)
121
- this.documentCount++
122
- } catch (error) {
123
- this.logger.error({ error, id: document.id }, 'Failed to add document to BM25 index')
124
- throw error
125
- }
126
- }
127
-
128
- /**
129
- * Remove document from index
130
- */
131
- removeDocument(document: BM25Document): void {
132
- try {
133
- this.index.remove(document)
134
- this.documentCount--
135
- } catch (error) {
136
- this.logger.error({ error, id: document.id }, 'Failed to remove document from BM25 index')
137
- // Don't throw - document might not exist
138
- }
139
- }
140
-
141
- /**
142
- * Update document in index
143
- */
144
- updateDocument(document: BM25Document): void {
145
- this.removeDocument(document)
146
- this.addDocument(document)
147
- }
148
-
149
- /**
150
- * Search the index
151
- */
152
- search(
153
- query: string,
154
- options: {
155
- limit?: number
156
- filter?: (result: BM25SearchResult) => boolean
157
- collection?: string
158
- } = {}
159
- ): BM25SearchResult[] {
160
- const { limit = 20, filter, collection } = options
161
-
162
- if (!query.trim()) {
163
- return []
164
- }
165
-
166
- this.logger.debug({
167
- query: query.slice(0, 50),
168
- limit
169
- }, 'BM25 search')
170
-
171
- try {
172
- // Tokenize query
173
- const queryTokens = tokenizeForSearch(query, this.tokenizerOptions)
174
-
175
- // Search with MiniSearch options
176
- const results = this.index.search(query, {
177
- prefix: true, // Allow prefix matching
178
- fuzzy: 0.2, // Allow minor typos
179
- combineWith: 'OR', // Match any term
180
- boost: {
181
- content: this.config.boostExact!
182
- }
183
- })
184
-
185
- // Transform and filter results
186
- let searchResults: BM25SearchResult[] = results.map(result => ({
187
- id: result.id,
188
- content: (result as any).content || '',
189
- metadata: (result as any).metadata || {},
190
- collection: (result as any).collection || '',
191
- score: result.score
192
- }))
193
-
194
- // Filter by collection if specified
195
- if (collection) {
196
- searchResults = searchResults.filter(r => r.collection === collection)
197
- }
198
-
199
- // Apply custom filter
200
- if (filter) {
201
- searchResults = searchResults.filter(filter)
202
- }
203
-
204
- // Normalize scores to 0-1 range
205
- searchResults = this.normalizeScores(searchResults)
206
-
207
- // Apply limit
208
- return searchResults.slice(0, limit)
209
-
210
- } catch (error) {
211
- this.logger.error({ error, query }, 'BM25 search failed')
212
- return []
213
- }
214
- }
215
-
216
- /**
217
- * Normalize scores to 0-1 range
218
- */
219
- private normalizeScores(results: BM25SearchResult[]): BM25SearchResult[] {
220
- if (results.length === 0) return []
221
- if (results.length === 1) {
222
- return [{ ...results[0], score: 1.0 }]
223
- }
224
-
225
- const maxScore = Math.max(...results.map(r => r.score))
226
- if (maxScore === 0) {
227
- return results.map(r => ({ ...r, score: 0 }))
228
- }
229
-
230
- return results.map(r => ({
231
- ...r,
232
- score: r.score / maxScore
233
- }))
234
- }
235
-
236
- /**
237
- * Get suggestions for autocomplete
238
- */
239
- suggest(query: string, limit: number = 5): string[] {
240
- try {
241
- const results = this.index.autoSuggest(query, { limit })
242
- return results.map(r => r.suggestion)
243
- } catch (error) {
244
- this.logger.error({ error, query }, 'BM25 suggest failed')
245
- return []
246
- }
247
- }
248
-
249
- /**
250
- * Get index statistics
251
- */
252
- getStats(): {
253
- documentCount: number
254
- termCount: number
255
- } {
256
- return {
257
- documentCount: this.documentCount,
258
- termCount: this.index.termCount
259
- }
260
- }
261
-
262
- /**
263
- * Clear the index
264
- */
265
- clear(): void {
266
- this.index = this.createIndex()
267
- this.documentCount = 0
268
- this.logger.info('BM25 index cleared')
269
- }
270
-
271
- /**
272
- * Export index for persistence
273
- */
274
- exportIndex(): string {
275
- return JSON.stringify(this.index.toJSON())
276
- }
277
-
278
- /**
279
- * Import index from persisted data
280
- */
281
- importIndex(data: string): void {
282
- try {
283
- const parsed = JSON.parse(data)
284
- this.index = MiniSearch.loadJSON(data, {
285
- fields: this.config.fields,
286
- storeFields: this.config.storeFields,
287
- idField: 'id'
288
- })
289
- // Count documents after import
290
- this.documentCount = this.index.documentCount
291
- this.logger.info({ documentCount: this.documentCount }, 'BM25 index imported')
292
- } catch (error) {
293
- this.logger.error({ error }, 'Failed to import BM25 index')
294
- throw error
295
- }
296
- }
297
- }
298
-
299
- export { tokenize, tokenizeForSearch, getNGrams } from './tokenizer'
300
- export type { TokenizerOptions } from './tokenizer'
1
+ /**
2
+ * BM25 Sparse Search Engine
3
+ * Uses MiniSearch for fast keyword-based retrieval
4
+ */
5
+
6
+ import type { Logger } from 'pino'
7
+ import MiniSearch from 'minisearch'
8
+ import { tokenize, tokenizeForSearch, type TokenizerOptions } from './tokenizer'
9
+
10
+ export interface BM25Document {
11
+ id: string
12
+ content: string
13
+ metadata: Record<string, unknown>
14
+ collection: string
15
+ }
16
+
17
+ export interface BM25SearchResult {
18
+ id: string
19
+ content: string
20
+ metadata: Record<string, unknown>
21
+ collection: string
22
+ score: number
23
+ }
24
+
25
+ export interface BM25Config {
26
+ /** Fields to index */
27
+ fields: string[]
28
+ /** Fields to store */
29
+ storeFields: string[]
30
+ /** BM25 k1 parameter (term frequency saturation) */
31
+ k1?: number
32
+ /** BM25 b parameter (document length normalization) */
33
+ b?: number
34
+ /** Boost for exact matches */
35
+ boostExact?: number
36
+ }
37
+
38
+ const DEFAULT_CONFIG: BM25Config = {
39
+ fields: ['content'],
40
+ storeFields: ['content', 'metadata', 'collection'],
41
+ k1: 1.2,
42
+ b: 0.75,
43
+ boostExact: 2.0
44
+ }
45
+
46
+ export class BM25Engine {
47
+ private logger: Logger
48
+ private config: BM25Config
49
+ private index: MiniSearch<BM25Document>
50
+ private documentCount: number = 0
51
+ private tokenizerOptions: TokenizerOptions
52
+
53
+ constructor(logger: Logger, config: Partial<BM25Config> = {}) {
54
+ this.logger = logger.child({ component: 'bm25-engine' })
55
+ this.config = { ...DEFAULT_CONFIG, ...config }
56
+ this.tokenizerOptions = {
57
+ minLength: 2,
58
+ removeStopwords: true,
59
+ stemming: true,
60
+ splitCamelCase: true
61
+ }
62
+
63
+ this.index = this.createIndex()
64
+ }
65
+
66
+ /**
67
+ * Create a new MiniSearch index
68
+ */
69
+ private createIndex(): MiniSearch<BM25Document> {
70
+ return new MiniSearch<BM25Document>({
71
+ fields: this.config.fields,
72
+ storeFields: this.config.storeFields,
73
+ idField: 'id',
74
+ tokenize: (text: string) => tokenize(text, this.tokenizerOptions),
75
+ processTerm: (term: string) => term.toLowerCase()
76
+ })
77
+ }
78
+
79
+ /**
80
+ * Build index from documents
81
+ */
82
+ async buildIndex(documents: BM25Document[]): Promise<void> {
83
+ this.logger.info({ count: documents.length }, 'Building BM25 index')
84
+
85
+ // Create fresh index
86
+ this.index = this.createIndex()
87
+ this.documentCount = 0
88
+
89
+ // Add documents in batches
90
+ const batchSize = 1000
91
+ for (let i = 0; i < documents.length; i += batchSize) {
92
+ const batch = documents.slice(i, i + batchSize)
93
+ await this.addDocuments(batch)
94
+ }
95
+
96
+ this.logger.info({
97
+ indexed: this.documentCount,
98
+ terms: this.index.termCount
99
+ }, 'BM25 index built')
100
+ }
101
+
102
+ /**
103
+ * Add documents to index
104
+ */
105
+ async addDocuments(documents: BM25Document[]): Promise<void> {
106
+ try {
107
+ this.index.addAll(documents)
108
+ this.documentCount += documents.length
109
+ } catch (error) {
110
+ this.logger.error({ error }, 'Failed to add documents to BM25 index')
111
+ throw error
112
+ }
113
+ }
114
+
115
+ /**
116
+ * Add single document
117
+ */
118
+ addDocument(document: BM25Document): void {
119
+ try {
120
+ this.index.add(document)
121
+ this.documentCount++
122
+ } catch (error) {
123
+ this.logger.error({ error, id: document.id }, 'Failed to add document to BM25 index')
124
+ throw error
125
+ }
126
+ }
127
+
128
+ /**
129
+ * Remove document from index
130
+ */
131
+ removeDocument(document: BM25Document): void {
132
+ try {
133
+ this.index.remove(document)
134
+ this.documentCount--
135
+ } catch (error) {
136
+ this.logger.error({ error, id: document.id }, 'Failed to remove document from BM25 index')
137
+ // Don't throw - document might not exist
138
+ }
139
+ }
140
+
141
+ /**
142
+ * Update document in index
143
+ */
144
+ updateDocument(document: BM25Document): void {
145
+ this.removeDocument(document)
146
+ this.addDocument(document)
147
+ }
148
+
149
+ /**
150
+ * Search the index
151
+ */
152
+ search(
153
+ query: string,
154
+ options: {
155
+ limit?: number
156
+ filter?: (result: BM25SearchResult) => boolean
157
+ collection?: string
158
+ } = {}
159
+ ): BM25SearchResult[] {
160
+ const { limit = 20, filter, collection } = options
161
+
162
+ if (!query.trim()) {
163
+ return []
164
+ }
165
+
166
+ this.logger.debug({
167
+ query: query.slice(0, 50),
168
+ limit
169
+ }, 'BM25 search')
170
+
171
+ try {
172
+ // Tokenize query
173
+ const queryTokens = tokenizeForSearch(query, this.tokenizerOptions)
174
+
175
+ // Search with MiniSearch options
176
+ const results = this.index.search(query, {
177
+ prefix: true, // Allow prefix matching
178
+ fuzzy: 0.2, // Allow minor typos
179
+ combineWith: 'OR', // Match any term
180
+ boost: {
181
+ content: this.config.boostExact!
182
+ }
183
+ })
184
+
185
+ // Transform and filter results
186
+ let searchResults: BM25SearchResult[] = results.map(result => ({
187
+ id: result.id,
188
+ content: (result as any).content || '',
189
+ metadata: (result as any).metadata || {},
190
+ collection: (result as any).collection || '',
191
+ score: result.score
192
+ }))
193
+
194
+ // Filter by collection if specified
195
+ if (collection) {
196
+ searchResults = searchResults.filter(r => r.collection === collection)
197
+ }
198
+
199
+ // Apply custom filter
200
+ if (filter) {
201
+ searchResults = searchResults.filter(filter)
202
+ }
203
+
204
+ // Normalize scores to 0-1 range
205
+ searchResults = this.normalizeScores(searchResults)
206
+
207
+ // Apply limit
208
+ return searchResults.slice(0, limit)
209
+
210
+ } catch (error) {
211
+ this.logger.error({ error, query }, 'BM25 search failed')
212
+ return []
213
+ }
214
+ }
215
+
216
+ /**
217
+ * Normalize scores to 0-1 range
218
+ */
219
+ private normalizeScores(results: BM25SearchResult[]): BM25SearchResult[] {
220
+ if (results.length === 0) return []
221
+ if (results.length === 1) {
222
+ return [{ ...results[0], score: 1.0 }]
223
+ }
224
+
225
+ const maxScore = Math.max(...results.map(r => r.score))
226
+ if (maxScore === 0) {
227
+ return results.map(r => ({ ...r, score: 0 }))
228
+ }
229
+
230
+ return results.map(r => ({
231
+ ...r,
232
+ score: r.score / maxScore
233
+ }))
234
+ }
235
+
236
+ /**
237
+ * Get suggestions for autocomplete
238
+ */
239
+ suggest(query: string, limit: number = 5): string[] {
240
+ try {
241
+ const results = this.index.autoSuggest(query, { limit })
242
+ return results.map(r => r.suggestion)
243
+ } catch (error) {
244
+ this.logger.error({ error, query }, 'BM25 suggest failed')
245
+ return []
246
+ }
247
+ }
248
+
249
+ /**
250
+ * Get index statistics
251
+ */
252
+ getStats(): {
253
+ documentCount: number
254
+ termCount: number
255
+ } {
256
+ return {
257
+ documentCount: this.documentCount,
258
+ termCount: this.index.termCount
259
+ }
260
+ }
261
+
262
+ /**
263
+ * Clear the index
264
+ */
265
+ clear(): void {
266
+ this.index = this.createIndex()
267
+ this.documentCount = 0
268
+ this.logger.info('BM25 index cleared')
269
+ }
270
+
271
+ /**
272
+ * Export index for persistence
273
+ */
274
+ exportIndex(): string {
275
+ return JSON.stringify(this.index.toJSON())
276
+ }
277
+
278
+ /**
279
+ * Import index from persisted data
280
+ */
281
+ importIndex(data: string): void {
282
+ try {
283
+ const parsed = JSON.parse(data)
284
+ this.index = MiniSearch.loadJSON(data, {
285
+ fields: this.config.fields,
286
+ storeFields: this.config.storeFields,
287
+ idField: 'id'
288
+ })
289
+ // Count documents after import
290
+ this.documentCount = this.index.documentCount
291
+ this.logger.info({ documentCount: this.documentCount }, 'BM25 index imported')
292
+ } catch (error) {
293
+ this.logger.error({ error }, 'Failed to import BM25 index')
294
+ throw error
295
+ }
296
+ }
297
+ }
298
+
299
+ export { tokenize, tokenizeForSearch, getNGrams } from './tokenizer'
300
+ export type { TokenizerOptions } from './tokenizer'