claude-brain 0.30.2 → 0.30.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. package/README.md +241 -191
  2. package/VERSION +1 -1
  3. package/assets/CLAUDE-unified.md +11 -11
  4. package/assets/CLAUDE.md +29 -29
  5. package/package.json +7 -3
  6. package/packs/backend/node.json +173 -173
  7. package/packs/core/javascript.json +176 -176
  8. package/packs/core/typescript.json +222 -222
  9. package/packs/frontend/react.json +254 -254
  10. package/packs/meta/testing.json +172 -172
  11. package/scripts/postinstall.mjs +531 -531
  12. package/src/automation/decision-detector.ts +452 -452
  13. package/src/automation/phase12-manager.ts +456 -456
  14. package/src/automation/proactive-recall.ts +373 -373
  15. package/src/automation/project-detector.ts +310 -310
  16. package/src/automation/repo-scanner.ts +210 -205
  17. package/src/cli/auto-setup.ts +75 -75
  18. package/src/cli/auto-start.ts +266 -266
  19. package/src/cli/bin.ts +264 -264
  20. package/src/cli/commands/autostart.ts +90 -90
  21. package/src/cli/commands/chroma.ts +578 -577
  22. package/src/cli/commands/export-training.ts +70 -70
  23. package/src/cli/commands/export.ts +130 -130
  24. package/src/cli/commands/git-hook.ts +183 -183
  25. package/src/cli/commands/hooks.ts +217 -217
  26. package/src/cli/commands/init.ts +123 -123
  27. package/src/cli/commands/install-mcp.ts +122 -111
  28. package/src/cli/commands/models.ts +979 -979
  29. package/src/cli/commands/pack.ts +200 -200
  30. package/src/cli/commands/refresh.ts +344 -339
  31. package/src/cli/commands/reindex.ts +120 -120
  32. package/src/cli/commands/serve.ts +466 -463
  33. package/src/cli/commands/start.ts +44 -44
  34. package/src/cli/commands/status.ts +220 -203
  35. package/src/cli/commands/uninstall-mcp.ts +45 -41
  36. package/src/cli/commands/update.ts +130 -124
  37. package/src/cli/migrate-chroma.ts +106 -106
  38. package/src/cli/ui/animations.ts +80 -80
  39. package/src/cli/ui/components.ts +82 -82
  40. package/src/cli/ui/index.ts +4 -4
  41. package/src/cli/ui/logo.ts +36 -36
  42. package/src/cli/ui/theme.ts +55 -55
  43. package/src/code-intelligence/indexer.ts +352 -352
  44. package/src/code-intelligence/linker.ts +178 -178
  45. package/src/code-intelligence/parser.ts +484 -484
  46. package/src/code-intelligence/query.ts +291 -291
  47. package/src/code-intelligence/schema.ts +83 -83
  48. package/src/code-intelligence/types.ts +95 -95
  49. package/src/config/defaults.ts +52 -52
  50. package/src/config/home.ts +56 -56
  51. package/src/config/index.ts +5 -5
  52. package/src/config/loader.ts +192 -192
  53. package/src/config/schema.ts +446 -415
  54. package/src/config/validator.ts +182 -182
  55. package/src/context/assembler.ts +407 -400
  56. package/src/context/index.ts +79 -79
  57. package/src/context/progress-tracker.ts +174 -174
  58. package/src/context/standards-manager.ts +287 -287
  59. package/src/context/validator.ts +58 -58
  60. package/src/diagnostics/index.ts +122 -121
  61. package/src/health/index.ts +233 -232
  62. package/src/hooks/brain-hook.ts +134 -131
  63. package/src/hooks/capture.ts +168 -168
  64. package/src/hooks/claude-code-mastery.md +112 -112
  65. package/src/hooks/context-hook.ts +260 -245
  66. package/src/hooks/deduplicator.ts +72 -72
  67. package/src/hooks/git-capture.ts +109 -109
  68. package/src/hooks/git-hook-installer.ts +211 -207
  69. package/src/hooks/index.ts +20 -20
  70. package/src/hooks/installer.ts +306 -288
  71. package/src/hooks/interceptor-hook.ts +204 -201
  72. package/src/hooks/passive-classifier.ts +397 -397
  73. package/src/hooks/queue.ts +160 -129
  74. package/src/hooks/session-tracker.ts +312 -312
  75. package/src/hooks/types.ts +52 -52
  76. package/src/index.ts +7 -7
  77. package/src/intelligence/cross-project/generalizer.ts +283 -283
  78. package/src/intelligence/cross-project/index.ts +7 -7
  79. package/src/intelligence/hf-downloader.ts +222 -222
  80. package/src/intelligence/hf-manifest.json +78 -78
  81. package/src/intelligence/index.ts +24 -24
  82. package/src/intelligence/inference-router.ts +762 -762
  83. package/src/intelligence/model-manager.ts +263 -245
  84. package/src/intelligence/optimization/index.ts +10 -10
  85. package/src/intelligence/optimization/precompute.ts +202 -202
  86. package/src/intelligence/optimization/semantic-cache.ts +213 -207
  87. package/src/intelligence/prediction/index.ts +7 -7
  88. package/src/intelligence/prediction/recommender.ts +276 -268
  89. package/src/intelligence/reasoning/chain-retrieval.ts +243 -247
  90. package/src/intelligence/reasoning/index.ts +7 -7
  91. package/src/intelligence/temporal/evolution.ts +193 -197
  92. package/src/intelligence/temporal/index.ts +16 -16
  93. package/src/intelligence/temporal/query-processor.ts +190 -190
  94. package/src/intelligence/temporal/timeline.ts +272 -259
  95. package/src/intelligence/temporal/trends.ts +263 -263
  96. package/src/intelligence/tokenizer.ts +118 -118
  97. package/src/knowledge/entity-extractor.ts +447 -443
  98. package/src/knowledge/graph/builder.ts +185 -185
  99. package/src/knowledge/graph/linker.ts +201 -201
  100. package/src/knowledge/graph/memory-graph.ts +359 -359
  101. package/src/knowledge/graph/schema.ts +99 -99
  102. package/src/knowledge/graph/search.ts +166 -166
  103. package/src/knowledge/relationship-extractor.ts +108 -108
  104. package/src/memory/chroma/client.ts +211 -192
  105. package/src/memory/chroma/collection-manager.ts +92 -92
  106. package/src/memory/chroma/config.ts +57 -57
  107. package/src/memory/chroma/embeddings.ts +177 -175
  108. package/src/memory/chroma/index.ts +82 -82
  109. package/src/memory/chroma/migration.ts +270 -270
  110. package/src/memory/chroma/schemas.ts +69 -69
  111. package/src/memory/chroma/search.ts +319 -315
  112. package/src/memory/chroma/store.ts +755 -747
  113. package/src/memory/compression.ts +121 -121
  114. package/src/memory/consolidation/archiver.ts +162 -165
  115. package/src/memory/consolidation/merger.ts +182 -186
  116. package/src/memory/consolidation/scorer.ts +136 -136
  117. package/src/memory/database.ts +9 -0
  118. package/src/memory/dual-write.ts +145 -0
  119. package/src/memory/embeddings.ts +226 -226
  120. package/src/memory/episodic/detector.ts +108 -108
  121. package/src/memory/episodic/manager.ts +347 -351
  122. package/src/memory/episodic/summarizer.ts +179 -179
  123. package/src/memory/episodic/types.ts +52 -52
  124. package/src/memory/fts5-search.ts +692 -633
  125. package/src/memory/index.ts +943 -1060
  126. package/src/memory/migrations/add-fts5.ts +118 -108
  127. package/src/memory/patterns.ts +438 -438
  128. package/src/memory/pruning.ts +60 -60
  129. package/src/memory/schema.ts +88 -88
  130. package/src/memory/store.ts +911 -787
  131. package/src/orchestrator/handlers/decision-handler.ts +204 -204
  132. package/src/packs/index.ts +9 -9
  133. package/src/packs/loader.ts +134 -134
  134. package/src/packs/manager.ts +204 -204
  135. package/src/packs/ranker.ts +78 -78
  136. package/src/packs/types.ts +81 -81
  137. package/src/phase12/index.ts +5 -5
  138. package/src/retrieval/bm25/index.ts +300 -297
  139. package/src/retrieval/bm25/tokenizer.ts +184 -184
  140. package/src/retrieval/feedback/adaptive.ts +221 -221
  141. package/src/retrieval/feedback/index.ts +16 -16
  142. package/src/retrieval/feedback/metrics.ts +221 -221
  143. package/src/retrieval/feedback/store.ts +283 -283
  144. package/src/retrieval/fusion/index.ts +194 -194
  145. package/src/retrieval/fusion/rrf.ts +165 -165
  146. package/src/retrieval/index.ts +12 -12
  147. package/src/retrieval/pipeline.ts +375 -375
  148. package/src/retrieval/query/expander.ts +203 -203
  149. package/src/retrieval/query/index.ts +27 -27
  150. package/src/retrieval/query/intent-classifier.ts +252 -252
  151. package/src/retrieval/query/temporal-parser.ts +295 -295
  152. package/src/retrieval/reranker/index.ts +189 -188
  153. package/src/retrieval/reranker/model.ts +99 -95
  154. package/src/retrieval/service.ts +125 -125
  155. package/src/retrieval/types.ts +162 -162
  156. package/src/routing/entity-extractor.ts +454 -454
  157. package/src/routing/handlers/exploration-handler.ts +369 -0
  158. package/src/routing/handlers/index.ts +19 -0
  159. package/src/routing/handlers/memory-handler.ts +273 -0
  160. package/src/routing/handlers/mutation-handler.ts +241 -0
  161. package/src/routing/handlers/recall-handler.ts +642 -0
  162. package/src/routing/handlers/shared.ts +515 -0
  163. package/src/routing/handlers/types.ts +48 -0
  164. package/src/routing/intent-classifier.ts +552 -552
  165. package/src/routing/response-filter.ts +399 -391
  166. package/src/routing/router.ts +245 -2193
  167. package/src/routing/search-engine.ts +521 -514
  168. package/src/routing/types.ts +104 -94
  169. package/src/scripts/health-check.ts +118 -118
  170. package/src/scripts/setup.ts +122 -122
  171. package/src/server/auto-updater.ts +283 -276
  172. package/src/server/handlers/call-tool.ts +159 -159
  173. package/src/server/handlers/list-tools.ts +35 -35
  174. package/src/server/handlers/tools/auto-remember.ts +165 -165
  175. package/src/server/handlers/tools/brain.ts +86 -86
  176. package/src/server/handlers/tools/create-project.ts +135 -135
  177. package/src/server/handlers/tools/get-code-standards.ts +123 -123
  178. package/src/server/handlers/tools/get-corrections.ts +152 -152
  179. package/src/server/handlers/tools/get-patterns.ts +156 -156
  180. package/src/server/handlers/tools/get-project-context.ts +75 -75
  181. package/src/server/handlers/tools/index.ts +30 -30
  182. package/src/server/handlers/tools/init-project.ts +756 -756
  183. package/src/server/handlers/tools/list-projects.ts +126 -126
  184. package/src/server/handlers/tools/recall-similar.ts +87 -87
  185. package/src/server/handlers/tools/recognize-pattern.ts +132 -132
  186. package/src/server/handlers/tools/record-correction.ts +131 -131
  187. package/src/server/handlers/tools/remember-decision.ts +168 -168
  188. package/src/server/handlers/tools/schemas.ts +179 -179
  189. package/src/server/handlers/tools/search-code.ts +122 -122
  190. package/src/server/handlers/tools/smart-context.ts +146 -146
  191. package/src/server/handlers/tools/update-progress.ts +131 -131
  192. package/src/server/http-api.ts +215 -1229
  193. package/src/server/mcp-proxy.ts +85 -84
  194. package/src/server/mcp-server.ts +285 -284
  195. package/src/server/middleware/auth.ts +39 -0
  196. package/src/server/middleware/error-handler.ts +37 -0
  197. package/src/server/middleware/rate-limit.ts +53 -0
  198. package/src/server/middleware/validate.ts +42 -0
  199. package/src/server/pid-manager.ts +137 -136
  200. package/src/server/providers/resources.ts +581 -581
  201. package/src/server/routes/code.ts +228 -0
  202. package/src/server/routes/context.ts +26 -0
  203. package/src/server/routes/health.ts +19 -0
  204. package/src/server/routes/helpers.ts +100 -0
  205. package/src/server/routes/hooks.ts +197 -0
  206. package/src/server/routes/mcp.ts +47 -0
  207. package/src/server/routes/memory.ts +397 -0
  208. package/src/server/routes/models.ts +96 -0
  209. package/src/server/routes/projects.ts +89 -0
  210. package/src/server/routes/types.ts +21 -0
  211. package/src/server/schemas/api-schemas.ts +202 -0
  212. package/src/server/services.ts +720 -720
  213. package/src/server/utils/memory-indicator.ts +84 -84
  214. package/src/server/utils/response-formatter.ts +129 -129
  215. package/src/server/web-viewer.ts +1145 -1115
  216. package/src/setup/index.ts +38 -38
  217. package/src/tools/registry.ts +115 -115
  218. package/src/tools/schemas.ts +666 -666
  219. package/src/tools/types.ts +412 -412
  220. package/src/training/data-store.ts +320 -298
  221. package/src/training/retrain-pipeline.ts +399 -394
  222. package/src/utils/error-handler.ts +136 -136
  223. package/src/utils/index.ts +58 -58
  224. package/src/utils/kill-port.ts +55 -53
  225. package/src/utils/phase12-helper.ts +56 -56
  226. package/src/utils/safe-path.ts +43 -0
  227. package/src/utils/timing.ts +47 -47
  228. package/src/utils/transaction.ts +63 -63
  229. package/src/vault/index.ts +4 -3
  230. package/src/vault/paths.ts +106 -106
  231. package/src/vault/query.ts +4 -1
  232. package/src/vault/reader.ts +44 -1
  233. package/src/vault/watcher.ts +24 -1
  234. package/src/vault/writer.ts +487 -413
  235. package/skills/persistent-memory/SKILL.md +0 -148
  236. package/skills/persistent-memory/references/tool-reference.md +0 -90
@@ -1,375 +1,375 @@
1
- /**
2
- * Hybrid Retrieval Pipeline
3
- * Orchestrates Dense -> Sparse -> Fusion -> Rerank flow
4
- */
5
-
6
- import type { Logger } from 'pino'
7
- import type { CollectionManager } from '@/memory/chroma/collection-manager'
8
- import type { EmbeddingProvider } from '@/memory/chroma/embeddings'
9
- import type { RetrievalConfig } from '@/config/schema'
10
- import type { HybridSearchResult, HybridSearchOptions } from './types'
11
- import { BM25Engine, type BM25Document } from './bm25'
12
- import { createFusionStrategy, RRFFusion, LinearFusion, MaxFusion } from './fusion'
13
- import { Reranker } from './reranker'
14
- import { COLLECTIONS } from '@/memory/chroma/schemas'
15
-
16
- export class RetrievalPipeline {
17
- private logger: Logger
18
- private config: RetrievalConfig
19
- private collections: CollectionManager
20
- private embeddings: EmbeddingProvider
21
- private bm25Engine: BM25Engine
22
- private reranker: Reranker | null = null
23
- private fusionStrategy: RRFFusion | LinearFusion | MaxFusion
24
- private initialized: boolean = false
25
- private indexBuilt: boolean = false
26
-
27
- constructor(
28
- logger: Logger,
29
- collections: CollectionManager,
30
- embeddings: EmbeddingProvider,
31
- config: RetrievalConfig
32
- ) {
33
- this.logger = logger.child({ component: 'retrieval-pipeline' })
34
- this.config = config
35
- this.collections = collections
36
- this.embeddings = embeddings
37
-
38
- // Initialize BM25 engine
39
- this.bm25Engine = new BM25Engine(logger)
40
-
41
- // Initialize fusion strategy
42
- this.fusionStrategy = createFusionStrategy({
43
- method: config.fusion.method,
44
- rrfK: config.fusion.rrfK,
45
- denseWeight: config.dense.weight,
46
- sparseWeight: config.sparse.weight
47
- })
48
- }
49
-
50
- /**
51
- * Initialize the pipeline
52
- */
53
- async initialize(): Promise<void> {
54
- if (this.initialized) {
55
- return
56
- }
57
-
58
- this.logger.info('Initializing retrieval pipeline')
59
-
60
- // Initialize reranker if enabled
61
- if (this.config.reranker.enabled) {
62
- this.reranker = new Reranker(this.logger, {
63
- model: this.config.reranker.model,
64
- topK: this.config.reranker.topK
65
- })
66
- await this.reranker.initialize()
67
- this.logger.info('Reranker initialized')
68
- }
69
-
70
- // Build BM25 index from existing documents
71
- if (this.config.sparse.enabled) {
72
- await this.buildBM25Index()
73
- }
74
-
75
- this.initialized = true
76
- this.logger.info('Retrieval pipeline initialized')
77
- }
78
-
79
- /**
80
- * Build BM25 index from ChromaDB collections
81
- */
82
- async buildBM25Index(): Promise<void> {
83
- this.logger.info('Building BM25 index from ChromaDB')
84
-
85
- const documents: BM25Document[] = []
86
- const collectionsToIndex = [
87
- COLLECTIONS.DECISIONS,
88
- COLLECTIONS.MEMORIES,
89
- COLLECTIONS.PATTERNS,
90
- COLLECTIONS.CORRECTIONS
91
- ]
92
-
93
- for (const collectionName of collectionsToIndex) {
94
- try {
95
- const collection = await this.collections.getDecisions() // Use appropriate getter
96
- const results = await collection.get({
97
- include: ['documents', 'metadatas']
98
- })
99
-
100
- if (results.ids.length > 0) {
101
- for (let i = 0; i < results.ids.length; i++) {
102
- documents.push({
103
- id: results.ids[i]!,
104
- content: (results.documents?.[i] ?? '') as string,
105
- metadata: (results.metadatas?.[i] ?? {}) as Record<string, unknown>,
106
- collection: collectionName
107
- })
108
- }
109
- }
110
- } catch (error) {
111
- this.logger.warn({ error, collection: collectionName }, 'Failed to index collection')
112
- }
113
- }
114
-
115
- await this.bm25Engine.buildIndex(documents)
116
- this.indexBuilt = true
117
- this.logger.info({ documentCount: documents.length }, 'BM25 index built')
118
- }
119
-
120
- /**
121
- * Perform hybrid search
122
- */
123
- async search(
124
- query: string,
125
- options: HybridSearchOptions = {}
126
- ): Promise<HybridSearchResult[]> {
127
- const {
128
- project,
129
- collections = ['decisions', 'memories', 'patterns', 'corrections'],
130
- limit = 10,
131
- minScore = 0,
132
- useReranker = this.config.reranker.enabled,
133
- dateRange,
134
- tags
135
- } = options
136
-
137
- this.logger.debug({
138
- query: query.slice(0, 50),
139
- options
140
- }, 'Hybrid search')
141
-
142
- // Step 1: Dense (semantic) search
143
- const denseResults = await this.denseSearch(query, {
144
- project,
145
- collections,
146
- limit: this.config.dense.limit,
147
- minSimilarity: this.config.dense.minSimilarity,
148
- dateRange,
149
- tags
150
- })
151
-
152
- // Step 2: Sparse (BM25) search
153
- let sparseResults: HybridSearchResult[] = []
154
- if (this.config.sparse.enabled && this.indexBuilt) {
155
- sparseResults = await this.sparseSearch(query, {
156
- project,
157
- collections,
158
- limit: this.config.sparse.limit
159
- })
160
- }
161
-
162
- // Step 3: Fusion
163
- let fusedResults: HybridSearchResult[]
164
- if (sparseResults.length > 0) {
165
- fusedResults = this.fusionStrategy.fuse(denseResults, sparseResults)
166
- } else {
167
- // If no sparse results, use dense results directly
168
- fusedResults = denseResults.map(r => ({
169
- ...r,
170
- scores: { ...r.scores, fusion: r.scores.dense, final: r.scores.dense }
171
- }))
172
- }
173
-
174
- // Step 4: Reranking (optional)
175
- if (useReranker && this.reranker && fusedResults.length > 0) {
176
- fusedResults = await this.reranker.rerank(query, fusedResults)
177
- }
178
-
179
- // Filter by minimum score
180
- if (minScore > 0) {
181
- fusedResults = fusedResults.filter(r => r.scores.final >= minScore)
182
- }
183
-
184
- // Apply final limit
185
- return fusedResults.slice(0, limit)
186
- }
187
-
188
- /**
189
- * Dense (semantic) search using ChromaDB
190
- */
191
- private async denseSearch(
192
- query: string,
193
- options: {
194
- project?: string
195
- collections: string[]
196
- limit: number
197
- minSimilarity: number
198
- dateRange?: { start?: string; end?: string }
199
- tags?: string[]
200
- }
201
- ): Promise<HybridSearchResult[]> {
202
- const results: HybridSearchResult[] = []
203
-
204
- // Generate query embedding
205
- const embedding = await this.embeddings.generate(query)
206
-
207
- // Search each collection
208
- for (const collectionName of options.collections) {
209
- try {
210
- const collection = await this.getCollection(collectionName)
211
- if (!collection) continue
212
-
213
- // Build where clause
214
- const where: Record<string, any> = {}
215
- if (options.project) {
216
- where['project'] = { $eq: options.project }
217
- }
218
-
219
- const queryResults = await collection.query({
220
- queryEmbeddings: [embedding],
221
- nResults: Math.ceil(options.limit / options.collections.length),
222
- where: Object.keys(where).length > 0 ? where : undefined,
223
- include: ['documents', 'metadatas', 'distances']
224
- })
225
-
226
- // Process results
227
- if (queryResults.ids[0]) {
228
- for (let i = 0; i < queryResults.ids[0].length; i++) {
229
- const distance = queryResults.distances?.[0]?.[i] || 0
230
- const similarity = 1 - distance // Convert distance to similarity
231
-
232
- if (similarity >= options.minSimilarity) {
233
- results.push({
234
- id: queryResults.ids[0]![i]!,
235
- content: (queryResults.documents?.[0]?.[i] ?? '') as string,
236
- metadata: (queryResults.metadatas?.[0]?.[i] ?? {}) as Record<string, unknown>,
237
- collection: collectionName,
238
- scores: {
239
- dense: similarity,
240
- sparse: 0,
241
- fusion: similarity,
242
- final: similarity
243
- },
244
- provenance: 'dense'
245
- })
246
- }
247
- }
248
- }
249
- } catch (error) {
250
- this.logger.error({ error, collection: collectionName }, 'Dense search failed')
251
- }
252
- }
253
-
254
- // Sort by dense score
255
- results.sort((a, b) => b.scores.dense - a.scores.dense)
256
- return results
257
- }
258
-
259
- /**
260
- * Sparse (BM25) search
261
- */
262
- private async sparseSearch(
263
- query: string,
264
- options: {
265
- project?: string
266
- collections: string[]
267
- limit: number
268
- }
269
- ): Promise<HybridSearchResult[]> {
270
- const bm25Results = this.bm25Engine.search(query, {
271
- limit: options.limit,
272
- filter: (result) => {
273
- // Filter by project if specified
274
- if (options.project && result.metadata.project !== options.project) {
275
- return false
276
- }
277
- // Filter by collection
278
- if (!options.collections.includes(result.collection)) {
279
- return false
280
- }
281
- return true
282
- }
283
- })
284
-
285
- return bm25Results.map(result => ({
286
- id: result.id,
287
- content: result.content,
288
- metadata: result.metadata,
289
- collection: result.collection,
290
- scores: {
291
- dense: 0,
292
- sparse: result.score,
293
- fusion: result.score,
294
- final: result.score
295
- },
296
- provenance: 'sparse' as const
297
- }))
298
- }
299
-
300
- /**
301
- * Get collection by name
302
- */
303
- private async getCollection(name: string) {
304
- switch (name) {
305
- case 'decisions':
306
- return this.collections.getDecisions()
307
- case 'memories':
308
- return this.collections.getMemories()
309
- case 'patterns':
310
- return this.collections.getPatterns()
311
- case 'corrections':
312
- return this.collections.getCorrections()
313
- default:
314
- return null
315
- }
316
- }
317
-
318
- /**
319
- * Add document to BM25 index
320
- */
321
- addToIndex(document: BM25Document): void {
322
- if (this.config.sparse.enabled) {
323
- this.bm25Engine.addDocument(document)
324
- }
325
- }
326
-
327
- /**
328
- * Remove document from BM25 index
329
- */
330
- removeFromIndex(document: BM25Document): void {
331
- if (this.config.sparse.enabled) {
332
- this.bm25Engine.removeDocument(document)
333
- }
334
- }
335
-
336
- /**
337
- * Rebuild BM25 index
338
- */
339
- async rebuildIndex(): Promise<void> {
340
- await this.buildBM25Index()
341
- }
342
-
343
- /**
344
- * Get pipeline status
345
- */
346
- getStatus(): {
347
- initialized: boolean
348
- indexBuilt: boolean
349
- rerankerEnabled: boolean
350
- sparseEnabled: boolean
351
- fusionMethod: string
352
- bm25Stats: { documentCount: number; termCount: number }
353
- } {
354
- return {
355
- initialized: this.initialized,
356
- indexBuilt: this.indexBuilt,
357
- rerankerEnabled: this.reranker !== null,
358
- sparseEnabled: this.config.sparse.enabled,
359
- fusionMethod: this.config.fusion.method,
360
- bm25Stats: this.bm25Engine.getStats()
361
- }
362
- }
363
-
364
- /**
365
- * Cleanup resources
366
- */
367
- cleanup(): void {
368
- if (this.reranker) {
369
- this.reranker.cleanup()
370
- }
371
- this.bm25Engine.clear()
372
- this.initialized = false
373
- this.indexBuilt = false
374
- }
375
- }
1
+ /**
2
+ * Hybrid Retrieval Pipeline
3
+ * Orchestrates Dense -> Sparse -> Fusion -> Rerank flow
4
+ */
5
+
6
+ import type { Logger } from 'pino'
7
+ import type { CollectionManager } from '@/memory/chroma/collection-manager'
8
+ import type { EmbeddingProvider } from '@/memory/chroma/embeddings'
9
+ import type { RetrievalConfig } from '@/config/schema'
10
+ import type { HybridSearchResult, HybridSearchOptions } from './types'
11
+ import { BM25Engine, type BM25Document } from './bm25'
12
+ import { createFusionStrategy, RRFFusion, LinearFusion, MaxFusion } from './fusion'
13
+ import { Reranker } from './reranker'
14
+ import { COLLECTIONS } from '@/memory/chroma/schemas'
15
+
16
+ export class RetrievalPipeline {
17
+ private logger: Logger
18
+ private config: RetrievalConfig
19
+ private collections: CollectionManager
20
+ private embeddings: EmbeddingProvider
21
+ private bm25Engine: BM25Engine
22
+ private reranker: Reranker | null = null
23
+ private fusionStrategy: RRFFusion | LinearFusion | MaxFusion
24
+ private initialized: boolean = false
25
+ private indexBuilt: boolean = false
26
+
27
+ constructor(
28
+ logger: Logger,
29
+ collections: CollectionManager,
30
+ embeddings: EmbeddingProvider,
31
+ config: RetrievalConfig
32
+ ) {
33
+ this.logger = logger.child({ component: 'retrieval-pipeline' })
34
+ this.config = config
35
+ this.collections = collections
36
+ this.embeddings = embeddings
37
+
38
+ // Initialize BM25 engine
39
+ this.bm25Engine = new BM25Engine(logger)
40
+
41
+ // Initialize fusion strategy
42
+ this.fusionStrategy = createFusionStrategy({
43
+ method: config.fusion.method,
44
+ rrfK: config.fusion.rrfK,
45
+ denseWeight: config.dense.weight,
46
+ sparseWeight: config.sparse.weight
47
+ })
48
+ }
49
+
50
+ /**
51
+ * Initialize the pipeline
52
+ */
53
+ async initialize(): Promise<void> {
54
+ if (this.initialized) {
55
+ return
56
+ }
57
+
58
+ this.logger.info('Initializing retrieval pipeline')
59
+
60
+ // Initialize reranker if enabled
61
+ if (this.config.reranker.enabled) {
62
+ this.reranker = new Reranker(this.logger, {
63
+ model: this.config.reranker.model,
64
+ topK: this.config.reranker.topK
65
+ })
66
+ await this.reranker.initialize()
67
+ this.logger.info('Reranker initialized')
68
+ }
69
+
70
+ // Build BM25 index from existing documents
71
+ if (this.config.sparse.enabled) {
72
+ await this.buildBM25Index()
73
+ }
74
+
75
+ this.initialized = true
76
+ this.logger.info('Retrieval pipeline initialized')
77
+ }
78
+
79
+ /**
80
+ * Build BM25 index from ChromaDB collections
81
+ */
82
+ async buildBM25Index(): Promise<void> {
83
+ this.logger.info('Building BM25 index from ChromaDB')
84
+
85
+ const documents: BM25Document[] = []
86
+ const collectionsToIndex = [
87
+ COLLECTIONS.DECISIONS,
88
+ COLLECTIONS.MEMORIES,
89
+ COLLECTIONS.PATTERNS,
90
+ COLLECTIONS.CORRECTIONS
91
+ ]
92
+
93
+ for (const collectionName of collectionsToIndex) {
94
+ try {
95
+ const collection = await this.collections.getDecisions() // Use appropriate getter
96
+ const results = await collection.get({
97
+ include: ['documents', 'metadatas']
98
+ })
99
+
100
+ if (results.ids.length > 0) {
101
+ for (let i = 0; i < results.ids.length; i++) {
102
+ documents.push({
103
+ id: results.ids[i]!,
104
+ content: (results.documents?.[i] ?? '') as string,
105
+ metadata: (results.metadatas?.[i] ?? {}) as Record<string, unknown>,
106
+ collection: collectionName
107
+ })
108
+ }
109
+ }
110
+ } catch (error) {
111
+ this.logger.warn({ error, collection: collectionName }, 'Failed to index collection')
112
+ }
113
+ }
114
+
115
+ await this.bm25Engine.buildIndex(documents)
116
+ this.indexBuilt = true
117
+ this.logger.info({ documentCount: documents.length }, 'BM25 index built')
118
+ }
119
+
120
+ /**
121
+ * Perform hybrid search
122
+ */
123
+ async search(
124
+ query: string,
125
+ options: HybridSearchOptions = {}
126
+ ): Promise<HybridSearchResult[]> {
127
+ const {
128
+ project,
129
+ collections = ['decisions', 'memories', 'patterns', 'corrections'],
130
+ limit = 10,
131
+ minScore = 0,
132
+ useReranker = this.config.reranker.enabled,
133
+ dateRange,
134
+ tags
135
+ } = options
136
+
137
+ this.logger.debug({
138
+ query: query.slice(0, 50),
139
+ options
140
+ }, 'Hybrid search')
141
+
142
+ // Step 1: Dense (semantic) search
143
+ const denseResults = await this.denseSearch(query, {
144
+ project,
145
+ collections,
146
+ limit: this.config.dense.limit,
147
+ minSimilarity: this.config.dense.minSimilarity,
148
+ dateRange,
149
+ tags
150
+ })
151
+
152
+ // Step 2: Sparse (BM25) search
153
+ let sparseResults: HybridSearchResult[] = []
154
+ if (this.config.sparse.enabled && this.indexBuilt) {
155
+ sparseResults = await this.sparseSearch(query, {
156
+ project,
157
+ collections,
158
+ limit: this.config.sparse.limit
159
+ })
160
+ }
161
+
162
+ // Step 3: Fusion
163
+ let fusedResults: HybridSearchResult[]
164
+ if (sparseResults.length > 0) {
165
+ fusedResults = this.fusionStrategy.fuse(denseResults, sparseResults)
166
+ } else {
167
+ // If no sparse results, use dense results directly
168
+ fusedResults = denseResults.map(r => ({
169
+ ...r,
170
+ scores: { ...r.scores, fusion: r.scores.dense, final: r.scores.dense }
171
+ }))
172
+ }
173
+
174
+ // Step 4: Reranking (optional)
175
+ if (useReranker && this.reranker && fusedResults.length > 0) {
176
+ fusedResults = await this.reranker.rerank(query, fusedResults)
177
+ }
178
+
179
+ // Filter by minimum score
180
+ if (minScore > 0) {
181
+ fusedResults = fusedResults.filter(r => r.scores.final >= minScore)
182
+ }
183
+
184
+ // Apply final limit
185
+ return fusedResults.slice(0, limit)
186
+ }
187
+
188
+ /**
189
+ * Dense (semantic) search using ChromaDB
190
+ */
191
+ private async denseSearch(
192
+ query: string,
193
+ options: {
194
+ project?: string
195
+ collections: string[]
196
+ limit: number
197
+ minSimilarity: number
198
+ dateRange?: { start?: string; end?: string }
199
+ tags?: string[]
200
+ }
201
+ ): Promise<HybridSearchResult[]> {
202
+ const results: HybridSearchResult[] = []
203
+
204
+ // Generate query embedding
205
+ const embedding = await this.embeddings.generate(query)
206
+
207
+ // Search each collection
208
+ for (const collectionName of options.collections) {
209
+ try {
210
+ const collection = await this.getCollection(collectionName)
211
+ if (!collection) continue
212
+
213
+ // Build where clause
214
+ const where: Record<string, any> = {}
215
+ if (options.project) {
216
+ where['project'] = { $eq: options.project }
217
+ }
218
+
219
+ const queryResults = await collection.query({
220
+ queryEmbeddings: [embedding],
221
+ nResults: Math.ceil(options.limit / options.collections.length),
222
+ where: Object.keys(where).length > 0 ? where : undefined,
223
+ include: ['documents', 'metadatas', 'distances']
224
+ })
225
+
226
+ // Process results
227
+ if (queryResults.ids[0]) {
228
+ for (let i = 0; i < queryResults.ids[0].length; i++) {
229
+ const distance = queryResults.distances?.[0]?.[i] || 0
230
+ const similarity = 1 - distance // Convert distance to similarity
231
+
232
+ if (similarity >= options.minSimilarity) {
233
+ results.push({
234
+ id: queryResults.ids[0]![i]!,
235
+ content: (queryResults.documents?.[0]?.[i] ?? '') as string,
236
+ metadata: (queryResults.metadatas?.[0]?.[i] ?? {}) as Record<string, unknown>,
237
+ collection: collectionName,
238
+ scores: {
239
+ dense: similarity,
240
+ sparse: 0,
241
+ fusion: similarity,
242
+ final: similarity
243
+ },
244
+ provenance: 'dense'
245
+ })
246
+ }
247
+ }
248
+ }
249
+ } catch (error) {
250
+ this.logger.error({ error, collection: collectionName }, 'Dense search failed')
251
+ }
252
+ }
253
+
254
+ // Sort by dense score
255
+ results.sort((a, b) => b.scores.dense - a.scores.dense)
256
+ return results
257
+ }
258
+
259
+ /**
260
+ * Sparse (BM25) search
261
+ */
262
+ private async sparseSearch(
263
+ query: string,
264
+ options: {
265
+ project?: string
266
+ collections: string[]
267
+ limit: number
268
+ }
269
+ ): Promise<HybridSearchResult[]> {
270
+ const bm25Results = this.bm25Engine.search(query, {
271
+ limit: options.limit,
272
+ filter: (result) => {
273
+ // Filter by project if specified
274
+ if (options.project && result.metadata.project !== options.project) {
275
+ return false
276
+ }
277
+ // Filter by collection
278
+ if (!options.collections.includes(result.collection)) {
279
+ return false
280
+ }
281
+ return true
282
+ }
283
+ })
284
+
285
+ return bm25Results.map(result => ({
286
+ id: result.id,
287
+ content: result.content,
288
+ metadata: result.metadata,
289
+ collection: result.collection,
290
+ scores: {
291
+ dense: 0,
292
+ sparse: result.score,
293
+ fusion: result.score,
294
+ final: result.score
295
+ },
296
+ provenance: 'sparse' as const
297
+ }))
298
+ }
299
+
300
+ /**
301
+ * Get collection by name
302
+ */
303
+ private async getCollection(name: string) {
304
+ switch (name) {
305
+ case 'decisions':
306
+ return this.collections.getDecisions()
307
+ case 'memories':
308
+ return this.collections.getMemories()
309
+ case 'patterns':
310
+ return this.collections.getPatterns()
311
+ case 'corrections':
312
+ return this.collections.getCorrections()
313
+ default:
314
+ return null
315
+ }
316
+ }
317
+
318
+ /**
319
+ * Add document to BM25 index
320
+ */
321
+ addToIndex(document: BM25Document): void {
322
+ if (this.config.sparse.enabled) {
323
+ this.bm25Engine.addDocument(document)
324
+ }
325
+ }
326
+
327
+ /**
328
+ * Remove document from BM25 index
329
+ */
330
+ removeFromIndex(document: BM25Document): void {
331
+ if (this.config.sparse.enabled) {
332
+ this.bm25Engine.removeDocument(document)
333
+ }
334
+ }
335
+
336
+ /**
337
+ * Rebuild BM25 index
338
+ */
339
+ async rebuildIndex(): Promise<void> {
340
+ await this.buildBM25Index()
341
+ }
342
+
343
+ /**
344
+ * Get pipeline status
345
+ */
346
+ getStatus(): {
347
+ initialized: boolean
348
+ indexBuilt: boolean
349
+ rerankerEnabled: boolean
350
+ sparseEnabled: boolean
351
+ fusionMethod: string
352
+ bm25Stats: { documentCount: number; termCount: number }
353
+ } {
354
+ return {
355
+ initialized: this.initialized,
356
+ indexBuilt: this.indexBuilt,
357
+ rerankerEnabled: this.reranker !== null,
358
+ sparseEnabled: this.config.sparse.enabled,
359
+ fusionMethod: this.config.fusion.method,
360
+ bm25Stats: this.bm25Engine.getStats()
361
+ }
362
+ }
363
+
364
+ /**
365
+ * Cleanup resources
366
+ */
367
+ cleanup(): void {
368
+ if (this.reranker) {
369
+ this.reranker.cleanup()
370
+ }
371
+ this.bm25Engine.clear()
372
+ this.initialized = false
373
+ this.indexBuilt = false
374
+ }
375
+ }