claude-brain 0.14.2 → 0.14.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +191 -191
- package/VERSION +1 -1
- package/assets/CLAUDE-unified.md +11 -11
- package/assets/CLAUDE.md +11 -11
- package/bunfig.toml +8 -8
- package/package.json +80 -80
- package/packs/backend/node.json +173 -173
- package/packs/core/javascript.json +176 -176
- package/packs/core/typescript.json +222 -222
- package/packs/frontend/react.json +254 -254
- package/packs/meta/testing.json +172 -172
- package/src/automation/auto-context.ts +240 -240
- package/src/automation/decision-detector.ts +452 -452
- package/src/automation/index.ts +11 -11
- package/src/automation/phase12-manager.ts +456 -456
- package/src/automation/proactive-recall.ts +373 -373
- package/src/automation/project-detector.ts +310 -310
- package/src/automation/repo-scanner.ts +205 -205
- package/src/cli/auto-setup.ts +82 -82
- package/src/cli/bin.ts +202 -202
- package/src/cli/commands/chroma.ts +573 -573
- package/src/cli/commands/git-hook.ts +189 -189
- package/src/cli/commands/hooks.ts +213 -213
- package/src/cli/commands/init.ts +122 -122
- package/src/cli/commands/install-mcp.ts +92 -92
- package/src/cli/commands/pack.ts +197 -197
- package/src/cli/commands/serve.ts +167 -167
- package/src/cli/commands/start.ts +42 -42
- package/src/cli/commands/uninstall-mcp.ts +41 -41
- package/src/cli/commands/update.ts +121 -121
- package/src/cli/diagnose.ts +4 -4
- package/src/cli/health-check.ts +4 -4
- package/src/cli/migrate-chroma.ts +106 -106
- package/src/cli/setup.ts +4 -4
- package/src/cli/ui/animations.ts +80 -80
- package/src/cli/ui/components.ts +82 -82
- package/src/cli/ui/index.ts +4 -4
- package/src/cli/ui/logo.ts +36 -36
- package/src/cli/ui/theme.ts +55 -55
- package/src/config/defaults.ts +50 -50
- package/src/config/home.ts +55 -55
- package/src/config/index.ts +7 -7
- package/src/config/loader.ts +166 -166
- package/src/config/migration.ts +76 -76
- package/src/config/schema.ts +360 -360
- package/src/config/validator.ts +184 -184
- package/src/config/watcher.ts +86 -86
- package/src/context/assembler.ts +398 -398
- package/src/context/cache-manager.ts +101 -101
- package/src/context/formatter.ts +84 -84
- package/src/context/hierarchy.ts +85 -85
- package/src/context/index.ts +83 -83
- package/src/context/progress-tracker.ts +174 -174
- package/src/context/standards-manager.ts +287 -287
- package/src/context/types.ts +252 -252
- package/src/context/validator.ts +58 -58
- package/src/diagnostics/index.ts +123 -123
- package/src/health/index.ts +229 -229
- package/src/hooks/brain-hook.ts +112 -112
- package/src/hooks/capture.ts +168 -168
- package/src/hooks/deduplicator.ts +72 -72
- package/src/hooks/git-capture.ts +109 -109
- package/src/hooks/git-hook-installer.ts +207 -207
- package/src/hooks/index.ts +20 -20
- package/src/hooks/installer.ts +191 -194
- package/src/hooks/passive-classifier.ts +366 -366
- package/src/hooks/queue.ts +129 -129
- package/src/hooks/session-tracker.ts +275 -275
- package/src/hooks/types.ts +47 -47
- package/src/index.ts +7 -7
- package/src/intelligence/cross-project/affinity.ts +162 -162
- package/src/intelligence/cross-project/generalizer.ts +283 -283
- package/src/intelligence/cross-project/index.ts +13 -13
- package/src/intelligence/cross-project/transfer.ts +201 -201
- package/src/intelligence/index.ts +24 -24
- package/src/intelligence/optimization/index.ts +10 -10
- package/src/intelligence/optimization/precompute.ts +202 -202
- package/src/intelligence/optimization/semantic-cache.ts +207 -207
- package/src/intelligence/prediction/context-anticipator.ts +198 -198
- package/src/intelligence/prediction/decision-predictor.ts +184 -184
- package/src/intelligence/prediction/index.ts +13 -13
- package/src/intelligence/prediction/recommender.ts +268 -268
- package/src/intelligence/reasoning/chain-retrieval.ts +247 -247
- package/src/intelligence/reasoning/counterfactual.ts +248 -248
- package/src/intelligence/reasoning/index.ts +13 -13
- package/src/intelligence/reasoning/synthesizer.ts +169 -169
- package/src/intelligence/temporal/evolution.ts +197 -197
- package/src/intelligence/temporal/index.ts +16 -16
- package/src/intelligence/temporal/query-processor.ts +190 -190
- package/src/intelligence/temporal/timeline.ts +259 -259
- package/src/intelligence/temporal/trends.ts +263 -263
- package/src/knowledge/entity-extractor.ts +416 -416
- package/src/knowledge/graph/builder.ts +185 -185
- package/src/knowledge/graph/linker.ts +201 -201
- package/src/knowledge/graph/memory-graph.ts +359 -359
- package/src/knowledge/graph/schema.ts +99 -99
- package/src/knowledge/graph/search.ts +168 -168
- package/src/knowledge/relationship-extractor.ts +108 -108
- package/src/memory/chroma/client.ts +174 -174
- package/src/memory/chroma/collection-manager.ts +94 -94
- package/src/memory/chroma/config.ts +57 -57
- package/src/memory/chroma/embeddings.ts +153 -153
- package/src/memory/chroma/index.ts +82 -82
- package/src/memory/chroma/migration.ts +270 -270
- package/src/memory/chroma/schemas.ts +69 -69
- package/src/memory/chroma/search.ts +315 -315
- package/src/memory/chroma/store.ts +741 -741
- package/src/memory/consolidation/archiver.ts +164 -164
- package/src/memory/consolidation/merger.ts +186 -186
- package/src/memory/consolidation/scorer.ts +138 -138
- package/src/memory/context-builder.ts +236 -236
- package/src/memory/database.ts +169 -169
- package/src/memory/embedding-utils.ts +156 -156
- package/src/memory/embeddings.ts +226 -226
- package/src/memory/episodic/detector.ts +108 -108
- package/src/memory/episodic/manager.ts +351 -351
- package/src/memory/episodic/summarizer.ts +179 -179
- package/src/memory/episodic/types.ts +52 -52
- package/src/memory/index.ts +582 -582
- package/src/memory/knowledge-extractor.ts +455 -455
- package/src/memory/learning.ts +378 -378
- package/src/memory/patterns.ts +396 -396
- package/src/memory/schema.ts +88 -88
- package/src/memory/search.ts +309 -309
- package/src/memory/store.ts +787 -787
- package/src/memory/types.ts +121 -121
- package/src/orchestrator/coordinator.ts +272 -272
- package/src/orchestrator/decision-logger.ts +228 -228
- package/src/orchestrator/event-emitter.ts +198 -198
- package/src/orchestrator/event-queue.ts +184 -184
- package/src/orchestrator/handlers/base-handler.ts +70 -70
- package/src/orchestrator/handlers/context-handler.ts +73 -73
- package/src/orchestrator/handlers/decision-handler.ts +204 -204
- package/src/orchestrator/handlers/index.ts +10 -10
- package/src/orchestrator/handlers/status-handler.ts +131 -131
- package/src/orchestrator/handlers/task-handler.ts +171 -171
- package/src/orchestrator/index.ts +275 -275
- package/src/orchestrator/task-parser.ts +284 -284
- package/src/orchestrator/types.ts +98 -98
- package/src/packs/index.ts +9 -9
- package/src/packs/loader.ts +134 -134
- package/src/packs/manager.ts +204 -204
- package/src/packs/ranker.ts +78 -78
- package/src/packs/types.ts +81 -81
- package/src/phase12/index.ts +5 -5
- package/src/retrieval/bm25/index.ts +300 -300
- package/src/retrieval/bm25/tokenizer.ts +184 -184
- package/src/retrieval/feedback/adaptive.ts +223 -223
- package/src/retrieval/feedback/index.ts +16 -16
- package/src/retrieval/feedback/metrics.ts +223 -223
- package/src/retrieval/feedback/store.ts +283 -283
- package/src/retrieval/fusion/index.ts +194 -194
- package/src/retrieval/fusion/rrf.ts +163 -163
- package/src/retrieval/index.ts +12 -12
- package/src/retrieval/pipeline.ts +375 -375
- package/src/retrieval/query/expander.ts +198 -198
- package/src/retrieval/query/index.ts +27 -27
- package/src/retrieval/query/intent-classifier.ts +236 -236
- package/src/retrieval/query/temporal-parser.ts +295 -295
- package/src/retrieval/reranker/index.ts +188 -188
- package/src/retrieval/reranker/model.ts +95 -95
- package/src/retrieval/service.ts +125 -125
- package/src/retrieval/types.ts +162 -162
- package/src/routing/entity-extractor.ts +428 -428
- package/src/routing/intent-classifier.ts +436 -436
- package/src/routing/response-filter.ts +258 -254
- package/src/routing/router.ts +1322 -1314
- package/src/routing/search-engine.ts +475 -475
- package/src/routing/types.ts +94 -84
- package/src/scripts/health-check.ts +118 -118
- package/src/scripts/setup.ts +122 -122
- package/src/server/handlers/call-tool.ts +156 -156
- package/src/server/handlers/index.ts +9 -9
- package/src/server/handlers/list-tools.ts +35 -35
- package/src/server/handlers/tools/analyze-decision-evolution.ts +151 -151
- package/src/server/handlers/tools/auto-remember.ts +200 -200
- package/src/server/handlers/tools/brain.ts +85 -85
- package/src/server/handlers/tools/create-project.ts +135 -135
- package/src/server/handlers/tools/detect-trends.ts +144 -144
- package/src/server/handlers/tools/find-cross-project-patterns.ts +168 -168
- package/src/server/handlers/tools/get-activity-log.ts +194 -194
- package/src/server/handlers/tools/get-code-standards.ts +124 -124
- package/src/server/handlers/tools/get-corrections.ts +154 -154
- package/src/server/handlers/tools/get-decision-timeline.ts +172 -172
- package/src/server/handlers/tools/get-episode.ts +103 -103
- package/src/server/handlers/tools/get-patterns.ts +158 -158
- package/src/server/handlers/tools/get-phase12-status.ts +63 -63
- package/src/server/handlers/tools/get-project-context.ts +75 -75
- package/src/server/handlers/tools/get-recommendations.ts +145 -145
- package/src/server/handlers/tools/index.ts +31 -31
- package/src/server/handlers/tools/init-project.ts +757 -757
- package/src/server/handlers/tools/list-episodes.ts +90 -90
- package/src/server/handlers/tools/list-projects.ts +125 -125
- package/src/server/handlers/tools/rate-memory.ts +101 -101
- package/src/server/handlers/tools/recall-similar.ts +87 -87
- package/src/server/handlers/tools/recognize-pattern.ts +126 -126
- package/src/server/handlers/tools/record-correction.ts +125 -125
- package/src/server/handlers/tools/remember-decision.ts +153 -153
- package/src/server/handlers/tools/schemas.ts +253 -253
- package/src/server/handlers/tools/search-knowledge-graph.ts +102 -102
- package/src/server/handlers/tools/smart-context.ts +146 -146
- package/src/server/handlers/tools/update-progress.ts +131 -131
- package/src/server/handlers/tools/what-if-analysis.ts +135 -135
- package/src/server/http-api.ts +693 -693
- package/src/server/index.ts +40 -40
- package/src/server/mcp-server.ts +283 -283
- package/src/server/providers/index.ts +7 -7
- package/src/server/providers/prompts.ts +327 -327
- package/src/server/providers/resources.ts +622 -622
- package/src/server/services.ts +468 -468
- package/src/server/types.ts +39 -39
- package/src/server/utils/error-handler.ts +155 -155
- package/src/server/utils/index.ts +13 -13
- package/src/server/utils/memory-indicator.ts +83 -83
- package/src/server/utils/request-context.ts +122 -122
- package/src/server/utils/response-formatter.ts +129 -124
- package/src/server/utils/validators.ts +210 -210
- package/src/setup/index.ts +48 -48
- package/src/setup/wizard.ts +461 -461
- package/src/tools/index.ts +24 -24
- package/src/tools/registry.ts +115 -115
- package/src/tools/schemas.test.ts +30 -30
- package/src/tools/schemas.ts +617 -617
- package/src/tools/types.ts +412 -412
- package/src/utils/circuit-breaker.ts +130 -130
- package/src/utils/cleanup.ts +34 -34
- package/src/utils/error-handler.ts +132 -132
- package/src/utils/error-messages.ts +60 -60
- package/src/utils/fallback.ts +45 -45
- package/src/utils/index.ts +54 -54
- package/src/utils/logger-utils.ts +80 -80
- package/src/utils/logger.ts +88 -88
- package/src/utils/phase12-helper.ts +56 -56
- package/src/utils/retry.ts +94 -94
- package/src/utils/timing.ts +47 -47
- package/src/utils/transaction.ts +63 -63
- package/src/vault/frontmatter.ts +264 -264
- package/src/vault/index.ts +318 -318
- package/src/vault/paths.ts +106 -106
- package/src/vault/query.ts +422 -422
- package/src/vault/reader.ts +264 -264
- package/src/vault/templates.ts +186 -186
- package/src/vault/types.ts +73 -73
- package/src/vault/watcher.ts +277 -277
- package/src/vault/writer.ts +413 -413
- package/tsconfig.json +30 -30
|
@@ -1,375 +1,375 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Hybrid Retrieval Pipeline
|
|
3
|
-
* Orchestrates Dense -> Sparse -> Fusion -> Rerank flow
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import type { Logger } from 'pino'
|
|
7
|
-
import type { CollectionManager } from '@/memory/chroma/collection-manager'
|
|
8
|
-
import type { EmbeddingProvider } from '@/memory/chroma/embeddings'
|
|
9
|
-
import type { RetrievalConfig } from '@/config/schema'
|
|
10
|
-
import type { HybridSearchResult, HybridSearchOptions } from './types'
|
|
11
|
-
import { BM25Engine, type BM25Document } from './bm25'
|
|
12
|
-
import { createFusionStrategy, RRFFusion, LinearFusion, MaxFusion } from './fusion'
|
|
13
|
-
import { Reranker } from './reranker'
|
|
14
|
-
import { COLLECTIONS } from '@/memory/chroma/schemas'
|
|
15
|
-
|
|
16
|
-
/**
 * Hybrid retrieval pipeline.
 *
 * Runs a dense (embedding) search against the ChromaDB collections, an
 * optional sparse (BM25) search against an in-memory index, fuses the two
 * result lists, and optionally reranks the fused list.
 */
export class RetrievalPipeline {
  private logger: Logger
  private config: RetrievalConfig
  private collections: CollectionManager
  private embeddings: EmbeddingProvider
  private bm25Engine: BM25Engine
  // Created lazily in initialize(), and only when config.reranker.enabled is set.
  private reranker: Reranker | null = null
  private fusionStrategy: RRFFusion | LinearFusion | MaxFusion
  private initialized: boolean = false
  // True once buildBM25Index() has completed; search() skips BM25 until then.
  private indexBuilt: boolean = false

  /**
   * @param logger      Parent logger; a child tagged `retrieval-pipeline` is used internally.
   * @param collections Accessor for the ChromaDB collections.
   * @param embeddings  Provider used to embed the query text for dense search.
   * @param config      Retrieval settings (dense, sparse, fusion, reranker sections).
   */
  constructor(
    logger: Logger,
    collections: CollectionManager,
    embeddings: EmbeddingProvider,
    config: RetrievalConfig
  ) {
    this.logger = logger.child({ component: 'retrieval-pipeline' })
    this.config = config
    this.collections = collections
    this.embeddings = embeddings

    // Initialize BM25 engine (it receives the parent logger, not the child above)
    this.bm25Engine = new BM25Engine(logger)

    // Initialize fusion strategy
    this.fusionStrategy = createFusionStrategy({
      method: config.fusion.method,
      rrfK: config.fusion.rrfK,
      denseWeight: config.dense.weight,
      sparseWeight: config.sparse.weight
    })
  }

  /**
   * Initialize the pipeline: create and initialize the reranker when enabled,
   * and build the BM25 index when sparse search is enabled.
   * Calling it again after a successful run is a no-op.
   */
  async initialize(): Promise<void> {
    if (this.initialized) {
      return
    }

    this.logger.info('Initializing retrieval pipeline')

    // Initialize reranker if enabled
    if (this.config.reranker.enabled) {
      this.reranker = new Reranker(this.logger, {
        model: this.config.reranker.model,
        topK: this.config.reranker.topK
      })
      await this.reranker.initialize()
      this.logger.info('Reranker initialized')
    }

    // Build BM25 index from existing documents
    if (this.config.sparse.enabled) {
      await this.buildBM25Index()
    }

    this.initialized = true
    this.logger.info('Retrieval pipeline initialized')
  }

  /**
   * Build the BM25 index from the ChromaDB collections.
   *
   * Each collection is read through its own getter and its documents are
   * labelled with the matching `COLLECTIONS` constant. A collection that
   * fails to load is logged and skipped; the remaining ones are still indexed.
   */
  async buildBM25Index(): Promise<void> {
    this.logger.info('Building BM25 index from ChromaDB')

    const documents: BM25Document[] = []

    // Pair every label with the getter for that collection. Previously every
    // iteration read the decisions collection, so decisions were indexed four
    // times under different labels and the other collections were never indexed.
    const sources = [
      { name: COLLECTIONS.DECISIONS, open: () => this.collections.getDecisions() },
      { name: COLLECTIONS.MEMORIES, open: () => this.collections.getMemories() },
      { name: COLLECTIONS.PATTERNS, open: () => this.collections.getPatterns() },
      { name: COLLECTIONS.CORRECTIONS, open: () => this.collections.getCorrections() }
    ]

    for (const source of sources) {
      const collectionName = source.name
      try {
        const collection = await source.open()
        const results = await collection.get({
          include: ['documents', 'metadatas']
        })

        for (let i = 0; i < results.ids.length; i++) {
          documents.push({
            id: results.ids[i],
            content: results.documents?.[i] || '',
            metadata: results.metadatas?.[i] || {},
            collection: collectionName
          })
        }
      } catch (error) {
        // Best effort: one unavailable collection must not block the others.
        this.logger.warn({ error, collection: collectionName }, 'Failed to index collection')
      }
    }

    await this.bm25Engine.buildIndex(documents)
    this.indexBuilt = true
    this.logger.info({ documentCount: documents.length }, 'BM25 index built')
  }

  /**
   * Perform hybrid search.
   *
   * @param query   Free-text query.
   * @param options Optional project/collection scoping, result limit, minimum
   *                final score, and a per-call reranker switch.
   * @returns At most `limit` results, in the order produced by fusion/reranking.
   */
  async search(
    query: string,
    options: HybridSearchOptions = {}
  ): Promise<HybridSearchResult[]> {
    const {
      project,
      collections = ['decisions', 'memories', 'patterns', 'corrections'],
      limit = 10,
      minScore = 0,
      useReranker = this.config.reranker.enabled,
      dateRange,
      tags
    } = options

    this.logger.debug({
      query: query.slice(0, 50),
      options
    }, 'Hybrid search')

    // Step 1: Dense (semantic) search
    const denseResults = await this.denseSearch(query, {
      project,
      collections,
      limit: this.config.dense.limit,
      minSimilarity: this.config.dense.minSimilarity,
      dateRange,
      tags
    })

    // Step 2: Sparse (BM25) search — only when enabled and the index exists
    let sparseResults: HybridSearchResult[] = []
    if (this.config.sparse.enabled && this.indexBuilt) {
      sparseResults = await this.sparseSearch(query, {
        project,
        collections,
        limit: this.config.sparse.limit
      })
    }

    // Step 3: Fusion
    let fusedResults: HybridSearchResult[]
    if (sparseResults.length > 0) {
      fusedResults = this.fusionStrategy.fuse(denseResults, sparseResults)
    } else {
      // If no sparse results, use dense results directly
      fusedResults = denseResults.map(r => ({
        ...r,
        scores: { ...r.scores, fusion: r.scores.dense, final: r.scores.dense }
      }))
    }

    // Step 4: Reranking (optional)
    if (useReranker && this.reranker && fusedResults.length > 0) {
      fusedResults = await this.reranker.rerank(query, fusedResults)
    }

    // Filter by minimum score
    if (minScore > 0) {
      fusedResults = fusedResults.filter(r => r.scores.final >= minScore)
    }

    // Apply final limit
    return fusedResults.slice(0, limit)
  }

  /**
   * Dense (semantic) search using ChromaDB.
   *
   * The query is embedded once and each requested collection is queried for
   * `ceil(limit / collections.length)` neighbours. A collection whose query
   * throws is logged and skipped. Results are sorted by dense score, descending.
   *
   * NOTE(review): `dateRange` and `tags` are accepted but not applied to the
   * query; only `project` is turned into a `where` filter.
   */
  private async denseSearch(
    query: string,
    options: {
      project?: string
      collections: string[]
      limit: number
      minSimilarity: number
      dateRange?: { start?: string; end?: string }
      tags?: string[]
    }
  ): Promise<HybridSearchResult[]> {
    const results: HybridSearchResult[] = []

    // Generate query embedding
    const embedding = await this.embeddings.generate(query)

    // Search each collection
    for (const collectionName of options.collections) {
      try {
        const collection = await this.getCollection(collectionName)
        // Unknown collection names are silently ignored.
        if (!collection) continue

        // Build where clause
        const where: Record<string, any> = {}
        if (options.project) {
          where['project'] = { $eq: options.project }
        }

        const queryResults = await collection.query({
          queryEmbeddings: [embedding],
          nResults: Math.ceil(options.limit / options.collections.length),
          where: Object.keys(where).length > 0 ? where : undefined,
          include: ['documents', 'metadatas', 'distances']
        })

        // Process results (index 0 = the single query embedding sent above)
        if (queryResults.ids[0]) {
          for (let i = 0; i < queryResults.ids[0].length; i++) {
            // A missing distance is treated as 0, i.e. similarity 1.
            const distance = queryResults.distances?.[0]?.[i] || 0
            // Convert distance to similarity.
            // NOTE(review): assumes a metric where 1 - distance is meaningful — confirm.
            const similarity = 1 - distance

            if (similarity >= options.minSimilarity) {
              results.push({
                id: queryResults.ids[0][i],
                content: queryResults.documents?.[0]?.[i] || '',
                metadata: queryResults.metadatas?.[0]?.[i] || {},
                collection: collectionName,
                scores: {
                  dense: similarity,
                  sparse: 0,
                  fusion: similarity,
                  final: similarity
                },
                provenance: 'dense'
              })
            }
          }
        }
      } catch (error) {
        this.logger.error({ error, collection: collectionName }, 'Dense search failed')
      }
    }

    // Sort by dense score
    results.sort((a, b) => b.scores.dense - a.scores.dense)
    return results
  }

  /**
   * Sparse (BM25) search.
   *
   * Delegates to the BM25 engine with a filter restricting hits to the
   * requested project (when given) and collections, then maps the hits to
   * the hybrid result shape with the BM25 score as sparse/fusion/final score.
   */
  private async sparseSearch(
    query: string,
    options: {
      project?: string
      collections: string[]
      limit: number
    }
  ): Promise<HybridSearchResult[]> {
    const bm25Results = this.bm25Engine.search(query, {
      limit: options.limit,
      filter: (result) => {
        // Filter by project if specified
        if (options.project && result.metadata.project !== options.project) {
          return false
        }
        // Filter by collection
        if (!options.collections.includes(result.collection)) {
          return false
        }
        return true
      }
    })

    return bm25Results.map(result => ({
      id: result.id,
      content: result.content,
      metadata: result.metadata,
      collection: result.collection,
      scores: {
        dense: 0,
        sparse: result.score,
        fusion: result.score,
        final: result.score
      },
      provenance: 'sparse' as const
    }))
  }

  /**
   * Get collection by name.
   *
   * @returns The matching collection, or `null` for an unrecognised name.
   */
  private async getCollection(name: string) {
    switch (name) {
      case 'decisions':
        return this.collections.getDecisions()
      case 'memories':
        return this.collections.getMemories()
      case 'patterns':
        return this.collections.getPatterns()
      case 'corrections':
        return this.collections.getCorrections()
      default:
        return null
    }
  }

  /**
   * Add document to BM25 index. No-op when sparse search is disabled.
   */
  addToIndex(document: BM25Document): void {
    if (this.config.sparse.enabled) {
      this.bm25Engine.addDocument(document)
    }
  }

  /**
   * Remove document from BM25 index. No-op when sparse search is disabled.
   */
  removeFromIndex(document: BM25Document): void {
    if (this.config.sparse.enabled) {
      this.bm25Engine.removeDocument(document)
    }
  }

  /**
   * Rebuild BM25 index from the current ChromaDB contents.
   */
  async rebuildIndex(): Promise<void> {
    await this.buildBM25Index()
  }

  /**
   * Get pipeline status: lifecycle flags, configured features and BM25 stats.
   */
  getStatus(): {
    initialized: boolean
    indexBuilt: boolean
    rerankerEnabled: boolean
    sparseEnabled: boolean
    fusionMethod: string
    bm25Stats: { documentCount: number; termCount: number }
  } {
    return {
      initialized: this.initialized,
      indexBuilt: this.indexBuilt,
      // Reflects whether a reranker instance exists, not the config flag.
      rerankerEnabled: this.reranker !== null,
      sparseEnabled: this.config.sparse.enabled,
      fusionMethod: this.config.fusion.method,
      bm25Stats: this.bm25Engine.getStats()
    }
  }

  /**
   * Cleanup resources: release the reranker, clear the BM25 index and reset
   * the lifecycle flags so that initialize() runs again on the next call.
   */
  cleanup(): void {
    if (this.reranker) {
      this.reranker.cleanup()
    }
    this.bm25Engine.clear()
    this.initialized = false
    this.indexBuilt = false
  }
}
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid Retrieval Pipeline
|
|
3
|
+
* Orchestrates Dense -> Sparse -> Fusion -> Rerank flow
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import type { Logger } from 'pino'
|
|
7
|
+
import type { CollectionManager } from '@/memory/chroma/collection-manager'
|
|
8
|
+
import type { EmbeddingProvider } from '@/memory/chroma/embeddings'
|
|
9
|
+
import type { RetrievalConfig } from '@/config/schema'
|
|
10
|
+
import type { HybridSearchResult, HybridSearchOptions } from './types'
|
|
11
|
+
import { BM25Engine, type BM25Document } from './bm25'
|
|
12
|
+
import { createFusionStrategy, RRFFusion, LinearFusion, MaxFusion } from './fusion'
|
|
13
|
+
import { Reranker } from './reranker'
|
|
14
|
+
import { COLLECTIONS } from '@/memory/chroma/schemas'
|
|
15
|
+
|
|
16
|
+
export class RetrievalPipeline {
|
|
17
|
+
private logger: Logger
|
|
18
|
+
private config: RetrievalConfig
|
|
19
|
+
private collections: CollectionManager
|
|
20
|
+
private embeddings: EmbeddingProvider
|
|
21
|
+
private bm25Engine: BM25Engine
|
|
22
|
+
private reranker: Reranker | null = null
|
|
23
|
+
private fusionStrategy: RRFFusion | LinearFusion | MaxFusion
|
|
24
|
+
private initialized: boolean = false
|
|
25
|
+
private indexBuilt: boolean = false
|
|
26
|
+
|
|
27
|
+
constructor(
  logger: Logger,
  collections: CollectionManager,
  embeddings: EmbeddingProvider,
  config: RetrievalConfig
) {
  // Keep references to the collaborators; internal log lines go through a
  // child logger tagged with this component's name.
  this.logger = logger.child({ component: 'retrieval-pipeline' })
  this.embeddings = embeddings
  this.collections = collections
  this.config = config

  // The BM25 engine is handed the caller's logger rather than the child above.
  this.bm25Engine = new BM25Engine(logger)

  // The fusion strategy is chosen from the fusion section, with the
  // dense/sparse weights taken from their own config sections.
  const fusionSettings = {
    method: config.fusion.method,
    rrfK: config.fusion.rrfK,
    denseWeight: config.dense.weight,
    sparseWeight: config.sparse.weight
  }
  this.fusionStrategy = createFusionStrategy(fusionSettings)
}
|
|
49
|
+
|
|
50
|
+
/**
 * Prepare the pipeline for use.
 *
 * Creates and initializes the reranker when the reranker config is enabled,
 * builds the BM25 index when sparse search is enabled, then marks the
 * pipeline as initialized. Does nothing if it is already initialized.
 */
async initialize(): Promise<void> {
  if (!this.initialized) {
    const log = this.logger
    log.info('Initializing retrieval pipeline')

    // Optional reranker
    const rerankerSettings = this.config.reranker
    if (rerankerSettings.enabled) {
      const { model, topK } = rerankerSettings
      // Assigned before awaiting so the instance is kept even if its
      // initialization rejects.
      this.reranker = new Reranker(log, { model, topK })
      await this.reranker.initialize()
      log.info('Reranker initialized')
    }

    // Optional sparse index over the existing documents
    if (this.config.sparse.enabled) {
      await this.buildBM25Index()
    }

    this.initialized = true
    log.info('Retrieval pipeline initialized')
  }
}
|
|
78
|
+
|
|
79
|
+
/**
 * Build BM25 index from ChromaDB collections.
 *
 * Each collection is read through its own getter and its documents are
 * labelled with the matching `COLLECTIONS` constant. A collection that fails
 * to load is logged and skipped; the remaining ones are still indexed.
 */
async buildBM25Index(): Promise<void> {
  this.logger.info('Building BM25 index from ChromaDB')

  const documents: BM25Document[] = []

  // Pair every label with the getter for that collection. Previously every
  // iteration read the decisions collection, so decisions were indexed four
  // times under different labels and the other collections were never indexed.
  const sources = [
    { name: COLLECTIONS.DECISIONS, open: () => this.collections.getDecisions() },
    { name: COLLECTIONS.MEMORIES, open: () => this.collections.getMemories() },
    { name: COLLECTIONS.PATTERNS, open: () => this.collections.getPatterns() },
    { name: COLLECTIONS.CORRECTIONS, open: () => this.collections.getCorrections() }
  ]

  for (const source of sources) {
    const collectionName = source.name
    try {
      const collection = await source.open()
      const results = await collection.get({
        include: ['documents', 'metadatas']
      })

      for (let i = 0; i < results.ids.length; i++) {
        documents.push({
          id: results.ids[i],
          content: results.documents?.[i] || '',
          metadata: results.metadatas?.[i] || {},
          collection: collectionName
        })
      }
    } catch (error) {
      // Best effort: one unavailable collection must not block the others.
      this.logger.warn({ error, collection: collectionName }, 'Failed to index collection')
    }
  }

  await this.bm25Engine.buildIndex(documents)
  this.indexBuilt = true
  this.logger.info({ documentCount: documents.length }, 'BM25 index built')
}
|
|
119
|
+
|
|
120
|
+
/**
 * Run a hybrid query: dense search, optional BM25 search, fusion, optional
 * reranking, minimum-score filtering, and truncation to `limit`.
 *
 * @param query   Free-text query.
 * @param options Optional scoping and tuning; see the defaults below.
 * @returns At most `limit` results in fused (or reranked) order.
 */
async search(
  query: string,
  options: HybridSearchOptions = {}
): Promise<HybridSearchResult[]> {
  const {
    project,
    collections = ['decisions', 'memories', 'patterns', 'corrections'],
    limit = 10,
    minScore = 0,
    useReranker = this.config.reranker.enabled,
    dateRange,
    tags
  } = options

  // Only the first 50 characters of the query are logged.
  const queryPreview = query.slice(0, 50)
  this.logger.debug({ query: queryPreview, options }, 'Hybrid search')

  // Dense (semantic) stage — always runs.
  const denseHits = await this.denseSearch(query, {
    project,
    collections,
    limit: this.config.dense.limit,
    minSimilarity: this.config.dense.minSimilarity,
    dateRange,
    tags
  })

  // Sparse (BM25) stage — needs both the config flag and a built index.
  const sparseHits: HybridSearchResult[] =
    this.config.sparse.enabled && this.indexBuilt
      ? await this.sparseSearch(query, {
          project,
          collections,
          limit: this.config.sparse.limit
        })
      : []

  // Fusion stage — with no sparse hits the dense score becomes the
  // fusion and final score unchanged.
  let ranked: HybridSearchResult[] =
    sparseHits.length > 0
      ? this.fusionStrategy.fuse(denseHits, sparseHits)
      : denseHits.map(hit => ({
          ...hit,
          scores: { ...hit.scores, fusion: hit.scores.dense, final: hit.scores.dense }
        }))

  // Rerank stage — requested, available, and something to rerank.
  if (useReranker && this.reranker && ranked.length > 0) {
    ranked = await this.reranker.rerank(query, ranked)
  }

  // A non-positive minScore disables the score filter.
  if (minScore > 0) {
    ranked = ranked.filter(hit => hit.scores.final >= minScore)
  }

  return ranked.slice(0, limit)
}
|
|
187
|
+
|
|
188
|
+
/**
 * Dense (semantic) search using ChromaDB.
 *
 * Embeds the query once, queries every requested collection with that
 * embedding, converts each distance to a similarity (`1 - distance`), drops
 * hits below `options.minSimilarity`, and returns the rest sorted by dense
 * score, highest first. A failure in one collection is logged and does not
 * abort the remaining collections.
 *
 * NOTE(review): `options.dateRange` and `options.tags` are accepted but are
 * not applied to the query. The metadata fields they should filter on are
 * not visible from here; confirm the schema before wiring them into the
 * where clause.
 *
 * @param query - Free-text query to embed.
 * @param options - Project filter, collections to search, total result
 *   budget (`limit`, split evenly across collections) and similarity floor.
 * @returns Matching results tagged with provenance `'dense'`.
 */
private async denseSearch(
  query: string,
  options: {
    project?: string
    collections: string[]
    limit: number
    minSimilarity: number
    dateRange?: { start?: string; end?: string }
    tags?: string[]
  }
): Promise<HybridSearchResult[]> {
  const { project, collections, limit, minSimilarity } = options

  // Nothing can be returned without collections or with a non-positive
  // budget, so skip the embedding call and the per-collection queries.
  if (collections.length === 0 || limit <= 0) {
    return []
  }

  const results: HybridSearchResult[] = []

  // Generate query embedding
  const embedding = await this.embeddings.generate(query)

  // Loop-invariant query parameters: the budget is split evenly across
  // collections and the where clause only depends on the project filter.
  const perCollectionLimit = Math.ceil(limit / collections.length)
  const where: Record<string, any> | undefined = project
    ? { project: { $eq: project } }
    : undefined

  // Search each collection
  for (const collectionName of collections) {
    try {
      const collection = await this.getCollection(collectionName)
      if (!collection) continue

      const queryResults = await collection.query({
        queryEmbeddings: [embedding],
        nResults: perCollectionLimit,
        where,
        include: ['documents', 'metadatas', 'distances']
      })

      // Only one embedding was sent, so only the first result row is used.
      const ids = queryResults.ids[0]
      if (!ids) continue

      for (let i = 0; i < ids.length; i++) {
        // A missing distance is treated as 0, i.e. similarity 1.
        const distance = queryResults.distances?.[0]?.[i] ?? 0
        const similarity = 1 - distance // Convert distance to similarity

        if (similarity >= minSimilarity) {
          results.push({
            id: ids[i],
            content: queryResults.documents?.[0]?.[i] ?? '',
            metadata: queryResults.metadatas?.[0]?.[i] ?? {},
            collection: collectionName,
            scores: {
              dense: similarity,
              sparse: 0,
              fusion: similarity,
              final: similarity
            },
            provenance: 'dense'
          })
        }
      }
    } catch (error) {
      // Best effort: one failing collection must not abort the others.
      this.logger.error({ error, collection: collectionName }, 'Dense search failed')
    }
  }

  // Sort by dense score
  results.sort((a, b) => b.scores.dense - a.scores.dense)
  return results
}
|
|
258
|
+
|
|
259
|
+
/**
 * Sparse (BM25) keyword search.
 *
 * Queries the BM25 engine, keeping only documents that belong to one of the
 * requested collections and, when a project is given, to that project. Each
 * match is returned with its BM25 score as the sparse, fusion and final
 * score, and provenance `'sparse'`.
 *
 * @param query - Free-text query passed to the BM25 engine.
 * @param options - Optional project filter, allowed collections, and the
 *   result limit handed to the engine.
 */
private async sparseSearch(
  query: string,
  options: {
    project?: string
    collections: string[]
    limit: number
  }
): Promise<HybridSearchResult[]> {
  const { project, collections, limit } = options

  const matches = this.bm25Engine.search(query, {
    limit,
    filter: (candidate) => {
      // The project check only applies when a project was requested.
      const projectMatches = !project || candidate.metadata.project === project
      return projectMatches && collections.includes(candidate.collection)
    }
  })

  return matches.map(({ id, content, metadata, collection, score }) => ({
    id,
    content,
    metadata,
    collection,
    scores: { dense: 0, sparse: score, fusion: score, final: score },
    provenance: 'sparse' as const
  }))
}
|
|
299
|
+
|
|
300
|
+
/**
 * Resolve a collection name to its collection accessor.
 *
 * @param name - One of 'decisions', 'memories', 'patterns', 'corrections'.
 * @returns The result of the matching accessor, or `null` for any other name.
 */
private async getCollection(name: string) {
  if (name === 'decisions') {
    return this.collections.getDecisions()
  }
  if (name === 'memories') {
    return this.collections.getMemories()
  }
  if (name === 'patterns') {
    return this.collections.getPatterns()
  }
  if (name === 'corrections') {
    return this.collections.getCorrections()
  }
  // Unknown collection names are not an error; callers skip them.
  return null
}
|
|
317
|
+
|
|
318
|
+
/**
 * Add a document to the BM25 index.
 *
 * Does nothing when sparse search is disabled in the config.
 *
 * @param document - Document to hand to the BM25 engine.
 */
addToIndex(document: BM25Document): void {
  if (!this.config.sparse.enabled) {
    return
  }
  this.bm25Engine.addDocument(document)
}
|
|
326
|
+
|
|
327
|
+
/**
 * Remove a document from the BM25 index.
 *
 * Does nothing when sparse search is disabled in the config.
 *
 * @param document - Document to remove from the BM25 engine.
 */
removeFromIndex(document: BM25Document): void {
  if (!this.config.sparse.enabled) {
    return
  }
  this.bm25Engine.removeDocument(document)
}
|
|
335
|
+
|
|
336
|
+
/**
 * Rebuild the BM25 index.
 *
 * Public entry point that delegates to `buildBM25Index` and resolves once
 * that call has settled.
 */
async rebuildIndex(): Promise<void> {
  const pendingBuild = this.buildBM25Index()
  await pendingBuild
}
|
|
342
|
+
|
|
343
|
+
/**
 * Report the current pipeline status.
 *
 * @returns The initialization and index flags, whether a reranker instance
 *   is present, the sparse/fusion settings from the config, and the
 *   statistics reported by the BM25 engine.
 */
getStatus(): {
  initialized: boolean
  indexBuilt: boolean
  rerankerEnabled: boolean
  sparseEnabled: boolean
  fusionMethod: string
  bm25Stats: { documentCount: number; termCount: number }
} {
  const initialized = this.initialized
  const indexBuilt = this.indexBuilt
  // "Enabled" here means a reranker instance exists, not the config flag.
  const rerankerEnabled = this.reranker !== null
  const sparseEnabled = this.config.sparse.enabled
  const fusionMethod = this.config.fusion.method
  const bm25Stats = this.bm25Engine.getStats()

  return {
    initialized,
    indexBuilt,
    rerankerEnabled,
    sparseEnabled,
    fusionMethod,
    bm25Stats
  }
}
|
|
363
|
+
|
|
364
|
+
/**
 * Release pipeline resources.
 *
 * Cleans up the reranker when one exists, clears the BM25 engine, and resets
 * the `initialized` and `indexBuilt` flags.
 */
cleanup(): void {
  // The reranker is optional, so only clean it up when present.
  this.reranker?.cleanup()
  this.bm25Engine.clear()

  this.initialized = false
  this.indexBuilt = false
}
|
|
375
|
+
}
|