claude-brain 0.14.2 → 0.14.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +191 -191
- package/VERSION +1 -1
- package/assets/CLAUDE-unified.md +11 -11
- package/assets/CLAUDE.md +11 -11
- package/bunfig.toml +8 -8
- package/package.json +80 -80
- package/packs/backend/node.json +173 -173
- package/packs/core/javascript.json +176 -176
- package/packs/core/typescript.json +222 -222
- package/packs/frontend/react.json +254 -254
- package/packs/meta/testing.json +172 -172
- package/src/automation/auto-context.ts +240 -240
- package/src/automation/decision-detector.ts +452 -452
- package/src/automation/index.ts +11 -11
- package/src/automation/phase12-manager.ts +456 -456
- package/src/automation/proactive-recall.ts +373 -373
- package/src/automation/project-detector.ts +310 -310
- package/src/automation/repo-scanner.ts +205 -205
- package/src/cli/auto-setup.ts +82 -82
- package/src/cli/bin.ts +202 -202
- package/src/cli/commands/chroma.ts +573 -573
- package/src/cli/commands/git-hook.ts +189 -189
- package/src/cli/commands/hooks.ts +213 -213
- package/src/cli/commands/init.ts +122 -122
- package/src/cli/commands/install-mcp.ts +92 -92
- package/src/cli/commands/pack.ts +197 -197
- package/src/cli/commands/serve.ts +167 -167
- package/src/cli/commands/start.ts +42 -42
- package/src/cli/commands/uninstall-mcp.ts +41 -41
- package/src/cli/commands/update.ts +121 -121
- package/src/cli/diagnose.ts +4 -4
- package/src/cli/health-check.ts +4 -4
- package/src/cli/migrate-chroma.ts +106 -106
- package/src/cli/setup.ts +4 -4
- package/src/cli/ui/animations.ts +80 -80
- package/src/cli/ui/components.ts +82 -82
- package/src/cli/ui/index.ts +4 -4
- package/src/cli/ui/logo.ts +36 -36
- package/src/cli/ui/theme.ts +55 -55
- package/src/config/defaults.ts +50 -50
- package/src/config/home.ts +55 -55
- package/src/config/index.ts +7 -7
- package/src/config/loader.ts +166 -166
- package/src/config/migration.ts +76 -76
- package/src/config/schema.ts +360 -360
- package/src/config/validator.ts +184 -184
- package/src/config/watcher.ts +86 -86
- package/src/context/assembler.ts +398 -398
- package/src/context/cache-manager.ts +101 -101
- package/src/context/formatter.ts +84 -84
- package/src/context/hierarchy.ts +85 -85
- package/src/context/index.ts +83 -83
- package/src/context/progress-tracker.ts +174 -174
- package/src/context/standards-manager.ts +287 -287
- package/src/context/types.ts +252 -252
- package/src/context/validator.ts +58 -58
- package/src/diagnostics/index.ts +123 -123
- package/src/health/index.ts +229 -229
- package/src/hooks/brain-hook.ts +112 -112
- package/src/hooks/capture.ts +168 -168
- package/src/hooks/deduplicator.ts +72 -72
- package/src/hooks/git-capture.ts +109 -109
- package/src/hooks/git-hook-installer.ts +207 -207
- package/src/hooks/index.ts +20 -20
- package/src/hooks/installer.ts +191 -194
- package/src/hooks/passive-classifier.ts +366 -366
- package/src/hooks/queue.ts +129 -129
- package/src/hooks/session-tracker.ts +275 -275
- package/src/hooks/types.ts +47 -47
- package/src/index.ts +7 -7
- package/src/intelligence/cross-project/affinity.ts +162 -162
- package/src/intelligence/cross-project/generalizer.ts +283 -283
- package/src/intelligence/cross-project/index.ts +13 -13
- package/src/intelligence/cross-project/transfer.ts +201 -201
- package/src/intelligence/index.ts +24 -24
- package/src/intelligence/optimization/index.ts +10 -10
- package/src/intelligence/optimization/precompute.ts +202 -202
- package/src/intelligence/optimization/semantic-cache.ts +207 -207
- package/src/intelligence/prediction/context-anticipator.ts +198 -198
- package/src/intelligence/prediction/decision-predictor.ts +184 -184
- package/src/intelligence/prediction/index.ts +13 -13
- package/src/intelligence/prediction/recommender.ts +268 -268
- package/src/intelligence/reasoning/chain-retrieval.ts +247 -247
- package/src/intelligence/reasoning/counterfactual.ts +248 -248
- package/src/intelligence/reasoning/index.ts +13 -13
- package/src/intelligence/reasoning/synthesizer.ts +169 -169
- package/src/intelligence/temporal/evolution.ts +197 -197
- package/src/intelligence/temporal/index.ts +16 -16
- package/src/intelligence/temporal/query-processor.ts +190 -190
- package/src/intelligence/temporal/timeline.ts +259 -259
- package/src/intelligence/temporal/trends.ts +263 -263
- package/src/knowledge/entity-extractor.ts +416 -416
- package/src/knowledge/graph/builder.ts +185 -185
- package/src/knowledge/graph/linker.ts +201 -201
- package/src/knowledge/graph/memory-graph.ts +359 -359
- package/src/knowledge/graph/schema.ts +99 -99
- package/src/knowledge/graph/search.ts +168 -168
- package/src/knowledge/relationship-extractor.ts +108 -108
- package/src/memory/chroma/client.ts +174 -174
- package/src/memory/chroma/collection-manager.ts +94 -94
- package/src/memory/chroma/config.ts +57 -57
- package/src/memory/chroma/embeddings.ts +153 -153
- package/src/memory/chroma/index.ts +82 -82
- package/src/memory/chroma/migration.ts +270 -270
- package/src/memory/chroma/schemas.ts +69 -69
- package/src/memory/chroma/search.ts +315 -315
- package/src/memory/chroma/store.ts +741 -741
- package/src/memory/consolidation/archiver.ts +164 -164
- package/src/memory/consolidation/merger.ts +186 -186
- package/src/memory/consolidation/scorer.ts +138 -138
- package/src/memory/context-builder.ts +236 -236
- package/src/memory/database.ts +169 -169
- package/src/memory/embedding-utils.ts +156 -156
- package/src/memory/embeddings.ts +226 -226
- package/src/memory/episodic/detector.ts +108 -108
- package/src/memory/episodic/manager.ts +351 -351
- package/src/memory/episodic/summarizer.ts +179 -179
- package/src/memory/episodic/types.ts +52 -52
- package/src/memory/index.ts +582 -582
- package/src/memory/knowledge-extractor.ts +455 -455
- package/src/memory/learning.ts +378 -378
- package/src/memory/patterns.ts +396 -396
- package/src/memory/schema.ts +88 -88
- package/src/memory/search.ts +309 -309
- package/src/memory/store.ts +787 -787
- package/src/memory/types.ts +121 -121
- package/src/orchestrator/coordinator.ts +272 -272
- package/src/orchestrator/decision-logger.ts +228 -228
- package/src/orchestrator/event-emitter.ts +198 -198
- package/src/orchestrator/event-queue.ts +184 -184
- package/src/orchestrator/handlers/base-handler.ts +70 -70
- package/src/orchestrator/handlers/context-handler.ts +73 -73
- package/src/orchestrator/handlers/decision-handler.ts +204 -204
- package/src/orchestrator/handlers/index.ts +10 -10
- package/src/orchestrator/handlers/status-handler.ts +131 -131
- package/src/orchestrator/handlers/task-handler.ts +171 -171
- package/src/orchestrator/index.ts +275 -275
- package/src/orchestrator/task-parser.ts +284 -284
- package/src/orchestrator/types.ts +98 -98
- package/src/packs/index.ts +9 -9
- package/src/packs/loader.ts +134 -134
- package/src/packs/manager.ts +204 -204
- package/src/packs/ranker.ts +78 -78
- package/src/packs/types.ts +81 -81
- package/src/phase12/index.ts +5 -5
- package/src/retrieval/bm25/index.ts +300 -300
- package/src/retrieval/bm25/tokenizer.ts +184 -184
- package/src/retrieval/feedback/adaptive.ts +223 -223
- package/src/retrieval/feedback/index.ts +16 -16
- package/src/retrieval/feedback/metrics.ts +223 -223
- package/src/retrieval/feedback/store.ts +283 -283
- package/src/retrieval/fusion/index.ts +194 -194
- package/src/retrieval/fusion/rrf.ts +163 -163
- package/src/retrieval/index.ts +12 -12
- package/src/retrieval/pipeline.ts +375 -375
- package/src/retrieval/query/expander.ts +198 -198
- package/src/retrieval/query/index.ts +27 -27
- package/src/retrieval/query/intent-classifier.ts +236 -236
- package/src/retrieval/query/temporal-parser.ts +295 -295
- package/src/retrieval/reranker/index.ts +188 -188
- package/src/retrieval/reranker/model.ts +95 -95
- package/src/retrieval/service.ts +125 -125
- package/src/retrieval/types.ts +162 -162
- package/src/routing/entity-extractor.ts +428 -428
- package/src/routing/intent-classifier.ts +436 -436
- package/src/routing/response-filter.ts +258 -254
- package/src/routing/router.ts +1322 -1314
- package/src/routing/search-engine.ts +475 -475
- package/src/routing/types.ts +94 -84
- package/src/scripts/health-check.ts +118 -118
- package/src/scripts/setup.ts +122 -122
- package/src/server/handlers/call-tool.ts +156 -156
- package/src/server/handlers/index.ts +9 -9
- package/src/server/handlers/list-tools.ts +35 -35
- package/src/server/handlers/tools/analyze-decision-evolution.ts +151 -151
- package/src/server/handlers/tools/auto-remember.ts +200 -200
- package/src/server/handlers/tools/brain.ts +85 -85
- package/src/server/handlers/tools/create-project.ts +135 -135
- package/src/server/handlers/tools/detect-trends.ts +144 -144
- package/src/server/handlers/tools/find-cross-project-patterns.ts +168 -168
- package/src/server/handlers/tools/get-activity-log.ts +194 -194
- package/src/server/handlers/tools/get-code-standards.ts +124 -124
- package/src/server/handlers/tools/get-corrections.ts +154 -154
- package/src/server/handlers/tools/get-decision-timeline.ts +172 -172
- package/src/server/handlers/tools/get-episode.ts +103 -103
- package/src/server/handlers/tools/get-patterns.ts +158 -158
- package/src/server/handlers/tools/get-phase12-status.ts +63 -63
- package/src/server/handlers/tools/get-project-context.ts +75 -75
- package/src/server/handlers/tools/get-recommendations.ts +145 -145
- package/src/server/handlers/tools/index.ts +31 -31
- package/src/server/handlers/tools/init-project.ts +757 -757
- package/src/server/handlers/tools/list-episodes.ts +90 -90
- package/src/server/handlers/tools/list-projects.ts +125 -125
- package/src/server/handlers/tools/rate-memory.ts +101 -101
- package/src/server/handlers/tools/recall-similar.ts +87 -87
- package/src/server/handlers/tools/recognize-pattern.ts +126 -126
- package/src/server/handlers/tools/record-correction.ts +125 -125
- package/src/server/handlers/tools/remember-decision.ts +153 -153
- package/src/server/handlers/tools/schemas.ts +253 -253
- package/src/server/handlers/tools/search-knowledge-graph.ts +102 -102
- package/src/server/handlers/tools/smart-context.ts +146 -146
- package/src/server/handlers/tools/update-progress.ts +131 -131
- package/src/server/handlers/tools/what-if-analysis.ts +135 -135
- package/src/server/http-api.ts +693 -693
- package/src/server/index.ts +40 -40
- package/src/server/mcp-server.ts +283 -283
- package/src/server/providers/index.ts +7 -7
- package/src/server/providers/prompts.ts +327 -327
- package/src/server/providers/resources.ts +622 -622
- package/src/server/services.ts +468 -468
- package/src/server/types.ts +39 -39
- package/src/server/utils/error-handler.ts +155 -155
- package/src/server/utils/index.ts +13 -13
- package/src/server/utils/memory-indicator.ts +83 -83
- package/src/server/utils/request-context.ts +122 -122
- package/src/server/utils/response-formatter.ts +129 -124
- package/src/server/utils/validators.ts +210 -210
- package/src/setup/index.ts +48 -48
- package/src/setup/wizard.ts +461 -461
- package/src/tools/index.ts +24 -24
- package/src/tools/registry.ts +115 -115
- package/src/tools/schemas.test.ts +30 -30
- package/src/tools/schemas.ts +617 -617
- package/src/tools/types.ts +412 -412
- package/src/utils/circuit-breaker.ts +130 -130
- package/src/utils/cleanup.ts +34 -34
- package/src/utils/error-handler.ts +132 -132
- package/src/utils/error-messages.ts +60 -60
- package/src/utils/fallback.ts +45 -45
- package/src/utils/index.ts +54 -54
- package/src/utils/logger-utils.ts +80 -80
- package/src/utils/logger.ts +88 -88
- package/src/utils/phase12-helper.ts +56 -56
- package/src/utils/retry.ts +94 -94
- package/src/utils/timing.ts +47 -47
- package/src/utils/transaction.ts +63 -63
- package/src/vault/frontmatter.ts +264 -264
- package/src/vault/index.ts +318 -318
- package/src/vault/paths.ts +106 -106
- package/src/vault/query.ts +422 -422
- package/src/vault/reader.ts +264 -264
- package/src/vault/templates.ts +186 -186
- package/src/vault/types.ts +73 -73
- package/src/vault/watcher.ts +277 -277
- package/src/vault/writer.ts +413 -413
- package/tsconfig.json +30 -30
|
@@ -1,247 +1,247 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Chain Retrieval
|
|
3
|
-
* Iterative search with query refinement for multi-hop reasoning
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import type { Logger } from 'pino'
|
|
7
|
-
import type { CollectionManager } from '@/memory/chroma/collection-manager'
|
|
8
|
-
import type { EmbeddingProvider } from '@/memory/chroma/embeddings'
|
|
9
|
-
|
|
10
|
-
/**
 * One hop of a chain retrieval.
 *
 * - `query`: the query text that was searched on this hop.
 * - `results`: the results of this hop that had not been returned by an
 *   earlier hop.
 * - `refinedQuery`: the expanded query produced from this hop's results; left
 *   undefined when refinement did not change the query.
 */
export interface ChainStep {
|
|
11
|
-
query: string
|
|
12
|
-
results: ChainResult[]
|
|
13
|
-
refinedQuery?: string
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
/**
 * A single memory returned by a chain-retrieval search.
 *
 * - `id`: identifier of the stored entry as returned by the collection query.
 * - `content`: the document text (empty string when the query returned none).
 * - `metadata`: the entry's metadata (empty object when the query returned
 *   none); `tags`, `type` and `source` are the keys read by ChainRetrieval.
 * - `similarity`: score derived from the query distance as `1 - distance`.
 */
export interface ChainResult {
|
|
17
|
-
id: string
|
|
18
|
-
content: string
|
|
19
|
-
metadata: Record<string, any>
|
|
20
|
-
similarity: number
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
/**
 * Outcome of a complete multi-hop retrieval.
 *
 * - `query`: the original query passed to `retrieve`.
 * - `hops`: number of recorded steps (equals `steps.length`).
 * - `steps`: the per-hop records, in execution order.
 * - `allResults`: the results of every step, flattened in step order.
 * - `synthesizedAnswer`: text summary of `allResults` grouped by
 *   metadata type/source.
 */
export interface ChainRetrievalResult {
|
|
24
|
-
query: string
|
|
25
|
-
hops: number
|
|
26
|
-
steps: ChainStep[]
|
|
27
|
-
allResults: ChainResult[]
|
|
28
|
-
synthesizedAnswer: string
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
/**
 * Multi-hop ("chain") retrieval over the memories collection.
 *
 * Each hop runs a similarity search, keeps only the results that no earlier
 * hop returned, and expands the query with terms taken from those results
 * before searching again.
 */
export class ChainRetrieval {
  /**
   * Words that are never used to expand a query. Built once for the class
   * rather than on every `isStopWord` call, which runs once per word of
   * result content.
   */
  private static readonly STOP_WORDS: ReadonlySet<string> = new Set([
    'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can',
    'her', 'was', 'one', 'our', 'out', 'has', 'had', 'been', 'have',
    'with', 'this', 'that', 'from', 'they', 'will', 'would', 'there',
    'their', 'what', 'about', 'which', 'when', 'make', 'like', 'time',
    'just', 'know', 'take', 'people', 'into', 'year', 'your', 'good',
    'some', 'could', 'them', 'than', 'other', 'then', 'look', 'only',
    'come', 'over', 'think', 'also', 'back', 'after', 'work', 'first',
    'well', 'even', 'give', 'most', 'find', 'here', 'thing', 'many',
    'still', 'should', 'because', 'does', 'each', 'much', 'before',
    'between', 'must', 'through', 'being', 'using', 'used', 'decision',
    'decided', 'recommend', 'instead', 'project', 'context', 'reasoning'
  ])

  /** Matches punctuation/symbols at either end of a whitespace-delimited token. */
  private static readonly EDGE_PUNCTUATION = /^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu

  private logger: Logger
  private collections: CollectionManager
  private embeddings?: EmbeddingProvider

  /**
   * @param logger Parent logger; a child tagged `component: 'chain-retrieval'` is used.
   * @param collections Source of the memories collection that is queried.
   * @param embeddings Optional provider; when present the query is embedded
   *   here and sent as `queryEmbeddings`, otherwise it is sent as `queryTexts`.
   */
  constructor(logger: Logger, collections: CollectionManager, embeddings?: EmbeddingProvider) {
    this.logger = logger.child({ component: 'chain-retrieval' })
    this.collections = collections
    this.embeddings = embeddings
  }

  /**
   * Perform iterative multi-hop retrieval.
   * Each hop refines the query based on the previous hop's new results.
   *
   * @param query Initial query text.
   * @param options.maxHops Maximum number of searches to run (default 3).
   * @param options.resultsPerHop Results requested per search (default 5).
   * @param options.minSimilarity Results scoring below this are dropped (default 0.3).
   * @param options.project When set, restricts the search to entries whose
   *   `project` metadata equals this value.
   * @returns The per-hop steps, the flattened results and a text summary.
   *   Search failures do not reject; they surface as hops with no results.
   */
  async retrieve(query: string, options: {
    maxHops?: number
    resultsPerHop?: number
    minSimilarity?: number
    project?: string
  } = {}): Promise<ChainRetrievalResult> {
    const {
      maxHops = 3,
      resultsPerHop = 5,
      minSimilarity = 0.3,
      project
    } = options

    const steps: ChainStep[] = []
    const seenIds = new Set<string>()
    let currentQuery = query

    for (let hop = 0; hop < maxHops; hop++) {
      // Search with the current (possibly expanded) query
      const results = await this.search(currentQuery, project, resultsPerHop, minSimilarity)

      // Keep only results that no earlier hop returned
      const newResults = results.filter(r => !seenIds.has(r.id))
      newResults.forEach(r => seenIds.add(r.id))

      // A later hop that surfaces nothing new ends the chain.
      // The first hop is always recorded, even when empty.
      if (newResults.length === 0 && hop > 0) break

      // Expand the query with terms drawn from this hop's new results
      const refinedQuery = this.refineQuery(currentQuery, newResults)

      steps.push({
        query: currentQuery,
        results: newResults,
        refinedQuery: refinedQuery !== currentQuery ? refinedQuery : undefined
      })

      // If the query didn't change, we've converged
      if (refinedQuery === currentQuery && hop > 0) break

      currentQuery = refinedQuery
    }

    // Flatten every step's results; they are unique across hops because of seenIds
    const allResults = steps.flatMap(s => s.results)

    return {
      query,
      hops: steps.length,
      steps,
      allResults,
      synthesizedAnswer: this.synthesize(query, allResults)
    }
  }

  /**
   * Run one similarity search against the memories collection.
   *
   * @returns Results scoring at least `minSimilarity`, in the order the
   *   collection returned them. Any error is logged at warn level and
   *   reported as an empty list so a failing hop does not abort the chain.
   */
  private async search(
    query: string,
    project?: string,
    limit: number = 5,
    minSimilarity: number = 0.3
  ): Promise<ChainResult[]> {
    try {
      const collection = await this.collections.getMemories()

      const where: any = project ? { project: { $eq: project } } : undefined

      let results: any

      if (this.embeddings) {
        const embedding = await this.embeddings.generate(query)
        results = await collection.query({
          queryEmbeddings: [embedding],
          nResults: limit,
          where,
          include: ['documents', 'metadatas', 'distances']
        })
      } else {
        results = await collection.query({
          queryTexts: [query],
          nResults: limit,
          where,
          include: ['documents', 'metadatas', 'distances']
        })
      }

      if (!results.ids || !results.ids[0]) return []

      // Only one query is sent, so only the first row of each column is read
      const processed: ChainResult[] = []
      const ids = results.ids[0]
      const documents = results.documents?.[0] || []
      const metadatas = results.metadatas?.[0] || []
      const distances = results.distances?.[0] || []

      for (let i = 0; i < ids.length; i++) {
        const distance = distances[i]
        // A missing or non-finite distance means the score is unknown; score it
        // 0 rather than letting it pass as a perfect match (1 - 0).
        // NOTE(review): `1 - distance` presumes a metric such as cosine
        // distance — confirm against the collection's configuration.
        const similarity =
          typeof distance === 'number' && Number.isFinite(distance) ? 1 - distance : 0
        if (similarity < minSimilarity) continue

        processed.push({
          id: ids[i],
          content: documents[i] || '',
          metadata: metadatas[i] || {},
          similarity
        })
      }

      return processed
    } catch (error) {
      this.logger.warn({ error, query }, 'Chain retrieval search failed')
      return []
    }
  }

  /**
   * Refine the query based on retrieved results.
   *
   * Appends up to three new terms taken from the first three results:
   * content words longer than four characters that are neither stop words nor
   * already in the query, plus any comma-separated `metadata.tags` not already
   * in the query. Content words have surrounding punctuation stripped first,
   * so "redis," and "(redis)" both count as "redis".
   *
   * @returns The expanded query, or `originalQuery` unchanged when there are
   *   no results or no new terms.
   */
  private refineQuery(originalQuery: string, results: ChainResult[]): string {
    if (results.length === 0) return originalQuery

    const keyTerms = new Set<string>()

    // Hold both the raw and the punctuation-stripped form of each query word,
    // so terms such as "c++" still match exactly while "redis?" matches "redis".
    const queryWords = new Set<string>()
    for (const raw of originalQuery.toLowerCase().split(/\s+/)) {
      queryWords.add(raw)
      queryWords.add(ChainRetrieval.stripEdgePunctuation(raw))
    }

    for (const result of results.slice(0, 3)) {
      const words = result.content.toLowerCase().split(/\s+/)

      // Distinctive content words not already in the query
      for (const rawWord of words) {
        const word = ChainRetrieval.stripEdgePunctuation(rawWord)
        if (
          word.length > 4 &&
          !queryWords.has(word) &&
          !this.isStopWord(word)
        ) {
          keyTerms.add(word)
        }
      }

      // Tags are added regardless of length; String() also flattens an array of tags
      if (result.metadata.tags) {
        const tags = String(result.metadata.tags).split(',')
        for (const tag of tags) {
          const normalized = tag.trim().toLowerCase()
          if (normalized && !queryWords.has(normalized)) {
            keyTerms.add(normalized)
          }
        }
      }
    }

    // Set iteration follows insertion order, so these are the first three terms found
    const topTerms = Array.from(keyTerms).slice(0, 3)
    if (topTerms.length === 0) return originalQuery

    return `${originalQuery} ${topTerms.join(' ')}`
  }

  /**
   * Synthesize results into a text summary.
   *
   * Results are grouped by `metadata.type`, falling back to `metadata.source`
   * and then `'general'`. Each group lists at most five items, with content
   * truncated to 150 characters. Groups are kept in a Map so type names that
   * collide with Object.prototype members (e.g. "constructor") are safe, and
   * group order is first-seen order.
   */
  private synthesize(query: string, results: ChainResult[]): string {
    if (results.length === 0) {
      return 'No relevant information found across multiple retrieval hops.'
    }

    // Group by type/source
    const byType = new Map<string, ChainResult[]>()
    for (const r of results) {
      // Metadata values are untyped; coerce so the heading code can use string methods
      const type = String(r.metadata.type || r.metadata.source || 'general')
      const group = byType.get(type)
      if (group) {
        group.push(r)
      } else {
        byType.set(type, [r])
      }
    }

    const parts: string[] = [`Multi-hop retrieval for: "${query}"`, '']

    for (const [type, items] of byType) {
      parts.push(`### ${type.charAt(0).toUpperCase() + type.slice(1)} (${items.length})`)
      for (const item of items.slice(0, 5)) {
        const similarity = Math.round(item.similarity * 100)
        parts.push(`- [${similarity}%] ${item.content.slice(0, 150)}${item.content.length > 150 ? '...' : ''}`)
      }
      parts.push('')
    }

    return parts.join('\n')
  }

  /** Whether `word` (expected lowercase) is excluded from query expansion. */
  private isStopWord(word: string): boolean {
    return ChainRetrieval.STOP_WORDS.has(word)
  }

  /** Remove leading/trailing characters that are neither letters nor digits. */
  private static stripEdgePunctuation(token: string): string {
    return token.replace(ChainRetrieval.EDGE_PUNCTUATION, '')
  }
}
|
|
1
|
+
/**
|
|
2
|
+
* Chain Retrieval
|
|
3
|
+
* Iterative search with query refinement for multi-hop reasoning
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import type { Logger } from 'pino'
|
|
7
|
+
import type { CollectionManager } from '@/memory/chroma/collection-manager'
|
|
8
|
+
import type { EmbeddingProvider } from '@/memory/chroma/embeddings'
|
|
9
|
+
|
|
10
|
+
/**
 * One hop of a chain retrieval.
 *
 * - `query`: the query text that was searched on this hop.
 * - `results`: the results of this hop that had not been returned by an
 *   earlier hop.
 * - `refinedQuery`: the expanded query produced from this hop's results; left
 *   undefined when refinement did not change the query.
 */
export interface ChainStep {
|
|
11
|
+
query: string
|
|
12
|
+
results: ChainResult[]
|
|
13
|
+
refinedQuery?: string
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
/**
 * A single memory returned by a chain-retrieval search.
 *
 * - `id`: identifier of the stored entry as returned by the collection query.
 * - `content`: the document text (empty string when the query returned none).
 * - `metadata`: the entry's metadata (empty object when the query returned
 *   none); `tags`, `type` and `source` are the keys read by ChainRetrieval.
 * - `similarity`: score derived from the query distance as `1 - distance`.
 */
export interface ChainResult {
|
|
17
|
+
id: string
|
|
18
|
+
content: string
|
|
19
|
+
metadata: Record<string, any>
|
|
20
|
+
similarity: number
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/**
 * Outcome of a complete multi-hop retrieval.
 *
 * - `query`: the original query passed to `retrieve`.
 * - `hops`: number of recorded steps (equals `steps.length`).
 * - `steps`: the per-hop records, in execution order.
 * - `allResults`: the results of every step, flattened in step order.
 * - `synthesizedAnswer`: text summary of `allResults` grouped by
 *   metadata type/source.
 */
export interface ChainRetrievalResult {
|
|
24
|
+
query: string
|
|
25
|
+
hops: number
|
|
26
|
+
steps: ChainStep[]
|
|
27
|
+
allResults: ChainResult[]
|
|
28
|
+
synthesizedAnswer: string
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/**
 * Multi-hop ("chain") retrieval over the memories collection.
 *
 * Each hop runs a similarity search, keeps only the results that no earlier
 * hop returned, and expands the query with terms taken from those results
 * before searching again.
 */
export class ChainRetrieval {
  /**
   * Words that are never used to expand a query. Built once for the class
   * rather than on every `isStopWord` call, which runs once per word of
   * result content.
   */
  private static readonly STOP_WORDS: ReadonlySet<string> = new Set([
    'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can',
    'her', 'was', 'one', 'our', 'out', 'has', 'had', 'been', 'have',
    'with', 'this', 'that', 'from', 'they', 'will', 'would', 'there',
    'their', 'what', 'about', 'which', 'when', 'make', 'like', 'time',
    'just', 'know', 'take', 'people', 'into', 'year', 'your', 'good',
    'some', 'could', 'them', 'than', 'other', 'then', 'look', 'only',
    'come', 'over', 'think', 'also', 'back', 'after', 'work', 'first',
    'well', 'even', 'give', 'most', 'find', 'here', 'thing', 'many',
    'still', 'should', 'because', 'does', 'each', 'much', 'before',
    'between', 'must', 'through', 'being', 'using', 'used', 'decision',
    'decided', 'recommend', 'instead', 'project', 'context', 'reasoning'
  ])

  /** Matches punctuation/symbols at either end of a whitespace-delimited token. */
  private static readonly EDGE_PUNCTUATION = /^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu

  private logger: Logger
  private collections: CollectionManager
  private embeddings?: EmbeddingProvider

  /**
   * @param logger Parent logger; a child tagged `component: 'chain-retrieval'` is used.
   * @param collections Source of the memories collection that is queried.
   * @param embeddings Optional provider; when present the query is embedded
   *   here and sent as `queryEmbeddings`, otherwise it is sent as `queryTexts`.
   */
  constructor(logger: Logger, collections: CollectionManager, embeddings?: EmbeddingProvider) {
    this.logger = logger.child({ component: 'chain-retrieval' })
    this.collections = collections
    this.embeddings = embeddings
  }

  /**
   * Perform iterative multi-hop retrieval.
   * Each hop refines the query based on the previous hop's new results.
   *
   * @param query Initial query text.
   * @param options.maxHops Maximum number of searches to run (default 3).
   * @param options.resultsPerHop Results requested per search (default 5).
   * @param options.minSimilarity Results scoring below this are dropped (default 0.3).
   * @param options.project When set, restricts the search to entries whose
   *   `project` metadata equals this value.
   * @returns The per-hop steps, the flattened results and a text summary.
   *   Search failures do not reject; they surface as hops with no results.
   */
  async retrieve(query: string, options: {
    maxHops?: number
    resultsPerHop?: number
    minSimilarity?: number
    project?: string
  } = {}): Promise<ChainRetrievalResult> {
    const {
      maxHops = 3,
      resultsPerHop = 5,
      minSimilarity = 0.3,
      project
    } = options

    const steps: ChainStep[] = []
    const seenIds = new Set<string>()
    let currentQuery = query

    for (let hop = 0; hop < maxHops; hop++) {
      // Search with the current (possibly expanded) query
      const results = await this.search(currentQuery, project, resultsPerHop, minSimilarity)

      // Keep only results that no earlier hop returned
      const newResults = results.filter(r => !seenIds.has(r.id))
      newResults.forEach(r => seenIds.add(r.id))

      // A later hop that surfaces nothing new ends the chain.
      // The first hop is always recorded, even when empty.
      if (newResults.length === 0 && hop > 0) break

      // Expand the query with terms drawn from this hop's new results
      const refinedQuery = this.refineQuery(currentQuery, newResults)

      steps.push({
        query: currentQuery,
        results: newResults,
        refinedQuery: refinedQuery !== currentQuery ? refinedQuery : undefined
      })

      // If the query didn't change, we've converged
      if (refinedQuery === currentQuery && hop > 0) break

      currentQuery = refinedQuery
    }

    // Flatten every step's results; they are unique across hops because of seenIds
    const allResults = steps.flatMap(s => s.results)

    return {
      query,
      hops: steps.length,
      steps,
      allResults,
      synthesizedAnswer: this.synthesize(query, allResults)
    }
  }

  /**
   * Run one similarity search against the memories collection.
   *
   * @returns Results scoring at least `minSimilarity`, in the order the
   *   collection returned them. Any error is logged at warn level and
   *   reported as an empty list so a failing hop does not abort the chain.
   */
  private async search(
    query: string,
    project?: string,
    limit: number = 5,
    minSimilarity: number = 0.3
  ): Promise<ChainResult[]> {
    try {
      const collection = await this.collections.getMemories()

      const where: any = project ? { project: { $eq: project } } : undefined

      let results: any

      if (this.embeddings) {
        const embedding = await this.embeddings.generate(query)
        results = await collection.query({
          queryEmbeddings: [embedding],
          nResults: limit,
          where,
          include: ['documents', 'metadatas', 'distances']
        })
      } else {
        results = await collection.query({
          queryTexts: [query],
          nResults: limit,
          where,
          include: ['documents', 'metadatas', 'distances']
        })
      }

      if (!results.ids || !results.ids[0]) return []

      // Only one query is sent, so only the first row of each column is read
      const processed: ChainResult[] = []
      const ids = results.ids[0]
      const documents = results.documents?.[0] || []
      const metadatas = results.metadatas?.[0] || []
      const distances = results.distances?.[0] || []

      for (let i = 0; i < ids.length; i++) {
        const distance = distances[i]
        // A missing or non-finite distance means the score is unknown; score it
        // 0 rather than letting it pass as a perfect match (1 - 0).
        // NOTE(review): `1 - distance` presumes a metric such as cosine
        // distance — confirm against the collection's configuration.
        const similarity =
          typeof distance === 'number' && Number.isFinite(distance) ? 1 - distance : 0
        if (similarity < minSimilarity) continue

        processed.push({
          id: ids[i],
          content: documents[i] || '',
          metadata: metadatas[i] || {},
          similarity
        })
      }

      return processed
    } catch (error) {
      this.logger.warn({ error, query }, 'Chain retrieval search failed')
      return []
    }
  }

  /**
   * Refine the query based on retrieved results.
   *
   * Appends up to three new terms taken from the first three results:
   * content words longer than four characters that are neither stop words nor
   * already in the query, plus any comma-separated `metadata.tags` not already
   * in the query. Content words have surrounding punctuation stripped first,
   * so "redis," and "(redis)" both count as "redis".
   *
   * @returns The expanded query, or `originalQuery` unchanged when there are
   *   no results or no new terms.
   */
  private refineQuery(originalQuery: string, results: ChainResult[]): string {
    if (results.length === 0) return originalQuery

    const keyTerms = new Set<string>()

    // Hold both the raw and the punctuation-stripped form of each query word,
    // so terms such as "c++" still match exactly while "redis?" matches "redis".
    const queryWords = new Set<string>()
    for (const raw of originalQuery.toLowerCase().split(/\s+/)) {
      queryWords.add(raw)
      queryWords.add(ChainRetrieval.stripEdgePunctuation(raw))
    }

    for (const result of results.slice(0, 3)) {
      const words = result.content.toLowerCase().split(/\s+/)

      // Distinctive content words not already in the query
      for (const rawWord of words) {
        const word = ChainRetrieval.stripEdgePunctuation(rawWord)
        if (
          word.length > 4 &&
          !queryWords.has(word) &&
          !this.isStopWord(word)
        ) {
          keyTerms.add(word)
        }
      }

      // Tags are added regardless of length; String() also flattens an array of tags
      if (result.metadata.tags) {
        const tags = String(result.metadata.tags).split(',')
        for (const tag of tags) {
          const normalized = tag.trim().toLowerCase()
          if (normalized && !queryWords.has(normalized)) {
            keyTerms.add(normalized)
          }
        }
      }
    }

    // Set iteration follows insertion order, so these are the first three terms found
    const topTerms = Array.from(keyTerms).slice(0, 3)
    if (topTerms.length === 0) return originalQuery

    return `${originalQuery} ${topTerms.join(' ')}`
  }

  /**
   * Synthesize results into a text summary.
   *
   * Results are grouped by `metadata.type`, falling back to `metadata.source`
   * and then `'general'`. Each group lists at most five items, with content
   * truncated to 150 characters. Groups are kept in a Map so type names that
   * collide with Object.prototype members (e.g. "constructor") are safe, and
   * group order is first-seen order.
   */
  private synthesize(query: string, results: ChainResult[]): string {
    if (results.length === 0) {
      return 'No relevant information found across multiple retrieval hops.'
    }

    // Group by type/source
    const byType = new Map<string, ChainResult[]>()
    for (const r of results) {
      // Metadata values are untyped; coerce so the heading code can use string methods
      const type = String(r.metadata.type || r.metadata.source || 'general')
      const group = byType.get(type)
      if (group) {
        group.push(r)
      } else {
        byType.set(type, [r])
      }
    }

    const parts: string[] = [`Multi-hop retrieval for: "${query}"`, '']

    for (const [type, items] of byType) {
      parts.push(`### ${type.charAt(0).toUpperCase() + type.slice(1)} (${items.length})`)
      for (const item of items.slice(0, 5)) {
        const similarity = Math.round(item.similarity * 100)
        parts.push(`- [${similarity}%] ${item.content.slice(0, 150)}${item.content.length > 150 ? '...' : ''}`)
      }
      parts.push('')
    }

    return parts.join('\n')
  }

  /** Whether `word` (expected lowercase) is excluded from query expansion. */
  private isStopWord(word: string): boolean {
    return ChainRetrieval.STOP_WORDS.has(word)
  }

  /** Remove leading/trailing characters that are neither letters nor digits. */
  private static stripEdgePunctuation(token: string): string {
    return token.replace(ChainRetrieval.EDGE_PUNCTUATION, '')
  }
}
|