claude-brain 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. package/README.md +157 -0
  2. package/VERSION +1 -0
  3. package/assets/CLAUDE.md +307 -0
  4. package/bunfig.toml +8 -0
  5. package/package.json +74 -0
  6. package/src/automation/auto-context.ts +240 -0
  7. package/src/automation/decision-detector.ts +452 -0
  8. package/src/automation/index.ts +11 -0
  9. package/src/automation/proactive-recall.ts +373 -0
  10. package/src/automation/project-detector.ts +297 -0
  11. package/src/cli/auto-setup.ts +74 -0
  12. package/src/cli/bin.ts +110 -0
  13. package/src/cli/commands/install-mcp.ts +50 -0
  14. package/src/cli/commands/serve.ts +129 -0
  15. package/src/cli/diagnose.ts +4 -0
  16. package/src/cli/health-check.ts +4 -0
  17. package/src/cli/migrate-chroma.ts +106 -0
  18. package/src/cli/setup.ts +4 -0
  19. package/src/config/defaults.ts +47 -0
  20. package/src/config/home.ts +55 -0
  21. package/src/config/index.ts +7 -0
  22. package/src/config/loader.ts +166 -0
  23. package/src/config/migration.ts +76 -0
  24. package/src/config/schema.ts +257 -0
  25. package/src/config/validator.ts +184 -0
  26. package/src/config/watcher.ts +86 -0
  27. package/src/context/assembler.ts +398 -0
  28. package/src/context/cache-manager.ts +101 -0
  29. package/src/context/formatter.ts +84 -0
  30. package/src/context/hierarchy.ts +85 -0
  31. package/src/context/index.ts +83 -0
  32. package/src/context/progress-tracker.ts +174 -0
  33. package/src/context/standards-manager.ts +267 -0
  34. package/src/context/types.ts +252 -0
  35. package/src/context/validator.ts +58 -0
  36. package/src/cross-project/affinity.ts +162 -0
  37. package/src/cross-project/generalizer.ts +283 -0
  38. package/src/cross-project/index.ts +13 -0
  39. package/src/cross-project/transfer.ts +201 -0
  40. package/src/diagnostics/index.ts +123 -0
  41. package/src/health/index.ts +229 -0
  42. package/src/index.ts +7 -0
  43. package/src/knowledge/entity-extractor.ts +416 -0
  44. package/src/knowledge/graph/builder.ts +159 -0
  45. package/src/knowledge/graph/linker.ts +201 -0
  46. package/src/knowledge/graph/memory-graph.ts +359 -0
  47. package/src/knowledge/graph/schema.ts +99 -0
  48. package/src/knowledge/graph/search.ts +168 -0
  49. package/src/knowledge/relationship-extractor.ts +108 -0
  50. package/src/memory/chroma/client.ts +169 -0
  51. package/src/memory/chroma/collection-manager.ts +94 -0
  52. package/src/memory/chroma/config.ts +46 -0
  53. package/src/memory/chroma/embeddings.ts +153 -0
  54. package/src/memory/chroma/index.ts +82 -0
  55. package/src/memory/chroma/migration.ts +270 -0
  56. package/src/memory/chroma/schemas.ts +69 -0
  57. package/src/memory/chroma/search.ts +315 -0
  58. package/src/memory/chroma/store.ts +694 -0
  59. package/src/memory/consolidation/archiver.ts +164 -0
  60. package/src/memory/consolidation/merger.ts +186 -0
  61. package/src/memory/consolidation/scorer.ts +138 -0
  62. package/src/memory/context-builder.ts +236 -0
  63. package/src/memory/database.ts +169 -0
  64. package/src/memory/embedding-utils.ts +156 -0
  65. package/src/memory/embeddings.ts +226 -0
  66. package/src/memory/episodic/detector.ts +108 -0
  67. package/src/memory/episodic/manager.ts +334 -0
  68. package/src/memory/episodic/summarizer.ts +179 -0
  69. package/src/memory/episodic/types.ts +52 -0
  70. package/src/memory/index.ts +395 -0
  71. package/src/memory/knowledge-extractor.ts +455 -0
  72. package/src/memory/learning.ts +378 -0
  73. package/src/memory/patterns.ts +396 -0
  74. package/src/memory/schema.ts +56 -0
  75. package/src/memory/search.ts +309 -0
  76. package/src/memory/store.ts +344 -0
  77. package/src/memory/types.ts +121 -0
  78. package/src/optimization/index.ts +10 -0
  79. package/src/optimization/precompute.ts +202 -0
  80. package/src/optimization/semantic-cache.ts +207 -0
  81. package/src/orchestrator/coordinator.ts +272 -0
  82. package/src/orchestrator/decision-logger.ts +228 -0
  83. package/src/orchestrator/event-emitter.ts +198 -0
  84. package/src/orchestrator/event-queue.ts +184 -0
  85. package/src/orchestrator/handlers/base-handler.ts +70 -0
  86. package/src/orchestrator/handlers/context-handler.ts +73 -0
  87. package/src/orchestrator/handlers/decision-handler.ts +204 -0
  88. package/src/orchestrator/handlers/index.ts +10 -0
  89. package/src/orchestrator/handlers/status-handler.ts +131 -0
  90. package/src/orchestrator/handlers/task-handler.ts +171 -0
  91. package/src/orchestrator/index.ts +275 -0
  92. package/src/orchestrator/task-parser.ts +284 -0
  93. package/src/orchestrator/types.ts +98 -0
  94. package/src/phase12/index.ts +456 -0
  95. package/src/prediction/context-anticipator.ts +198 -0
  96. package/src/prediction/decision-predictor.ts +184 -0
  97. package/src/prediction/index.ts +13 -0
  98. package/src/prediction/recommender.ts +268 -0
  99. package/src/reasoning/chain-retrieval.ts +247 -0
  100. package/src/reasoning/counterfactual.ts +248 -0
  101. package/src/reasoning/index.ts +13 -0
  102. package/src/reasoning/synthesizer.ts +169 -0
  103. package/src/retrieval/bm25/index.ts +300 -0
  104. package/src/retrieval/bm25/tokenizer.ts +184 -0
  105. package/src/retrieval/feedback/adaptive.ts +223 -0
  106. package/src/retrieval/feedback/index.ts +16 -0
  107. package/src/retrieval/feedback/metrics.ts +223 -0
  108. package/src/retrieval/feedback/store.ts +283 -0
  109. package/src/retrieval/fusion/index.ts +194 -0
  110. package/src/retrieval/fusion/rrf.ts +163 -0
  111. package/src/retrieval/index.ts +12 -0
  112. package/src/retrieval/pipeline.ts +375 -0
  113. package/src/retrieval/query/expander.ts +198 -0
  114. package/src/retrieval/query/index.ts +27 -0
  115. package/src/retrieval/query/intent-classifier.ts +236 -0
  116. package/src/retrieval/query/temporal-parser.ts +295 -0
  117. package/src/retrieval/reranker/index.ts +188 -0
  118. package/src/retrieval/reranker/model.ts +95 -0
  119. package/src/retrieval/service.ts +125 -0
  120. package/src/retrieval/types.ts +162 -0
  121. package/src/scripts/health-check.ts +118 -0
  122. package/src/scripts/setup.ts +122 -0
  123. package/src/server/handlers/call-tool.ts +194 -0
  124. package/src/server/handlers/index.ts +9 -0
  125. package/src/server/handlers/list-tools.ts +18 -0
  126. package/src/server/handlers/tools/analyze-decision-evolution.ts +71 -0
  127. package/src/server/handlers/tools/auto-remember.ts +200 -0
  128. package/src/server/handlers/tools/create-project.ts +135 -0
  129. package/src/server/handlers/tools/detect-trends.ts +80 -0
  130. package/src/server/handlers/tools/find-cross-project-patterns.ts +73 -0
  131. package/src/server/handlers/tools/get-activity-log.ts +194 -0
  132. package/src/server/handlers/tools/get-code-standards.ts +124 -0
  133. package/src/server/handlers/tools/get-corrections.ts +154 -0
  134. package/src/server/handlers/tools/get-decision-timeline.ts +86 -0
  135. package/src/server/handlers/tools/get-episode.ts +93 -0
  136. package/src/server/handlers/tools/get-patterns.ts +158 -0
  137. package/src/server/handlers/tools/get-phase12-status.ts +63 -0
  138. package/src/server/handlers/tools/get-project-context.ts +75 -0
  139. package/src/server/handlers/tools/get-recommendations.ts +65 -0
  140. package/src/server/handlers/tools/index.ts +33 -0
  141. package/src/server/handlers/tools/init-project.ts +710 -0
  142. package/src/server/handlers/tools/list-episodes.ts +80 -0
  143. package/src/server/handlers/tools/list-projects.ts +125 -0
  144. package/src/server/handlers/tools/rate-memory.ts +95 -0
  145. package/src/server/handlers/tools/recall-similar.ts +87 -0
  146. package/src/server/handlers/tools/recognize-pattern.ts +126 -0
  147. package/src/server/handlers/tools/record-correction.ts +125 -0
  148. package/src/server/handlers/tools/remember-decision.ts +153 -0
  149. package/src/server/handlers/tools/schemas.ts +241 -0
  150. package/src/server/handlers/tools/search-knowledge-graph.ts +89 -0
  151. package/src/server/handlers/tools/smart-context.ts +124 -0
  152. package/src/server/handlers/tools/update-progress.ts +114 -0
  153. package/src/server/handlers/tools/what-if-analysis.ts +73 -0
  154. package/src/server/http-api.ts +474 -0
  155. package/src/server/index.ts +40 -0
  156. package/src/server/mcp-server.ts +283 -0
  157. package/src/server/providers/index.ts +7 -0
  158. package/src/server/providers/prompts.ts +327 -0
  159. package/src/server/providers/resources.ts +427 -0
  160. package/src/server/services.ts +388 -0
  161. package/src/server/types.ts +39 -0
  162. package/src/server/utils/error-handler.ts +155 -0
  163. package/src/server/utils/index.ts +13 -0
  164. package/src/server/utils/memory-indicator.ts +83 -0
  165. package/src/server/utils/request-context.ts +122 -0
  166. package/src/server/utils/response-formatter.ts +124 -0
  167. package/src/server/utils/validators.ts +210 -0
  168. package/src/setup/index.ts +22 -0
  169. package/src/setup/wizard.ts +321 -0
  170. package/src/temporal/evolution.ts +197 -0
  171. package/src/temporal/index.ts +16 -0
  172. package/src/temporal/query-processor.ts +190 -0
  173. package/src/temporal/timeline.ts +259 -0
  174. package/src/temporal/trends.ts +263 -0
  175. package/src/tools/index.ts +24 -0
  176. package/src/tools/registry.ts +106 -0
  177. package/src/tools/schemas.test.ts +30 -0
  178. package/src/tools/schemas.ts +907 -0
  179. package/src/tools/types.ts +412 -0
  180. package/src/utils/circuit-breaker.ts +130 -0
  181. package/src/utils/cleanup.ts +34 -0
  182. package/src/utils/error-handler.ts +132 -0
  183. package/src/utils/error-messages.ts +60 -0
  184. package/src/utils/fallback.ts +45 -0
  185. package/src/utils/index.ts +54 -0
  186. package/src/utils/logger-utils.ts +80 -0
  187. package/src/utils/logger.ts +88 -0
  188. package/src/utils/phase12-helper.ts +56 -0
  189. package/src/utils/retry.ts +94 -0
  190. package/src/utils/transaction.ts +63 -0
  191. package/src/vault/frontmatter.ts +264 -0
  192. package/src/vault/index.ts +318 -0
  193. package/src/vault/paths.ts +106 -0
  194. package/src/vault/query.ts +422 -0
  195. package/src/vault/reader.ts +264 -0
  196. package/src/vault/templates.ts +186 -0
  197. package/src/vault/types.ts +73 -0
  198. package/src/vault/watcher.ts +277 -0
  199. package/src/vault/writer.ts +393 -0
  200. package/tsconfig.json +30 -0
@@ -0,0 +1,300 @@
1
+ /**
2
+ * BM25 Sparse Search Engine
3
+ * Uses MiniSearch for fast keyword-based retrieval
4
+ */
5
+
6
+ import type { Logger } from 'pino'
7
+ import MiniSearch from 'minisearch'
8
+ import { tokenize, tokenizeForSearch, type TokenizerOptions } from './tokenizer'
9
+
10
/**
 * A document as handed to the BM25 engine for indexing.
 */
export interface BM25Document {
  /** Unique identifier; used as the index's id field. */
  id: string
  /** Collection label the document is filed under. */
  collection: string
  /** Text body of the document. */
  content: string
  /** Arbitrary key/value data carried alongside the document. */
  metadata: Record<string, unknown>
}
16
+
17
/**
 * A single hit returned by a BM25 search: the stored document fields plus
 * a relevance score.
 */
export interface BM25SearchResult {
  /** Identifier of the matched document. */
  id: string
  /** Relevance score of the hit. */
  score: number
  /** Collection label stored with the document. */
  collection: string
  /** Stored text body of the document. */
  content: string
  /** Stored metadata of the document. */
  metadata: Record<string, unknown>
}
24
+
25
/**
 * Configuration accepted by the BM25 engine.
 */
export interface BM25Config {
  /** Names of the document fields that are indexed. */
  fields: string[]
  /** Names of the document fields stored and returned with hits. */
  storeFields: string[]
  /** BM25 k1 parameter (term frequency saturation). */
  k1?: number
  /** BM25 b parameter (document length normalization). */
  b?: number
  /** Boost for exact matches. */
  boostExact?: number
}

/** Values used for any BM25Config key the caller does not supply. */
const DEFAULT_CONFIG: BM25Config = {
  fields: ['content'],
  storeFields: ['content', 'metadata', 'collection'],
  k1: 1.2,
  b: 0.75,
  boostExact: 2.0
}
45
+
46
/**
 * Keyword (sparse) search engine backed by a MiniSearch index.
 *
 * Documents are tokenized with the project tokenizer (stopword removal,
 * basic stemming, camelCase splitting) and terms are lowercased. Search
 * scores are normalized so the best hit of a query scores 1.
 */
export class BM25Engine {
  private logger: Logger
  private config: BM25Config
  private index: MiniSearch<BM25Document>
  private documentCount: number = 0
  private tokenizerOptions: TokenizerOptions

  /**
   * @param logger Parent logger; a child logger tagged `bm25-engine` is derived from it.
   * @param config Overrides merged over the default configuration.
   */
  constructor(logger: Logger, config: Partial<BM25Config> = {}) {
    this.logger = logger.child({ component: 'bm25-engine' })
    this.config = { ...DEFAULT_CONFIG, ...config }
    this.tokenizerOptions = {
      minLength: 2,
      removeStopwords: true,
      stemming: true,
      splitCamelCase: true
    }

    this.index = this.createIndex()
  }

  /**
   * MiniSearch options shared by freshly created and re-loaded indexes.
   * Both must use the same tokenizer and term processor, otherwise query
   * terms would not line up with the indexed terms.
   */
  private indexOptions() {
    return {
      fields: this.config.fields,
      storeFields: this.config.storeFields,
      idField: 'id',
      tokenize: (text: string) => tokenize(text, this.tokenizerOptions),
      processTerm: (term: string) => term.toLowerCase()
    }
  }

  /**
   * Create a new, empty MiniSearch index.
   */
  private createIndex(): MiniSearch<BM25Document> {
    return new MiniSearch<BM25Document>(this.indexOptions())
  }

  /**
   * Re-read the document count from the index so the cached counter cannot
   * drift after a partially failed operation.
   */
  private syncDocumentCount(): void {
    this.documentCount = this.index.documentCount
  }

  /**
   * Replace the current index with one built from `documents`.
   * Documents are added in batches of 1000.
   *
   * @throws Re-throws any error raised while adding a batch.
   */
  async buildIndex(documents: BM25Document[]): Promise<void> {
    this.logger.info({ count: documents.length }, 'Building BM25 index')

    // Start from a fresh index
    this.index = this.createIndex()
    this.documentCount = 0

    const batchSize = 1000
    for (let offset = 0; offset < documents.length; offset += batchSize) {
      await this.addDocuments(documents.slice(offset, offset + batchSize))
    }

    this.logger.info({
      indexed: this.documentCount,
      terms: this.index.termCount
    }, 'BM25 index built')
  }

  /**
   * Add several documents to the index.
   *
   * @throws Re-throws the underlying error after logging it.
   */
  async addDocuments(documents: BM25Document[]): Promise<void> {
    try {
      this.index.addAll(documents)
    } catch (error) {
      this.logger.error({ error }, 'Failed to add documents to BM25 index')
      throw error
    } finally {
      // addAll may fail part-way; count what actually made it into the index.
      this.syncDocumentCount()
    }
  }

  /**
   * Add a single document to the index.
   *
   * @throws Re-throws the underlying error after logging it.
   */
  addDocument(document: BM25Document): void {
    try {
      this.index.add(document)
    } catch (error) {
      this.logger.error({ error, id: document.id }, 'Failed to add document to BM25 index')
      throw error
    } finally {
      this.syncDocumentCount()
    }
  }

  /**
   * Remove a document from the index, identified by its id.
   *
   * The document is discarded by id, so the passed object does not have to
   * match the indexed content. Failures are logged and swallowed because the
   * document might not exist.
   */
  removeDocument(document: BM25Document): void {
    try {
      this.index.discard(document.id)
    } catch (error) {
      this.logger.error({ error, id: document.id }, 'Failed to remove document from BM25 index')
      // Don't throw - document might not exist
    } finally {
      this.syncDocumentCount()
    }
  }

  /**
   * Replace the indexed version of a document (matched by id) with the
   * given one. A document that is not yet indexed is simply added.
   *
   * @throws Re-throws any error raised while adding the new version.
   */
  updateDocument(document: BM25Document): void {
    if (this.index.has(document.id)) {
      this.removeDocument(document)
    }
    this.addDocument(document)
  }

  /**
   * Search the index.
   *
   * @param query Free-text query; blank queries return no results.
   * @param options.limit Maximum number of results (default 20), applied after filtering.
   * @param options.filter Optional predicate applied to each result.
   * @param options.collection When set, only results of this collection are kept.
   * @returns Results with scores normalized relative to the best remaining hit;
   *          an empty array when the search fails (the error is logged).
   */
  search(
    query: string,
    options: {
      limit?: number
      filter?: (result: BM25SearchResult) => boolean
      collection?: string
    } = {}
  ): BM25SearchResult[] {
    const { limit = 20, filter, collection } = options

    if (!query.trim()) {
      return []
    }

    this.logger.debug({
      query: query.slice(0, 50),
      limit
    }, 'BM25 search')

    try {
      // The raw query is passed through; the index applies its own tokenizer.
      const hits = this.index.search(query, {
        prefix: true, // Allow prefix matching
        fuzzy: 0.2, // Allow minor typos
        combineWith: 'OR', // Match any term
        boost: {
          content: this.config.boostExact ?? 1
        }
      })

      // Project MiniSearch hits onto the public result shape
      let searchResults: BM25SearchResult[] = hits.map(hit => ({
        id: hit.id,
        content: hit.content || '',
        metadata: hit.metadata || {},
        collection: hit.collection || '',
        score: hit.score
      }))

      if (collection) {
        searchResults = searchResults.filter(r => r.collection === collection)
      }

      if (filter) {
        searchResults = searchResults.filter(filter)
      }

      // Normalize scores to 0-1 range, then cut to the requested size
      return this.normalizeScores(searchResults).slice(0, limit)
    } catch (error) {
      this.logger.error({ error, query }, 'BM25 search failed')
      return []
    }
  }

  /**
   * Normalize scores to 0-1 range by dividing by the highest score.
   * A lone result always scores 1; if the highest score is 0 every score is 0.
   */
  private normalizeScores(results: BM25SearchResult[]): BM25SearchResult[] {
    if (results.length === 0) return []
    if (results.length === 1) {
      return [{ ...results[0], score: 1.0 }]
    }

    // Plain loop instead of Math.max(...spread) so very large result sets
    // cannot exceed the engine's argument limit.
    let maxScore = -Infinity
    for (const r of results) {
      if (r.score > maxScore) maxScore = r.score
    }

    if (maxScore === 0) {
      return results.map(r => ({ ...r, score: 0 }))
    }

    return results.map(r => ({
      ...r,
      score: r.score / maxScore
    }))
  }

  /**
   * Get suggestions for autocomplete.
   *
   * @param query Partial query text.
   * @param limit Maximum number of suggestions to return (default 5).
   * @returns Suggested query strings; an empty array when suggesting fails.
   */
  suggest(query: string, limit: number = 5): string[] {
    try {
      // The cap is applied here rather than delegated to the index.
      return this.index
        .autoSuggest(query)
        .slice(0, Math.max(0, limit))
        .map(r => r.suggestion)
    } catch (error) {
      this.logger.error({ error, query }, 'BM25 suggest failed')
      return []
    }
  }

  /**
   * Get index statistics: document count and number of indexed terms.
   */
  getStats(): {
    documentCount: number
    termCount: number
  } {
    return {
      documentCount: this.documentCount,
      termCount: this.index.termCount
    }
  }

  /**
   * Clear the index by replacing it with an empty one.
   */
  clear(): void {
    this.index = this.createIndex()
    this.documentCount = 0
    this.logger.info('BM25 index cleared')
  }

  /**
   * Export index for persistence as a JSON string.
   */
  exportIndex(): string {
    return JSON.stringify(this.index.toJSON())
  }

  /**
   * Import index from persisted data produced by `exportIndex`.
   *
   * The index is re-created with the same options (including tokenizer and
   * term processor) as a fresh one. The current index is left untouched if
   * loading fails.
   *
   * @throws Re-throws the underlying error after logging it.
   */
  importIndex(data: string): void {
    try {
      this.index = MiniSearch.loadJSON<BM25Document>(data, this.indexOptions())
      this.syncDocumentCount()
      this.logger.info({ documentCount: this.documentCount }, 'BM25 index imported')
    } catch (error) {
      this.logger.error({ error }, 'Failed to import BM25 index')
      throw error
    }
  }
}
298
+
299
+ export { tokenize, tokenizeForSearch, getNGrams } from './tokenizer'
300
+ export type { TokenizerOptions } from './tokenizer'
@@ -0,0 +1,184 @@
1
+ /**
2
+ * Text Tokenizer for BM25
3
+ * Handles text preprocessing for sparse search
4
+ */
5
+
6
// Common English stopwords that the tokenizer drops.
// Each string is one space-separated group of words.
const STOPWORDS = new Set<string>(
  [
    'a an the and or but in on at to for',
    'of with by from as is was are were been',
    'be have has had do does did will would could',
    'should may might must shall can need dare ought',
    'used it its this that these those i me my',
    'myself we our ours ourselves you your yours',
    'yourself yourselves he him his himself she her',
    'hers herself they them their theirs themselves',
    'what which who whom when where why how all',
    'each every both few more most other some such',
    'no nor not only own same so than too very',
    's t just don now then here there also'
  ].flatMap(group => group.split(' '))
)
21
+
22
// Programming acronyms and tool names that the tokenizer keeps intact.
// Each string is one space-separated group of terms.
const KEEP_TERMS = new Set<string>(
  [
    'api sql css html json xml http https url uri',
    'jwt oauth rest graphql grpc tcp udp ip dns',
    'aws gcp azure docker kubernetes k8s npm yarn pnpm',
    'git github gitlab ci cd devops mlops db orm',
    'ui ux cli gui ide sdk mcp llm ai ml'
  ].flatMap(group => group.split(' '))
)
30
+
31
/**
 * Switches controlling how text is broken into tokens.
 * Every option is optional.
 */
export interface TokenizerOptions {
  /** Shortest token length that is kept. */
  minLength?: number
  /** Longest token length that is kept. */
  maxLength?: number
  /** Drop common English stopwords. */
  removeStopwords?: boolean
  /** Lowercase the text before splitting. */
  lowercase?: boolean
  /** Apply basic suffix stemming. */
  stemming?: boolean
  /** Insert a break at lower-to-upper camelCase boundaries. */
  splitCamelCase?: boolean
}

/** Values used for any option the caller leaves out. */
const DEFAULT_OPTIONS: TokenizerOptions = {
  minLength: 2,
  maxLength: 50,
  removeStopwords: true,
  lowercase: true,
  stemming: true,
  splitCamelCase: true
}
54
+
55
/**
 * Tokenize text for BM25 indexing and search.
 *
 * Steps, in order: optional camelCase splitting, optional lowercasing,
 * splitting into word tokens, stopword removal, optional stemming (known
 * programming terms are never stemmed), then length and pure-number filters.
 *
 * Stopwords are checked on the raw token *before* stemming so that words
 * such as "this" or "does" cannot slip through as the stems "thi"/"doe".
 * The check is repeated after stemming to also drop tokens whose stem is a
 * stopword.
 *
 * @param text Input text; empty or non-string input yields an empty array.
 * @param options Overrides merged over the default tokenizer options.
 * @returns Tokens in order of appearance (duplicates are kept).
 */
export function tokenize(text: string, options: TokenizerOptions = {}): string[] {
  const opts = { ...DEFAULT_OPTIONS, ...options }

  if (!text || typeof text !== 'string') {
    return []
  }

  // Split on camelCase if enabled (must happen before lowercasing)
  let processedText = opts.splitCamelCase
    ? text.replace(/([a-z])([A-Z])/g, '$1 $2')
    : text

  if (opts.lowercase) {
    processedText = processedText.toLowerCase()
  }

  // Match words, numbers, and hyphenated compounds
  const rawTokens = processedText.match(/[\w]+(?:[-_][\w]+)*/g) || []

  // An explicitly undefined bound disables that side of the length filter.
  const minLength = opts.minLength ?? 0
  const maxLength = opts.maxLength ?? Number.POSITIVE_INFINITY
  const pureNumber = /^\d+$/

  const tokens: string[] = []
  for (const raw of rawTokens) {
    const lowered = raw.toLowerCase()

    // Stopword filter on the unstemmed token
    if (opts.removeStopwords && STOPWORDS.has(lowered)) {
      continue
    }

    // Keep programming terms intact; otherwise apply basic stemming if enabled
    const token = !KEEP_TERMS.has(lowered) && opts.stemming ? basicStem(raw) : raw

    // Length filter
    if (token.length < minLength || token.length > maxLength) {
      continue
    }

    // Stopword filter on the stemmed token
    if (opts.removeStopwords && STOPWORDS.has(token.toLowerCase())) {
      continue
    }

    // Filter pure numbers (but keep alphanumeric)
    if (pureNumber.test(token)) {
      continue
    }

    tokens.push(token)
  }

  return tokens
}
117
+
118
/**
 * Ordered suffix rules used by `basicStem`. The first rule that matches and
 * leaves at least 3 characters wins. Built once at module load.
 *
 * - `es` is stripped only after a sibilant (ss, zz, x, ch, sh), so
 *   "classes" -> "class" and "boxes" -> "box", while "files" falls through
 *   to the plain `s` rule and becomes "file".
 * - A trailing `s` is not stripped from words ending in "ss", so "class"
 *   and "classes" share the same stem.
 */
const STEM_SUFFIX_RULES: ReadonlyArray<readonly [RegExp, string]> = [
  [/ies$/, 'y'],
  [/ied$/, 'y'],
  [/(ss|zz|x|ch|sh)es$/, '$1'],
  [/([^s])s$/, '$1'],
  [/ing$/, ''],
  [/ed$/, ''],
  [/tion$/, 't'],
  [/ness$/, ''],
  [/ment$/, ''],
  [/able$/, ''],
  [/ible$/, ''],
  [/ful$/, ''],
  [/less$/, ''],
  [/ly$/, '']
]

/**
 * Basic Porter-like stemming.
 * Simplified for performance - handles common English suffixes.
 *
 * Words shorter than 4 characters are returned unchanged, and a rule is only
 * applied when the resulting stem is at least 3 characters long. At most one
 * rule is applied per word.
 *
 * @param word Token to stem (expected to be a single word).
 * @returns The stem, or the word itself when no rule applies.
 */
function basicStem(word: string): string {
  if (word.length < 4) return word

  for (const [pattern, replacement] of STEM_SUFFIX_RULES) {
    if (!pattern.test(word)) continue

    const candidate = word.replace(pattern, replacement)
    // Only apply if result is at least 3 chars
    if (candidate.length >= 3) {
      return candidate
    }
  }

  return word
}
157
+
158
/**
 * Get n-grams from tokens.
 *
 * Each n-gram is `n` consecutive tokens joined by a single space.
 *
 * @param tokens Ordered token list.
 * @param n Size of each n-gram (default 2). Values below 1 (or NaN) yield
 *          an empty array instead of empty-string n-grams.
 * @returns All n-grams in order; empty when there are fewer than `n` tokens.
 */
export function getNGrams(tokens: string[], n: number = 2): string[] {
  if (!(n >= 1) || tokens.length < n) return []

  const ngrams: string[] = []
  for (let start = 0; start + n <= tokens.length; start++) {
    ngrams.push(tokens.slice(start, start + n).join(' '))
  }

  return ngrams
}
171
+
172
/**
 * Search-oriented tokenizer: produces the text's unigrams followed by the
 * bigrams built from those unigrams, with duplicates removed (first
 * occurrence wins).
 */
export function tokenizeForSearch(
  text: string,
  options: TokenizerOptions = {}
): string[] {
  const words = tokenize(text, options)

  const unique = new Set<string>(words)
  for (const pair of getNGrams(words, 2)) {
    unique.add(pair)
  }

  return Array.from(unique)
}
@@ -0,0 +1,223 @@
1
+ /**
2
+ * Adaptive Learning
3
+ * Learns optimal retrieval thresholds from feedback
4
+ */
5
+
6
+ import type { Logger } from 'pino'
7
+ import type { MemoryFeedback, AdaptiveThresholds, RetrievalMetrics } from '../types'
8
+ import type { FeedbackStore } from './store'
9
+ import { calculateAllMetrics, calculateAverageRating, calculatePositiveRate } from './metrics'
10
+
11
/**
 * Starting thresholds used until feedback-driven adaptation has run.
 * `lastUpdated` is evaluated once, when this module is loaded.
 */
const DEFAULT_THRESHOLDS: AdaptiveThresholds = {
  denseMinSimilarity: 0.3,
  denseWeight: 0.7,
  sparseWeight: 0.3,
  rrfK: 60,
  feedbackCount: 0,
  lastUpdated: new Date().toISOString()
}
20
+
21
/** Builds an inclusive `{ min, max }` range. */
const clampRange = (min: number, max: number): { min: number; max: number } => ({ min, max })

/** Per-threshold limits that keep adapted values away from extremes. */
const BOUNDS = {
  denseMinSimilarity: clampRange(0.1, 0.9),
  denseWeight: clampRange(0.3, 0.9),
  sparseWeight: clampRange(0.1, 0.7),
  rrfK: clampRange(20, 100)
}
28
+
29
/**
 * Tuning knobs for the adaptive learner.
 */
export interface AdaptiveLearnerConfig {
  /** Number of feedback entries required before any adaptation happens. */
  minFeedbackForAdaptation: number
  /** Step size applied when a threshold is nudged up or down. */
  learningRate: number
  /** Positive feedback rate the learner compares the observed rate against. */
  targetPositiveRate: number
}

/** Values used for any AdaptiveLearnerConfig key the caller does not supply. */
const DEFAULT_CONFIG: AdaptiveLearnerConfig = {
  minFeedbackForAdaptation: 10,
  learningRate: 0.1,
  targetPositiveRate: 0.7
}
43
+
44
/**
 * Learns retrieval thresholds (dense/sparse weights, minimum dense
 * similarity, RRF k) from recent user feedback.
 *
 * The learner keeps one private thresholds object. Every accessor returns a
 * copy, so callers cannot mutate the learner's state by accident.
 */
export class AdaptiveLearner {
  private logger: Logger
  private feedbackStore: FeedbackStore
  private config: AdaptiveLearnerConfig
  private currentThresholds: AdaptiveThresholds

  /**
   * @param logger Parent logger; a child logger tagged `adaptive-learner` is derived from it.
   * @param feedbackStore Source of recent feedback entries.
   * @param config Overrides merged over the default learner configuration.
   */
  constructor(
    logger: Logger,
    feedbackStore: FeedbackStore,
    config: Partial<AdaptiveLearnerConfig> = {}
  ) {
    this.logger = logger.child({ component: 'adaptive-learner' })
    this.feedbackStore = feedbackStore
    this.config = { ...DEFAULT_CONFIG, ...config }
    this.currentThresholds = this.freshDefaults()
  }

  /**
   * Default thresholds stamped with the current time. The shared defaults
   * object carries a timestamp taken at module load, which would otherwise
   * make a fresh or reset learner report a stale `lastUpdated`.
   */
  private freshDefaults(): AdaptiveThresholds {
    return { ...DEFAULT_THRESHOLDS, lastUpdated: new Date().toISOString() }
  }

  /**
   * Get a copy of the current adaptive thresholds.
   */
  getThresholds(): AdaptiveThresholds {
    return { ...this.currentThresholds }
  }

  /**
   * Update thresholds based on the most recent feedback (up to 1000 entries).
   *
   * Nothing changes when fewer than `minFeedbackForAdaptation` entries are
   * available. Otherwise the dense/sparse weights, the minimum dense
   * similarity and the RRF k are nudged, each clamped to its bounds.
   *
   * @returns A copy of the thresholds after the update.
   */
  async updateThresholds(): Promise<AdaptiveThresholds> {
    const feedback = await this.feedbackStore.getRecentFeedback(1000)

    if (feedback.length < this.config.minFeedbackForAdaptation) {
      this.logger.debug({
        feedbackCount: feedback.length,
        required: this.config.minFeedbackForAdaptation
      }, 'Not enough feedback for adaptation')
      return this.getThresholds()
    }

    this.logger.info({ feedbackCount: feedback.length }, 'Adapting thresholds')

    const metrics = calculateAllMetrics(feedback)
    const positiveRate = calculatePositiveRate(feedback)
    const avgRating = calculateAverageRating(feedback)
    const { learningRate, targetPositiveRate } = this.config

    // Group feedback by provenance in a single pass
    const byProvenance: Record<'dense' | 'sparse' | 'both', MemoryFeedback[]> = {
      dense: [],
      sparse: [],
      both: []
    }
    for (const entry of feedback) {
      byProvenance[this.inferProvenance(entry)].push(entry)
    }
    const { dense: denseOnly, sparse: sparseOnly, both } = byProvenance

    // Adjust dense/sparse weights based on performance.
    // Requires at least 5 samples on each side to compare them.
    if (denseOnly.length >= 5 && sparseOnly.length >= 5) {
      const densePositiveRate = calculatePositiveRate(denseOnly)
      const sparsePositiveRate = calculatePositiveRate(sparseOnly)

      if (densePositiveRate > sparsePositiveRate + 0.1) {
        // Dense performs better: shift weight towards it
        this.shiftWeights('dense')
      } else if (sparsePositiveRate > densePositiveRate + 0.1) {
        // Sparse performs better: shift weight towards it
        this.shiftWeights('sparse')
      }
    }

    // Adjust minimum similarity based on positive rate.
    // NOTE(review): a low positive rate LOWERS the threshold (admitting more
    // low-similarity results) and a high rate RAISES it. That moves away from
    // the target rate rather than towards it - confirm this is intended.
    if (positiveRate < targetPositiveRate - 0.1) {
      // Too many negative results - lower threshold to get more results
      this.currentThresholds.denseMinSimilarity = this.adjustValue(
        this.currentThresholds.denseMinSimilarity,
        learningRate,
        'decrease',
        BOUNDS.denseMinSimilarity
      )
    } else if (positiveRate > targetPositiveRate + 0.1) {
      // Results are good - can raise threshold to be more selective
      this.currentThresholds.denseMinSimilarity = this.adjustValue(
        this.currentThresholds.denseMinSimilarity,
        learningRate / 2, // More conservative increase
        'increase',
        BOUNDS.denseMinSimilarity
      )
    }

    // Adjust RRF K based on metrics.
    // Higher K reduces effect of rank, lower K emphasizes top ranks.
    // NOTE(review): K is only ever decreased here; nothing raises it again.
    if (metrics.mrr < 0.5 && both.length > 0) {
      this.currentThresholds.rrfK = this.adjustValue(
        this.currentThresholds.rrfK,
        5, // Adjust by fixed amount
        'decrease',
        BOUNDS.rrfK
      )
    }

    this.currentThresholds.feedbackCount = feedback.length
    this.currentThresholds.lastUpdated = new Date().toISOString()

    const snapshot = this.getThresholds()
    this.logger.info({
      thresholds: snapshot,
      metrics,
      positiveRate,
      avgRating
    }, 'Thresholds adapted')

    return snapshot
  }

  /**
   * Move one learning-rate step of weight towards the given side and away
   * from the other, clamping both weights to their bounds.
   */
  private shiftWeights(towards: 'dense' | 'sparse'): void {
    const step = this.config.learningRate
    this.currentThresholds.denseWeight = this.adjustValue(
      this.currentThresholds.denseWeight,
      step,
      towards === 'dense' ? 'increase' : 'decrease',
      BOUNDS.denseWeight
    )
    this.currentThresholds.sparseWeight = this.adjustValue(
      this.currentThresholds.sparseWeight,
      step,
      towards === 'sparse' ? 'increase' : 'decrease',
      BOUNDS.sparseWeight
    )
  }

  /**
   * Adjust a value with bounds checking.
   *
   * @returns `current` moved by `amount` in `direction`, clamped to `bounds`.
   */
  private adjustValue(
    current: number,
    amount: number,
    direction: 'increase' | 'decrease',
    bounds: { min: number; max: number }
  ): number {
    const moved = direction === 'increase' ? current + amount : current - amount
    return Math.max(bounds.min, Math.min(bounds.max, moved))
  }

  /**
   * Infer provenance from feedback (heuristic).
   *
   * Placeholder: always reports 'both'. As long as that holds, the
   * dense-only and sparse-only groups stay empty and the weight adjustment
   * in `updateThresholds` never runs. Provenance should be stored with the
   * feedback for this to become meaningful.
   */
  private inferProvenance(_feedback: MemoryFeedback): 'dense' | 'sparse' | 'both' {
    return 'both'
  }

  /**
   * Get adaptation status.
   *
   * `feedbackCount` is the count recorded by the last successful adaptation,
   * so `canAdapt` stays false until one adaptation has run.
   */
  getStatus(): {
    feedbackCount: number
    canAdapt: boolean
    currentThresholds: AdaptiveThresholds
    lastUpdated: string
  } {
    const snapshot = this.getThresholds()
    return {
      feedbackCount: snapshot.feedbackCount,
      canAdapt: snapshot.feedbackCount >= this.config.minFeedbackForAdaptation,
      currentThresholds: snapshot,
      lastUpdated: snapshot.lastUpdated
    }
  }

  /**
   * Reset thresholds to defaults.
   */
  reset(): void {
    this.currentThresholds = this.freshDefaults()
    this.logger.info('Thresholds reset to defaults')
  }
}