claude-brain 0.15.2 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. package/README.md +191 -191
  2. package/VERSION +1 -1
  3. package/assets/CLAUDE-unified.md +11 -11
  4. package/assets/CLAUDE.md +29 -11
  5. package/bunfig.toml +8 -8
  6. package/package.json +82 -82
  7. package/packs/backend/node.json +173 -173
  8. package/packs/core/javascript.json +176 -176
  9. package/packs/core/typescript.json +222 -222
  10. package/packs/frontend/react.json +254 -254
  11. package/packs/meta/testing.json +172 -172
  12. package/scripts/postinstall.mjs +341 -341
  13. package/src/automation/auto-context.ts +240 -240
  14. package/src/automation/decision-detector.ts +452 -452
  15. package/src/automation/index.ts +11 -11
  16. package/src/automation/phase12-manager.ts +456 -456
  17. package/src/automation/proactive-recall.ts +373 -373
  18. package/src/automation/project-detector.ts +310 -310
  19. package/src/automation/repo-scanner.ts +205 -205
  20. package/src/cli/auto-setup.ts +82 -82
  21. package/src/cli/bin.ts +209 -202
  22. package/src/cli/commands/chroma.ts +573 -573
  23. package/src/cli/commands/git-hook.ts +189 -189
  24. package/src/cli/commands/hooks.ts +213 -213
  25. package/src/cli/commands/init.ts +122 -122
  26. package/src/cli/commands/install-mcp.ts +92 -92
  27. package/src/cli/commands/pack.ts +197 -197
  28. package/src/cli/commands/refresh.ts +323 -0
  29. package/src/cli/commands/serve.ts +167 -173
  30. package/src/cli/commands/start.ts +42 -42
  31. package/src/cli/commands/uninstall-mcp.ts +41 -41
  32. package/src/cli/commands/update.ts +124 -121
  33. package/src/cli/diagnose.ts +4 -4
  34. package/src/cli/health-check.ts +4 -4
  35. package/src/cli/migrate-chroma.ts +106 -106
  36. package/src/cli/setup.ts +4 -4
  37. package/src/cli/ui/animations.ts +80 -80
  38. package/src/cli/ui/components.ts +82 -82
  39. package/src/cli/ui/index.ts +4 -4
  40. package/src/cli/ui/logo.ts +36 -36
  41. package/src/cli/ui/theme.ts +55 -55
  42. package/src/config/defaults.ts +50 -50
  43. package/src/config/home.ts +55 -55
  44. package/src/config/index.ts +7 -7
  45. package/src/config/loader.ts +166 -166
  46. package/src/config/migration.ts +76 -76
  47. package/src/config/schema.ts +360 -360
  48. package/src/config/validator.ts +184 -184
  49. package/src/config/watcher.ts +86 -86
  50. package/src/context/assembler.ts +398 -398
  51. package/src/context/cache-manager.ts +101 -101
  52. package/src/context/formatter.ts +84 -84
  53. package/src/context/hierarchy.ts +85 -85
  54. package/src/context/index.ts +83 -83
  55. package/src/context/progress-tracker.ts +174 -174
  56. package/src/context/standards-manager.ts +287 -287
  57. package/src/context/types.ts +252 -252
  58. package/src/context/validator.ts +58 -58
  59. package/src/diagnostics/index.ts +123 -123
  60. package/src/health/index.ts +229 -229
  61. package/src/hooks/brain-hook.ts +128 -112
  62. package/src/hooks/capture.ts +168 -205
  63. package/src/hooks/context-hook.ts +137 -0
  64. package/src/hooks/deduplicator.ts +72 -72
  65. package/src/hooks/git-capture.ts +109 -109
  66. package/src/hooks/git-hook-installer.ts +207 -207
  67. package/src/hooks/index.ts +20 -20
  68. package/src/hooks/installer.ts +244 -194
  69. package/src/hooks/passive-classifier.ts +404 -723
  70. package/src/hooks/queue.ts +129 -129
  71. package/src/hooks/session-tracker.ts +312 -275
  72. package/src/hooks/types.ts +52 -47
  73. package/src/index.ts +7 -7
  74. package/src/intelligence/cross-project/affinity.ts +162 -162
  75. package/src/intelligence/cross-project/generalizer.ts +283 -283
  76. package/src/intelligence/cross-project/index.ts +13 -13
  77. package/src/intelligence/cross-project/transfer.ts +201 -201
  78. package/src/intelligence/index.ts +24 -24
  79. package/src/intelligence/optimization/index.ts +10 -10
  80. package/src/intelligence/optimization/precompute.ts +202 -202
  81. package/src/intelligence/optimization/semantic-cache.ts +207 -207
  82. package/src/intelligence/prediction/context-anticipator.ts +198 -198
  83. package/src/intelligence/prediction/decision-predictor.ts +184 -184
  84. package/src/intelligence/prediction/index.ts +13 -13
  85. package/src/intelligence/prediction/recommender.ts +268 -268
  86. package/src/intelligence/reasoning/chain-retrieval.ts +247 -247
  87. package/src/intelligence/reasoning/counterfactual.ts +248 -248
  88. package/src/intelligence/reasoning/index.ts +13 -13
  89. package/src/intelligence/reasoning/synthesizer.ts +169 -169
  90. package/src/intelligence/temporal/evolution.ts +197 -197
  91. package/src/intelligence/temporal/index.ts +16 -16
  92. package/src/intelligence/temporal/query-processor.ts +190 -190
  93. package/src/intelligence/temporal/timeline.ts +259 -259
  94. package/src/intelligence/temporal/trends.ts +263 -263
  95. package/src/knowledge/entity-extractor.ts +416 -416
  96. package/src/knowledge/graph/builder.ts +185 -185
  97. package/src/knowledge/graph/linker.ts +201 -201
  98. package/src/knowledge/graph/memory-graph.ts +359 -359
  99. package/src/knowledge/graph/schema.ts +99 -99
  100. package/src/knowledge/graph/search.ts +168 -168
  101. package/src/knowledge/relationship-extractor.ts +108 -108
  102. package/src/memory/chroma/client.ts +174 -174
  103. package/src/memory/chroma/collection-manager.ts +94 -94
  104. package/src/memory/chroma/config.ts +57 -57
  105. package/src/memory/chroma/embeddings.ts +155 -155
  106. package/src/memory/chroma/index.ts +82 -82
  107. package/src/memory/chroma/migration.ts +270 -270
  108. package/src/memory/chroma/schemas.ts +69 -69
  109. package/src/memory/chroma/search.ts +315 -315
  110. package/src/memory/chroma/store.ts +741 -741
  111. package/src/memory/consolidation/archiver.ts +164 -164
  112. package/src/memory/consolidation/merger.ts +186 -186
  113. package/src/memory/consolidation/scorer.ts +138 -138
  114. package/src/memory/context-builder.ts +236 -236
  115. package/src/memory/database.ts +169 -169
  116. package/src/memory/embedding-utils.ts +156 -156
  117. package/src/memory/embeddings.ts +226 -226
  118. package/src/memory/episodic/detector.ts +108 -108
  119. package/src/memory/episodic/manager.ts +351 -351
  120. package/src/memory/episodic/summarizer.ts +179 -179
  121. package/src/memory/episodic/types.ts +52 -52
  122. package/src/memory/index.ts +582 -582
  123. package/src/memory/knowledge-extractor.ts +455 -455
  124. package/src/memory/learning.ts +378 -378
  125. package/src/memory/patterns.ts +396 -396
  126. package/src/memory/schema.ts +88 -88
  127. package/src/memory/search.ts +309 -309
  128. package/src/memory/store.ts +787 -787
  129. package/src/memory/types.ts +121 -121
  130. package/src/orchestrator/coordinator.ts +272 -272
  131. package/src/orchestrator/decision-logger.ts +228 -228
  132. package/src/orchestrator/event-emitter.ts +198 -198
  133. package/src/orchestrator/event-queue.ts +184 -184
  134. package/src/orchestrator/handlers/base-handler.ts +70 -70
  135. package/src/orchestrator/handlers/context-handler.ts +73 -73
  136. package/src/orchestrator/handlers/decision-handler.ts +204 -204
  137. package/src/orchestrator/handlers/index.ts +10 -10
  138. package/src/orchestrator/handlers/status-handler.ts +131 -131
  139. package/src/orchestrator/handlers/task-handler.ts +171 -171
  140. package/src/orchestrator/index.ts +275 -275
  141. package/src/orchestrator/task-parser.ts +284 -284
  142. package/src/orchestrator/types.ts +98 -98
  143. package/src/packs/index.ts +9 -9
  144. package/src/packs/loader.ts +134 -134
  145. package/src/packs/manager.ts +204 -204
  146. package/src/packs/ranker.ts +78 -78
  147. package/src/packs/types.ts +81 -81
  148. package/src/phase12/index.ts +5 -5
  149. package/src/retrieval/bm25/index.ts +300 -300
  150. package/src/retrieval/bm25/tokenizer.ts +184 -184
  151. package/src/retrieval/feedback/adaptive.ts +223 -223
  152. package/src/retrieval/feedback/index.ts +16 -16
  153. package/src/retrieval/feedback/metrics.ts +223 -223
  154. package/src/retrieval/feedback/store.ts +283 -283
  155. package/src/retrieval/fusion/index.ts +194 -194
  156. package/src/retrieval/fusion/rrf.ts +163 -163
  157. package/src/retrieval/index.ts +12 -12
  158. package/src/retrieval/pipeline.ts +375 -375
  159. package/src/retrieval/query/expander.ts +198 -198
  160. package/src/retrieval/query/index.ts +27 -27
  161. package/src/retrieval/query/intent-classifier.ts +236 -236
  162. package/src/retrieval/query/temporal-parser.ts +295 -295
  163. package/src/retrieval/reranker/index.ts +188 -188
  164. package/src/retrieval/reranker/model.ts +95 -95
  165. package/src/retrieval/service.ts +125 -125
  166. package/src/retrieval/types.ts +162 -162
  167. package/src/routing/entity-extractor.ts +428 -428
  168. package/src/routing/intent-classifier.ts +450 -436
  169. package/src/routing/response-filter.ts +261 -258
  170. package/src/routing/router.ts +1441 -1322
  171. package/src/routing/search-engine.ts +515 -475
  172. package/src/routing/types.ts +94 -94
  173. package/src/scripts/health-check.ts +118 -118
  174. package/src/scripts/setup.ts +122 -122
  175. package/src/server/handlers/call-tool.ts +156 -156
  176. package/src/server/handlers/index.ts +9 -9
  177. package/src/server/handlers/list-tools.ts +35 -35
  178. package/src/server/handlers/tools/analyze-decision-evolution.ts +151 -151
  179. package/src/server/handlers/tools/auto-remember.ts +200 -200
  180. package/src/server/handlers/tools/brain.ts +85 -85
  181. package/src/server/handlers/tools/create-project.ts +135 -135
  182. package/src/server/handlers/tools/detect-trends.ts +144 -144
  183. package/src/server/handlers/tools/find-cross-project-patterns.ts +168 -168
  184. package/src/server/handlers/tools/get-activity-log.ts +194 -194
  185. package/src/server/handlers/tools/get-code-standards.ts +124 -124
  186. package/src/server/handlers/tools/get-corrections.ts +154 -154
  187. package/src/server/handlers/tools/get-decision-timeline.ts +172 -172
  188. package/src/server/handlers/tools/get-episode.ts +103 -103
  189. package/src/server/handlers/tools/get-patterns.ts +158 -158
  190. package/src/server/handlers/tools/get-phase12-status.ts +63 -63
  191. package/src/server/handlers/tools/get-project-context.ts +75 -75
  192. package/src/server/handlers/tools/get-recommendations.ts +145 -145
  193. package/src/server/handlers/tools/index.ts +31 -31
  194. package/src/server/handlers/tools/init-project.ts +757 -757
  195. package/src/server/handlers/tools/list-episodes.ts +90 -90
  196. package/src/server/handlers/tools/list-projects.ts +125 -125
  197. package/src/server/handlers/tools/rate-memory.ts +101 -101
  198. package/src/server/handlers/tools/recall-similar.ts +87 -87
  199. package/src/server/handlers/tools/recognize-pattern.ts +126 -126
  200. package/src/server/handlers/tools/record-correction.ts +125 -125
  201. package/src/server/handlers/tools/remember-decision.ts +153 -153
  202. package/src/server/handlers/tools/schemas.ts +253 -253
  203. package/src/server/handlers/tools/search-knowledge-graph.ts +102 -102
  204. package/src/server/handlers/tools/smart-context.ts +146 -146
  205. package/src/server/handlers/tools/update-progress.ts +131 -131
  206. package/src/server/handlers/tools/what-if-analysis.ts +135 -135
  207. package/src/server/http-api.ts +761 -693
  208. package/src/server/index.ts +40 -40
  209. package/src/server/mcp-server.ts +283 -283
  210. package/src/server/providers/index.ts +7 -7
  211. package/src/server/providers/prompts.ts +327 -327
  212. package/src/server/providers/resources.ts +622 -622
  213. package/src/server/services.ts +468 -468
  214. package/src/server/types.ts +39 -39
  215. package/src/server/utils/error-handler.ts +155 -155
  216. package/src/server/utils/index.ts +13 -13
  217. package/src/server/utils/memory-indicator.ts +83 -83
  218. package/src/server/utils/request-context.ts +122 -122
  219. package/src/server/utils/response-formatter.ts +129 -129
  220. package/src/server/utils/validators.ts +210 -210
  221. package/src/setup/index.ts +48 -48
  222. package/src/setup/wizard.ts +461 -461
  223. package/src/tools/index.ts +24 -24
  224. package/src/tools/registry.ts +115 -115
  225. package/src/tools/schemas.test.ts +30 -30
  226. package/src/tools/schemas.ts +617 -617
  227. package/src/tools/types.ts +412 -412
  228. package/src/utils/circuit-breaker.ts +130 -130
  229. package/src/utils/cleanup.ts +34 -34
  230. package/src/utils/error-handler.ts +132 -132
  231. package/src/utils/error-messages.ts +60 -60
  232. package/src/utils/fallback.ts +45 -45
  233. package/src/utils/index.ts +54 -54
  234. package/src/utils/logger-utils.ts +80 -80
  235. package/src/utils/logger.ts +88 -88
  236. package/src/utils/phase12-helper.ts +56 -56
  237. package/src/utils/retry.ts +94 -94
  238. package/src/utils/timing.ts +47 -47
  239. package/src/utils/transaction.ts +63 -63
  240. package/src/vault/frontmatter.ts +264 -264
  241. package/src/vault/index.ts +318 -318
  242. package/src/vault/paths.ts +106 -106
  243. package/src/vault/query.ts +422 -422
  244. package/src/vault/reader.ts +264 -264
  245. package/src/vault/templates.ts +186 -186
  246. package/src/vault/types.ts +73 -73
  247. package/src/vault/watcher.ts +277 -277
  248. package/src/vault/writer.ts +413 -413
  249. package/tsconfig.json +30 -30
  250. package/src/cli/auto-update.ts +0 -157
@@ -1,226 +1,226 @@
1
- /**
2
- * Embedding Generation Service
3
- * Phase 3: Memory and Embedding System
4
- *
5
- * Uses transformers.js for local embedding generation
6
- * Model: all-MiniLM-L6-v2 (384 dimensions)
7
- */
8
-
9
- import { pipeline, env } from '@xenova/transformers'
10
- import { createHash } from 'crypto'
11
- import type { Logger } from 'pino'
12
- import type { EmbeddingCacheStats } from './types'
13
- import { cosineSimilarity } from './embedding-utils'
14
- import { CircuitBreaker } from '@/utils'
15
-
16
- // Configure transformers.js for Bun compatibility
17
- env.allowLocalModels = true
18
- env.allowRemoteModels = true
19
- env.useBrowserCache = false
20
- // Disable web workers to avoid ONNX blob URL issues in Bun
21
- env.backends.onnx.wasm.numThreads = 1
22
- env.backends.onnx.wasm.simd = true
23
-
24
- type FeatureExtractionPipeline = Awaited<ReturnType<typeof pipeline>>
25
-
26
- export class EmbeddingService {
27
- private embeddingPipeline: FeatureExtractionPipeline | null = null
28
- private modelName: string
29
- private isInitialized: boolean = false
30
- private logger: Logger
31
- private cache: Map<string, number[]>
32
- private maxCacheSize: number
33
- private circuitBreaker: CircuitBreaker
34
-
35
- constructor(
36
- logger: Logger,
37
- modelName: string = 'Xenova/all-MiniLM-L6-v2',
38
- maxCacheSize: number = 1000
39
- ) {
40
- this.modelName = modelName
41
- this.logger = logger.child({ component: 'embedding-service' })
42
- this.cache = new Map()
43
- this.maxCacheSize = maxCacheSize
44
- this.circuitBreaker = new CircuitBreaker(
45
- 'embedding-service',
46
- logger,
47
- {
48
- failureThreshold: 3,
49
- timeout: 30000
50
- }
51
- )
52
- }
53
-
54
- /**
55
- * Initialize the embedding pipeline
56
- * Downloads model on first run, then uses cache
57
- */
58
- async initialize(): Promise<void> {
59
- if (this.isInitialized) {
60
- return
61
- }
62
-
63
- try {
64
- this.logger.info({ model: this.modelName }, 'Loading embedding model...')
65
-
66
- // Load the feature extraction pipeline
67
- this.embeddingPipeline = await pipeline('feature-extraction', this.modelName)
68
-
69
- this.isInitialized = true
70
- this.logger.info({ model: this.modelName }, 'Embedding model loaded successfully')
71
- } catch (error) {
72
- this.logger.error({ error, model: this.modelName }, 'Failed to load embedding model')
73
- throw error
74
- }
75
- }
76
-
77
- /**
78
- * Generate embedding for a single text
79
- */
80
- async generateEmbedding(text: string): Promise<number[]> {
81
- if (!this.isInitialized) {
82
- await this.initialize()
83
- }
84
-
85
- const cacheKey = this.getCacheKey(text)
86
- const cached = this.cache.get(cacheKey)
87
- if (cached) {
88
- this.logger.debug({ cacheKey }, 'Embedding cache hit')
89
- return cached
90
- }
91
-
92
- return this.circuitBreaker.execute(async () => {
93
- const startTime = Date.now()
94
-
95
- const preprocessed = this.preprocessText(text)
96
-
97
- const output = await (this.embeddingPipeline as any)(preprocessed, {
98
- pooling: 'mean',
99
- normalize: true
100
- })
101
-
102
- const embedding = Array.from((output as any).data as Float32Array)
103
-
104
- const duration = Date.now() - startTime
105
- this.logger.debug(
106
- { textLength: text.length, duration, dimensions: embedding.length },
107
- 'Embedding generated'
108
- )
109
-
110
- this.addToCache(cacheKey, embedding)
111
-
112
- return embedding
113
- })
114
- }
115
-
116
- /**
117
- * Generate embeddings for multiple texts (batched)
118
- */
119
- async generateEmbeddings(texts: string[]): Promise<number[][]> {
120
- if (!this.isInitialized) {
121
- await this.initialize()
122
- }
123
-
124
- // Process in batches to avoid memory issues
125
- const batchSize = 10
126
- const results: number[][] = []
127
-
128
- for (let i = 0; i < texts.length; i += batchSize) {
129
- const batch = texts.slice(i, i + batchSize)
130
- const batchResults = await Promise.all(batch.map((text) => this.generateEmbedding(text)))
131
- results.push(...batchResults)
132
-
133
- this.logger.debug(
134
- { progress: `${Math.min(i + batchSize, texts.length)}/${texts.length}` },
135
- 'Batch embedding progress'
136
- )
137
- }
138
-
139
- return results
140
- }
141
-
142
- /**
143
- * Preprocess text before embedding
144
- */
145
- private preprocessText(text: string): string {
146
- // Remove excessive whitespace
147
- let processed = text.replace(/\s+/g, ' ').trim()
148
-
149
- // Truncate to max length (256 tokens ~ 1024 chars)
150
- if (processed.length > 1024) {
151
- processed = processed.slice(0, 1024)
152
- this.logger.debug('Text truncated to 1024 characters')
153
- }
154
-
155
- return processed
156
- }
157
-
158
- /**
159
- * Generate cache key from text
160
- * Uses SHA-256 hash to prevent collisions
161
- */
162
- private getCacheKey(text: string): string {
163
- return createHash('sha256').update(text).digest('hex')
164
- }
165
-
166
- /**
167
- * Add embedding to cache with LRU eviction
168
- */
169
- private addToCache(key: string, embedding: number[]): void {
170
- // Simple LRU: if at max size, delete oldest entry
171
- if (this.cache.size >= this.maxCacheSize) {
172
- const firstKey = this.cache.keys().next().value
173
- if (firstKey) {
174
- this.cache.delete(firstKey)
175
- }
176
- }
177
- this.cache.set(key, embedding)
178
- }
179
-
180
- /**
181
- * Clear embedding cache
182
- */
183
- clearCache(): void {
184
- this.cache.clear()
185
- this.logger.debug('Embedding cache cleared')
186
- }
187
-
188
- /**
189
- * Get cache statistics
190
- */
191
- getCacheStats(): EmbeddingCacheStats {
192
- return {
193
- size: this.cache.size,
194
- keys: Array.from(this.cache.keys())
195
- }
196
- }
197
-
198
- /**
199
- * Check if service is initialized
200
- */
201
- isReady(): boolean {
202
- return this.isInitialized
203
- }
204
-
205
- /**
206
- * Calculate cosine similarity between two embeddings
207
- * Static method for external use
208
- */
209
- static cosineSimilarity(a: number[], b: number[]): number {
210
- return cosineSimilarity(a, b)
211
- }
212
-
213
- /**
214
- * Get the model name
215
- */
216
- getModelName(): string {
217
- return this.modelName
218
- }
219
-
220
- /**
221
- * Get expected embedding dimensions
222
- */
223
- getEmbeddingDimensions(): number {
224
- return 384 // all-MiniLM-L6-v2 outputs 384 dimensions
225
- }
226
- }
1
+ /**
2
+ * Embedding Generation Service
3
+ * Phase 3: Memory and Embedding System
4
+ *
5
+ * Uses transformers.js for local embedding generation
6
+ * Model: all-MiniLM-L6-v2 (384 dimensions)
7
+ */
8
+
9
+ import { pipeline, env } from '@xenova/transformers'
10
+ import { createHash } from 'crypto'
11
+ import type { Logger } from 'pino'
12
+ import type { EmbeddingCacheStats } from './types'
13
+ import { cosineSimilarity } from './embedding-utils'
14
+ import { CircuitBreaker } from '@/utils'
15
+
16
+ // Configure transformers.js for Bun compatibility
17
+ env.allowLocalModels = true
18
+ env.allowRemoteModels = true
19
+ env.useBrowserCache = false
20
+ // Disable web workers to avoid ONNX blob URL issues in Bun
21
+ env.backends.onnx.wasm.numThreads = 1
22
+ env.backends.onnx.wasm.simd = true
23
+
24
+ type FeatureExtractionPipeline = Awaited<ReturnType<typeof pipeline>>
25
+
26
+ export class EmbeddingService {
27
+ private embeddingPipeline: FeatureExtractionPipeline | null = null
28
+ private modelName: string
29
+ private isInitialized: boolean = false
30
+ private logger: Logger
31
+ private cache: Map<string, number[]>
32
+ private maxCacheSize: number
33
+ private circuitBreaker: CircuitBreaker
34
+
35
+ constructor(
36
+ logger: Logger,
37
+ modelName: string = 'Xenova/all-MiniLM-L6-v2',
38
+ maxCacheSize: number = 1000
39
+ ) {
40
+ this.modelName = modelName
41
+ this.logger = logger.child({ component: 'embedding-service' })
42
+ this.cache = new Map()
43
+ this.maxCacheSize = maxCacheSize
44
+ this.circuitBreaker = new CircuitBreaker(
45
+ 'embedding-service',
46
+ logger,
47
+ {
48
+ failureThreshold: 3,
49
+ timeout: 30000
50
+ }
51
+ )
52
+ }
53
+
54
+ /**
55
+ * Initialize the embedding pipeline
56
+ * Downloads model on first run, then uses cache
57
+ */
58
+ async initialize(): Promise<void> {
59
+ if (this.isInitialized) {
60
+ return
61
+ }
62
+
63
+ try {
64
+ this.logger.info({ model: this.modelName }, 'Loading embedding model...')
65
+
66
+ // Load the feature extraction pipeline
67
+ this.embeddingPipeline = await pipeline('feature-extraction', this.modelName)
68
+
69
+ this.isInitialized = true
70
+ this.logger.info({ model: this.modelName }, 'Embedding model loaded successfully')
71
+ } catch (error) {
72
+ this.logger.error({ error, model: this.modelName }, 'Failed to load embedding model')
73
+ throw error
74
+ }
75
+ }
76
+
77
+ /**
78
+ * Generate embedding for a single text
79
+ */
80
+ async generateEmbedding(text: string): Promise<number[]> {
81
+ if (!this.isInitialized) {
82
+ await this.initialize()
83
+ }
84
+
85
+ const cacheKey = this.getCacheKey(text)
86
+ const cached = this.cache.get(cacheKey)
87
+ if (cached) {
88
+ this.logger.debug({ cacheKey }, 'Embedding cache hit')
89
+ return cached
90
+ }
91
+
92
+ return this.circuitBreaker.execute(async () => {
93
+ const startTime = Date.now()
94
+
95
+ const preprocessed = this.preprocessText(text)
96
+
97
+ const output = await (this.embeddingPipeline as any)(preprocessed, {
98
+ pooling: 'mean',
99
+ normalize: true
100
+ })
101
+
102
+ const embedding = Array.from((output as any).data as Float32Array)
103
+
104
+ const duration = Date.now() - startTime
105
+ this.logger.debug(
106
+ { textLength: text.length, duration, dimensions: embedding.length },
107
+ 'Embedding generated'
108
+ )
109
+
110
+ this.addToCache(cacheKey, embedding)
111
+
112
+ return embedding
113
+ })
114
+ }
115
+
116
+ /**
117
+ * Generate embeddings for multiple texts (batched)
118
+ */
119
+ async generateEmbeddings(texts: string[]): Promise<number[][]> {
120
+ if (!this.isInitialized) {
121
+ await this.initialize()
122
+ }
123
+
124
+ // Process in batches to avoid memory issues
125
+ const batchSize = 10
126
+ const results: number[][] = []
127
+
128
+ for (let i = 0; i < texts.length; i += batchSize) {
129
+ const batch = texts.slice(i, i + batchSize)
130
+ const batchResults = await Promise.all(batch.map((text) => this.generateEmbedding(text)))
131
+ results.push(...batchResults)
132
+
133
+ this.logger.debug(
134
+ { progress: `${Math.min(i + batchSize, texts.length)}/${texts.length}` },
135
+ 'Batch embedding progress'
136
+ )
137
+ }
138
+
139
+ return results
140
+ }
141
+
142
+ /**
143
+ * Preprocess text before embedding
144
+ */
145
+ private preprocessText(text: string): string {
146
+ // Remove excessive whitespace
147
+ let processed = text.replace(/\s+/g, ' ').trim()
148
+
149
+ // Truncate to max length (256 tokens ~ 1024 chars)
150
+ if (processed.length > 1024) {
151
+ processed = processed.slice(0, 1024)
152
+ this.logger.debug('Text truncated to 1024 characters')
153
+ }
154
+
155
+ return processed
156
+ }
157
+
158
+ /**
159
+ * Generate cache key from text
160
+ * Uses SHA-256 hash to prevent collisions
161
+ */
162
+ private getCacheKey(text: string): string {
163
+ return createHash('sha256').update(text).digest('hex')
164
+ }
165
+
166
+ /**
167
+ * Add embedding to cache with LRU eviction
168
+ */
169
+ private addToCache(key: string, embedding: number[]): void {
170
+ // Simple LRU: if at max size, delete oldest entry
171
+ if (this.cache.size >= this.maxCacheSize) {
172
+ const firstKey = this.cache.keys().next().value
173
+ if (firstKey) {
174
+ this.cache.delete(firstKey)
175
+ }
176
+ }
177
+ this.cache.set(key, embedding)
178
+ }
179
+
180
+ /**
181
+ * Clear embedding cache
182
+ */
183
+ clearCache(): void {
184
+ this.cache.clear()
185
+ this.logger.debug('Embedding cache cleared')
186
+ }
187
+
188
+ /**
189
+ * Get cache statistics
190
+ */
191
+ getCacheStats(): EmbeddingCacheStats {
192
+ return {
193
+ size: this.cache.size,
194
+ keys: Array.from(this.cache.keys())
195
+ }
196
+ }
197
+
198
+ /**
199
+ * Check if service is initialized
200
+ */
201
+ isReady(): boolean {
202
+ return this.isInitialized
203
+ }
204
+
205
+ /**
206
+ * Calculate cosine similarity between two embeddings
207
+ * Static method for external use
208
+ */
209
+ static cosineSimilarity(a: number[], b: number[]): number {
210
+ return cosineSimilarity(a, b)
211
+ }
212
+
213
+ /**
214
+ * Get the model name
215
+ */
216
+ getModelName(): string {
217
+ return this.modelName
218
+ }
219
+
220
+ /**
221
+ * Get expected embedding dimensions
222
+ */
223
+ getEmbeddingDimensions(): number {
224
+ return 384 // all-MiniLM-L6-v2 outputs 384 dimensions
225
+ }
226
+ }
@@ -1,108 +1,108 @@
1
- /**
2
- * Session Detector
3
- * Classifies messages as new sessions, continuations, or topic shifts
4
- */
5
-
6
- import type { SessionClassification, EpisodeMessage } from './types'
7
-
8
- export interface DetectorOptions {
9
- sessionGapMinutes?: number
10
- }
11
-
12
- const GREETING_PATTERNS = [
13
- /^(?:hi|hello|hey|good\s+(?:morning|afternoon|evening)|greetings)\b/i,
14
- /^(?:what's up|howdy|sup)\b/i
15
- ]
16
-
17
- const CONTINUATION_PATTERNS = [
18
- /(?:continuing|continue|back\s+to|as\s+(?:we\s+)?discussed|picking\s+up|where\s+we\s+left)/i,
19
- /(?:regarding|about\s+(?:the|that)|following\s+up|as\s+I\s+(?:said|mentioned))/i,
20
- /(?:also|additionally|furthermore|moreover|in\s+addition)/i
21
- ]
22
-
23
- export class SessionDetector {
24
- private sessionGapMs: number
25
-
26
- constructor(options: DetectorOptions = {}) {
27
- this.sessionGapMs = (options.sessionGapMinutes || 30) * 60 * 1000
28
- }
29
-
30
- classifyMessage(
31
- message: EpisodeMessage,
32
- lastActivity?: string,
33
- recentMessages?: EpisodeMessage[]
34
- ): SessionClassification {
35
- const content = message.content
36
-
37
- // Check time gap
38
- if (lastActivity) {
39
- const gap = new Date(message.timestamp).getTime() - new Date(lastActivity).getTime()
40
- if (gap > this.sessionGapMs) {
41
- return 'new_session'
42
- }
43
- } else {
44
- // No previous activity → new session
45
- return 'new_session'
46
- }
47
-
48
- // Check for greeting patterns
49
- if (GREETING_PATTERNS.some(p => p.test(content.trim()))) {
50
- return 'new_session'
51
- }
52
-
53
- // Check for continuation patterns
54
- if (CONTINUATION_PATTERNS.some(p => p.test(content))) {
55
- return 'continuation'
56
- }
57
-
58
- // Check topic coherence with recent messages
59
- if (recentMessages && recentMessages.length > 0) {
60
- const coherence = this.calculateTopicCoherence(content, recentMessages)
61
- if (coherence < 0.1) {
62
- return 'topic_shift'
63
- }
64
- }
65
-
66
- return 'continuation'
67
- }
68
-
69
- private calculateTopicCoherence(content: string, recentMessages: EpisodeMessage[]): number {
70
- const currentKeywords = this.extractKeywords(content)
71
- if (currentKeywords.size === 0) return 0.5
72
-
73
- const recentText = recentMessages.map(m => m.content).join(' ')
74
- const recentKeywords = this.extractKeywords(recentText)
75
- if (recentKeywords.size === 0) return 0.5
76
-
77
- // Jaccard-like overlap
78
- let overlap = 0
79
- for (const kw of currentKeywords) {
80
- if (recentKeywords.has(kw)) overlap++
81
- }
82
-
83
- return overlap / Math.max(currentKeywords.size, 1)
84
- }
85
-
86
- private extractKeywords(text: string): Set<string> {
87
- const stopWords = new Set([
88
- 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
89
- 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
90
- 'should', 'may', 'might', 'can', 'shall', 'to', 'of', 'in', 'for',
91
- 'on', 'with', 'at', 'by', 'from', 'as', 'into', 'through', 'during',
92
- 'before', 'after', 'above', 'below', 'between', 'out', 'off', 'over',
93
- 'under', 'again', 'further', 'then', 'once', 'and', 'but', 'or', 'nor',
94
- 'not', 'so', 'if', 'that', 'this', 'it', 'its', 'i', 'me', 'my', 'we',
95
- 'our', 'you', 'your', 'he', 'she', 'they', 'them', 'their', 'what',
96
- 'which', 'who', 'when', 'where', 'how', 'all', 'each', 'every', 'both',
97
- 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'only', 'own',
98
- 'same', 'than', 'too', 'very', 'just', 'about', 'here', 'there'
99
- ])
100
-
101
- const words = text.toLowerCase()
102
- .replace(/[^a-z0-9\s-]/g, ' ')
103
- .split(/\s+/)
104
- .filter(w => w.length > 2 && !stopWords.has(w))
105
-
106
- return new Set(words)
107
- }
108
- }
1
+ /**
2
+ * Session Detector
3
+ * Classifies messages as new sessions, continuations, or topic shifts
4
+ */
5
+
6
+ import type { SessionClassification, EpisodeMessage } from './types'
7
+
8
+ export interface DetectorOptions {
9
+ sessionGapMinutes?: number
10
+ }
11
+
12
+ const GREETING_PATTERNS = [
13
+ /^(?:hi|hello|hey|good\s+(?:morning|afternoon|evening)|greetings)\b/i,
14
+ /^(?:what's up|howdy|sup)\b/i
15
+ ]
16
+
17
+ const CONTINUATION_PATTERNS = [
18
+ /(?:continuing|continue|back\s+to|as\s+(?:we\s+)?discussed|picking\s+up|where\s+we\s+left)/i,
19
+ /(?:regarding|about\s+(?:the|that)|following\s+up|as\s+I\s+(?:said|mentioned))/i,
20
+ /(?:also|additionally|furthermore|moreover|in\s+addition)/i
21
+ ]
22
+
23
+ export class SessionDetector {
24
+ private sessionGapMs: number
25
+
26
+ constructor(options: DetectorOptions = {}) {
27
+ this.sessionGapMs = (options.sessionGapMinutes || 30) * 60 * 1000
28
+ }
29
+
30
+ classifyMessage(
31
+ message: EpisodeMessage,
32
+ lastActivity?: string,
33
+ recentMessages?: EpisodeMessage[]
34
+ ): SessionClassification {
35
+ const content = message.content
36
+
37
+ // Check time gap
38
+ if (lastActivity) {
39
+ const gap = new Date(message.timestamp).getTime() - new Date(lastActivity).getTime()
40
+ if (gap > this.sessionGapMs) {
41
+ return 'new_session'
42
+ }
43
+ } else {
44
+ // No previous activity → new session
45
+ return 'new_session'
46
+ }
47
+
48
+ // Check for greeting patterns
49
+ if (GREETING_PATTERNS.some(p => p.test(content.trim()))) {
50
+ return 'new_session'
51
+ }
52
+
53
+ // Check for continuation patterns
54
+ if (CONTINUATION_PATTERNS.some(p => p.test(content))) {
55
+ return 'continuation'
56
+ }
57
+
58
+ // Check topic coherence with recent messages
59
+ if (recentMessages && recentMessages.length > 0) {
60
+ const coherence = this.calculateTopicCoherence(content, recentMessages)
61
+ if (coherence < 0.1) {
62
+ return 'topic_shift'
63
+ }
64
+ }
65
+
66
+ return 'continuation'
67
+ }
68
+
69
+ private calculateTopicCoherence(content: string, recentMessages: EpisodeMessage[]): number {
70
+ const currentKeywords = this.extractKeywords(content)
71
+ if (currentKeywords.size === 0) return 0.5
72
+
73
+ const recentText = recentMessages.map(m => m.content).join(' ')
74
+ const recentKeywords = this.extractKeywords(recentText)
75
+ if (recentKeywords.size === 0) return 0.5
76
+
77
+ // Jaccard-like overlap
78
+ let overlap = 0
79
+ for (const kw of currentKeywords) {
80
+ if (recentKeywords.has(kw)) overlap++
81
+ }
82
+
83
+ return overlap / Math.max(currentKeywords.size, 1)
84
+ }
85
+
86
+ private extractKeywords(text: string): Set<string> {
87
+ const stopWords = new Set([
88
+ 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
89
+ 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
90
+ 'should', 'may', 'might', 'can', 'shall', 'to', 'of', 'in', 'for',
91
+ 'on', 'with', 'at', 'by', 'from', 'as', 'into', 'through', 'during',
92
+ 'before', 'after', 'above', 'below', 'between', 'out', 'off', 'over',
93
+ 'under', 'again', 'further', 'then', 'once', 'and', 'but', 'or', 'nor',
94
+ 'not', 'so', 'if', 'that', 'this', 'it', 'its', 'i', 'me', 'my', 'we',
95
+ 'our', 'you', 'your', 'he', 'she', 'they', 'them', 'their', 'what',
96
+ 'which', 'who', 'when', 'where', 'how', 'all', 'each', 'every', 'both',
97
+ 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'only', 'own',
98
+ 'same', 'than', 'too', 'very', 'just', 'about', 'here', 'there'
99
+ ])
100
+
101
+ const words = text.toLowerCase()
102
+ .replace(/[^a-z0-9\s-]/g, ' ')
103
+ .split(/\s+/)
104
+ .filter(w => w.length > 2 && !stopWords.has(w))
105
+
106
+ return new Set(words)
107
+ }
108
+ }