claude-brain 0.14.2 → 0.14.4

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (246)
  1. package/README.md +191 -191
  2. package/VERSION +1 -1
  3. package/assets/CLAUDE-unified.md +11 -11
  4. package/assets/CLAUDE.md +11 -11
  5. package/bunfig.toml +8 -8
  6. package/package.json +80 -80
  7. package/packs/backend/node.json +173 -173
  8. package/packs/core/javascript.json +176 -176
  9. package/packs/core/typescript.json +222 -222
  10. package/packs/frontend/react.json +254 -254
  11. package/packs/meta/testing.json +172 -172
  12. package/src/automation/auto-context.ts +240 -240
  13. package/src/automation/decision-detector.ts +452 -452
  14. package/src/automation/index.ts +11 -11
  15. package/src/automation/phase12-manager.ts +456 -456
  16. package/src/automation/proactive-recall.ts +373 -373
  17. package/src/automation/project-detector.ts +310 -310
  18. package/src/automation/repo-scanner.ts +205 -205
  19. package/src/cli/auto-setup.ts +82 -82
  20. package/src/cli/bin.ts +202 -202
  21. package/src/cli/commands/chroma.ts +573 -573
  22. package/src/cli/commands/git-hook.ts +189 -189
  23. package/src/cli/commands/hooks.ts +213 -213
  24. package/src/cli/commands/init.ts +122 -122
  25. package/src/cli/commands/install-mcp.ts +92 -92
  26. package/src/cli/commands/pack.ts +197 -197
  27. package/src/cli/commands/serve.ts +167 -167
  28. package/src/cli/commands/start.ts +42 -42
  29. package/src/cli/commands/uninstall-mcp.ts +41 -41
  30. package/src/cli/commands/update.ts +121 -121
  31. package/src/cli/diagnose.ts +4 -4
  32. package/src/cli/health-check.ts +4 -4
  33. package/src/cli/migrate-chroma.ts +106 -106
  34. package/src/cli/setup.ts +4 -4
  35. package/src/cli/ui/animations.ts +80 -80
  36. package/src/cli/ui/components.ts +82 -82
  37. package/src/cli/ui/index.ts +4 -4
  38. package/src/cli/ui/logo.ts +36 -36
  39. package/src/cli/ui/theme.ts +55 -55
  40. package/src/config/defaults.ts +50 -50
  41. package/src/config/home.ts +55 -55
  42. package/src/config/index.ts +7 -7
  43. package/src/config/loader.ts +166 -166
  44. package/src/config/migration.ts +76 -76
  45. package/src/config/schema.ts +360 -360
  46. package/src/config/validator.ts +184 -184
  47. package/src/config/watcher.ts +86 -86
  48. package/src/context/assembler.ts +398 -398
  49. package/src/context/cache-manager.ts +101 -101
  50. package/src/context/formatter.ts +84 -84
  51. package/src/context/hierarchy.ts +85 -85
  52. package/src/context/index.ts +83 -83
  53. package/src/context/progress-tracker.ts +174 -174
  54. package/src/context/standards-manager.ts +287 -287
  55. package/src/context/types.ts +252 -252
  56. package/src/context/validator.ts +58 -58
  57. package/src/diagnostics/index.ts +123 -123
  58. package/src/health/index.ts +229 -229
  59. package/src/hooks/brain-hook.ts +112 -112
  60. package/src/hooks/capture.ts +168 -168
  61. package/src/hooks/deduplicator.ts +72 -72
  62. package/src/hooks/git-capture.ts +109 -109
  63. package/src/hooks/git-hook-installer.ts +207 -207
  64. package/src/hooks/index.ts +20 -20
  65. package/src/hooks/installer.ts +191 -194
  66. package/src/hooks/passive-classifier.ts +366 -366
  67. package/src/hooks/queue.ts +129 -129
  68. package/src/hooks/session-tracker.ts +275 -275
  69. package/src/hooks/types.ts +47 -47
  70. package/src/index.ts +7 -7
  71. package/src/intelligence/cross-project/affinity.ts +162 -162
  72. package/src/intelligence/cross-project/generalizer.ts +283 -283
  73. package/src/intelligence/cross-project/index.ts +13 -13
  74. package/src/intelligence/cross-project/transfer.ts +201 -201
  75. package/src/intelligence/index.ts +24 -24
  76. package/src/intelligence/optimization/index.ts +10 -10
  77. package/src/intelligence/optimization/precompute.ts +202 -202
  78. package/src/intelligence/optimization/semantic-cache.ts +207 -207
  79. package/src/intelligence/prediction/context-anticipator.ts +198 -198
  80. package/src/intelligence/prediction/decision-predictor.ts +184 -184
  81. package/src/intelligence/prediction/index.ts +13 -13
  82. package/src/intelligence/prediction/recommender.ts +268 -268
  83. package/src/intelligence/reasoning/chain-retrieval.ts +247 -247
  84. package/src/intelligence/reasoning/counterfactual.ts +248 -248
  85. package/src/intelligence/reasoning/index.ts +13 -13
  86. package/src/intelligence/reasoning/synthesizer.ts +169 -169
  87. package/src/intelligence/temporal/evolution.ts +197 -197
  88. package/src/intelligence/temporal/index.ts +16 -16
  89. package/src/intelligence/temporal/query-processor.ts +190 -190
  90. package/src/intelligence/temporal/timeline.ts +259 -259
  91. package/src/intelligence/temporal/trends.ts +263 -263
  92. package/src/knowledge/entity-extractor.ts +416 -416
  93. package/src/knowledge/graph/builder.ts +185 -185
  94. package/src/knowledge/graph/linker.ts +201 -201
  95. package/src/knowledge/graph/memory-graph.ts +359 -359
  96. package/src/knowledge/graph/schema.ts +99 -99
  97. package/src/knowledge/graph/search.ts +168 -168
  98. package/src/knowledge/relationship-extractor.ts +108 -108
  99. package/src/memory/chroma/client.ts +174 -174
  100. package/src/memory/chroma/collection-manager.ts +94 -94
  101. package/src/memory/chroma/config.ts +57 -57
  102. package/src/memory/chroma/embeddings.ts +153 -153
  103. package/src/memory/chroma/index.ts +82 -82
  104. package/src/memory/chroma/migration.ts +270 -270
  105. package/src/memory/chroma/schemas.ts +69 -69
  106. package/src/memory/chroma/search.ts +315 -315
  107. package/src/memory/chroma/store.ts +741 -741
  108. package/src/memory/consolidation/archiver.ts +164 -164
  109. package/src/memory/consolidation/merger.ts +186 -186
  110. package/src/memory/consolidation/scorer.ts +138 -138
  111. package/src/memory/context-builder.ts +236 -236
  112. package/src/memory/database.ts +169 -169
  113. package/src/memory/embedding-utils.ts +156 -156
  114. package/src/memory/embeddings.ts +226 -226
  115. package/src/memory/episodic/detector.ts +108 -108
  116. package/src/memory/episodic/manager.ts +351 -351
  117. package/src/memory/episodic/summarizer.ts +179 -179
  118. package/src/memory/episodic/types.ts +52 -52
  119. package/src/memory/index.ts +582 -582
  120. package/src/memory/knowledge-extractor.ts +455 -455
  121. package/src/memory/learning.ts +378 -378
  122. package/src/memory/patterns.ts +396 -396
  123. package/src/memory/schema.ts +88 -88
  124. package/src/memory/search.ts +309 -309
  125. package/src/memory/store.ts +787 -787
  126. package/src/memory/types.ts +121 -121
  127. package/src/orchestrator/coordinator.ts +272 -272
  128. package/src/orchestrator/decision-logger.ts +228 -228
  129. package/src/orchestrator/event-emitter.ts +198 -198
  130. package/src/orchestrator/event-queue.ts +184 -184
  131. package/src/orchestrator/handlers/base-handler.ts +70 -70
  132. package/src/orchestrator/handlers/context-handler.ts +73 -73
  133. package/src/orchestrator/handlers/decision-handler.ts +204 -204
  134. package/src/orchestrator/handlers/index.ts +10 -10
  135. package/src/orchestrator/handlers/status-handler.ts +131 -131
  136. package/src/orchestrator/handlers/task-handler.ts +171 -171
  137. package/src/orchestrator/index.ts +275 -275
  138. package/src/orchestrator/task-parser.ts +284 -284
  139. package/src/orchestrator/types.ts +98 -98
  140. package/src/packs/index.ts +9 -9
  141. package/src/packs/loader.ts +134 -134
  142. package/src/packs/manager.ts +204 -204
  143. package/src/packs/ranker.ts +78 -78
  144. package/src/packs/types.ts +81 -81
  145. package/src/phase12/index.ts +5 -5
  146. package/src/retrieval/bm25/index.ts +300 -300
  147. package/src/retrieval/bm25/tokenizer.ts +184 -184
  148. package/src/retrieval/feedback/adaptive.ts +223 -223
  149. package/src/retrieval/feedback/index.ts +16 -16
  150. package/src/retrieval/feedback/metrics.ts +223 -223
  151. package/src/retrieval/feedback/store.ts +283 -283
  152. package/src/retrieval/fusion/index.ts +194 -194
  153. package/src/retrieval/fusion/rrf.ts +163 -163
  154. package/src/retrieval/index.ts +12 -12
  155. package/src/retrieval/pipeline.ts +375 -375
  156. package/src/retrieval/query/expander.ts +198 -198
  157. package/src/retrieval/query/index.ts +27 -27
  158. package/src/retrieval/query/intent-classifier.ts +236 -236
  159. package/src/retrieval/query/temporal-parser.ts +295 -295
  160. package/src/retrieval/reranker/index.ts +188 -188
  161. package/src/retrieval/reranker/model.ts +95 -95
  162. package/src/retrieval/service.ts +125 -125
  163. package/src/retrieval/types.ts +162 -162
  164. package/src/routing/entity-extractor.ts +428 -428
  165. package/src/routing/intent-classifier.ts +436 -436
  166. package/src/routing/response-filter.ts +258 -254
  167. package/src/routing/router.ts +1322 -1314
  168. package/src/routing/search-engine.ts +475 -475
  169. package/src/routing/types.ts +94 -84
  170. package/src/scripts/health-check.ts +118 -118
  171. package/src/scripts/setup.ts +122 -122
  172. package/src/server/handlers/call-tool.ts +156 -156
  173. package/src/server/handlers/index.ts +9 -9
  174. package/src/server/handlers/list-tools.ts +35 -35
  175. package/src/server/handlers/tools/analyze-decision-evolution.ts +151 -151
  176. package/src/server/handlers/tools/auto-remember.ts +200 -200
  177. package/src/server/handlers/tools/brain.ts +85 -85
  178. package/src/server/handlers/tools/create-project.ts +135 -135
  179. package/src/server/handlers/tools/detect-trends.ts +144 -144
  180. package/src/server/handlers/tools/find-cross-project-patterns.ts +168 -168
  181. package/src/server/handlers/tools/get-activity-log.ts +194 -194
  182. package/src/server/handlers/tools/get-code-standards.ts +124 -124
  183. package/src/server/handlers/tools/get-corrections.ts +154 -154
  184. package/src/server/handlers/tools/get-decision-timeline.ts +172 -172
  185. package/src/server/handlers/tools/get-episode.ts +103 -103
  186. package/src/server/handlers/tools/get-patterns.ts +158 -158
  187. package/src/server/handlers/tools/get-phase12-status.ts +63 -63
  188. package/src/server/handlers/tools/get-project-context.ts +75 -75
  189. package/src/server/handlers/tools/get-recommendations.ts +145 -145
  190. package/src/server/handlers/tools/index.ts +31 -31
  191. package/src/server/handlers/tools/init-project.ts +757 -757
  192. package/src/server/handlers/tools/list-episodes.ts +90 -90
  193. package/src/server/handlers/tools/list-projects.ts +125 -125
  194. package/src/server/handlers/tools/rate-memory.ts +101 -101
  195. package/src/server/handlers/tools/recall-similar.ts +87 -87
  196. package/src/server/handlers/tools/recognize-pattern.ts +126 -126
  197. package/src/server/handlers/tools/record-correction.ts +125 -125
  198. package/src/server/handlers/tools/remember-decision.ts +153 -153
  199. package/src/server/handlers/tools/schemas.ts +253 -253
  200. package/src/server/handlers/tools/search-knowledge-graph.ts +102 -102
  201. package/src/server/handlers/tools/smart-context.ts +146 -146
  202. package/src/server/handlers/tools/update-progress.ts +131 -131
  203. package/src/server/handlers/tools/what-if-analysis.ts +135 -135
  204. package/src/server/http-api.ts +693 -693
  205. package/src/server/index.ts +40 -40
  206. package/src/server/mcp-server.ts +283 -283
  207. package/src/server/providers/index.ts +7 -7
  208. package/src/server/providers/prompts.ts +327 -327
  209. package/src/server/providers/resources.ts +622 -622
  210. package/src/server/services.ts +468 -468
  211. package/src/server/types.ts +39 -39
  212. package/src/server/utils/error-handler.ts +155 -155
  213. package/src/server/utils/index.ts +13 -13
  214. package/src/server/utils/memory-indicator.ts +83 -83
  215. package/src/server/utils/request-context.ts +122 -122
  216. package/src/server/utils/response-formatter.ts +129 -124
  217. package/src/server/utils/validators.ts +210 -210
  218. package/src/setup/index.ts +48 -48
  219. package/src/setup/wizard.ts +461 -461
  220. package/src/tools/index.ts +24 -24
  221. package/src/tools/registry.ts +115 -115
  222. package/src/tools/schemas.test.ts +30 -30
  223. package/src/tools/schemas.ts +617 -617
  224. package/src/tools/types.ts +412 -412
  225. package/src/utils/circuit-breaker.ts +130 -130
  226. package/src/utils/cleanup.ts +34 -34
  227. package/src/utils/error-handler.ts +132 -132
  228. package/src/utils/error-messages.ts +60 -60
  229. package/src/utils/fallback.ts +45 -45
  230. package/src/utils/index.ts +54 -54
  231. package/src/utils/logger-utils.ts +80 -80
  232. package/src/utils/logger.ts +88 -88
  233. package/src/utils/phase12-helper.ts +56 -56
  234. package/src/utils/retry.ts +94 -94
  235. package/src/utils/timing.ts +47 -47
  236. package/src/utils/transaction.ts +63 -63
  237. package/src/vault/frontmatter.ts +264 -264
  238. package/src/vault/index.ts +318 -318
  239. package/src/vault/paths.ts +106 -106
  240. package/src/vault/query.ts +422 -422
  241. package/src/vault/reader.ts +264 -264
  242. package/src/vault/templates.ts +186 -186
  243. package/src/vault/types.ts +73 -73
  244. package/src/vault/watcher.ts +277 -277
  245. package/src/vault/writer.ts +413 -413
  246. package/tsconfig.json +30 -30
@@ -1,226 +1,226 @@
1
- /**
2
- * Embedding Generation Service
3
- * Phase 3: Memory and Embedding System
4
- *
5
- * Uses transformers.js for local embedding generation
6
- * Model: all-MiniLM-L6-v2 (384 dimensions)
7
- */
8
-
9
- import { pipeline, env } from '@xenova/transformers'
10
- import { createHash } from 'crypto'
11
- import type { Logger } from 'pino'
12
- import type { EmbeddingCacheStats } from './types'
13
- import { cosineSimilarity } from './embedding-utils'
14
- import { CircuitBreaker } from '@/utils'
15
-
16
- // Configure transformers.js for Bun compatibility
17
- env.allowLocalModels = true
18
- env.allowRemoteModels = true
19
- env.useBrowserCache = false
20
- // Disable web workers to avoid ONNX blob URL issues in Bun
21
- env.backends.onnx.wasm.numThreads = 1
22
- env.backends.onnx.wasm.simd = true
23
-
24
- type FeatureExtractionPipeline = Awaited<ReturnType<typeof pipeline>>
25
-
26
- export class EmbeddingService {
27
- private embeddingPipeline: FeatureExtractionPipeline | null = null
28
- private modelName: string
29
- private isInitialized: boolean = false
30
- private logger: Logger
31
- private cache: Map<string, number[]>
32
- private maxCacheSize: number
33
- private circuitBreaker: CircuitBreaker
34
-
35
- constructor(
36
- logger: Logger,
37
- modelName: string = 'Xenova/all-MiniLM-L6-v2',
38
- maxCacheSize: number = 1000
39
- ) {
40
- this.modelName = modelName
41
- this.logger = logger.child({ component: 'embedding-service' })
42
- this.cache = new Map()
43
- this.maxCacheSize = maxCacheSize
44
- this.circuitBreaker = new CircuitBreaker(
45
- 'embedding-service',
46
- logger,
47
- {
48
- failureThreshold: 3,
49
- timeout: 30000
50
- }
51
- )
52
- }
53
-
54
- /**
55
- * Initialize the embedding pipeline
56
- * Downloads model on first run, then uses cache
57
- */
58
- async initialize(): Promise<void> {
59
- if (this.isInitialized) {
60
- return
61
- }
62
-
63
- try {
64
- this.logger.info({ model: this.modelName }, 'Loading embedding model...')
65
-
66
- // Load the feature extraction pipeline
67
- this.embeddingPipeline = await pipeline('feature-extraction', this.modelName)
68
-
69
- this.isInitialized = true
70
- this.logger.info({ model: this.modelName }, 'Embedding model loaded successfully')
71
- } catch (error) {
72
- this.logger.error({ error, model: this.modelName }, 'Failed to load embedding model')
73
- throw error
74
- }
75
- }
76
-
77
- /**
78
- * Generate embedding for a single text
79
- */
80
- async generateEmbedding(text: string): Promise<number[]> {
81
- if (!this.isInitialized) {
82
- await this.initialize()
83
- }
84
-
85
- const cacheKey = this.getCacheKey(text)
86
- const cached = this.cache.get(cacheKey)
87
- if (cached) {
88
- this.logger.debug({ cacheKey }, 'Embedding cache hit')
89
- return cached
90
- }
91
-
92
- return this.circuitBreaker.execute(async () => {
93
- const startTime = Date.now()
94
-
95
- const preprocessed = this.preprocessText(text)
96
-
97
- const output = await (this.embeddingPipeline as any)(preprocessed, {
98
- pooling: 'mean',
99
- normalize: true
100
- })
101
-
102
- const embedding = Array.from((output as any).data as Float32Array)
103
-
104
- const duration = Date.now() - startTime
105
- this.logger.debug(
106
- { textLength: text.length, duration, dimensions: embedding.length },
107
- 'Embedding generated'
108
- )
109
-
110
- this.addToCache(cacheKey, embedding)
111
-
112
- return embedding
113
- })
114
- }
115
-
116
- /**
117
- * Generate embeddings for multiple texts (batched)
118
- */
119
- async generateEmbeddings(texts: string[]): Promise<number[][]> {
120
- if (!this.isInitialized) {
121
- await this.initialize()
122
- }
123
-
124
- // Process in batches to avoid memory issues
125
- const batchSize = 10
126
- const results: number[][] = []
127
-
128
- for (let i = 0; i < texts.length; i += batchSize) {
129
- const batch = texts.slice(i, i + batchSize)
130
- const batchResults = await Promise.all(batch.map((text) => this.generateEmbedding(text)))
131
- results.push(...batchResults)
132
-
133
- this.logger.debug(
134
- { progress: `${Math.min(i + batchSize, texts.length)}/${texts.length}` },
135
- 'Batch embedding progress'
136
- )
137
- }
138
-
139
- return results
140
- }
141
-
142
- /**
143
- * Preprocess text before embedding
144
- */
145
- private preprocessText(text: string): string {
146
- // Remove excessive whitespace
147
- let processed = text.replace(/\s+/g, ' ').trim()
148
-
149
- // Truncate to max length (256 tokens ~ 1024 chars)
150
- if (processed.length > 1024) {
151
- processed = processed.slice(0, 1024)
152
- this.logger.debug('Text truncated to 1024 characters')
153
- }
154
-
155
- return processed
156
- }
157
-
158
- /**
159
- * Generate cache key from text
160
- * Uses SHA-256 hash to prevent collisions
161
- */
162
- private getCacheKey(text: string): string {
163
- return createHash('sha256').update(text).digest('hex')
164
- }
165
-
166
- /**
167
- * Add embedding to cache with LRU eviction
168
- */
169
- private addToCache(key: string, embedding: number[]): void {
170
- // Simple LRU: if at max size, delete oldest entry
171
- if (this.cache.size >= this.maxCacheSize) {
172
- const firstKey = this.cache.keys().next().value
173
- if (firstKey) {
174
- this.cache.delete(firstKey)
175
- }
176
- }
177
- this.cache.set(key, embedding)
178
- }
179
-
180
- /**
181
- * Clear embedding cache
182
- */
183
- clearCache(): void {
184
- this.cache.clear()
185
- this.logger.debug('Embedding cache cleared')
186
- }
187
-
188
- /**
189
- * Get cache statistics
190
- */
191
- getCacheStats(): EmbeddingCacheStats {
192
- return {
193
- size: this.cache.size,
194
- keys: Array.from(this.cache.keys())
195
- }
196
- }
197
-
198
- /**
199
- * Check if service is initialized
200
- */
201
- isReady(): boolean {
202
- return this.isInitialized
203
- }
204
-
205
- /**
206
- * Calculate cosine similarity between two embeddings
207
- * Static method for external use
208
- */
209
- static cosineSimilarity(a: number[], b: number[]): number {
210
- return cosineSimilarity(a, b)
211
- }
212
-
213
- /**
214
- * Get the model name
215
- */
216
- getModelName(): string {
217
- return this.modelName
218
- }
219
-
220
- /**
221
- * Get expected embedding dimensions
222
- */
223
- getEmbeddingDimensions(): number {
224
- return 384 // all-MiniLM-L6-v2 outputs 384 dimensions
225
- }
226
- }
1
+ /**
2
+ * Embedding Generation Service
3
+ * Phase 3: Memory and Embedding System
4
+ *
5
+ * Uses transformers.js for local embedding generation
6
+ * Model: all-MiniLM-L6-v2 (384 dimensions)
7
+ */
8
+
9
+ import { pipeline, env } from '@xenova/transformers'
10
+ import { createHash } from 'crypto'
11
+ import type { Logger } from 'pino'
12
+ import type { EmbeddingCacheStats } from './types'
13
+ import { cosineSimilarity } from './embedding-utils'
14
+ import { CircuitBreaker } from '@/utils'
15
+
16
+ // Configure transformers.js for Bun compatibility
17
+ env.allowLocalModels = true
18
+ env.allowRemoteModels = true
19
+ env.useBrowserCache = false
20
+ // Disable web workers to avoid ONNX blob URL issues in Bun
21
+ env.backends.onnx.wasm.numThreads = 1
22
+ env.backends.onnx.wasm.simd = true
23
+
24
+ type FeatureExtractionPipeline = Awaited<ReturnType<typeof pipeline>>
25
+
26
+ export class EmbeddingService {
27
+ private embeddingPipeline: FeatureExtractionPipeline | null = null
28
+ private modelName: string
29
+ private isInitialized: boolean = false
30
+ private logger: Logger
31
+ private cache: Map<string, number[]>
32
+ private maxCacheSize: number
33
+ private circuitBreaker: CircuitBreaker
34
+
35
+ constructor(
36
+ logger: Logger,
37
+ modelName: string = 'Xenova/all-MiniLM-L6-v2',
38
+ maxCacheSize: number = 1000
39
+ ) {
40
+ this.modelName = modelName
41
+ this.logger = logger.child({ component: 'embedding-service' })
42
+ this.cache = new Map()
43
+ this.maxCacheSize = maxCacheSize
44
+ this.circuitBreaker = new CircuitBreaker(
45
+ 'embedding-service',
46
+ logger,
47
+ {
48
+ failureThreshold: 3,
49
+ timeout: 30000
50
+ }
51
+ )
52
+ }
53
+
54
+ /**
55
+ * Initialize the embedding pipeline
56
+ * Downloads model on first run, then uses cache
57
+ */
58
+ async initialize(): Promise<void> {
59
+ if (this.isInitialized) {
60
+ return
61
+ }
62
+
63
+ try {
64
+ this.logger.info({ model: this.modelName }, 'Loading embedding model...')
65
+
66
+ // Load the feature extraction pipeline
67
+ this.embeddingPipeline = await pipeline('feature-extraction', this.modelName)
68
+
69
+ this.isInitialized = true
70
+ this.logger.info({ model: this.modelName }, 'Embedding model loaded successfully')
71
+ } catch (error) {
72
+ this.logger.error({ error, model: this.modelName }, 'Failed to load embedding model')
73
+ throw error
74
+ }
75
+ }
76
+
77
+ /**
78
+ * Generate embedding for a single text
79
+ */
80
+ async generateEmbedding(text: string): Promise<number[]> {
81
+ if (!this.isInitialized) {
82
+ await this.initialize()
83
+ }
84
+
85
+ const cacheKey = this.getCacheKey(text)
86
+ const cached = this.cache.get(cacheKey)
87
+ if (cached) {
88
+ this.logger.debug({ cacheKey }, 'Embedding cache hit')
89
+ return cached
90
+ }
91
+
92
+ return this.circuitBreaker.execute(async () => {
93
+ const startTime = Date.now()
94
+
95
+ const preprocessed = this.preprocessText(text)
96
+
97
+ const output = await (this.embeddingPipeline as any)(preprocessed, {
98
+ pooling: 'mean',
99
+ normalize: true
100
+ })
101
+
102
+ const embedding = Array.from((output as any).data as Float32Array)
103
+
104
+ const duration = Date.now() - startTime
105
+ this.logger.debug(
106
+ { textLength: text.length, duration, dimensions: embedding.length },
107
+ 'Embedding generated'
108
+ )
109
+
110
+ this.addToCache(cacheKey, embedding)
111
+
112
+ return embedding
113
+ })
114
+ }
115
+
116
+ /**
117
+ * Generate embeddings for multiple texts (batched)
118
+ */
119
+ async generateEmbeddings(texts: string[]): Promise<number[][]> {
120
+ if (!this.isInitialized) {
121
+ await this.initialize()
122
+ }
123
+
124
+ // Process in batches to avoid memory issues
125
+ const batchSize = 10
126
+ const results: number[][] = []
127
+
128
+ for (let i = 0; i < texts.length; i += batchSize) {
129
+ const batch = texts.slice(i, i + batchSize)
130
+ const batchResults = await Promise.all(batch.map((text) => this.generateEmbedding(text)))
131
+ results.push(...batchResults)
132
+
133
+ this.logger.debug(
134
+ { progress: `${Math.min(i + batchSize, texts.length)}/${texts.length}` },
135
+ 'Batch embedding progress'
136
+ )
137
+ }
138
+
139
+ return results
140
+ }
141
+
142
+ /**
143
+ * Preprocess text before embedding
144
+ */
145
+ private preprocessText(text: string): string {
146
+ // Remove excessive whitespace
147
+ let processed = text.replace(/\s+/g, ' ').trim()
148
+
149
+ // Truncate to max length (256 tokens ~ 1024 chars)
150
+ if (processed.length > 1024) {
151
+ processed = processed.slice(0, 1024)
152
+ this.logger.debug('Text truncated to 1024 characters')
153
+ }
154
+
155
+ return processed
156
+ }
157
+
158
+ /**
159
+ * Generate cache key from text
160
+ * Uses SHA-256 hash to prevent collisions
161
+ */
162
+ private getCacheKey(text: string): string {
163
+ return createHash('sha256').update(text).digest('hex')
164
+ }
165
+
166
+ /**
167
+ * Add embedding to cache with LRU eviction
168
+ */
169
+ private addToCache(key: string, embedding: number[]): void {
170
+ // Simple LRU: if at max size, delete oldest entry
171
+ if (this.cache.size >= this.maxCacheSize) {
172
+ const firstKey = this.cache.keys().next().value
173
+ if (firstKey) {
174
+ this.cache.delete(firstKey)
175
+ }
176
+ }
177
+ this.cache.set(key, embedding)
178
+ }
179
+
180
+ /**
181
+ * Clear embedding cache
182
+ */
183
+ clearCache(): void {
184
+ this.cache.clear()
185
+ this.logger.debug('Embedding cache cleared')
186
+ }
187
+
188
+ /**
189
+ * Get cache statistics
190
+ */
191
+ getCacheStats(): EmbeddingCacheStats {
192
+ return {
193
+ size: this.cache.size,
194
+ keys: Array.from(this.cache.keys())
195
+ }
196
+ }
197
+
198
+ /**
199
+ * Check if service is initialized
200
+ */
201
+ isReady(): boolean {
202
+ return this.isInitialized
203
+ }
204
+
205
+ /**
206
+ * Calculate cosine similarity between two embeddings
207
+ * Static method for external use
208
+ */
209
+ static cosineSimilarity(a: number[], b: number[]): number {
210
+ return cosineSimilarity(a, b)
211
+ }
212
+
213
+ /**
214
+ * Get the model name
215
+ */
216
+ getModelName(): string {
217
+ return this.modelName
218
+ }
219
+
220
+ /**
221
+ * Get expected embedding dimensions
222
+ */
223
+ getEmbeddingDimensions(): number {
224
+ return 384 // all-MiniLM-L6-v2 outputs 384 dimensions
225
+ }
226
+ }
@@ -1,108 +1,108 @@
1
- /**
2
- * Session Detector
3
- * Classifies messages as new sessions, continuations, or topic shifts
4
- */
5
-
6
- import type { SessionClassification, EpisodeMessage } from './types'
7
-
8
- export interface DetectorOptions {
9
- sessionGapMinutes?: number
10
- }
11
-
12
- const GREETING_PATTERNS = [
13
- /^(?:hi|hello|hey|good\s+(?:morning|afternoon|evening)|greetings)\b/i,
14
- /^(?:what's up|howdy|sup)\b/i
15
- ]
16
-
17
- const CONTINUATION_PATTERNS = [
18
- /(?:continuing|continue|back\s+to|as\s+(?:we\s+)?discussed|picking\s+up|where\s+we\s+left)/i,
19
- /(?:regarding|about\s+(?:the|that)|following\s+up|as\s+I\s+(?:said|mentioned))/i,
20
- /(?:also|additionally|furthermore|moreover|in\s+addition)/i
21
- ]
22
-
23
- export class SessionDetector {
24
- private sessionGapMs: number
25
-
26
- constructor(options: DetectorOptions = {}) {
27
- this.sessionGapMs = (options.sessionGapMinutes || 30) * 60 * 1000
28
- }
29
-
30
- classifyMessage(
31
- message: EpisodeMessage,
32
- lastActivity?: string,
33
- recentMessages?: EpisodeMessage[]
34
- ): SessionClassification {
35
- const content = message.content
36
-
37
- // Check time gap
38
- if (lastActivity) {
39
- const gap = new Date(message.timestamp).getTime() - new Date(lastActivity).getTime()
40
- if (gap > this.sessionGapMs) {
41
- return 'new_session'
42
- }
43
- } else {
44
- // No previous activity → new session
45
- return 'new_session'
46
- }
47
-
48
- // Check for greeting patterns
49
- if (GREETING_PATTERNS.some(p => p.test(content.trim()))) {
50
- return 'new_session'
51
- }
52
-
53
- // Check for continuation patterns
54
- if (CONTINUATION_PATTERNS.some(p => p.test(content))) {
55
- return 'continuation'
56
- }
57
-
58
- // Check topic coherence with recent messages
59
- if (recentMessages && recentMessages.length > 0) {
60
- const coherence = this.calculateTopicCoherence(content, recentMessages)
61
- if (coherence < 0.1) {
62
- return 'topic_shift'
63
- }
64
- }
65
-
66
- return 'continuation'
67
- }
68
-
69
- private calculateTopicCoherence(content: string, recentMessages: EpisodeMessage[]): number {
70
- const currentKeywords = this.extractKeywords(content)
71
- if (currentKeywords.size === 0) return 0.5
72
-
73
- const recentText = recentMessages.map(m => m.content).join(' ')
74
- const recentKeywords = this.extractKeywords(recentText)
75
- if (recentKeywords.size === 0) return 0.5
76
-
77
- // Jaccard-like overlap
78
- let overlap = 0
79
- for (const kw of currentKeywords) {
80
- if (recentKeywords.has(kw)) overlap++
81
- }
82
-
83
- return overlap / Math.max(currentKeywords.size, 1)
84
- }
85
-
86
- private extractKeywords(text: string): Set<string> {
87
- const stopWords = new Set([
88
- 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
89
- 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
90
- 'should', 'may', 'might', 'can', 'shall', 'to', 'of', 'in', 'for',
91
- 'on', 'with', 'at', 'by', 'from', 'as', 'into', 'through', 'during',
92
- 'before', 'after', 'above', 'below', 'between', 'out', 'off', 'over',
93
- 'under', 'again', 'further', 'then', 'once', 'and', 'but', 'or', 'nor',
94
- 'not', 'so', 'if', 'that', 'this', 'it', 'its', 'i', 'me', 'my', 'we',
95
- 'our', 'you', 'your', 'he', 'she', 'they', 'them', 'their', 'what',
96
- 'which', 'who', 'when', 'where', 'how', 'all', 'each', 'every', 'both',
97
- 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'only', 'own',
98
- 'same', 'than', 'too', 'very', 'just', 'about', 'here', 'there'
99
- ])
100
-
101
- const words = text.toLowerCase()
102
- .replace(/[^a-z0-9\s-]/g, ' ')
103
- .split(/\s+/)
104
- .filter(w => w.length > 2 && !stopWords.has(w))
105
-
106
- return new Set(words)
107
- }
108
- }
1
+ /**
2
+ * Session Detector
3
+ * Classifies messages as new sessions, continuations, or topic shifts
4
+ */
5
+
6
+ import type { SessionClassification, EpisodeMessage } from './types'
7
+
8
+ export interface DetectorOptions {
9
+ sessionGapMinutes?: number
10
+ }
11
+
12
+ const GREETING_PATTERNS = [
13
+ /^(?:hi|hello|hey|good\s+(?:morning|afternoon|evening)|greetings)\b/i,
14
+ /^(?:what's up|howdy|sup)\b/i
15
+ ]
16
+
17
+ const CONTINUATION_PATTERNS = [
18
+ /(?:continuing|continue|back\s+to|as\s+(?:we\s+)?discussed|picking\s+up|where\s+we\s+left)/i,
19
+ /(?:regarding|about\s+(?:the|that)|following\s+up|as\s+I\s+(?:said|mentioned))/i,
20
+ /(?:also|additionally|furthermore|moreover|in\s+addition)/i
21
+ ]
22
+
23
+ export class SessionDetector {
24
+ private sessionGapMs: number
25
+
26
+ constructor(options: DetectorOptions = {}) {
27
+ this.sessionGapMs = (options.sessionGapMinutes || 30) * 60 * 1000
28
+ }
29
+
30
+ classifyMessage(
31
+ message: EpisodeMessage,
32
+ lastActivity?: string,
33
+ recentMessages?: EpisodeMessage[]
34
+ ): SessionClassification {
35
+ const content = message.content
36
+
37
+ // Check time gap
38
+ if (lastActivity) {
39
+ const gap = new Date(message.timestamp).getTime() - new Date(lastActivity).getTime()
40
+ if (gap > this.sessionGapMs) {
41
+ return 'new_session'
42
+ }
43
+ } else {
44
+ // No previous activity → new session
45
+ return 'new_session'
46
+ }
47
+
48
+ // Check for greeting patterns
49
+ if (GREETING_PATTERNS.some(p => p.test(content.trim()))) {
50
+ return 'new_session'
51
+ }
52
+
53
+ // Check for continuation patterns
54
+ if (CONTINUATION_PATTERNS.some(p => p.test(content))) {
55
+ return 'continuation'
56
+ }
57
+
58
+ // Check topic coherence with recent messages
59
+ if (recentMessages && recentMessages.length > 0) {
60
+ const coherence = this.calculateTopicCoherence(content, recentMessages)
61
+ if (coherence < 0.1) {
62
+ return 'topic_shift'
63
+ }
64
+ }
65
+
66
+ return 'continuation'
67
+ }
68
+
69
+ private calculateTopicCoherence(content: string, recentMessages: EpisodeMessage[]): number {
70
+ const currentKeywords = this.extractKeywords(content)
71
+ if (currentKeywords.size === 0) return 0.5
72
+
73
+ const recentText = recentMessages.map(m => m.content).join(' ')
74
+ const recentKeywords = this.extractKeywords(recentText)
75
+ if (recentKeywords.size === 0) return 0.5
76
+
77
+ // Jaccard-like overlap
78
+ let overlap = 0
79
+ for (const kw of currentKeywords) {
80
+ if (recentKeywords.has(kw)) overlap++
81
+ }
82
+
83
+ return overlap / Math.max(currentKeywords.size, 1)
84
+ }
85
+
86
+ private extractKeywords(text: string): Set<string> {
87
+ const stopWords = new Set([
88
+ 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
89
+ 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
90
+ 'should', 'may', 'might', 'can', 'shall', 'to', 'of', 'in', 'for',
91
+ 'on', 'with', 'at', 'by', 'from', 'as', 'into', 'through', 'during',
92
+ 'before', 'after', 'above', 'below', 'between', 'out', 'off', 'over',
93
+ 'under', 'again', 'further', 'then', 'once', 'and', 'but', 'or', 'nor',
94
+ 'not', 'so', 'if', 'that', 'this', 'it', 'its', 'i', 'me', 'my', 'we',
95
+ 'our', 'you', 'your', 'he', 'she', 'they', 'them', 'their', 'what',
96
+ 'which', 'who', 'when', 'where', 'how', 'all', 'each', 'every', 'both',
97
+ 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'only', 'own',
98
+ 'same', 'than', 'too', 'very', 'just', 'about', 'here', 'there'
99
+ ])
100
+
101
+ const words = text.toLowerCase()
102
+ .replace(/[^a-z0-9\s-]/g, ' ')
103
+ .split(/\s+/)
104
+ .filter(w => w.length > 2 && !stopWords.has(w))
105
+
106
+ return new Set(words)
107
+ }
108
+ }