claude-brain 0.27.2 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,749 @@
1
+ /**
2
+ * Inference Router — SLM Upgrade Phase 4B
3
+ * Routes each classification task to model or regex fallback.
4
+ *
5
+ * For each task:
6
+ * 1. If config mode is 'regex'/'api' → use regex/API only
7
+ * 2. If config mode is 'model' → try model, fall back to regex on failure
8
+ * 3. If config mode is 'both' → run both, log comparison, use model output
9
+ *
10
+ * Confidence thresholding: if model confidence < threshold, use regex instead.
11
+ * Function signatures match the existing regex classifiers exactly.
12
+ */
13
+
14
+ import type { Logger } from 'pino'
15
+ import type { Config } from '@/config'
16
+ import type { ModelManager, ModelTask } from './model-manager'
17
+ import { logTrainingData, logModelFeedback } from '@/training/data-store'
18
+ import { getTokenizer } from './tokenizer'
19
+
20
+ // Import existing regex classifiers
21
+ import { IntentClassifier as BrainIntentClassifier, type ClassificationResult } from '@/routing/intent-classifier'
22
+ import type { Intent } from '@/routing/intent-classifier'
23
+ import { EntityExtractor, type ExtractedEntity } from '@/knowledge/entity-extractor'
24
+ import { classifyIntent as classifyQueryIntent } from '@/retrieval/query/intent-classifier'
25
+ import type { QueryIntent } from '@/retrieval/types'
26
+ import type { Pattern } from '@/memory/patterns'
27
+
28
/** Default intent labels matching the model training order.
 * Used only when the model manifest does not ship its own label list. */
const INTENT_LABELS: Intent[] = [
  'session_start', 'context_needed', 'decision_made', 'store_this',
  'pattern_found', 'mistake_learned', 'progress_update', 'question',
  'comparison', 'exploration', 'list_all', 'update_memory',
  'delete_memory', 'detail_request', 'timeline', 'no_action'
]

/** BIO labels for token-level entity extraction ('O' = outside any entity) */
const ENTITY_BIO_LABELS = ['O', 'B-TECH', 'I-TECH', 'B-PROJECT', 'I-PROJECT', 'B-CONCEPT', 'I-CONCEPT']

/** Query intent labels matching model training order.
 * Model was trained with 'procedural'/'comparative' which map to code's 'pattern'/'comparison'. */
const QUERY_LABELS = ['factual', 'procedural', 'comparative', 'temporal', 'exploratory', 'decision'] as const

/** Map model query labels → QueryIntent['type'] (handles training label renames) */
const QUERY_LABEL_MAP: Record<string, QueryIntent['type']> = {
  factual: 'factual',
  procedural: 'pattern',
  comparative: 'comparison',
  temporal: 'temporal',
  exploratory: 'exploratory',
  decision: 'decision',
}

/** Pattern type labels — fallback when the pattern model's manifest has no label list
 * (presumably matches that model's training order — confirm against training config) */
const PATTERN_LABELS: Pattern['type'][] = ['solution', 'anti-pattern', 'best-practice', 'common-issue']

/** Knowledge type labels — fallback when the knowledge model's manifest has no label list */
const KNOWLEDGE_LABELS = ['fact', 'preference', 'constraint', 'goal', 'definition']

/** Map BIO entity tag suffix (the part after 'B-'/'I-') to EntityType */
const BIO_TYPE_MAP: Record<string, ExtractedEntity['type']> = {
  'TECH': 'technology',
  'PROJECT': 'project',
  'CONCEPT': 'concept',
}

/** Minimum softmax probability to accept a B-/I- entity tag (vs forcing O).
 * Also reused as the minimum average confidence for a whole entity span. */
const ENTITY_TOKEN_THRESHOLD = 0.5

/** Common words that should never be entities (false positive filter) */
const ENTITY_STOPWORDS = new Set([
  'i', 'we', 'you', 'he', 'she', 'it', 'they', 'me', 'us', 'him', 'her', 'them',
  'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
  'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
  'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might',
  'not', 'no', 'yes', 'this', 'that', 'these', 'those', 'my', 'your', 'our', 'their',
  'use', 'using', 'used', 'make', 'like', 'just', 'also', 'very', 'much',
])

/** EOS token ID for greedy decode (GPT-2 uses 50256) */
const EOS_TOKEN_ID = 50256

/** Max tokens to generate for compression */
const COMPRESS_MAX_TOKENS = 128

/** Inference mode per task: local model, regex fallback, both (run model and
 * regex, log the comparison), or remote API (compression only). */
type TaskMode = 'model' | 'regex' | 'both' | 'api'
87
+
88
+ export class InferenceRouter {
89
  // Structured logger, scoped to the 'inference-router' component in the constructor
  private logger: Logger
  // Full application config; SLM settings are read from config.slm
  private config: Config
  // Owns model availability checks, manifest metadata, and ONNX inference
  private modelManager: ModelManager
  // Minimum model probability required before a model prediction is trusted
  private confidenceThreshold: number

  // Regex fallback instances
  private intentClassifier: BrainIntentClassifier
  private entityExtractor: EntityExtractor

  /**
   * @param logger parent logger; a child logger tagged 'inference-router' is derived
   * @param config application config; reads slm.confidenceThreshold (defaults to 0.7)
   * @param modelManager provides hasModel/infer/getManifestEntry for all tasks
   */
  constructor(logger: Logger, config: Config, modelManager: ModelManager) {
    this.logger = logger.child({ component: 'inference-router' })
    this.config = config
    this.modelManager = modelManager
    this.confidenceThreshold = config.slm?.confidenceThreshold ?? 0.7

    // Initialize regex fallbacks
    this.intentClassifier = new BrainIntentClassifier()
    this.entityExtractor = new EntityExtractor()
  }
108
+
109
+ /**
110
+ * Check if SLM is enabled globally
111
+ */
112
+ get enabled(): boolean {
113
+ return this.config.slm?.enabled ?? false
114
+ }
115
+
116
+ /**
117
+ * Get the mode for a specific task
118
+ */
119
+ private getTaskMode(task: ModelTask): TaskMode {
120
+ if (!this.enabled) return task === 'compress' ? 'api' : 'regex'
121
+ const taskConfig = this.config.slm?.tasks
122
+ if (!taskConfig) return task === 'compress' ? 'api' : 'regex'
123
+
124
+ switch (task) {
125
+ case 'intent': return taskConfig.intent ?? 'regex'
126
+ case 'entity': return taskConfig.entity ?? 'regex'
127
+ case 'query': return taskConfig.query ?? 'regex'
128
+ case 'knowledge': return taskConfig.knowledge ?? 'regex'
129
+ case 'compress': return taskConfig.compress ?? 'api'
130
+ case 'pattern': return taskConfig.pattern ?? 'regex'
131
+ default: return 'regex'
132
+ }
133
+ }
134
+
135
+ /**
136
+ * Softmax over logits array (used when ONNX models are available)
137
+ */
138
+ softmax(logits: Float32Array): number[] {
139
+ const max = Math.max(...logits)
140
+ const exps = Array.from(logits).map(x => Math.exp(x - max))
141
+ const sum = exps.reduce((a, b) => a + b, 0)
142
+ return exps.map(e => e / sum)
143
+ }
144
+
145
+ // ── Intent Classification ──────────────────────────────────────────
146
+
147
+ /**
148
+ * Classify brain() message intent.
149
+ * Drop-in replacement for IntentClassifier.classify().
150
+ * Async to support model inference when ONNX models are available.
151
+ */
152
+ async classifyIntent(message: string): Promise<ClassificationResult> {
153
+ const mode = this.getTaskMode('intent')
154
+
155
+ if (mode === 'regex') {
156
+ return this.intentClassifier.classify(message)
157
+ }
158
+
159
+ // Try model
160
+ const modelResult = await this.tryModelClassifyIntent(message)
161
+ const regexResult = this.intentClassifier.classify(message)
162
+
163
+ if (mode === 'both') {
164
+ this.logComparison('intent', message, modelResult, regexResult)
165
+ return modelResult ?? regexResult
166
+ }
167
+
168
+ // mode === 'model': use model if available and confident
169
+ if (modelResult && modelResult.confidence >= this.confidenceThreshold) {
170
+ return modelResult
171
+ }
172
+ return regexResult
173
+ }
174
+
175
+ /**
176
+ * Attempt model-based intent classification. Returns null if model unavailable.
177
+ * Tokenizes message, runs ONNX inference, and maps logits to ClassificationResult.
178
+ */
179
+ private async tryModelClassifyIntent(message: string): Promise<ClassificationResult | null> {
180
+ if (!this.modelManager.hasModel('intent')) return null
181
+
182
+ try {
183
+ const tokenizer = await getTokenizer(this.logger)
184
+ const { inputIds, attentionMask } = tokenizer.encode(message, 128)
185
+ const logits = await this.modelManager.infer('intent', inputIds, attentionMask)
186
+ if (!logits) return null
187
+
188
+ const probs = this.softmax(logits)
189
+ const manifest = this.modelManager.getManifestEntry('intent')
190
+ const labels = (manifest?.labels as Intent[] | undefined) ?? INTENT_LABELS
191
+
192
+ // Find top prediction
193
+ let maxIdx = 0
194
+ let maxProb = probs[0]!
195
+ for (let i = 1; i < probs.length; i++) {
196
+ if (probs[i]! > maxProb) {
197
+ maxProb = probs[i]!
198
+ maxIdx = i
199
+ }
200
+ }
201
+
202
+ // Find secondary intents (above a lower threshold)
203
+ const secondary: Intent[] = []
204
+ for (let i = 0; i < probs.length; i++) {
205
+ if (i !== maxIdx && probs[i]! > 0.15) {
206
+ secondary.push(labels[i]!)
207
+ }
208
+ }
209
+
210
+ return {
211
+ primary: labels[maxIdx]!,
212
+ confidence: maxProb,
213
+ secondary,
214
+ }
215
+ } catch (error) {
216
+ this.logger.warn({ error }, 'Model intent classification failed, falling back to regex')
217
+ return null
218
+ }
219
+ }
220
+
221
+ // ── Entity Extraction ──────────────────────────────────────────────
222
+
223
+ /**
224
+ * Extract entities from text.
225
+ * Drop-in replacement for EntityExtractor.extract().
226
+ * Now async to support model inference.
227
+ */
228
+ async extractEntities(text: string): Promise<ExtractedEntity[]> {
229
+ const mode = this.getTaskMode('entity')
230
+
231
+ if (mode === 'regex') {
232
+ return this.entityExtractor.extract(text)
233
+ }
234
+
235
+ const modelResult = await this.tryModelExtractEntities(text)
236
+ const regexResult = this.entityExtractor.extract(text)
237
+
238
+ if (mode === 'both') {
239
+ this.logComparison('entity', text, modelResult, regexResult)
240
+ return modelResult ?? regexResult
241
+ }
242
+
243
+ return modelResult ?? regexResult
244
+ }
245
+
246
  /**
   * Attempt model-based entity extraction using BIO sequence labeling.
   * Output is per-token logits [numTokens * numLabels].
   * Groups consecutive B-/I- tags into ExtractedEntity objects.
   * Applies softmax per-token and filters low-confidence/garbage entities.
   *
   * Returns null when the model is unavailable, inference fails, or no
   * entity survives filtering — callers treat null as "use regex".
   */
  private async tryModelExtractEntities(text: string): Promise<ExtractedEntity[] | null> {
    if (!this.modelManager.hasModel('entity')) return null

    try {
      const tokenizer = await getTokenizer(this.logger)
      const maxLen = this.modelManager.getManifestEntry('entity')?.maxSeqLen ?? 128
      const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
      const logits = await this.modelManager.infer('entity', inputIds, attentionMask)
      if (!logits) return null

      const manifest = this.modelManager.getManifestEntry('entity')
      const labels = manifest?.labels ?? ENTITY_BIO_LABELS
      const numLabels = labels.length

      // Count active tokens (non-padding)
      const numTokens = attentionMask.filter(m => m === 1).length

      // Decode per-token BIO tags with softmax probabilities
      const tokenTags: { tag: string; prob: number }[] = []
      for (let t = 0; t < numTokens; t++) {
        // Extract logits for this token and apply softmax
        const tokenLogits = new Float32Array(numLabels)
        for (let l = 0; l < numLabels; l++) {
          tokenLogits[l] = logits[t * numLabels + l]!
        }
        const probs = this.softmax(tokenLogits)

        // Argmax over this token's label probabilities (first label wins ties)
        let bestIdx = 0
        let bestProb = probs[0]!
        for (let l = 1; l < numLabels; l++) {
          if (probs[l]! > bestProb) {
            bestProb = probs[l]!
            bestIdx = l
          }
        }

        // Only accept B-/I- tags if softmax probability exceeds entity threshold
        const tag = labels[bestIdx]!
        const isEntityTag = tag.startsWith('B-') || tag.startsWith('I-')
        if (isEntityTag && bestProb < ENTITY_TOKEN_THRESHOLD) {
          // Low-confidence entity tag is demoted to O (outside)
          tokenTags.push({ tag: 'O', prob: bestProb })
        } else {
          tokenTags.push({ tag, prob: bestProb })
        }
      }

      // Group consecutive B-/I- tags into entities
      const entities: ExtractedEntity[] = []
      let currentType: string | null = null  // BIO type suffix of the open span (e.g. 'TECH')
      let currentTokenIds: number[] = []     // token ids accumulated for the open span
      let currentProbs: number[] = []        // per-token probabilities for the open span
      let startPos = 0                       // token index where the open span began

      // Close the open span: decode it, filter junk, and reset accumulator state.
      const flushEntity = () => {
        if (currentType && currentTokenIds.length > 0) {
          const entityType = BIO_TYPE_MAP[currentType]
          if (entityType) {
            const name = tokenizer.decode(currentTokenIds).trim()
            const avgProb = currentProbs.reduce((a, b) => a + b, 0) / currentProbs.length

            // Filter: minimum 2 chars, not a stopword, decent average confidence
            if (name.length >= 2 && avgProb >= ENTITY_TOKEN_THRESHOLD && !ENTITY_STOPWORDS.has(name.toLowerCase())) {
              entities.push({
                name,
                normalizedName: name.toLowerCase(),
                type: entityType,
                confidence: avgProb,
                source: 'rule',
                // NOTE(review): startPos is a token index, not a character
                // offset — confirm consumers of `positions` expect that.
                positions: [startPos],
              })
            }
          }
        }
        currentType = null
        currentTokenIds = []
        currentProbs = []
      }

      for (let i = 0; i < tokenTags.length; i++) {
        const { tag, prob } = tokenTags[i]!

        if (tag.startsWith('B-')) {
          // New span begins: close any open span first
          flushEntity()
          currentType = tag.slice(2)
          currentTokenIds = [inputIds[i]!]
          currentProbs = [prob]
          startPos = i
        } else if (tag.startsWith('I-') && currentType === tag.slice(2)) {
          // Continuation of the open span (same type only)
          currentTokenIds.push(inputIds[i]!)
          currentProbs.push(prob)
        } else {
          // 'O' or a mismatched I- tag ends the open span
          flushEntity()
        }
      }
      flushEntity()

      return entities.length > 0 ? entities : null
    } catch (error) {
      this.logger.warn({ error }, 'Model entity extraction failed, falling back to regex')
      return null
    }
  }
354
+
355
+ // ── Query Intent Classification ────────────────────────────────────
356
+
357
+ /**
358
+ * Classify search query intent.
359
+ * Drop-in replacement for classifyIntent() in retrieval/query/intent-classifier.
360
+ * Now async to support model inference.
361
+ */
362
+ async classifyQueryIntent(query: string): Promise<QueryIntent> {
363
+ const mode = this.getTaskMode('query')
364
+
365
+ if (mode === 'regex') {
366
+ return classifyQueryIntent(query)
367
+ }
368
+
369
+ const modelResult = await this.tryModelClassifyQuery(query)
370
+ const regexResult = classifyQueryIntent(query)
371
+
372
+ if (mode === 'both') {
373
+ this.logComparison('query', query, modelResult, regexResult)
374
+ return modelResult ?? regexResult
375
+ }
376
+
377
+ if (modelResult && modelResult.confidence >= this.confidenceThreshold) {
378
+ return modelResult
379
+ }
380
+ return regexResult
381
+ }
382
+
383
+ /**
384
+ * Attempt model-based query intent classification.
385
+ * 6-class classifier. Model uses training labels (procedural, comparative)
386
+ * which get mapped to code labels (pattern, comparison) via QUERY_LABEL_MAP.
387
+ */
388
+ private async tryModelClassifyQuery(query: string): Promise<QueryIntent | null> {
389
+ if (!this.modelManager.hasModel('query')) return null
390
+
391
+ try {
392
+ const tokenizer = await getTokenizer(this.logger)
393
+ const maxLen = this.modelManager.getManifestEntry('query')?.maxSeqLen ?? 128
394
+ const { inputIds, attentionMask } = tokenizer.encode(query, maxLen)
395
+ const logits = await this.modelManager.infer('query', inputIds, attentionMask)
396
+ if (!logits) return null
397
+
398
+ const probs = this.softmax(logits)
399
+ const manifest = this.modelManager.getManifestEntry('query')
400
+ const labels = manifest?.labels ?? [...QUERY_LABELS]
401
+
402
+ let maxIdx = 0
403
+ let maxProb = probs[0]!
404
+ for (let i = 1; i < probs.length; i++) {
405
+ if (probs[i]! > maxProb) {
406
+ maxProb = probs[i]!
407
+ maxIdx = i
408
+ }
409
+ }
410
+
411
+ // Map model label to QueryIntent type (handles training label renames)
412
+ const rawLabel = labels[maxIdx]!
413
+ const mappedType = QUERY_LABEL_MAP[rawLabel] ?? (rawLabel as QueryIntent['type'])
414
+
415
+ return {
416
+ type: mappedType,
417
+ confidence: maxProb,
418
+ }
419
+ } catch (error) {
420
+ this.logger.warn({ error }, 'Model query classification failed, falling back to regex')
421
+ return null
422
+ }
423
+ }
424
+
425
+ // ── Pattern Classification ─────────────────────────────────────────
426
+
427
+ /**
428
+ * Classify pattern type.
429
+ * Drop-in for PatternRecognizer.determinePatternType().
430
+ * Now async to support model inference.
431
+ */
432
+ async classifyPatternType(description: string): Promise<Pattern['type']> {
433
+ const mode = this.getTaskMode('pattern')
434
+
435
+ if (mode === 'regex') {
436
+ return this.regexClassifyPattern(description)
437
+ }
438
+
439
+ const modelResult = await this.tryModelClassifyPattern(description)
440
+ const regexResult = this.regexClassifyPattern(description)
441
+
442
+ if (mode === 'both') {
443
+ this.logComparison('pattern', description, modelResult, regexResult)
444
+ return modelResult ?? regexResult
445
+ }
446
+
447
+ return modelResult ?? regexResult
448
+ }
449
+
450
+ /**
451
+ * Regex fallback for pattern classification (extracted from PatternRecognizer)
452
+ */
453
+ private regexClassifyPattern(description: string): Pattern['type'] {
454
+ const lower = description.toLowerCase()
455
+ if (lower.includes('avoid') || lower.includes('anti-pattern') || lower.includes("don't use")) {
456
+ return 'anti-pattern'
457
+ }
458
+ if (lower.includes('always') || lower.includes('best practice') || lower.includes('standard')) {
459
+ return 'best-practice'
460
+ }
461
+ if (lower.includes('issue') || lower.includes('bug') || lower.includes('fix')) {
462
+ return 'common-issue'
463
+ }
464
+ return 'solution'
465
+ }
466
+
467
+ /**
468
+ * Attempt model-based pattern type classification.
469
+ * 4-class classifier: solution, anti-pattern, best-practice, common-issue.
470
+ */
471
+ private async tryModelClassifyPattern(description: string): Promise<Pattern['type'] | null> {
472
+ if (!this.modelManager.hasModel('pattern')) return null
473
+
474
+ try {
475
+ const tokenizer = await getTokenizer(this.logger)
476
+ const maxLen = this.modelManager.getManifestEntry('pattern')?.maxSeqLen ?? 128
477
+ const { inputIds, attentionMask } = tokenizer.encode(description, maxLen)
478
+ const logits = await this.modelManager.infer('pattern', inputIds, attentionMask)
479
+ if (!logits) return null
480
+
481
+ const probs = this.softmax(logits)
482
+ const manifest = this.modelManager.getManifestEntry('pattern')
483
+ const labels = (manifest?.labels as Pattern['type'][] | undefined) ?? PATTERN_LABELS
484
+
485
+ let maxIdx = 0
486
+ let maxProb = probs[0]!
487
+ for (let i = 1; i < probs.length; i++) {
488
+ if (probs[i]! > maxProb) {
489
+ maxProb = probs[i]!
490
+ maxIdx = i
491
+ }
492
+ }
493
+
494
+ // Only return if confidence exceeds threshold
495
+ if (maxProb < this.confidenceThreshold) return null
496
+
497
+ return labels[maxIdx]!
498
+ } catch (error) {
499
+ this.logger.warn({ error }, 'Model pattern classification failed, falling back to regex')
500
+ return null
501
+ }
502
+ }
503
+
504
+ // ── Knowledge Type Classification ──────────────────────────────────
505
+
506
+ /**
507
+ * Classify knowledge type from text.
508
+ * 5-class classifier: fact, preference, constraint, goal, definition.
509
+ */
510
+ async classifyKnowledgeType(text: string): Promise<string> {
511
+ const mode = this.getTaskMode('knowledge')
512
+
513
+ if (mode === 'regex') {
514
+ return this.regexClassifyKnowledge(text)
515
+ }
516
+
517
+ const modelResult = await this.tryModelClassifyKnowledge(text)
518
+ const regexResult = this.regexClassifyKnowledge(text)
519
+
520
+ if (mode === 'both') {
521
+ this.logComparison('knowledge', text, modelResult, regexResult)
522
+ return modelResult ?? regexResult
523
+ }
524
+
525
+ if (modelResult) {
526
+ return modelResult
527
+ }
528
+ return regexResult
529
+ }
530
+
531
+ /**
532
+ * Regex fallback for knowledge type classification.
533
+ */
534
+ private regexClassifyKnowledge(text: string): string {
535
+ const lower = text.toLowerCase()
536
+ if (lower.includes('prefer') || lower.includes('like') || lower.includes('want') || lower.includes('favorite')) {
537
+ return 'preference'
538
+ }
539
+ if (lower.includes('must') || lower.includes('require') || lower.includes('constraint') || lower.includes('cannot') || lower.includes("can't") || lower.includes('never')) {
540
+ return 'constraint'
541
+ }
542
+ if (lower.includes('goal') || lower.includes('plan') || lower.includes('aim') || lower.includes('target') || lower.includes('objective')) {
543
+ return 'goal'
544
+ }
545
+ if (lower.includes('define') || lower.includes('definition') || lower.includes('means') || lower.includes('refers to') || lower.includes('is a')) {
546
+ return 'definition'
547
+ }
548
+ return 'fact'
549
+ }
550
+
551
+ /**
552
+ * Attempt model-based knowledge type classification.
553
+ * 5-class classifier: fact, preference, constraint, goal, definition.
554
+ */
555
+ private async tryModelClassifyKnowledge(text: string): Promise<string | null> {
556
+ if (!this.modelManager.hasModel('knowledge')) return null
557
+
558
+ try {
559
+ const tokenizer = await getTokenizer(this.logger)
560
+ const maxLen = this.modelManager.getManifestEntry('knowledge')?.maxSeqLen ?? 128
561
+ const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
562
+ const logits = await this.modelManager.infer('knowledge', inputIds, attentionMask)
563
+ if (!logits) return null
564
+
565
+ const probs = this.softmax(logits)
566
+ const manifest = this.modelManager.getManifestEntry('knowledge')
567
+ const labels = manifest?.labels ?? KNOWLEDGE_LABELS
568
+
569
+ let maxIdx = 0
570
+ let maxProb = probs[0]!
571
+ for (let i = 1; i < probs.length; i++) {
572
+ if (probs[i]! > maxProb) {
573
+ maxProb = probs[i]!
574
+ maxIdx = i
575
+ }
576
+ }
577
+
578
+ if (maxProb < this.confidenceThreshold) return null
579
+
580
+ return labels[maxIdx]!
581
+ } catch (error) {
582
+ this.logger.warn({ error }, 'Model knowledge classification failed, falling back to regex')
583
+ return null
584
+ }
585
+ }
586
+
587
+ // ── Local Compression ──────────────────────────────────────────────
588
+
589
+ /**
590
+ * Compress text using local model or return original if unavailable.
591
+ * Uses greedy autoregressive decoding for seq2seq generation.
592
+ */
593
+ async compress(text: string): Promise<string> {
594
+ const mode = this.getTaskMode('compress')
595
+
596
+ // In 'api' mode or 'regex' mode, no local compression available
597
+ if (mode === 'api' || mode === 'regex') {
598
+ return text
599
+ }
600
+
601
+ const modelResult = await this.tryModelCompress(text)
602
+
603
+ if (mode === 'both' && modelResult) {
604
+ this.logger.debug({
605
+ originalLen: text.length,
606
+ compressedLen: modelResult.length,
607
+ ratio: (modelResult.length / text.length).toFixed(2),
608
+ }, 'Compression comparison')
609
+ }
610
+
611
+ return modelResult ?? text
612
+ }
613
+
614
+ /**
615
+ * Attempt model-based text compression using greedy autoregressive decoding.
616
+ * Generates tokens one at a time until EOS or max length.
617
+ */
618
+ private async tryModelCompress(text: string): Promise<string | null> {
619
+ if (!this.modelManager.hasModel('compress')) return null
620
+
621
+ try {
622
+ const tokenizer = await getTokenizer(this.logger)
623
+ const maxLen = this.modelManager.getManifestEntry('compress')?.maxSeqLen ?? 256
624
+ const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
625
+
626
+ // Greedy autoregressive decode loop
627
+ const generatedIds: number[] = [...inputIds.filter((_, i) => attentionMask[i] === 1)]
628
+ const maxGenTokens = Math.min(COMPRESS_MAX_TOKENS, maxLen)
629
+
630
+ for (let step = 0; step < maxGenTokens; step++) {
631
+ const currentMask = new Array(generatedIds.length).fill(1)
632
+ const logits = await this.modelManager.infer('compress', generatedIds, currentMask)
633
+ if (!logits) break
634
+
635
+ // Get logits for the last token position
636
+ // For seq2seq, the output logits cover the vocabulary
637
+ const vocabSize = logits.length / generatedIds.length
638
+ const lastTokenOffset = (generatedIds.length - 1) * vocabSize
639
+ const lastTokenLogits = logits.slice(lastTokenOffset, lastTokenOffset + vocabSize)
640
+
641
+ // Find argmax (greedy decode)
642
+ let bestId = 0
643
+ let bestVal = -Infinity
644
+ for (let v = 0; v < lastTokenLogits.length; v++) {
645
+ if (lastTokenLogits[v]! > bestVal) {
646
+ bestVal = lastTokenLogits[v]!
647
+ bestId = v
648
+ }
649
+ }
650
+
651
+ // Stop on EOS
652
+ if (bestId === EOS_TOKEN_ID || bestId === 0) break
653
+
654
+ generatedIds.push(bestId)
655
+ }
656
+
657
+ // Decode only the generated tokens (after input)
658
+ const inputLen = inputIds.filter((_, i) => attentionMask[i] === 1).length
659
+ const outputIds = generatedIds.slice(inputLen)
660
+
661
+ if (outputIds.length === 0) return null
662
+
663
+ const decoded = tokenizer.decode(outputIds).trim()
664
+ return decoded.length > 0 ? decoded : null
665
+ } catch (error) {
666
+ this.logger.warn({ error }, 'Model compression failed, returning original text')
667
+ return null
668
+ }
669
+ }
670
+
671
+ // ── Comparison Logging ─────────────────────────────────────────────
672
+
673
+ /**
674
+ * Log model vs regex comparison for "both" mode validation.
675
+ * Stored in model_feedback table for later analysis.
676
+ */
677
+ private logComparison(task: ModelTask, input: string, modelResult: any, regexResult: any): void {
678
+ try {
679
+ const modelLabel = this.extractLabel(modelResult)
680
+ const regexLabel = this.extractLabel(regexResult)
681
+ const agree = modelLabel === regexLabel
682
+
683
+ this.logger.debug({
684
+ task,
685
+ modelLabel,
686
+ regexLabel,
687
+ agree,
688
+ }, 'Model vs regex comparison')
689
+
690
+ // Always log to feedback table (both agreements and disagreements)
691
+ logModelFeedback({
692
+ task,
693
+ input,
694
+ modelPrediction: modelLabel ?? 'null',
695
+ modelConfidence: this.extractConfidence(modelResult),
696
+ regexPrediction: regexLabel ?? 'null',
697
+ })
698
+
699
+ if (!agree) {
700
+ // Also log disagreement to training data for review
701
+ logTrainingData({
702
+ task,
703
+ input,
704
+ output: JSON.stringify({ model: modelLabel, regex: regexLabel, agreed: false }),
705
+ metadata: JSON.stringify({ mode: 'both', comparison: true }),
706
+ })
707
+ }
708
+ } catch {
709
+ // Non-critical
710
+ }
711
+ }
712
+
713
+ /**
714
+ * Extract confidence from a model result object.
715
+ */
716
+ private extractConfidence(result: any): number {
717
+ if (!result) return 0
718
+ if (typeof result.confidence === 'number') return result.confidence
719
+ return 0
720
+ }
721
+
722
+ private extractLabel(result: any): string | null {
723
+ if (!result) return null
724
+ if (typeof result === 'string') return result
725
+ if (result.primary) return result.primary
726
+ if (result.type) return result.type
727
+ return JSON.stringify(result).slice(0, 50)
728
+ }
729
+
730
+ // ── Status ─────────────────────────────────────────────────────────
731
+
732
+ /**
733
+ * Get inference routing status for all tasks
734
+ */
735
+ getStatus(): Record<ModelTask, { mode: TaskMode; modelAvailable: boolean; modelLoaded: boolean }> {
736
+ const tasks: ModelTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
737
+ const modelStatus = this.modelManager.getStatus()
738
+ const status = {} as Record<ModelTask, { mode: TaskMode; modelAvailable: boolean; modelLoaded: boolean }>
739
+
740
+ for (const task of tasks) {
741
+ status[task] = {
742
+ mode: this.getTaskMode(task),
743
+ modelAvailable: modelStatus[task].available,
744
+ modelLoaded: modelStatus[task].loaded,
745
+ }
746
+ }
747
+ return status
748
+ }
749
+ }