claude-brain 0.27.2 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,749 @@
1
+ /**
2
+ * Inference Router — SLM Upgrade Phase 4B
3
+ * Routes each classification task to model or regex fallback.
4
+ *
5
+ * For each task:
6
+ * 1. If config mode is 'regex'/'api' → use regex/API only
7
+ * 2. If config mode is 'model' → try model, fall back to regex on failure
8
+ * 3. If config mode is 'both' → run both, log comparison, use model output
9
+ *
10
+ * Confidence thresholding: if model confidence < threshold, use regex instead.
11
+ * Function signatures match the existing regex classifiers exactly.
12
+ */
13
+
14
+ import type { Logger } from 'pino'
15
+ import type { Config } from '@/config'
16
+ import type { ModelManager, ModelTask } from './model-manager'
17
+ import { logTrainingData, logModelFeedback } from '@/training/data-store'
18
+ import { getTokenizer } from './tokenizer'
19
+
20
+ // Import existing regex classifiers
21
+ import { IntentClassifier as BrainIntentClassifier, type ClassificationResult } from '@/routing/intent-classifier'
22
+ import type { Intent } from '@/routing/intent-classifier'
23
+ import { EntityExtractor, type ExtractedEntity } from '@/knowledge/entity-extractor'
24
+ import { classifyIntent as classifyQueryIntent } from '@/retrieval/query/intent-classifier'
25
+ import type { QueryIntent } from '@/retrieval/types'
26
+ import type { Pattern } from '@/memory/patterns'
27
+
28
/** Default intent labels matching the model training order.
 * Used only when the model manifest does not ship its own label list. */
const INTENT_LABELS: Intent[] = [
  'session_start', 'context_needed', 'decision_made', 'store_this',
  'pattern_found', 'mistake_learned', 'progress_update', 'question',
  'comparison', 'exploration', 'list_all', 'update_memory',
  'delete_memory', 'detail_request', 'timeline', 'no_action'
]

/** BIO labels for token-level entity extraction ('O' = outside any entity) */
const ENTITY_BIO_LABELS = ['O', 'B-TECH', 'I-TECH', 'B-PROJECT', 'I-PROJECT', 'B-CONCEPT', 'I-CONCEPT']

/** Query intent labels matching model training order.
 * Model was trained with 'procedural'/'comparative' which map to code's 'pattern'/'comparison'. */
const QUERY_LABELS = ['factual', 'procedural', 'comparative', 'temporal', 'exploratory', 'decision'] as const

/** Map model query labels → QueryIntent['type'] (handles training label renames) */
const QUERY_LABEL_MAP: Record<string, QueryIntent['type']> = {
  factual: 'factual',
  procedural: 'pattern',
  comparative: 'comparison',
  temporal: 'temporal',
  exploratory: 'exploratory',
  decision: 'decision',
}

/** Pattern type labels — fallback when the pattern model's manifest has no label list
 * (presumably matches that model's training order — confirm against training config) */
const PATTERN_LABELS: Pattern['type'][] = ['solution', 'anti-pattern', 'best-practice', 'common-issue']

/** Knowledge type labels — fallback when the knowledge model's manifest has no label list */
const KNOWLEDGE_LABELS = ['fact', 'preference', 'constraint', 'goal', 'definition']

/** Map BIO entity tag suffix (the part after 'B-'/'I-') to EntityType */
const BIO_TYPE_MAP: Record<string, ExtractedEntity['type']> = {
  'TECH': 'technology',
  'PROJECT': 'project',
  'CONCEPT': 'concept',
}

/** Minimum softmax probability to accept a B-/I- entity tag (vs forcing O).
 * Also reused as the minimum average confidence for a whole entity span. */
const ENTITY_TOKEN_THRESHOLD = 0.5

/** Common words that should never be entities (false positive filter) */
const ENTITY_STOPWORDS = new Set([
  'i', 'we', 'you', 'he', 'she', 'it', 'they', 'me', 'us', 'him', 'her', 'them',
  'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
  'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
  'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might',
  'not', 'no', 'yes', 'this', 'that', 'these', 'those', 'my', 'your', 'our', 'their',
  'use', 'using', 'used', 'make', 'like', 'just', 'also', 'very', 'much',
])

/** EOS token ID for greedy decode (GPT-2 uses 50256) */
const EOS_TOKEN_ID = 50256

/** Max tokens to generate for compression */
const COMPRESS_MAX_TOKENS = 128

/** Inference mode per task: local model, regex fallback, both (run model and
 * regex, log the comparison), or remote API (compression only). */
type TaskMode = 'model' | 'regex' | 'both' | 'api'
87
+
88
+ export class InferenceRouter {
89
  // Structured logger, scoped to the 'inference-router' component in the constructor
  private logger: Logger
  // Full application config; SLM settings are read from config.slm
  private config: Config
  // Owns model availability checks, manifest metadata, and ONNX inference
  private modelManager: ModelManager
  // Minimum model probability required before a model prediction is trusted
  private confidenceThreshold: number

  // Regex fallback instances
  private intentClassifier: BrainIntentClassifier
  private entityExtractor: EntityExtractor

  /**
   * @param logger parent logger; a child logger tagged 'inference-router' is derived
   * @param config application config; reads slm.confidenceThreshold (defaults to 0.7)
   * @param modelManager provides hasModel/infer/getManifestEntry for all tasks
   */
  constructor(logger: Logger, config: Config, modelManager: ModelManager) {
    this.logger = logger.child({ component: 'inference-router' })
    this.config = config
    this.modelManager = modelManager
    this.confidenceThreshold = config.slm?.confidenceThreshold ?? 0.7

    // Initialize regex fallbacks
    this.intentClassifier = new BrainIntentClassifier()
    this.entityExtractor = new EntityExtractor()
  }
108
+
109
+ /**
110
+ * Check if SLM is enabled globally
111
+ */
112
+ get enabled(): boolean {
113
+ return this.config.slm?.enabled ?? false
114
+ }
115
+
116
+ /**
117
+ * Get the mode for a specific task
118
+ */
119
+ private getTaskMode(task: ModelTask): TaskMode {
120
+ if (!this.enabled) return task === 'compress' ? 'api' : 'regex'
121
+ const taskConfig = this.config.slm?.tasks
122
+ if (!taskConfig) return task === 'compress' ? 'api' : 'regex'
123
+
124
+ switch (task) {
125
+ case 'intent': return taskConfig.intent ?? 'regex'
126
+ case 'entity': return taskConfig.entity ?? 'regex'
127
+ case 'query': return taskConfig.query ?? 'regex'
128
+ case 'knowledge': return taskConfig.knowledge ?? 'regex'
129
+ case 'compress': return taskConfig.compress ?? 'api'
130
+ case 'pattern': return taskConfig.pattern ?? 'regex'
131
+ default: return 'regex'
132
+ }
133
+ }
134
+
135
+ /**
136
+ * Softmax over logits array (used when ONNX models are available)
137
+ */
138
+ softmax(logits: Float32Array): number[] {
139
+ const max = Math.max(...logits)
140
+ const exps = Array.from(logits).map(x => Math.exp(x - max))
141
+ const sum = exps.reduce((a, b) => a + b, 0)
142
+ return exps.map(e => e / sum)
143
+ }
144
+
145
+ // ── Intent Classification ──────────────────────────────────────────
146
+
147
+ /**
148
+ * Classify brain() message intent.
149
+ * Drop-in replacement for IntentClassifier.classify().
150
+ * Async to support model inference when ONNX models are available.
151
+ */
152
+ async classifyIntent(message: string): Promise<ClassificationResult> {
153
+ const mode = this.getTaskMode('intent')
154
+
155
+ if (mode === 'regex') {
156
+ return this.intentClassifier.classify(message)
157
+ }
158
+
159
+ // Try model
160
+ const modelResult = await this.tryModelClassifyIntent(message)
161
+ const regexResult = this.intentClassifier.classify(message)
162
+
163
+ if (mode === 'both') {
164
+ this.logComparison('intent', message, modelResult, regexResult)
165
+ return modelResult ?? regexResult
166
+ }
167
+
168
+ // mode === 'model': use model if available and confident
169
+ if (modelResult && modelResult.confidence >= this.confidenceThreshold) {
170
+ return modelResult
171
+ }
172
+ return regexResult
173
+ }
174
+
175
+ /**
176
+ * Attempt model-based intent classification. Returns null if model unavailable.
177
+ * Tokenizes message, runs ONNX inference, and maps logits to ClassificationResult.
178
+ */
179
+ private async tryModelClassifyIntent(message: string): Promise<ClassificationResult | null> {
180
+ if (!this.modelManager.hasModel('intent')) return null
181
+
182
+ try {
183
+ const tokenizer = await getTokenizer(this.logger)
184
+ const { inputIds, attentionMask } = tokenizer.encode(message, 128)
185
+ const logits = await this.modelManager.infer('intent', inputIds, attentionMask)
186
+ if (!logits) return null
187
+
188
+ const probs = this.softmax(logits)
189
+ const manifest = this.modelManager.getManifestEntry('intent')
190
+ const labels = (manifest?.labels as Intent[] | undefined) ?? INTENT_LABELS
191
+
192
+ // Find top prediction
193
+ let maxIdx = 0
194
+ let maxProb = probs[0]!
195
+ for (let i = 1; i < probs.length; i++) {
196
+ if (probs[i]! > maxProb) {
197
+ maxProb = probs[i]!
198
+ maxIdx = i
199
+ }
200
+ }
201
+
202
+ // Find secondary intents (above a lower threshold)
203
+ const secondary: Intent[] = []
204
+ for (let i = 0; i < probs.length; i++) {
205
+ if (i !== maxIdx && probs[i]! > 0.15) {
206
+ secondary.push(labels[i]!)
207
+ }
208
+ }
209
+
210
+ return {
211
+ primary: labels[maxIdx]!,
212
+ confidence: maxProb,
213
+ secondary,
214
+ }
215
+ } catch (error) {
216
+ this.logger.warn({ error }, 'Model intent classification failed, falling back to regex')
217
+ return null
218
+ }
219
+ }
220
+
221
+ // ── Entity Extraction ──────────────────────────────────────────────
222
+
223
+ /**
224
+ * Extract entities from text.
225
+ * Drop-in replacement for EntityExtractor.extract().
226
+ * Now async to support model inference.
227
+ */
228
+ async extractEntities(text: string): Promise<ExtractedEntity[]> {
229
+ const mode = this.getTaskMode('entity')
230
+
231
+ if (mode === 'regex') {
232
+ return this.entityExtractor.extract(text)
233
+ }
234
+
235
+ const modelResult = await this.tryModelExtractEntities(text)
236
+ const regexResult = this.entityExtractor.extract(text)
237
+
238
+ if (mode === 'both') {
239
+ this.logComparison('entity', text, modelResult, regexResult)
240
+ return modelResult ?? regexResult
241
+ }
242
+
243
+ return modelResult ?? regexResult
244
+ }
245
+
246
  /**
   * Attempt model-based entity extraction using BIO sequence labeling.
   * Output is per-token logits [numTokens * numLabels].
   * Groups consecutive B-/I- tags into ExtractedEntity objects.
   * Applies softmax per-token and filters low-confidence/garbage entities.
   *
   * Returns null when the model is unavailable, inference fails, or no
   * entity survives filtering — callers treat null as "use regex".
   */
  private async tryModelExtractEntities(text: string): Promise<ExtractedEntity[] | null> {
    if (!this.modelManager.hasModel('entity')) return null

    try {
      const tokenizer = await getTokenizer(this.logger)
      const maxLen = this.modelManager.getManifestEntry('entity')?.maxSeqLen ?? 128
      const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
      const logits = await this.modelManager.infer('entity', inputIds, attentionMask)
      if (!logits) return null

      const manifest = this.modelManager.getManifestEntry('entity')
      const labels = manifest?.labels ?? ENTITY_BIO_LABELS
      const numLabels = labels.length

      // Count active tokens (non-padding)
      const numTokens = attentionMask.filter(m => m === 1).length

      // Decode per-token BIO tags with softmax probabilities
      const tokenTags: { tag: string; prob: number }[] = []
      for (let t = 0; t < numTokens; t++) {
        // Extract logits for this token and apply softmax
        const tokenLogits = new Float32Array(numLabels)
        for (let l = 0; l < numLabels; l++) {
          tokenLogits[l] = logits[t * numLabels + l]!
        }
        const probs = this.softmax(tokenLogits)

        // Argmax over this token's label probabilities (first label wins ties)
        let bestIdx = 0
        let bestProb = probs[0]!
        for (let l = 1; l < numLabels; l++) {
          if (probs[l]! > bestProb) {
            bestProb = probs[l]!
            bestIdx = l
          }
        }

        // Only accept B-/I- tags if softmax probability exceeds entity threshold
        const tag = labels[bestIdx]!
        const isEntityTag = tag.startsWith('B-') || tag.startsWith('I-')
        if (isEntityTag && bestProb < ENTITY_TOKEN_THRESHOLD) {
          // Low-confidence entity tag is demoted to O (outside)
          tokenTags.push({ tag: 'O', prob: bestProb })
        } else {
          tokenTags.push({ tag, prob: bestProb })
        }
      }

      // Group consecutive B-/I- tags into entities
      const entities: ExtractedEntity[] = []
      let currentType: string | null = null  // BIO type suffix of the open span (e.g. 'TECH')
      let currentTokenIds: number[] = []     // token ids accumulated for the open span
      let currentProbs: number[] = []        // per-token probabilities for the open span
      let startPos = 0                       // token index where the open span began

      // Close the open span: decode it, filter junk, and reset accumulator state.
      const flushEntity = () => {
        if (currentType && currentTokenIds.length > 0) {
          const entityType = BIO_TYPE_MAP[currentType]
          if (entityType) {
            const name = tokenizer.decode(currentTokenIds).trim()
            const avgProb = currentProbs.reduce((a, b) => a + b, 0) / currentProbs.length

            // Filter: minimum 2 chars, not a stopword, decent average confidence
            if (name.length >= 2 && avgProb >= ENTITY_TOKEN_THRESHOLD && !ENTITY_STOPWORDS.has(name.toLowerCase())) {
              entities.push({
                name,
                normalizedName: name.toLowerCase(),
                type: entityType,
                confidence: avgProb,
                source: 'rule',
                // NOTE(review): startPos is a token index, not a character
                // offset — confirm consumers of `positions` expect that.
                positions: [startPos],
              })
            }
          }
        }
        currentType = null
        currentTokenIds = []
        currentProbs = []
      }

      for (let i = 0; i < tokenTags.length; i++) {
        const { tag, prob } = tokenTags[i]!

        if (tag.startsWith('B-')) {
          // New span begins: close any open span first
          flushEntity()
          currentType = tag.slice(2)
          currentTokenIds = [inputIds[i]!]
          currentProbs = [prob]
          startPos = i
        } else if (tag.startsWith('I-') && currentType === tag.slice(2)) {
          // Continuation of the open span (same type only)
          currentTokenIds.push(inputIds[i]!)
          currentProbs.push(prob)
        } else {
          // 'O' or a mismatched I- tag ends the open span
          flushEntity()
        }
      }
      flushEntity()

      return entities.length > 0 ? entities : null
    } catch (error) {
      this.logger.warn({ error }, 'Model entity extraction failed, falling back to regex')
      return null
    }
  }
354
+
355
+ // ── Query Intent Classification ────────────────────────────────────
356
+
357
+ /**
358
+ * Classify search query intent.
359
+ * Drop-in replacement for classifyIntent() in retrieval/query/intent-classifier.
360
+ * Now async to support model inference.
361
+ */
362
+ async classifyQueryIntent(query: string): Promise<QueryIntent> {
363
+ const mode = this.getTaskMode('query')
364
+
365
+ if (mode === 'regex') {
366
+ return classifyQueryIntent(query)
367
+ }
368
+
369
+ const modelResult = await this.tryModelClassifyQuery(query)
370
+ const regexResult = classifyQueryIntent(query)
371
+
372
+ if (mode === 'both') {
373
+ this.logComparison('query', query, modelResult, regexResult)
374
+ return modelResult ?? regexResult
375
+ }
376
+
377
+ if (modelResult && modelResult.confidence >= this.confidenceThreshold) {
378
+ return modelResult
379
+ }
380
+ return regexResult
381
+ }
382
+
383
+ /**
384
+ * Attempt model-based query intent classification.
385
+ * 6-class classifier. Model uses training labels (procedural, comparative)
386
+ * which get mapped to code labels (pattern, comparison) via QUERY_LABEL_MAP.
387
+ */
388
+ private async tryModelClassifyQuery(query: string): Promise<QueryIntent | null> {
389
+ if (!this.modelManager.hasModel('query')) return null
390
+
391
+ try {
392
+ const tokenizer = await getTokenizer(this.logger)
393
+ const maxLen = this.modelManager.getManifestEntry('query')?.maxSeqLen ?? 128
394
+ const { inputIds, attentionMask } = tokenizer.encode(query, maxLen)
395
+ const logits = await this.modelManager.infer('query', inputIds, attentionMask)
396
+ if (!logits) return null
397
+
398
+ const probs = this.softmax(logits)
399
+ const manifest = this.modelManager.getManifestEntry('query')
400
+ const labels = manifest?.labels ?? [...QUERY_LABELS]
401
+
402
+ let maxIdx = 0
403
+ let maxProb = probs[0]!
404
+ for (let i = 1; i < probs.length; i++) {
405
+ if (probs[i]! > maxProb) {
406
+ maxProb = probs[i]!
407
+ maxIdx = i
408
+ }
409
+ }
410
+
411
+ // Map model label to QueryIntent type (handles training label renames)
412
+ const rawLabel = labels[maxIdx]!
413
+ const mappedType = QUERY_LABEL_MAP[rawLabel] ?? (rawLabel as QueryIntent['type'])
414
+
415
+ return {
416
+ type: mappedType,
417
+ confidence: maxProb,
418
+ }
419
+ } catch (error) {
420
+ this.logger.warn({ error }, 'Model query classification failed, falling back to regex')
421
+ return null
422
+ }
423
+ }
424
+
425
+ // ── Pattern Classification ─────────────────────────────────────────
426
+
427
+ /**
428
+ * Classify pattern type.
429
+ * Drop-in for PatternRecognizer.determinePatternType().
430
+ * Now async to support model inference.
431
+ */
432
+ async classifyPatternType(description: string): Promise<Pattern['type']> {
433
+ const mode = this.getTaskMode('pattern')
434
+
435
+ if (mode === 'regex') {
436
+ return this.regexClassifyPattern(description)
437
+ }
438
+
439
+ const modelResult = await this.tryModelClassifyPattern(description)
440
+ const regexResult = this.regexClassifyPattern(description)
441
+
442
+ if (mode === 'both') {
443
+ this.logComparison('pattern', description, modelResult, regexResult)
444
+ return modelResult ?? regexResult
445
+ }
446
+
447
+ return modelResult ?? regexResult
448
+ }
449
+
450
+ /**
451
+ * Regex fallback for pattern classification (extracted from PatternRecognizer)
452
+ */
453
+ private regexClassifyPattern(description: string): Pattern['type'] {
454
+ const lower = description.toLowerCase()
455
+ if (lower.includes('avoid') || lower.includes('anti-pattern') || lower.includes("don't use")) {
456
+ return 'anti-pattern'
457
+ }
458
+ if (lower.includes('always') || lower.includes('best practice') || lower.includes('standard')) {
459
+ return 'best-practice'
460
+ }
461
+ if (lower.includes('issue') || lower.includes('bug') || lower.includes('fix')) {
462
+ return 'common-issue'
463
+ }
464
+ return 'solution'
465
+ }
466
+
467
+ /**
468
+ * Attempt model-based pattern type classification.
469
+ * 4-class classifier: solution, anti-pattern, best-practice, common-issue.
470
+ */
471
+ private async tryModelClassifyPattern(description: string): Promise<Pattern['type'] | null> {
472
+ if (!this.modelManager.hasModel('pattern')) return null
473
+
474
+ try {
475
+ const tokenizer = await getTokenizer(this.logger)
476
+ const maxLen = this.modelManager.getManifestEntry('pattern')?.maxSeqLen ?? 128
477
+ const { inputIds, attentionMask } = tokenizer.encode(description, maxLen)
478
+ const logits = await this.modelManager.infer('pattern', inputIds, attentionMask)
479
+ if (!logits) return null
480
+
481
+ const probs = this.softmax(logits)
482
+ const manifest = this.modelManager.getManifestEntry('pattern')
483
+ const labels = (manifest?.labels as Pattern['type'][] | undefined) ?? PATTERN_LABELS
484
+
485
+ let maxIdx = 0
486
+ let maxProb = probs[0]!
487
+ for (let i = 1; i < probs.length; i++) {
488
+ if (probs[i]! > maxProb) {
489
+ maxProb = probs[i]!
490
+ maxIdx = i
491
+ }
492
+ }
493
+
494
+ // Only return if confidence exceeds threshold
495
+ if (maxProb < this.confidenceThreshold) return null
496
+
497
+ return labels[maxIdx]!
498
+ } catch (error) {
499
+ this.logger.warn({ error }, 'Model pattern classification failed, falling back to regex')
500
+ return null
501
+ }
502
+ }
503
+
504
+ // ── Knowledge Type Classification ──────────────────────────────────
505
+
506
+ /**
507
+ * Classify knowledge type from text.
508
+ * 5-class classifier: fact, preference, constraint, goal, definition.
509
+ */
510
+ async classifyKnowledgeType(text: string): Promise<string> {
511
+ const mode = this.getTaskMode('knowledge')
512
+
513
+ if (mode === 'regex') {
514
+ return this.regexClassifyKnowledge(text)
515
+ }
516
+
517
+ const modelResult = await this.tryModelClassifyKnowledge(text)
518
+ const regexResult = this.regexClassifyKnowledge(text)
519
+
520
+ if (mode === 'both') {
521
+ this.logComparison('knowledge', text, modelResult, regexResult)
522
+ return modelResult ?? regexResult
523
+ }
524
+
525
+ if (modelResult) {
526
+ return modelResult
527
+ }
528
+ return regexResult
529
+ }
530
+
531
+ /**
532
+ * Regex fallback for knowledge type classification.
533
+ */
534
+ private regexClassifyKnowledge(text: string): string {
535
+ const lower = text.toLowerCase()
536
+ if (lower.includes('prefer') || lower.includes('like') || lower.includes('want') || lower.includes('favorite')) {
537
+ return 'preference'
538
+ }
539
+ if (lower.includes('must') || lower.includes('require') || lower.includes('constraint') || lower.includes('cannot') || lower.includes("can't") || lower.includes('never')) {
540
+ return 'constraint'
541
+ }
542
+ if (lower.includes('goal') || lower.includes('plan') || lower.includes('aim') || lower.includes('target') || lower.includes('objective')) {
543
+ return 'goal'
544
+ }
545
+ if (lower.includes('define') || lower.includes('definition') || lower.includes('means') || lower.includes('refers to') || lower.includes('is a')) {
546
+ return 'definition'
547
+ }
548
+ return 'fact'
549
+ }
550
+
551
+ /**
552
+ * Attempt model-based knowledge type classification.
553
+ * 5-class classifier: fact, preference, constraint, goal, definition.
554
+ */
555
+ private async tryModelClassifyKnowledge(text: string): Promise<string | null> {
556
+ if (!this.modelManager.hasModel('knowledge')) return null
557
+
558
+ try {
559
+ const tokenizer = await getTokenizer(this.logger)
560
+ const maxLen = this.modelManager.getManifestEntry('knowledge')?.maxSeqLen ?? 128
561
+ const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
562
+ const logits = await this.modelManager.infer('knowledge', inputIds, attentionMask)
563
+ if (!logits) return null
564
+
565
+ const probs = this.softmax(logits)
566
+ const manifest = this.modelManager.getManifestEntry('knowledge')
567
+ const labels = manifest?.labels ?? KNOWLEDGE_LABELS
568
+
569
+ let maxIdx = 0
570
+ let maxProb = probs[0]!
571
+ for (let i = 1; i < probs.length; i++) {
572
+ if (probs[i]! > maxProb) {
573
+ maxProb = probs[i]!
574
+ maxIdx = i
575
+ }
576
+ }
577
+
578
+ if (maxProb < this.confidenceThreshold) return null
579
+
580
+ return labels[maxIdx]!
581
+ } catch (error) {
582
+ this.logger.warn({ error }, 'Model knowledge classification failed, falling back to regex')
583
+ return null
584
+ }
585
+ }
586
+
587
+ // ── Local Compression ──────────────────────────────────────────────
588
+
589
+ /**
590
+ * Compress text using local model or return original if unavailable.
591
+ * Uses greedy autoregressive decoding for seq2seq generation.
592
+ */
593
+ async compress(text: string): Promise<string> {
594
+ const mode = this.getTaskMode('compress')
595
+
596
+ // In 'api' mode or 'regex' mode, no local compression available
597
+ if (mode === 'api' || mode === 'regex') {
598
+ return text
599
+ }
600
+
601
+ const modelResult = await this.tryModelCompress(text)
602
+
603
+ if (mode === 'both' && modelResult) {
604
+ this.logger.debug({
605
+ originalLen: text.length,
606
+ compressedLen: modelResult.length,
607
+ ratio: (modelResult.length / text.length).toFixed(2),
608
+ }, 'Compression comparison')
609
+ }
610
+
611
+ return modelResult ?? text
612
+ }
613
+
614
+ /**
615
+ * Attempt model-based text compression using greedy autoregressive decoding.
616
+ * Generates tokens one at a time until EOS or max length.
617
+ */
618
+ private async tryModelCompress(text: string): Promise<string | null> {
619
+ if (!this.modelManager.hasModel('compress')) return null
620
+
621
+ try {
622
+ const tokenizer = await getTokenizer(this.logger)
623
+ const maxLen = this.modelManager.getManifestEntry('compress')?.maxSeqLen ?? 256
624
+ const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
625
+
626
+ // Greedy autoregressive decode loop
627
+ const generatedIds: number[] = [...inputIds.filter((_, i) => attentionMask[i] === 1)]
628
+ const maxGenTokens = Math.min(COMPRESS_MAX_TOKENS, maxLen)
629
+
630
+ for (let step = 0; step < maxGenTokens; step++) {
631
+ const currentMask = new Array(generatedIds.length).fill(1)
632
+ const logits = await this.modelManager.infer('compress', generatedIds, currentMask)
633
+ if (!logits) break
634
+
635
+ // Get logits for the last token position
636
+ // For seq2seq, the output logits cover the vocabulary
637
+ const vocabSize = logits.length / generatedIds.length
638
+ const lastTokenOffset = (generatedIds.length - 1) * vocabSize
639
+ const lastTokenLogits = logits.slice(lastTokenOffset, lastTokenOffset + vocabSize)
640
+
641
+ // Find argmax (greedy decode)
642
+ let bestId = 0
643
+ let bestVal = -Infinity
644
+ for (let v = 0; v < lastTokenLogits.length; v++) {
645
+ if (lastTokenLogits[v]! > bestVal) {
646
+ bestVal = lastTokenLogits[v]!
647
+ bestId = v
648
+ }
649
+ }
650
+
651
+ // Stop on EOS
652
+ if (bestId === EOS_TOKEN_ID || bestId === 0) break
653
+
654
+ generatedIds.push(bestId)
655
+ }
656
+
657
+ // Decode only the generated tokens (after input)
658
+ const inputLen = inputIds.filter((_, i) => attentionMask[i] === 1).length
659
+ const outputIds = generatedIds.slice(inputLen)
660
+
661
+ if (outputIds.length === 0) return null
662
+
663
+ const decoded = tokenizer.decode(outputIds).trim()
664
+ return decoded.length > 0 ? decoded : null
665
+ } catch (error) {
666
+ this.logger.warn({ error }, 'Model compression failed, returning original text')
667
+ return null
668
+ }
669
+ }
670
+
671
+ // ── Comparison Logging ─────────────────────────────────────────────
672
+
673
+ /**
674
+ * Log model vs regex comparison for "both" mode validation.
675
+ * Stored in model_feedback table for later analysis.
676
+ */
677
+ private logComparison(task: ModelTask, input: string, modelResult: any, regexResult: any): void {
678
+ try {
679
+ const modelLabel = this.extractLabel(modelResult)
680
+ const regexLabel = this.extractLabel(regexResult)
681
+ const agree = modelLabel === regexLabel
682
+
683
+ this.logger.debug({
684
+ task,
685
+ modelLabel,
686
+ regexLabel,
687
+ agree,
688
+ }, 'Model vs regex comparison')
689
+
690
+ // Always log to feedback table (both agreements and disagreements)
691
+ logModelFeedback({
692
+ task,
693
+ input,
694
+ modelPrediction: modelLabel ?? 'null',
695
+ modelConfidence: this.extractConfidence(modelResult),
696
+ regexPrediction: regexLabel ?? 'null',
697
+ })
698
+
699
+ if (!agree) {
700
+ // Also log disagreement to training data for review
701
+ logTrainingData({
702
+ task,
703
+ input,
704
+ output: JSON.stringify({ model: modelLabel, regex: regexLabel, agreed: false }),
705
+ metadata: JSON.stringify({ mode: 'both', comparison: true }),
706
+ })
707
+ }
708
+ } catch {
709
+ // Non-critical
710
+ }
711
+ }
712
+
713
+ /**
714
+ * Extract confidence from a model result object.
715
+ */
716
+ private extractConfidence(result: any): number {
717
+ if (!result) return 0
718
+ if (typeof result.confidence === 'number') return result.confidence
719
+ return 0
720
+ }
721
+
722
+ private extractLabel(result: any): string | null {
723
+ if (!result) return null
724
+ if (typeof result === 'string') return result
725
+ if (result.primary) return result.primary
726
+ if (result.type) return result.type
727
+ return JSON.stringify(result).slice(0, 50)
728
+ }
729
+
730
+ // ── Status ─────────────────────────────────────────────────────────
731
+
732
+ /**
733
+ * Get inference routing status for all tasks
734
+ */
735
+ getStatus(): Record<ModelTask, { mode: TaskMode; modelAvailable: boolean; modelLoaded: boolean }> {
736
+ const tasks: ModelTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
737
+ const modelStatus = this.modelManager.getStatus()
738
+ const status = {} as Record<ModelTask, { mode: TaskMode; modelAvailable: boolean; modelLoaded: boolean }>
739
+
740
+ for (const task of tasks) {
741
+ status[task] = {
742
+ mode: this.getTaskMode(task),
743
+ modelAvailable: modelStatus[task].available,
744
+ modelLoaded: modelStatus[task].loaded,
745
+ }
746
+ }
747
+ return status
748
+ }
749
+ }