claude-brain 0.30.2 → 0.30.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236)
  1. package/README.md +241 -191
  2. package/VERSION +1 -1
  3. package/assets/CLAUDE-unified.md +11 -11
  4. package/assets/CLAUDE.md +29 -29
  5. package/package.json +7 -3
  6. package/packs/backend/node.json +173 -173
  7. package/packs/core/javascript.json +176 -176
  8. package/packs/core/typescript.json +222 -222
  9. package/packs/frontend/react.json +254 -254
  10. package/packs/meta/testing.json +172 -172
  11. package/scripts/postinstall.mjs +531 -531
  12. package/src/automation/decision-detector.ts +452 -452
  13. package/src/automation/phase12-manager.ts +456 -456
  14. package/src/automation/proactive-recall.ts +373 -373
  15. package/src/automation/project-detector.ts +310 -310
  16. package/src/automation/repo-scanner.ts +210 -205
  17. package/src/cli/auto-setup.ts +75 -75
  18. package/src/cli/auto-start.ts +266 -266
  19. package/src/cli/bin.ts +264 -264
  20. package/src/cli/commands/autostart.ts +90 -90
  21. package/src/cli/commands/chroma.ts +578 -577
  22. package/src/cli/commands/export-training.ts +70 -70
  23. package/src/cli/commands/export.ts +130 -130
  24. package/src/cli/commands/git-hook.ts +183 -183
  25. package/src/cli/commands/hooks.ts +217 -217
  26. package/src/cli/commands/init.ts +123 -123
  27. package/src/cli/commands/install-mcp.ts +122 -111
  28. package/src/cli/commands/models.ts +979 -979
  29. package/src/cli/commands/pack.ts +200 -200
  30. package/src/cli/commands/refresh.ts +344 -339
  31. package/src/cli/commands/reindex.ts +120 -120
  32. package/src/cli/commands/serve.ts +466 -463
  33. package/src/cli/commands/start.ts +44 -44
  34. package/src/cli/commands/status.ts +220 -203
  35. package/src/cli/commands/uninstall-mcp.ts +45 -41
  36. package/src/cli/commands/update.ts +130 -124
  37. package/src/cli/migrate-chroma.ts +106 -106
  38. package/src/cli/ui/animations.ts +80 -80
  39. package/src/cli/ui/components.ts +82 -82
  40. package/src/cli/ui/index.ts +4 -4
  41. package/src/cli/ui/logo.ts +36 -36
  42. package/src/cli/ui/theme.ts +55 -55
  43. package/src/code-intelligence/indexer.ts +352 -352
  44. package/src/code-intelligence/linker.ts +178 -178
  45. package/src/code-intelligence/parser.ts +484 -484
  46. package/src/code-intelligence/query.ts +291 -291
  47. package/src/code-intelligence/schema.ts +83 -83
  48. package/src/code-intelligence/types.ts +95 -95
  49. package/src/config/defaults.ts +52 -52
  50. package/src/config/home.ts +56 -56
  51. package/src/config/index.ts +5 -5
  52. package/src/config/loader.ts +192 -192
  53. package/src/config/schema.ts +446 -415
  54. package/src/config/validator.ts +182 -182
  55. package/src/context/assembler.ts +407 -400
  56. package/src/context/index.ts +79 -79
  57. package/src/context/progress-tracker.ts +174 -174
  58. package/src/context/standards-manager.ts +287 -287
  59. package/src/context/validator.ts +58 -58
  60. package/src/diagnostics/index.ts +122 -121
  61. package/src/health/index.ts +233 -232
  62. package/src/hooks/brain-hook.ts +134 -131
  63. package/src/hooks/capture.ts +168 -168
  64. package/src/hooks/claude-code-mastery.md +112 -112
  65. package/src/hooks/context-hook.ts +260 -245
  66. package/src/hooks/deduplicator.ts +72 -72
  67. package/src/hooks/git-capture.ts +109 -109
  68. package/src/hooks/git-hook-installer.ts +211 -207
  69. package/src/hooks/index.ts +20 -20
  70. package/src/hooks/installer.ts +306 -288
  71. package/src/hooks/interceptor-hook.ts +204 -201
  72. package/src/hooks/passive-classifier.ts +397 -397
  73. package/src/hooks/queue.ts +160 -129
  74. package/src/hooks/session-tracker.ts +312 -312
  75. package/src/hooks/types.ts +52 -52
  76. package/src/index.ts +7 -7
  77. package/src/intelligence/cross-project/generalizer.ts +283 -283
  78. package/src/intelligence/cross-project/index.ts +7 -7
  79. package/src/intelligence/hf-downloader.ts +222 -222
  80. package/src/intelligence/hf-manifest.json +78 -78
  81. package/src/intelligence/index.ts +24 -24
  82. package/src/intelligence/inference-router.ts +762 -762
  83. package/src/intelligence/model-manager.ts +263 -245
  84. package/src/intelligence/optimization/index.ts +10 -10
  85. package/src/intelligence/optimization/precompute.ts +202 -202
  86. package/src/intelligence/optimization/semantic-cache.ts +213 -207
  87. package/src/intelligence/prediction/index.ts +7 -7
  88. package/src/intelligence/prediction/recommender.ts +276 -268
  89. package/src/intelligence/reasoning/chain-retrieval.ts +243 -247
  90. package/src/intelligence/reasoning/index.ts +7 -7
  91. package/src/intelligence/temporal/evolution.ts +193 -197
  92. package/src/intelligence/temporal/index.ts +16 -16
  93. package/src/intelligence/temporal/query-processor.ts +190 -190
  94. package/src/intelligence/temporal/timeline.ts +272 -259
  95. package/src/intelligence/temporal/trends.ts +263 -263
  96. package/src/intelligence/tokenizer.ts +118 -118
  97. package/src/knowledge/entity-extractor.ts +447 -443
  98. package/src/knowledge/graph/builder.ts +185 -185
  99. package/src/knowledge/graph/linker.ts +201 -201
  100. package/src/knowledge/graph/memory-graph.ts +359 -359
  101. package/src/knowledge/graph/schema.ts +99 -99
  102. package/src/knowledge/graph/search.ts +166 -166
  103. package/src/knowledge/relationship-extractor.ts +108 -108
  104. package/src/memory/chroma/client.ts +211 -192
  105. package/src/memory/chroma/collection-manager.ts +92 -92
  106. package/src/memory/chroma/config.ts +57 -57
  107. package/src/memory/chroma/embeddings.ts +177 -175
  108. package/src/memory/chroma/index.ts +82 -82
  109. package/src/memory/chroma/migration.ts +270 -270
  110. package/src/memory/chroma/schemas.ts +69 -69
  111. package/src/memory/chroma/search.ts +319 -315
  112. package/src/memory/chroma/store.ts +755 -747
  113. package/src/memory/compression.ts +121 -121
  114. package/src/memory/consolidation/archiver.ts +162 -165
  115. package/src/memory/consolidation/merger.ts +182 -186
  116. package/src/memory/consolidation/scorer.ts +136 -136
  117. package/src/memory/database.ts +9 -0
  118. package/src/memory/dual-write.ts +145 -0
  119. package/src/memory/embeddings.ts +226 -226
  120. package/src/memory/episodic/detector.ts +108 -108
  121. package/src/memory/episodic/manager.ts +347 -351
  122. package/src/memory/episodic/summarizer.ts +179 -179
  123. package/src/memory/episodic/types.ts +52 -52
  124. package/src/memory/fts5-search.ts +692 -633
  125. package/src/memory/index.ts +943 -1060
  126. package/src/memory/migrations/add-fts5.ts +118 -108
  127. package/src/memory/patterns.ts +438 -438
  128. package/src/memory/pruning.ts +60 -60
  129. package/src/memory/schema.ts +88 -88
  130. package/src/memory/store.ts +911 -787
  131. package/src/orchestrator/handlers/decision-handler.ts +204 -204
  132. package/src/packs/index.ts +9 -9
  133. package/src/packs/loader.ts +134 -134
  134. package/src/packs/manager.ts +204 -204
  135. package/src/packs/ranker.ts +78 -78
  136. package/src/packs/types.ts +81 -81
  137. package/src/phase12/index.ts +5 -5
  138. package/src/retrieval/bm25/index.ts +300 -297
  139. package/src/retrieval/bm25/tokenizer.ts +184 -184
  140. package/src/retrieval/feedback/adaptive.ts +221 -221
  141. package/src/retrieval/feedback/index.ts +16 -16
  142. package/src/retrieval/feedback/metrics.ts +221 -221
  143. package/src/retrieval/feedback/store.ts +283 -283
  144. package/src/retrieval/fusion/index.ts +194 -194
  145. package/src/retrieval/fusion/rrf.ts +165 -165
  146. package/src/retrieval/index.ts +12 -12
  147. package/src/retrieval/pipeline.ts +375 -375
  148. package/src/retrieval/query/expander.ts +203 -203
  149. package/src/retrieval/query/index.ts +27 -27
  150. package/src/retrieval/query/intent-classifier.ts +252 -252
  151. package/src/retrieval/query/temporal-parser.ts +295 -295
  152. package/src/retrieval/reranker/index.ts +189 -188
  153. package/src/retrieval/reranker/model.ts +99 -95
  154. package/src/retrieval/service.ts +125 -125
  155. package/src/retrieval/types.ts +162 -162
  156. package/src/routing/entity-extractor.ts +454 -454
  157. package/src/routing/handlers/exploration-handler.ts +369 -0
  158. package/src/routing/handlers/index.ts +19 -0
  159. package/src/routing/handlers/memory-handler.ts +273 -0
  160. package/src/routing/handlers/mutation-handler.ts +241 -0
  161. package/src/routing/handlers/recall-handler.ts +642 -0
  162. package/src/routing/handlers/shared.ts +515 -0
  163. package/src/routing/handlers/types.ts +48 -0
  164. package/src/routing/intent-classifier.ts +552 -552
  165. package/src/routing/response-filter.ts +399 -391
  166. package/src/routing/router.ts +245 -2193
  167. package/src/routing/search-engine.ts +521 -514
  168. package/src/routing/types.ts +104 -94
  169. package/src/scripts/health-check.ts +118 -118
  170. package/src/scripts/setup.ts +122 -122
  171. package/src/server/auto-updater.ts +283 -276
  172. package/src/server/handlers/call-tool.ts +159 -159
  173. package/src/server/handlers/list-tools.ts +35 -35
  174. package/src/server/handlers/tools/auto-remember.ts +165 -165
  175. package/src/server/handlers/tools/brain.ts +86 -86
  176. package/src/server/handlers/tools/create-project.ts +135 -135
  177. package/src/server/handlers/tools/get-code-standards.ts +123 -123
  178. package/src/server/handlers/tools/get-corrections.ts +152 -152
  179. package/src/server/handlers/tools/get-patterns.ts +156 -156
  180. package/src/server/handlers/tools/get-project-context.ts +75 -75
  181. package/src/server/handlers/tools/index.ts +30 -30
  182. package/src/server/handlers/tools/init-project.ts +756 -756
  183. package/src/server/handlers/tools/list-projects.ts +126 -126
  184. package/src/server/handlers/tools/recall-similar.ts +87 -87
  185. package/src/server/handlers/tools/recognize-pattern.ts +132 -132
  186. package/src/server/handlers/tools/record-correction.ts +131 -131
  187. package/src/server/handlers/tools/remember-decision.ts +168 -168
  188. package/src/server/handlers/tools/schemas.ts +179 -179
  189. package/src/server/handlers/tools/search-code.ts +122 -122
  190. package/src/server/handlers/tools/smart-context.ts +146 -146
  191. package/src/server/handlers/tools/update-progress.ts +131 -131
  192. package/src/server/http-api.ts +215 -1229
  193. package/src/server/mcp-proxy.ts +85 -84
  194. package/src/server/mcp-server.ts +285 -284
  195. package/src/server/middleware/auth.ts +39 -0
  196. package/src/server/middleware/error-handler.ts +37 -0
  197. package/src/server/middleware/rate-limit.ts +53 -0
  198. package/src/server/middleware/validate.ts +42 -0
  199. package/src/server/pid-manager.ts +137 -136
  200. package/src/server/providers/resources.ts +581 -581
  201. package/src/server/routes/code.ts +228 -0
  202. package/src/server/routes/context.ts +26 -0
  203. package/src/server/routes/health.ts +19 -0
  204. package/src/server/routes/helpers.ts +100 -0
  205. package/src/server/routes/hooks.ts +197 -0
  206. package/src/server/routes/mcp.ts +47 -0
  207. package/src/server/routes/memory.ts +397 -0
  208. package/src/server/routes/models.ts +96 -0
  209. package/src/server/routes/projects.ts +89 -0
  210. package/src/server/routes/types.ts +21 -0
  211. package/src/server/schemas/api-schemas.ts +202 -0
  212. package/src/server/services.ts +720 -720
  213. package/src/server/utils/memory-indicator.ts +84 -84
  214. package/src/server/utils/response-formatter.ts +129 -129
  215. package/src/server/web-viewer.ts +1145 -1115
  216. package/src/setup/index.ts +38 -38
  217. package/src/tools/registry.ts +115 -115
  218. package/src/tools/schemas.ts +666 -666
  219. package/src/tools/types.ts +412 -412
  220. package/src/training/data-store.ts +320 -298
  221. package/src/training/retrain-pipeline.ts +399 -394
  222. package/src/utils/error-handler.ts +136 -136
  223. package/src/utils/index.ts +58 -58
  224. package/src/utils/kill-port.ts +55 -53
  225. package/src/utils/phase12-helper.ts +56 -56
  226. package/src/utils/safe-path.ts +43 -0
  227. package/src/utils/timing.ts +47 -47
  228. package/src/utils/transaction.ts +63 -63
  229. package/src/vault/index.ts +4 -3
  230. package/src/vault/paths.ts +106 -106
  231. package/src/vault/query.ts +4 -1
  232. package/src/vault/reader.ts +44 -1
  233. package/src/vault/watcher.ts +24 -1
  234. package/src/vault/writer.ts +487 -413
  235. package/skills/persistent-memory/SKILL.md +0 -148
  236. package/skills/persistent-memory/references/tool-reference.md +0 -90
@@ -1,762 +1,762 @@
- /**
-  * Inference Router — SLM Upgrade Phase 4B
-  * Routes each classification task to model or regex fallback.
-  *
-  * For each task:
-  * 1. If config mode is 'regex'/'api' → use regex/API only
-  * 2. If config mode is 'model' → try model, fall back to regex on failure
-  * 3. If config mode is 'both' → run both, log comparison, use model output
-  *
-  * Confidence thresholding: if model confidence < threshold, use regex instead.
-  * Function signatures match the existing regex classifiers exactly.
-  */
-
- import type { Logger } from 'pino'
- import type { Config } from '@/config'
- import type { ModelManager, ModelTask } from './model-manager'
- import { logTrainingData, logModelFeedback } from '@/training/data-store'
- import { getTokenizer } from './tokenizer'
-
- // Import existing regex classifiers
- import { IntentClassifier as BrainIntentClassifier, type ClassificationResult } from '@/routing/intent-classifier'
- import type { Intent } from '@/routing/intent-classifier'
- import { EntityExtractor, type ExtractedEntity } from '@/knowledge/entity-extractor'
- import { classifyIntent as classifyQueryIntent } from '@/retrieval/query/intent-classifier'
- import type { QueryIntent } from '@/retrieval/types'
- import type { Pattern } from '@/memory/patterns'
-
- /** Default intent labels matching the model training order */
- const INTENT_LABELS: Intent[] = [
-   'session_start', 'context_needed', 'decision_made', 'store_this',
-   'pattern_found', 'mistake_learned', 'progress_update', 'question',
-   'comparison', 'exploration', 'list_all', 'update_memory',
-   'delete_memory', 'detail_request', 'timeline', 'no_action'
- ]
-
- /** BIO labels for token-level entity extraction */
- const ENTITY_BIO_LABELS = ['O', 'B-TECH', 'I-TECH', 'B-PROJECT', 'I-PROJECT', 'B-CONCEPT', 'I-CONCEPT']
-
- /** Query intent labels matching model training order.
-  * Model was trained with 'procedural'/'comparative' which map to code's 'pattern'/'comparison'. */
- const QUERY_LABELS = ['factual', 'procedural', 'comparative', 'temporal', 'exploratory', 'decision'] as const
-
- /** Map model query labels → QueryIntent['type'] (handles training label renames) */
- const QUERY_LABEL_MAP: Record<string, QueryIntent['type']> = {
-   factual: 'factual',
-   procedural: 'pattern',
-   comparative: 'comparison',
-   temporal: 'temporal',
-   exploratory: 'exploratory',
-   decision: 'decision',
- }
-
- /** Pattern type labels */
- const PATTERN_LABELS: Pattern['type'][] = ['solution', 'anti-pattern', 'best-practice', 'common-issue']
-
- /** Knowledge type labels */
- const KNOWLEDGE_LABELS = ['fact', 'preference', 'constraint', 'goal', 'definition']
-
- /** Map BIO entity tag prefix to EntityType */
- const BIO_TYPE_MAP: Record<string, ExtractedEntity['type']> = {
-   'TECH': 'technology',
-   'PROJECT': 'project',
-   'CONCEPT': 'concept',
- }
-
- /** Minimum softmax probability to accept a B-/I- entity tag (vs forcing O) */
- const ENTITY_TOKEN_THRESHOLD = 0.25
-
- /** Common words that should never be entities (false positive filter) */
- const ENTITY_STOPWORDS = new Set([
-   'i', 'we', 'you', 'he', 'she', 'it', 'they', 'me', 'us', 'him', 'her', 'them',
-   'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
-   'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
-   'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might',
-   'not', 'no', 'yes', 'this', 'that', 'these', 'those', 'my', 'your', 'our', 'their',
-   'use', 'using', 'used', 'make', 'like', 'just', 'also', 'very', 'much',
- ])
-
- /** EOS token ID for greedy decode (GPT-2 uses 50256) */
- const EOS_TOKEN_ID = 50256
-
- /** Max tokens to generate for compression */
- const COMPRESS_MAX_TOKENS = 128
-
- /** Inference mode per task */
- type TaskMode = 'model' | 'regex' | 'both' | 'api'
-
- export class InferenceRouter {
-   private logger: Logger
-   private config: Config
-   private modelManager: ModelManager
-   private confidenceThreshold: number
-
-   // Regex fallback instances
-   private intentClassifier: BrainIntentClassifier
-   private entityExtractor: EntityExtractor
-
-   constructor(logger: Logger, config: Config, modelManager: ModelManager) {
-     this.logger = logger.child({ component: 'inference-router' })
-     this.config = config
-     this.modelManager = modelManager
-     this.confidenceThreshold = config.slm?.confidenceThreshold ?? 0.7
-
-     // Initialize regex fallbacks
-     this.intentClassifier = new BrainIntentClassifier()
-     this.entityExtractor = new EntityExtractor()
-   }
-
-   /**
-    * Check if SLM is enabled globally
-    */
-   get enabled(): boolean {
-     return this.config.slm?.enabled ?? false
-   }
-
-   /**
-    * Get the mode for a specific task
-    */
-   private getTaskMode(task: ModelTask): TaskMode {
-     if (!this.enabled) return task === 'compress' ? 'api' : 'regex'
-     const taskConfig = this.config.slm?.tasks
-     if (!taskConfig) return task === 'compress' ? 'api' : 'regex'
-
-     switch (task) {
-       case 'intent': return taskConfig.intent ?? 'regex'
-       case 'entity': return taskConfig.entity ?? 'regex'
-       case 'query': return taskConfig.query ?? 'regex'
-       case 'knowledge': return taskConfig.knowledge ?? 'regex'
-       case 'compress': return taskConfig.compress ?? 'api'
-       case 'pattern': return taskConfig.pattern ?? 'regex'
-       default: return 'regex'
-     }
-   }
-
-   /**
-    * Softmax over logits array (used when ONNX models are available)
-    */
-   softmax(logits: Float32Array): number[] {
-     const max = Math.max(...logits)
-     const exps = Array.from(logits).map(x => Math.exp(x - max))
-     const sum = exps.reduce((a, b) => a + b, 0)
-     return exps.map(e => e / sum)
-   }
-
-   // ── Intent Classification ──────────────────────────────────────────
-
-   /**
-    * Classify brain() message intent.
-    * Drop-in replacement for IntentClassifier.classify().
-    * Async to support model inference when ONNX models are available.
-    */
-   async classifyIntent(message: string): Promise<ClassificationResult> {
-     const mode = this.getTaskMode('intent')
-
-     if (mode === 'regex') {
-       return this.intentClassifier.classify(message)
-     }
-
-     // Try model
-     const modelResult = await this.tryModelClassifyIntent(message)
-     const regexResult = this.intentClassifier.classify(message)
-
-     if (mode === 'both') {
-       this.logComparison('intent', message, modelResult, regexResult)
-       return modelResult ?? regexResult
-     }
-
-     // mode === 'model': use model if available and confident
-     if (modelResult && modelResult.confidence >= this.confidenceThreshold) {
-       return modelResult
-     }
-     return regexResult
-   }
-
-   /**
-    * Attempt model-based intent classification. Returns null if model unavailable.
-    * Tokenizes message, runs ONNX inference, and maps logits to ClassificationResult.
-    */
-   private async tryModelClassifyIntent(message: string): Promise<ClassificationResult | null> {
-     if (!this.modelManager.hasModel('intent')) return null
-
-     try {
-       const tokenizer = await getTokenizer(this.logger)
-       const { inputIds, attentionMask } = tokenizer.encode(message, 128)
-       const logits = await this.modelManager.infer('intent', inputIds, attentionMask)
-       if (!logits) return null
-
-       const probs = this.softmax(logits)
-       const manifest = this.modelManager.getManifestEntry('intent')
-       const labels = (manifest?.labels as Intent[] | undefined) ?? INTENT_LABELS
-
-       // Find top prediction
-       let maxIdx = 0
-       let maxProb = probs[0]!
-       for (let i = 1; i < probs.length; i++) {
-         if (probs[i]! > maxProb) {
-           maxProb = probs[i]!
-           maxIdx = i
-         }
-       }
-
-       // Find secondary intents (above a lower threshold)
-       const secondary: Intent[] = []
-       for (let i = 0; i < probs.length; i++) {
-         if (i !== maxIdx && probs[i]! > 0.15) {
-           secondary.push(labels[i]!)
-         }
-       }
-
-       return {
-         primary: labels[maxIdx]!,
-         confidence: maxProb,
-         secondary,
-       }
-     } catch (error) {
-       this.logger.warn({ error }, 'Model intent classification failed, falling back to regex')
-       return null
-     }
-   }
-
-   // ── Entity Extraction ──────────────────────────────────────────────
-
-   /**
-    * Extract entities from text.
-    * Drop-in replacement for EntityExtractor.extract().
-    * Now async to support model inference.
-    */
-   async extractEntities(text: string): Promise<ExtractedEntity[]> {
-     const mode = this.getTaskMode('entity')
-
-     if (mode === 'regex') {
-       return this.entityExtractor.extract(text)
-     }
-
-     const modelResult = await this.tryModelExtractEntities(text)
-     const regexResult = this.entityExtractor.extract(text)
-
-     if (mode === 'both') {
-       this.logComparison('entity', text, modelResult, regexResult)
-       return modelResult ?? regexResult
-     }
-
-     return modelResult ?? regexResult
-   }
-
-   /**
-    * Attempt model-based entity extraction using BIO sequence labeling.
-    * Output is per-token logits [numTokens * numLabels].
-    * Groups consecutive B-/I- tags into ExtractedEntity objects.
-    * Applies softmax per-token and filters low-confidence/garbage entities.
-    */
-   private async tryModelExtractEntities(text: string): Promise<ExtractedEntity[] | null> {
-     if (!this.modelManager.hasModel('entity')) return null
-
-     try {
-       const tokenizer = await getTokenizer(this.logger)
-       const maxLen = this.modelManager.getManifestEntry('entity')?.maxSeqLen ?? 128
-       const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
-       const logits = await this.modelManager.infer('entity', inputIds, attentionMask)
-       if (!logits) return null
-
-       const manifest = this.modelManager.getManifestEntry('entity')
-       const labels = manifest?.labels ?? ENTITY_BIO_LABELS
-       const numLabels = labels.length
-
-       // Count active tokens (non-padding)
-       const numTokens = attentionMask.filter(m => m === 1).length
-
-       // Decode per-token BIO tags with softmax probabilities
-       const tokenTags: { tag: string; prob: number }[] = []
-       for (let t = 0; t < numTokens; t++) {
-         // Extract logits for this token and apply softmax
-         const tokenLogits = new Float32Array(numLabels)
-         for (let l = 0; l < numLabels; l++) {
-           tokenLogits[l] = logits[t * numLabels + l]!
-         }
-         const probs = this.softmax(tokenLogits)
-
-         let bestIdx = 0
-         let bestProb = probs[0]!
-         for (let l = 1; l < numLabels; l++) {
-           if (probs[l]! > bestProb) {
-             bestProb = probs[l]!
-             bestIdx = l
-           }
-         }
-
-         // Only accept B-/I- tags if softmax probability exceeds entity threshold
-         const tag = labels[bestIdx]!
-         const isEntityTag = tag.startsWith('B-') || tag.startsWith('I-')
-         if (isEntityTag && bestProb < ENTITY_TOKEN_THRESHOLD) {
-           tokenTags.push({ tag: 'O', prob: bestProb })
-         } else {
-           tokenTags.push({ tag, prob: bestProb })
-         }
-       }
-
-       // Group consecutive B-/I- tags into entities
-       const entities: ExtractedEntity[] = []
-       let currentType: string | null = null
-       let currentTokenIds: number[] = []
-       let currentProbs: number[] = []
-       let startPos = 0
-
-       const flushEntity = () => {
-         if (currentType && currentTokenIds.length > 0) {
-           const entityType = BIO_TYPE_MAP[currentType]
-           if (entityType) {
-             const name = tokenizer.decode(currentTokenIds).trim()
-             const avgProb = currentProbs.reduce((a, b) => a + b, 0) / currentProbs.length
-
-             // Filter: minimum 2 chars, not a stopword, decent average confidence
-             if (name.length >= 2 && avgProb >= ENTITY_TOKEN_THRESHOLD && !ENTITY_STOPWORDS.has(name.toLowerCase())) {
-               entities.push({
-                 name,
-                 normalizedName: name.toLowerCase(),
-                 type: entityType,
-                 confidence: avgProb,
-                 source: 'model',
-                 positions: [startPos],
-               })
-             }
-           }
-         }
-         currentType = null
-         currentTokenIds = []
-         currentProbs = []
-       }
-
-       for (let i = 0; i < tokenTags.length; i++) {
-         const { tag, prob } = tokenTags[i]!
-
-         if (tag.startsWith('B-')) {
-           flushEntity()
-           currentType = tag.slice(2)
-           currentTokenIds = [inputIds[i]!]
-           currentProbs = [prob]
-           startPos = i
-         } else if (tag.startsWith('I-') && currentType === tag.slice(2)) {
-           currentTokenIds.push(inputIds[i]!)
-           currentProbs.push(prob)
-         } else {
-           flushEntity()
-         }
-       }
-       flushEntity()
-
-       return entities.length > 0 ? entities : null
-     } catch (error) {
-       this.logger.warn({ error }, 'Model entity extraction failed, falling back to regex')
-       return null
-     }
-   }
-
-   // ── Query Intent Classification ────────────────────────────────────
-
-   /**
-    * Classify search query intent.
-    * Drop-in replacement for classifyIntent() in retrieval/query/intent-classifier.
-    * Now async to support model inference.
-    */
-   async classifyQueryIntent(query: string): Promise<QueryIntent> {
-     const mode = this.getTaskMode('query')
-
-     if (mode === 'regex') {
-       return classifyQueryIntent(query)
-     }
-
-     const modelResult = await this.tryModelClassifyQuery(query)
-     const regexResult = classifyQueryIntent(query)
-
-     if (mode === 'both') {
-       this.logComparison('query', query, modelResult, regexResult)
-       return modelResult ?? regexResult
-     }
-
-     if (modelResult && modelResult.confidence >= this.confidenceThreshold) {
-       return modelResult
-     }
-     return regexResult
-   }
-
-   /**
-    * Attempt model-based query intent classification.
-    * 6-class classifier. Model uses training labels (procedural, comparative)
-    * which get mapped to code labels (pattern, comparison) via QUERY_LABEL_MAP.
-    */
-   private async tryModelClassifyQuery(query: string): Promise<QueryIntent | null> {
-     if (!this.modelManager.hasModel('query')) return null
-
-     try {
-       const tokenizer = await getTokenizer(this.logger)
-       const maxLen = this.modelManager.getManifestEntry('query')?.maxSeqLen ?? 128
-       const { inputIds, attentionMask } = tokenizer.encode(query, maxLen)
-       const logits = await this.modelManager.infer('query', inputIds, attentionMask)
-       if (!logits) return null
-
-       const probs = this.softmax(logits)
-       const manifest = this.modelManager.getManifestEntry('query')
-       const labels = manifest?.labels ?? [...QUERY_LABELS]
-
-       let maxIdx = 0
-       let maxProb = probs[0]!
-       for (let i = 1; i < probs.length; i++) {
-         if (probs[i]! > maxProb) {
-           maxProb = probs[i]!
-           maxIdx = i
-         }
-       }
-
-       // Map model label to QueryIntent type (handles training label renames)
-       const rawLabel = labels[maxIdx]!
-       const mappedType = QUERY_LABEL_MAP[rawLabel] ?? (rawLabel as QueryIntent['type'])
-
-       return {
-         type: mappedType,
-         confidence: maxProb,
-       }
-     } catch (error) {
-       this.logger.warn({ error }, 'Model query classification failed, falling back to regex')
-       return null
-     }
-   }
-
-   // ── Pattern Classification ─────────────────────────────────────────
-
-   /**
-    * Classify pattern type.
-    * Drop-in for PatternRecognizer.determinePatternType().
-    * Now async to support model inference.
-    */
-   async classifyPatternType(description: string): Promise<Pattern['type']> {
-     const mode = this.getTaskMode('pattern')
-
-     if (mode === 'regex') {
-       return this.regexClassifyPattern(description)
-     }
-
-     const modelResult = await this.tryModelClassifyPattern(description)
-     const regexResult = this.regexClassifyPattern(description)
-
-     if (mode === 'both') {
-       this.logComparison('pattern', description, modelResult, regexResult)
-       return modelResult ?? regexResult
-     }
-
-     return modelResult ?? regexResult
-   }
-
-   /**
-    * Regex fallback for pattern classification (extracted from PatternRecognizer)
-    */
-   private regexClassifyPattern(description: string): Pattern['type'] {
-     const lower = description.toLowerCase()
-     if (lower.includes('avoid') || lower.includes('anti-pattern') || lower.includes("don't use")) {
-       return 'anti-pattern'
-     }
-     if (lower.includes('always') || lower.includes('best practice') || lower.includes('standard')) {
-       return 'best-practice'
-     }
-     if (lower.includes('issue') || lower.includes('bug') || lower.includes('fix')) {
-       return 'common-issue'
-     }
-     return 'solution'
-   }
-
-   /**
-    * Attempt model-based pattern type classification.
-    * 4-class classifier: solution, anti-pattern, best-practice, common-issue.
-    */
-   private async tryModelClassifyPattern(description: string): Promise<Pattern['type'] | null> {
-     if (!this.modelManager.hasModel('pattern')) return null
-
-     try {
-       const tokenizer = await getTokenizer(this.logger)
-       const maxLen = this.modelManager.getManifestEntry('pattern')?.maxSeqLen ?? 128
-       const { inputIds, attentionMask } = tokenizer.encode(description, maxLen)
-       const logits = await this.modelManager.infer('pattern', inputIds, attentionMask)
-       if (!logits) return null
-
-       const probs = this.softmax(logits)
-       const manifest = this.modelManager.getManifestEntry('pattern')
-       const labels = (manifest?.labels as Pattern['type'][] | undefined) ?? PATTERN_LABELS
-
-       let maxIdx = 0
-       let maxProb = probs[0]!
-       for (let i = 1; i < probs.length; i++) {
-         if (probs[i]! > maxProb) {
-           maxProb = probs[i]!
-           maxIdx = i
-         }
-       }
-
-       // Only return if confidence exceeds threshold
-       if (maxProb < this.confidenceThreshold) return null
-
-       return labels[maxIdx]!
-     } catch (error) {
-       this.logger.warn({ error }, 'Model pattern classification failed, falling back to regex')
-       return null
-     }
-   }
-
-   // ── Knowledge Type Classification ──────────────────────────────────
-
-   /**
-    * Classify knowledge type from text.
-    * 5-class classifier: fact, preference, constraint, goal, definition.
-    */
-   async classifyKnowledgeType(text: string): Promise<string> {
-     const mode = this.getTaskMode('knowledge')
-
-     if (mode === 'regex') {
-       return this.regexClassifyKnowledge(text)
-     }
-
-     const modelResult = await this.tryModelClassifyKnowledge(text)
-     const regexResult = this.regexClassifyKnowledge(text)
-
-     if (mode === 'both') {
-       this.logComparison('knowledge', text, modelResult, regexResult)
-     }
-
-     // Cross-check: if regex found a keyword match (non-'fact') and model disagrees, prefer regex.
-     // Regex only returns non-'fact' when it finds a definitive keyword (prefer, must, goal, etc.)
-     // so it's more trustworthy than an undertrained model for those cases.
-     if (modelResult && regexResult !== 'fact' && modelResult !== regexResult) {
-       this.logger.debug({ modelResult, regexResult }, 'Knowledge cross-check: regex keyword wins over model')
-       return regexResult
-     }
-
-     if (modelResult) {
-       return modelResult
-     }
-     return regexResult
-   }
-
-   /**
-    * Regex fallback for knowledge type classification.
-    */
-   private regexClassifyKnowledge(text: string): string {
-     const lower = text.toLowerCase()
-     if (lower.includes('prefer') || lower.includes('like') || lower.includes('want') || lower.includes('favorite')) {
-       return 'preference'
-     }
-     if (lower.includes('must') || lower.includes('require') || lower.includes('constraint') || lower.includes('cannot') || lower.includes("can't") || lower.includes('never')) {
-       return 'constraint'
-     }
-     if (lower.includes('goal') || lower.includes('plan') || lower.includes('aim') || lower.includes('target') || lower.includes('objective')) {
-       return 'goal'
-     }
-     if (lower.includes('define') || lower.includes('definition') || lower.includes('means') || lower.includes('refers to') || lower.includes('is a')) {
-       return 'definition'
-     }
-     return 'fact'
-   }
-
-   /**
-    * Attempt model-based knowledge type classification.
-    * 5-class classifier: fact, preference, constraint, goal, definition.
-    */
-   private async tryModelClassifyKnowledge(text: string): Promise<string | null> {
-     if (!this.modelManager.hasModel('knowledge')) return null
-
-     try {
-       const tokenizer = await getTokenizer(this.logger)
-       const maxLen = this.modelManager.getManifestEntry('knowledge')?.maxSeqLen ?? 128
-       const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
-       const logits = await this.modelManager.infer('knowledge', inputIds, attentionMask)
-       if (!logits) return null
-
-       const probs = this.softmax(logits)
-       const manifest = this.modelManager.getManifestEntry('knowledge')
-       const labels = manifest?.labels ?? KNOWLEDGE_LABELS
-
-       let maxIdx = 0
-       let maxProb = probs[0]!
-       for (let i = 1; i < probs.length; i++) {
-         if (probs[i]! > maxProb) {
-           maxProb = probs[i]!
-           maxIdx = i
-         }
-       }
-
-       if (maxProb < this.confidenceThreshold) return null
-
-       return labels[maxIdx]!
-     } catch (error) {
-       this.logger.warn({ error }, 'Model knowledge classification failed, falling back to regex')
-       return null
-     }
-   }
-
-   // ── Local Compression ──────────────────────────────────────────────
-
-   /**
-    * Compress text using local model or return original if unavailable.
-    * Uses greedy autoregressive decoding for seq2seq generation.
-    */
-   async compress(text: string): Promise<string> {
-     const mode = this.getTaskMode('compress')
-
-     // In 'api' mode or 'regex' mode, no local compression available
-     if (mode === 'api' || mode === 'regex') {
-       return text
-     }
-
-     const modelResult = await this.tryModelCompress(text)
-
-     if (mode === 'both' && modelResult) {
-       this.logger.debug({
-         originalLen: text.length,
-         compressedLen: modelResult.length,
-         ratio: (modelResult.length / text.length).toFixed(2),
-       }, 'Compression comparison')
-     }
-
-     return modelResult ?? text
-   }
-
-   /**
-    * Attempt model-based text compression using greedy autoregressive decoding.
-    * Generates tokens one at a time until EOS or max length.
-    */
-   private async tryModelCompress(text: string): Promise<string | null> {
-     if (!this.modelManager.hasModel('compress')) return null
-
-     try {
-       const tokenizer = await getTokenizer(this.logger)
-       const maxLen = this.modelManager.getManifestEntry('compress')?.maxSeqLen ?? 256
-       const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
-
-       // Keep the padded arrays (fixed shape [1, maxLen]) and track active length.
-       // The ONNX model expects fixed input shape [1, maxLen]; stripping padding
-       // would create variable-length tensors that the model rejects.
-       let activeLen = attentionMask.filter(m => m === 1).length
-       const maxGenTokens = Math.min(COMPRESS_MAX_TOKENS, maxLen)
-       const inputLen = activeLen
-
-       for (let step = 0; step < maxGenTokens; step++) {
-         if (activeLen >= maxLen) break
-
-         const logits = await this.modelManager.infer('compress', [...inputIds], [...attentionMask])
-         if (!logits) break
-
-         // vocabSize = total logits / sequence length (maxLen, the padded dimension)
-         const vocabSize = logits.length / maxLen
-         const lastActiveOffset = (activeLen - 1) * vocabSize
-         const lastTokenLogits = logits.slice(lastActiveOffset, lastActiveOffset + vocabSize)
-
-         // Find argmax (greedy decode)
-         let bestId = 0
-         let bestVal = -Infinity
-         for (let v = 0; v < lastTokenLogits.length; v++) {
-           if (lastTokenLogits[v]! > bestVal) {
-             bestVal = lastTokenLogits[v]!
-             bestId = v
-           }
-         }
-
-         // Stop on EOS
-         if (bestId === EOS_TOKEN_ID || bestId === 0) break
-
-         // Write new token into the next padding slot
-         inputIds[activeLen] = bestId
-         attentionMask[activeLen] = 1
-         activeLen++
-       }
-
-       // Decode only the generated tokens (after input)
-       const outputIds = inputIds.slice(inputLen, activeLen)
-
-       if (outputIds.length === 0) return null
-
-       const decoded = tokenizer.decode(outputIds).trim()
-       return decoded.length > 0 ? decoded : null
-     } catch (err) {
-       const error = err instanceof Error ? { message: err.message, stack: err.stack } : err
-       this.logger.warn({ error }, 'Model compression failed, returning original text')
-       return null
-     }
-   }
-
-   // ── Comparison Logging ─────────────────────────────────────────────
-
-   /**
-    * Log model vs regex comparison for "both" mode validation.
-    * Stored in model_feedback table for later analysis.
-    */
-   private logComparison(task: ModelTask, input: string, modelResult: any, regexResult: any): void {
-     try {
-       const modelLabel = this.extractLabel(modelResult)
-       const regexLabel = this.extractLabel(regexResult)
-       const agree = modelLabel === regexLabel
-
-       this.logger.debug({
-         task,
-         modelLabel,
-         regexLabel,
-         agree,
-       }, 'Model vs regex comparison')
-
-       // Always log to feedback table (both agreements and disagreements)
-       logModelFeedback({
-         task,
-         input,
-         modelPrediction: modelLabel ?? 'null',
-         modelConfidence: this.extractConfidence(modelResult),
-         regexPrediction: regexLabel ?? 'null',
-       })
-
-       if (!agree) {
-         // Also log disagreement to training data for review
-         logTrainingData({
-           task,
-           input,
-           output: JSON.stringify({ model: modelLabel, regex: regexLabel, agreed: false }),
-           metadata: JSON.stringify({ mode: 'both', comparison: true }),
-         })
-       }
-     } catch {
-       // Non-critical
-     }
-   }
-
-   /**
-    * Extract confidence from a model result object.
-    */
-   private extractConfidence(result: any): number {
-     if (!result) return 0
-     if (typeof result.confidence === 'number') return result.confidence
-     return 0
-   }
-
-   private extractLabel(result: any): string | null {
-     if (!result) return null
-     if (typeof result === 'string') return result
-     if (result.primary) return result.primary
-     if (result.type) return result.type
-     return JSON.stringify(result).slice(0, 50)
-   }
-
-   // ── Status ─────────────────────────────────────────────────────────
-
-   /**
-    * Get inference routing status for all tasks
-    */
-   getStatus(): Record<ModelTask, { mode: TaskMode; modelAvailable: boolean; modelLoaded: boolean }> {
-     const tasks: ModelTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
-     const modelStatus = this.modelManager.getStatus()
-     const status = {} as Record<ModelTask, { mode: TaskMode; modelAvailable: boolean; modelLoaded: boolean }>
-
-     for (const task of tasks) {
-       status[task] = {
-         mode: this.getTaskMode(task),
-         modelAvailable: modelStatus[task].available,
-         modelLoaded: modelStatus[task].loaded,
-       }
-     }
-     return status
-   }
- }
1
+ /**
2
+ * Inference Router — SLM Upgrade Phase 4B
3
+ * Routes each classification task to model or regex fallback.
4
+ *
5
+ * For each task:
6
+ * 1. If config mode is 'regex'/'api' → use regex/API only
7
+ * 2. If config mode is 'model' → try model, fall back to regex on failure
8
+ * 3. If config mode is 'both' → run both, log comparison, use model output
9
+ *
10
+ * Confidence thresholding: if model confidence < threshold, use regex instead.
11
+ * Function signatures match the existing regex classifiers exactly.
12
+ */
13
+
14
+ import type { Logger } from 'pino'
15
+ import type { Config } from '@/config'
16
+ import type { ModelManager, ModelTask } from './model-manager'
17
+ import { logTrainingData, logModelFeedback } from '@/training/data-store'
18
+ import { getTokenizer } from './tokenizer'
19
+
20
+ // Import existing regex classifiers
21
+ import { IntentClassifier as BrainIntentClassifier, type ClassificationResult } from '@/routing/intent-classifier'
22
+ import type { Intent } from '@/routing/intent-classifier'
23
+ import { EntityExtractor, type ExtractedEntity } from '@/knowledge/entity-extractor'
24
+ import { classifyIntent as classifyQueryIntent } from '@/retrieval/query/intent-classifier'
25
+ import type { QueryIntent } from '@/retrieval/types'
26
+ import type { Pattern } from '@/memory/patterns'
27
+
28
+ /** Default intent labels matching the model training order */
29
+ const INTENT_LABELS: Intent[] = [
30
+ 'session_start', 'context_needed', 'decision_made', 'store_this',
31
+ 'pattern_found', 'mistake_learned', 'progress_update', 'question',
32
+ 'comparison', 'exploration', 'list_all', 'update_memory',
33
+ 'delete_memory', 'detail_request', 'timeline', 'no_action'
34
+ ]
35
+
36
+ /** BIO labels for token-level entity extraction */
37
+ const ENTITY_BIO_LABELS = ['O', 'B-TECH', 'I-TECH', 'B-PROJECT', 'I-PROJECT', 'B-CONCEPT', 'I-CONCEPT']
38
+
39
+ /** Query intent labels matching model training order.
40
+ * Model was trained with 'procedural'/'comparative' which map to code's 'pattern'/'comparison'. */
41
+ const QUERY_LABELS = ['factual', 'procedural', 'comparative', 'temporal', 'exploratory', 'decision'] as const
42
+
43
+ /** Map model query labels → QueryIntent['type'] (handles training label renames) */
44
+ const QUERY_LABEL_MAP: Record<string, QueryIntent['type']> = {
45
+ factual: 'factual',
46
+ procedural: 'pattern',
47
+ comparative: 'comparison',
48
+ temporal: 'temporal',
49
+ exploratory: 'exploratory',
50
+ decision: 'decision',
51
+ }
52
+
53
+ /** Pattern type labels */
54
+ const PATTERN_LABELS: Pattern['type'][] = ['solution', 'anti-pattern', 'best-practice', 'common-issue']
55
+
56
+ /** Knowledge type labels */
57
+ const KNOWLEDGE_LABELS = ['fact', 'preference', 'constraint', 'goal', 'definition']
58
+
59
+ /** Map BIO entity tag prefix to EntityType */
60
+ const BIO_TYPE_MAP: Record<string, ExtractedEntity['type']> = {
61
+ 'TECH': 'technology',
62
+ 'PROJECT': 'project',
63
+ 'CONCEPT': 'concept',
64
+ }
65
+
66
+ /** Minimum softmax probability to accept a B-/I- entity tag (vs forcing O) */
67
+ const ENTITY_TOKEN_THRESHOLD = 0.25
68
+
69
+ /** Common words that should never be entities (false positive filter) */
70
+ const ENTITY_STOPWORDS = new Set([
71
+ 'i', 'we', 'you', 'he', 'she', 'it', 'they', 'me', 'us', 'him', 'her', 'them',
72
+ 'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
73
+ 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
74
+ 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might',
75
+ 'not', 'no', 'yes', 'this', 'that', 'these', 'those', 'my', 'your', 'our', 'their',
76
+ 'use', 'using', 'used', 'make', 'like', 'just', 'also', 'very', 'much',
77
+ ])
78
+
79
+ /** EOS token ID for greedy decode (GPT-2 uses 50256) */
80
+ const EOS_TOKEN_ID = 50256
81
+
82
+ /** Max tokens to generate for compression */
83
+ const COMPRESS_MAX_TOKENS = 128
84
+
85
+ /** Inference mode per task */
86
+ type TaskMode = 'model' | 'regex' | 'both' | 'api'
87
+
88
+ export class InferenceRouter {
89
+ private logger: Logger
90
+ private config: Config
91
+ private modelManager: ModelManager
92
+ private confidenceThreshold: number
93
+
94
+ // Regex fallback instances
95
+ private intentClassifier: BrainIntentClassifier
96
+ private entityExtractor: EntityExtractor
97
+
98
+ constructor(logger: Logger, config: Config, modelManager: ModelManager) {
99
+ this.logger = logger.child({ component: 'inference-router' })
100
+ this.config = config
101
+ this.modelManager = modelManager
102
+ this.confidenceThreshold = config.slm?.confidenceThreshold ?? 0.7
103
+
104
+ // Initialize regex fallbacks
105
+ this.intentClassifier = new BrainIntentClassifier()
106
+ this.entityExtractor = new EntityExtractor()
107
+ }
108
+
109
+ /**
110
+ * Check if SLM is enabled globally
111
+ */
112
+ get enabled(): boolean {
113
+ return this.config.slm?.enabled ?? false
114
+ }
115
+
116
+ /**
117
+ * Get the mode for a specific task
118
+ */
119
+ private getTaskMode(task: ModelTask): TaskMode {
120
+ if (!this.enabled) return task === 'compress' ? 'api' : 'regex'
121
+ const taskConfig = this.config.slm?.tasks
122
+ if (!taskConfig) return task === 'compress' ? 'api' : 'regex'
123
+
124
+ switch (task) {
125
+ case 'intent': return taskConfig.intent ?? 'regex'
126
+ case 'entity': return taskConfig.entity ?? 'regex'
127
+ case 'query': return taskConfig.query ?? 'regex'
128
+ case 'knowledge': return taskConfig.knowledge ?? 'regex'
129
+ case 'compress': return taskConfig.compress ?? 'api'
130
+ case 'pattern': return taskConfig.pattern ?? 'regex'
131
+ default: return 'regex'
132
+ }
133
+ }
134
+
135
+ /**
136
+ * Softmax over logits array (used when ONNX models are available)
137
+ */
138
+ softmax(logits: Float32Array): number[] {
139
+ const max = Math.max(...logits)
140
+ const exps = Array.from(logits).map(x => Math.exp(x - max))
141
+ const sum = exps.reduce((a, b) => a + b, 0)
142
+ return exps.map(e => e / sum)
143
+ }
144
+
145
+ // ── Intent Classification ──────────────────────────────────────────
146
+
147
+ /**
148
+ * Classify brain() message intent.
149
+ * Drop-in replacement for IntentClassifier.classify().
150
+ * Async to support model inference when ONNX models are available.
151
+ */
152
+ async classifyIntent(message: string): Promise<ClassificationResult> {
153
+ const mode = this.getTaskMode('intent')
154
+
155
+ if (mode === 'regex') {
156
+ return this.intentClassifier.classify(message)
157
+ }
158
+
159
+ // Try model
160
+ const modelResult = await this.tryModelClassifyIntent(message)
161
+ const regexResult = this.intentClassifier.classify(message)
162
+
163
+ if (mode === 'both') {
164
+ this.logComparison('intent', message, modelResult, regexResult)
165
+ return modelResult ?? regexResult
166
+ }
167
+
168
+ // mode === 'model': use model if available and confident
169
+ if (modelResult && modelResult.confidence >= this.confidenceThreshold) {
170
+ return modelResult
171
+ }
172
+ return regexResult
173
+ }
174
+
175
+ /**
176
+ * Attempt model-based intent classification. Returns null if model unavailable.
177
+ * Tokenizes message, runs ONNX inference, and maps logits to ClassificationResult.
178
+ */
179
+ private async tryModelClassifyIntent(message: string): Promise<ClassificationResult | null> {
180
+ if (!this.modelManager.hasModel('intent')) return null
181
+
182
+ try {
183
+ const tokenizer = await getTokenizer(this.logger)
184
+ const { inputIds, attentionMask } = tokenizer.encode(message, 128)
185
+ const logits = await this.modelManager.infer('intent', inputIds, attentionMask)
186
+ if (!logits) return null
187
+
188
+ const probs = this.softmax(logits)
189
+ const manifest = this.modelManager.getManifestEntry('intent')
190
+ const labels = (manifest?.labels as Intent[] | undefined) ?? INTENT_LABELS
191
+
192
+ // Find top prediction
193
+ let maxIdx = 0
194
+ let maxProb = probs[0]!
195
+ for (let i = 1; i < probs.length; i++) {
196
+ if (probs[i]! > maxProb) {
197
+ maxProb = probs[i]!
198
+ maxIdx = i
199
+ }
200
+ }
201
+
202
+ // Find secondary intents (above a lower threshold)
203
+ const secondary: Intent[] = []
204
+ for (let i = 0; i < probs.length; i++) {
205
+ if (i !== maxIdx && probs[i]! > 0.15) {
206
+ secondary.push(labels[i]!)
207
+ }
208
+ }
209
+
210
+ return {
211
+ primary: labels[maxIdx]!,
212
+ confidence: maxProb,
213
+ secondary,
214
+ }
215
+ } catch (error) {
216
+ this.logger.warn({ error }, 'Model intent classification failed, falling back to regex')
217
+ return null
218
+ }
219
+ }
220
+
221
+ // ── Entity Extraction ──────────────────────────────────────────────
222
+
223
+ /**
224
+ * Extract entities from text.
225
+ * Drop-in replacement for EntityExtractor.extract().
226
+ * Now async to support model inference.
227
+ */
228
+ async extractEntities(text: string): Promise<ExtractedEntity[]> {
229
+ const mode = this.getTaskMode('entity')
230
+
231
+ if (mode === 'regex') {
232
+ return this.entityExtractor.extract(text)
233
+ }
234
+
235
+ const modelResult = await this.tryModelExtractEntities(text)
236
+ const regexResult = this.entityExtractor.extract(text)
237
+
238
+ if (mode === 'both') {
239
+ this.logComparison('entity', text, modelResult, regexResult)
240
+ return modelResult ?? regexResult
241
+ }
242
+
243
+ return modelResult ?? regexResult
244
+ }
245
+
246
+ /**
247
+ * Attempt model-based entity extraction using BIO sequence labeling.
248
+ * Output is per-token logits [numTokens * numLabels].
249
+ * Groups consecutive B-/I- tags into ExtractedEntity objects.
250
+ * Applies softmax per-token and filters low-confidence/garbage entities.
251
+ */
252
+ private async tryModelExtractEntities(text: string): Promise<ExtractedEntity[] | null> {
253
+ if (!this.modelManager.hasModel('entity')) return null
254
+
255
+ try {
256
+ const tokenizer = await getTokenizer(this.logger)
257
+ const maxLen = this.modelManager.getManifestEntry('entity')?.maxSeqLen ?? 128
258
+ const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
259
+ const logits = await this.modelManager.infer('entity', inputIds, attentionMask)
260
+ if (!logits) return null
261
+
262
+ const manifest = this.modelManager.getManifestEntry('entity')
263
+ const labels = manifest?.labels ?? ENTITY_BIO_LABELS
264
+ const numLabels = labels.length
265
+
266
+ // Count active tokens (non-padding)
267
+ const numTokens = attentionMask.filter(m => m === 1).length
268
+
269
+ // Decode per-token BIO tags with softmax probabilities
270
+ const tokenTags: { tag: string; prob: number }[] = []
271
+ for (let t = 0; t < numTokens; t++) {
272
+ // Extract logits for this token and apply softmax
273
+ const tokenLogits = new Float32Array(numLabels)
274
+ for (let l = 0; l < numLabels; l++) {
275
+ tokenLogits[l] = logits[t * numLabels + l]!
276
+ }
277
+ const probs = this.softmax(tokenLogits)
278
+
279
+ let bestIdx = 0
280
+ let bestProb = probs[0]!
281
+ for (let l = 1; l < numLabels; l++) {
282
+ if (probs[l]! > bestProb) {
283
+ bestProb = probs[l]!
284
+ bestIdx = l
285
+ }
286
+ }
287
+
288
+ // Only accept B-/I- tags if softmax probability exceeds entity threshold
289
+ const tag = labels[bestIdx]!
290
+ const isEntityTag = tag.startsWith('B-') || tag.startsWith('I-')
291
+ if (isEntityTag && bestProb < ENTITY_TOKEN_THRESHOLD) {
292
+ tokenTags.push({ tag: 'O', prob: bestProb })
293
+ } else {
294
+ tokenTags.push({ tag, prob: bestProb })
295
+ }
296
+ }
297
+
298
+ // Group consecutive B-/I- tags into entities
299
+ const entities: ExtractedEntity[] = []
300
+ let currentType: string | null = null
301
+ let currentTokenIds: number[] = []
302
+ let currentProbs: number[] = []
303
+ let startPos = 0
304
+
305
+ const flushEntity = () => {
306
+ if (currentType && currentTokenIds.length > 0) {
307
+ const entityType = BIO_TYPE_MAP[currentType]
308
+ if (entityType) {
309
+ const name = tokenizer.decode(currentTokenIds).trim()
310
+ const avgProb = currentProbs.reduce((a, b) => a + b, 0) / currentProbs.length
311
+
312
+ // Filter: minimum 2 chars, not a stopword, decent average confidence
313
+ if (name.length >= 2 && avgProb >= ENTITY_TOKEN_THRESHOLD && !ENTITY_STOPWORDS.has(name.toLowerCase())) {
314
+ entities.push({
315
+ name,
316
+ normalizedName: name.toLowerCase(),
317
+ type: entityType,
318
+ confidence: avgProb,
319
+ source: 'model',
320
+ positions: [startPos],
321
+ })
322
+ }
323
+ }
324
+ }
325
+ currentType = null
326
+ currentTokenIds = []
327
+ currentProbs = []
328
+ }
329
+
330
+ for (let i = 0; i < tokenTags.length; i++) {
331
+ const { tag, prob } = tokenTags[i]!
332
+
333
+ if (tag.startsWith('B-')) {
334
+ flushEntity()
335
+ currentType = tag.slice(2)
336
+ currentTokenIds = [inputIds[i]!]
337
+ currentProbs = [prob]
338
+ startPos = i
339
+ } else if (tag.startsWith('I-') && currentType === tag.slice(2)) {
340
+ currentTokenIds.push(inputIds[i]!)
341
+ currentProbs.push(prob)
342
+ } else {
343
+ flushEntity()
344
+ }
345
+ }
346
+ flushEntity()
347
+
348
+ return entities.length > 0 ? entities : null
349
+ } catch (error) {
350
+ this.logger.warn({ error }, 'Model entity extraction failed, falling back to regex')
351
+ return null
352
+ }
353
+ }
354
+
355
+ // ── Query Intent Classification ────────────────────────────────────
356
+
357
+ /**
358
+ * Classify search query intent.
359
+ * Drop-in replacement for classifyIntent() in retrieval/query/intent-classifier.
360
+ * Now async to support model inference.
361
+ */
362
+ async classifyQueryIntent(query: string): Promise<QueryIntent> {
363
+ const mode = this.getTaskMode('query')
364
+
365
+ if (mode === 'regex') {
366
+ return classifyQueryIntent(query)
367
+ }
368
+
369
+ const modelResult = await this.tryModelClassifyQuery(query)
370
+ const regexResult = classifyQueryIntent(query)
371
+
372
+ if (mode === 'both') {
373
+ this.logComparison('query', query, modelResult, regexResult)
374
+ return modelResult ?? regexResult
375
+ }
376
+
377
+ if (modelResult && modelResult.confidence >= this.confidenceThreshold) {
378
+ return modelResult
379
+ }
380
+ return regexResult
381
+ }
382
+
383
+ /**
384
+ * Attempt model-based query intent classification.
385
+ * 6-class classifier. Model uses training labels (procedural, comparative)
386
+ * which get mapped to code labels (pattern, comparison) via QUERY_LABEL_MAP.
387
+ */
388
+ private async tryModelClassifyQuery(query: string): Promise<QueryIntent | null> {
389
+ if (!this.modelManager.hasModel('query')) return null
390
+
391
+ try {
392
+ const tokenizer = await getTokenizer(this.logger)
393
+ const maxLen = this.modelManager.getManifestEntry('query')?.maxSeqLen ?? 128
394
+ const { inputIds, attentionMask } = tokenizer.encode(query, maxLen)
395
+ const logits = await this.modelManager.infer('query', inputIds, attentionMask)
396
+ if (!logits) return null
397
+
398
+ const probs = this.softmax(logits)
399
+ const manifest = this.modelManager.getManifestEntry('query')
400
+ const labels = manifest?.labels ?? [...QUERY_LABELS]
401
+
402
+ let maxIdx = 0
403
+ let maxProb = probs[0]!
404
+ for (let i = 1; i < probs.length; i++) {
405
+ if (probs[i]! > maxProb) {
406
+ maxProb = probs[i]!
407
+ maxIdx = i
408
+ }
409
+ }
410
+
411
+ // Map model label to QueryIntent type (handles training label renames)
412
+ const rawLabel = labels[maxIdx]!
413
+ const mappedType = QUERY_LABEL_MAP[rawLabel] ?? (rawLabel as QueryIntent['type'])
414
+
415
+ return {
416
+ type: mappedType,
417
+ confidence: maxProb,
418
+ }
419
+ } catch (error) {
420
+ this.logger.warn({ error }, 'Model query classification failed, falling back to regex')
421
+ return null
422
+ }
423
+ }
+
+   // ── Pattern Classification ─────────────────────────────────────────
+
+   /**
+    * Classify pattern type.
+    * Drop-in for PatternRecognizer.determinePatternType().
+    * Now async to support model inference.
+    */
+   async classifyPatternType(description: string): Promise<Pattern['type']> {
+     const mode = this.getTaskMode('pattern')
+
+     if (mode === 'regex') {
+       return this.regexClassifyPattern(description)
+     }
+
+     const modelResult = await this.tryModelClassifyPattern(description)
+     const regexResult = this.regexClassifyPattern(description)
+
+     if (mode === 'both') {
+       this.logComparison('pattern', description, modelResult, regexResult)
+       return modelResult ?? regexResult
+     }
+
+     return modelResult ?? regexResult
+   }
+
+   /**
+    * Regex fallback for pattern classification (extracted from PatternRecognizer).
+    */
+   private regexClassifyPattern(description: string): Pattern['type'] {
+     const lower = description.toLowerCase()
+     if (lower.includes('avoid') || lower.includes('anti-pattern') || lower.includes("don't use")) {
+       return 'anti-pattern'
+     }
+     if (lower.includes('always') || lower.includes('best practice') || lower.includes('standard')) {
+       return 'best-practice'
+     }
+     if (lower.includes('issue') || lower.includes('bug') || lower.includes('fix')) {
+       return 'common-issue'
+     }
+     return 'solution'
+   }
+
+   /**
+    * Attempt model-based pattern type classification.
+    * 4-class classifier: solution, anti-pattern, best-practice, common-issue.
+    */
+   private async tryModelClassifyPattern(description: string): Promise<Pattern['type'] | null> {
+     if (!this.modelManager.hasModel('pattern')) return null
+
+     try {
+       const tokenizer = await getTokenizer(this.logger)
+       const maxLen = this.modelManager.getManifestEntry('pattern')?.maxSeqLen ?? 128
+       const { inputIds, attentionMask } = tokenizer.encode(description, maxLen)
+       const logits = await this.modelManager.infer('pattern', inputIds, attentionMask)
+       if (!logits) return null
+
+       const probs = this.softmax(logits)
+       const manifest = this.modelManager.getManifestEntry('pattern')
+       const labels = (manifest?.labels as Pattern['type'][] | undefined) ?? PATTERN_LABELS
+
+       let maxIdx = 0
+       let maxProb = probs[0]!
+       for (let i = 1; i < probs.length; i++) {
+         if (probs[i]! > maxProb) {
+           maxProb = probs[i]!
+           maxIdx = i
+         }
+       }
+
+       // Only return a result when confidence meets the threshold
+       if (maxProb < this.confidenceThreshold) return null
+
+       return labels[maxIdx]!
+     } catch (error) {
+       this.logger.warn({ error }, 'Model pattern classification failed, falling back to regex')
+       return null
+     }
+   }
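// Editor's note: this argmax-over-softmax loop appears verbatim in the query,
// pattern, and knowledge classifiers above. A minimal extraction — a sketch,
// assuming the same numeric-array logits the code already passes around:
function argmax(probs: ArrayLike<number>): { index: number; prob: number } {
  let index = 0
  let prob = probs[0]!
  for (let i = 1; i < probs.length; i++) {
    if (probs[i]! > prob) {
      prob = probs[i]!
      index = i
    }
  }
  return { index, prob }
}
// Usage inside any of the try* helpers:
//   const { index: maxIdx, prob: maxProb } = argmax(this.softmax(logits))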
+
+   // ── Knowledge Type Classification ──────────────────────────────────
+
+   /**
+    * Classify knowledge type from text.
+    * 5-class classifier: fact, preference, constraint, goal, definition.
+    */
+   async classifyKnowledgeType(text: string): Promise<string> {
+     const mode = this.getTaskMode('knowledge')
+
+     if (mode === 'regex') {
+       return this.regexClassifyKnowledge(text)
+     }
+
+     const modelResult = await this.tryModelClassifyKnowledge(text)
+     const regexResult = this.regexClassifyKnowledge(text)
+
+     if (mode === 'both') {
+       this.logComparison('knowledge', text, modelResult, regexResult)
+     }
+
+     // Cross-check: if regex found a keyword match (non-'fact') and the model
+     // disagrees, prefer regex. Regex only returns non-'fact' when it finds a
+     // definitive keyword (prefer, must, goal, etc.), so it is more trustworthy
+     // than an undertrained model for those cases.
+     if (modelResult && regexResult !== 'fact' && modelResult !== regexResult) {
+       this.logger.debug({ modelResult, regexResult }, 'Knowledge cross-check: regex keyword wins over model')
+       return regexResult
+     }
+
+     if (modelResult) {
+       return modelResult
+     }
+     return regexResult
+   }
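// Editor's worked example of the cross-check above (classification values
// invented; `inference` is a hypothetical instance):
//
//   await inference.classifyKnowledgeType('we must use pnpm')
//   // regex  → 'constraint' (keyword "must")
//   // model  → 'fact' (disagrees)
//   // result → 'constraint': a non-'fact' regex hit overrides the model
//
//   await inference.classifyKnowledgeType('the sky is blue today')
//   // regex  → 'fact' (no keyword), so whatever the model says stands.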
+
+   /**
+    * Regex fallback for knowledge type classification.
+    */
+   private regexClassifyKnowledge(text: string): string {
+     const lower = text.toLowerCase()
+     if (lower.includes('prefer') || lower.includes('like') || lower.includes('want') || lower.includes('favorite')) {
+       return 'preference'
+     }
+     if (lower.includes('must') || lower.includes('require') || lower.includes('constraint') || lower.includes('cannot') || lower.includes("can't") || lower.includes('never')) {
+       return 'constraint'
+     }
+     if (lower.includes('goal') || lower.includes('plan') || lower.includes('aim') || lower.includes('target') || lower.includes('objective')) {
+       return 'goal'
+     }
+     if (lower.includes('define') || lower.includes('definition') || lower.includes('means') || lower.includes('refers to') || lower.includes('is a')) {
+       return 'definition'
+     }
+     return 'fact'
+   }
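// Editor's note: the substring checks above are fast but loose — 'like' also
// matches "unlike"/"likely", and 'is a' matches "is always". A word-boundary
// variant under the same fallback contract (a sketch, not shipped behavior;
// stemmed forms such as "preferred" would need explicit handling):
const PREFERENCE_RE = /\b(prefer|like|want|favorite)\b/i

function isPreferenceStrict(text: string): boolean {
  return PREFERENCE_RE.test(text)
}
// isPreferenceStrict('unlikely')     → false (includes('like') returns true)
// isPreferenceStrict('I like tabs')  → true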
+
+   /**
+    * Attempt model-based knowledge type classification.
+    * 5-class classifier: fact, preference, constraint, goal, definition.
+    */
+   private async tryModelClassifyKnowledge(text: string): Promise<string | null> {
+     if (!this.modelManager.hasModel('knowledge')) return null
+
+     try {
+       const tokenizer = await getTokenizer(this.logger)
+       const maxLen = this.modelManager.getManifestEntry('knowledge')?.maxSeqLen ?? 128
+       const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
+       const logits = await this.modelManager.infer('knowledge', inputIds, attentionMask)
+       if (!logits) return null
+
+       const probs = this.softmax(logits)
+       const manifest = this.modelManager.getManifestEntry('knowledge')
+       const labels = manifest?.labels ?? KNOWLEDGE_LABELS
+
+       let maxIdx = 0
+       let maxProb = probs[0]!
+       for (let i = 1; i < probs.length; i++) {
+         if (probs[i]! > maxProb) {
+           maxProb = probs[i]!
+           maxIdx = i
+         }
+       }
+
+       if (maxProb < this.confidenceThreshold) return null
+
+       return labels[maxIdx]!
+     } catch (error) {
+       this.logger.warn({ error }, 'Model knowledge classification failed, falling back to regex')
+       return null
+     }
+   }
+
+   // ── Local Compression ──────────────────────────────────────────────
+
+   /**
+    * Compress text using the local model, or return the original if unavailable.
+    * Uses greedy autoregressive decoding for seq2seq generation.
+    */
+   async compress(text: string): Promise<string> {
+     const mode = this.getTaskMode('compress')
+
+     // In 'api' or 'regex' mode, no local compression is available
+     if (mode === 'api' || mode === 'regex') {
+       return text
+     }
+
+     const modelResult = await this.tryModelCompress(text)
+
+     if (mode === 'both' && modelResult) {
+       this.logger.debug({
+         originalLen: text.length,
+         compressedLen: modelResult.length,
+         ratio: (modelResult.length / text.length).toFixed(2),
+       }, 'Compression comparison')
+     }
+
+     return modelResult ?? text
+   }
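// Editor's usage sketch (the `inference` instance is hypothetical): compress()
// is deliberately non-throwing — every failure path degrades to the input.
//
//   const summary = await inference.compress(longObservation)
//   // summary === longObservation when the compress model is absent, the mode
//   // is 'api'/'regex', decoding produces nothing, or inference throws.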
+
+   /**
+    * Attempt model-based text compression using greedy autoregressive decoding.
+    * Generates tokens one at a time until EOS or max length.
+    */
+   private async tryModelCompress(text: string): Promise<string | null> {
+     if (!this.modelManager.hasModel('compress')) return null
+
+     try {
+       const tokenizer = await getTokenizer(this.logger)
+       const maxLen = this.modelManager.getManifestEntry('compress')?.maxSeqLen ?? 256
+       const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
+
+       // Keep the padded arrays (fixed shape [1, maxLen]) and track active length.
+       // The ONNX model expects fixed input shape [1, maxLen]; stripping padding
+       // would create variable-length tensors that the model rejects.
+       let activeLen = attentionMask.filter(m => m === 1).length
+       const maxGenTokens = Math.min(COMPRESS_MAX_TOKENS, maxLen)
+       const inputLen = activeLen
+
+       for (let step = 0; step < maxGenTokens; step++) {
+         if (activeLen >= maxLen) break
+
+         const logits = await this.modelManager.infer('compress', [...inputIds], [...attentionMask])
+         if (!logits) break
+
+         // vocabSize = total logits / sequence length (maxLen, the padded dimension)
+         const vocabSize = logits.length / maxLen
+         const lastActiveOffset = (activeLen - 1) * vocabSize
+         const lastTokenLogits = logits.slice(lastActiveOffset, lastActiveOffset + vocabSize)
+
+         // Find argmax (greedy decode)
+         let bestId = 0
+         let bestVal = -Infinity
+         for (let v = 0; v < lastTokenLogits.length; v++) {
+           if (lastTokenLogits[v]! > bestVal) {
+             bestVal = lastTokenLogits[v]!
+             bestId = v
+           }
+         }
+
+         // Stop on EOS (or on id 0, the padding token)
+         if (bestId === EOS_TOKEN_ID || bestId === 0) break
+
+         // Write the new token into the next padding slot
+         inputIds[activeLen] = bestId
+         attentionMask[activeLen] = 1
+         activeLen++
+       }
+
+       // Decode only the generated tokens (after the input)
+       const outputIds = inputIds.slice(inputLen, activeLen)
+
+       if (outputIds.length === 0) return null
+
+       const decoded = tokenizer.decode(outputIds).trim()
+       return decoded.length > 0 ? decoded : null
+     } catch (err) {
+       const error = err instanceof Error ? { message: err.message, stack: err.stack } : err
+       this.logger.warn({ error }, 'Model compression failed, returning original text')
+       return null
+     }
+   }
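// Editor's illustration of the in-place buffer bookkeeping above, with a toy
// maxLen of 8 (all token ids invented). The tensor shape never changes; only
// the attention mask and activeLen advance:
//
//   step 0: inputIds      [101, 7592, 2088, 102,    0, 0, 0, 0]   activeLen = 4
//           attentionMask [  1,    1,    1,   1,    0, 0, 0, 0]
//   step 1: greedy token 2054 written into slot 4:
//           inputIds      [101, 7592, 2088, 102, 2054, 0, 0, 0]   activeLen = 5
//           attentionMask [  1,    1,    1,   1,    1, 0, 0, 0]
//
// Decoding then slices [inputLen, activeLen) so only generated ids reach
// tokenizer.decode().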
+
+   // ── Comparison Logging ─────────────────────────────────────────────
+
+   /**
+    * Log model vs regex comparison for "both" mode validation.
+    * Stored in the model_feedback table for later analysis.
+    */
+   private logComparison(task: ModelTask, input: string, modelResult: Record<string, unknown> | string | null, regexResult: Record<string, unknown> | string | null): void {
+     try {
+       const modelLabel = this.extractLabel(modelResult)
+       const regexLabel = this.extractLabel(regexResult)
+       const agree = modelLabel === regexLabel
+
+       this.logger.debug({
+         task,
+         modelLabel,
+         regexLabel,
+         agree,
+       }, 'Model vs regex comparison')
+
+       // Always log to the feedback table (agreements and disagreements alike)
+       logModelFeedback({
+         task,
+         input,
+         modelPrediction: modelLabel ?? 'null',
+         modelConfidence: this.extractConfidence(modelResult),
+         regexPrediction: regexLabel ?? 'null',
+       })
+
+       if (!agree) {
+         // Also log the disagreement to training data for review
+         logTrainingData({
+           task,
+           input,
+           output: JSON.stringify({ model: modelLabel, regex: regexLabel, agreed: false }),
+           metadata: JSON.stringify({ mode: 'both', comparison: true }),
+         })
+       }
+     } catch {
+       // Non-critical: comparison logging must never break classification
+     }
+   }
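// Editor's sketch of a disagreement as the two sinks above would record it
// (field names taken from the calls in logComparison; values invented):
const exampleFeedbackRow = {
  task: 'knowledge',
  input: 'we must use pnpm',
  modelPrediction: 'fact',
  modelConfidence: 0.44,
  regexPrediction: 'constraint',
}
// …and the paired training-data row written only on disagreement:
// {
//   task: 'knowledge',
//   input: 'we must use pnpm',
//   output: '{"model":"fact","regex":"constraint","agreed":false}',
//   metadata: '{"mode":"both","comparison":true}',
// }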
+
+   /**
+    * Extract the confidence value from a model result object.
+    */
+   private extractConfidence(result: Record<string, unknown> | string | null): number {
+     if (!result || typeof result === 'string') return 0
+     if (typeof result.confidence === 'number') return result.confidence
+     return 0
+   }
+
+   /**
+    * Extract a comparable label from a model or regex result.
+    */
+   private extractLabel(result: Record<string, unknown> | string | null): string | null {
+     if (!result) return null
+     if (typeof result === 'string') return result
+     if (result.primary) return String(result.primary)
+     if (result.type) return String(result.type)
+     return JSON.stringify(result).slice(0, 50)
+   }
+
+   // ── Status ─────────────────────────────────────────────────────────
+
+   /**
+    * Get inference routing status for all tasks.
+    */
+   getStatus(): Record<ModelTask, { mode: TaskMode; modelAvailable: boolean; modelLoaded: boolean }> {
+     const tasks: ModelTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
+     const modelStatus = this.modelManager.getStatus()
+     const status = {} as Record<ModelTask, { mode: TaskMode; modelAvailable: boolean; modelLoaded: boolean }>
+
+     for (const task of tasks) {
+       status[task] = {
+         mode: this.getTaskMode(task),
+         modelAvailable: modelStatus[task].available,
+         modelLoaded: modelStatus[task].loaded,
+       }
+     }
+     return status
+   }
+ }
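// Editor's usage sketch (shape from the signature above; values invented):
//
//   const status = inference.getStatus()
//   // status.query → { mode: 'both', modelAvailable: true, modelLoaded: false }
//
// Handy for a CLI status command or health check: it shows, per task, whether
// routing will hit the local model or fall back to regex.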