npm - claude-brain - Versions diffs - 0.27.3 → 0.28.0 - Mend

claude-brain 0.27.3 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/VERSION +1 -1
package/package.json +3 -1
package/src/cli/bin.ts +14 -0
package/src/cli/commands/export-training.ts +70 -0
package/src/cli/commands/models.ts +681 -0
package/src/cli/commands/status.ts +44 -0
package/src/config/home.ts +1 -0
package/src/config/schema.ts +30 -0
package/src/intelligence/inference-router.ts +749 -0
package/src/intelligence/model-manager.ts +206 -0
package/src/intelligence/tokenizer.ts +118 -0
package/src/knowledge/entity-extractor.ts +31 -1
package/src/memory/compression.ts +17 -1
package/src/memory/patterns.ts +46 -6
package/src/retrieval/query/intent-classifier.ts +17 -1
package/src/routing/entity-extractor.ts +30 -4
package/src/routing/intent-classifier.ts +45 -16
package/src/routing/router.ts +22 -2
package/src/server/http-api.ts +83 -1
package/src/server/services.ts +47 -0
package/src/training/data-store.ts +298 -0
package/src/training/retrain-pipeline.ts +394 -0

package/src/routing/intent-classifier.ts CHANGED Viewed

@@ -240,85 +240,94 @@ export class IntentClassifier {
     // Phase 19 B3: Detect temporal signals for secondary intent
     const hasTemporal = this.hasTemporalSignal(lower)
+    // SLM Phase 1A: Log classification for training data collection
+    const startTime = Date.now()
     // Check in priority order — first confident match wins
+    // Helper to log + return in one step
+    const ret = (result: ClassificationResult): ClassificationResult => {
+      this._logTraining(message, result, startTime)
+      return result
+    }
     // 1. no_action: very short messages, greetings, acknowledgments
     if (this.isNoAction(lower)) {
-      return { primary: 'no_action', confidence: 0.95, secondary: [] }
+      return ret({ primary: 'no_action', confidence: 0.95, secondary: [] })
     }
     // 2. delete_memory: "forget that", "delete", "remove"
     if (this.isDeleteMemory(lower)) {
-      return { primary: 'delete_memory', confidence: 0.90, secondary }
+      return ret({ primary: 'delete_memory', confidence: 0.90, secondary })
     }
     // 3. update_memory: "actually", "correction:", "change that to"
     if (this.isUpdateMemory(lower)) {
-      return { primary: 'update_memory', confidence: 0.85, secondary }
+      return ret({ primary: 'update_memory', confidence: 0.85, secondary })
     }
     // 4. store_this: explicit "remember:", "save this:", "I prefer" (never for questions)
     if (this.isStoreThis(lower, message)) {
       if (this.hasDecisionSignal(lower)) secondary.push('decision_made')
-      return { primary: 'store_this', confidence: 0.90, secondary }
+      return ret({ primary: 'store_this', confidence: 0.90, secondary })
     }
     // 5. decision_made: decision phrases + reasoning (never for questions)
     if (this.isDecisionMade(lower, message)) {
       if (this.hasComparisonSignal(lower)) secondary.push('comparison')
-      return { primary: 'decision_made', confidence: 0.85, secondary }
+      return ret({ primary: 'decision_made', confidence: 0.85, secondary })
     }
     // 6. mistake_learned: correction/bug/lesson indicators
     if (this.isMistakeLearned(lower)) {
-      return { primary: 'mistake_learned', confidence: 0.85, secondary }
+      return ret({ primary: 'mistake_learned', confidence: 0.85, secondary })
     }
     // 7. list_all: "list all", "what decisions", "show all"
     if (this.isListAll(lower, message)) {
-      return { primary: 'list_all', confidence: 0.85, secondary }
+      return ret({ primary: 'list_all', confidence: 0.85, secondary })
     }
     // 8. progress_update: completed task indicators (NOT questions)
     if (this.isProgressUpdate(lower, message)) {
       if (this.hasSessionSignal(lower)) secondary.push('session_start')
-      return { primary: 'progress_update', confidence: 0.85, secondary }
+      return ret({ primary: 'progress_update', confidence: 0.85, secondary })
     }
     // 9. comparison: vs, which is better, etc.
     if (this.isComparison(lower)) {
       if (this.isQuestion(lower, message)) secondary.push('question')
       if (hasTemporal) secondary.push('exploration')
-      return { primary: 'comparison', confidence: 0.85, secondary }
+      return ret({ primary: 'comparison', confidence: 0.85, secondary })
     }
     // 10. pattern_found: explicit pattern documentation
     if (this.isPatternFound(lower)) {
-      return { primary: 'pattern_found', confidence: 0.80, secondary }
+      return ret({ primary: 'pattern_found', confidence: 0.80, secondary })
     }
     // 11. session_start: starting/resuming work (Phase 19: narrowed check)
     if (this.isSessionStart(lower)) {
       secondary.push('context_needed')
-      return { primary: 'session_start', confidence: 0.90, secondary }
+      return ret({ primary: 'session_start', confidence: 0.90, secondary })
     }
     // 12. detail_request: "details obs_abc123", "show me <id>" (before exploration to avoid misclassification)
     if (this.isDetailRequest(lower)) {
-      return { primary: 'detail_request', confidence: 0.90, secondary }
+      return ret({ primary: 'detail_request', confidence: 0.90, secondary })
     }
     // 12b. timeline: "timeline for project", "what did I do yesterday", "recent activity" (before exploration)
     if (this.isTimeline(lower)) {
       if (hasTemporal) secondary.push('exploration')
-      return { primary: 'timeline', confidence: 0.85, secondary }
+      return ret({ primary: 'timeline', confidence: 0.85, secondary })
     }
     // 12c. exploration: trends, graph, evolution, history (general exploration that isn't a specific timeline)
     if (this.isExploration(lower)) {
       if (this.isQuestion(lower, message)) secondary.push('question')
       if (hasTemporal) secondary.push('exploration')
-      return { primary: 'exploration', confidence: 0.75, secondary }
+      return ret({ primary: 'exploration', confidence: 0.75, secondary })
     }
     // 13. question: starts with question word or ends with ?
@@ -330,12 +339,32 @@ export class IntentClassifier {
       // Phase 19 B1: ? → 0.95, question word → 0.90
       const confidence = message.trim().endsWith('?') ? 0.95 : 0.90
-      return { primary: 'question', confidence, secondary }
+      return ret({ primary: 'question', confidence, secondary })
     }
     // 14. Default: context_needed
     if (hasTemporal) secondary.push('exploration')
-    return { primary: 'context_needed', confidence: 0.60, secondary }
+    const defaultResult = { primary: 'context_needed' as Intent, confidence: 0.60, secondary }
+    this._logTraining(message, defaultResult, startTime)
+    return defaultResult
+  }
+  /**
+   * SLM Phase 1A: record one classification outcome as an `intent` training example.
+   *
+   * The primary and secondary labels are JSON-encoded into `output`; the confidence
+   * and the milliseconds elapsed since `startTime` are JSON-encoded into `metadata`.
+   * Anything thrown while loading or calling the training store is swallowed, so
+   * classification itself is never affected.
+   *
+   * @param message   Message text stored as the example input.
+   * @param result    Classification produced for `message`.
+   * @param startTime `Date.now()` value captured when classification began.
+   */
+  private _logTraining(message: string, result: ClassificationResult, startTime: number): void {
+    try {
+      // Loaded at call time instead of through a top-level import
+      const { logTrainingData: record } = require('@/training/data-store')
+      const output = JSON.stringify({ label: result.primary, secondary: result.secondary })
+      const metadata = JSON.stringify({ confidence: result.confidence, elapsed_ms: Date.now() - startTime })
+      record({ task: 'intent' as const, input: message, output, metadata })
+    } catch {
+      // Best-effort only: a logging failure must not reach the caller
+    }
   }
   private isNoAction(lower: string): boolean {

package/src/routing/router.ts CHANGED Viewed

@@ -12,6 +12,7 @@
 import type { Logger } from 'pino'
 import { IntentClassifier, type ClassificationResult } from './intent-classifier'
+import type { InferenceRouter } from '@/intelligence/inference-router'
 import { BrainEntityExtractor, type BrainExtractedEntities } from './entity-extractor'
 import { ResponseFilter, type BrainResponse, type TierResults, type FilterableResult, formatCompactResponse, formatDetailResponse, formatTimeline, groupByDay } from './response-filter'
 import { SearchEngine } from './search-engine'
@@ -62,6 +63,9 @@ export class BrainRouter {
   private searchEngine: SearchEngine
   private logger: Logger
+  /** SLM Upgrade: Optional inference router for model-based classification */
+  private inferenceRouter: InferenceRouter | null = null
   /** Phase 30: Optional LLM compressor for long observations */
   private compressor: ObservationCompressor | null = null
@@ -80,6 +84,12 @@ export class BrainRouter {
     this.logger = logger.child({ component: 'brain-router' })
   }
+  /**
+   * SLM Upgrade: Set the optional inference router for model-based classification.
+   *
+   * Stores the router on this instance and forwards the same router to the
+   * entity extractor through its own `setInferenceRouter`.
+   *
+   * @param router Inference router to attach.
+   */
+  setInferenceRouter(router: InferenceRouter): void {
+    this.inferenceRouter = router
+    this.entityExtractor.setInferenceRouter(router)
+  }
   /** Phase 30: Set the optional LLM compressor */
   setCompressor(compressor: ObservationCompressor): void {
     this.compressor = compressor
@@ -120,8 +130,10 @@ export class BrainRouter {
       }
     }
-    // Classify intent
-    const classification = this.classifier.classify(message)
+    // Classify intent (SLM: use inference router if available, falls back to regex)
+    const classification = this.inferenceRouter
+      ? await this.inferenceRouter.classifyIntent(message)
+      : this.classifier.classify(message)
     this.logger.debug({ intent: classification.primary, confidence: classification.confidence }, 'Intent classified')
     // Route to handler
@@ -2163,6 +2175,14 @@ let routerInstance: BrainRouter | null = null
 export function getBrainRouter(logger: Logger): BrainRouter {
   if (!routerInstance) {
     routerInstance = new BrainRouter(logger)
+    // SLM Upgrade: Wire inference router if available
+    try {
+      const { getInferenceRouter } = require('@/server/services')
+      const ir = getInferenceRouter()
+      if (ir) routerInstance.setInferenceRouter(ir)
+    } catch {
+      // Services not initialized yet — will use regex fallback
+    }
   }
   return routerInstance
 }

package/src/server/http-api.ts CHANGED Viewed

@@ -6,7 +6,7 @@
 import { Hono } from 'hono'
 import type { Logger } from 'pino'
 import type { Config } from '@/config'
-import { getMemoryService, getVaultService, isServicesInitialized } from '@/server/services'
+import { getMemoryService, getVaultService, getInferenceRouter, isServicesInitialized } from '@/server/services'
 import { ResourceProvider } from '@/server/providers/resources'
 import type { MemoryManager } from '@/memory'
 import type { CapturedKnowledge, HookStats } from '@/hooks/types'
@@ -16,6 +16,7 @@ import type { CodeIndexer } from '@/code-intelligence/indexer'
 import type { CodeQuery } from '@/code-intelligence/query'
 import type { MemoryCodeLinker } from '@/code-intelligence/linker'
 import { setupWebViewer, setWebViewerCodeQuery } from '@/server/web-viewer'
+import { getTrainingStats, getModelFeedbackStats, getDisagreements } from '@/training/data-store'
 export class HttpApiServer {
   private app: Hono
@@ -135,6 +136,12 @@ export class HttpApiServer {
     // Phase 23b: Expose brain://context/auto via HTTP for testability
     this.app.get('/api/context/auto', () => this.handleContextAuto())
+    // Phase 6A: SLM feedback & model status endpoints
+    this.app.get('/api/models/status', () => this.handleModelsStatus())
+    this.app.get('/api/models/feedback', (c) => this.handleModelsFeedback(c))
+    this.app.get('/api/models/disagreements', (c) => this.handleModelsDisagreements(c))
+    this.app.get('/api/training/stats', () => this.handleTrainingStats())
   }
   private async handleListProjects(): Promise<Response> {
@@ -1047,6 +1054,81 @@ export class HttpApiServer {
     }
   }
+  // ─── Phase 6A: SLM Model Feedback Endpoints ────────────
+  /**
+   * GET /api/models/status — report the inference router's status, or a
+   * disabled placeholder when no inference router is available.
+   * Any thrown error is logged and answered with HTTP 500.
+   */
+  private handleModelsStatus(): Response {
+    try {
+      const router = getInferenceRouter()
+      const data = router
+        ? router.getStatus()
+        : { enabled: false, message: 'SLM inference not initialized' }
+      return Response.json({ success: true, data })
+    } catch (error) {
+      this.logger.error({ error }, 'Failed to get model status')
+      const body = { success: false, error: 'Failed to get model status' }
+      return Response.json(body, { status: 500 })
+    }
+  }
+  /**
+   * GET /api/models/feedback — model-vs-regex feedback statistics.
+   *
+   * Without a `task` query parameter the stats for every task are returned.
+   * With one, only that task's entry is returned, and a task that is not a key
+   * of the stats object is answered with HTTP 400.
+   * Any thrown error is logged and answered with HTTP 500.
+   *
+   * @param c Request context; only `c.req.query('task')` is read.
+   */
+  private handleModelsFeedback(c: any): Response {
+    try {
+      const task = c.req.query('task')
+      const stats = getModelFeedbackStats()
+      if (!task) {
+        return Response.json({ success: true, data: stats })
+      }
+      // Own-property check: a plain `stats[task]` lookup would also resolve inherited
+      // members such as `constructor` or `__proto__` and treat them as known tasks.
+      if (!Object.prototype.hasOwnProperty.call(stats, task)) {
+        return Response.json(
+          { success: false, error: `Unknown task: ${task}` },
+          { status: 400 }
+        )
+      }
+      return Response.json({ success: true, data: { [task]: stats[task] } })
+    } catch (error) {
+      this.logger.error({ error }, 'Failed to get model feedback stats')
+      return Response.json(
+        { success: false, error: 'Failed to get model feedback stats' },
+        { status: 500 }
+      )
+    }
+  }
+  /**
+   * GET /api/models/disagreements — recent rows where model and regex predictions differ.
+   *
+   * Query parameters:
+   * - `task`: defaults to 'intent' when absent or empty.
+   * - `limit`: defaults to 50 when absent or empty; must be a non-negative decimal
+   *   integer. Anything else (for example `abc`, `-1`, `10abc`) is answered with
+   *   HTTP 400 instead of being passed on to the query.
+   * Any thrown error is logged and answered with HTTP 500.
+   *
+   * @param c Request context; only `c.req.query(...)` is read.
+   */
+  private handleModelsDisagreements(c: any): Response {
+    try {
+      const task = c.req.query('task') || 'intent'
+      const rawLimit = c.req.query('limit') || '50'
+      // Digits only: parseInt alone would accept '10abc', yield NaN for 'abc', and let
+      // negative values through, which SQLite's LIMIT treats as "no upper bound".
+      const limit = /^\d+$/.test(rawLimit) ? parseInt(rawLimit, 10) : NaN
+      if (!Number.isSafeInteger(limit)) {
+        return Response.json(
+          { success: false, error: `Invalid limit: ${rawLimit}` },
+          { status: 400 }
+        )
+      }
+      const disagreements = getDisagreements(task, limit)
+      return Response.json({ success: true, data: disagreements })
+    } catch (error) {
+      this.logger.error({ error }, 'Failed to get model disagreements')
+      return Response.json(
+        { success: false, error: 'Failed to get model disagreements' },
+        { status: 500 }
+      )
+    }
+  }
+  /**
+   * GET /api/training/stats — per-task training example counts.
+   * Any thrown error is logged and answered with HTTP 500.
+   */
+  private handleTrainingStats(): Response {
+    try {
+      return Response.json({ success: true, data: getTrainingStats() })
+    } catch (error) {
+      this.logger.error({ error }, 'Failed to get training stats')
+      const body = { success: false, error: 'Failed to get training stats' }
+      return Response.json(body, { status: 500 })
+    }
+  }
   async start(): Promise<void> {
     const port = this.config.port || 3000

package/src/server/services.ts CHANGED Viewed

@@ -26,6 +26,8 @@ import { SemanticCache } from '@/intelligence/optimization/semantic-cache'
 import { PrecomputeEngine } from '@/intelligence/optimization/precompute'
 import { MemoryArchiver } from '@/memory/consolidation/archiver'
 import { ImportanceScorer } from '@/memory/consolidation/scorer'
+import { ModelManager } from '@/intelligence/model-manager'
+import { InferenceRouter } from '@/intelligence/inference-router'
 export interface KnowledgeGraphServiceContainer {
   graph: InMemoryKnowledgeGraph
@@ -53,6 +55,8 @@ export interface Services {
   codeIndexer: CodeIndexer | null
   codeQuery: CodeQuery | null
   codeLinker: MemoryCodeLinker | null
+  modelManager: ModelManager | null
+  inferenceRouter: InferenceRouter | null
   logger: Logger
   config: Config
 }
@@ -371,6 +375,25 @@ export async function initializeServices(config: Config, logger: Logger): Promis
       serviceLogger.warn({ error }, 'Failed to initialize code linker, continuing without it')
     }
+    // Initialize SLM Model Manager & Inference Router
+    let modelManager: ModelManager | null = null
+    let inferenceRouter: InferenceRouter | null = null
+    try {
+      const slmModelsDir = config.slm?.modelsDir?.replace(/^~/, require('os').homedir())
+      modelManager = new ModelManager(serviceLogger, slmModelsDir)
+      inferenceRouter = new InferenceRouter(serviceLogger, config, modelManager)
+      const slmEnabled = config.slm?.enabled ?? false
+      serviceLogger.info({ enabled: slmEnabled }, 'SLM inference router initialized')
+    } catch (error) {
+      serviceLogger.warn({ error }, 'Failed to initialize SLM inference, continuing with regex only')
+    }
+    // Wire SLM inference into PatternRecognizer (Phase 4C)
+    if (inferenceRouter && phase12) {
+      phase12.patterns.setInferenceRouter(inferenceRouter)
+      serviceLogger.info('SLM inference wired into PatternRecognizer')
+    }
     // Store services
     services = {
       memory,
@@ -388,6 +411,8 @@ export async function initializeServices(config: Config, logger: Logger): Promis
       codeIndexer,
       codeQuery,
       codeLinker,
+      modelManager,
+      inferenceRouter,
       logger,
       config
     }
@@ -526,6 +551,22 @@ export function getCodeLinker(): MemoryCodeLinker | null {
   return services?.codeLinker ?? null
 }
+/**
+ * SLM Upgrade: accessor for the Model Manager held in the service container.
+ * Yields null when the container is unset or holds no model manager.
+ */
+export function getModelManager(): ModelManager | null {
+  if (!services) return null
+  return services.modelManager ?? null
+}
+/**
+ * SLM Upgrade: accessor for the Inference Router held in the service container.
+ * Yields null when the container is unset or holds no inference router.
+ */
+export function getInferenceRouter(): InferenceRouter | null {
+  if (!services) return null
+  return services.inferenceRouter ?? null
+}
 /**
  * Check if services are initialized
  */
@@ -617,6 +658,12 @@ export async function shutdownServices(): Promise<void> {
     }
   }
+  // Unload SLM models
+  if (services.modelManager) {
+    services.modelManager.unloadAll()
+    serviceLogger.info('SLM models unloaded')
+  }
   // Cleanup Phase 12
   services.phase12.cleanup()

package/src/training/data-store.ts ADDED Viewed

@@ -0,0 +1,298 @@
+/**
+ * Training Data Store — Phase 1A (SLM Upgrade)
+ * Logs classification decisions to SQLite for model training.
+ * Async, non-blocking — never impacts main request path.
+ *
+ * Table: training_data in ~/.claude-brain/data/memory.db
+ */
+import { Database } from 'bun:sqlite'
+import { join } from 'node:path'
+import { existsSync, mkdirSync } from 'node:fs'
+import { getClaudeBrainHome } from '@/config/home'
+export type TrainingTask = 'intent' | 'entity' | 'query' | 'knowledge' | 'compress' | 'pattern' // task names iterated by the stats helpers in this module
+export interface TrainingEntry { // payload accepted by logTrainingData; becomes one training_data row
+  task: TrainingTask // written to the `task` column
+  input: string // written to the `input` column
+  output: string   // JSON-encoded: label, entities array, summary, etc.
+  metadata?: string // JSON-encoded: confidence, scores, timing
+}
+export interface ModelFeedbackEntry { // payload accepted by logModelFeedback; becomes one model_feedback row
+  task: string
+  input: string
+  modelPrediction: string // compared with regexPrediction to count agreements and disagreements
+  modelConfidence: number
+  regexPrediction: string
+  actualLabel?: string // stored as NULL when omitted or empty; non-NULL rows are counted as reviewed
+}
+let db: Database | null = null // cached handle, opened on the first getDb() call
+let insertStmt: ReturnType<Database['prepare']> | null = null // prepared INSERT into training_data, set by getDb()
+let feedbackInsertStmt: ReturnType<Database['prepare']> | null = null // prepared INSERT into model_feedback, set by getDb()
+/**
+ * Lazily open the SQLite database at `<claude-brain home>/data/memory.db` and cache it.
+ *
+ * On first success the data directory is created if missing, WAL journaling is
+ * enabled, the tables are ensured, and both INSERT statements are prepared.
+ *
+ * The module-level `db`, `insertStmt` and `feedbackInsertStmt` are assigned only
+ * after every setup step has succeeded. Assigning `db` earlier would let a failure
+ * in a later step leave a cached handle whose statements are still null, so every
+ * subsequent call would hand back a half-initialised database. On failure the
+ * partially opened handle is closed and the next call starts over.
+ *
+ * @returns The shared database handle, or null when setup failed.
+ */
+function getDb(): Database | null {
+  if (db) return db
+  let candidate: Database | null = null
+  try {
+    const dataDir = join(getClaudeBrainHome(), 'data')
+    if (!existsSync(dataDir)) {
+      mkdirSync(dataDir, { recursive: true })
+    }
+    candidate = new Database(join(dataDir, 'memory.db'))
+    candidate.run('PRAGMA journal_mode = WAL')
+    ensureTable(candidate)
+    const trainingInsert = candidate.prepare(
+      'INSERT INTO training_data (task, input, output, metadata) VALUES (?, ?, ?, ?)'
+    )
+    const feedbackInsert = candidate.prepare(
+      'INSERT INTO model_feedback (task, input, model_prediction, model_confidence, regex_prediction, actual_label) VALUES (?, ?, ?, ?, ?, ?)'
+    )
+    // Publish the fully initialised state in one place
+    insertStmt = trainingInsert
+    feedbackInsertStmt = feedbackInsert
+    db = candidate
+    return db
+  } catch {
+    try {
+      candidate?.close()
+    } catch {
+      // Nothing more can be done with a handle that fails to close
+    }
+    return null
+  }
+}
+/**
+ * Create the training_data and model_feedback tables and their indexes when absent.
+ * Every statement uses IF NOT EXISTS and the statements run in declaration order.
+ */
+function ensureTable(database: Database): void {
+  const createTrainingData = `
+    CREATE TABLE IF NOT EXISTS training_data (
+      id INTEGER PRIMARY KEY AUTOINCREMENT,
+      task TEXT NOT NULL,
+      input TEXT NOT NULL,
+      output TEXT NOT NULL,
+      metadata TEXT,
+      verified INTEGER DEFAULT 0,
+      created_at TEXT DEFAULT (datetime('now'))
+    )
+  `
+  // Phase 6A: Model feedback table for continuous learning loop
+  const createModelFeedback = `
+    CREATE TABLE IF NOT EXISTS model_feedback (
+      id INTEGER PRIMARY KEY AUTOINCREMENT,
+      task TEXT NOT NULL,
+      input TEXT NOT NULL,
+      model_prediction TEXT NOT NULL,
+      model_confidence REAL NOT NULL,
+      regex_prediction TEXT NOT NULL,
+      actual_label TEXT,
+      created_at TEXT DEFAULT (datetime('now'))
+    )
+  `
+  const statements = [
+    createTrainingData,
+    // Indexes for efficient querying
+    'CREATE INDEX IF NOT EXISTS idx_training_task ON training_data(task)',
+    'CREATE INDEX IF NOT EXISTS idx_training_verified ON training_data(verified)',
+    createModelFeedback,
+    'CREATE INDEX IF NOT EXISTS idx_feedback_task ON model_feedback(task)',
+  ]
+  for (const ddl of statements) {
+    database.run(ddl)
+  }
+}
+/**
+ * Queue one training example for insertion into training_data.
+ * The write runs in a setImmediate callback. If the database or the prepared
+ * statement is unavailable, or the insert throws, the example is dropped silently.
+ */
+export function logTrainingData(entry: TrainingEntry): void {
+  const persist = (): void => {
+    try {
+      if (!getDb() || !insertStmt) return
+      const { task, input, output, metadata } = entry
+      insertStmt.run(task, input, output, metadata || null)
+    } catch {
+      // Logging must never disturb the caller
+    }
+  }
+  setImmediate(persist)
+}
+/**
+ * Export training data as JSONL lines for a specific task, newest first.
+ *
+ * `output` and `metadata` are stored as unvalidated text, so a row that does not
+ * hold valid JSON is exported with the raw text in that field rather than
+ * aborting the whole export with a parse error.
+ *
+ * Rows are ordered by `created_at` and then by `id`: `created_at` comes from
+ * `datetime('now')`, which has one-second resolution, so `id` is needed to keep
+ * the order (and the effect of `limit`) deterministic.
+ *
+ * @param task    Task whose examples are exported.
+ * @param options `verifiedOnly` restricts to rows with verified = 1; a truthy
+ *                `limit` caps the number of rows.
+ * @returns One JSON string per row, or an empty array when the database is unavailable.
+ */
+export function exportTrainingData(
+  task: TrainingTask,
+  options?: { verifiedOnly?: boolean; limit?: number }
+): string[] {
+  const database = getDb()
+  if (!database) return []
+  let sql = 'SELECT input, output, metadata, verified, created_at FROM training_data WHERE task = ?'
+  const params: Array<string | number> = [task]
+  if (options?.verifiedOnly) {
+    sql += ' AND verified = 1'
+  }
+  sql += ' ORDER BY created_at DESC, id DESC'
+  if (options?.limit) {
+    sql += ' LIMIT ?'
+    params.push(options.limit)
+  }
+  // Fall back to the stored text when a column does not contain valid JSON
+  const decode = (text: string): unknown => {
+    try {
+      return JSON.parse(text)
+    } catch {
+      return text
+    }
+  }
+  const rows = database.prepare(sql).all(...params) as Array<{
+    input: string
+    output: string
+    metadata: string | null
+    verified: number
+    created_at: string
+  }>
+  return rows.map(row => JSON.stringify({
+    input: row.input,
+    output: decode(row.output),
+    metadata: row.metadata ? decode(row.metadata) : null,
+    verified: row.verified === 1,
+    created_at: row.created_at,
+  }))
+}
+/**
+ * Get count of training examples per task.
+ *
+ * Every known task is present in the result; all counts are zero when the
+ * database is unavailable. The two COUNT statements are prepared once and
+ * reused for each task instead of being re-prepared on every iteration.
+ *
+ * @returns For each task, the total row count and the count with verified = 1.
+ */
+export function getTrainingStats(): Record<TrainingTask, { total: number; verified: number }> {
+  const database = getDb()
+  const tasks: TrainingTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
+  const stats = {} as Record<TrainingTask, { total: number; verified: number }>
+  if (!database) {
+    for (const task of tasks) {
+      stats[task] = { total: 0, verified: 0 }
+    }
+    return stats
+  }
+  const totalStmt = database.prepare('SELECT COUNT(*) as c FROM training_data WHERE task = ?')
+  const verifiedStmt = database.prepare('SELECT COUNT(*) as c FROM training_data WHERE task = ? AND verified = 1')
+  for (const task of tasks) {
+    const total = (totalStmt.get(task) as { c: number } | null | undefined)?.c ?? 0
+    const verified = (verifiedStmt.get(task) as { c: number } | null | undefined)?.c ?? 0
+    stats[task] = { total, verified }
+  }
+  return stats
+}
+// ── Phase 6A: Model Feedback Functions ──────────────────────────────
+/**
+ * Queue one model-vs-regex comparison for insertion into model_feedback.
+ * The write runs in a setImmediate callback. If the database or the prepared
+ * statement is unavailable, or the insert throws, the entry is dropped silently.
+ */
+export function logModelFeedback(entry: ModelFeedbackEntry): void {
+  const persist = (): void => {
+    try {
+      if (!getDb() || !feedbackInsertStmt) return
+      const { task, input, modelPrediction, modelConfidence, regexPrediction, actualLabel } = entry
+      feedbackInsertStmt.run(task, input, modelPrediction, modelConfidence, regexPrediction, actualLabel || null)
+    } catch {
+      // Logging must never disturb the caller
+    }
+  }
+  setImmediate(persist)
+}
+/**
+ * Get per-task feedback stats: total, agreements, disagreements, disagreement rate, reviewed count.
+ *
+ * - agreements: rows where model_prediction equals regex_prediction
+ * - disagreements: total minus agreements
+ * - disagreementRate: disagreements / total, or 0 when there are no rows
+ * - reviewed: rows whose actual_label is not NULL
+ *
+ * Every known task is present in the result; all values are zero when the
+ * database is unavailable. The three COUNT statements are prepared once and
+ * reused for each task instead of being re-prepared on every iteration.
+ */
+export function getModelFeedbackStats(): Record<string, {
+  total: number
+  agreements: number
+  disagreements: number
+  disagreementRate: number
+  reviewed: number
+}> {
+  const database = getDb()
+  const tasks: TrainingTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
+  const stats: ReturnType<typeof getModelFeedbackStats> = {}
+  if (!database) {
+    for (const task of tasks) {
+      stats[task] = { total: 0, agreements: 0, disagreements: 0, disagreementRate: 0, reviewed: 0 }
+    }
+    return stats
+  }
+  const totalStmt = database.prepare(
+    'SELECT COUNT(*) as c FROM model_feedback WHERE task = ?'
+  )
+  const agreementStmt = database.prepare(
+    'SELECT COUNT(*) as c FROM model_feedback WHERE task = ? AND model_prediction = regex_prediction'
+  )
+  const reviewedStmt = database.prepare(
+    'SELECT COUNT(*) as c FROM model_feedback WHERE task = ? AND actual_label IS NOT NULL'
+  )
+  // Run one of the COUNT statements for a task and read its single `c` column
+  const count = (stmt: ReturnType<Database['prepare']>, task: string): number =>
+    (stmt.get(task) as { c: number } | null | undefined)?.c ?? 0
+  for (const task of tasks) {
+    const total = count(totalStmt, task)
+    const agreements = count(agreementStmt, task)
+    const disagreements = total - agreements
+    const reviewed = count(reviewedStmt, task)
+    stats[task] = {
+      total,
+      agreements,
+      disagreements,
+      disagreementRate: total > 0 ? disagreements / total : 0,
+      reviewed,
+    }
+  }
+  return stats
+}
+/**
+ * Export feedback as JSONL lines for a specific task, newest first.
+ *
+ * Rows are ordered by `created_at` and then by `id`: `created_at` comes from
+ * `datetime('now')`, which has one-second resolution, so `id` is needed to keep
+ * the order (and the effect of `limit`) deterministic.
+ *
+ * @param task    Task whose feedback rows are exported.
+ * @param options A truthy `limit` caps the number of rows.
+ * @returns One JSON string per row, or an empty array when the database is unavailable.
+ */
+export function exportModelFeedback(
+  task: string,
+  options?: { limit?: number }
+): string[] {
+  const database = getDb()
+  if (!database) return []
+  let sql = 'SELECT input, model_prediction, model_confidence, regex_prediction, actual_label, created_at FROM model_feedback WHERE task = ?'
+  const params: Array<string | number> = [task]
+  sql += ' ORDER BY created_at DESC, id DESC'
+  if (options?.limit) {
+    sql += ' LIMIT ?'
+    params.push(options.limit)
+  }
+  const rows = database.prepare(sql).all(...params) as Array<{
+    input: string
+    model_prediction: string
+    model_confidence: number
+    regex_prediction: string
+    actual_label: string | null
+    created_at: string
+  }>
+  return rows.map(row => JSON.stringify({
+    input: row.input,
+    modelPrediction: row.model_prediction,
+    modelConfidence: row.model_confidence,
+    regexPrediction: row.regex_prediction,
+    actualLabel: row.actual_label,
+    createdAt: row.created_at,
+  }))
+}
+/**
+ * Get the most recent disagreements for human review.
+ *
+ * A disagreement is a model_feedback row whose model_prediction differs from its
+ * regex_prediction. Rows are ordered by `created_at` and then by `id`:
+ * `created_at` comes from `datetime('now')`, which has one-second resolution, so
+ * `id` is needed to make "most recent" deterministic under LIMIT.
+ *
+ * @param task  Task whose disagreements are returned.
+ * @param limit Maximum number of rows. A non-finite value (for example NaN from
+ *              a failed parse) falls back to the default of 50; a fractional
+ *              value is truncated.
+ * @returns The matching rows, or an empty array when the database is unavailable.
+ */
+export function getDisagreements(
+  task: string,
+  limit: number = 50
+): Array<{
+  input: string
+  modelPrediction: string
+  modelConfidence: number
+  regexPrediction: string
+  createdAt: string
+}> {
+  const database = getDb()
+  if (!database) return []
+  // NaN or Infinity is not a meaningful row count to hand to LIMIT
+  const rowLimit = Number.isFinite(limit) ? Math.trunc(limit) : 50
+  const rows = database.prepare(`
+    SELECT input, model_prediction, model_confidence, regex_prediction, created_at
+    FROM model_feedback
+    WHERE task = ? AND model_prediction != regex_prediction
+    ORDER BY created_at DESC, id DESC
+    LIMIT ?
+  `).all(task, rowLimit) as Array<{
+    input: string
+    model_prediction: string
+    model_confidence: number
+    regex_prediction: string
+    created_at: string
+  }>
+  return rows.map(row => ({
+    input: row.input,
+    modelPrediction: row.model_prediction,
+    modelConfidence: row.model_confidence,
+    regexPrediction: row.regex_prediction,
+    createdAt: row.created_at,
+  }))
+}