claude-brain 0.27.2 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,206 @@
1
+ /**
2
+ * Model Manager — SLM Upgrade Phase 4A
3
+ * Discovers, loads, and caches ONNX models from ~/.claude-brain/models/
4
+ * Lazy loading: models load on first use, not at startup.
5
+ */
6
+
7
+ import { existsSync, readFileSync } from 'node:fs'
8
+ import { join } from 'node:path'
9
+ import type { Logger } from 'pino'
10
+ import { getHomePaths } from '@/config/home'
11
+
12
/** Classification/extraction tasks that may be backed by a local ONNX model (see getStatus). */
export type ModelTask = 'intent' | 'entity' | 'query' | 'knowledge' | 'compress' | 'pattern'
13
+
14
/** One entry in manifest.json describing a single model artifact on disk. */
export interface ModelManifestEntry {
  /** Version string of the model artifact */
  version: string
  /** Model file name, resolved relative to the models directory */
  file: string
  /** Integrity checksum of the model file — present but not verified in this module */
  sha256?: string
  /** Parameter-count descriptor (e.g. "33M") — assumed free-form; TODO confirm */
  params?: string
  /** Reported eval accuracy — presumably in [0, 1]; verify against training pipeline */
  accuracy?: number
  /** Classification label names — assumed index-aligned with model logits; confirm */
  labels?: string[]
  /** Maximum input sequence length the model accepts */
  maxSeqLen?: number
}
23
+
24
/** Shape of manifest.json: a possibly-partial map from task name to model entry. */
export interface ModelManifest {
  models: Partial<Record<ModelTask, ModelManifestEntry>>
}
27
+
28
/** A model whose ONNX inference session has been created and cached. */
export interface LoadedModel {
  /** Live inference session; typed loosely because onnxruntime is an optional dep */
  session: any // onnxruntime.InferenceSession
  /** Manifest entry this session was created from */
  manifest: ModelManifestEntry
  /** Epoch milliseconds when the session finished loading */
  loadedAt: number
}
33
+
34
+ export class ModelManager {
35
+ private modelsDir: string
36
+ private manifest: ModelManifest | null = null
37
+ private loadedModels = new Map<ModelTask, LoadedModel>()
38
+ private onnxRuntime: any = null
39
+ private onnxAvailable: boolean | null = null
40
+ private logger: Logger
41
+
42
+ constructor(logger: Logger, modelsDir?: string) {
43
+ this.logger = logger.child({ component: 'model-manager' })
44
+ this.modelsDir = modelsDir || getHomePaths().models
45
+ }
46
+
47
+ /**
48
+ * Check if ONNX Runtime is available (installed as optional dep)
49
+ * Tries onnxruntime-node first (faster native), falls back to onnxruntime-web (WASM)
50
+ */
51
+ private async checkOnnxRuntime(): Promise<boolean> {
52
+ if (this.onnxAvailable !== null) return this.onnxAvailable
53
+ // Try native node bindings first
54
+ try {
55
+ this.onnxRuntime = await import('onnxruntime-node')
56
+ this.onnxAvailable = true
57
+ this.logger.debug('ONNX Runtime (native) available')
58
+ return true
59
+ } catch {
60
+ // Native not available, try WASM fallback
61
+ }
62
+ try {
63
+ this.onnxRuntime = await import('onnxruntime-web')
64
+ this.onnxAvailable = true
65
+ this.logger.debug('ONNX Runtime (WASM) available')
66
+ } catch {
67
+ this.onnxAvailable = false
68
+ this.logger.debug('ONNX Runtime not installed — models will not load')
69
+ }
70
+ return this.onnxAvailable
71
+ }
72
+
73
+ /**
74
+ * Load the manifest.json from the models directory
75
+ */
76
+ private loadManifest(): ModelManifest | null {
77
+ if (this.manifest) return this.manifest
78
+ const manifestPath = join(this.modelsDir, 'manifest.json')
79
+ if (!existsSync(manifestPath)) {
80
+ this.logger.debug({ modelsDir: this.modelsDir }, 'No manifest.json found')
81
+ return null
82
+ }
83
+ try {
84
+ const raw = readFileSync(manifestPath, 'utf-8')
85
+ this.manifest = JSON.parse(raw) as ModelManifest
86
+ return this.manifest
87
+ } catch (error) {
88
+ this.logger.warn({ error }, 'Failed to parse manifest.json')
89
+ return null
90
+ }
91
+ }
92
+
93
+ /**
94
+ * Check if a model file exists for a given task (without loading it)
95
+ */
96
+ hasModel(task: ModelTask): boolean {
97
+ const manifest = this.loadManifest()
98
+ if (!manifest?.models[task]) return false
99
+ const entry = manifest.models[task]!
100
+ return existsSync(join(this.modelsDir, entry.file))
101
+ }
102
+
103
+ /**
104
+ * Get manifest entry for a task
105
+ */
106
+ getManifestEntry(task: ModelTask): ModelManifestEntry | null {
107
+ const manifest = this.loadManifest()
108
+ return manifest?.models[task] ?? null
109
+ }
110
+
111
+ /**
112
+ * Lazy-load a model on first use. Returns null if unavailable.
113
+ */
114
+ async loadModel(task: ModelTask): Promise<LoadedModel | null> {
115
+ // Return cached model
116
+ if (this.loadedModels.has(task)) return this.loadedModels.get(task)!
117
+
118
+ // Check prerequisites
119
+ if (!(await this.checkOnnxRuntime())) return null
120
+ const manifest = this.loadManifest()
121
+ if (!manifest?.models[task]) return null
122
+
123
+ const entry = manifest.models[task]!
124
+ const modelPath = join(this.modelsDir, entry.file)
125
+ if (!existsSync(modelPath)) {
126
+ this.logger.debug({ task, file: entry.file }, 'Model file not found')
127
+ return null
128
+ }
129
+
130
+ try {
131
+ const startMs = Date.now()
132
+ const session = await this.onnxRuntime.InferenceSession.create(modelPath)
133
+ const loaded: LoadedModel = {
134
+ session,
135
+ manifest: entry,
136
+ loadedAt: Date.now(),
137
+ }
138
+ this.loadedModels.set(task, loaded)
139
+ this.logger.info({ task, file: entry.file, loadMs: Date.now() - startMs }, 'Model loaded')
140
+ return loaded
141
+ } catch (error) {
142
+ this.logger.warn({ error, task, file: entry.file }, 'Failed to load model')
143
+ return null
144
+ }
145
+ }
146
+
147
+ /**
148
+ * Run inference on a loaded model. Returns raw output tensor data.
149
+ * Callers (InferenceRouter) handle task-specific pre/post processing.
150
+ * Automatically detects which inputs the model accepts (input_ids, attention_mask).
151
+ */
152
+ async infer(task: ModelTask, inputIds: number[], attentionMask?: number[]): Promise<Float32Array | null> {
153
+ const model = await this.loadModel(task)
154
+ if (!model) return null
155
+
156
+ try {
157
+ const OrtTensor = this.onnxRuntime.Tensor
158
+ const inputTensor = new OrtTensor('int64', BigInt64Array.from(inputIds.map(BigInt)), [1, inputIds.length])
159
+ const feeds: Record<string, any> = { input_ids: inputTensor }
160
+
161
+ // Only pass attention_mask if the model actually accepts it
162
+ const modelInputNames = model.session.inputNames ?? []
163
+ if (attentionMask && modelInputNames.includes('attention_mask')) {
164
+ const maskTensor = new OrtTensor('int64', BigInt64Array.from(attentionMask.map(BigInt)), [1, attentionMask.length])
165
+ feeds.attention_mask = maskTensor
166
+ }
167
+
168
+ const results = await model.session.run(feeds)
169
+
170
+ // Most classification models output 'logits'
171
+ const outputKey = Object.keys(results)[0]
172
+ if (!outputKey) return null
173
+ return results[outputKey].data as Float32Array
174
+ } catch (error) {
175
+ this.logger.warn({ error, task }, 'Inference failed')
176
+ return null
177
+ }
178
+ }
179
+
180
+ /**
181
+ * Get status of all models (for CLI and health checks)
182
+ */
183
+ getStatus(): Record<ModelTask, { available: boolean; loaded: boolean; version?: string; accuracy?: number }> {
184
+ const tasks: ModelTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
185
+ const status = {} as Record<ModelTask, { available: boolean; loaded: boolean; version?: string; accuracy?: number }>
186
+
187
+ for (const task of tasks) {
188
+ const entry = this.getManifestEntry(task)
189
+ status[task] = {
190
+ available: this.hasModel(task),
191
+ loaded: this.loadedModels.has(task),
192
+ version: entry?.version,
193
+ accuracy: entry?.accuracy,
194
+ }
195
+ }
196
+ return status
197
+ }
198
+
199
+ /**
200
+ * Unload all models (for cleanup/testing)
201
+ */
202
+ unloadAll(): void {
203
+ this.loadedModels.clear()
204
+ this.logger.debug('All models unloaded')
205
+ }
206
+ }
@@ -0,0 +1,118 @@
1
+ /**
2
+ * Tokenizer — SLM Upgrade Phase 4B
3
+ * GPT-2 BPE tokenizer for ONNX model inference.
4
+ *
5
+ * Strategy:
6
+ * 1. Try to dynamically import `tiktoken` (JS package)
7
+ * 2. Fall back to a simple whitespace tokenizer with hash-based IDs
8
+ *
9
+ * The tokenizer pads/truncates sequences to a fixed length and
10
+ * returns both input_ids and attention_mask arrays.
11
+ */
12
+
13
+ import type { Logger } from 'pino'
14
+
15
+ export interface TokenizerOutput {
16
+ inputIds: number[]
17
+ attentionMask: number[]
18
+ }
19
+
20
+ export interface Tokenizer {
21
+ encode(text: string, maxLength: number): TokenizerOutput
22
+ decode(tokenIds: number[]): string
23
+ }
24
+
25
+ /** Singleton cache */
26
+ let cachedTokenizer: Tokenizer | null = null
27
+
28
+ /**
29
+ * Get a tokenizer instance. Tries tiktoken first, falls back to hash-based.
30
+ */
31
+ export async function getTokenizer(logger?: Logger): Promise<Tokenizer> {
32
+ if (cachedTokenizer) return cachedTokenizer
33
+
34
+ // Try tiktoken (JS binding for GPT-2 BPE)
35
+ try {
36
+ const tiktoken = await import('tiktoken')
37
+ const enc = tiktoken.encoding_for_model('gpt2')
38
+
39
+ cachedTokenizer = {
40
+ encode(text: string, maxLength: number): TokenizerOutput {
41
+ const tokens = Array.from(enc.encode(text))
42
+
43
+ // Truncate if necessary
44
+ const truncated = tokens.slice(0, maxLength)
45
+
46
+ // Pad to maxLength
47
+ const inputIds = new Array(maxLength).fill(0)
48
+ const attentionMask = new Array(maxLength).fill(0)
49
+
50
+ for (let i = 0; i < truncated.length; i++) {
51
+ inputIds[i] = truncated[i]
52
+ attentionMask[i] = 1
53
+ }
54
+
55
+ return { inputIds, attentionMask }
56
+ },
57
+ decode(tokenIds: number[]): string {
58
+ // Filter out padding (0) tokens
59
+ const filtered = tokenIds.filter(id => id !== 0)
60
+ return new TextDecoder().decode(enc.decode(new Uint32Array(filtered)))
61
+ }
62
+ }
63
+
64
+ logger?.debug('Using tiktoken GPT-2 tokenizer')
65
+ return cachedTokenizer
66
+ } catch {
67
+ // tiktoken not available
68
+ }
69
+
70
+ // Fallback: simple whitespace tokenizer with hash-based IDs
71
+ logger?.warn('tiktoken not available — using fallback hash-based tokenizer (reduced accuracy)')
72
+
73
+ cachedTokenizer = {
74
+ encode(text: string, maxLength: number): TokenizerOutput {
75
+ // Split on whitespace and punctuation, filter empties
76
+ const tokens = text
77
+ .toLowerCase()
78
+ .split(/(\s+|[.,!?;:'"()\[\]{}<>\/\\@#$%^&*+=~`|_-]+)/)
79
+ .filter(t => t.trim().length > 0)
80
+
81
+ // Hash each token to a stable ID in [1, 50256] range (GPT-2 vocab size)
82
+ const VOCAB_SIZE = 50256
83
+ const tokenIds = tokens.map(t => {
84
+ let hash = 5381
85
+ for (let i = 0; i < t.length; i++) {
86
+ hash = ((hash << 5) + hash + t.charCodeAt(i)) & 0x7fffffff
87
+ }
88
+ return (hash % (VOCAB_SIZE - 1)) + 1 // avoid 0 (used for padding)
89
+ })
90
+
91
+ // Truncate
92
+ const truncated = tokenIds.slice(0, maxLength)
93
+
94
+ // Pad to maxLength
95
+ const inputIds = new Array(maxLength).fill(0)
96
+ const attentionMask = new Array(maxLength).fill(0)
97
+
98
+ for (let i = 0; i < truncated.length; i++) {
99
+ inputIds[i] = truncated[i]
100
+ attentionMask[i] = 1
101
+ }
102
+
103
+ return { inputIds, attentionMask }
104
+ },
105
+ decode(_tokenIds: number[]): string {
106
+ // Hash-based tokenizer is one-way; decode is not possible.
107
+ // Compression will fall back to returning original text.
108
+ return ''
109
+ }
110
+ }
111
+
112
+ return cachedTokenizer
113
+ }
114
+
115
+ /** Reset cached tokenizer (for testing) */
116
+ export function _resetTokenizerForTesting(): void {
117
+ cachedTokenizer = null
118
+ }
@@ -222,6 +222,7 @@ export class EntityExtractor {
222
222
  }
223
223
 
224
224
  extract(text: string): ExtractedEntity[] {
225
+ const startTime = Date.now()
225
226
  const entities: Map<string, ExtractedEntity> = new Map()
226
227
 
227
228
  this.extractFromDictionary(text, entities)
@@ -233,8 +234,37 @@ export class EntityExtractor {
233
234
  this.extractWithNlp(text, entities)
234
235
  }
235
236
 
236
- return Array.from(entities.values())
237
+ const result = Array.from(entities.values())
237
238
  .sort((a, b) => b.confidence - a.confidence)
239
+
240
+ // SLM Phase 1A: Log extraction for training data collection
241
+ this._logTraining(text, result, startTime)
242
+
243
+ return result
244
+ }
245
+
246
  /**
   * SLM Phase 1A: Log entity extraction result for training data.
   * Best-effort: any failure (e.g. the training store being unavailable) is
   * swallowed so that data capture can never break extraction itself.
   *
   * @param text      The raw input text that was analyzed
   * @param entities  Extracted entities (already sorted by confidence in extract())
   * @param startTime Epoch ms when extraction began, used for elapsed_ms metadata
   */
  private _logTraining(text: string, entities: ExtractedEntity[], startTime: number): void {
    try {
      // Lazy require keeps the training store an optional, runtime-only dependency
      const { logTrainingData } = require('@/training/data-store')
      logTrainingData({
        task: 'entity' as const,
        input: text,
        // Output is the JSON-serialized entity list in the training schema
        output: JSON.stringify(entities.map(e => ({
          text: e.name,
          type: e.type,
          normalized: e.normalizedName,
          confidence: e.confidence,
          source: e.source,
          positions: e.positions,
        }))),
        metadata: JSON.stringify({ count: entities.length, elapsed_ms: Date.now() - startTime }),
      })
    } catch {
      // Non-critical — training capture is best-effort
    }
  }
238
268
  }
239
269
 
240
270
  extractBatch(texts: string[]): ExtractedEntity[][] {
@@ -42,8 +42,24 @@ Drop: filler words, repetition, context that's obvious from the category.
42
42
  Observation: ${content}`
43
43
 
44
44
  try {
45
+ const startTime = Date.now()
45
46
  const response = await this.callLLM(prompt)
46
- return { summary: response, original: content, compressed: true }
47
+ const result: CompressedObservation = { summary: response, original: content, compressed: true }
48
+
49
+ // SLM Phase 1A: Log compression input/output pair as gold training data
50
+ try {
51
+ const { logTrainingData } = require('@/training/data-store')
52
+ logTrainingData({
53
+ task: 'compress' as const,
54
+ input: content,
55
+ output: JSON.stringify({ summary: response }),
56
+ metadata: JSON.stringify({ category, elapsed_ms: Date.now() - startTime, provider: this.config.provider }),
57
+ })
58
+ } catch {
59
+ // Non-critical
60
+ }
61
+
62
+ return result
47
63
  } catch (error) {
48
64
  this.logger.warn({ error }, 'LLM compression failed, storing original')
49
65
  return { summary: content, compressed: false }
@@ -34,11 +34,19 @@ export class PatternRecognizer {
34
34
  private memory: MemoryManager
35
35
  private patterns: Map<string, Pattern> = new Map()
36
36
 
37
+ /** SLM Upgrade: Optional inference router for model-based pattern classification */
38
+ private inferenceRouter: any = null
39
+
37
40
  /**
   * @param logger Parent pino logger; a tagged child logger is derived from it.
   * @param memory Memory manager backing pattern analysis and storage.
   */
  constructor(logger: Logger, memory: MemoryManager) {
    this.logger = logger.child({ component: 'pattern-recognizer' })
    this.memory = memory
  }
41
44
 
45
  /**
   * SLM Upgrade: Set optional inference router.
   * When set, analyzeCluster tries router.classifyPatternType() first and
   * falls back to the regex heuristics on failure; when unset, heuristics only.
   * Typed `any` to keep the router loosely coupled — TODO: tighten to an
   * InferenceRouter interface once its shape stabilizes.
   */
  setInferenceRouter(router: any): void {
    this.inferenceRouter = router
  }
49
+
42
50
  /**
43
51
  * Analyze all decisions to find patterns
44
52
  */
@@ -58,7 +66,7 @@ export class PatternRecognizer {
58
66
  const clusters = await this.clusterDecisions(allDecisions)
59
67
 
60
68
  // Extract patterns from clusters
61
- const patterns = this.extractPatterns(clusters)
69
+ const patterns = await this.extractPatterns(clusters)
62
70
 
63
71
  // Store patterns
64
72
  for (const pattern of patterns) {
@@ -159,14 +167,16 @@ export class PatternRecognizer {
159
167
  /**
160
168
  * Extract patterns from decision clusters
161
169
  */
162
- private extractPatterns(clusters: DecisionWithProject[][]): Pattern[] {
170
+ private async extractPatterns(clusters: DecisionWithProject[][]): Promise<Pattern[]> {
163
171
  const patterns: Pattern[] = []
164
172
 
165
173
  for (const cluster of clusters) {
166
- const pattern = this.analyzeCluster(cluster)
174
+ const pattern = await this.analyzeCluster(cluster)
167
175
 
168
176
  if (pattern) {
169
177
  patterns.push(pattern)
178
+ // SLM Phase 1A: Log classification for training data
179
+ this._logPatternTraining(pattern.description, pattern.type)
170
180
  }
171
181
  }
172
182
 
@@ -176,7 +186,7 @@ export class PatternRecognizer {
176
186
  /**
177
187
  * Analyze a cluster to find the common pattern
178
188
  */
179
- private analyzeCluster(cluster: DecisionWithProject[]): Pattern | null {
189
+ private async analyzeCluster(cluster: DecisionWithProject[]): Promise<Pattern | null> {
180
190
  if (cluster.length < 3) return null
181
191
 
182
192
  // Extract common keywords
@@ -184,8 +194,21 @@ export class PatternRecognizer {
184
194
 
185
195
  if (keywords.length === 0) return null
186
196
 
187
- // Determine pattern type
188
- const type = this.determinePatternType(cluster)
197
+ // Determine pattern type — SLM model first, regex fallback
198
+ let type: Pattern['type']
199
+ if (this.inferenceRouter) {
200
+ try {
201
+ // Use representative text from cluster for model classification
202
+ const representative = cluster.slice(0, 3)
203
+ .map(d => d.decision + (d.reasoning ? ` ${d.reasoning}` : ''))
204
+ .join('. ')
205
+ type = await this.inferenceRouter.classifyPatternType(representative)
206
+ } catch {
207
+ type = this.determinePatternType(cluster)
208
+ }
209
+ } else {
210
+ type = this.determinePatternType(cluster)
211
+ }
189
212
 
190
213
  // Create pattern description
191
214
  const description = this.generatePatternDescription(keywords, type)
@@ -284,6 +307,23 @@ export class PatternRecognizer {
284
307
  return 'solution'
285
308
  }
286
309
 
310
  /**
   * SLM Phase 1A: Log pattern type classification for training data.
   * Called from extractPatterns after a pattern is created (not from
   * analyzeCluster, as a previous comment stated).
   * Best-effort: any failure is swallowed so capture never breaks analysis.
   *
   * @param description Generated pattern description, used as training input
   * @param patternType The classified pattern type, logged as the label
   */
  private _logPatternTraining(description: string, patternType: Pattern['type']): void {
    try {
      // Lazy require keeps the training store an optional, runtime-only dependency
      const { logTrainingData } = require('@/training/data-store')
      logTrainingData({
        task: 'pattern' as const,
        input: description,
        output: JSON.stringify({ label: patternType }),
      })
    } catch {
      // Non-critical — training capture is best-effort
    }
  }
326
+
287
327
  /**
288
328
  * Generate pattern description
289
329
  */
@@ -77,6 +77,7 @@ const LOW_CONFIDENCE = 0.5
77
77
  * Classify the intent of a query
78
78
  */
79
79
  export function classifyIntent(query: string): QueryIntent {
80
+ const startTime = Date.now()
80
81
  const normalizedQuery = query.toLowerCase().trim()
81
82
 
82
83
  // Track matches for each intent
@@ -132,11 +133,26 @@ export function classifyIntent(query: string): QueryIntent {
132
133
  metadata.comparisonTerms = extractComparisonTerms(query)
133
134
  }
134
135
 
135
- return {
136
+ const result: QueryIntent = {
136
137
  type: bestIntent as QueryIntent['type'],
137
138
  confidence,
138
139
  metadata
139
140
  }
141
+
142
+ // SLM Phase 1A: Log query classification for training data collection
143
+ try {
144
+ const { logTrainingData } = require('@/training/data-store')
145
+ logTrainingData({
146
+ task: 'query' as const,
147
+ input: query,
148
+ output: JSON.stringify({ label: result.type }),
149
+ metadata: JSON.stringify({ confidence, scores, elapsed_ms: Date.now() - startTime }),
150
+ })
151
+ } catch {
152
+ // Non-critical
153
+ }
154
+
155
+ return result
140
156
  }
141
157
 
142
158
  /**
@@ -112,6 +112,14 @@ export interface BrainExtractedEntities {
112
112
  }
113
113
 
114
114
  export class BrainEntityExtractor {
115
  /** SLM Upgrade: Optional inference router for model-based entity extraction */
  private inferenceRouter: any = null

  /**
   * SLM Upgrade: Set optional inference router for model-based NER.
   * When set, extractTechnologies tries router.extractEntities() first and
   * merges its technology entities with the dictionary scan; when unset,
   * only the dictionary scan runs.
   */
  setInferenceRouter(router: any): void {
    this.inferenceRouter = router
  }
122
+
115
123
  /**
116
124
  * Extract all entities from a natural language message
117
125
  */
@@ -122,7 +130,7 @@ export class BrainEntityExtractor {
122
130
 
123
131
  // Use provided project or try to detect from message
124
132
  entities.project = knownProject || await this.extractProject(message)
125
- entities.technologies = this.extractTechnologies(message)
133
+ entities.technologies = await this.extractTechnologies(message)
126
134
  entities.topic = this.extractTopic(message)
127
135
 
128
136
  // Extract decision components if present
@@ -203,12 +211,30 @@ export class BrainEntityExtractor {
203
211
  }
204
212
 
205
213
  /**
206
- * Extract technology mentions
214
+ * Extract technology mentions.
215
+ * SLM Upgrade: tries model-based NER first if InferenceRouter is available,
216
+ * merges with dictionary fallback for comprehensive coverage.
207
217
  */
208
- private extractTechnologies(message: string): string[] {
218
+ private async extractTechnologies(message: string): Promise<string[]> {
219
+ const found = new Set<string>()
220
+
221
+ // SLM: Try model-based entity extraction first
222
+ if (this.inferenceRouter) {
223
+ try {
224
+ const modelEntities = await this.inferenceRouter.extractEntities(message)
225
+ for (const entity of modelEntities) {
226
+ if (entity.type === 'technology') {
227
+ found.add(entity.normalizedName)
228
+ }
229
+ }
230
+ } catch {
231
+ // Model failed, fall through to dictionary
232
+ }
233
+ }
234
+
235
+ // Dictionary fallback (always runs — merges with model results)
209
236
  const lower = message.toLowerCase()
210
237
  const words = lower.split(/[\s,;:()[\]{}"'`|/\\]+/)
211
- const found = new Set<string>()
212
238
 
213
239
  for (const word of words) {
214
240
  const cleaned = word.replace(/^[^a-z0-9]+|[^a-z0-9]+$/g, '')