claude-brain 0.28.0 → 0.28.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 0.28.0
1
+ 0.28.1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-brain",
3
- "version": "0.28.0",
3
+ "version": "0.28.1",
4
4
  "description": "Local development assistant bridging Obsidian vaults with Claude Code via MCP",
5
5
  "type": "module",
6
6
  "main": "src/index.ts",
@@ -64,7 +64,7 @@ const BIO_TYPE_MAP: Record<string, ExtractedEntity['type']> = {
64
64
  }
65
65
 
66
66
  /** Minimum softmax probability to accept a B-/I- entity tag (vs forcing O) */
67
- const ENTITY_TOKEN_THRESHOLD = 0.5
67
+ const ENTITY_TOKEN_THRESHOLD = 0.25
68
68
 
69
69
  /** Common words that should never be entities (false positive filter) */
70
70
  const ENTITY_STOPWORDS = new Set([
@@ -316,7 +316,7 @@ export class InferenceRouter {
316
316
  normalizedName: name.toLowerCase(),
317
317
  type: entityType,
318
318
  confidence: avgProb,
319
- source: 'rule',
319
+ source: 'model',
320
320
  positions: [startPos],
321
321
  })
322
322
  }
@@ -519,7 +519,14 @@ export class InferenceRouter {
519
519
 
520
520
  if (mode === 'both') {
521
521
  this.logComparison('knowledge', text, modelResult, regexResult)
522
- return modelResult ?? regexResult
522
+ }
523
+
524
+ // Cross-check: if regex found a keyword match (non-'fact') and model disagrees, prefer regex.
525
+ // Regex only returns non-'fact' when it finds a definitive keyword (prefer, must, goal, etc.)
526
+ // so it's more trustworthy than an undertrained model for those cases.
527
+ if (modelResult && regexResult !== 'fact' && modelResult !== regexResult) {
528
+ this.logger.debug({ modelResult, regexResult }, 'Knowledge cross-check: regex keyword wins over model')
529
+ return regexResult
523
530
  }
524
531
 
525
532
  if (modelResult) {
@@ -623,20 +630,23 @@ export class InferenceRouter {
623
630
  const maxLen = this.modelManager.getManifestEntry('compress')?.maxSeqLen ?? 256
624
631
  const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
625
632
 
626
- // Greedy autoregressive decode loop
627
- const generatedIds: number[] = [...inputIds.filter((_, i) => attentionMask[i] === 1)]
633
+ // Keep the padded arrays (fixed shape [1, maxLen]) and track active length.
634
+ // The ONNX model expects fixed input shape [1, maxLen]; stripping padding
635
+ // would create variable-length tensors that the model rejects.
636
+ let activeLen = attentionMask.filter(m => m === 1).length
628
637
  const maxGenTokens = Math.min(COMPRESS_MAX_TOKENS, maxLen)
638
+ const inputLen = activeLen
629
639
 
630
640
  for (let step = 0; step < maxGenTokens; step++) {
631
- const currentMask = new Array(generatedIds.length).fill(1)
632
- const logits = await this.modelManager.infer('compress', generatedIds, currentMask)
641
+ if (activeLen >= maxLen) break
642
+
643
+ const logits = await this.modelManager.infer('compress', [...inputIds], [...attentionMask])
633
644
  if (!logits) break
634
645
 
635
- // Get logits for the last token position
636
- // For seq2seq, the output logits cover the vocabulary
637
- const vocabSize = logits.length / generatedIds.length
638
- const lastTokenOffset = (generatedIds.length - 1) * vocabSize
639
- const lastTokenLogits = logits.slice(lastTokenOffset, lastTokenOffset + vocabSize)
646
+ // vocabSize = total logits / sequence length (maxLen, the padded dimension)
647
+ const vocabSize = logits.length / maxLen
648
+ const lastActiveOffset = (activeLen - 1) * vocabSize
649
+ const lastTokenLogits = logits.slice(lastActiveOffset, lastActiveOffset + vocabSize)
640
650
 
641
651
  // Find argmax (greedy decode)
642
652
  let bestId = 0
@@ -651,18 +661,21 @@ export class InferenceRouter {
651
661
  // Stop on EOS
652
662
  if (bestId === EOS_TOKEN_ID || bestId === 0) break
653
663
 
654
- generatedIds.push(bestId)
664
+ // Write new token into the next padding slot
665
+ inputIds[activeLen] = bestId
666
+ attentionMask[activeLen] = 1
667
+ activeLen++
655
668
  }
656
669
 
657
670
  // Decode only the generated tokens (after input)
658
- const inputLen = inputIds.filter((_, i) => attentionMask[i] === 1).length
659
- const outputIds = generatedIds.slice(inputLen)
671
+ const outputIds = inputIds.slice(inputLen, activeLen)
660
672
 
661
673
  if (outputIds.length === 0) return null
662
674
 
663
675
  const decoded = tokenizer.decode(outputIds).trim()
664
676
  return decoded.length > 0 ? decoded : null
665
- } catch (error) {
677
+ } catch (err) {
678
+ const error = err instanceof Error ? { message: err.message, stack: err.stack } : err
666
679
  this.logger.warn({ error }, 'Model compression failed, returning original text')
667
680
  return null
668
681
  }
@@ -138,7 +138,8 @@ export class ModelManager {
138
138
  this.loadedModels.set(task, loaded)
139
139
  this.logger.info({ task, file: entry.file, loadMs: Date.now() - startMs }, 'Model loaded')
140
140
  return loaded
141
- } catch (error) {
141
+ } catch (err) {
142
+ const error = err instanceof Error ? { message: err.message, stack: err.stack } : err
142
143
  this.logger.warn({ error, task, file: entry.file }, 'Failed to load model')
143
144
  return null
144
145
  }
@@ -171,7 +172,8 @@ export class ModelManager {
171
172
  const outputKey = Object.keys(results)[0]
172
173
  if (!outputKey) return null
173
174
  return results[outputKey].data as Float32Array
174
- } catch (error) {
175
+ } catch (err) {
176
+ const error = err instanceof Error ? { message: err.message, stack: err.stack } : err
175
177
  this.logger.warn({ error, task }, 'Inference failed')
176
178
  return null
177
179
  }
@@ -10,7 +10,7 @@ export interface ExtractedEntity {
10
10
  normalizedName: string
11
11
  type: EntityType
12
12
  confidence: number
13
- source: 'dictionary' | 'nlp' | 'rule'
13
+ source: 'dictionary' | 'nlp' | 'rule' | 'model'
14
14
  positions: number[]
15
15
  }
16
16