claude-brain 0.28.0 → 0.28.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/VERSION
CHANGED
@@ -1 +1 @@
-0.28.0
+0.28.1
package/package.json
CHANGED
@@ -64,7 +64,7 @@ const BIO_TYPE_MAP: Record<string, ExtractedEntity['type']> = {
 }
 
 /** Minimum softmax probability to accept a B-/I- entity tag (vs forcing O) */
-const ENTITY_TOKEN_THRESHOLD = 0.
+const ENTITY_TOKEN_THRESHOLD = 0.25
 
 /** Common words that should never be entities (false positive filter) */
 const ENTITY_STOPWORDS = new Set([
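The new 0.25 threshold gates per-token BIO tags by their softmax probability. A minimal sketch of how such a gate is typically applied; the helper name and inputs are illustrative, since the package's actual decoding code is not part of this diff:

    // Illustrative sketch only; not the package's real decoding code.
    const ENTITY_TOKEN_THRESHOLD = 0.25

    // Force a low-confidence B-/I- tag back to 'O' (outside any entity).
    function gateTag(tag: string, prob: number): string {
      const isEntityTag = tag.startsWith('B-') || tag.startsWith('I-')
      return isEntityTag && prob < ENTITY_TOKEN_THRESHOLD ? 'O' : tag
    }

    gateTag('B-person', 0.19)  // => 'O'
    gateTag('B-person', 0.41)  // => 'B-person'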
@@ -316,7 +316,7 @@ export class InferenceRouter {
         normalizedName: name.toLowerCase(),
         type: entityType,
         confidence: avgProb,
-        source: '
+        source: 'model',
         positions: [startPos],
       })
     }
@@ -519,7 +519,14 @@ export class InferenceRouter {
 
     if (mode === 'both') {
       this.logComparison('knowledge', text, modelResult, regexResult)
-
+    }
+
+    // Cross-check: if regex found a keyword match (non-'fact') and model disagrees, prefer regex.
+    // Regex only returns non-'fact' when it finds a definitive keyword (prefer, must, goal, etc.)
+    // so it's more trustworthy than an undertrained model for those cases.
+    if (modelResult && regexResult !== 'fact' && modelResult !== regexResult) {
+      this.logger.debug({ modelResult, regexResult }, 'Knowledge cross-check: regex keyword wins over model')
+      return regexResult
     }
 
     if (modelResult) {
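The added cross-check gives a definitive regex keyword match precedence over the model's label. A standalone sketch of the same precedence rule, assuming both classifiers return plain category strings and 'fact' is the regex fallback category; the function name is illustrative:

    // Illustrative names; assumes both classifiers return plain category strings.
    function resolveCategory(modelResult: string | null, regexResult: string): string {
      // Non-'fact' from regex means a definitive keyword matched; it wins on disagreement.
      if (modelResult && regexResult !== 'fact' && modelResult !== regexResult) {
        return regexResult
      }
      return modelResult ?? regexResult
    }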
@@ -623,20 +630,23 @@ export class InferenceRouter {
       const maxLen = this.modelManager.getManifestEntry('compress')?.maxSeqLen ?? 256
       const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
 
-      //
-
+      // Keep the padded arrays (fixed shape [1, maxLen]) and track active length.
+      // The ONNX model expects fixed input shape [1, maxLen]; stripping padding
+      // would create variable-length tensors that the model rejects.
+      let activeLen = attentionMask.filter(m => m === 1).length
       const maxGenTokens = Math.min(COMPRESS_MAX_TOKENS, maxLen)
+      const inputLen = activeLen
 
       for (let step = 0; step < maxGenTokens; step++) {
-
-
+        if (activeLen >= maxLen) break
+
+        const logits = await this.modelManager.infer('compress', [...inputIds], [...attentionMask])
         if (!logits) break
 
-        //
-
-        const
-        const
-        const lastTokenLogits = logits.slice(lastTokenOffset, lastTokenOffset + vocabSize)
+        // vocabSize = total logits / sequence length (maxLen, the padded dimension)
+        const vocabSize = logits.length / maxLen
+        const lastActiveOffset = (activeLen - 1) * vocabSize
+        const lastTokenLogits = logits.slice(lastActiveOffset, lastActiveOffset + vocabSize)
 
         // Find argmax (greedy decode)
         let bestId = 0
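The offset arithmetic assumes the logits arrive as a flattened [1, maxLen, vocabSize] buffer, so the slice must index by active length, not buffer length. A worked example with hypothetical sizes:

    // Hypothetical sizes for illustration.
    const maxLen = 256                                    // padded sequence length
    const vocabSize = 32000                               // assumed vocabulary size
    const logitsLength = maxLen * vocabSize               // 8,192,000 floats in the flat buffer
    const activeLen = 10                                  // 10 real tokens, the rest padding
    const lastActiveOffset = (activeLen - 1) * vocabSize  // 288,000
    // logits[lastActiveOffset .. lastActiveOffset + vocabSize) holds the logits for the
    // last *real* token; indexing from the end of the buffer would read a padding slot.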
@@ -651,18 +661,21 @@ export class InferenceRouter {
         // Stop on EOS
         if (bestId === EOS_TOKEN_ID || bestId === 0) break
 
-
+        // Write new token into the next padding slot
+        inputIds[activeLen] = bestId
+        attentionMask[activeLen] = 1
+        activeLen++
       }
 
       // Decode only the generated tokens (after input)
-      const
-      const outputIds = generatedIds.slice(inputLen)
+      const outputIds = inputIds.slice(inputLen, activeLen)
 
       if (outputIds.length === 0) return null
 
       const decoded = tokenizer.decode(outputIds).trim()
       return decoded.length > 0 ? decoded : null
-    } catch (
+    } catch (err) {
+      const error = err instanceof Error ? { message: err.message, stack: err.stack } : err
       this.logger.warn({ error }, 'Model compression failed, returning original text')
       return null
     }
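Taken together, the two compression hunks implement greedy decoding over fixed-shape padded tensors: new tokens overwrite padding slots instead of growing the arrays. A self-contained sketch of the same loop, assuming an infer() that returns flattened [1, maxLen, vocabSize] logits and that token id 0 doubles as EOS/pad; these are stand-ins for the package's own values:

    // Stand-ins for the package's values: infer() returns flattened [1, maxLen, vocabSize]
    // logits, and token id 0 is assumed to double as EOS/pad (as in the diff's stop check).
    async function greedyDecode(
      inputIds: number[],
      attentionMask: number[],
      maxLen: number,
      infer: (ids: number[], mask: number[]) => Promise<Float32Array | null>,
    ): Promise<number[]> {
      const inputLen = attentionMask.filter(m => m === 1).length
      let activeLen = inputLen
      while (activeLen < maxLen) {
        const logits = await infer(inputIds, attentionMask)
        if (!logits) break
        const vocabSize = logits.length / maxLen
        const offset = (activeLen - 1) * vocabSize  // logits of the last real token
        let bestId = 0
        let bestLogit = -Infinity
        for (let i = 0; i < vocabSize; i++) {
          if (logits[offset + i] > bestLogit) {
            bestLogit = logits[offset + i]
            bestId = i
          }
        }
        if (bestId === 0) break            // EOS/pad: stop generating
        inputIds[activeLen] = bestId       // overwrite the next padding slot in place
        attentionMask[activeLen] = 1
        activeLen++
      }
      return inputIds.slice(inputLen, activeLen)  // only the newly generated tokens
    }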
@@ -138,7 +138,8 @@ export class ModelManager {
       this.loadedModels.set(task, loaded)
       this.logger.info({ task, file: entry.file, loadMs: Date.now() - startMs }, 'Model loaded')
       return loaded
-    } catch (
+    } catch (err) {
+      const error = err instanceof Error ? { message: err.message, stack: err.stack } : err
       this.logger.warn({ error, task, file: entry.file }, 'Failed to load model')
       return null
     }
@@ -171,7 +172,8 @@ export class ModelManager {
       const outputKey = Object.keys(results)[0]
       if (!outputKey) return null
       return results[outputKey].data as Float32Array
-    } catch (
+    } catch (err) {
+      const error = err instanceof Error ? { message: err.message, stack: err.stack } : err
       this.logger.warn({ error, task }, 'Inference failed')
       return null
     }
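All three catch blocks normalize the thrown value the same way. Error's message and stack are non-enumerable own properties in V8, so passing a raw Error into a structured logger's merge object can serialize as an empty object; copying them explicitly keeps the detail in the log. A shared helper capturing the pattern; this is hypothetical, not part of the package:

    // Hypothetical helper; not part of the package.
    function serializeError(err: unknown): unknown {
      return err instanceof Error ? { message: err.message, stack: err.stack } : err
    }

    // Usage (illustrative): this.logger.warn({ error: serializeError(err), task }, 'Inference failed')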