@soulcraft/brainy 2.8.0 → 2.9.0

@@ -97,7 +97,7 @@ export class LightweightEmbedder {
       console.log('⚠️ Loading ONNX model for complex text...');
       const { TransformerEmbedding } = await import('../utils/embedding.js');
       this.onnxEmbedder = new TransformerEmbedding({
-        dtype: 'fp32',
+        precision: 'fp32',
         verbose: false
       });
       await this.onnxEmbedder.init();
@@ -107,7 +107,7 @@ export class UniversalMemoryManager {
       const { TransformerEmbedding } = await import('../utils/embedding.js');
       this.embeddingFunction = new TransformerEmbedding({
         verbose: false,
-        dtype: 'fp32',
+        precision: 'fp32',
         localFilesOnly: process.env.BRAINY_ALLOW_REMOTE_MODELS !== 'true'
       });
       await this.embeddingFunction.init();
@@ -13,7 +13,7 @@ async function initModel() {
   if (!model) {
     model = new TransformerEmbedding({
       verbose: false,
-      dtype: 'fp32',
+      precision: 'fp32',
       localFilesOnly: process.env.BRAINY_ALLOW_REMOTE_MODELS !== 'true'
     });
     await model.init();
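
Both call sites gate network access on the same switch: model loading stays local-files-only unless BRAINY_ALLOW_REMOTE_MODELS is set to 'true'. A minimal sketch of opting in from consuming code (the variable and option names come from the diff; the setup around them is illustrative):

    // Permit on-demand model downloads; otherwise loading stays local-only.
    process.env.BRAINY_ALLOW_REMOTE_MODELS = 'true';

    const model = new TransformerEmbedding({
      verbose: false,
      precision: 'fp32',
      localFilesOnly: process.env.BRAINY_ALLOW_REMOTE_MODELS !== 'true' // false after the line above
    });
    await model.init();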
@@ -25,8 +25,8 @@ export interface TransformerEmbeddingOptions {
   cacheDir?: string;
   /** Force local files only (no downloads) */
   localFilesOnly?: boolean;
-  /** Quantization setting (fp32, fp16, q8, q4) */
-  dtype?: 'fp32' | 'fp16' | 'q8' | 'q4';
+  /** Model precision: 'q8' = 75% smaller quantized model, 'fp32' = full precision (default) */
+  precision?: 'fp32' | 'q8';
   /** Device to run inference on - 'auto' detects best available */
   device?: 'auto' | 'cpu' | 'webgpu' | 'cuda' | 'gpu';
 }
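
This interface change is the breaking surface of the release: dtype becomes precision, and the accepted values narrow from four ('fp32' | 'fp16' | 'q8' | 'q4') to two ('fp32' | 'q8'). A minimal before/after sketch for callers (assuming TransformerEmbedding is reachable from the package's public exports):

    // 2.8.0
    const before = new TransformerEmbedding({ dtype: 'fp32', verbose: false });

    // 2.9.0: same behavior, renamed option
    const after = new TransformerEmbedding({ precision: 'fp32', verbose: false });
    await after.init();

Callers that previously passed 'fp16' or 'q4' now have to pick between 'fp32' and 'q8'.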
@@ -4,6 +4,8 @@
  */
 import { isBrowser } from './environment.js';
 import { ModelManager } from '../embeddings/model-manager.js';
+import { join } from 'path';
+import { existsSync } from 'fs';
 // @ts-ignore - Transformers.js is now the primary embedding library
 import { pipeline, env } from '@huggingface/transformers';
 // CRITICAL: Disable ONNX memory arena to prevent 4-8GB allocation
@@ -98,11 +100,11 @@ export class TransformerEmbedding {
       verbose: this.verbose,
       cacheDir: options.cacheDir || './models',
       localFilesOnly: localFilesOnly,
-      dtype: options.dtype || 'fp32', // CRITICAL: fp32 default for backward compatibility
+      precision: options.precision || 'fp32', // renamed from dtype; fp32 stays the default for backward compatibility
       device: options.device || 'auto'
     };
     // ULTRA-CAREFUL: Runtime warnings for q8 usage
-    if (this.options.dtype === 'q8') {
+    if (this.options.precision === 'q8') {
       const confirmed = process.env.BRAINY_Q8_CONFIRMED === 'true';
       if (!confirmed && this.verbose) {
         console.warn('🚨 Q8 MODEL WARNING:');
@@ -114,7 +116,7 @@ export class TransformerEmbedding {
       }
     }
     if (this.verbose) {
-      this.logger('log', `Embedding config: dtype=${this.options.dtype}, localFilesOnly=${localFilesOnly}, model=${this.options.model}`);
+      this.logger('log', `Embedding config: precision=${this.options.precision}, localFilesOnly=${localFilesOnly}, model=${this.options.model}`);
     }
     // Configure transformers.js environment
     if (!isBrowser()) {
@@ -226,18 +228,33 @@ export class TransformerEmbedding {
     const startTime = Date.now();
     // Check model availability and select appropriate variant
     const available = modelManager.getAvailableModels(this.options.model);
-    const actualType = modelManager.getBestAvailableModel(this.options.dtype, this.options.model);
+    let actualType = modelManager.getBestAvailableModel(this.options.precision, this.options.model);
     if (!actualType) {
       throw new Error(`No model variants available for ${this.options.model}. Run 'npm run download-models' to download models.`);
     }
-    if (actualType !== this.options.dtype) {
-      this.logger('log', `Using ${actualType} model (${this.options.dtype} not available)`);
+    if (actualType !== this.options.precision) {
+      this.logger('log', `Using ${actualType} model (${this.options.precision} not available)`);
+    }
+    // CRITICAL FIX: Control which model file transformers.js loads
+    // When both model.onnx and model_quantized.onnx exist, transformers.js defaults to model.onnx
+    // We need to explicitly control this based on the precision setting
+    // Set environment to control model selection BEFORE creating pipeline
+    if (actualType === 'q8') {
+      // For Q8, we want to use the quantized model
+      // transformers.js v3 doesn't have a direct flag, so we need to work around this
+      // HACK: Temporarily modify the model file preference
+      // This forces transformers.js to look for model_quantized.onnx first
+      const originalModelFileName = env.onnxModelFileName; env.onnxModelFileName = 'model_quantized';
+      this.logger('log', '🎯 Selecting Q8 quantized model (75% smaller)');
+    }
+    else {
+      this.logger('log', '📦 Using FP32 model (full precision)');
     }
     // Load the feature extraction pipeline with memory optimizations
     const pipelineOptions = {
       cache_dir: cacheDir,
       local_files_only: isBrowser() ? false : this.options.localFilesOnly,
-      dtype: actualType, // Use the actual available model type
+      // Remove the quantized flag - it doesn't work in transformers.js v3
       // CRITICAL: ONNX memory optimizations
       session_options: {
         enableCpuMemArena: false, // Disable pre-allocated memory arena
@@ -256,6 +273,18 @@ export class TransformerEmbedding {
       this.logger('log', `Pipeline options: ${JSON.stringify(pipelineOptions)}`);
     }
     try {
+      // For Q8 models, we need to explicitly specify the model file
+      if (actualType === 'q8') {
+        // Check if quantized model exists
+        const modelPath = join(cacheDir, this.options.model, 'onnx', 'model_quantized.onnx');
+        if (existsSync(modelPath)) {
+          this.logger('log', '✅ Q8 model found locally');
+        }
+        else {
+          this.logger('warn', '⚠️ Q8 model not found, will fall back to FP32');
+          actualType = 'fp32'; // Fall back to fp32
+        }
+      }
       this.extractor = await pipeline('feature-extraction', this.options.model, pipelineOptions);
     }
     catch (gpuError) {
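
Taken together, the two hunks above make 'q8' an explicit opt-in path: the loader steers transformers.js toward model_quantized.onnx, checks that the file actually exists under the cache directory, and quietly degrades to fp32 when it does not. A hedged usage sketch (BRAINY_Q8_CONFIRMED and precision come from the diff; the rest is illustrative):

    // Opt in to the 75%-smaller quantized model and acknowledge the runtime warning.
    process.env.BRAINY_Q8_CONFIRMED = 'true';

    const embedder = new TransformerEmbedding({ precision: 'q8', verbose: true });
    await embedder.init(); // logs a fallback if onnx/model_quantized.onnx is absent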
@@ -83,7 +83,7 @@ class HybridModelManager {
     // Smart configuration based on environment
     let options = {
       verbose: !isTest && !isServerless,
-      dtype: 'fp32',
+      precision: 'fp32', // Use clearer precision parameter
       device: 'cpu'
     };
     // Environment-specific optimizations
@@ -91,7 +91,7 @@
       options = {
         ...options,
         localFilesOnly: forceLocalOnly || false, // Respect environment variable
-        dtype: 'fp32',
+        precision: 'fp32',
         device: 'cpu',
         verbose: false
       };
@@ -100,7 +100,7 @@
       options = {
         ...options,
         localFilesOnly: forceLocalOnly || true, // Default true for serverless, but respect env
-        dtype: 'fp32',
+        precision: 'fp32',
         device: 'cpu',
         verbose: false
       };
@@ -109,7 +109,7 @@
       options = {
         ...options,
         localFilesOnly: forceLocalOnly || true, // Default true for docker, but respect env
-        dtype: 'fp32',
+        precision: 'fp32',
         device: 'auto',
         verbose: false
       };
@@ -119,7 +119,7 @@
       options = {
         ...options,
         localFilesOnly: forceLocalOnly || false, // Respect environment variable for tests
-        dtype: 'fp32',
+        precision: 'fp32',
         device: 'cpu',
         verbose: false
       };
@@ -128,7 +128,7 @@
       options = {
         ...options,
         localFilesOnly: forceLocalOnly || false, // Respect environment variable for default node
-        dtype: 'fp32',
+        precision: 'fp32',
         device: 'auto',
         verbose: true
       };
@@ -168,7 +168,7 @@
       // 2. If that fails, explicitly allow remote with verbose logging
       { ...options, localFilesOnly: false, verbose: true, source: 'fallback-verbose' },
       // 3. Last resort: basic configuration
-      { verbose: false, dtype: 'fp32', device: 'cpu', localFilesOnly: false, source: 'last-resort' }
+      { verbose: false, precision: 'fp32', device: 'cpu', localFilesOnly: false, source: 'last-resort' }
     ];
     let lastError = null;
     for (const attemptOptions of attempts) {
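
The hunk cuts off just as the retry loop begins. The attempts array implies a standard try-each-configuration cascade; a sketch under that assumption (the diff truncates the actual loop body, so this is illustrative, not the package's code):

    let lastError: unknown = null;
    for (const attemptOptions of attempts) {
      try {
        const embedding = new TransformerEmbedding(attemptOptions);
        await embedding.init();
        return embedding; // first configuration that initializes wins
      } catch (err) {
        lastError = err; // remember the failure and move on to the next attempt
      }
    }
    throw lastError; // every configuration failed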
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@soulcraft/brainy",
-  "version": "2.8.0",
+  "version": "2.9.0",
   "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
   "main": "dist/index.js",
   "module": "dist/index.js",