npm - @soulcraft/brainy - Versions diffs - 2.7.4 → 2.8.0 - Mend

@soulcraft/brainy 2.7.4 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/CHANGELOG.md +2 -3
package/README.md +31 -0
package/dist/embeddings/model-manager.d.ts +11 -0
package/dist/embeddings/model-manager.js +43 -7
package/dist/utils/embedding.js +24 -3
package/package.json +4 -1
package/scripts/download-models.cjs +102 -19

package/CHANGELOG.md CHANGED Viewed

@@ -1,9 +1,8 @@
 # Changelog
-All notable changes to Brainy will be documented in this file.
+All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
-The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
-and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [2.8.0](https://github.com/soulcraftlabs/brainy/compare/v2.7.4...v2.8.0) (2025-08-29)
 ## [2.7.4] - 2025-08-29

package/README.md CHANGED Viewed

@@ -121,6 +121,37 @@ await brain.find("Documentation about authentication from last month")
 - **Worker-based embeddings** - Non-blocking operations
 - **Automatic caching** - Intelligent result caching
+### Performance Optimization
+**Q8 Quantized Models** - 75% smaller, faster loading (v2.8.0+)
+```javascript
+// Default: Full precision (fp32) - maximum compatibility
+const brain = new BrainyData()
+// Optimized: Quantized models (q8) - 75% smaller, ~99% accuracy
+const brainOptimized = new BrainyData({
+  embeddingOptions: { dtype: 'q8' }
+})
+```
+**Model Comparison:**
+- **FP32 (default)**: 90MB, 100% accuracy, maximum compatibility
+- **Q8 (optional)**: 23MB, ~99% accuracy, faster loading
+**When to use Q8:**
+- ✅ New projects where size/speed matters
+- ✅ Memory-constrained environments
+- ✅ Mobile or edge deployments
+- ❌ Existing projects with FP32 data (incompatible embeddings)
+**Air-gap deployment:**
+```bash
+npm run download-models        # Both models (recommended)
+npm run download-models:q8     # Q8 only (space-constrained)
+npm run download-models:fp32   # FP32 only (compatibility)
+```
 ## 📚 Core API
 ### `search()` - Vector Similarity

package/dist/embeddings/model-manager.d.ts CHANGED Viewed

@@ -18,6 +18,17 @@ export declare class ModelManager {
     private getModelsPath;
     ensureModels(modelName?: string): Promise<boolean>;
     private verifyModelFiles;
+    /**
+     * Check which model variants are available locally
+     *
+     * @param modelName - Optional identifier of the model to look up.
+     * @returns One boolean flag per variant (`fp32`, `q8`).
+     */
+    getAvailableModels(modelName?: string): {
+        fp32: boolean;
+        q8: boolean;
+    };
+    /**
+     * Get the best available model variant based on preference and availability
+     *
+     * @param preferredType - Variant the caller would like to load.
+     * @param modelName - Optional identifier of the model to look up.
+     * @returns The variant to load, or `null` when no variant can be selected.
+     */
+    getBestAvailableModel(preferredType?: 'fp32' | 'q8', modelName?: string): 'fp32' | 'q8' | null;
     private tryModelSource;
     private downloadAndExtractFromGitHub;
     /**

package/dist/embeddings/model-manager.js CHANGED Viewed

@@ -31,13 +31,16 @@ const MODEL_SOURCES = {
         pathTemplate: '{model}/resolve/{revision}/' // Default transformers.js pattern
     }
 };
-// Model verification files - minimal set needed for transformers.js
-const MODEL_FILES = [
+// Model verification files - BOTH fp32 and q8 variants
+const REQUIRED_FILES = [
     'config.json',
     'tokenizer.json',
-    'tokenizer_config.json',
-    'onnx/model.onnx'
+    'tokenizer_config.json'
 ];
+const MODEL_VARIANTS = {
+    fp32: 'onnx/model.onnx',
+    q8: 'onnx/model_quantized.onnx'
+};
 export class ModelManager {
     constructor() {
         this.isInitialized = false;
@@ -105,14 +108,47 @@ export class ModelManager {
         return true;
     }
     async verifyModelFiles(modelPath) {
-        // Check if essential model files exist
-        for (const file of MODEL_FILES) {
+        // Check that every shared config/tokenizer file listed in REQUIRED_FILES exists
+        for (const file of REQUIRED_FILES) {
             const fullPath = join(modelPath, file);
             if (!existsSync(fullPath)) {
                 return false;
             }
         }
-        return true;
+        // At least one model variant must exist (fp32 or q8);
+        // the paths checked are MODEL_VARIANTS.fp32 and MODEL_VARIANTS.q8 under modelPath
+        const fp32Exists = existsSync(join(modelPath, MODEL_VARIANTS.fp32));
+        const q8Exists = existsSync(join(modelPath, MODEL_VARIANTS.q8));
+        return fp32Exists || q8Exists;
+    }
+    /**
+     * Report which model variant files (fp32 / q8) exist under the local models path
+     */
+    getAvailableModels(modelName = 'Xenova/all-MiniLM-L6-v2') {
+        const modelDir = join(this.modelsPath, modelName);
+        // Resolve a variant key to its file inside the model directory and test for it
+        const hasVariant = (variant) => existsSync(join(modelDir, MODEL_VARIANTS[variant]));
+        return { fp32: hasVariant('fp32'), q8: hasVariant('q8') };
+    }
+    /**
+     * Resolve which model variant to load: the preferred one when it is
+     * available, otherwise the other variant (with a warning), otherwise null
+     */
+    getBestAvailableModel(preferredType = 'fp32', modelName = 'Xenova/all-MiniLM-L6-v2') {
+        const variants = this.getAvailableModels(modelName);
+        // Preferred variant is available - nothing to negotiate
+        if (variants[preferredType]) {
+            return preferredType;
+        }
+        // Preferred variant is missing - warn and switch to the other one when it exists
+        switch (preferredType) {
+            case 'q8':
+                if (variants.fp32) {
+                    console.warn('⚠️ Q8 model requested but not available, falling back to FP32');
+                    return 'fp32';
+                }
+                break;
+            case 'fp32':
+                if (variants.q8) {
+                    console.warn('⚠️ FP32 model requested but not available, falling back to Q8');
+                    return 'q8';
+                }
+                break;
+        }
+        // Neither the preferred variant nor a fallback was found
+        return null;
     }
     async tryModelSource(name, source, modelName) {
         try {

package/dist/utils/embedding.js CHANGED Viewed

@@ -98,11 +98,23 @@ export class TransformerEmbedding {
             verbose: this.verbose,
             cacheDir: options.cacheDir || './models',
             localFilesOnly: localFilesOnly,
-            dtype: options.dtype || 'fp32', // Use fp32 by default as quantized models aren't available on CDN
+            dtype: options.dtype || 'fp32', // CRITICAL: fp32 default for backward compatibility
             device: options.device || 'auto'
         };
+        // ULTRA-CAREFUL: Runtime warnings for q8 usage
+        if (this.options.dtype === 'q8') {
+            const confirmed = process.env.BRAINY_Q8_CONFIRMED === 'true';
+            if (!confirmed && this.verbose) {
+                console.warn('🚨 Q8 MODEL WARNING:');
+                console.warn('   • Q8 creates different embeddings than fp32');
+                console.warn('   • Q8 is incompatible with existing fp32 data');
+                console.warn('   • Only use q8 for new projects or when explicitly migrating');
+                console.warn('   • Set BRAINY_Q8_CONFIRMED=true to silence this warning');
+                console.warn('   • Q8 model is 75% smaller but may have slightly reduced accuracy');
+            }
+        }
         if (this.verbose) {
-            this.logger('log', `Embedding config: localFilesOnly=${localFilesOnly}, model=${this.options.model}, cacheDir=${this.options.cacheDir}`);
+            this.logger('log', `Embedding config: dtype=${this.options.dtype}, localFilesOnly=${localFilesOnly}, model=${this.options.model}`);
         }
         // Configure transformers.js environment
         if (!isBrowser()) {
@@ -212,11 +224,20 @@ export class TransformerEmbedding {
                 : this.options.cacheDir;
             this.logger('log', `Loading Transformer model: ${this.options.model} on device: ${device}`);
             const startTime = Date.now();
+            // Check model availability and select appropriate variant
+            const available = modelManager.getAvailableModels(this.options.model);
+            const actualType = modelManager.getBestAvailableModel(this.options.dtype, this.options.model);
+            if (!actualType) {
+                throw new Error(`No model variants available for ${this.options.model}. Run 'npm run download-models' to download models.`);
+            }
+            if (actualType !== this.options.dtype) {
+                this.logger('log', `Using ${actualType} model (${this.options.dtype} not available)`);
+            }
             // Load the feature extraction pipeline with memory optimizations
             const pipelineOptions = {
                 cache_dir: cacheDir,
                 local_files_only: isBrowser() ? false : this.options.localFilesOnly,
-                dtype: this.options.dtype || 'fp32', // Use fp32 model as quantized models aren't available on CDN
+                dtype: actualType, // Use the actual available model type
                 // CRITICAL: ONNX memory optimizations
                 session_options: {
                     enableCpuMemArena: false, // Disable pre-allocated memory arena

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@soulcraft/brainy",
-  "version": "2.7.4",
+  "version": "2.8.0",
   "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
   "main": "dist/index.js",
   "module": "dist/index.js",
@@ -73,6 +73,9 @@
     "test:ci-integration": "NODE_OPTIONS='--max-old-space-size=16384' CI=true vitest run --config tests/configs/vitest.integration.config.ts",
     "test:ci": "npm run test:ci-unit",
     "download-models": "node scripts/download-models.cjs",
+    "download-models:fp32": "node scripts/download-models.cjs fp32",
+    "download-models:q8": "node scripts/download-models.cjs q8",
+    "download-models:both": "node scripts/download-models.cjs",
     "models:verify": "node scripts/ensure-models.js",
     "lint": "eslint --ext .ts,.js src/",
     "lint:fix": "eslint --ext .ts,.js src/ --fix",

package/scripts/download-models.cjs CHANGED Viewed

@@ -9,6 +9,11 @@ const path = require('path')
 const MODEL_NAME = 'Xenova/all-MiniLM-L6-v2'
 const OUTPUT_DIR = './models'
+// Parse command line arguments for model type selection
+// Returns 'fp32' or 'q8' when exactly one variant is named on the command
+// line; returns 'both' when neither is named OR when both are named
+// (previously naming both silently selected only fp32).
+function resolveDownloadType(argv) {
+  const wantsFp32 = argv.includes('fp32')
+  const wantsQ8 = argv.includes('q8')
+  if (wantsFp32 === wantsQ8) return 'both'
+  return wantsFp32 ? 'fp32' : 'q8'
+}
+const args = process.argv.slice(2)
+const downloadType = resolveDownloadType(args)
 async function downloadModels() {
   // Use dynamic import for ES modules in CommonJS
   const { pipeline, env } = await import('@huggingface/transformers')
@@ -16,29 +21,31 @@ async function downloadModels() {
   // Configure transformers.js to use local cache
   env.cacheDir = './models-cache'
   env.allowRemoteModels = true
   try {
-    console.log('🔄 Downloading all-MiniLM-L6-v2 model for offline bundling...')
+    console.log('🧠 Brainy Model Downloader v2.8.0')
+    console.log('===================================')
     console.log(`   Model: ${MODEL_NAME}`)
+    console.log(`   Type: ${downloadType} (fp32, q8, or both)`)
     console.log(`   Cache: ${env.cacheDir}`)
+    console.log('')
     // Create output directory
     await fs.mkdir(OUTPUT_DIR, { recursive: true })
-    // Load the model to force download
-    console.log('📥 Loading model pipeline...')
-    const extractor = await pipeline('feature-extraction', MODEL_NAME)
-    // Test the model to make sure it works
-    console.log('🧪 Testing model...')
-    const testResult = await extractor(['Hello world!'], {
-      pooling: 'mean',
-      normalize: true
-    })
+    // Download models based on type
+    if (downloadType === 'both' || downloadType === 'fp32') {
+      console.log('📥 Downloading FP32 model (full precision, 90MB)...')
+      await downloadModelVariant('fp32')
+    }
-    console.log(`✅ Model test successful! Embedding dimensions: ${testResult.data.length}`)
+    if (downloadType === 'both' || downloadType === 'q8') {
+      console.log('📥 Downloading Q8 model (quantized, 23MB)...')
+      await downloadModelVariant('q8')
+    }
     // Copy ALL model files from cache to our models directory
-    console.log('📋 Copying ALL model files to bundle directory...')
+    console.log('📋 Copying model files to bundle directory...')
     const cacheDir = path.resolve(env.cacheDir)
     const outputDir = path.resolve(OUTPUT_DIR)
@@ -62,22 +69,89 @@ async function downloadModels() {
     console.log(`   Total size: ${await calculateDirectorySize(outputDir)} MB`)
     console.log(`   Location: ${outputDir}`)
-    // Create a marker file
+    // Create a marker file with downloaded model info
+    const markerData = {
+      model: MODEL_NAME,
+      bundledAt: new Date().toISOString(),
+      version: '2.8.0',
+      downloadType: downloadType,
+      models: {}
+    }
+    // Check which models were downloaded
+    const fp32Path = path.join(outputDir, 'Xenova/all-MiniLM-L6-v2/onnx/model.onnx')
+    const q8Path = path.join(outputDir, 'Xenova/all-MiniLM-L6-v2/onnx/model_quantized.onnx')
+    if (await fileExists(fp32Path)) {
+      const stats = await fs.stat(fp32Path)
+      markerData.models.fp32 = {
+        file: 'onnx/model.onnx',
+        size: stats.size,
+        sizeFormatted: `${Math.round(stats.size / (1024 * 1024))}MB`
+      }
+    }
+    if (await fileExists(q8Path)) {
+      const stats = await fs.stat(q8Path)
+      markerData.models.q8 = {
+        file: 'onnx/model_quantized.onnx',
+        size: stats.size,
+        sizeFormatted: `${Math.round(stats.size / (1024 * 1024))}MB`
+      }
+    }
     await fs.writeFile(
       path.join(outputDir, '.brainy-models-bundled'),
-      JSON.stringify({
-        model: MODEL_NAME,
-        bundledAt: new Date().toISOString(),
-        version: '1.0.0'
-      }, null, 2)
+      JSON.stringify(markerData, null, 2)
     )
+    console.log('')
+    console.log('✅ Download complete! Available models:')
+    if (markerData.models.fp32) {
+      console.log(`   • FP32: ${markerData.models.fp32.sizeFormatted} (full precision)`)
+    }
+    if (markerData.models.q8) {
+      console.log(`   • Q8: ${markerData.models.q8.sizeFormatted} (quantized, 75% smaller)`)
+    }
+    console.log('')
+    console.log('Air-gap deployment ready! 🚀')
   } catch (error) {
     console.error('❌ Error downloading models:', error)
     process.exit(1)
   }
 }
+// Fetch one dtype variant of MODEL_NAME into ./models-cache and smoke-test it
+async function downloadModelVariant(dtype) {
+  const { pipeline: buildPipeline } = await import('@huggingface/transformers')
+  try {
+    // Constructing the pipeline is what forces the model download
+    const pipelineOptions = { dtype, cache_dir: './models-cache' }
+    const embedder = await buildPipeline('feature-extraction', MODEL_NAME, pipelineOptions)
+    // Run one sentence through the model as a sanity check
+    const sample = await embedder(['Hello world!'], { pooling: 'mean', normalize: true })
+    console.log(`   ✅ ${dtype.toUpperCase()} model downloaded and tested (${sample.data.length} dimensions)`)
+    // Release the pipeline when it exposes a dispose hook
+    if (embedder.dispose) {
+      await embedder.dispose()
+    }
+  } catch (err) {
+    // Report which variant failed, then let the caller handle the error
+    console.error(`   ❌ Failed to download ${dtype} model:`, err)
+    throw err
+  }
+}
 async function findModelDirectories(baseDir, modelName) {
   const dirs = []
@@ -141,6 +215,15 @@ async function dirExists(dir) {
   }
 }
+// Resolves true only when `file` can be stat-ed and is a regular file
+async function fileExists(file) {
+  let info
+  try {
+    info = await fs.stat(file)
+  } catch {
+    // Any stat failure is treated as "not there"
+    return false
+  }
+  return info.isFile()
+}
 async function copyDirectory(src, dest) {
   await fs.mkdir(dest, { recursive: true })
   const entries = await fs.readdir(src, { withFileTypes: true })