@soulcraft/brainy 2.7.4 → 2.8.0

package/CHANGELOG.md CHANGED
@@ -1,9 +1,8 @@
  # Changelog
 
- All notable changes to Brainy will be documented in this file.
+ All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
 
- The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+ ## [2.8.0](https://github.com/soulcraftlabs/brainy/compare/v2.7.4...v2.8.0) (2025-08-29)
 
  ## [2.7.4] - 2025-08-29
 
package/README.md CHANGED
@@ -121,6 +121,37 @@ await brain.find("Documentation about authentication from last month")
  - **Worker-based embeddings** - Non-blocking operations
  - **Automatic caching** - Intelligent result caching
 
+ ### Performance Optimization
+
+ **Q8 Quantized Models** - 75% smaller, faster loading (v2.8.0+)
+
+ ```javascript
+ // Default: Full precision (fp32) - maximum compatibility
+ const brain = new BrainyData()
+
+ // Optimized: Quantized models (q8) - 75% smaller, ~99% accuracy
+ const brainOptimized = new BrainyData({
+   embeddingOptions: { dtype: 'q8' }
+ })
+ ```
+
+ **Model Comparison:**
+ - **FP32 (default)**: 90MB, 100% accuracy, maximum compatibility
+ - **Q8 (optional)**: 23MB, ~99% accuracy, faster loading
+
+ **When to use Q8:**
+ - ✅ New projects where size/speed matters
+ - ✅ Memory-constrained environments
+ - ✅ Mobile or edge deployments
+ - ❌ Existing projects with FP32 data (incompatible embeddings)
+
+ **Air-gap deployment:**
+ ```bash
+ npm run download-models        # Both models (recommended)
+ npm run download-models:q8     # Q8 only (space-constrained)
+ npm run download-models:fp32   # FP32 only (compatibility)
+ ```
+
  ## 📚 Core API
 
  ### `search()` - Vector Similarity
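Because fp32 and q8 produce different vectors for the same text, the dtype choice should be made once per dataset and kept stable. A minimal sketch of pinning it through configuration (`EMBEDDING_DTYPE` and `initBrain` are hypothetical, not part of Brainy's API):

```javascript
import { BrainyData } from '@soulcraft/brainy'

// Hypothetical helper: resolve the dtype once per dataset and never change it.
// Mixing q8 and fp32 vectors in one index silently degrades search results.
function initBrain() {
  const dtype = process.env.EMBEDDING_DTYPE === 'q8' ? 'q8' : 'fp32'
  return new BrainyData({ embeddingOptions: { dtype } })
}

const brain = initBrain()
```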
@@ -18,6 +18,17 @@ export declare class ModelManager {
  private getModelsPath;
  ensureModels(modelName?: string): Promise<boolean>;
  private verifyModelFiles;
+ /**
+  * Check which model variants are available locally
+  */
+ getAvailableModels(modelName?: string): {
+     fp32: boolean;
+     q8: boolean;
+ };
+ /**
+  * Get the best available model variant based on preference and availability
+  */
+ getBestAvailableModel(preferredType?: 'fp32' | 'q8', modelName?: string): 'fp32' | 'q8' | null;
  private tryModelSource;
  private downloadAndExtractFromGitHub;
  /**
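A sketch of calling the two new methods declared above; the import path is an assumption, since the diff does not show a public entry point for `ModelManager`:

```javascript
// Import path is illustrative only - adjust to however ModelManager is exposed.
import { ModelManager } from '@soulcraft/brainy/dist/utils/modelManager.js'

const manager = new ModelManager()

// Which variants exist on disk: { fp32: boolean, q8: boolean }
const available = manager.getAvailableModels()

// Prefer q8, but accept whatever variant is actually present
const best = manager.getBestAvailableModel('q8')
if (best === null) {
  throw new Error("No local models - run 'npm run download-models' first")
}
console.log(`fp32: ${available.fp32}, q8: ${available.q8}, loading: ${best}`)
```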
@@ -31,13 +31,16 @@ const MODEL_SOURCES = {
      pathTemplate: '{model}/resolve/{revision}/' // Default transformers.js pattern
    }
  };
- // Model verification files - minimal set needed for transformers.js
- const MODEL_FILES = [
+ // Model verification files - BOTH fp32 and q8 variants
+ const REQUIRED_FILES = [
    'config.json',
    'tokenizer.json',
-   'tokenizer_config.json',
-   'onnx/model.onnx'
+   'tokenizer_config.json'
  ];
+ const MODEL_VARIANTS = {
+   fp32: 'onnx/model.onnx',
+   q8: 'onnx/model_quantized.onnx'
+ };
  export class ModelManager {
    constructor() {
      this.isInitialized = false;
@@ -105,14 +108,47 @@ export class ModelManager {
      return true;
    }
    async verifyModelFiles(modelPath) {
-     // Check if essential model files exist
-     for (const file of MODEL_FILES) {
+     // Check if essential files exist
+     for (const file of REQUIRED_FILES) {
        const fullPath = join(modelPath, file);
        if (!existsSync(fullPath)) {
          return false;
        }
      }
-     return true;
+     // At least one model variant must exist (fp32 or q8)
+     const fp32Exists = existsSync(join(modelPath, MODEL_VARIANTS.fp32));
+     const q8Exists = existsSync(join(modelPath, MODEL_VARIANTS.q8));
+     return fp32Exists || q8Exists;
+   }
+   /**
+    * Check which model variants are available locally
+    */
+   getAvailableModels(modelName = 'Xenova/all-MiniLM-L6-v2') {
+     const modelPath = join(this.modelsPath, modelName);
+     return {
+       fp32: existsSync(join(modelPath, MODEL_VARIANTS.fp32)),
+       q8: existsSync(join(modelPath, MODEL_VARIANTS.q8))
+     };
+   }
+   /**
+    * Get the best available model variant based on preference and availability
+    */
+   getBestAvailableModel(preferredType = 'fp32', modelName = 'Xenova/all-MiniLM-L6-v2') {
+     const available = this.getAvailableModels(modelName);
+     // If preferred type is available, use it
+     if (available[preferredType]) {
+       return preferredType;
+     }
+     // Otherwise fall back to what's available
+     if (preferredType === 'q8' && available.fp32) {
+       console.warn('⚠️ Q8 model requested but not available, falling back to FP32');
+       return 'fp32';
+     }
+     if (preferredType === 'fp32' && available.q8) {
+       console.warn('⚠️ FP32 model requested but not available, falling back to Q8');
+       return 'q8';
+     }
+     return null;
    }
    async tryModelSource(name, source, modelName) {
      try {
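The fallback logic above is easiest to sanity-check in isolation; a standalone re-implementation that mirrors the method without touching the filesystem (`pickVariant` is an illustrative name, not package code):

```javascript
// Mirrors getBestAvailableModel(): prefer the requested variant, fall back
// to the other one if present, return null when nothing is on disk.
function pickVariant(preferred, available) {
  if (available[preferred]) return preferred
  const fallback = preferred === 'q8' ? 'fp32' : 'q8'
  return available[fallback] ? fallback : null
}

console.log(pickVariant('q8',   { fp32: true,  q8: false })) // 'fp32' (fallback)
console.log(pickVariant('fp32', { fp32: true,  q8: true  })) // 'fp32' (preferred)
console.log(pickVariant('fp32', { fp32: false, q8: false })) // null
```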
@@ -98,11 +98,23 @@ export class TransformerEmbedding {
        verbose: this.verbose,
        cacheDir: options.cacheDir || './models',
        localFilesOnly: localFilesOnly,
-       dtype: options.dtype || 'fp32', // Use fp32 by default as quantized models aren't available on CDN
+       dtype: options.dtype || 'fp32', // CRITICAL: fp32 default for backward compatibility
        device: options.device || 'auto'
      };
+     // ULTRA-CAREFUL: Runtime warnings for q8 usage
+     if (this.options.dtype === 'q8') {
+       const confirmed = process.env.BRAINY_Q8_CONFIRMED === 'true';
+       if (!confirmed && this.verbose) {
+         console.warn('🚨 Q8 MODEL WARNING:');
+         console.warn('   • Q8 creates different embeddings than fp32');
+         console.warn('   • Q8 is incompatible with existing fp32 data');
+         console.warn('   • Only use q8 for new projects or when explicitly migrating');
+         console.warn('   • Set BRAINY_Q8_CONFIRMED=true to silence this warning');
+         console.warn('   • Q8 model is 75% smaller but may have slightly reduced accuracy');
+       }
+     }
      if (this.verbose) {
-       this.logger('log', `Embedding config: localFilesOnly=${localFilesOnly}, model=${this.options.model}, cacheDir=${this.options.cacheDir}`);
+       this.logger('log', `Embedding config: dtype=${this.options.dtype}, localFilesOnly=${localFilesOnly}, model=${this.options.model}`);
      }
      // Configure transformers.js environment
      if (!isBrowser()) {
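Per the check above, the warning only prints when `verbose` is on and `BRAINY_Q8_CONFIRMED` is unset; a sketch of opting in deliberately:

```javascript
import { BrainyData } from '@soulcraft/brainy'

// Acknowledge the q8 tradeoffs once; this only silences the warning,
// it does not change how embeddings are computed.
process.env.BRAINY_Q8_CONFIRMED = 'true'

const brain = new BrainyData({ embeddingOptions: { dtype: 'q8' } })
```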
@@ -212,11 +224,20 @@ export class TransformerEmbedding {
        : this.options.cacheDir;
      this.logger('log', `Loading Transformer model: ${this.options.model} on device: ${device}`);
      const startTime = Date.now();
+     // Check model availability and select appropriate variant
+     const available = modelManager.getAvailableModels(this.options.model);
+     const actualType = modelManager.getBestAvailableModel(this.options.dtype, this.options.model);
+     if (!actualType) {
+       throw new Error(`No model variants available for ${this.options.model}. Run 'npm run download-models' to download models.`);
+     }
+     if (actualType !== this.options.dtype) {
+       this.logger('log', `Using ${actualType} model (${this.options.dtype} not available)`);
+     }
      // Load the feature extraction pipeline with memory optimizations
      const pipelineOptions = {
        cache_dir: cacheDir,
        local_files_only: isBrowser() ? false : this.options.localFilesOnly,
-       dtype: this.options.dtype || 'fp32', // Use fp32 model as quantized models aren't available on CDN
+       dtype: actualType, // Use the actual available model type
        // CRITICAL: ONNX memory optimizations
        session_options: {
          enableCpuMemArena: false, // Disable pre-allocated memory arena
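Model loading can now fail outright when neither variant is on disk. A sketch of handling that, assuming the load error surfaces from the first call that needs the embedder (`find` follows the README example):

```javascript
import { BrainyData } from '@soulcraft/brainy'

const brain = new BrainyData()

// Handle the new "No model variants available" failure mode explicitly.
try {
  const results = await brain.find('documentation about authentication')
  console.log(results)
} catch (err) {
  if (err instanceof Error && err.message.includes('No model variants available')) {
    console.error("Models missing - run 'npm run download-models', then retry")
  } else {
    throw err
  }
}
```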
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@soulcraft/brainy",
-   "version": "2.7.4",
+   "version": "2.8.0",
    "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
    "main": "dist/index.js",
    "module": "dist/index.js",
@@ -73,6 +73,9 @@
    "test:ci-integration": "NODE_OPTIONS='--max-old-space-size=16384' CI=true vitest run --config tests/configs/vitest.integration.config.ts",
    "test:ci": "npm run test:ci-unit",
    "download-models": "node scripts/download-models.cjs",
+   "download-models:fp32": "node scripts/download-models.cjs fp32",
+   "download-models:q8": "node scripts/download-models.cjs q8",
+   "download-models:both": "node scripts/download-models.cjs",
    "models:verify": "node scripts/ensure-models.js",
    "lint": "eslint --ext .ts,.js src/",
    "lint:fix": "eslint --ext .ts,.js src/ --fix",
@@ -9,6 +9,11 @@ const path = require('path')
  const MODEL_NAME = 'Xenova/all-MiniLM-L6-v2'
  const OUTPUT_DIR = './models'
 
+ // Parse command line arguments for model type selection
+ const args = process.argv.slice(2)
+ const downloadType = args.includes('fp32') ? 'fp32' :
+                      args.includes('q8') ? 'q8' : 'both'
+
  async function downloadModels() {
    // Use dynamic import for ES modules in CommonJS
    const { pipeline, env } = await import('@huggingface/transformers')
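The argument parsing above is keyword-style: any argument equal to `fp32` or `q8` selects that variant, anything else means both. A quick standalone check of the mapping (`parseDownloadType` is illustrative, mirroring the ternary above):

```javascript
// Mirrors the downloadType ternary for a given argv tail.
function parseDownloadType(args) {
  return args.includes('fp32') ? 'fp32' : args.includes('q8') ? 'q8' : 'both'
}

console.log(parseDownloadType(['q8']))      // 'q8'
console.log(parseDownloadType(['fp32']))    // 'fp32'
console.log(parseDownloadType([]))          // 'both'
console.log(parseDownloadType(['--weird'])) // 'both' (unknown args ignored)
```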
@@ -16,29 +21,31 @@ async function downloadModels() {
    // Configure transformers.js to use local cache
    env.cacheDir = './models-cache'
    env.allowRemoteModels = true
+
    try {
-     console.log('🔄 Downloading all-MiniLM-L6-v2 model for offline bundling...')
+     console.log('🧠 Brainy Model Downloader v2.8.0')
+     console.log('===================================')
      console.log(`   Model: ${MODEL_NAME}`)
+     console.log(`   Type: ${downloadType} (fp32, q8, or both)`)
      console.log(`   Cache: ${env.cacheDir}`)
+     console.log('')
 
      // Create output directory
      await fs.mkdir(OUTPUT_DIR, { recursive: true })
 
-     // Load the model to force download
-     console.log('📥 Loading model pipeline...')
-     const extractor = await pipeline('feature-extraction', MODEL_NAME)
-
-     // Test the model to make sure it works
-     console.log('🧪 Testing model...')
-     const testResult = await extractor(['Hello world!'], {
-       pooling: 'mean',
-       normalize: true
-     })
+     // Download models based on type
+     if (downloadType === 'both' || downloadType === 'fp32') {
+       console.log('📥 Downloading FP32 model (full precision, 90MB)...')
+       await downloadModelVariant('fp32')
+     }
 
-     console.log(`✅ Model test successful! Embedding dimensions: ${testResult.data.length}`)
+     if (downloadType === 'both' || downloadType === 'q8') {
+       console.log('📥 Downloading Q8 model (quantized, 23MB)...')
+       await downloadModelVariant('q8')
+     }
 
      // Copy ALL model files from cache to our models directory
-     console.log('📋 Copying ALL model files to bundle directory...')
+     console.log('📋 Copying model files to bundle directory...')
 
      const cacheDir = path.resolve(env.cacheDir)
      const outputDir = path.resolve(OUTPUT_DIR)
@@ -62,22 +69,89 @@ async function downloadModels() {
      console.log(`   Total size: ${await calculateDirectorySize(outputDir)} MB`)
      console.log(`   Location: ${outputDir}`)
 
-     // Create a marker file
+     // Create a marker file with downloaded model info
+     const markerData = {
+       model: MODEL_NAME,
+       bundledAt: new Date().toISOString(),
+       version: '2.8.0',
+       downloadType: downloadType,
+       models: {}
+     }
+
+     // Check which models were downloaded
+     const fp32Path = path.join(outputDir, 'Xenova/all-MiniLM-L6-v2/onnx/model.onnx')
+     const q8Path = path.join(outputDir, 'Xenova/all-MiniLM-L6-v2/onnx/model_quantized.onnx')
+
+     if (await fileExists(fp32Path)) {
+       const stats = await fs.stat(fp32Path)
+       markerData.models.fp32 = {
+         file: 'onnx/model.onnx',
+         size: stats.size,
+         sizeFormatted: `${Math.round(stats.size / (1024 * 1024))}MB`
+       }
+     }
+
+     if (await fileExists(q8Path)) {
+       const stats = await fs.stat(q8Path)
+       markerData.models.q8 = {
+         file: 'onnx/model_quantized.onnx',
+         size: stats.size,
+         sizeFormatted: `${Math.round(stats.size / (1024 * 1024))}MB`
+       }
+     }
+
      await fs.writeFile(
        path.join(outputDir, '.brainy-models-bundled'),
-       JSON.stringify({
-         model: MODEL_NAME,
-         bundledAt: new Date().toISOString(),
-         version: '1.0.0'
-       }, null, 2)
+       JSON.stringify(markerData, null, 2)
      )
 
+     console.log('')
+     console.log('✅ Download complete! Available models:')
+     if (markerData.models.fp32) {
+       console.log(`   • FP32: ${markerData.models.fp32.sizeFormatted} (full precision)`)
+     }
+     if (markerData.models.q8) {
+       console.log(`   • Q8: ${markerData.models.q8.sizeFormatted} (quantized, 75% smaller)`)
+     }
+     console.log('')
+     console.log('Air-gap deployment ready! 🚀')
+
    } catch (error) {
      console.error('❌ Error downloading models:', error)
      process.exit(1)
    }
  }
 
+ // Download a specific model variant
+ async function downloadModelVariant(dtype) {
+   const { pipeline } = await import('@huggingface/transformers')
+
+   try {
+     // Load the model to force download
+     const extractor = await pipeline('feature-extraction', MODEL_NAME, {
+       dtype: dtype,
+       cache_dir: './models-cache'
+     })
+
+     // Test the model
+     const testResult = await extractor(['Hello world!'], {
+       pooling: 'mean',
+       normalize: true
+     })
+
+     console.log(`   ✅ ${dtype.toUpperCase()} model downloaded and tested (${testResult.data.length} dimensions)`)
+
+     // Dispose to free memory
+     if (extractor.dispose) {
+       await extractor.dispose()
+     }
+
+   } catch (error) {
+     console.error(`   ❌ Failed to download ${dtype} model:`, error)
+     throw error
+   }
+ }
+
  async function findModelDirectories(baseDir, modelName) {
    const dirs = []
 
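Deployment tooling can read the `.brainy-models-bundled` marker written above to verify what shipped; a minimal sketch mirroring the `markerData` shape (the helper name is illustrative):

```javascript
const fs = require('node:fs/promises')
const path = require('node:path')

// Read the marker file the download script writes and list bundled variants.
async function reportBundledModels(outputDir = './models') {
  const raw = await fs.readFile(path.join(outputDir, '.brainy-models-bundled'), 'utf8')
  const marker = JSON.parse(raw)
  for (const [variant, info] of Object.entries(marker.models)) {
    console.log(`${variant}: ${info.sizeFormatted} (${info.file})`)
  }
}

reportBundledModels().catch(() => console.error('No marker file found'))
```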
@@ -141,6 +215,15 @@ async function dirExists(dir) {
    }
  }
 
+ async function fileExists(file) {
+   try {
+     const stats = await fs.stat(file)
+     return stats.isFile()
+   } catch (error) {
+     return false
+   }
+ }
+
  async function copyDirectory(src, dest) {
    await fs.mkdir(dest, { recursive: true })
    const entries = await fs.readdir(src, { withFileTypes: true })