@soulcraft/brainy 2.8.0 → 2.9.0

@@ -97,7 +97,7 @@ export class LightweightEmbedder {
       console.log('⚠️ Loading ONNX model for complex text...');
       const { TransformerEmbedding } = await import('../utils/embedding.js');
       this.onnxEmbedder = new TransformerEmbedding({
-        dtype: 'fp32',
+        precision: 'fp32',
         verbose: false
       });
       await this.onnxEmbedder.init();
@@ -107,7 +107,7 @@ export class UniversalMemoryManager {
       const { TransformerEmbedding } = await import('../utils/embedding.js');
       this.embeddingFunction = new TransformerEmbedding({
         verbose: false,
-        dtype: 'fp32',
+        precision: 'fp32',
         localFilesOnly: process.env.BRAINY_ALLOW_REMOTE_MODELS !== 'true'
       });
       await this.embeddingFunction.init();
@@ -13,7 +13,7 @@ async function initModel() {
   if (!model) {
     model = new TransformerEmbedding({
       verbose: false,
-      dtype: 'fp32',
+      precision: 'fp32',
       localFilesOnly: process.env.BRAINY_ALLOW_REMOTE_MODELS !== 'true'
     });
     await model.init();
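
Both call sites gate network access on the same switch: model loading stays local-files-only unless BRAINY_ALLOW_REMOTE_MODELS is set to 'true'. A minimal sketch of opting in from consuming code (the variable and option names come from the diff; the setup around them is illustrative):

    // Permit on-demand model downloads; otherwise loading stays local-only.
    process.env.BRAINY_ALLOW_REMOTE_MODELS = 'true';

    const model = new TransformerEmbedding({
      verbose: false,
      precision: 'fp32',
      localFilesOnly: process.env.BRAINY_ALLOW_REMOTE_MODELS !== 'true' // false after the line above
    });
    await model.init();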
@@ -25,8 +25,8 @@ export interface TransformerEmbeddingOptions {
   cacheDir?: string;
   /** Force local files only (no downloads) */
   localFilesOnly?: boolean;
-  /** Quantization setting (fp32, fp16, q8, q4) */
-  dtype?: 'fp32' | 'fp16' | 'q8' | 'q4';
+  /** Model precision: 'q8' = 75% smaller quantized model, 'fp32' = full precision (default) */
+  precision?: 'fp32' | 'q8';
   /** Device to run inference on - 'auto' detects best available */
   device?: 'auto' | 'cpu' | 'webgpu' | 'cuda' | 'gpu';
 }
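
This interface change is the breaking surface of the release: dtype becomes precision, and the accepted values narrow from four ('fp32' | 'fp16' | 'q8' | 'q4') to two ('fp32' | 'q8'). A minimal before/after sketch for callers (assuming TransformerEmbedding is reachable from the package's public exports):

    // 2.8.0
    const before = new TransformerEmbedding({ dtype: 'fp32', verbose: false });

    // 2.9.0: same behavior, renamed option
    const after = new TransformerEmbedding({ precision: 'fp32', verbose: false });
    await after.init();

Callers that previously passed 'fp16' or 'q4' now have to pick between 'fp32' and 'q8'.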
@@ -4,6 +4,8 @@
  */
 import { isBrowser } from './environment.js';
 import { ModelManager } from '../embeddings/model-manager.js';
+import { join } from 'path';
+import { existsSync } from 'fs';
 // @ts-ignore - Transformers.js is now the primary embedding library
 import { pipeline, env } from '@huggingface/transformers';
 // CRITICAL: Disable ONNX memory arena to prevent 4-8GB allocation
@@ -98,11 +100,11 @@ export class TransformerEmbedding {
       verbose: this.verbose,
       cacheDir: options.cacheDir || './models',
       localFilesOnly: localFilesOnly,
-      dtype: options.dtype || 'fp32', // CRITICAL: fp32 default for backward compatibility
+      precision: options.precision || 'fp32', // renamed from dtype; fp32 stays the default for backward compatibility
       device: options.device || 'auto'
     };
     // ULTRA-CAREFUL: Runtime warnings for q8 usage
-    if (this.options.dtype === 'q8') {
+    if (this.options.precision === 'q8') {
       const confirmed = process.env.BRAINY_Q8_CONFIRMED === 'true';
       if (!confirmed && this.verbose) {
         console.warn('🚨 Q8 MODEL WARNING:');
@@ -114,7 +116,7 @@ export class TransformerEmbedding {
       }
     }
     if (this.verbose) {
-      this.logger('log', `Embedding config: dtype=${this.options.dtype}, localFilesOnly=${localFilesOnly}, model=${this.options.model}`);
+      this.logger('log', `Embedding config: precision=${this.options.precision}, localFilesOnly=${localFilesOnly}, model=${this.options.model}`);
     }
     // Configure transformers.js environment
     if (!isBrowser()) {
@@ -226,18 +228,33 @@ export class TransformerEmbedding {
     const startTime = Date.now();
     // Check model availability and select appropriate variant
     const available = modelManager.getAvailableModels(this.options.model);
-    const actualType = modelManager.getBestAvailableModel(this.options.dtype, this.options.model);
+    let actualType = modelManager.getBestAvailableModel(this.options.precision, this.options.model);
     if (!actualType) {
       throw new Error(`No model variants available for ${this.options.model}. Run 'npm run download-models' to download models.`);
     }
-    if (actualType !== this.options.dtype) {
-      this.logger('log', `Using ${actualType} model (${this.options.dtype} not available)`);
+    if (actualType !== this.options.precision) {
+      this.logger('log', `Using ${actualType} model (${this.options.precision} not available)`);
+    }
+    // CRITICAL FIX: Control which model file transformers.js loads
+    // When both model.onnx and model_quantized.onnx exist, transformers.js defaults to model.onnx
+    // We need to explicitly control this based on the precision setting
+    // Set environment to control model selection BEFORE creating pipeline
+    if (actualType === 'q8') {
+      // For Q8, we want to use the quantized model
+      // transformers.js v3 doesn't have a direct flag, so we need to work around this
+      // HACK: Temporarily modify the model file preference
+      // This forces transformers.js to look for model_quantized.onnx first
+      const originalModelFileName = env.onnxModelFileName; env.onnxModelFileName = 'model_quantized';
+      this.logger('log', '🎯 Selecting Q8 quantized model (75% smaller)');
+    }
+    else {
+      this.logger('log', '📦 Using FP32 model (full precision)');
     }
     // Load the feature extraction pipeline with memory optimizations
     const pipelineOptions = {
       cache_dir: cacheDir,
       local_files_only: isBrowser() ? false : this.options.localFilesOnly,
-      dtype: actualType, // Use the actual available model type
+      // Remove the quantized flag - it doesn't work in transformers.js v3
       // CRITICAL: ONNX memory optimizations
       session_options: {
         enableCpuMemArena: false, // Disable pre-allocated memory arena
@@ -256,6 +273,18 @@ export class TransformerEmbedding {
       this.logger('log', `Pipeline options: ${JSON.stringify(pipelineOptions)}`);
     }
     try {
+      // For Q8 models, we need to explicitly specify the model file
+      if (actualType === 'q8') {
+        // Check if quantized model exists
+        const modelPath = join(cacheDir, this.options.model, 'onnx', 'model_quantized.onnx');
+        if (existsSync(modelPath)) {
+          this.logger('log', '✅ Q8 model found locally');
+        }
+        else {
+          this.logger('warn', '⚠️ Q8 model not found, will fall back to FP32');
+          actualType = 'fp32'; // Fall back to fp32
+        }
+      }
       this.extractor = await pipeline('feature-extraction', this.options.model, pipelineOptions);
     }
     catch (gpuError) {
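
Taken together, the two hunks above make 'q8' an explicit opt-in path: the loader steers transformers.js toward model_quantized.onnx, checks that the file actually exists under the cache directory, and quietly degrades to fp32 when it does not. A hedged usage sketch (BRAINY_Q8_CONFIRMED and precision come from the diff; the rest is illustrative):

    // Opt in to the 75%-smaller quantized model and acknowledge the runtime warning.
    process.env.BRAINY_Q8_CONFIRMED = 'true';

    const embedder = new TransformerEmbedding({ precision: 'q8', verbose: true });
    await embedder.init(); // logs a fallback if onnx/model_quantized.onnx is absent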
@@ -83,7 +83,7 @@ class HybridModelManager {
     // Smart configuration based on environment
     let options = {
       verbose: !isTest && !isServerless,
-      dtype: 'fp32',
+      precision: 'fp32', // Use clearer precision parameter
       device: 'cpu'
     };
     // Environment-specific optimizations
@@ -91,7 +91,7 @@
       options = {
         ...options,
         localFilesOnly: forceLocalOnly || false, // Respect environment variable
-        dtype: 'fp32',
+        precision: 'fp32',
         device: 'cpu',
         verbose: false
       };
@@ -100,7 +100,7 @@
       options = {
         ...options,
         localFilesOnly: forceLocalOnly || true, // Default true for serverless, but respect env
-        dtype: 'fp32',
+        precision: 'fp32',
         device: 'cpu',
         verbose: false
       };
@@ -109,7 +109,7 @@
       options = {
         ...options,
         localFilesOnly: forceLocalOnly || true, // Default true for docker, but respect env
-        dtype: 'fp32',
+        precision: 'fp32',
         device: 'auto',
         verbose: false
       };
@@ -119,7 +119,7 @@
       options = {
         ...options,
         localFilesOnly: forceLocalOnly || false, // Respect environment variable for tests
-        dtype: 'fp32',
+        precision: 'fp32',
         device: 'cpu',
         verbose: false
       };
@@ -128,7 +128,7 @@
       options = {
         ...options,
         localFilesOnly: forceLocalOnly || false, // Respect environment variable for default node
-        dtype: 'fp32',
+        precision: 'fp32',
         device: 'auto',
         verbose: true
       };
@@ -168,7 +168,7 @@
       // 2. If that fails, explicitly allow remote with verbose logging
       { ...options, localFilesOnly: false, verbose: true, source: 'fallback-verbose' },
       // 3. Last resort: basic configuration
-      { verbose: false, dtype: 'fp32', device: 'cpu', localFilesOnly: false, source: 'last-resort' }
+      { verbose: false, precision: 'fp32', device: 'cpu', localFilesOnly: false, source: 'last-resort' }
     ];
     let lastError = null;
     for (const attemptOptions of attempts) {
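
The hunk cuts off just as the retry loop begins. The attempts array implies a standard try-each-configuration cascade; a sketch under that assumption (the diff truncates the actual loop body, so this is illustrative, not the package's code):

    let lastError: unknown = null;
    for (const attemptOptions of attempts) {
      try {
        const embedding = new TransformerEmbedding(attemptOptions);
        await embedding.init();
        return embedding; // first configuration that initializes wins
      } catch (err) {
        lastError = err; // remember the failure and move on to the next attempt
      }
    }
    throw lastError; // every configuration failed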
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@soulcraft/brainy",
-  "version": "2.8.0",
+  "version": "2.9.0",
   "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
   "main": "dist/index.js",
   "module": "dist/index.js",