@soulcraft/brainy 2.8.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/embeddings/lightweight-embedder.js +1 -1
- package/dist/embeddings/universal-memory-manager.js +1 -1
- package/dist/embeddings/worker-embedding.js +1 -1
- package/dist/utils/embedding.d.ts +2 -2
- package/dist/utils/embedding.js +36 -7
- package/dist/utils/hybridModelManager.js +7 -7
- package/package.json +1 -1
|
@@ -97,7 +97,7 @@ export class LightweightEmbedder {
|
|
|
97
97
|
console.log('⚠️ Loading ONNX model for complex text...');
|
|
98
98
|
const { TransformerEmbedding } = await import('../utils/embedding.js');
|
|
99
99
|
this.onnxEmbedder = new TransformerEmbedding({
|
|
100
|
-
|
|
100
|
+
precision: 'fp32',
|
|
101
101
|
verbose: false
|
|
102
102
|
});
|
|
103
103
|
await this.onnxEmbedder.init();
|
|
@@ -107,7 +107,7 @@ export class UniversalMemoryManager {
|
|
|
107
107
|
const { TransformerEmbedding } = await import('../utils/embedding.js');
|
|
108
108
|
this.embeddingFunction = new TransformerEmbedding({
|
|
109
109
|
verbose: false,
|
|
110
|
-
|
|
110
|
+
precision: 'fp32',
|
|
111
111
|
localFilesOnly: process.env.BRAINY_ALLOW_REMOTE_MODELS !== 'true'
|
|
112
112
|
});
|
|
113
113
|
await this.embeddingFunction.init();
|
|
@@ -25,8 +25,8 @@ export interface TransformerEmbeddingOptions {
|
|
|
25
25
|
cacheDir?: string;
|
|
26
26
|
/** Force local files only (no downloads) */
|
|
27
27
|
localFilesOnly?: boolean;
|
|
28
|
-
/**
|
|
29
|
-
|
|
28
|
+
/** Model precision: 'q8' = 75% smaller quantized model, 'fp32' = full precision (default) */
|
|
29
|
+
precision?: 'fp32' | 'q8';
|
|
30
30
|
/** Device to run inference on - 'auto' detects best available */
|
|
31
31
|
device?: 'auto' | 'cpu' | 'webgpu' | 'cuda' | 'gpu';
|
|
32
32
|
}
|
package/dist/utils/embedding.js
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
*/
|
|
5
5
|
import { isBrowser } from './environment.js';
|
|
6
6
|
import { ModelManager } from '../embeddings/model-manager.js';
|
|
7
|
+
import { join } from 'path';
|
|
8
|
+
import { existsSync } from 'fs';
|
|
7
9
|
// @ts-ignore - Transformers.js is now the primary embedding library
|
|
8
10
|
import { pipeline, env } from '@huggingface/transformers';
|
|
9
11
|
// CRITICAL: Disable ONNX memory arena to prevent 4-8GB allocation
|
|
@@ -98,11 +100,11 @@ export class TransformerEmbedding {
|
|
|
98
100
|
verbose: this.verbose,
|
|
99
101
|
cacheDir: options.cacheDir || './models',
|
|
100
102
|
localFilesOnly: localFilesOnly,
|
|
101
|
-
|
|
103
|
+
precision: options.precision || 'fp32', // Clean and clear!
|
|
102
104
|
device: options.device || 'auto'
|
|
103
105
|
};
|
|
104
106
|
// ULTRA-CAREFUL: Runtime warnings for q8 usage
|
|
105
|
-
if (this.options.
|
|
107
|
+
if (this.options.precision === 'q8') {
|
|
106
108
|
const confirmed = process.env.BRAINY_Q8_CONFIRMED === 'true';
|
|
107
109
|
if (!confirmed && this.verbose) {
|
|
108
110
|
console.warn('🚨 Q8 MODEL WARNING:');
|
|
@@ -114,7 +116,7 @@ export class TransformerEmbedding {
|
|
|
114
116
|
}
|
|
115
117
|
}
|
|
116
118
|
if (this.verbose) {
|
|
117
|
-
this.logger('log', `Embedding config:
|
|
119
|
+
this.logger('log', `Embedding config: precision=${this.options.precision}, localFilesOnly=${localFilesOnly}, model=${this.options.model}`);
|
|
118
120
|
}
|
|
119
121
|
// Configure transformers.js environment
|
|
120
122
|
if (!isBrowser()) {
|
|
@@ -226,18 +228,33 @@ export class TransformerEmbedding {
|
|
|
226
228
|
const startTime = Date.now();
|
|
227
229
|
// Check model availability and select appropriate variant
|
|
228
230
|
const available = modelManager.getAvailableModels(this.options.model);
|
|
229
|
-
|
|
231
|
+
let actualType = modelManager.getBestAvailableModel(this.options.precision, this.options.model);
|
|
230
232
|
if (!actualType) {
|
|
231
233
|
throw new Error(`No model variants available for ${this.options.model}. Run 'npm run download-models' to download models.`);
|
|
232
234
|
}
|
|
233
|
-
if (actualType !== this.options.
|
|
234
|
-
this.logger('log', `Using ${actualType} model (${this.options.
|
|
235
|
+
if (actualType !== this.options.precision) {
|
|
236
|
+
this.logger('log', `Using ${actualType} model (${this.options.precision} not available)`);
|
|
237
|
+
}
|
|
238
|
+
// CRITICAL FIX: Control which model file transformers.js loads
|
|
239
|
+
// When both model.onnx and model_quantized.onnx exist, transformers.js defaults to model.onnx
|
|
240
|
+
// We need to explicitly control this based on the precision setting
|
|
241
|
+
// Set environment to control model selection BEFORE creating pipeline
|
|
242
|
+
if (actualType === 'q8') {
|
|
243
|
+
// For Q8, we want to use the quantized model
|
|
244
|
+
// transformers.js v3 doesn't have a direct flag, so we need to work around this
|
|
245
|
+
// HACK: Temporarily modify the model file preference
|
|
246
|
+
// This forces transformers.js to look for model_quantized.onnx first
|
|
247
|
+
const originalModelFileName = env.onnxModelFileName(env).onnxModelFileName = 'model_quantized';
|
|
248
|
+
this.logger('log', '🎯 Selecting Q8 quantized model (75% smaller)');
|
|
249
|
+
}
|
|
250
|
+
else {
|
|
251
|
+
this.logger('log', '📦 Using FP32 model (full precision)');
|
|
235
252
|
}
|
|
236
253
|
// Load the feature extraction pipeline with memory optimizations
|
|
237
254
|
const pipelineOptions = {
|
|
238
255
|
cache_dir: cacheDir,
|
|
239
256
|
local_files_only: isBrowser() ? false : this.options.localFilesOnly,
|
|
240
|
-
|
|
257
|
+
// Remove the quantized flag - it doesn't work in transformers.js v3
|
|
241
258
|
// CRITICAL: ONNX memory optimizations
|
|
242
259
|
session_options: {
|
|
243
260
|
enableCpuMemArena: false, // Disable pre-allocated memory arena
|
|
@@ -256,6 +273,18 @@ export class TransformerEmbedding {
|
|
|
256
273
|
this.logger('log', `Pipeline options: ${JSON.stringify(pipelineOptions)}`);
|
|
257
274
|
}
|
|
258
275
|
try {
|
|
276
|
+
// For Q8 models, we need to explicitly specify the model file
|
|
277
|
+
if (actualType === 'q8') {
|
|
278
|
+
// Check if quantized model exists
|
|
279
|
+
const modelPath = join(cacheDir, this.options.model, 'onnx', 'model_quantized.onnx');
|
|
280
|
+
if (existsSync(modelPath)) {
|
|
281
|
+
this.logger('log', '✅ Q8 model found locally');
|
|
282
|
+
}
|
|
283
|
+
else {
|
|
284
|
+
this.logger('warn', '⚠️ Q8 model not found, will fall back to FP32');
|
|
285
|
+
actualType = 'fp32'; // Fall back to fp32
|
|
286
|
+
}
|
|
287
|
+
}
|
|
259
288
|
this.extractor = await pipeline('feature-extraction', this.options.model, pipelineOptions);
|
|
260
289
|
}
|
|
261
290
|
catch (gpuError) {
|
|
@@ -83,7 +83,7 @@ class HybridModelManager {
|
|
|
83
83
|
// Smart configuration based on environment
|
|
84
84
|
let options = {
|
|
85
85
|
verbose: !isTest && !isServerless,
|
|
86
|
-
|
|
86
|
+
precision: 'fp32', // Use clearer precision parameter
|
|
87
87
|
device: 'cpu'
|
|
88
88
|
};
|
|
89
89
|
// Environment-specific optimizations
|
|
@@ -91,7 +91,7 @@ class HybridModelManager {
|
|
|
91
91
|
options = {
|
|
92
92
|
...options,
|
|
93
93
|
localFilesOnly: forceLocalOnly || false, // Respect environment variable
|
|
94
|
-
|
|
94
|
+
precision: 'fp32',
|
|
95
95
|
device: 'cpu',
|
|
96
96
|
verbose: false
|
|
97
97
|
};
|
|
@@ -100,7 +100,7 @@ class HybridModelManager {
|
|
|
100
100
|
options = {
|
|
101
101
|
...options,
|
|
102
102
|
localFilesOnly: forceLocalOnly || true, // Default true for serverless, but respect env
|
|
103
|
-
|
|
103
|
+
precision: 'fp32',
|
|
104
104
|
device: 'cpu',
|
|
105
105
|
verbose: false
|
|
106
106
|
};
|
|
@@ -109,7 +109,7 @@ class HybridModelManager {
|
|
|
109
109
|
options = {
|
|
110
110
|
...options,
|
|
111
111
|
localFilesOnly: forceLocalOnly || true, // Default true for docker, but respect env
|
|
112
|
-
|
|
112
|
+
precision: 'fp32',
|
|
113
113
|
device: 'auto',
|
|
114
114
|
verbose: false
|
|
115
115
|
};
|
|
@@ -119,7 +119,7 @@ class HybridModelManager {
|
|
|
119
119
|
options = {
|
|
120
120
|
...options,
|
|
121
121
|
localFilesOnly: forceLocalOnly || false, // Respect environment variable for tests
|
|
122
|
-
|
|
122
|
+
precision: 'fp32',
|
|
123
123
|
device: 'cpu',
|
|
124
124
|
verbose: false
|
|
125
125
|
};
|
|
@@ -128,7 +128,7 @@ class HybridModelManager {
|
|
|
128
128
|
options = {
|
|
129
129
|
...options,
|
|
130
130
|
localFilesOnly: forceLocalOnly || false, // Respect environment variable for default node
|
|
131
|
-
|
|
131
|
+
precision: 'fp32',
|
|
132
132
|
device: 'auto',
|
|
133
133
|
verbose: true
|
|
134
134
|
};
|
|
@@ -168,7 +168,7 @@ class HybridModelManager {
|
|
|
168
168
|
// 2. If that fails, explicitly allow remote with verbose logging
|
|
169
169
|
{ ...options, localFilesOnly: false, verbose: true, source: 'fallback-verbose' },
|
|
170
170
|
// 3. Last resort: basic configuration
|
|
171
|
-
{ verbose: false,
|
|
171
|
+
{ verbose: false, precision: 'fp32', device: 'cpu', localFilesOnly: false, source: 'last-resort' }
|
|
172
172
|
];
|
|
173
173
|
let lastError = null;
|
|
174
174
|
for (const attemptOptions of attempts) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "2.8.0",
|
|
3
|
+
"version": "2.9.0",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|