@soulcraft/brainy 2.7.4 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2 -3
- package/README.md +31 -0
- package/dist/embeddings/lightweight-embedder.js +1 -1
- package/dist/embeddings/model-manager.d.ts +11 -0
- package/dist/embeddings/model-manager.js +43 -7
- package/dist/embeddings/universal-memory-manager.js +1 -1
- package/dist/embeddings/worker-embedding.js +1 -1
- package/dist/utils/embedding.d.ts +2 -2
- package/dist/utils/embedding.js +53 -3
- package/dist/utils/hybridModelManager.js +7 -7
- package/package.json +4 -1
- package/scripts/download-models.cjs +102 -19
package/CHANGELOG.md
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
-
All notable changes to
|
|
3
|
+
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
5
|
+
## [2.8.0](https://github.com/soulcraftlabs/brainy/compare/v2.7.4...v2.8.0) (2025-08-29)
|
|
7
6
|
|
|
8
7
|
## [2.7.4] - 2025-08-29
|
|
9
8
|
|
package/README.md
CHANGED
|
@@ -121,6 +121,37 @@ await brain.find("Documentation about authentication from last month")
|
|
|
121
121
|
- **Worker-based embeddings** - Non-blocking operations
|
|
122
122
|
- **Automatic caching** - Intelligent result caching
|
|
123
123
|
|
|
124
|
+
### Performance Optimization
|
|
125
|
+
|
|
126
|
+
**Q8 Quantized Models** - 75% smaller, faster loading (v2.8.0+)
|
|
127
|
+
|
|
128
|
+
```javascript
|
|
129
|
+
// Default: Full precision (fp32) - maximum compatibility
|
|
130
|
+
const brain = new BrainyData()
|
|
131
|
+
|
|
132
|
+
// Optimized: Quantized models (q8) - 75% smaller, 99% accuracy
|
|
133
|
+
const brainOptimized = new BrainyData({
|
|
134
|
+
embeddingOptions: { dtype: 'q8' }
|
|
135
|
+
})
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
**Model Comparison:**
|
|
139
|
+
- **FP32 (default)**: 90MB, 100% accuracy, maximum compatibility
|
|
140
|
+
- **Q8 (optional)**: 23MB, ~99% accuracy, faster loading
|
|
141
|
+
|
|
142
|
+
**When to use Q8:**
|
|
143
|
+
- ✅ New projects where size/speed matters
|
|
144
|
+
- ✅ Memory-constrained environments
|
|
145
|
+
- ✅ Mobile or edge deployments
|
|
146
|
+
- ❌ Existing projects with FP32 data (incompatible embeddings)
|
|
147
|
+
|
|
148
|
+
**Air-gap deployment:**
|
|
149
|
+
```bash
|
|
150
|
+
npm run download-models # Both models (recommended)
|
|
151
|
+
npm run download-models:q8 # Q8 only (space-constrained)
|
|
152
|
+
npm run download-models:fp32 # FP32 only (compatibility)
|
|
153
|
+
```
|
|
154
|
+
|
|
124
155
|
## 📚 Core API
|
|
125
156
|
|
|
126
157
|
### `search()` - Vector Similarity
|
|
@@ -97,7 +97,7 @@ export class LightweightEmbedder {
|
|
|
97
97
|
console.log('⚠️ Loading ONNX model for complex text...');
|
|
98
98
|
const { TransformerEmbedding } = await import('../utils/embedding.js');
|
|
99
99
|
this.onnxEmbedder = new TransformerEmbedding({
|
|
100
|
-
|
|
100
|
+
precision: 'fp32',
|
|
101
101
|
verbose: false
|
|
102
102
|
});
|
|
103
103
|
await this.onnxEmbedder.init();
|
|
@@ -18,6 +18,17 @@ export declare class ModelManager {
|
|
|
18
18
|
private getModelsPath;
|
|
19
19
|
ensureModels(modelName?: string): Promise<boolean>;
|
|
20
20
|
private verifyModelFiles;
|
|
21
|
+
/**
|
|
22
|
+
* Check which model variants are available locally
|
|
23
|
+
*/
|
|
24
|
+
getAvailableModels(modelName?: string): {
|
|
25
|
+
fp32: boolean;
|
|
26
|
+
q8: boolean;
|
|
27
|
+
};
|
|
28
|
+
/**
|
|
29
|
+
* Get the best available model variant based on preference and availability
|
|
30
|
+
*/
|
|
31
|
+
getBestAvailableModel(preferredType?: 'fp32' | 'q8', modelName?: string): 'fp32' | 'q8' | null;
|
|
21
32
|
private tryModelSource;
|
|
22
33
|
private downloadAndExtractFromGitHub;
|
|
23
34
|
/**
|
|
@@ -31,13 +31,16 @@ const MODEL_SOURCES = {
|
|
|
31
31
|
pathTemplate: '{model}/resolve/{revision}/' // Default transformers.js pattern
|
|
32
32
|
}
|
|
33
33
|
};
|
|
34
|
-
// Model verification files -
|
|
35
|
-
const
|
|
34
|
+
// Model verification files - BOTH fp32 and q8 variants
|
|
35
|
+
const REQUIRED_FILES = [
|
|
36
36
|
'config.json',
|
|
37
37
|
'tokenizer.json',
|
|
38
|
-
'tokenizer_config.json'
|
|
39
|
-
'onnx/model.onnx'
|
|
38
|
+
'tokenizer_config.json'
|
|
40
39
|
];
|
|
40
|
+
const MODEL_VARIANTS = {
|
|
41
|
+
fp32: 'onnx/model.onnx',
|
|
42
|
+
q8: 'onnx/model_quantized.onnx'
|
|
43
|
+
};
|
|
41
44
|
export class ModelManager {
|
|
42
45
|
constructor() {
|
|
43
46
|
this.isInitialized = false;
|
|
@@ -105,14 +108,47 @@ export class ModelManager {
|
|
|
105
108
|
return true;
|
|
106
109
|
}
|
|
107
110
|
async verifyModelFiles(modelPath) {
|
|
108
|
-
// Check if essential
|
|
109
|
-
for (const file of
|
|
111
|
+
// Check if essential files exist
|
|
112
|
+
for (const file of REQUIRED_FILES) {
|
|
110
113
|
const fullPath = join(modelPath, file);
|
|
111
114
|
if (!existsSync(fullPath)) {
|
|
112
115
|
return false;
|
|
113
116
|
}
|
|
114
117
|
}
|
|
115
|
-
|
|
118
|
+
// At least one model variant must exist (fp32 or q8)
|
|
119
|
+
const fp32Exists = existsSync(join(modelPath, MODEL_VARIANTS.fp32));
|
|
120
|
+
const q8Exists = existsSync(join(modelPath, MODEL_VARIANTS.q8));
|
|
121
|
+
return fp32Exists || q8Exists;
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Check which model variants are available locally
|
|
125
|
+
*/
|
|
126
|
+
getAvailableModels(modelName = 'Xenova/all-MiniLM-L6-v2') {
|
|
127
|
+
const modelPath = join(this.modelsPath, modelName);
|
|
128
|
+
return {
|
|
129
|
+
fp32: existsSync(join(modelPath, MODEL_VARIANTS.fp32)),
|
|
130
|
+
q8: existsSync(join(modelPath, MODEL_VARIANTS.q8))
|
|
131
|
+
};
|
|
132
|
+
}
|
|
133
|
+
/**
|
|
134
|
+
* Get the best available model variant based on preference and availability
|
|
135
|
+
*/
|
|
136
|
+
getBestAvailableModel(preferredType = 'fp32', modelName = 'Xenova/all-MiniLM-L6-v2') {
|
|
137
|
+
const available = this.getAvailableModels(modelName);
|
|
138
|
+
// If preferred type is available, use it
|
|
139
|
+
if (available[preferredType]) {
|
|
140
|
+
return preferredType;
|
|
141
|
+
}
|
|
142
|
+
// Otherwise fall back to what's available
|
|
143
|
+
if (preferredType === 'q8' && available.fp32) {
|
|
144
|
+
console.warn('⚠️ Q8 model requested but not available, falling back to FP32');
|
|
145
|
+
return 'fp32';
|
|
146
|
+
}
|
|
147
|
+
if (preferredType === 'fp32' && available.q8) {
|
|
148
|
+
console.warn('⚠️ FP32 model requested but not available, falling back to Q8');
|
|
149
|
+
return 'q8';
|
|
150
|
+
}
|
|
151
|
+
return null;
|
|
116
152
|
}
|
|
117
153
|
async tryModelSource(name, source, modelName) {
|
|
118
154
|
try {
|
|
@@ -107,7 +107,7 @@ export class UniversalMemoryManager {
|
|
|
107
107
|
const { TransformerEmbedding } = await import('../utils/embedding.js');
|
|
108
108
|
this.embeddingFunction = new TransformerEmbedding({
|
|
109
109
|
verbose: false,
|
|
110
|
-
|
|
110
|
+
precision: 'fp32',
|
|
111
111
|
localFilesOnly: process.env.BRAINY_ALLOW_REMOTE_MODELS !== 'true'
|
|
112
112
|
});
|
|
113
113
|
await this.embeddingFunction.init();
|
|
@@ -25,8 +25,8 @@ export interface TransformerEmbeddingOptions {
|
|
|
25
25
|
cacheDir?: string;
|
|
26
26
|
/** Force local files only (no downloads) */
|
|
27
27
|
localFilesOnly?: boolean;
|
|
28
|
-
/**
|
|
29
|
-
|
|
28
|
+
/** Model precision: 'q8' = 75% smaller quantized model, 'fp32' = full precision (default) */
|
|
29
|
+
precision?: 'fp32' | 'q8';
|
|
30
30
|
/** Device to run inference on - 'auto' detects best available */
|
|
31
31
|
device?: 'auto' | 'cpu' | 'webgpu' | 'cuda' | 'gpu';
|
|
32
32
|
}
|
package/dist/utils/embedding.js
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
*/
|
|
5
5
|
import { isBrowser } from './environment.js';
|
|
6
6
|
import { ModelManager } from '../embeddings/model-manager.js';
|
|
7
|
+
import { join } from 'path';
|
|
8
|
+
import { existsSync } from 'fs';
|
|
7
9
|
// @ts-ignore - Transformers.js is now the primary embedding library
|
|
8
10
|
import { pipeline, env } from '@huggingface/transformers';
|
|
9
11
|
// CRITICAL: Disable ONNX memory arena to prevent 4-8GB allocation
|
|
@@ -98,11 +100,23 @@ export class TransformerEmbedding {
|
|
|
98
100
|
verbose: this.verbose,
|
|
99
101
|
cacheDir: options.cacheDir || './models',
|
|
100
102
|
localFilesOnly: localFilesOnly,
|
|
101
|
-
|
|
103
|
+
precision: options.precision || 'fp32', // Clean and clear!
|
|
102
104
|
device: options.device || 'auto'
|
|
103
105
|
};
|
|
106
|
+
// ULTRA-CAREFUL: Runtime warnings for q8 usage
|
|
107
|
+
if (this.options.precision === 'q8') {
|
|
108
|
+
const confirmed = process.env.BRAINY_Q8_CONFIRMED === 'true';
|
|
109
|
+
if (!confirmed && this.verbose) {
|
|
110
|
+
console.warn('🚨 Q8 MODEL WARNING:');
|
|
111
|
+
console.warn(' • Q8 creates different embeddings than fp32');
|
|
112
|
+
console.warn(' • Q8 is incompatible with existing fp32 data');
|
|
113
|
+
console.warn(' • Only use q8 for new projects or when explicitly migrating');
|
|
114
|
+
console.warn(' • Set BRAINY_Q8_CONFIRMED=true to silence this warning');
|
|
115
|
+
console.warn(' • Q8 model is 75% smaller but may have slightly reduced accuracy');
|
|
116
|
+
}
|
|
117
|
+
}
|
|
104
118
|
if (this.verbose) {
|
|
105
|
-
this.logger('log', `Embedding config:
|
|
119
|
+
this.logger('log', `Embedding config: precision=${this.options.precision}, localFilesOnly=${localFilesOnly}, model=${this.options.model}`);
|
|
106
120
|
}
|
|
107
121
|
// Configure transformers.js environment
|
|
108
122
|
if (!isBrowser()) {
|
|
@@ -212,11 +226,35 @@ export class TransformerEmbedding {
|
|
|
212
226
|
: this.options.cacheDir;
|
|
213
227
|
this.logger('log', `Loading Transformer model: ${this.options.model} on device: ${device}`);
|
|
214
228
|
const startTime = Date.now();
|
|
229
|
+
// Check model availability and select appropriate variant
|
|
230
|
+
const available = modelManager.getAvailableModels(this.options.model);
|
|
231
|
+
let actualType = modelManager.getBestAvailableModel(this.options.precision, this.options.model);
|
|
232
|
+
if (!actualType) {
|
|
233
|
+
throw new Error(`No model variants available for ${this.options.model}. Run 'npm run download-models' to download models.`);
|
|
234
|
+
}
|
|
235
|
+
if (actualType !== this.options.precision) {
|
|
236
|
+
this.logger('log', `Using ${actualType} model (${this.options.precision} not available)`);
|
|
237
|
+
}
|
|
238
|
+
// CRITICAL FIX: Control which model file transformers.js loads
|
|
239
|
+
// When both model.onnx and model_quantized.onnx exist, transformers.js defaults to model.onnx
|
|
240
|
+
// We need to explicitly control this based on the precision setting
|
|
241
|
+
// Set environment to control model selection BEFORE creating pipeline
|
|
242
|
+
if (actualType === 'q8') {
|
|
243
|
+
// For Q8, we want to use the quantized model
|
|
244
|
+
// transformers.js v3 doesn't have a direct flag, so we need to work around this
|
|
245
|
+
// HACK: Temporarily modify the model file preference
|
|
246
|
+
// This forces transformers.js to look for model_quantized.onnx first
|
|
247
|
+
const originalModelFileName = env.onnxModelFileName(env).onnxModelFileName = 'model_quantized';
|
|
248
|
+
this.logger('log', '🎯 Selecting Q8 quantized model (75% smaller)');
|
|
249
|
+
}
|
|
250
|
+
else {
|
|
251
|
+
this.logger('log', '📦 Using FP32 model (full precision)');
|
|
252
|
+
}
|
|
215
253
|
// Load the feature extraction pipeline with memory optimizations
|
|
216
254
|
const pipelineOptions = {
|
|
217
255
|
cache_dir: cacheDir,
|
|
218
256
|
local_files_only: isBrowser() ? false : this.options.localFilesOnly,
|
|
219
|
-
|
|
257
|
+
// Remove the quantized flag - it doesn't work in transformers.js v3
|
|
220
258
|
// CRITICAL: ONNX memory optimizations
|
|
221
259
|
session_options: {
|
|
222
260
|
enableCpuMemArena: false, // Disable pre-allocated memory arena
|
|
@@ -235,6 +273,18 @@ export class TransformerEmbedding {
|
|
|
235
273
|
this.logger('log', `Pipeline options: ${JSON.stringify(pipelineOptions)}`);
|
|
236
274
|
}
|
|
237
275
|
try {
|
|
276
|
+
// For Q8 models, we need to explicitly specify the model file
|
|
277
|
+
if (actualType === 'q8') {
|
|
278
|
+
// Check if quantized model exists
|
|
279
|
+
const modelPath = join(cacheDir, this.options.model, 'onnx', 'model_quantized.onnx');
|
|
280
|
+
if (existsSync(modelPath)) {
|
|
281
|
+
this.logger('log', '✅ Q8 model found locally');
|
|
282
|
+
}
|
|
283
|
+
else {
|
|
284
|
+
this.logger('warn', '⚠️ Q8 model not found, will fall back to FP32');
|
|
285
|
+
actualType = 'fp32'; // Fall back to fp32
|
|
286
|
+
}
|
|
287
|
+
}
|
|
238
288
|
this.extractor = await pipeline('feature-extraction', this.options.model, pipelineOptions);
|
|
239
289
|
}
|
|
240
290
|
catch (gpuError) {
|
|
@@ -83,7 +83,7 @@ class HybridModelManager {
|
|
|
83
83
|
// Smart configuration based on environment
|
|
84
84
|
let options = {
|
|
85
85
|
verbose: !isTest && !isServerless,
|
|
86
|
-
|
|
86
|
+
precision: 'fp32', // Use clearer precision parameter
|
|
87
87
|
device: 'cpu'
|
|
88
88
|
};
|
|
89
89
|
// Environment-specific optimizations
|
|
@@ -91,7 +91,7 @@ class HybridModelManager {
|
|
|
91
91
|
options = {
|
|
92
92
|
...options,
|
|
93
93
|
localFilesOnly: forceLocalOnly || false, // Respect environment variable
|
|
94
|
-
|
|
94
|
+
precision: 'fp32',
|
|
95
95
|
device: 'cpu',
|
|
96
96
|
verbose: false
|
|
97
97
|
};
|
|
@@ -100,7 +100,7 @@ class HybridModelManager {
|
|
|
100
100
|
options = {
|
|
101
101
|
...options,
|
|
102
102
|
localFilesOnly: forceLocalOnly || true, // Default true for serverless, but respect env
|
|
103
|
-
|
|
103
|
+
precision: 'fp32',
|
|
104
104
|
device: 'cpu',
|
|
105
105
|
verbose: false
|
|
106
106
|
};
|
|
@@ -109,7 +109,7 @@ class HybridModelManager {
|
|
|
109
109
|
options = {
|
|
110
110
|
...options,
|
|
111
111
|
localFilesOnly: forceLocalOnly || true, // Default true for docker, but respect env
|
|
112
|
-
|
|
112
|
+
precision: 'fp32',
|
|
113
113
|
device: 'auto',
|
|
114
114
|
verbose: false
|
|
115
115
|
};
|
|
@@ -119,7 +119,7 @@ class HybridModelManager {
|
|
|
119
119
|
options = {
|
|
120
120
|
...options,
|
|
121
121
|
localFilesOnly: forceLocalOnly || false, // Respect environment variable for tests
|
|
122
|
-
|
|
122
|
+
precision: 'fp32',
|
|
123
123
|
device: 'cpu',
|
|
124
124
|
verbose: false
|
|
125
125
|
};
|
|
@@ -128,7 +128,7 @@ class HybridModelManager {
|
|
|
128
128
|
options = {
|
|
129
129
|
...options,
|
|
130
130
|
localFilesOnly: forceLocalOnly || false, // Respect environment variable for default node
|
|
131
|
-
|
|
131
|
+
precision: 'fp32',
|
|
132
132
|
device: 'auto',
|
|
133
133
|
verbose: true
|
|
134
134
|
};
|
|
@@ -168,7 +168,7 @@ class HybridModelManager {
|
|
|
168
168
|
// 2. If that fails, explicitly allow remote with verbose logging
|
|
169
169
|
{ ...options, localFilesOnly: false, verbose: true, source: 'fallback-verbose' },
|
|
170
170
|
// 3. Last resort: basic configuration
|
|
171
|
-
{ verbose: false,
|
|
171
|
+
{ verbose: false, precision: 'fp32', device: 'cpu', localFilesOnly: false, source: 'last-resort' }
|
|
172
172
|
];
|
|
173
173
|
let lastError = null;
|
|
174
174
|
for (const attemptOptions of attempts) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.9.0",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|
|
@@ -73,6 +73,9 @@
|
|
|
73
73
|
"test:ci-integration": "NODE_OPTIONS='--max-old-space-size=16384' CI=true vitest run --config tests/configs/vitest.integration.config.ts",
|
|
74
74
|
"test:ci": "npm run test:ci-unit",
|
|
75
75
|
"download-models": "node scripts/download-models.cjs",
|
|
76
|
+
"download-models:fp32": "node scripts/download-models.cjs fp32",
|
|
77
|
+
"download-models:q8": "node scripts/download-models.cjs q8",
|
|
78
|
+
"download-models:both": "node scripts/download-models.cjs",
|
|
76
79
|
"models:verify": "node scripts/ensure-models.js",
|
|
77
80
|
"lint": "eslint --ext .ts,.js src/",
|
|
78
81
|
"lint:fix": "eslint --ext .ts,.js src/ --fix",
|
|
@@ -9,6 +9,11 @@ const path = require('path')
|
|
|
9
9
|
const MODEL_NAME = 'Xenova/all-MiniLM-L6-v2'
|
|
10
10
|
const OUTPUT_DIR = './models'
|
|
11
11
|
|
|
12
|
+
// Parse command line arguments for model type selection
|
|
13
|
+
const args = process.argv.slice(2)
|
|
14
|
+
const downloadType = args.includes('fp32') ? 'fp32' :
|
|
15
|
+
args.includes('q8') ? 'q8' : 'both'
|
|
16
|
+
|
|
12
17
|
async function downloadModels() {
|
|
13
18
|
// Use dynamic import for ES modules in CommonJS
|
|
14
19
|
const { pipeline, env } = await import('@huggingface/transformers')
|
|
@@ -16,29 +21,31 @@ async function downloadModels() {
|
|
|
16
21
|
// Configure transformers.js to use local cache
|
|
17
22
|
env.cacheDir = './models-cache'
|
|
18
23
|
env.allowRemoteModels = true
|
|
24
|
+
|
|
19
25
|
try {
|
|
20
|
-
console.log('
|
|
26
|
+
console.log('🧠 Brainy Model Downloader v2.8.0')
|
|
27
|
+
console.log('===================================')
|
|
21
28
|
console.log(` Model: ${MODEL_NAME}`)
|
|
29
|
+
console.log(` Type: ${downloadType} (fp32, q8, or both)`)
|
|
22
30
|
console.log(` Cache: ${env.cacheDir}`)
|
|
31
|
+
console.log('')
|
|
23
32
|
|
|
24
33
|
// Create output directory
|
|
25
34
|
await fs.mkdir(OUTPUT_DIR, { recursive: true })
|
|
26
35
|
|
|
27
|
-
//
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
console.log('🧪 Testing model...')
|
|
33
|
-
const testResult = await extractor(['Hello world!'], {
|
|
34
|
-
pooling: 'mean',
|
|
35
|
-
normalize: true
|
|
36
|
-
})
|
|
36
|
+
// Download models based on type
|
|
37
|
+
if (downloadType === 'both' || downloadType === 'fp32') {
|
|
38
|
+
console.log('📥 Downloading FP32 model (full precision, 90MB)...')
|
|
39
|
+
await downloadModelVariant('fp32')
|
|
40
|
+
}
|
|
37
41
|
|
|
38
|
-
|
|
42
|
+
if (downloadType === 'both' || downloadType === 'q8') {
|
|
43
|
+
console.log('📥 Downloading Q8 model (quantized, 23MB)...')
|
|
44
|
+
await downloadModelVariant('q8')
|
|
45
|
+
}
|
|
39
46
|
|
|
40
47
|
// Copy ALL model files from cache to our models directory
|
|
41
|
-
console.log('📋 Copying
|
|
48
|
+
console.log('📋 Copying model files to bundle directory...')
|
|
42
49
|
|
|
43
50
|
const cacheDir = path.resolve(env.cacheDir)
|
|
44
51
|
const outputDir = path.resolve(OUTPUT_DIR)
|
|
@@ -62,22 +69,89 @@ async function downloadModels() {
|
|
|
62
69
|
console.log(` Total size: ${await calculateDirectorySize(outputDir)} MB`)
|
|
63
70
|
console.log(` Location: ${outputDir}`)
|
|
64
71
|
|
|
65
|
-
// Create a marker file
|
|
72
|
+
// Create a marker file with downloaded model info
|
|
73
|
+
const markerData = {
|
|
74
|
+
model: MODEL_NAME,
|
|
75
|
+
bundledAt: new Date().toISOString(),
|
|
76
|
+
version: '2.8.0',
|
|
77
|
+
downloadType: downloadType,
|
|
78
|
+
models: {}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// Check which models were downloaded
|
|
82
|
+
const fp32Path = path.join(outputDir, 'Xenova/all-MiniLM-L6-v2/onnx/model.onnx')
|
|
83
|
+
const q8Path = path.join(outputDir, 'Xenova/all-MiniLM-L6-v2/onnx/model_quantized.onnx')
|
|
84
|
+
|
|
85
|
+
if (await fileExists(fp32Path)) {
|
|
86
|
+
const stats = await fs.stat(fp32Path)
|
|
87
|
+
markerData.models.fp32 = {
|
|
88
|
+
file: 'onnx/model.onnx',
|
|
89
|
+
size: stats.size,
|
|
90
|
+
sizeFormatted: `${Math.round(stats.size / (1024 * 1024))}MB`
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
if (await fileExists(q8Path)) {
|
|
95
|
+
const stats = await fs.stat(q8Path)
|
|
96
|
+
markerData.models.q8 = {
|
|
97
|
+
file: 'onnx/model_quantized.onnx',
|
|
98
|
+
size: stats.size,
|
|
99
|
+
sizeFormatted: `${Math.round(stats.size / (1024 * 1024))}MB`
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
66
103
|
await fs.writeFile(
|
|
67
104
|
path.join(outputDir, '.brainy-models-bundled'),
|
|
68
|
-
JSON.stringify(
|
|
69
|
-
model: MODEL_NAME,
|
|
70
|
-
bundledAt: new Date().toISOString(),
|
|
71
|
-
version: '1.0.0'
|
|
72
|
-
}, null, 2)
|
|
105
|
+
JSON.stringify(markerData, null, 2)
|
|
73
106
|
)
|
|
74
107
|
|
|
108
|
+
console.log('')
|
|
109
|
+
console.log('✅ Download complete! Available models:')
|
|
110
|
+
if (markerData.models.fp32) {
|
|
111
|
+
console.log(` • FP32: ${markerData.models.fp32.sizeFormatted} (full precision)`)
|
|
112
|
+
}
|
|
113
|
+
if (markerData.models.q8) {
|
|
114
|
+
console.log(` • Q8: ${markerData.models.q8.sizeFormatted} (quantized, 75% smaller)`)
|
|
115
|
+
}
|
|
116
|
+
console.log('')
|
|
117
|
+
console.log('Air-gap deployment ready! 🚀')
|
|
118
|
+
|
|
75
119
|
} catch (error) {
|
|
76
120
|
console.error('❌ Error downloading models:', error)
|
|
77
121
|
process.exit(1)
|
|
78
122
|
}
|
|
79
123
|
}
|
|
80
124
|
|
|
125
|
+
// Download a specific model variant
|
|
126
|
+
async function downloadModelVariant(dtype) {
|
|
127
|
+
const { pipeline } = await import('@huggingface/transformers')
|
|
128
|
+
|
|
129
|
+
try {
|
|
130
|
+
// Load the model to force download
|
|
131
|
+
const extractor = await pipeline('feature-extraction', MODEL_NAME, {
|
|
132
|
+
dtype: dtype,
|
|
133
|
+
cache_dir: './models-cache'
|
|
134
|
+
})
|
|
135
|
+
|
|
136
|
+
// Test the model
|
|
137
|
+
const testResult = await extractor(['Hello world!'], {
|
|
138
|
+
pooling: 'mean',
|
|
139
|
+
normalize: true
|
|
140
|
+
})
|
|
141
|
+
|
|
142
|
+
console.log(` ✅ ${dtype.toUpperCase()} model downloaded and tested (${testResult.data.length} dimensions)`)
|
|
143
|
+
|
|
144
|
+
// Dispose to free memory
|
|
145
|
+
if (extractor.dispose) {
|
|
146
|
+
await extractor.dispose()
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
} catch (error) {
|
|
150
|
+
console.error(` ❌ Failed to download ${dtype} model:`, error)
|
|
151
|
+
throw error
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
81
155
|
async function findModelDirectories(baseDir, modelName) {
|
|
82
156
|
const dirs = []
|
|
83
157
|
|
|
@@ -141,6 +215,15 @@ async function dirExists(dir) {
|
|
|
141
215
|
}
|
|
142
216
|
}
|
|
143
217
|
|
|
218
|
+
async function fileExists(file) {
|
|
219
|
+
try {
|
|
220
|
+
const stats = await fs.stat(file)
|
|
221
|
+
return stats.isFile()
|
|
222
|
+
} catch (error) {
|
|
223
|
+
return false
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
|
|
144
227
|
async function copyDirectory(src, dest) {
|
|
145
228
|
await fs.mkdir(dest, { recursive: true })
|
|
146
229
|
const entries = await fs.readdir(src, { withFileTypes: true })
|