@soulcraft/brainy 2.7.4 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2 -3
- package/README.md +31 -0
- package/dist/embeddings/model-manager.d.ts +11 -0
- package/dist/embeddings/model-manager.js +43 -7
- package/dist/utils/embedding.js +24 -3
- package/package.json +4 -1
- package/scripts/download-models.cjs +102 -19
package/CHANGELOG.md
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
-
All notable changes to
|
|
3
|
+
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
5
|
+
## [2.8.0](https://github.com/soulcraftlabs/brainy/compare/v2.7.4...v2.8.0) (2025-08-29)
|
|
7
6
|
|
|
8
7
|
## [2.7.4] - 2025-08-29
|
|
9
8
|
|
package/README.md
CHANGED
|
@@ -121,6 +121,37 @@ await brain.find("Documentation about authentication from last month")
|
|
|
121
121
|
- **Worker-based embeddings** - Non-blocking operations
|
|
122
122
|
- **Automatic caching** - Intelligent result caching
|
|
123
123
|
|
|
124
|
+
### Performance Optimization
|
|
125
|
+
|
|
126
|
+
**Q8 Quantized Models** - 75% smaller, faster loading (v2.8.0+)
|
|
127
|
+
|
|
128
|
+
```javascript
|
|
129
|
+
// Default: Full precision (fp32) - maximum compatibility
|
|
130
|
+
const brain = new BrainyData()
|
|
131
|
+
|
|
132
|
+
// Optimized: Quantized models (q8) - 75% smaller, 99% accuracy
|
|
133
|
+
const brainOptimized = new BrainyData({
|
|
134
|
+
embeddingOptions: { dtype: 'q8' }
|
|
135
|
+
})
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
**Model Comparison:**
|
|
139
|
+
- **FP32 (default)**: 90MB, 100% accuracy, maximum compatibility
|
|
140
|
+
- **Q8 (optional)**: 23MB, ~99% accuracy, faster loading
|
|
141
|
+
|
|
142
|
+
**When to use Q8:**
|
|
143
|
+
- ✅ New projects where size/speed matters
|
|
144
|
+
- ✅ Memory-constrained environments
|
|
145
|
+
- ✅ Mobile or edge deployments
|
|
146
|
+
- ❌ Existing projects with FP32 data (incompatible embeddings)
|
|
147
|
+
|
|
148
|
+
**Air-gap deployment:**
|
|
149
|
+
```bash
|
|
150
|
+
npm run download-models # Both models (recommended)
|
|
151
|
+
npm run download-models:q8 # Q8 only (space-constrained)
|
|
152
|
+
npm run download-models:fp32 # FP32 only (compatibility)
|
|
153
|
+
```
|
|
154
|
+
|
|
124
155
|
## 📚 Core API
|
|
125
156
|
|
|
126
157
|
### `search()` - Vector Similarity
|
package/dist/embeddings/model-manager.d.ts
CHANGED
|
@@ -18,6 +18,17 @@ export declare class ModelManager {
|
|
|
18
18
|
private getModelsPath;
|
|
19
19
|
ensureModels(modelName?: string): Promise<boolean>;
|
|
20
20
|
private verifyModelFiles;
|
|
21
|
+
/**
|
|
22
|
+
* Check which model variants are available locally
|
|
23
|
+
*/
|
|
24
|
+
getAvailableModels(modelName?: string): {
|
|
25
|
+
fp32: boolean;
|
|
26
|
+
q8: boolean;
|
|
27
|
+
};
|
|
28
|
+
/**
|
|
29
|
+
* Get the best available model variant based on preference and availability
|
|
30
|
+
*/
|
|
31
|
+
getBestAvailableModel(preferredType?: 'fp32' | 'q8', modelName?: string): 'fp32' | 'q8' | null;
|
|
21
32
|
private tryModelSource;
|
|
22
33
|
private downloadAndExtractFromGitHub;
|
|
23
34
|
/**
|
package/dist/embeddings/model-manager.js
CHANGED
|
@@ -31,13 +31,16 @@ const MODEL_SOURCES = {
|
|
|
31
31
|
pathTemplate: '{model}/resolve/{revision}/' // Default transformers.js pattern
|
|
32
32
|
}
|
|
33
33
|
};
|
|
34
|
-
// Model verification files -
|
|
35
|
-
const
|
|
34
|
+
// Model verification files - BOTH fp32 and q8 variants
|
|
35
|
+
const REQUIRED_FILES = [
|
|
36
36
|
'config.json',
|
|
37
37
|
'tokenizer.json',
|
|
38
|
-
'tokenizer_config.json'
|
|
39
|
-
'onnx/model.onnx'
|
|
38
|
+
'tokenizer_config.json'
|
|
40
39
|
];
|
|
40
|
+
const MODEL_VARIANTS = {
|
|
41
|
+
fp32: 'onnx/model.onnx',
|
|
42
|
+
q8: 'onnx/model_quantized.onnx'
|
|
43
|
+
};
|
|
41
44
|
export class ModelManager {
|
|
42
45
|
constructor() {
|
|
43
46
|
this.isInitialized = false;
|
|
@@ -105,14 +108,47 @@ export class ModelManager {
|
|
|
105
108
|
return true;
|
|
106
109
|
}
|
|
107
110
|
async verifyModelFiles(modelPath) {
|
|
108
|
-
// Check if essential
|
|
109
|
-
for (const file of
|
|
111
|
+
// Check if essential files exist
|
|
112
|
+
for (const file of REQUIRED_FILES) {
|
|
110
113
|
const fullPath = join(modelPath, file);
|
|
111
114
|
if (!existsSync(fullPath)) {
|
|
112
115
|
return false;
|
|
113
116
|
}
|
|
114
117
|
}
|
|
115
|
-
|
|
118
|
+
// At least one model variant must exist (fp32 or q8)
|
|
119
|
+
const fp32Exists = existsSync(join(modelPath, MODEL_VARIANTS.fp32));
|
|
120
|
+
const q8Exists = existsSync(join(modelPath, MODEL_VARIANTS.q8));
|
|
121
|
+
return fp32Exists || q8Exists;
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Check which model variants are available locally
|
|
125
|
+
*/
|
|
126
|
+
getAvailableModels(modelName = 'Xenova/all-MiniLM-L6-v2') {
|
|
127
|
+
const modelPath = join(this.modelsPath, modelName);
|
|
128
|
+
return {
|
|
129
|
+
fp32: existsSync(join(modelPath, MODEL_VARIANTS.fp32)),
|
|
130
|
+
q8: existsSync(join(modelPath, MODEL_VARIANTS.q8))
|
|
131
|
+
};
|
|
132
|
+
}
|
|
133
|
+
/**
|
|
134
|
+
* Get the best available model variant based on preference and availability
|
|
135
|
+
*/
|
|
136
|
+
getBestAvailableModel(preferredType = 'fp32', modelName = 'Xenova/all-MiniLM-L6-v2') {
|
|
137
|
+
const available = this.getAvailableModels(modelName);
|
|
138
|
+
// If preferred type is available, use it
|
|
139
|
+
if (available[preferredType]) {
|
|
140
|
+
return preferredType;
|
|
141
|
+
}
|
|
142
|
+
// Otherwise fall back to what's available
|
|
143
|
+
if (preferredType === 'q8' && available.fp32) {
|
|
144
|
+
console.warn('⚠️ Q8 model requested but not available, falling back to FP32');
|
|
145
|
+
return 'fp32';
|
|
146
|
+
}
|
|
147
|
+
if (preferredType === 'fp32' && available.q8) {
|
|
148
|
+
console.warn('⚠️ FP32 model requested but not available, falling back to Q8');
|
|
149
|
+
return 'q8';
|
|
150
|
+
}
|
|
151
|
+
return null;
|
|
116
152
|
}
|
|
117
153
|
async tryModelSource(name, source, modelName) {
|
|
118
154
|
try {
|
package/dist/utils/embedding.js
CHANGED
|
@@ -98,11 +98,23 @@ export class TransformerEmbedding {
|
|
|
98
98
|
verbose: this.verbose,
|
|
99
99
|
cacheDir: options.cacheDir || './models',
|
|
100
100
|
localFilesOnly: localFilesOnly,
|
|
101
|
-
dtype: options.dtype || 'fp32', //
|
|
101
|
+
dtype: options.dtype || 'fp32', // CRITICAL: fp32 default for backward compatibility
|
|
102
102
|
device: options.device || 'auto'
|
|
103
103
|
};
|
|
104
|
+
// ULTRA-CAREFUL: Runtime warnings for q8 usage
|
|
105
|
+
if (this.options.dtype === 'q8') {
|
|
106
|
+
const confirmed = process.env.BRAINY_Q8_CONFIRMED === 'true';
|
|
107
|
+
if (!confirmed && this.verbose) {
|
|
108
|
+
console.warn('🚨 Q8 MODEL WARNING:');
|
|
109
|
+
console.warn(' • Q8 creates different embeddings than fp32');
|
|
110
|
+
console.warn(' • Q8 is incompatible with existing fp32 data');
|
|
111
|
+
console.warn(' • Only use q8 for new projects or when explicitly migrating');
|
|
112
|
+
console.warn(' • Set BRAINY_Q8_CONFIRMED=true to silence this warning');
|
|
113
|
+
console.warn(' • Q8 model is 75% smaller but may have slightly reduced accuracy');
|
|
114
|
+
}
|
|
115
|
+
}
|
|
104
116
|
if (this.verbose) {
|
|
105
|
-
this.logger('log', `Embedding config:
|
|
117
|
+
this.logger('log', `Embedding config: dtype=${this.options.dtype}, localFilesOnly=${localFilesOnly}, model=${this.options.model}`);
|
|
106
118
|
}
|
|
107
119
|
// Configure transformers.js environment
|
|
108
120
|
if (!isBrowser()) {
|
|
@@ -212,11 +224,20 @@ export class TransformerEmbedding {
|
|
|
212
224
|
: this.options.cacheDir;
|
|
213
225
|
this.logger('log', `Loading Transformer model: ${this.options.model} on device: ${device}`);
|
|
214
226
|
const startTime = Date.now();
|
|
227
|
+
// Check model availability and select appropriate variant
|
|
228
|
+
const available = modelManager.getAvailableModels(this.options.model);
|
|
229
|
+
const actualType = modelManager.getBestAvailableModel(this.options.dtype, this.options.model);
|
|
230
|
+
if (!actualType) {
|
|
231
|
+
throw new Error(`No model variants available for ${this.options.model}. Run 'npm run download-models' to download models.`);
|
|
232
|
+
}
|
|
233
|
+
if (actualType !== this.options.dtype) {
|
|
234
|
+
this.logger('log', `Using ${actualType} model (${this.options.dtype} not available)`);
|
|
235
|
+
}
|
|
215
236
|
// Load the feature extraction pipeline with memory optimizations
|
|
216
237
|
const pipelineOptions = {
|
|
217
238
|
cache_dir: cacheDir,
|
|
218
239
|
local_files_only: isBrowser() ? false : this.options.localFilesOnly,
|
|
219
|
-
dtype:
|
|
240
|
+
dtype: actualType, // Use the actual available model type
|
|
220
241
|
// CRITICAL: ONNX memory optimizations
|
|
221
242
|
session_options: {
|
|
222
243
|
enableCpuMemArena: false, // Disable pre-allocated memory arena
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "2.7.4",
|
|
3
|
+
"version": "2.8.0",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|
|
@@ -73,6 +73,9 @@
|
|
|
73
73
|
"test:ci-integration": "NODE_OPTIONS='--max-old-space-size=16384' CI=true vitest run --config tests/configs/vitest.integration.config.ts",
|
|
74
74
|
"test:ci": "npm run test:ci-unit",
|
|
75
75
|
"download-models": "node scripts/download-models.cjs",
|
|
76
|
+
"download-models:fp32": "node scripts/download-models.cjs fp32",
|
|
77
|
+
"download-models:q8": "node scripts/download-models.cjs q8",
|
|
78
|
+
"download-models:both": "node scripts/download-models.cjs",
|
|
76
79
|
"models:verify": "node scripts/ensure-models.js",
|
|
77
80
|
"lint": "eslint --ext .ts,.js src/",
|
|
78
81
|
"lint:fix": "eslint --ext .ts,.js src/ --fix",
|
package/scripts/download-models.cjs
CHANGED
|
@@ -9,6 +9,11 @@ const path = require('path')
|
|
|
9
9
|
const MODEL_NAME = 'Xenova/all-MiniLM-L6-v2'
|
|
10
10
|
const OUTPUT_DIR = './models'
|
|
11
11
|
|
|
12
|
+
// Parse command line arguments for model type selection
|
|
13
|
+
const args = process.argv.slice(2)
|
|
14
|
+
const downloadType = args.includes('fp32') ? 'fp32' :
|
|
15
|
+
args.includes('q8') ? 'q8' : 'both'
|
|
16
|
+
|
|
12
17
|
async function downloadModels() {
|
|
13
18
|
// Use dynamic import for ES modules in CommonJS
|
|
14
19
|
const { pipeline, env } = await import('@huggingface/transformers')
|
|
@@ -16,29 +21,31 @@ async function downloadModels() {
|
|
|
16
21
|
// Configure transformers.js to use local cache
|
|
17
22
|
env.cacheDir = './models-cache'
|
|
18
23
|
env.allowRemoteModels = true
|
|
24
|
+
|
|
19
25
|
try {
|
|
20
|
-
console.log('
|
|
26
|
+
console.log('🧠 Brainy Model Downloader v2.8.0')
|
|
27
|
+
console.log('===================================')
|
|
21
28
|
console.log(` Model: ${MODEL_NAME}`)
|
|
29
|
+
console.log(` Type: ${downloadType} (fp32, q8, or both)`)
|
|
22
30
|
console.log(` Cache: ${env.cacheDir}`)
|
|
31
|
+
console.log('')
|
|
23
32
|
|
|
24
33
|
// Create output directory
|
|
25
34
|
await fs.mkdir(OUTPUT_DIR, { recursive: true })
|
|
26
35
|
|
|
27
|
-
//
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
console.log('🧪 Testing model...')
|
|
33
|
-
const testResult = await extractor(['Hello world!'], {
|
|
34
|
-
pooling: 'mean',
|
|
35
|
-
normalize: true
|
|
36
|
-
})
|
|
36
|
+
// Download models based on type
|
|
37
|
+
if (downloadType === 'both' || downloadType === 'fp32') {
|
|
38
|
+
console.log('📥 Downloading FP32 model (full precision, 90MB)...')
|
|
39
|
+
await downloadModelVariant('fp32')
|
|
40
|
+
}
|
|
37
41
|
|
|
38
|
-
|
|
42
|
+
if (downloadType === 'both' || downloadType === 'q8') {
|
|
43
|
+
console.log('📥 Downloading Q8 model (quantized, 23MB)...')
|
|
44
|
+
await downloadModelVariant('q8')
|
|
45
|
+
}
|
|
39
46
|
|
|
40
47
|
// Copy ALL model files from cache to our models directory
|
|
41
|
-
console.log('📋 Copying
|
|
48
|
+
console.log('📋 Copying model files to bundle directory...')
|
|
42
49
|
|
|
43
50
|
const cacheDir = path.resolve(env.cacheDir)
|
|
44
51
|
const outputDir = path.resolve(OUTPUT_DIR)
|
|
@@ -62,22 +69,89 @@ async function downloadModels() {
|
|
|
62
69
|
console.log(` Total size: ${await calculateDirectorySize(outputDir)} MB`)
|
|
63
70
|
console.log(` Location: ${outputDir}`)
|
|
64
71
|
|
|
65
|
-
// Create a marker file
|
|
72
|
+
// Create a marker file with downloaded model info
|
|
73
|
+
const markerData = {
|
|
74
|
+
model: MODEL_NAME,
|
|
75
|
+
bundledAt: new Date().toISOString(),
|
|
76
|
+
version: '2.8.0',
|
|
77
|
+
downloadType: downloadType,
|
|
78
|
+
models: {}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// Check which models were downloaded
|
|
82
|
+
const fp32Path = path.join(outputDir, 'Xenova/all-MiniLM-L6-v2/onnx/model.onnx')
|
|
83
|
+
const q8Path = path.join(outputDir, 'Xenova/all-MiniLM-L6-v2/onnx/model_quantized.onnx')
|
|
84
|
+
|
|
85
|
+
if (await fileExists(fp32Path)) {
|
|
86
|
+
const stats = await fs.stat(fp32Path)
|
|
87
|
+
markerData.models.fp32 = {
|
|
88
|
+
file: 'onnx/model.onnx',
|
|
89
|
+
size: stats.size,
|
|
90
|
+
sizeFormatted: `${Math.round(stats.size / (1024 * 1024))}MB`
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
if (await fileExists(q8Path)) {
|
|
95
|
+
const stats = await fs.stat(q8Path)
|
|
96
|
+
markerData.models.q8 = {
|
|
97
|
+
file: 'onnx/model_quantized.onnx',
|
|
98
|
+
size: stats.size,
|
|
99
|
+
sizeFormatted: `${Math.round(stats.size / (1024 * 1024))}MB`
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
66
103
|
await fs.writeFile(
|
|
67
104
|
path.join(outputDir, '.brainy-models-bundled'),
|
|
68
|
-
JSON.stringify({
|
|
69
|
-
model: MODEL_NAME,
|
|
70
|
-
bundledAt: new Date().toISOString(),
|
|
71
|
-
version: '1.0.0'
|
|
72
|
-
}, null, 2)
|
|
105
|
+
JSON.stringify(markerData, null, 2)
|
|
73
106
|
)
|
|
74
107
|
|
|
108
|
+
console.log('')
|
|
109
|
+
console.log('✅ Download complete! Available models:')
|
|
110
|
+
if (markerData.models.fp32) {
|
|
111
|
+
console.log(` • FP32: ${markerData.models.fp32.sizeFormatted} (full precision)`)
|
|
112
|
+
}
|
|
113
|
+
if (markerData.models.q8) {
|
|
114
|
+
console.log(` • Q8: ${markerData.models.q8.sizeFormatted} (quantized, 75% smaller)`)
|
|
115
|
+
}
|
|
116
|
+
console.log('')
|
|
117
|
+
console.log('Air-gap deployment ready! 🚀')
|
|
118
|
+
|
|
75
119
|
} catch (error) {
|
|
76
120
|
console.error('❌ Error downloading models:', error)
|
|
77
121
|
process.exit(1)
|
|
78
122
|
}
|
|
79
123
|
}
|
|
80
124
|
|
|
125
|
+
// Download a specific model variant
|
|
126
|
+
async function downloadModelVariant(dtype) {
|
|
127
|
+
const { pipeline } = await import('@huggingface/transformers')
|
|
128
|
+
|
|
129
|
+
try {
|
|
130
|
+
// Load the model to force download
|
|
131
|
+
const extractor = await pipeline('feature-extraction', MODEL_NAME, {
|
|
132
|
+
dtype: dtype,
|
|
133
|
+
cache_dir: './models-cache'
|
|
134
|
+
})
|
|
135
|
+
|
|
136
|
+
// Test the model
|
|
137
|
+
const testResult = await extractor(['Hello world!'], {
|
|
138
|
+
pooling: 'mean',
|
|
139
|
+
normalize: true
|
|
140
|
+
})
|
|
141
|
+
|
|
142
|
+
console.log(` ✅ ${dtype.toUpperCase()} model downloaded and tested (${testResult.data.length} dimensions)`)
|
|
143
|
+
|
|
144
|
+
// Dispose to free memory
|
|
145
|
+
if (extractor.dispose) {
|
|
146
|
+
await extractor.dispose()
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
} catch (error) {
|
|
150
|
+
console.error(` ❌ Failed to download ${dtype} model:`, error)
|
|
151
|
+
throw error
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
81
155
|
async function findModelDirectories(baseDir, modelName) {
|
|
82
156
|
const dirs = []
|
|
83
157
|
|
|
@@ -141,6 +215,15 @@ async function dirExists(dir) {
|
|
|
141
215
|
}
|
|
142
216
|
}
|
|
143
217
|
|
|
218
|
+
async function fileExists(file) {
|
|
219
|
+
try {
|
|
220
|
+
const stats = await fs.stat(file)
|
|
221
|
+
return stats.isFile()
|
|
222
|
+
} catch (error) {
|
|
223
|
+
return false
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
|
|
144
227
|
async function copyDirectory(src, dest) {
|
|
145
228
|
await fs.mkdir(dest, { recursive: true })
|
|
146
229
|
const entries = await fs.readdir(src, { withFileTypes: true })
|