@soulcraft/brainy 2.3.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,13 @@
 /**
  * Model Manager - Ensures transformer models are available at runtime
  *
- * Strategy:
- * 1. Check local cache first
- * 2. Try
- * 3.
- * 4.
+ * Strategy (in order):
+ * 1. Check local cache first (instant)
+ * 2. Try Soulcraft CDN (fastest when available)
+ * 3. Try GitHub release tar.gz with extraction (reliable backup)
+ * 4. Fall back to Hugging Face (always works)
+ *
+ * NO USER CONFIGURATION REQUIRED - Everything is automatic!
  */
 export declare class ModelManager {
     private static instance;
@@ -16,9 +18,8 @@ export declare class ModelManager {
     private getModelsPath;
     ensureModels(modelName?: string): Promise<boolean>;
     private verifyModelFiles;
-    private
-    private
-    private configureTransformers;
+    private tryModelSource;
+    private downloadAndExtractFromGitHub;
     /**
      * Pre-download models for deployment
      * This is what npm run download-models calls
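For orientation, here is a minimal consumer-side sketch of the surface declared above. Only ensureModels(modelName?: string): Promise<boolean> is confirmed by this .d.ts; the getInstance() accessor, the import path, and the model name are assumptions (the singleton accessor is only implied by the private static instance field):

// Illustrative sketch only - not part of the published package.
import { ModelManager } from '@soulcraft/brainy'; // export path assumed

async function warmUpModels(): Promise<void> {
    // getInstance() is a hypothetical singleton accessor; the hunk above only
    // shows `private static instance`, not the accessor itself.
    const manager = ModelManager.getInstance();
    const ready = await manager.ensureModels('Xenova/all-MiniLM-L6-v2');
    if (!ready) {
        throw new Error('Transformer models could not be prepared');
    }
}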
@@ -1,47 +1,43 @@
 /**
  * Model Manager - Ensures transformer models are available at runtime
  *
- * Strategy:
- * 1. Check local cache first
- * 2. Try
- * 3.
- * 4.
+ * Strategy (in order):
+ * 1. Check local cache first (instant)
+ * 2. Try Soulcraft CDN (fastest when available)
+ * 3. Try GitHub release tar.gz with extraction (reliable backup)
+ * 4. Fall back to Hugging Face (always works)
+ *
+ * NO USER CONFIGURATION REQUIRED - Everything is automatic!
  */
 import { existsSync } from 'fs';
-import {
+import { mkdir, writeFile } from 'fs/promises';
+import { join } from 'path';
 import { env } from '@huggingface/transformers';
 // Model sources in order of preference
 const MODEL_SOURCES = {
-    //
-
-
-
-
-
-
-
-
-
-
-
-
-
-        },
-        'tokenizer.json': {
-            size: 711661,
-            sha256: null
-        },
-        'config.json': {
-            size: 650,
-            sha256: null
-        },
-        'tokenizer_config.json': {
-            size: 366,
-            sha256: null
-        }
-    }
+    // CDN - Fastest when available (currently active)
+    cdn: {
+        host: 'https://models.soulcraft.com/models',
+        pathTemplate: '{model}/', // e.g., Xenova/all-MiniLM-L6-v2/
+        testFile: 'config.json' // File to test availability
+    },
+    // GitHub Release - tar.gz fallback (already exists and works)
+    githubRelease: {
+        tarUrl: 'https://github.com/soulcraftlabs/brainy/releases/download/models-v1/all-MiniLM-L6-v2.tar.gz'
+    },
+    // Original Hugging Face - final fallback (always works)
+    huggingface: {
+        host: 'https://huggingface.co',
+        pathTemplate: '{model}/resolve/{revision}/' // Default transformers.js pattern
     }
 };
+// Model verification files - minimal set needed for transformers.js
+const MODEL_FILES = [
+    'config.json',
+    'tokenizer.json',
+    'tokenizer_config.json',
+    'onnx/model.onnx'
+];
 export class ModelManager {
     constructor() {
         this.isInitialized = false;
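To make the pathTemplate placeholders concrete, here is a short sketch of how a MODEL_SOURCES entry expands into the URL that gets probed. It mirrors the string replacement done in tryModelSource later in this diff; the helper name and interface are illustrative, not part of the package:

// Illustrative helper - expands {model} and {revision} the same way the
// diff's tryModelSource does, e.g.
//   https://models.soulcraft.com/models/Xenova/all-MiniLM-L6-v2/config.json
//   https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/config.json
interface ModelSource {
    host: string;
    pathTemplate: string;
    testFile?: string;
}

function buildTestUrl(source: ModelSource, modelName: string, revision = 'main'): string {
    const path = source.pathTemplate
        .replace('{model}', modelName)
        .replace('{revision}', revision);
    return `${source.host}/${path}${source.testFile ?? 'config.json'}`;
}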
@@ -76,96 +72,120 @@ export class ModelManager {
         if (this.isInitialized) {
             return true;
         }
-
+        // Configure transformers.js environment
+        env.cacheDir = this.modelsPath;
+        env.allowLocalModels = true;
+        env.useFSCache = true;
         // Check if model already exists locally
-
+        const modelPath = join(this.modelsPath, ...modelName.split('/'));
+        if (await this.verifyModelFiles(modelPath)) {
             console.log('✅ Models found in cache:', modelPath);
-
+            env.allowRemoteModels = false; // Use local only
             this.isInitialized = true;
             return true;
         }
         // Try to download from our sources
         console.log('📥 Downloading transformer models...');
-        // Try
-        if (await this.
+        // Try CDN first (fastest when available)
+        if (await this.tryModelSource('Soulcraft CDN', MODEL_SOURCES.cdn, modelName)) {
             this.isInitialized = true;
             return true;
         }
-        // Try
-        if (await this.
+        // Try GitHub release with tar.gz extraction (reliable backup)
+        if (await this.downloadAndExtractFromGitHub(modelName)) {
             this.isInitialized = true;
             return true;
        }
-        // Fall back to Hugging Face (
+        // Fall back to Hugging Face (always works)
         console.log('⚠️ Using Hugging Face fallback for models');
+        env.remoteHost = MODEL_SOURCES.huggingface.host;
+        env.remotePathTemplate = MODEL_SOURCES.huggingface.pathTemplate;
         env.allowRemoteModels = true;
         this.isInitialized = true;
         return true;
     }
-    async verifyModelFiles(modelPath
-
-
-
-        for (const [filePath, info] of Object.entries(manifest.files)) {
-            const fullPath = join(modelPath, filePath);
+    async verifyModelFiles(modelPath) {
+        // Check if essential model files exist
+        for (const file of MODEL_FILES) {
+            const fullPath = join(modelPath, file);
             if (!existsSync(fullPath)) {
                 return false;
             }
-            // Optionally verify size
-            if (process.env.VERIFY_MODEL_SIZE === 'true') {
-                const stats = await import('fs').then(fs => fs.promises.stat(fullPath));
-                if (stats.size !== info.size) {
-                    console.warn(`⚠️ Model file size mismatch: ${filePath}`);
-                    return false;
-                }
-            }
         }
         return true;
     }
-    async
+    async tryModelSource(name, source, modelName) {
         try {
-
-
-
-            const
-
-
+            console.log(`📥 Trying ${name}...`);
+            // Test if the source is accessible by trying to fetch a test file
+            const testFile = source.testFile || 'config.json';
+            const modelPath = source.pathTemplate.replace('{model}', modelName).replace('{revision}', 'main');
+            const testUrl = `${source.host}/${modelPath}${testFile}`;
+            const response = await fetch(testUrl).catch(() => null);
+            if (response && response.ok) {
+                console.log(`✅ ${name} is available`);
+                // Configure transformers.js to use this source
+                env.remoteHost = source.host;
+                env.remotePathTemplate = source.pathTemplate;
+                env.allowRemoteModels = true;
+                // The model will be downloaded automatically by transformers.js when needed
+                return true;
+            }
+            else {
+                console.log(`⚠️ ${name} not available (${response?.status || 'unreachable'})`);
+                return false;
             }
-            const buffer = await response.arrayBuffer();
-            // Extract tar.gz (would need tar library in production)
-            // For now, return false to fall back to other methods
-            console.log('⚠️ GitHub model extraction not yet implemented');
-            return false;
         }
         catch (error) {
-            console.log(
+            console.log(`⚠️ ${name} check failed:`, error.message);
             return false;
         }
     }
-    async
+    async downloadAndExtractFromGitHub(modelName) {
         try {
-
-
-
-            const response = await fetch(url);
+            console.log('📥 Trying GitHub Release (tar.gz)...');
+            // Download tar.gz file
+            const response = await fetch(MODEL_SOURCES.githubRelease.tarUrl);
             if (!response.ok) {
-
+                console.log(`⚠️ GitHub Release not available (${response.status})`);
+                return false;
+            }
+            // Since we can't use tar-stream, we'll use Node's built-in child_process
+            // to extract using system tar command (available on all Unix systems)
+            const buffer = await response.arrayBuffer();
+            const modelPath = join(this.modelsPath, ...modelName.split('/'));
+            // Create model directory
+            await mkdir(modelPath, { recursive: true });
+            // Write tar.gz to temp file and extract
+            const tempFile = join(this.modelsPath, 'temp-model.tar.gz');
+            await writeFile(tempFile, Buffer.from(buffer));
+            // Extract using system tar command
+            const { exec } = await import('child_process');
+            const { promisify } = await import('util');
+            const execAsync = promisify(exec);
+            try {
+                // Extract and strip the first directory component
+                await execAsync(`tar -xzf ${tempFile} -C ${modelPath} --strip-components=1`, {
+                    cwd: this.modelsPath
+                });
+                // Clean up temp file
+                const { unlink } = await import('fs/promises');
+                await unlink(tempFile);
+                console.log('✅ GitHub Release models extracted and cached locally');
+                // Configure to use local models now
+                env.allowRemoteModels = false;
+                return true;
+            }
+            catch (extractError) {
+                console.log('⚠️ Tar extraction failed, trying alternative method');
+                return false;
             }
-            // Would extract files here
-            console.log('⚠️ CDN not yet available');
-            return false;
         }
         catch (error) {
-            console.log('⚠️
+            console.log('⚠️ GitHub Release download failed:', error.message);
             return false;
         }
     }
-    configureTransformers(modelPath) {
-        // Configure transformers.js to use our local models
-        env.localModelPath = dirname(modelPath);
-        env.allowRemoteModels = false;
-        console.log('🔧 Configured transformers.js to use local models');
-    }
     /**
      * Pre-download models for deployment
      * This is what npm run download-models calls
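One note on the GitHub fallback above: it shells out to the system tar binary through exec with an interpolated command string, which assumes tar is on PATH and that the cache path contains no spaces or shell metacharacters. As a hedged aside, and not what the published code does, the same extraction step can be sketched with execFile and an argument array, which avoids shell quoting entirely:

// Illustrative variant of the extraction step only; the tarball is assumed to
// have already been written to tempFile by the caller. Names are hypothetical.
import { execFile } from 'child_process';
import { promisify } from 'util';
import { mkdir, unlink } from 'fs/promises';

const execFileAsync = promisify(execFile);

async function extractModelTarball(tempFile: string, modelPath: string): Promise<void> {
    await mkdir(modelPath, { recursive: true });
    // Arguments are passed as an array, so spaces in paths need no quoting
    // and nothing is interpreted by a shell.
    await execFileAsync('tar', ['-xzf', tempFile, '-C', modelPath, '--strip-components=1']);
    await unlink(tempFile); // remove the temporary archive once extracted
}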
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@soulcraft/brainy",
-  "version": "2.3.0",
+  "version": "3.0.0",
   "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
   "main": "dist/index.js",
   "module": "dist/index.js",