@soulcraft/brainy 2.3.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,13 @@
1
1
  /**
2
2
  * Model Manager - Ensures transformer models are available at runtime
3
3
  *
4
- * Strategy:
5
- * 1. Check local cache first
6
- * 2. Try GitHub releases (our backup)
7
- * 3. Fall back to Hugging Face
8
- * 4. Future: CDN at models.soulcraft.com
4
+ * Strategy (in order):
5
+ * 1. Check local cache first (instant)
6
+ * 2. Try Soulcraft CDN (fastest when available)
7
+ * 3. Try GitHub release tar.gz with extraction (reliable backup)
8
+ * 4. Fall back to Hugging Face (always works)
9
+ *
10
+ * NO USER CONFIGURATION REQUIRED - Everything is automatic!
9
11
  */
10
12
  export declare class ModelManager {
11
13
  private static instance;
@@ -16,9 +18,8 @@ export declare class ModelManager {
16
18
  private getModelsPath;
17
19
  ensureModels(modelName?: string): Promise<boolean>;
18
20
  private verifyModelFiles;
19
- private downloadFromGitHub;
20
- private downloadFromCDN;
21
- private configureTransformers;
21
+ private tryModelSource;
22
+ private downloadAndExtractFromGitHub;
22
23
  /**
23
24
  * Pre-download models for deployment
24
25
  * This is what npm run download-models calls
@@ -1,47 +1,43 @@
1
1
  /**
2
2
  * Model Manager - Ensures transformer models are available at runtime
3
3
  *
4
- * Strategy:
5
- * 1. Check local cache first
6
- * 2. Try GitHub releases (our backup)
7
- * 3. Fall back to Hugging Face
8
- * 4. Future: CDN at models.soulcraft.com
4
+ * Strategy (in order):
5
+ * 1. Check local cache first (instant)
6
+ * 2. Try Soulcraft CDN (fastest when available)
7
+ * 3. Try GitHub release tar.gz with extraction (reliable backup)
8
+ * 4. Fall back to Hugging Face (always works)
9
+ *
10
+ * NO USER CONFIGURATION REQUIRED - Everything is automatic!
9
11
  */
10
12
  import { existsSync } from 'fs';
11
- import { join, dirname } from 'path';
13
+ import { mkdir, writeFile } from 'fs/promises';
14
+ import { join } from 'path';
12
15
  import { env } from '@huggingface/transformers';
13
16
  // Model sources in order of preference
14
17
  const MODEL_SOURCES = {
15
- // GitHub Release - our controlled backup
16
- github: 'https://github.com/soulcraftlabs/brainy/releases/download/models-v1/all-MiniLM-L6-v2.tar.gz',
17
- // Future CDN - fastest option when available
18
- cdn: 'https://models.soulcraft.com/brainy/all-MiniLM-L6-v2.tar.gz',
19
- // Original Hugging Face - fallback
20
- huggingface: 'default' // Uses transformers.js default
21
- };
22
- // Expected model files and their hashes
23
- const MODEL_MANIFEST = {
24
- 'Xenova/all-MiniLM-L6-v2': {
25
- files: {
26
- 'onnx/model.onnx': {
27
- size: 90555481,
28
- sha256: null // Will be computed from actual model
29
- },
30
- 'tokenizer.json': {
31
- size: 711661,
32
- sha256: null
33
- },
34
- 'config.json': {
35
- size: 650,
36
- sha256: null
37
- },
38
- 'tokenizer_config.json': {
39
- size: 366,
40
- sha256: null
41
- }
42
- }
18
+ // CDN - Fastest when available (currently active)
19
+ cdn: {
20
+ host: 'https://models.soulcraft.com/models',
21
+ pathTemplate: '{model}/', // e.g., Xenova/all-MiniLM-L6-v2/
22
+ testFile: 'config.json' // File to test availability
23
+ },
24
+ // GitHub Release - tar.gz fallback (already exists and works)
25
+ githubRelease: {
26
+ tarUrl: 'https://github.com/soulcraftlabs/brainy/releases/download/models-v1/all-MiniLM-L6-v2.tar.gz'
27
+ },
28
+ // Original Hugging Face - final fallback (always works)
29
+ huggingface: {
30
+ host: 'https://huggingface.co',
31
+ pathTemplate: '{model}/resolve/{revision}/' // Default transformers.js pattern
43
32
  }
44
33
  };
34
+ // Model verification files - minimal set needed for transformers.js
35
+ const MODEL_FILES = [
36
+ 'config.json',
37
+ 'tokenizer.json',
38
+ 'tokenizer_config.json',
39
+ 'onnx/model.onnx'
40
+ ];
45
41
  export class ModelManager {
46
42
  constructor() {
47
43
  this.isInitialized = false;
@@ -76,96 +72,120 @@ export class ModelManager {
76
72
  if (this.isInitialized) {
77
73
  return true;
78
74
  }
79
- const modelPath = join(this.modelsPath, ...modelName.split('/'));
75
+ // Configure transformers.js environment
76
+ env.cacheDir = this.modelsPath;
77
+ env.allowLocalModels = true;
78
+ env.useFSCache = true;
80
79
  // Check if model already exists locally
81
- if (await this.verifyModelFiles(modelPath, modelName)) {
80
+ const modelPath = join(this.modelsPath, ...modelName.split('/'));
81
+ if (await this.verifyModelFiles(modelPath)) {
82
82
  console.log('✅ Models found in cache:', modelPath);
83
- this.configureTransformers(modelPath);
83
+ env.allowRemoteModels = false; // Use local only
84
84
  this.isInitialized = true;
85
85
  return true;
86
86
  }
87
87
  // Try to download from our sources
88
88
  console.log('📥 Downloading transformer models...');
89
- // Try GitHub first (our backup)
90
- if (await this.downloadFromGitHub(modelName)) {
89
+ // Try CDN first (fastest when available)
90
+ if (await this.tryModelSource('Soulcraft CDN', MODEL_SOURCES.cdn, modelName)) {
91
91
  this.isInitialized = true;
92
92
  return true;
93
93
  }
94
- // Try CDN (when available)
95
- if (await this.downloadFromCDN(modelName)) {
94
+ // Try GitHub release with tar.gz extraction (reliable backup)
95
+ if (await this.downloadAndExtractFromGitHub(modelName)) {
96
96
  this.isInitialized = true;
97
97
  return true;
98
98
  }
99
- // Fall back to Hugging Face (default transformers.js behavior)
99
+ // Fall back to Hugging Face (always works)
100
100
  console.log('⚠️ Using Hugging Face fallback for models');
101
+ env.remoteHost = MODEL_SOURCES.huggingface.host;
102
+ env.remotePathTemplate = MODEL_SOURCES.huggingface.pathTemplate;
101
103
  env.allowRemoteModels = true;
102
104
  this.isInitialized = true;
103
105
  return true;
104
106
  }
105
- async verifyModelFiles(modelPath, modelName) {
106
- const manifest = MODEL_MANIFEST[modelName];
107
- if (!manifest)
108
- return false;
109
- for (const [filePath, info] of Object.entries(manifest.files)) {
110
- const fullPath = join(modelPath, filePath);
107
+ async verifyModelFiles(modelPath) {
108
+ // Check if essential model files exist
109
+ for (const file of MODEL_FILES) {
110
+ const fullPath = join(modelPath, file);
111
111
  if (!existsSync(fullPath)) {
112
112
  return false;
113
113
  }
114
- // Optionally verify size
115
- if (process.env.VERIFY_MODEL_SIZE === 'true') {
116
- const stats = await import('fs').then(fs => fs.promises.stat(fullPath));
117
- if (stats.size !== info.size) {
118
- console.warn(`⚠️ Model file size mismatch: ${filePath}`);
119
- return false;
120
- }
121
- }
122
114
  }
123
115
  return true;
124
116
  }
125
- async downloadFromGitHub(modelName) {
117
+ async tryModelSource(name, source, modelName) {
126
118
  try {
127
- const url = MODEL_SOURCES.github;
128
- console.log('📥 Downloading from GitHub releases...');
129
- // Download tar.gz file
130
- const response = await fetch(url);
131
- if (!response.ok) {
132
- throw new Error(`GitHub download failed: ${response.status}`);
119
+ console.log(`📥 Trying ${name}...`);
120
+ // Test if the source is accessible by trying to fetch a test file
121
+ const testFile = source.testFile || 'config.json';
122
+ const modelPath = source.pathTemplate.replace('{model}', modelName).replace('{revision}', 'main');
123
+ const testUrl = `${source.host}/${modelPath}${testFile}`;
124
+ const response = await fetch(testUrl).catch(() => null);
125
+ if (response && response.ok) {
126
+ console.log(`✅ ${name} is available`);
127
+ // Configure transformers.js to use this source
128
+ env.remoteHost = source.host;
129
+ env.remotePathTemplate = source.pathTemplate;
130
+ env.allowRemoteModels = true;
131
+ // The model will be downloaded automatically by transformers.js when needed
132
+ return true;
133
+ }
134
+ else {
135
+ console.log(`⚠️ ${name} not available (${response?.status || 'unreachable'})`);
136
+ return false;
133
137
  }
134
- const buffer = await response.arrayBuffer();
135
- // Extract tar.gz (would need tar library in production)
136
- // For now, return false to fall back to other methods
137
- console.log('⚠️ GitHub model extraction not yet implemented');
138
- return false;
139
138
  }
140
139
  catch (error) {
141
- console.log('⚠️ GitHub download failed:', error.message);
140
+ console.log(`⚠️ ${name} check failed:`, error.message);
142
141
  return false;
143
142
  }
144
143
  }
145
- async downloadFromCDN(modelName) {
144
+ async downloadAndExtractFromGitHub(modelName) {
146
145
  try {
147
- const url = MODEL_SOURCES.cdn;
148
- console.log('📥 Downloading from Soulcraft CDN...');
149
- // Try to fetch from CDN
150
- const response = await fetch(url);
146
+ console.log('📥 Trying GitHub Release (tar.gz)...');
147
+ // Download tar.gz file
148
+ const response = await fetch(MODEL_SOURCES.githubRelease.tarUrl);
151
149
  if (!response.ok) {
152
- throw new Error(`CDN download failed: ${response.status}`);
150
+ console.log(`⚠️ GitHub Release not available (${response.status})`);
151
+ return false;
152
+ }
153
+ // Since we can't use tar-stream, we'll use Node's built-in child_process
154
+ // to extract using system tar command (available on all Unix systems)
155
+ const buffer = await response.arrayBuffer();
156
+ const modelPath = join(this.modelsPath, ...modelName.split('/'));
157
+ // Create model directory
158
+ await mkdir(modelPath, { recursive: true });
159
+ // Write tar.gz to temp file and extract
160
+ const tempFile = join(this.modelsPath, 'temp-model.tar.gz');
161
+ await writeFile(tempFile, Buffer.from(buffer));
162
+ // Extract using system tar command
163
+ const { exec } = await import('child_process');
164
+ const { promisify } = await import('util');
165
+ const execAsync = promisify(exec);
166
+ try {
167
+ // Extract and strip the first directory component
168
+ await execAsync(`tar -xzf ${tempFile} -C ${modelPath} --strip-components=1`, {
169
+ cwd: this.modelsPath
170
+ });
171
+ // Clean up temp file
172
+ const { unlink } = await import('fs/promises');
173
+ await unlink(tempFile);
174
+ console.log('✅ GitHub Release models extracted and cached locally');
175
+ // Configure to use local models now
176
+ env.allowRemoteModels = false;
177
+ return true;
178
+ }
179
+ catch (extractError) {
180
+ console.log('⚠️ Tar extraction failed, trying alternative method');
181
+ return false;
153
182
  }
154
- // Would extract files here
155
- console.log('⚠️ CDN not yet available');
156
- return false;
157
183
  }
158
184
  catch (error) {
159
- console.log('⚠️ CDN download failed:', error.message);
185
+ console.log('⚠️ GitHub Release download failed:', error.message);
160
186
  return false;
161
187
  }
162
188
  }
163
- configureTransformers(modelPath) {
164
- // Configure transformers.js to use our local models
165
- env.localModelPath = dirname(modelPath);
166
- env.allowRemoteModels = false;
167
- console.log('🔧 Configured transformers.js to use local models');
168
- }
169
189
  /**
170
190
  * Pre-download models for deployment
171
191
  * This is what npm run download-models calls
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/brainy",
3
- "version": "2.3.0",
3
+ "version": "3.0.0",
4
4
  "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",