@soulcraft/brainy 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -200,7 +200,7 @@ const answer = await llm.generate(relevant + userQuery) // Generate with contex
200
200
  await brain.add("The iPhone 15 Pro has a titanium design")
201
201
  await brain.add("Samsung Galaxy S24 features AI photography")
202
202
 
203
- const results = await brain.search("premium smartphones with metal build")
203
+ const results = await brain.search("smartphones with metal build")
204
204
  // Returns: iPhone (titanium matches "metal build" semantically)
205
205
  ```
206
206
 
@@ -240,18 +240,17 @@ await sharedBrain.init()
240
240
 
241
241
  // Sales Agent adds customer intelligence
242
242
  const customerId = await sharedBrain.addNoun("Acme Corp", NounType.Organization)
243
- await sharedBrain.addVerb(customerId, "enterprise-plan", VerbType.InterestedIn, {
243
+ await sharedBrain.addVerb(customerId, "business-plan", VerbType.InterestedIn, {
244
244
  priority: "high",
245
- budget: "$50k",
246
245
  timeline: "Q2 2025"
247
246
  })
248
247
 
249
248
  // Support Agent instantly sees the context
250
249
  const customerData = await sharedBrain.getNounWithVerbs(customerId)
251
- // Support knows: customer interested in enterprise plan with $50k budget
250
+ // Support knows: customer interested in business plan
252
251
 
253
252
  // Marketing Agent learns from both
254
- const insights = await sharedBrain.search("enterprise customers budget 50k", 10)
253
+ const insights = await sharedBrain.search("business customers Q2", 10)
255
254
  // Marketing can create targeted campaigns for similar prospects
256
255
  ```
257
256
 
@@ -332,9 +331,8 @@ import { BrainyData, Cortex } from '@soulcraft/brainy'
332
331
  const brain = new BrainyData()
333
332
  const cortex = new Cortex()
334
333
 
335
- // Add premium augmentations (requires Brain Cloud subscription)
336
- brain.register(new AIMemory())
337
- brain.register(new AgentCoordinator())
334
+ // Add augmentations to extend functionality
335
+ brain.register(new CustomAugmentation())
338
336
 
339
337
  // Now your AI remembers everything across all sessions!
340
338
  await brain.add("User prefers TypeScript over JavaScript")
@@ -785,7 +785,7 @@ export declare class BrainyData<T = any> implements BrainyDataInterface<T> {
785
785
  */
786
786
  delete(id: string, options?: {
787
787
  service?: string;
788
- soft?: boolean;
788
+ hard?: boolean;
789
789
  cascade?: boolean;
790
790
  force?: boolean;
791
791
  }): Promise<boolean>;
@@ -1336,9 +1336,12 @@ export declare class BrainyData<T = any> implements BrainyDataInterface<T> {
1336
1336
  /**
1337
1337
  * Get a configuration value with automatic decryption
1338
1338
  * @param key Configuration key
1339
+ * @param options Options including decryption (auto-detected by default)
1339
1340
  * @returns Configuration value or undefined
1340
1341
  */
1341
- getConfig(key: string): Promise<any>;
1342
+ getConfig(key: string, options?: {
1343
+ decrypt?: boolean;
1344
+ }): Promise<any>;
1342
1345
  /**
1343
1346
  * Encrypt data using universal crypto utilities
1344
1347
  */
@@ -656,6 +656,22 @@ export class BrainyData {
656
656
  return;
657
657
  }
658
658
  this.isInitializing = true;
659
+ // CRITICAL: Ensure model is available before ANY operations
660
+ // This is THE most critical part of the system
661
+ // Without the model, users CANNOT access their data
662
+ if (typeof this.embeddingFunction === 'function') {
663
+ try {
664
+ const { modelGuardian } = await import('./critical/model-guardian.js');
665
+ await modelGuardian.ensureCriticalModel();
666
+ }
667
+ catch (error) {
668
+ console.error('🚨 CRITICAL: Model verification failed!');
669
+ console.error('Brainy cannot function without the transformer model.');
670
+ console.error('Users cannot access their data without it.');
671
+ this.isInitializing = false;
672
+ throw error;
673
+ }
674
+ }
659
675
  try {
660
676
  // Pre-load the embedding model early to ensure it's always available
661
677
  // This helps prevent issues with the Universal Sentence Encoder not being loaded
@@ -1936,10 +1952,14 @@ export class BrainyData {
1936
1952
  offset: options.offset
1937
1953
  });
1938
1954
  }
1939
- // Filter out placeholder nouns from search results
1955
+ // Filter out placeholder nouns and deleted items from search results
1940
1956
  searchResults = searchResults.filter((result) => {
1941
1957
  if (result.metadata && typeof result.metadata === 'object') {
1942
1958
  const metadata = result.metadata;
1959
+ // Exclude deleted items from search results (soft delete)
1960
+ if (metadata.deleted === true) {
1961
+ return false;
1962
+ }
1943
1963
  // Exclude placeholder nouns from search results
1944
1964
  if (metadata.isPlaceholder) {
1945
1965
  return false;
@@ -2296,12 +2316,13 @@ export class BrainyData {
2296
2316
  * @returns Promise that resolves to true if the vector was deleted, false otherwise
2297
2317
  */
2298
2318
  async delete(id, options = {}) {
2319
+ // Clear API: use 'hard: true' for hard delete, otherwise soft delete
2320
+ const isHardDelete = options.hard === true;
2299
2321
  const opts = {
2300
- service: undefined,
2301
- soft: true, // Soft delete is default - preserves indexes
2302
- cascade: false,
2303
- force: false,
2304
- ...options
2322
+ service: options.service,
2323
+ soft: !isHardDelete, // Soft delete is default unless hard: true is specified
2324
+ cascade: options.cascade || false,
2325
+ force: options.force || false
2305
2326
  };
2306
2327
  // Validate id parameter first, before any other logic
2307
2328
  if (id === null || id === undefined) {
@@ -2331,11 +2352,17 @@ export class BrainyData {
2331
2352
  // Handle soft delete vs hard delete
2332
2353
  if (opts.soft) {
2333
2354
  // Soft delete: just mark as deleted - metadata filter will exclude from search
2334
- return await this.updateMetadata(actualId, {
2335
- deleted: true,
2336
- deletedAt: new Date().toISOString(),
2337
- deletedBy: opts.service || 'user'
2338
- });
2355
+ try {
2356
+ return await this.updateMetadata(actualId, {
2357
+ deleted: true,
2358
+ deletedAt: new Date().toISOString(),
2359
+ deletedBy: opts.service || 'user'
2360
+ });
2361
+ }
2362
+ catch (error) {
2363
+ // If item doesn't exist, return false (delete of non-existent item is not an error)
2364
+ return false;
2365
+ }
2339
2366
  }
2340
2367
  // Hard delete: Remove from index
2341
2368
  const removed = this.index.removeItem(actualId);
@@ -4818,34 +4845,36 @@ export class BrainyData {
4818
4845
  * @param options Options including encryption
4819
4846
  */
4820
4847
  async setConfig(key, value, options) {
4821
- const configNoun = {
4848
+ // Use a predictable ID based on the config key
4849
+ const configId = `config-${key}`;
4850
+ // Store the config data in metadata (not as vectorized data)
4851
+ const configValue = options?.encrypt ? await this.encryptData(JSON.stringify(value)) : value;
4852
+ // Use simple text for vectorization
4853
+ const searchableText = `Configuration setting for ${key}`;
4854
+ await this.add(searchableText, {
4855
+ nounType: NounType.State,
4822
4856
  configKey: key,
4823
- configValue: options?.encrypt ? await this.encryptData(JSON.stringify(value)) : value,
4857
+ configValue: configValue,
4824
4858
  encrypted: !!options?.encrypt,
4825
4859
  timestamp: new Date().toISOString()
4826
- };
4827
- await this.add(configNoun, {
4828
- nounType: NounType.State,
4829
- configKey: key,
4830
- encrypted: !!options?.encrypt
4831
- });
4860
+ }, { id: configId });
4832
4861
  }
4833
4862
  /**
4834
4863
  * Get a configuration value with automatic decryption
4835
4864
  * @param key Configuration key
4865
+ * @param options Options including decryption (auto-detected by default)
4836
4866
  * @returns Configuration value or undefined
4837
4867
  */
4838
- async getConfig(key) {
4868
+ async getConfig(key, options) {
4839
4869
  try {
4840
- const results = await this.search('', 1, {
4841
- nounTypes: [NounType.State],
4842
- metadata: { configKey: key }
4843
- });
4844
- if (results.length === 0)
4870
+ // Use the predictable ID to get the config directly
4871
+ const configId = `config-${key}`;
4872
+ const storedNoun = await this.get(configId);
4873
+ if (!storedNoun)
4845
4874
  return undefined;
4846
- const configNoun = results[0];
4847
- const value = configNoun.data?.configValue || configNoun.metadata?.configValue;
4848
- const encrypted = configNoun.data?.encrypted || configNoun.metadata?.encrypted;
4875
+ // The config data is now stored in metadata
4876
+ const value = storedNoun.metadata?.configValue;
4877
+ const encrypted = storedNoun.metadata?.encrypted;
4849
4878
  if (encrypted && typeof value === 'string') {
4850
4879
  const decrypted = await this.decryptData(value);
4851
4880
  return JSON.parse(decrypted);
@@ -0,0 +1,56 @@
1
+ /**
2
+ * MODEL GUARDIAN - CRITICAL PATH
3
+ *
4
+ * THIS IS THE MOST CRITICAL COMPONENT OF BRAINY
5
+ * Without the exact model, users CANNOT access their data
6
+ *
7
+ * Requirements:
8
+ * 1. Model MUST be Xenova/all-MiniLM-L6-v2 (never changes)
9
+ * 2. Model MUST be available at runtime
10
+ * 3. Model MUST produce consistent 384-dim embeddings
11
+ * 4. System MUST fail fast if model unavailable in production
12
+ */
13
/**
 * Singleton gatekeeper for the pinned transformer model.
 *
 * Use {@link ModelGuardian.getInstance} (or the exported `modelGuardian`)
 * to obtain it; the constructor is private.
 */
export declare class ModelGuardian {
    /** Shared instance, created on the first `getInstance()` call. */
    private static instance;
    /** Set once a verification pass has succeeded. */
    private isVerified;
    /** Base directory under which the model folder is looked up. */
    private modelPath;
    /** Time of the last successful verification, or `null` if none yet. */
    private lastVerification;
    private constructor();
    /** Returns the shared guardian, constructing it if necessary. */
    static getInstance(): ModelGuardian;
    /**
     * Makes sure the model is usable before any embedding work starts.
     *
     * Verifies the local files first and otherwise walks the configured
     * fallback sources.
     *
     * @returns A promise that resolves once the model has been verified.
     * @throws Error when the model is missing in production without runtime
     *   downloads being allowed, or when every fallback source fails.
     */
    ensureCriticalModel(): Promise<void>;
    /** Checks that the required model files exist with the expected size. */
    private verifyLocalModel;
    /** Attempts to obtain the model from a single fallback source. */
    private downloadFromSource;
    /** Points transformers.js at the verified local copy. */
    private configureTransformers;
    /** Chooses the base directory models are read from. */
    private detectModelPath;
    /**
     * Reports the guardian's current state for diagnostics.
     *
     * @returns Verification flag, model base path, last verification time,
     *   model name and embedding dimensionality.
     */
    getStatus(): Promise<{
        verified: boolean;
        path: string;
        lastVerification: Date | null;
        modelName: string;
        dimensions: number;
    }>;
    /**
     * Discards the cached verification result and verifies again
     * (intended for tests).
     */
    forceReverify(): Promise<void>;
}
/** Shared guardian instance. */
export declare const modelGuardian: ModelGuardian;
@@ -0,0 +1,238 @@
1
+ /**
2
+ * MODEL GUARDIAN - CRITICAL PATH
3
+ *
4
+ * THIS IS THE MOST CRITICAL COMPONENT OF BRAINY
5
+ * Without the exact model, users CANNOT access their data
6
+ *
7
+ * Requirements:
8
+ * 1. Model MUST be Xenova/all-MiniLM-L6-v2 (never changes)
9
+ * 2. Model MUST be available at runtime
10
+ * 3. Model MUST produce consistent 384-dim embeddings
11
+ * 4. System MUST fail fast if model unavailable in production
12
+ */
13
+ import { existsSync } from 'fs';
14
+ import { stat } from 'fs/promises';
15
+ import { join, dirname } from 'path';
16
+ import { env } from '@huggingface/transformers';
17
// CRITICAL: These values MUST NEVER CHANGE
const CRITICAL_MODEL_CONFIG = {
    modelName: 'Xenova/all-MiniLM-L6-v2',
    modelHash: {
        // SHA256 placeholders - hash verification is not implemented yet
        // (see the TODO in verifyLocalModel)
        'onnx/model.onnx': 'add_actual_hash_here',
        'tokenizer.json': 'add_actual_hash_here'
    },
    modelSize: {
        'onnx/model.onnx': 90555481, // Exact size in bytes
        'tokenizer.json': 711661
    },
    // Maximum accepted difference (bytes) between expected and actual size
    sizeToleranceBytes: 1000,
    embeddingDimensions: 384,
    fallbackSources: [
        // Primary: Our GitHub releases (we control this)
        {
            name: 'GitHub (Primary)',
            url: 'https://github.com/soulcraftlabs/brainy-models/releases/download/v1.0.0/all-MiniLM-L6-v2.tar.gz',
            type: 'tarball'
        },
        // Secondary: Our CDN (future, for speed)
        {
            name: 'Soulcraft CDN',
            url: 'https://models.soulcraft.com/brainy/v1/all-MiniLM-L6-v2.tar.gz',
            type: 'tarball'
        },
        // Tertiary: Hugging Face (original source)
        {
            name: 'Hugging Face',
            url: 'huggingface',
            type: 'transformers'
        }
    ]
};
/**
 * Reads the BRAINY_ALLOW_RUNTIME_DOWNLOAD opt-in.
 *
 * Any non-empty value enables runtime downloads except the usual "off"
 * spellings, so `BRAINY_ALLOW_RUNTIME_DOWNLOAD=false` no longer switches
 * downloads ON by virtue of being a non-empty string.
 *
 * @returns true when runtime downloads are explicitly allowed
 */
function isRuntimeDownloadAllowed() {
    const flag = (process.env.BRAINY_ALLOW_RUNTIME_DOWNLOAD || '').trim().toLowerCase();
    return flag !== '' && !['false', '0', 'no', 'off'].includes(flag);
}
/**
 * Singleton that verifies the pinned transformer model is present on disk
 * (and tries to fetch it when allowed) before embeddings are produced.
 */
export class ModelGuardian {
    constructor() {
        this.isVerified = false;
        this.lastVerification = null;
        this.modelPath = this.detectModelPath();
    }
    /**
     * @returns the shared guardian, created on first use
     */
    static getInstance() {
        if (!ModelGuardian.instance) {
            ModelGuardian.instance = new ModelGuardian();
        }
        return ModelGuardian.instance;
    }
    /**
     * CRITICAL: Verify model availability and integrity
     * This MUST be called before any embedding operations
     *
     * Order of work: reuse a verification younger than 24 hours, verify the
     * local files, fail fast in production unless runtime download is
     * allowed, then try each fallback source in turn.
     *
     * @returns Promise that resolves once the model is verified
     * @throws Error when the model is missing in production or no source works
     */
    async ensureCriticalModel() {
        console.log('🛡️ MODEL GUARDIAN: Verifying critical model availability...');
        // Check if already verified in this session
        if (this.isVerified && this.lastVerification) {
            const hoursSinceVerification = (Date.now() - this.lastVerification.getTime()) / (1000 * 60 * 60);
            if (hoursSinceVerification < 24) {
                console.log('✅ Model previously verified in this session');
                return;
            }
        }
        // Step 1: Check if model exists locally
        if (await this.verifyLocalModel()) {
            console.log('✅ Critical model verified locally');
            this.isVerified = true;
            this.lastVerification = new Date();
            this.configureTransformers();
            return;
        }
        // Step 2: In production, FAIL FAST
        if (process.env.NODE_ENV === 'production' && !isRuntimeDownloadAllowed()) {
            throw new Error('🚨 CRITICAL FAILURE: Transformer model not found in production!\n' +
                'The model is REQUIRED for Brainy to function.\n' +
                'Users CANNOT access their data without it.\n' +
                'Solution: Run "npm run download-models" during build stage.');
        }
        // Step 3: Attempt to download from fallback sources
        console.warn('⚠️ Model not found locally, attempting download...');
        for (const source of CRITICAL_MODEL_CONFIG.fallbackSources) {
            try {
                console.log(`📥 Trying ${source.name}...`);
                await this.downloadFromSource(source);
                // Verify the download
                if (await this.verifyLocalModel()) {
                    console.log(`✅ Successfully downloaded from ${source.name}`);
                    this.isVerified = true;
                    this.lastVerification = new Date();
                    this.configureTransformers();
                    return;
                }
            }
            catch (error) {
                // A thrown value is not guaranteed to be an Error instance
                const reason = error instanceof Error ? error.message : String(error);
                console.warn(`❌ ${source.name} failed:`, reason);
            }
        }
        // Step 4: CRITICAL FAILURE
        throw new Error('🚨 CRITICAL FAILURE: Cannot obtain transformer model!\n' +
            'Tried all fallback sources.\n' +
            'Brainy CANNOT function without the model.\n' +
            'Users CANNOT access their data.\n' +
            'Please check network connectivity or pre-download models.');
    }
    /**
     * Verify the local model files exist and are correct
     *
     * A file counts as present only if it can be stat'ed and is a regular
     * file; files listed in `modelSize` must also match the expected size
     * within `sizeToleranceBytes`.
     *
     * @returns Promise resolving to true when every critical file passes
     */
    async verifyLocalModel() {
        const modelBasePath = join(this.modelPath, ...CRITICAL_MODEL_CONFIG.modelName.split('/'));
        // Check critical files
        const criticalFiles = [
            'onnx/model.onnx',
            'tokenizer.json',
            'config.json'
        ];
        for (const file of criticalFiles) {
            const filePath = join(modelBasePath, file);
            // One stat call replaces existsSync + stat: a file that vanishes or
            // is unreadable is reported as missing instead of throwing.
            let stats = null;
            try {
                stats = await stat(filePath);
            }
            catch (error) {
                stats = null;
            }
            if (!stats || !stats.isFile()) {
                console.log(`❌ Missing critical file: ${file}`);
                return false;
            }
            // Verify size for critical files
            const expectedSize = CRITICAL_MODEL_CONFIG.modelSize[file];
            if (expectedSize &&
                Math.abs(stats.size - expectedSize) > CRITICAL_MODEL_CONFIG.sizeToleranceBytes) {
                console.error(`❌ CRITICAL: Model file size mismatch!\n` +
                    `File: ${file}\n` +
                    `Expected: ${expectedSize} bytes\n` +
                    `Actual: ${stats.size} bytes\n` +
                    `This indicates model corruption or version mismatch!`);
                return false;
            }
            // TODO: Add SHA256 verification for ultimate security
            // (CRITICAL_MODEL_CONFIG.modelHash currently holds placeholders)
        }
        return true;
    }
    /**
     * Download model from a fallback source
     *
     * @param source Entry of CRITICAL_MODEL_CONFIG.fallbackSources
     * @throws Error for tarball sources (extraction is not implemented), when
     *   the transformers.js download fails, or when the downloaded model does
     *   not produce embeddings of the pinned dimensionality
     */
    async downloadFromSource(source) {
        if (source.type === 'transformers') {
            // Use transformers.js native download
            const { pipeline } = await import('@huggingface/transformers');
            const previousCacheDir = env.cacheDir;
            const previousAllowRemote = env.allowRemoteModels;
            env.cacheDir = this.modelPath;
            env.allowRemoteModels = true;
            try {
                const extractor = await pipeline('feature-extraction', CRITICAL_MODEL_CONFIG.modelName);
                // Test the model
                const test = await extractor('test', { pooling: 'mean', normalize: true });
                if (test.data.length !== CRITICAL_MODEL_CONFIG.embeddingDimensions) {
                    throw new Error(`CRITICAL: Model dimension mismatch! ` +
                        `Expected ${CRITICAL_MODEL_CONFIG.embeddingDimensions}, ` +
                        `got ${test.data.length}`);
                }
            }
            catch (error) {
                // Do not leave the global transformers.js settings altered by a
                // failed attempt.
                env.cacheDir = previousCacheDir;
                env.allowRemoteModels = previousAllowRemote;
                throw error;
            }
        }
        else if (source.type === 'tarball') {
            // Download and extract tarball
            // This would require implementation with proper tar extraction
            throw new Error('Tarball extraction not yet implemented');
        }
    }
    /**
     * Configure transformers.js to use verified local model
     */
    configureTransformers() {
        env.localModelPath = this.modelPath;
        env.allowRemoteModels = false; // Force local only after verification
        console.log('🔒 Transformers configured to use verified local model');
    }
    /**
     * Detect where models should be stored
     *
     * Returns the first candidate directory that already contains
     * `<model>/onnx/model.onnx`. When none does, an explicitly configured
     * BRAINY_MODELS_PATH is honoured as the target, otherwise './models'.
     *
     * @returns Base models directory (the parent of the model's namespace folder)
     */
    detectModelPath() {
        // HOME is not set on every platform; skip the candidate rather than
        // degrade to a path relative to the working directory.
        const homeDir = process.env.HOME || process.env.USERPROFILE;
        const candidates = [
            process.env.BRAINY_MODELS_PATH,
            './models',
            join(process.cwd(), 'models'),
            homeDir ? join(homeDir, '.brainy', 'models') : undefined,
            '/opt/models', // Lambda/container path
            env.cacheDir
        ];
        const modelSegments = CRITICAL_MODEL_CONFIG.modelName.split('/');
        for (const path of candidates) {
            if (path && existsSync(path)) {
                const modelPath = join(path, ...modelSegments);
                if (existsSync(join(modelPath, 'onnx', 'model.onnx'))) {
                    return dirname(dirname(modelPath)); // Return base models directory
                }
            }
        }
        // Default
        return process.env.BRAINY_MODELS_PATH || './models';
    }
    /**
     * Get model status for diagnostics
     *
     * @returns Verification flag, base path, last verification time and the
     *   pinned model name / dimensionality
     */
    async getStatus() {
        return {
            verified: this.isVerified,
            path: this.modelPath,
            lastVerification: this.lastVerification,
            modelName: CRITICAL_MODEL_CONFIG.modelName,
            dimensions: CRITICAL_MODEL_CONFIG.embeddingDimensions
        };
    }
    /**
     * Force re-verification (for testing)
     *
     * Clears the cached result and runs ensureCriticalModel() again.
     */
    async forceReverify() {
        this.isVerified = false;
        this.lastVerification = null;
        await this.ensureCriticalModel();
    }
}
236
+ // Export singleton instance (created eagerly via getInstance() when this module is first imported)
237
+ export const modelGuardian = ModelGuardian.getInstance();
238
+ //# sourceMappingURL=model-guardian.js.map
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Model Manager - Ensures transformer models are available at runtime
3
+ *
4
+ * Strategy:
5
+ * 1. Check local cache first
6
+ * 2. Try GitHub releases (our backup)
7
+ * 3. Try the CDN at models.soulcraft.com (not yet available)
8
+ * 4. Fall back to Hugging Face
9
+ */
10
/**
 * Singleton that prepares transformer model files for transformers.js.
 *
 * Obtain it through {@link ModelManager.getInstance}; the constructor is
 * private.
 */
export declare class ModelManager {
    /** Shared instance, created on the first `getInstance()` call. */
    private static instance;
    /** Directory that is searched for model folders. */
    private modelsPath;
    /** Set once `ensureModels` has completed. */
    private isInitialized;
    private constructor();
    /** Returns the shared manager, constructing it if necessary. */
    static getInstance(): ModelManager;
    /** Picks the models directory from the known candidate locations. */
    private getModelsPath;
    /**
     * Makes a model available to transformers.js.
     *
     * Uses the local copy when its files are present; otherwise tries the
     * download sources and finally enables the Hugging Face fallback.
     *
     * @param modelName Model id; defaults to `Xenova/all-MiniLM-L6-v2`.
     * @returns A promise resolving to `true` once a local copy has been
     *   configured or the Hugging Face fallback has been enabled.
     */
    ensureModels(modelName?: string): Promise<boolean>;
    /** Checks the manifest files of a model exist under the given path. */
    private verifyModelFiles;
    /** GitHub-release source (archive extraction is not implemented yet). */
    private downloadFromGitHub;
    /** CDN source (not available yet). */
    private downloadFromCDN;
    /** Switches transformers.js to local-only model loading. */
    private configureTransformers;
    /**
     * Pre-download models for deployment.
     * This is what `npm run download-models` calls.
     *
     * @throws Error when `ensureModels` reports failure.
     */
    static predownload(): Promise<void>;
}
@@ -0,0 +1,189 @@
1
+ /**
2
+ * Model Manager - Ensures transformer models are available at runtime
3
+ *
4
+ * Strategy:
5
+ * 1. Check local cache first
6
+ * 2. Try GitHub releases (our backup)
7
+ * 3. Try the CDN at models.soulcraft.com (not yet available)
8
+ * 4. Fall back to Hugging Face
9
+ */
10
+ import { existsSync } from 'fs';
11
+ import { join, dirname } from 'path';
12
+ import { env } from '@huggingface/transformers';
13
// Model sources in order of preference
// NOTE: the two tarball URLs are recorded for future use only. Nothing is
// fetched from them until archive extraction is implemented.
const MODEL_SOURCES = {
    // GitHub Release - our controlled backup
    github: 'https://github.com/soulcraftlabs/brainy/releases/download/models-v1/all-MiniLM-L6-v2.tar.gz',
    // Future CDN - fastest option when available
    cdn: 'https://models.soulcraft.com/brainy/all-MiniLM-L6-v2.tar.gz',
    // Original Hugging Face - fallback
    huggingface: 'default' // Uses transformers.js default
};
// Expected model files; sizes are enforced only when VERIFY_MODEL_SIZE === 'true',
// hashes are not checked yet (all null)
const MODEL_MANIFEST = {
    'Xenova/all-MiniLM-L6-v2': {
        files: {
            'onnx/model.onnx': {
                size: 90555481,
                sha256: null // Will be computed from actual model
            },
            'tokenizer.json': {
                size: 711661,
                sha256: null
            },
            'config.json': {
                size: 650,
                sha256: null
            },
            'tokenizer_config.json': {
                size: 366,
                sha256: null
            }
        }
    }
};
/**
 * Singleton that makes transformer model files available to transformers.js:
 * local copy first, then the (not yet functional) tarball sources, finally the
 * Hugging Face default download.
 */
export class ModelManager {
    constructor() {
        this.isInitialized = false;
        // Model names already handled by ensureModels(); a single boolean
        // would short-circuit calls made for a different model.
        this.initializedModels = new Set();
        // Determine models path
        this.modelsPath = this.getModelsPath();
    }
    /**
     * @returns the shared manager, created on first use
     */
    static getInstance() {
        if (!ModelManager.instance) {
            ModelManager.instance = new ModelManager();
        }
        return ModelManager.instance;
    }
    /**
     * Picks the models directory: the first candidate that exists on disk,
     * otherwise `<cwd>/models`.
     *
     * @returns Base directory containing `<namespace>/<model>` folders
     */
    getModelsPath() {
        // HOME is not set on every platform; skip the candidate rather than
        // degrade to a path relative to the working directory.
        const homeDir = process.env.HOME || process.env.USERPROFILE;
        const paths = [
            process.env.BRAINY_MODELS_PATH,
            './models',
            join(process.cwd(), 'models'),
            homeDir ? join(homeDir, '.brainy', 'models') : undefined,
            env.cacheDir
        ];
        // Find first existing path or use default
        for (const path of paths) {
            if (path && existsSync(path)) {
                return path;
            }
        }
        // Default to local models directory
        return join(process.cwd(), 'models');
    }
    /**
     * Records that `modelName` has been prepared.
     *
     * @param modelName Model id that was handled
     */
    markInitialized(modelName) {
        this.initializedModels.add(modelName);
        this.isInitialized = true;
    }
    /**
     * Ensures a model can be loaded by transformers.js.
     *
     * @param modelName Model id (defaults to 'Xenova/all-MiniLM-L6-v2')
     * @returns Promise resolving to true once a local copy has been configured
     *   or the Hugging Face fallback has been enabled
     */
    async ensureModels(modelName = 'Xenova/all-MiniLM-L6-v2') {
        if (this.initializedModels.has(modelName)) {
            return true;
        }
        const modelPath = join(this.modelsPath, ...modelName.split('/'));
        // Check if model already exists locally
        if (await this.verifyModelFiles(modelPath, modelName)) {
            console.log('✅ Models found in cache:', modelPath);
            this.configureTransformers();
            this.markInitialized(modelName);
            return true;
        }
        // Try to download from our sources
        console.log('📥 Downloading transformer models...');
        // Try GitHub first (our backup)
        if (await this.downloadFromGitHub(modelName)) {
            this.markInitialized(modelName);
            return true;
        }
        // Try CDN (when available)
        if (await this.downloadFromCDN(modelName)) {
            this.markInitialized(modelName);
            return true;
        }
        // Fall back to Hugging Face (default transformers.js behavior)
        console.log('⚠️ Using Hugging Face fallback for models');
        env.allowRemoteModels = true;
        this.markInitialized(modelName);
        return true;
    }
    /**
     * Checks that every manifest file of `modelName` exists under `modelPath`
     * and, when VERIFY_MODEL_SIZE === 'true', has exactly the manifest size.
     *
     * @param modelPath Directory of the model (`<base>/<namespace>/<model>`)
     * @param modelName Model id used to look up the manifest
     * @returns Promise resolving to false for unknown models or on any
     *   missing / mismatching file
     */
    async verifyModelFiles(modelPath, modelName) {
        const manifest = MODEL_MANIFEST[modelName];
        if (!manifest)
            return false;
        // Load fs.promises once, and only when size checking is requested
        const fsPromises = process.env.VERIFY_MODEL_SIZE === 'true'
            ? (await import('fs')).promises
            : null;
        for (const [filePath, info] of Object.entries(manifest.files)) {
            const fullPath = join(modelPath, filePath);
            if (!existsSync(fullPath)) {
                return false;
            }
            // Optionally verify size
            if (fsPromises) {
                let stats = null;
                try {
                    stats = await fsPromises.stat(fullPath);
                }
                catch (error) {
                    // File disappeared or is unreadable: treat as not verified
                    return false;
                }
                if (stats.size !== info.size) {
                    console.warn(`⚠️ Model file size mismatch: ${filePath}`);
                    return false;
                }
            }
        }
        return true;
    }
    /**
     * GitHub-release source (MODEL_SOURCES.github).
     *
     * Extraction of the tar.gz is not implemented, so nothing is fetched:
     * downloading the archive only to discard it would cost bandwidth and
     * memory without ever producing a usable model.
     *
     * @param modelName Requested model id (unused until extraction exists)
     * @returns Promise resolving to false so the caller falls through
     */
    async downloadFromGitHub(modelName) {
        console.log('⚠️ GitHub model extraction not yet implemented');
        return false;
    }
    /**
     * CDN source (MODEL_SOURCES.cdn).
     *
     * The CDN is not available and extraction is not implemented, so no
     * request is issued.
     *
     * @param modelName Requested model id (unused until the CDN exists)
     * @returns Promise resolving to false so the caller falls through
     */
    async downloadFromCDN(modelName) {
        console.log('⚠️ CDN not yet available');
        return false;
    }
    /**
     * Configure transformers.js to use our local models.
     *
     * transformers.js appends the full model id (e.g. 'Xenova/all-MiniLM-L6-v2')
     * to `env.localModelPath`, so it must be the base models directory, not the
     * model's namespace folder.
     */
    configureTransformers() {
        env.localModelPath = this.modelsPath;
        env.allowRemoteModels = false;
        console.log('🔧 Configured transformers.js to use local models');
    }
    /**
     * Pre-download models for deployment
     * This is what npm run download-models calls
     *
     * NOTE(review): ensureModels() currently always resolves to true (it ends
     * in the Hugging Face fallback without downloading anything itself), so
     * the failure branch below is not reachable and the success message does
     * not prove files are on disk - confirm intended behaviour.
     *
     * @throws Error when ensureModels() reports failure
     */
    static async predownload() {
        const manager = ModelManager.getInstance();
        const success = await manager.ensureModels();
        if (!success) {
            throw new Error('Failed to download models');
        }
        console.log('✅ Models downloaded successfully');
    }
}
182
// When running in production, start preparing models as soon as this module is
// imported. Setting SKIP_MODEL_CHECK to 'true' turns this off.
if (process.env.SKIP_MODEL_CHECK !== 'true' && process.env.NODE_ENV === 'production') {
    const startupManager = ModelManager.getInstance();
    startupManager.ensureModels().catch((initError) => {
        // Deliberately not rethrown: the app may start and retry on first use
        console.error('⚠️ Model initialization failed:', initError);
    });
}
189
+ //# sourceMappingURL=model-manager.js.map
@@ -3,6 +3,7 @@
3
3
  * Complete rewrite to eliminate TensorFlow.js and use ONNX-based models
4
4
  */
5
5
  import { isBrowser } from './environment.js';
6
+ import { ModelManager } from '../embeddings/model-manager.js';
6
7
  // @ts-ignore - Transformers.js is now the primary embedding library
7
8
  import { pipeline, env } from '@huggingface/transformers';
8
9
  /**
@@ -192,6 +193,9 @@ export class TransformerEmbedding {
192
193
  }
193
194
  // Always use real implementation - no mocking
194
195
  try {
196
+ // Ensure models are available (downloads if needed)
197
+ const modelManager = ModelManager.getInstance();
198
+ await modelManager.ensureModels(this.options.model);
195
199
  // Resolve device configuration and cache directory
196
200
  const device = await resolveDevice(this.options.device);
197
201
  const cacheDir = this.options.cacheDir === './models'
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/brainy",
3
- "version": "1.2.0",
3
+ "version": "1.3.0",
4
4
  "description": "Multi-Dimensional AI Database - Vector similarity, graph relationships, metadata facets with HNSW indexing and OPFS storage",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",
@@ -142,7 +142,10 @@
142
142
  "_workflow:major": "node scripts/release-workflow.js major",
143
143
  "_workflow:dry-run": "npm run build && npm test && npm run _release:dry-run",
144
144
  "_dry-run": "npm pack --dry-run",
145
- "download-models": "node scripts/download-models.cjs"
145
+ "download-models": "node scripts/download-models.cjs",
146
+ "prepare-models": "node scripts/prepare-models.js",
147
+ "models:verify": "node scripts/ensure-models.js",
148
+ "models:download": "BRAINY_ALLOW_REMOTE_MODELS=true node scripts/download-models.cjs"
146
149
  },
147
150
  "keywords": [
148
151
  "vector-database",