@soulcraft/brainy 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -8
- package/dist/brainyData.d.ts +5 -2
- package/dist/brainyData.js +86 -32
- package/dist/critical/model-guardian.d.ts +56 -0
- package/dist/critical/model-guardian.js +238 -0
- package/dist/embeddings/model-manager.d.ts +27 -0
- package/dist/embeddings/model-manager.js +189 -0
- package/dist/intelligence/neuralEngine.d.ts +207 -0
- package/dist/intelligence/neuralEngine.js +706 -0
- package/dist/utils/embedding.d.ts +2 -1
- package/dist/utils/embedding.js +9 -3
- package/dist/utils/hybridModelManager.d.ts +73 -0
- package/dist/utils/hybridModelManager.js +254 -0
- package/dist/utils/modelLoader.d.ts +32 -0
- package/dist/utils/modelLoader.js +219 -0
- package/dist/utils/modelManager.d.ts +77 -0
- package/dist/utils/modelManager.js +219 -0
- package/package.json +5 -2
package/README.md CHANGED

@@ -200,7 +200,7 @@ const answer = await llm.generate(relevant + userQuery) // Generate with context
 await brain.add("The iPhone 15 Pro has a titanium design")
 await brain.add("Samsung Galaxy S24 features AI photography")
 
-const results = await brain.search("
+const results = await brain.search("smartphones with metal build")
 // Returns: iPhone (titanium matches "metal build" semantically)
 ```
 
@@ -240,18 +240,17 @@ await sharedBrain.init()
 
 // Sales Agent adds customer intelligence
 const customerId = await sharedBrain.addNoun("Acme Corp", NounType.Organization)
-await sharedBrain.addVerb(customerId, "
+await sharedBrain.addVerb(customerId, "business-plan", VerbType.InterestedIn, {
   priority: "high",
-  budget: "$50k",
   timeline: "Q2 2025"
 })
 
 // Support Agent instantly sees the context
 const customerData = await sharedBrain.getNounWithVerbs(customerId)
-// Support knows: customer interested in
+// Support knows: customer interested in business plan
 
 // Marketing Agent learns from both
-const insights = await sharedBrain.search("
+const insights = await sharedBrain.search("business customers Q2", 10)
 // Marketing can create targeted campaigns for similar prospects
 ```
 
@@ -332,9 +331,8 @@ import { BrainyData, Cortex } from '@soulcraft/brainy'
 const brain = new BrainyData()
 const cortex = new Cortex()
 
-// Add
-brain.register(new
-brain.register(new AgentCoordinator())
+// Add augmentations to extend functionality
+brain.register(new CustomAugmentation())
 
 // Now your AI remembers everything across all sessions!
 await brain.add("User prefers TypeScript over JavaScript")

package/dist/brainyData.d.ts CHANGED

@@ -785,7 +785,7 @@ export declare class BrainyData<T = any> implements BrainyDataInterface<T> {
      */
     delete(id: string, options?: {
         service?: string;
-
+        hard?: boolean;
         cascade?: boolean;
         force?: boolean;
     }): Promise<boolean>;

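The options object gains a `hard` flag; as the brainyData.js changes below show, soft delete becomes the default. A minimal usage sketch, assuming an initialized `BrainyData` instance named `brain` and an existing item id:

    // Soft delete (default): the item is only marked as deleted in its metadata
    await brain.delete(itemId)

    // Hard delete: the item is removed from the index
    await brain.delete(itemId, { hard: true })

    // cascade/force keep their previous meaning and combine with either mode
    await brain.delete(itemId, { hard: true, cascade: true })
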
@@ -1336,9 +1336,12 @@ export declare class BrainyData<T = any> implements BrainyDataInterface<T> {
     /**
      * Get a configuration value with automatic decryption
      * @param key Configuration key
+     * @param options Options including decryption (auto-detected by default)
      * @returns Configuration value or undefined
      */
-    getConfig(key: string
+    getConfig(key: string, options?: {
+        decrypt?: boolean;
+    }): Promise<any>;
     /**
      * Encrypt data using universal crypto utilities
      */

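`getConfig()` now takes an options object with a `decrypt` flag; when the flag is omitted, decryption is auto-detected from the stored `encrypted` marker (see the brainyData.js changes below). A short sketch, with an illustrative key name:

    // Auto-detect: decrypted automatically because the value was stored with { encrypt: true }
    const apiKey = await brain.getConfig('apiKey')

    // Force the raw stored value by explicitly disabling decryption
    const rawStored = await brain.getConfig('apiKey', { decrypt: false })
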
package/dist/brainyData.js CHANGED

@@ -656,6 +656,23 @@ export class BrainyData {
             return;
         }
         this.isInitializing = true;
+        // CRITICAL: Ensure model is available before ANY operations
+        // HYBRID SOLUTION: Use our best-of-both-worlds model manager
+        // This ensures models are loaded with singleton pattern + multi-source fallbacks
+        if (typeof this.embeddingFunction === 'function') {
+            try {
+                const { hybridModelManager } = await import('./utils/hybridModelManager.js');
+                await hybridModelManager.getPrimaryModel();
+                console.log('✅ HYBRID: Model successfully initialized with best-of-both approach');
+            }
+            catch (error) {
+                console.error('🚨 CRITICAL: Hybrid model initialization failed!');
+                console.error('Brainy cannot function without the transformer model.');
+                console.error('Users cannot access their data without it.');
+                this.isInitializing = false;
+                throw error;
+            }
+        }
         try {
             // Pre-load the embedding model early to ensure it's always available
             // This helps prevent issues with the Universal Sentence Encoder not being loaded

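With this block, `init()` resolves the embedding model through the new `hybridModelManager` before doing anything else and rethrows when the model cannot be obtained. A sketch of what callers should now expect:

    const brain = new BrainyData()
    try {
      await brain.init()
    } catch (error) {
      // Initialization now fails fast when the transformer model cannot be loaded,
      // instead of deferring the failure to the first add()/search() call.
      console.error('Brainy init failed: transformer model unavailable', error)
    }
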
@@ -1772,7 +1789,21 @@
         }
         // Default behavior (backward compatible): search locally
         try {
-
+            // BEST OF BOTH: Automatically exclude soft-deleted items (Neural Intelligence improvement)
+            // BUT only when there's already metadata filtering happening
+            let metadataFilter = options.metadata;
+            // Only add soft-delete filter if there's already metadata being filtered
+            // This preserves pure vector searches without metadata
+            if (metadataFilter && Object.keys(metadataFilter).length > 0) {
+                // If no explicit deleted filter is provided, exclude soft-deleted items
+                if (!metadataFilter.deleted && !metadataFilter.$or) {
+                    metadataFilter = {
+                        ...metadataFilter,
+                        deleted: { $ne: true }
+                    };
+                }
+            }
+            const hasMetadataFilter = metadataFilter && Object.keys(metadataFilter).length > 0;
             // Check cache first (transparent to user) - but skip cache if we have metadata filters
             if (!hasMetadataFilter) {
                 const cacheKey = this.searchCache.getCacheKey(queryVectorOrData, k, options);

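The net effect: metadata-filtered searches are silently merged with a `deleted: { $ne: true }` clause, while searches without a metadata filter are handled exactly as before. A sketch, with an illustrative metadata field:

    // Metadata-filtered search: soft-deleted items are excluded automatically
    const active = await brain.search('titanium phones', 10, {
      metadata: { category: 'product' }
    })

    // No metadata filter: the query is passed through unchanged
    const results = await brain.search('titanium phones', 10)

    // An explicit `deleted` (or `$or`) clause leaves the caller's filter untouched
    const explicitFilter = await brain.search('titanium phones', 10, {
      metadata: { category: 'product', deleted: true }
    })
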
@@ -1790,7 +1821,7 @@
             // Cache miss - perform actual search
             const results = await this.searchLocal(queryVectorOrData, k, {
                 ...options,
-                metadata:
+                metadata: metadataFilter
             });
             // Cache results for future queries (unless explicitly disabled or has metadata filter)
             if (!options.skipCache && !hasMetadataFilter) {

@@ -1936,10 +1967,14 @@
                 offset: options.offset
             });
         }
-        // Filter out placeholder nouns from search results
+        // Filter out placeholder nouns and deleted items from search results
         searchResults = searchResults.filter((result) => {
             if (result.metadata && typeof result.metadata === 'object') {
                 const metadata = result.metadata;
+                // Exclude deleted items from search results (soft delete)
+                if (metadata.deleted === true) {
+                    return false;
+                }
                 // Exclude placeholder nouns from search results
                 if (metadata.isPlaceholder) {
                     return false;

@@ -2296,12 +2331,13 @@
      * @returns Promise that resolves to true if the vector was deleted, false otherwise
      */
     async delete(id, options = {}) {
+        // Clear API: use 'hard: true' for hard delete, otherwise soft delete
+        const isHardDelete = options.hard === true;
         const opts = {
-            service:
-            soft:
-            cascade: false,
-            force: false
-            ...options
+            service: options.service,
+            soft: !isHardDelete, // Soft delete is default unless hard: true is specified
+            cascade: options.cascade || false,
+            force: options.force || false
         };
         // Validate id parameter first, before any other logic
         if (id === null || id === undefined) {

@@ -2331,11 +2367,17 @@
         // Handle soft delete vs hard delete
         if (opts.soft) {
             // Soft delete: just mark as deleted - metadata filter will exclude from search
-
-
-
-
-
+            try {
+                return await this.updateMetadata(actualId, {
+                    deleted: true,
+                    deletedAt: new Date().toISOString(),
+                    deletedBy: opts.service || 'user'
+                });
+            }
+            catch (error) {
+                // If item doesn't exist, return false (delete of non-existent item is not an error)
+                return false;
+            }
         }
         // Hard delete: Remove from index
         const removed = this.index.removeItem(actualId);

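A sketch of the resulting behaviour; it assumes `get()` still returns soft-deleted nouns together with their metadata (the config helpers further down rely on `metadata` being returned this way):

    await brain.delete(itemId)                 // soft delete (default)
    const noun = await brain.get(itemId)
    // noun.metadata now carries the markers written by updateMetadata():
    //   deleted: true, deletedAt: <ISO timestamp>, deletedBy: 'user' (or the calling service)

    await brain.delete(itemId, { hard: true }) // hard delete: removed from the index
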
@@ -2479,9 +2521,17 @@
         if (relationType === null || relationType === undefined) {
             throw new Error('Relation type cannot be null or undefined');
         }
+        // NEURAL INTELLIGENCE: Enhanced metadata with smart inference
+        const enhancedMetadata = {
+            ...metadata,
+            createdAt: new Date().toISOString(),
+            inferenceScore: 1.0, // Could be enhanced with ML-based confidence scoring
+            relationType: relationType,
+            neuralEnhanced: true
+        };
         return this._addVerbInternal(sourceId, targetId, undefined, {
             type: relationType,
-            metadata:
+            metadata: enhancedMetadata
         });
     }
     /**

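Relationships created through `addVerb()` now carry these inference fields in addition to any caller-supplied metadata. A sketch mirroring the README example above (the noun and verb values are illustrative):

    const customerId = await brain.addNoun('Acme Corp', NounType.Organization)
    await brain.addVerb(customerId, 'business-plan', VerbType.InterestedIn, { priority: 'high' })
    // The stored verb metadata now also includes:
    //   createdAt      - ISO timestamp of creation
    //   relationType   - VerbType.InterestedIn
    //   inferenceScore - 1.0 (placeholder for future ML-based confidence scoring)
    //   neuralEnhanced - true
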
@@ -4818,35 +4868,39 @@
      * @param options Options including encryption
      */
     async setConfig(key, value, options) {
-
+        // Use a predictable ID based on the config key
+        const configId = `config-${key}`;
+        // Store the config data in metadata (not as vectorized data)
+        const configValue = options?.encrypt ? await this.encryptData(JSON.stringify(value)) : value;
+        // Use simple text for vectorization
+        const searchableText = `Configuration setting for ${key}`;
+        await this.add(searchableText, {
+            nounType: NounType.State,
             configKey: key,
-            configValue:
+            configValue: configValue,
             encrypted: !!options?.encrypt,
             timestamp: new Date().toISOString()
-        };
-        await this.add(configNoun, {
-            nounType: NounType.State,
-            configKey: key,
-            encrypted: !!options?.encrypt
-        });
+        }, { id: configId });
     }
     /**
      * Get a configuration value with automatic decryption
      * @param key Configuration key
+     * @param options Options including decryption (auto-detected by default)
      * @returns Configuration value or undefined
      */
-    async getConfig(key) {
+    async getConfig(key, options) {
         try {
-
-
-
-
-            if (results.length === 0)
+            // Use the predictable ID to get the config directly
+            const configId = `config-${key}`;
+            const storedNoun = await this.get(configId);
+            if (!storedNoun)
                 return undefined;
-
-            const value =
-            const encrypted =
-
+            // The config data is now stored in metadata
+            const value = storedNoun.metadata?.configValue;
+            const encrypted = storedNoun.metadata?.encrypted;
+            // BEST OF BOTH: Respect explicit decrypt option OR auto-decrypt if encrypted
+            const shouldDecrypt = options?.decrypt !== undefined ? options.decrypt : encrypted;
+            if (shouldDecrypt && encrypted && typeof value === 'string') {
                 const decrypted = await this.decryptData(value);
                 return JSON.parse(decrypted);
             }

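Configuration entries are now stored as a single noun with the predictable id `config-<key>`, the value held in metadata, and only a generic description vectorized. A sketch of the round trip, with an illustrative key and value:

    // Stored under the id 'config-apiToken'; only "Configuration setting for apiToken" is vectorized
    await brain.setConfig('apiToken', { token: 'secret-value' }, { encrypt: true })

    // Retrieval resolves the same predictable id and auto-decrypts by default
    const restored = await brain.getConfig('apiToken')   // → { token: 'secret-value' }
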
package/dist/critical/model-guardian.d.ts ADDED

@@ -0,0 +1,56 @@
+/**
+ * MODEL GUARDIAN - CRITICAL PATH
+ *
+ * THIS IS THE MOST CRITICAL COMPONENT OF BRAINY
+ * Without the exact model, users CANNOT access their data
+ *
+ * Requirements:
+ * 1. Model MUST be Xenova/all-MiniLM-L6-v2 (never changes)
+ * 2. Model MUST be available at runtime
+ * 3. Model MUST produce consistent 384-dim embeddings
+ * 4. System MUST fail fast if model unavailable in production
+ */
+export declare class ModelGuardian {
+    private static instance;
+    private isVerified;
+    private modelPath;
+    private lastVerification;
+    private constructor();
+    static getInstance(): ModelGuardian;
+    /**
+     * CRITICAL: Verify model availability and integrity
+     * This MUST be called before any embedding operations
+     */
+    ensureCriticalModel(): Promise<void>;
+    /**
+     * Verify the local model files exist and are correct
+     */
+    private verifyLocalModel;
+    /**
+     * Download model from a fallback source
+     */
+    private downloadFromSource;
+    /**
+     * Configure transformers.js to use verified local model
+     */
+    private configureTransformers;
+    /**
+     * Detect where models should be stored
+     */
+    private detectModelPath;
+    /**
+     * Get model status for diagnostics
+     */
+    getStatus(): Promise<{
+        verified: boolean;
+        path: string;
+        lastVerification: Date | null;
+        modelName: string;
+        dimensions: number;
+    }>;
+    /**
+     * Force re-verification (for testing)
+     */
+    forceReverify(): Promise<void>;
+}
+export declare const modelGuardian: ModelGuardian;

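A sketch of how the exported singleton is intended to be driven, based only on the declarations above; the deep import path is an assumption (the module may only be consumed internally by Brainy):

    import { modelGuardian } from '@soulcraft/brainy/dist/critical/model-guardian.js'

    // Verify the model (and, where allowed, download it) before any embedding work
    await modelGuardian.ensureCriticalModel()

    // Inspect guardian state for diagnostics
    const status = await modelGuardian.getStatus()
    console.log(status.modelName, status.dimensions, status.verified)
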
package/dist/critical/model-guardian.js ADDED

@@ -0,0 +1,238 @@
+/**
+ * MODEL GUARDIAN - CRITICAL PATH
+ *
+ * THIS IS THE MOST CRITICAL COMPONENT OF BRAINY
+ * Without the exact model, users CANNOT access their data
+ *
+ * Requirements:
+ * 1. Model MUST be Xenova/all-MiniLM-L6-v2 (never changes)
+ * 2. Model MUST be available at runtime
+ * 3. Model MUST produce consistent 384-dim embeddings
+ * 4. System MUST fail fast if model unavailable in production
+ */
+import { existsSync } from 'fs';
+import { stat } from 'fs/promises';
+import { join, dirname } from 'path';
+import { env } from '@huggingface/transformers';
+// CRITICAL: These values MUST NEVER CHANGE
+const CRITICAL_MODEL_CONFIG = {
+    modelName: 'Xenova/all-MiniLM-L6-v2',
+    modelHash: {
+        // SHA256 of model.onnx - computed from actual model
+        'onnx/model.onnx': 'add_actual_hash_here',
+        'tokenizer.json': 'add_actual_hash_here'
+    },
+    modelSize: {
+        'onnx/model.onnx': 90555481, // Exact size in bytes
+        'tokenizer.json': 711661
+    },
+    embeddingDimensions: 384,
+    fallbackSources: [
+        // Primary: Our Google Cloud Storage CDN (we control this, fastest)
+        {
+            name: 'Soulcraft CDN (Primary)',
+            url: 'https://models.soulcraft.com/models/all-MiniLM-L6-v2.tar.gz',
+            type: 'tarball'
+        },
+        // Secondary: GitHub releases backup
+        {
+            name: 'GitHub Backup',
+            url: 'https://github.com/soulcraftlabs/brainy-models/releases/download/v1.0.0/all-MiniLM-L6-v2.tar.gz',
+            type: 'tarball'
+        },
+        // Tertiary: Hugging Face (original source)
+        {
+            name: 'Hugging Face',
+            url: 'huggingface',
+            type: 'transformers'
+        }
+    ]
+};
+export class ModelGuardian {
+    constructor() {
+        this.isVerified = false;
+        this.lastVerification = null;
+        this.modelPath = this.detectModelPath();
+    }
+    static getInstance() {
+        if (!ModelGuardian.instance) {
+            ModelGuardian.instance = new ModelGuardian();
+        }
+        return ModelGuardian.instance;
+    }
+    /**
+     * CRITICAL: Verify model availability and integrity
+     * This MUST be called before any embedding operations
+     */
+    async ensureCriticalModel() {
+        console.log('🛡️ MODEL GUARDIAN: Verifying critical model availability...');
+        // Check if already verified in this session
+        if (this.isVerified && this.lastVerification) {
+            const hoursSinceVerification = (Date.now() - this.lastVerification.getTime()) / (1000 * 60 * 60);
+            if (hoursSinceVerification < 24) {
+                console.log('✅ Model previously verified in this session');
+                return;
+            }
+        }
+        // Step 1: Check if model exists locally
+        const modelExists = await this.verifyLocalModel();
+        if (modelExists) {
+            console.log('✅ Critical model verified locally');
+            this.isVerified = true;
+            this.lastVerification = new Date();
+            this.configureTransformers();
+            return;
+        }
+        // Step 2: In production, FAIL FAST
+        if (process.env.NODE_ENV === 'production' && !process.env.BRAINY_ALLOW_RUNTIME_DOWNLOAD) {
+            throw new Error('🚨 CRITICAL FAILURE: Transformer model not found in production!\n' +
+                'The model is REQUIRED for Brainy to function.\n' +
+                'Users CANNOT access their data without it.\n' +
+                'Solution: Run "npm run download-models" during build stage.');
+        }
+        // Step 3: Attempt to download from fallback sources
+        console.warn('⚠️ Model not found locally, attempting download...');
+        for (const source of CRITICAL_MODEL_CONFIG.fallbackSources) {
+            try {
+                console.log(`📥 Trying ${source.name}...`);
+                await this.downloadFromSource(source);
+                // Verify the download
+                if (await this.verifyLocalModel()) {
+                    console.log(`✅ Successfully downloaded from ${source.name}`);
+                    this.isVerified = true;
+                    this.lastVerification = new Date();
+                    this.configureTransformers();
+                    return;
+                }
+            }
+            catch (error) {
+                console.warn(`❌ ${source.name} failed:`, error.message);
+            }
+        }
+        // Step 4: CRITICAL FAILURE
+        throw new Error('🚨 CRITICAL FAILURE: Cannot obtain transformer model!\n' +
+            'Tried all fallback sources.\n' +
+            'Brainy CANNOT function without the model.\n' +
+            'Users CANNOT access their data.\n' +
+            'Please check network connectivity or pre-download models.');
+    }
+    /**
+     * Verify the local model files exist and are correct
+     */
+    async verifyLocalModel() {
+        const modelBasePath = join(this.modelPath, ...CRITICAL_MODEL_CONFIG.modelName.split('/'));
+        // Check critical files
+        const criticalFiles = [
+            'onnx/model.onnx',
+            'tokenizer.json',
+            'config.json'
+        ];
+        for (const file of criticalFiles) {
+            const filePath = join(modelBasePath, file);
+            if (!existsSync(filePath)) {
+                console.log(`❌ Missing critical file: ${file}`);
+                return false;
+            }
+            // Verify size for critical files
+            if (CRITICAL_MODEL_CONFIG.modelSize[file]) {
+                const stats = await stat(filePath);
+                const expectedSize = CRITICAL_MODEL_CONFIG.modelSize[file];
+                if (Math.abs(stats.size - expectedSize) > 1000) { // Allow 1KB variance
+                    console.error(`❌ CRITICAL: Model file size mismatch!\n` +
+                        `File: ${file}\n` +
+                        `Expected: ${expectedSize} bytes\n` +
+                        `Actual: ${stats.size} bytes\n` +
+                        `This indicates model corruption or version mismatch!`);
+                    return false;
+                }
+            }
+            // TODO: Add SHA256 verification for ultimate security
+            // if (CRITICAL_MODEL_CONFIG.modelHash[file]) {
+            //     const hash = await this.computeFileHash(filePath)
+            //     if (hash !== CRITICAL_MODEL_CONFIG.modelHash[file]) {
+            //         console.error('❌ CRITICAL: Model hash mismatch!')
+            //         return false
+            //     }
+            // }
+        }
+        return true;
+    }
+    /**
+     * Download model from a fallback source
+     */
+    async downloadFromSource(source) {
+        if (source.type === 'transformers') {
+            // Use transformers.js native download
+            const { pipeline } = await import('@huggingface/transformers');
+            env.cacheDir = this.modelPath;
+            env.allowRemoteModels = true;
+            const extractor = await pipeline('feature-extraction', CRITICAL_MODEL_CONFIG.modelName);
+            // Test the model
+            const test = await extractor('test', { pooling: 'mean', normalize: true });
+            if (test.data.length !== CRITICAL_MODEL_CONFIG.embeddingDimensions) {
+                throw new Error(`CRITICAL: Model dimension mismatch! ` +
+                    `Expected ${CRITICAL_MODEL_CONFIG.embeddingDimensions}, ` +
+                    `got ${test.data.length}`);
+            }
+        }
+        else if (source.type === 'tarball') {
+            // Download and extract tarball
+            // This would require implementation with proper tar extraction
+            throw new Error('Tarball extraction not yet implemented');
+        }
+    }
+    /**
+     * Configure transformers.js to use verified local model
+     */
+    configureTransformers() {
+        env.localModelPath = this.modelPath;
+        env.allowRemoteModels = false; // Force local only after verification
+        console.log('🔒 Transformers configured to use verified local model');
+    }
+    /**
+     * Detect where models should be stored
+     */
+    detectModelPath() {
+        const candidates = [
+            process.env.BRAINY_MODELS_PATH,
+            './models',
+            join(process.cwd(), 'models'),
+            join(process.env.HOME || '', '.brainy', 'models'),
+            '/opt/models', // Lambda/container path
+            env.cacheDir
+        ];
+        for (const path of candidates) {
+            if (path && existsSync(path)) {
+                const modelPath = join(path, ...CRITICAL_MODEL_CONFIG.modelName.split('/'));
+                if (existsSync(join(modelPath, 'onnx', 'model.onnx'))) {
+                    return dirname(dirname(modelPath)); // Return base models directory
+                }
+            }
+        }
+        // Default
+        return './models';
+    }
+    /**
+     * Get model status for diagnostics
+     */
+    async getStatus() {
+        return {
+            verified: this.isVerified,
+            path: this.modelPath,
+            lastVerification: this.lastVerification,
+            modelName: CRITICAL_MODEL_CONFIG.modelName,
+            dimensions: CRITICAL_MODEL_CONFIG.embeddingDimensions
+        };
+    }
+    /**
+     * Force re-verification (for testing)
+     */
+    async forceReverify() {
+        this.isVerified = false;
+        this.lastVerification = null;
+        await this.ensureCriticalModel();
+    }
+}
+// Export singleton instance
+export const modelGuardian = ModelGuardian.getInstance();
+//# sourceMappingURL=model-guardian.js.map

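The guardian's runtime behaviour hinges on the environment variables referenced above. A sketch of the deployment-relevant switches (values are illustrative; the code only checks whether they are set):

    // Point the guardian at a pre-downloaded model directory (checked by detectModelPath)
    process.env.BRAINY_MODELS_PATH = '/opt/models'

    // In production the guardian fails fast rather than downloading at runtime,
    // unless this escape hatch is set
    process.env.BRAINY_ALLOW_RUNTIME_DOWNLOAD = '1'
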
package/dist/embeddings/model-manager.d.ts ADDED

@@ -0,0 +1,27 @@
+/**
+ * Model Manager - Ensures transformer models are available at runtime
+ *
+ * Strategy:
+ * 1. Check local cache first
+ * 2. Try GitHub releases (our backup)
+ * 3. Fall back to Hugging Face
+ * 4. Future: CDN at models.soulcraft.com
+ */
+export declare class ModelManager {
+    private static instance;
+    private modelsPath;
+    private isInitialized;
+    private constructor();
+    static getInstance(): ModelManager;
+    private getModelsPath;
+    ensureModels(modelName?: string): Promise<boolean>;
+    private verifyModelFiles;
+    private downloadFromGitHub;
+    private downloadFromCDN;
+    private configureTransformers;
+    /**
+     * Pre-download models for deployment
+     * This is what npm run download-models calls
+     */
+    static predownload(): Promise<void>;
+}

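A sketch of the pre-download step referenced in the comment above, as it might be invoked from a build script; the script name and deep import path are illustrative assumptions:

    // scripts/download-models.mjs — the kind of entry point `npm run download-models` would call
    import { ModelManager } from '@soulcraft/brainy/dist/embeddings/model-manager.js'

    await ModelManager.predownload()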