@soulcraft/brainy 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -8
- package/dist/brainyData.d.ts +5 -2
- package/dist/brainyData.js +57 -28
- package/dist/critical/model-guardian.d.ts +56 -0
- package/dist/critical/model-guardian.js +238 -0
- package/dist/embeddings/model-manager.d.ts +27 -0
- package/dist/embeddings/model-manager.js +189 -0
- package/dist/utils/embedding.js +4 -0
- package/package.json +5 -2
package/README.md
CHANGED
|
@@ -200,7 +200,7 @@ const answer = await llm.generate(relevant + userQuery) // Generate with contex
|
|
|
200
200
|
await brain.add("The iPhone 15 Pro has a titanium design")
|
|
201
201
|
await brain.add("Samsung Galaxy S24 features AI photography")
|
|
202
202
|
|
|
203
|
-
const results = await brain.search("
|
|
203
|
+
const results = await brain.search("smartphones with metal build")
|
|
204
204
|
// Returns: iPhone (titanium matches "metal build" semantically)
|
|
205
205
|
```
|
|
206
206
|
|
|
@@ -240,18 +240,17 @@ await sharedBrain.init()
|
|
|
240
240
|
|
|
241
241
|
// Sales Agent adds customer intelligence
|
|
242
242
|
const customerId = await sharedBrain.addNoun("Acme Corp", NounType.Organization)
|
|
243
|
-
await sharedBrain.addVerb(customerId, "
|
|
243
|
+
await sharedBrain.addVerb(customerId, "business-plan", VerbType.InterestedIn, {
|
|
244
244
|
priority: "high",
|
|
245
|
-
budget: "$50k",
|
|
246
245
|
timeline: "Q2 2025"
|
|
247
246
|
})
|
|
248
247
|
|
|
249
248
|
// Support Agent instantly sees the context
|
|
250
249
|
const customerData = await sharedBrain.getNounWithVerbs(customerId)
|
|
251
|
-
// Support knows: customer interested in
|
|
250
|
+
// Support knows: customer interested in business plan
|
|
252
251
|
|
|
253
252
|
// Marketing Agent learns from both
|
|
254
|
-
const insights = await sharedBrain.search("
|
|
253
|
+
const insights = await sharedBrain.search("business customers Q2", 10)
|
|
255
254
|
// Marketing can create targeted campaigns for similar prospects
|
|
256
255
|
```
|
|
257
256
|
|
|
@@ -332,9 +331,8 @@ import { BrainyData, Cortex } from '@soulcraft/brainy'
|
|
|
332
331
|
const brain = new BrainyData()
|
|
333
332
|
const cortex = new Cortex()
|
|
334
333
|
|
|
335
|
-
// Add
|
|
336
|
-
brain.register(new
|
|
337
|
-
brain.register(new AgentCoordinator())
|
|
334
|
+
// Add augmentations to extend functionality
|
|
335
|
+
brain.register(new CustomAugmentation())
|
|
338
336
|
|
|
339
337
|
// Now your AI remembers everything across all sessions!
|
|
340
338
|
await brain.add("User prefers TypeScript over JavaScript")
|
package/dist/brainyData.d.ts
CHANGED
|
@@ -785,7 +785,7 @@ export declare class BrainyData<T = any> implements BrainyDataInterface<T> {
|
|
|
785
785
|
*/
|
|
786
786
|
delete(id: string, options?: {
|
|
787
787
|
service?: string;
|
|
788
|
-
|
|
788
|
+
hard?: boolean;
|
|
789
789
|
cascade?: boolean;
|
|
790
790
|
force?: boolean;
|
|
791
791
|
}): Promise<boolean>;
|
|
@@ -1336,9 +1336,12 @@ export declare class BrainyData<T = any> implements BrainyDataInterface<T> {
|
|
|
1336
1336
|
/**
|
|
1337
1337
|
* Get a configuration value with automatic decryption
|
|
1338
1338
|
* @param key Configuration key
|
|
1339
|
+
* @param options Options including decryption (auto-detected by default)
|
|
1339
1340
|
* @returns Configuration value or undefined
|
|
1340
1341
|
*/
|
|
1341
|
-
getConfig(key: string
|
|
1342
|
+
getConfig(key: string, options?: {
|
|
1343
|
+
decrypt?: boolean;
|
|
1344
|
+
}): Promise<any>;
|
|
1342
1345
|
/**
|
|
1343
1346
|
* Encrypt data using universal crypto utilities
|
|
1344
1347
|
*/
|
package/dist/brainyData.js
CHANGED
|
@@ -656,6 +656,22 @@ export class BrainyData {
|
|
|
656
656
|
return;
|
|
657
657
|
}
|
|
658
658
|
this.isInitializing = true;
|
|
659
|
+
// CRITICAL: Ensure model is available before ANY operations
|
|
660
|
+
// This is THE most critical part of the system
|
|
661
|
+
// Without the model, users CANNOT access their data
|
|
662
|
+
if (typeof this.embeddingFunction === 'function') {
|
|
663
|
+
try {
|
|
664
|
+
const { modelGuardian } = await import('./critical/model-guardian.js');
|
|
665
|
+
await modelGuardian.ensureCriticalModel();
|
|
666
|
+
}
|
|
667
|
+
catch (error) {
|
|
668
|
+
console.error('🚨 CRITICAL: Model verification failed!');
|
|
669
|
+
console.error('Brainy cannot function without the transformer model.');
|
|
670
|
+
console.error('Users cannot access their data without it.');
|
|
671
|
+
this.isInitializing = false;
|
|
672
|
+
throw error;
|
|
673
|
+
}
|
|
674
|
+
}
|
|
659
675
|
try {
|
|
660
676
|
// Pre-load the embedding model early to ensure it's always available
|
|
661
677
|
// This helps prevent issues with the Universal Sentence Encoder not being loaded
|
|
@@ -1936,10 +1952,14 @@ export class BrainyData {
|
|
|
1936
1952
|
offset: options.offset
|
|
1937
1953
|
});
|
|
1938
1954
|
}
|
|
1939
|
-
// Filter out placeholder nouns from search results
|
|
1955
|
+
// Filter out placeholder nouns and deleted items from search results
|
|
1940
1956
|
searchResults = searchResults.filter((result) => {
|
|
1941
1957
|
if (result.metadata && typeof result.metadata === 'object') {
|
|
1942
1958
|
const metadata = result.metadata;
|
|
1959
|
+
// Exclude deleted items from search results (soft delete)
|
|
1960
|
+
if (metadata.deleted === true) {
|
|
1961
|
+
return false;
|
|
1962
|
+
}
|
|
1943
1963
|
// Exclude placeholder nouns from search results
|
|
1944
1964
|
if (metadata.isPlaceholder) {
|
|
1945
1965
|
return false;
|
|
@@ -2296,12 +2316,13 @@ export class BrainyData {
|
|
|
2296
2316
|
* @returns Promise that resolves to true if the vector was deleted, false otherwise
|
|
2297
2317
|
*/
|
|
2298
2318
|
async delete(id, options = {}) {
|
|
2319
|
+
// Clear API: use 'hard: true' for hard delete, otherwise soft delete
|
|
2320
|
+
const isHardDelete = options.hard === true;
|
|
2299
2321
|
const opts = {
|
|
2300
|
-
service:
|
|
2301
|
-
soft:
|
|
2302
|
-
cascade: false,
|
|
2303
|
-
force: false
|
|
2304
|
-
...options
|
|
2322
|
+
service: options.service,
|
|
2323
|
+
soft: !isHardDelete, // Soft delete is default unless hard: true is specified
|
|
2324
|
+
cascade: options.cascade || false,
|
|
2325
|
+
force: options.force || false
|
|
2305
2326
|
};
|
|
2306
2327
|
// Validate id parameter first, before any other logic
|
|
2307
2328
|
if (id === null || id === undefined) {
|
|
@@ -2331,11 +2352,17 @@ export class BrainyData {
|
|
|
2331
2352
|
// Handle soft delete vs hard delete
|
|
2332
2353
|
if (opts.soft) {
|
|
2333
2354
|
// Soft delete: just mark as deleted - metadata filter will exclude from search
|
|
2334
|
-
|
|
2335
|
-
|
|
2336
|
-
|
|
2337
|
-
|
|
2338
|
-
|
|
2355
|
+
try {
|
|
2356
|
+
return await this.updateMetadata(actualId, {
|
|
2357
|
+
deleted: true,
|
|
2358
|
+
deletedAt: new Date().toISOString(),
|
|
2359
|
+
deletedBy: opts.service || 'user'
|
|
2360
|
+
});
|
|
2361
|
+
}
|
|
2362
|
+
catch (error) {
|
|
2363
|
+
// If item doesn't exist, return false (delete of non-existent item is not an error)
|
|
2364
|
+
return false;
|
|
2365
|
+
}
|
|
2339
2366
|
}
|
|
2340
2367
|
// Hard delete: Remove from index
|
|
2341
2368
|
const removed = this.index.removeItem(actualId);
|
|
@@ -4818,34 +4845,36 @@ export class BrainyData {
|
|
|
4818
4845
|
* @param options Options including encryption
|
|
4819
4846
|
*/
|
|
4820
4847
|
async setConfig(key, value, options) {
|
|
4821
|
-
|
|
4848
|
+
// Use a predictable ID based on the config key
|
|
4849
|
+
const configId = `config-${key}`;
|
|
4850
|
+
// Store the config data in metadata (not as vectorized data)
|
|
4851
|
+
const configValue = options?.encrypt ? await this.encryptData(JSON.stringify(value)) : value;
|
|
4852
|
+
// Use simple text for vectorization
|
|
4853
|
+
const searchableText = `Configuration setting for ${key}`;
|
|
4854
|
+
await this.add(searchableText, {
|
|
4855
|
+
nounType: NounType.State,
|
|
4822
4856
|
configKey: key,
|
|
4823
|
-
configValue:
|
|
4857
|
+
configValue: configValue,
|
|
4824
4858
|
encrypted: !!options?.encrypt,
|
|
4825
4859
|
timestamp: new Date().toISOString()
|
|
4826
|
-
};
|
|
4827
|
-
await this.add(configNoun, {
|
|
4828
|
-
nounType: NounType.State,
|
|
4829
|
-
configKey: key,
|
|
4830
|
-
encrypted: !!options?.encrypt
|
|
4831
|
-
});
|
|
4860
|
+
}, { id: configId });
|
|
4832
4861
|
}
|
|
4833
4862
|
/**
|
|
4834
4863
|
* Get a configuration value with automatic decryption
|
|
4835
4864
|
* @param key Configuration key
|
|
4865
|
+
* @param options Options including decryption (auto-detected by default)
|
|
4836
4866
|
* @returns Configuration value or undefined
|
|
4837
4867
|
*/
|
|
4838
|
-
async getConfig(key) {
|
|
4868
|
+
async getConfig(key, options) {
|
|
4839
4869
|
try {
|
|
4840
|
-
|
|
4841
|
-
|
|
4842
|
-
|
|
4843
|
-
|
|
4844
|
-
if (results.length === 0)
|
|
4870
|
+
// Use the predictable ID to get the config directly
|
|
4871
|
+
const configId = `config-${key}`;
|
|
4872
|
+
const storedNoun = await this.get(configId);
|
|
4873
|
+
if (!storedNoun)
|
|
4845
4874
|
return undefined;
|
|
4846
|
-
|
|
4847
|
-
const value =
|
|
4848
|
-
const encrypted =
|
|
4875
|
+
// The config data is now stored in metadata
|
|
4876
|
+
const value = storedNoun.metadata?.configValue;
|
|
4877
|
+
const encrypted = storedNoun.metadata?.encrypted;
|
|
4849
4878
|
if (encrypted && typeof value === 'string') {
|
|
4850
4879
|
const decrypted = await this.decryptData(value);
|
|
4851
4880
|
return JSON.parse(decrypted);
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MODEL GUARDIAN - CRITICAL PATH
|
|
3
|
+
*
|
|
4
|
+
* THIS IS THE MOST CRITICAL COMPONENT OF BRAINY
|
|
5
|
+
* Without the exact model, users CANNOT access their data
|
|
6
|
+
*
|
|
7
|
+
* Requirements:
|
|
8
|
+
* 1. Model MUST be Xenova/all-MiniLM-L6-v2 (never changes)
|
|
9
|
+
* 2. Model MUST be available at runtime
|
|
10
|
+
* 3. Model MUST produce consistent 384-dim embeddings
|
|
11
|
+
* 4. System MUST fail fast if model unavailable in production
|
|
12
|
+
*/
|
|
13
|
+
export declare class ModelGuardian {
|
|
14
|
+
private static instance;
|
|
15
|
+
private isVerified;
|
|
16
|
+
private modelPath;
|
|
17
|
+
private lastVerification;
|
|
18
|
+
private constructor();
|
|
19
|
+
static getInstance(): ModelGuardian;
|
|
20
|
+
/**
|
|
21
|
+
* CRITICAL: Verify model availability and integrity
|
|
22
|
+
* This MUST be called before any embedding operations
|
|
23
|
+
*/
|
|
24
|
+
ensureCriticalModel(): Promise<void>;
|
|
25
|
+
/**
|
|
26
|
+
* Verify the local model files exist and are correct
|
|
27
|
+
*/
|
|
28
|
+
private verifyLocalModel;
|
|
29
|
+
/**
|
|
30
|
+
* Download model from a fallback source
|
|
31
|
+
*/
|
|
32
|
+
private downloadFromSource;
|
|
33
|
+
/**
|
|
34
|
+
* Configure transformers.js to use verified local model
|
|
35
|
+
*/
|
|
36
|
+
private configureTransformers;
|
|
37
|
+
/**
|
|
38
|
+
* Detect where models should be stored
|
|
39
|
+
*/
|
|
40
|
+
private detectModelPath;
|
|
41
|
+
/**
|
|
42
|
+
* Get model status for diagnostics
|
|
43
|
+
*/
|
|
44
|
+
getStatus(): Promise<{
|
|
45
|
+
verified: boolean;
|
|
46
|
+
path: string;
|
|
47
|
+
lastVerification: Date | null;
|
|
48
|
+
modelName: string;
|
|
49
|
+
dimensions: number;
|
|
50
|
+
}>;
|
|
51
|
+
/**
|
|
52
|
+
* Force re-verification (for testing)
|
|
53
|
+
*/
|
|
54
|
+
forceReverify(): Promise<void>;
|
|
55
|
+
}
|
|
56
|
+
export declare const modelGuardian: ModelGuardian;
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MODEL GUARDIAN - CRITICAL PATH
|
|
3
|
+
*
|
|
4
|
+
* THIS IS THE MOST CRITICAL COMPONENT OF BRAINY
|
|
5
|
+
* Without the exact model, users CANNOT access their data
|
|
6
|
+
*
|
|
7
|
+
* Requirements:
|
|
8
|
+
* 1. Model MUST be Xenova/all-MiniLM-L6-v2 (never changes)
|
|
9
|
+
* 2. Model MUST be available at runtime
|
|
10
|
+
* 3. Model MUST produce consistent 384-dim embeddings
|
|
11
|
+
* 4. System MUST fail fast if model unavailable in production
|
|
12
|
+
*/
|
|
13
|
+
import { existsSync } from 'fs';
|
|
14
|
+
import { stat } from 'fs/promises';
|
|
15
|
+
import { join, dirname } from 'path';
|
|
16
|
+
import { env } from '@huggingface/transformers';
|
|
17
|
+
// CRITICAL: These values MUST NEVER CHANGE
|
|
18
|
+
const CRITICAL_MODEL_CONFIG = {
|
|
19
|
+
modelName: 'Xenova/all-MiniLM-L6-v2',
|
|
20
|
+
modelHash: {
|
|
21
|
+
// SHA256 of model.onnx - computed from actual model
|
|
22
|
+
'onnx/model.onnx': 'add_actual_hash_here',
|
|
23
|
+
'tokenizer.json': 'add_actual_hash_here'
|
|
24
|
+
},
|
|
25
|
+
modelSize: {
|
|
26
|
+
'onnx/model.onnx': 90555481, // Exact size in bytes
|
|
27
|
+
'tokenizer.json': 711661
|
|
28
|
+
},
|
|
29
|
+
embeddingDimensions: 384,
|
|
30
|
+
fallbackSources: [
|
|
31
|
+
// Primary: Our GitHub releases (we control this)
|
|
32
|
+
{
|
|
33
|
+
name: 'GitHub (Primary)',
|
|
34
|
+
url: 'https://github.com/soulcraftlabs/brainy-models/releases/download/v1.0.0/all-MiniLM-L6-v2.tar.gz',
|
|
35
|
+
type: 'tarball'
|
|
36
|
+
},
|
|
37
|
+
// Secondary: Our CDN (future, for speed)
|
|
38
|
+
{
|
|
39
|
+
name: 'Soulcraft CDN',
|
|
40
|
+
url: 'https://models.soulcraft.com/brainy/v1/all-MiniLM-L6-v2.tar.gz',
|
|
41
|
+
type: 'tarball'
|
|
42
|
+
},
|
|
43
|
+
// Tertiary: Hugging Face (original source)
|
|
44
|
+
{
|
|
45
|
+
name: 'Hugging Face',
|
|
46
|
+
url: 'huggingface',
|
|
47
|
+
type: 'transformers'
|
|
48
|
+
}
|
|
49
|
+
]
|
|
50
|
+
};
|
|
51
|
+
export class ModelGuardian {
|
|
52
|
+
constructor() {
|
|
53
|
+
this.isVerified = false;
|
|
54
|
+
this.lastVerification = null;
|
|
55
|
+
this.modelPath = this.detectModelPath();
|
|
56
|
+
}
|
|
57
|
+
static getInstance() {
|
|
58
|
+
if (!ModelGuardian.instance) {
|
|
59
|
+
ModelGuardian.instance = new ModelGuardian();
|
|
60
|
+
}
|
|
61
|
+
return ModelGuardian.instance;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* CRITICAL: Verify model availability and integrity
|
|
65
|
+
* This MUST be called before any embedding operations
|
|
66
|
+
*/
|
|
67
|
+
async ensureCriticalModel() {
|
|
68
|
+
console.log('🛡️ MODEL GUARDIAN: Verifying critical model availability...');
|
|
69
|
+
// Check if already verified in this session
|
|
70
|
+
if (this.isVerified && this.lastVerification) {
|
|
71
|
+
const hoursSinceVerification = (Date.now() - this.lastVerification.getTime()) / (1000 * 60 * 60);
|
|
72
|
+
if (hoursSinceVerification < 24) {
|
|
73
|
+
console.log('✅ Model previously verified in this session');
|
|
74
|
+
return;
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
// Step 1: Check if model exists locally
|
|
78
|
+
const modelExists = await this.verifyLocalModel();
|
|
79
|
+
if (modelExists) {
|
|
80
|
+
console.log('✅ Critical model verified locally');
|
|
81
|
+
this.isVerified = true;
|
|
82
|
+
this.lastVerification = new Date();
|
|
83
|
+
this.configureTransformers();
|
|
84
|
+
return;
|
|
85
|
+
}
|
|
86
|
+
// Step 2: In production, FAIL FAST
|
|
87
|
+
if (process.env.NODE_ENV === 'production' && !process.env.BRAINY_ALLOW_RUNTIME_DOWNLOAD) {
|
|
88
|
+
throw new Error('🚨 CRITICAL FAILURE: Transformer model not found in production!\n' +
|
|
89
|
+
'The model is REQUIRED for Brainy to function.\n' +
|
|
90
|
+
'Users CANNOT access their data without it.\n' +
|
|
91
|
+
'Solution: Run "npm run download-models" during build stage.');
|
|
92
|
+
}
|
|
93
|
+
// Step 3: Attempt to download from fallback sources
|
|
94
|
+
console.warn('⚠️ Model not found locally, attempting download...');
|
|
95
|
+
for (const source of CRITICAL_MODEL_CONFIG.fallbackSources) {
|
|
96
|
+
try {
|
|
97
|
+
console.log(`📥 Trying ${source.name}...`);
|
|
98
|
+
await this.downloadFromSource(source);
|
|
99
|
+
// Verify the download
|
|
100
|
+
if (await this.verifyLocalModel()) {
|
|
101
|
+
console.log(`✅ Successfully downloaded from ${source.name}`);
|
|
102
|
+
this.isVerified = true;
|
|
103
|
+
this.lastVerification = new Date();
|
|
104
|
+
this.configureTransformers();
|
|
105
|
+
return;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
catch (error) {
|
|
109
|
+
console.warn(`❌ ${source.name} failed:`, error.message);
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
// Step 4: CRITICAL FAILURE
|
|
113
|
+
throw new Error('🚨 CRITICAL FAILURE: Cannot obtain transformer model!\n' +
|
|
114
|
+
'Tried all fallback sources.\n' +
|
|
115
|
+
'Brainy CANNOT function without the model.\n' +
|
|
116
|
+
'Users CANNOT access their data.\n' +
|
|
117
|
+
'Please check network connectivity or pre-download models.');
|
|
118
|
+
}
|
|
119
|
+
/**
|
|
120
|
+
* Verify the local model files exist and are correct
|
|
121
|
+
*/
|
|
122
|
+
async verifyLocalModel() {
|
|
123
|
+
const modelBasePath = join(this.modelPath, ...CRITICAL_MODEL_CONFIG.modelName.split('/'));
|
|
124
|
+
// Check critical files
|
|
125
|
+
const criticalFiles = [
|
|
126
|
+
'onnx/model.onnx',
|
|
127
|
+
'tokenizer.json',
|
|
128
|
+
'config.json'
|
|
129
|
+
];
|
|
130
|
+
for (const file of criticalFiles) {
|
|
131
|
+
const filePath = join(modelBasePath, file);
|
|
132
|
+
if (!existsSync(filePath)) {
|
|
133
|
+
console.log(`❌ Missing critical file: ${file}`);
|
|
134
|
+
return false;
|
|
135
|
+
}
|
|
136
|
+
// Verify size for critical files
|
|
137
|
+
if (CRITICAL_MODEL_CONFIG.modelSize[file]) {
|
|
138
|
+
const stats = await stat(filePath);
|
|
139
|
+
const expectedSize = CRITICAL_MODEL_CONFIG.modelSize[file];
|
|
140
|
+
if (Math.abs(stats.size - expectedSize) > 1000) { // Allow 1KB variance
|
|
141
|
+
console.error(`❌ CRITICAL: Model file size mismatch!\n` +
|
|
142
|
+
`File: ${file}\n` +
|
|
143
|
+
`Expected: ${expectedSize} bytes\n` +
|
|
144
|
+
`Actual: ${stats.size} bytes\n` +
|
|
145
|
+
`This indicates model corruption or version mismatch!`);
|
|
146
|
+
return false;
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
// TODO: Add SHA256 verification for ultimate security
|
|
150
|
+
// if (CRITICAL_MODEL_CONFIG.modelHash[file]) {
|
|
151
|
+
// const hash = await this.computeFileHash(filePath)
|
|
152
|
+
// if (hash !== CRITICAL_MODEL_CONFIG.modelHash[file]) {
|
|
153
|
+
// console.error('❌ CRITICAL: Model hash mismatch!')
|
|
154
|
+
// return false
|
|
155
|
+
// }
|
|
156
|
+
// }
|
|
157
|
+
}
|
|
158
|
+
return true;
|
|
159
|
+
}
|
|
160
|
+
/**
|
|
161
|
+
* Download model from a fallback source
|
|
162
|
+
*/
|
|
163
|
+
async downloadFromSource(source) {
|
|
164
|
+
if (source.type === 'transformers') {
|
|
165
|
+
// Use transformers.js native download
|
|
166
|
+
const { pipeline } = await import('@huggingface/transformers');
|
|
167
|
+
env.cacheDir = this.modelPath;
|
|
168
|
+
env.allowRemoteModels = true;
|
|
169
|
+
const extractor = await pipeline('feature-extraction', CRITICAL_MODEL_CONFIG.modelName);
|
|
170
|
+
// Test the model
|
|
171
|
+
const test = await extractor('test', { pooling: 'mean', normalize: true });
|
|
172
|
+
if (test.data.length !== CRITICAL_MODEL_CONFIG.embeddingDimensions) {
|
|
173
|
+
throw new Error(`CRITICAL: Model dimension mismatch! ` +
|
|
174
|
+
`Expected ${CRITICAL_MODEL_CONFIG.embeddingDimensions}, ` +
|
|
175
|
+
`got ${test.data.length}`);
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
else if (source.type === 'tarball') {
|
|
179
|
+
// Download and extract tarball
|
|
180
|
+
// This would require implementation with proper tar extraction
|
|
181
|
+
throw new Error('Tarball extraction not yet implemented');
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
/**
|
|
185
|
+
* Configure transformers.js to use verified local model
|
|
186
|
+
*/
|
|
187
|
+
configureTransformers() {
|
|
188
|
+
env.localModelPath = this.modelPath;
|
|
189
|
+
env.allowRemoteModels = false; // Force local only after verification
|
|
190
|
+
console.log('🔒 Transformers configured to use verified local model');
|
|
191
|
+
}
|
|
192
|
+
/**
|
|
193
|
+
* Detect where models should be stored
|
|
194
|
+
*/
|
|
195
|
+
detectModelPath() {
|
|
196
|
+
const candidates = [
|
|
197
|
+
process.env.BRAINY_MODELS_PATH,
|
|
198
|
+
'./models',
|
|
199
|
+
join(process.cwd(), 'models'),
|
|
200
|
+
join(process.env.HOME || '', '.brainy', 'models'),
|
|
201
|
+
'/opt/models', // Lambda/container path
|
|
202
|
+
env.cacheDir
|
|
203
|
+
];
|
|
204
|
+
for (const path of candidates) {
|
|
205
|
+
if (path && existsSync(path)) {
|
|
206
|
+
const modelPath = join(path, ...CRITICAL_MODEL_CONFIG.modelName.split('/'));
|
|
207
|
+
if (existsSync(join(modelPath, 'onnx', 'model.onnx'))) {
|
|
208
|
+
return dirname(dirname(modelPath)); // Return base models directory
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
// Default
|
|
213
|
+
return './models';
|
|
214
|
+
}
|
|
215
|
+
/**
|
|
216
|
+
* Get model status for diagnostics
|
|
217
|
+
*/
|
|
218
|
+
async getStatus() {
|
|
219
|
+
return {
|
|
220
|
+
verified: this.isVerified,
|
|
221
|
+
path: this.modelPath,
|
|
222
|
+
lastVerification: this.lastVerification,
|
|
223
|
+
modelName: CRITICAL_MODEL_CONFIG.modelName,
|
|
224
|
+
dimensions: CRITICAL_MODEL_CONFIG.embeddingDimensions
|
|
225
|
+
};
|
|
226
|
+
}
|
|
227
|
+
/**
|
|
228
|
+
* Force re-verification (for testing)
|
|
229
|
+
*/
|
|
230
|
+
async forceReverify() {
|
|
231
|
+
this.isVerified = false;
|
|
232
|
+
this.lastVerification = null;
|
|
233
|
+
await this.ensureCriticalModel();
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
// Export singleton instance
|
|
237
|
+
export const modelGuardian = ModelGuardian.getInstance();
|
|
238
|
+
//# sourceMappingURL=model-guardian.js.map
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model Manager - Ensures transformer models are available at runtime
|
|
3
|
+
*
|
|
4
|
+
* Strategy:
|
|
5
|
+
* 1. Check local cache first
|
|
6
|
+
* 2. Try GitHub releases (our backup)
|
|
7
|
+
* 3. Fall back to Hugging Face
|
|
8
|
+
* 4. Future: CDN at models.soulcraft.com
|
|
9
|
+
*/
|
|
10
|
+
export declare class ModelManager {
|
|
11
|
+
private static instance;
|
|
12
|
+
private modelsPath;
|
|
13
|
+
private isInitialized;
|
|
14
|
+
private constructor();
|
|
15
|
+
static getInstance(): ModelManager;
|
|
16
|
+
private getModelsPath;
|
|
17
|
+
ensureModels(modelName?: string): Promise<boolean>;
|
|
18
|
+
private verifyModelFiles;
|
|
19
|
+
private downloadFromGitHub;
|
|
20
|
+
private downloadFromCDN;
|
|
21
|
+
private configureTransformers;
|
|
22
|
+
/**
|
|
23
|
+
* Pre-download models for deployment
|
|
24
|
+
* This is what npm run download-models calls
|
|
25
|
+
*/
|
|
26
|
+
static predownload(): Promise<void>;
|
|
27
|
+
}
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model Manager - Ensures transformer models are available at runtime
|
|
3
|
+
*
|
|
4
|
+
* Strategy:
|
|
5
|
+
* 1. Check local cache first
|
|
6
|
+
* 2. Try GitHub releases (our backup)
|
|
7
|
+
* 3. Fall back to Hugging Face
|
|
8
|
+
* 4. Future: CDN at models.soulcraft.com
|
|
9
|
+
*/
|
|
10
|
+
import { existsSync } from 'fs';
|
|
11
|
+
import { join, dirname } from 'path';
|
|
12
|
+
import { env } from '@huggingface/transformers';
|
|
13
|
+
// Model sources in order of preference
|
|
14
|
+
const MODEL_SOURCES = {
|
|
15
|
+
// GitHub Release - our controlled backup
|
|
16
|
+
github: 'https://github.com/soulcraftlabs/brainy/releases/download/models-v1/all-MiniLM-L6-v2.tar.gz',
|
|
17
|
+
// Future CDN - fastest option when available
|
|
18
|
+
cdn: 'https://models.soulcraft.com/brainy/all-MiniLM-L6-v2.tar.gz',
|
|
19
|
+
// Original Hugging Face - fallback
|
|
20
|
+
huggingface: 'default' // Uses transformers.js default
|
|
21
|
+
};
|
|
22
|
+
// Expected model files and their hashes
|
|
23
|
+
const MODEL_MANIFEST = {
|
|
24
|
+
'Xenova/all-MiniLM-L6-v2': {
|
|
25
|
+
files: {
|
|
26
|
+
'onnx/model.onnx': {
|
|
27
|
+
size: 90555481,
|
|
28
|
+
sha256: null // Will be computed from actual model
|
|
29
|
+
},
|
|
30
|
+
'tokenizer.json': {
|
|
31
|
+
size: 711661,
|
|
32
|
+
sha256: null
|
|
33
|
+
},
|
|
34
|
+
'config.json': {
|
|
35
|
+
size: 650,
|
|
36
|
+
sha256: null
|
|
37
|
+
},
|
|
38
|
+
'tokenizer_config.json': {
|
|
39
|
+
size: 366,
|
|
40
|
+
sha256: null
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
};
|
|
45
|
+
export class ModelManager {
|
|
46
|
+
constructor() {
|
|
47
|
+
this.isInitialized = false;
|
|
48
|
+
// Determine models path
|
|
49
|
+
this.modelsPath = this.getModelsPath();
|
|
50
|
+
}
|
|
51
|
+
static getInstance() {
|
|
52
|
+
if (!ModelManager.instance) {
|
|
53
|
+
ModelManager.instance = new ModelManager();
|
|
54
|
+
}
|
|
55
|
+
return ModelManager.instance;
|
|
56
|
+
}
|
|
57
|
+
getModelsPath() {
|
|
58
|
+
// Check various possible locations
|
|
59
|
+
const paths = [
|
|
60
|
+
process.env.BRAINY_MODELS_PATH,
|
|
61
|
+
'./models',
|
|
62
|
+
join(process.cwd(), 'models'),
|
|
63
|
+
join(process.env.HOME || '', '.brainy', 'models'),
|
|
64
|
+
env.cacheDir
|
|
65
|
+
];
|
|
66
|
+
// Find first existing path or use default
|
|
67
|
+
for (const path of paths) {
|
|
68
|
+
if (path && existsSync(path)) {
|
|
69
|
+
return path;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
// Default to local models directory
|
|
73
|
+
return join(process.cwd(), 'models');
|
|
74
|
+
}
|
|
75
|
+
async ensureModels(modelName = 'Xenova/all-MiniLM-L6-v2') {
|
|
76
|
+
if (this.isInitialized) {
|
|
77
|
+
return true;
|
|
78
|
+
}
|
|
79
|
+
const modelPath = join(this.modelsPath, ...modelName.split('/'));
|
|
80
|
+
// Check if model already exists locally
|
|
81
|
+
if (await this.verifyModelFiles(modelPath, modelName)) {
|
|
82
|
+
console.log('✅ Models found in cache:', modelPath);
|
|
83
|
+
this.configureTransformers(modelPath);
|
|
84
|
+
this.isInitialized = true;
|
|
85
|
+
return true;
|
|
86
|
+
}
|
|
87
|
+
// Try to download from our sources
|
|
88
|
+
console.log('📥 Downloading transformer models...');
|
|
89
|
+
// Try GitHub first (our backup)
|
|
90
|
+
if (await this.downloadFromGitHub(modelName)) {
|
|
91
|
+
this.isInitialized = true;
|
|
92
|
+
return true;
|
|
93
|
+
}
|
|
94
|
+
// Try CDN (when available)
|
|
95
|
+
if (await this.downloadFromCDN(modelName)) {
|
|
96
|
+
this.isInitialized = true;
|
|
97
|
+
return true;
|
|
98
|
+
}
|
|
99
|
+
// Fall back to Hugging Face (default transformers.js behavior)
|
|
100
|
+
console.log('⚠️ Using Hugging Face fallback for models');
|
|
101
|
+
env.allowRemoteModels = true;
|
|
102
|
+
this.isInitialized = true;
|
|
103
|
+
return true;
|
|
104
|
+
}
|
|
105
|
+
async verifyModelFiles(modelPath, modelName) {
|
|
106
|
+
const manifest = MODEL_MANIFEST[modelName];
|
|
107
|
+
if (!manifest)
|
|
108
|
+
return false;
|
|
109
|
+
for (const [filePath, info] of Object.entries(manifest.files)) {
|
|
110
|
+
const fullPath = join(modelPath, filePath);
|
|
111
|
+
if (!existsSync(fullPath)) {
|
|
112
|
+
return false;
|
|
113
|
+
}
|
|
114
|
+
// Optionally verify size
|
|
115
|
+
if (process.env.VERIFY_MODEL_SIZE === 'true') {
|
|
116
|
+
const stats = await import('fs').then(fs => fs.promises.stat(fullPath));
|
|
117
|
+
if (stats.size !== info.size) {
|
|
118
|
+
console.warn(`⚠️ Model file size mismatch: ${filePath}`);
|
|
119
|
+
return false;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
return true;
|
|
124
|
+
}
|
|
125
|
+
async downloadFromGitHub(modelName) {
|
|
126
|
+
try {
|
|
127
|
+
const url = MODEL_SOURCES.github;
|
|
128
|
+
console.log('📥 Downloading from GitHub releases...');
|
|
129
|
+
// Download tar.gz file
|
|
130
|
+
const response = await fetch(url);
|
|
131
|
+
if (!response.ok) {
|
|
132
|
+
throw new Error(`GitHub download failed: ${response.status}`);
|
|
133
|
+
}
|
|
134
|
+
const buffer = await response.arrayBuffer();
|
|
135
|
+
// Extract tar.gz (would need tar library in production)
|
|
136
|
+
// For now, return false to fall back to other methods
|
|
137
|
+
console.log('⚠️ GitHub model extraction not yet implemented');
|
|
138
|
+
return false;
|
|
139
|
+
}
|
|
140
|
+
catch (error) {
|
|
141
|
+
console.log('⚠️ GitHub download failed:', error.message);
|
|
142
|
+
return false;
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
async downloadFromCDN(modelName) {
|
|
146
|
+
try {
|
|
147
|
+
const url = MODEL_SOURCES.cdn;
|
|
148
|
+
console.log('📥 Downloading from Soulcraft CDN...');
|
|
149
|
+
// Try to fetch from CDN
|
|
150
|
+
const response = await fetch(url);
|
|
151
|
+
if (!response.ok) {
|
|
152
|
+
throw new Error(`CDN download failed: ${response.status}`);
|
|
153
|
+
}
|
|
154
|
+
// Would extract files here
|
|
155
|
+
console.log('⚠️ CDN not yet available');
|
|
156
|
+
return false;
|
|
157
|
+
}
|
|
158
|
+
catch (error) {
|
|
159
|
+
console.log('⚠️ CDN download failed:', error.message);
|
|
160
|
+
return false;
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
configureTransformers(modelPath) {
|
|
164
|
+
// Configure transformers.js to use our local models
|
|
165
|
+
env.localModelPath = dirname(modelPath);
|
|
166
|
+
env.allowRemoteModels = false;
|
|
167
|
+
console.log('🔧 Configured transformers.js to use local models');
|
|
168
|
+
}
|
|
169
|
+
/**
|
|
170
|
+
* Pre-download models for deployment
|
|
171
|
+
* This is what npm run download-models calls
|
|
172
|
+
*/
|
|
173
|
+
static async predownload() {
|
|
174
|
+
const manager = ModelManager.getInstance();
|
|
175
|
+
const success = await manager.ensureModels();
|
|
176
|
+
if (!success) {
|
|
177
|
+
throw new Error('Failed to download models');
|
|
178
|
+
}
|
|
179
|
+
console.log('✅ Models downloaded successfully');
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
// Auto-initialize on import in production
|
|
183
|
+
if (process.env.NODE_ENV === 'production' && process.env.SKIP_MODEL_CHECK !== 'true') {
|
|
184
|
+
ModelManager.getInstance().ensureModels().catch(error => {
|
|
185
|
+
console.error('⚠️ Model initialization failed:', error);
|
|
186
|
+
// Don't throw - allow app to start and try downloading on first use
|
|
187
|
+
});
|
|
188
|
+
}
|
|
189
|
+
//# sourceMappingURL=model-manager.js.map
|
package/dist/utils/embedding.js
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
* Complete rewrite to eliminate TensorFlow.js and use ONNX-based models
|
|
4
4
|
*/
|
|
5
5
|
import { isBrowser } from './environment.js';
|
|
6
|
+
import { ModelManager } from '../embeddings/model-manager.js';
|
|
6
7
|
// @ts-ignore - Transformers.js is now the primary embedding library
|
|
7
8
|
import { pipeline, env } from '@huggingface/transformers';
|
|
8
9
|
/**
|
|
@@ -192,6 +193,9 @@ export class TransformerEmbedding {
|
|
|
192
193
|
}
|
|
193
194
|
// Always use real implementation - no mocking
|
|
194
195
|
try {
|
|
196
|
+
// Ensure models are available (downloads if needed)
|
|
197
|
+
const modelManager = ModelManager.getInstance();
|
|
198
|
+
await modelManager.ensureModels(this.options.model);
|
|
195
199
|
// Resolve device configuration and cache directory
|
|
196
200
|
const device = await resolveDevice(this.options.device);
|
|
197
201
|
const cacheDir = this.options.cacheDir === './models'
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.3.0",
|
|
4
4
|
"description": "Multi-Dimensional AI Database - Vector similarity, graph relationships, metadata facets with HNSW indexing and OPFS storage",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|
|
@@ -142,7 +142,10 @@
|
|
|
142
142
|
"_workflow:major": "node scripts/release-workflow.js major",
|
|
143
143
|
"_workflow:dry-run": "npm run build && npm test && npm run _release:dry-run",
|
|
144
144
|
"_dry-run": "npm pack --dry-run",
|
|
145
|
-
"download-models": "node scripts/download-models.cjs"
|
|
145
|
+
"download-models": "node scripts/download-models.cjs",
|
|
146
|
+
"prepare-models": "node scripts/prepare-models.js",
|
|
147
|
+
"models:verify": "node scripts/ensure-models.js",
|
|
148
|
+
"models:download": "BRAINY_ALLOW_REMOTE_MODELS=true node scripts/download-models.cjs"
|
|
146
149
|
},
|
|
147
150
|
"keywords": [
|
|
148
151
|
"vector-database",
|