@soulcraft/brainy 2.12.0 → 2.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/dist/brainyData.js +4 -17
- package/dist/config/index.d.ts +1 -0
- package/dist/config/index.js +2 -0
- package/dist/config/modelAutoConfig.d.ts +1 -0
- package/dist/config/modelAutoConfig.js +27 -22
- package/dist/config/modelPrecisionManager.d.ts +42 -0
- package/dist/config/modelPrecisionManager.js +98 -0
- package/dist/config/zeroConfig.js +1 -1
- package/dist/embeddings/CachedEmbeddings.d.ts +40 -0
- package/dist/embeddings/CachedEmbeddings.js +146 -0
- package/dist/embeddings/EmbeddingManager.d.ts +106 -0
- package/dist/embeddings/EmbeddingManager.js +296 -0
- package/dist/embeddings/SingletonModelManager.d.ts +95 -0
- package/dist/embeddings/SingletonModelManager.js +220 -0
- package/dist/embeddings/index.d.ts +12 -0
- package/dist/embeddings/index.js +16 -0
- package/dist/embeddings/lightweight-embedder.d.ts +0 -1
- package/dist/embeddings/lightweight-embedder.js +4 -12
- package/dist/embeddings/universal-memory-manager.js +13 -50
- package/dist/embeddings/worker-embedding.js +4 -8
- package/dist/utils/embedding.d.ts +7 -2
- package/dist/utils/embedding.js +51 -33
- package/dist/utils/hybridModelManager.d.ts +19 -28
- package/dist/utils/hybridModelManager.js +36 -200
- package/package.json +1 -1
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified Embedding Manager
|
|
3
|
+
*
|
|
4
|
+
* THE single source of truth for all embedding operations in Brainy.
|
|
5
|
+
* Combines model management, precision configuration, and embedding generation
|
|
6
|
+
* into one clean, maintainable class.
|
|
7
|
+
*
|
|
8
|
+
* Features:
|
|
9
|
+
* - Singleton pattern ensures ONE model instance
|
|
10
|
+
* - Automatic Q8 (default) or FP32 precision
|
|
11
|
+
* - Model downloading and caching
|
|
12
|
+
* - Thread-safe initialization
|
|
13
|
+
* - Memory monitoring
|
|
14
|
+
*
|
|
15
|
+
* This replaces: SingletonModelManager, TransformerEmbedding, ModelPrecisionManager,
|
|
16
|
+
* hybridModelManager, universalMemoryManager, and more.
|
|
17
|
+
*/
|
|
18
|
+
import { pipeline, env } from '@huggingface/transformers';
|
|
19
|
+
import { existsSync } from 'fs';
|
|
20
|
+
import { join } from 'path';
|
|
21
|
+
// Global state for true singleton across entire process
let globalInstance = null;
let globalInitPromise = null;
// Vector length every embedding returned by this module is normalised to.
const EMBEDDING_DIMENSION = 384;
/**
 * Unified Embedding Manager.
 *
 * Owns the single feature-extraction pipeline used to turn text into
 * 384-dimensional vectors. The model is loaded lazily on the first call to
 * `init()` / `embed()`; concurrent callers share one in-flight initialization
 * through the module-level `globalInitPromise`.
 *
 * When `process.env.BRAINY_UNIT_TEST === 'true'` or
 * `globalThis.__BRAINY_UNIT_TEST__` is truthy, no model is loaded and
 * deterministic mock vectors are returned instead.
 */
export class EmbeddingManager {
    /**
     * Prefer `EmbeddingManager.getInstance()`; constructing directly creates a
     * manager that is not the shared singleton.
     */
    constructor() {
        this.model = null;
        this.modelName = 'Xenova/all-MiniLM-L6-v2';
        this.initialized = false;
        this.initTime = null;
        this.embedCount = 0;
        this.locked = false;
        // Determine precision - Q8 by default
        this.precision = this.determinePrecision();
        console.log(`🎯 EmbeddingManager: Using ${this.precision.toUpperCase()} precision`);
    }
    /**
     * Get the process-wide singleton instance, creating it on first use.
     * @returns {EmbeddingManager}
     */
    static getInstance() {
        if (!globalInstance) {
            globalInstance = new EmbeddingManager();
        }
        return globalInstance;
    }
    /**
     * Initialize the model. Safe to call repeatedly and concurrently: a
     * completed init returns immediately and an in-flight init is awaited.
     * @returns {Promise<void>}
     * @throws {Error} if the model cannot be loaded
     */
    async init() {
        // In unit test mode, skip real model initialization
        if (process.env.BRAINY_UNIT_TEST === 'true' || globalThis.__BRAINY_UNIT_TEST__) {
            if (!this.initialized) {
                this.initialized = true;
                this.initTime = 1; // Mock init time
                console.log('🧪 EmbeddingManager: Using mocked embeddings for unit tests');
            }
            return;
        }
        // Already initialized
        if (this.initialized && this.model) {
            return;
        }
        // Initialization in progress
        if (globalInitPromise) {
            await globalInitPromise;
            return;
        }
        // Start initialization
        globalInitPromise = this.performInit();
        try {
            await globalInitPromise;
        }
        finally {
            // Clear on success AND failure so a failed load can be retried.
            globalInitPromise = null;
        }
    }
    /**
     * Perform the actual model load and record timing / memory statistics.
     * @returns {Promise<void>}
     * @throws {Error} wrapping the underlying failure (available as `cause`)
     */
    async performInit() {
        const startTime = Date.now();
        console.log(`🚀 Initializing embedding model (${this.precision.toUpperCase()})...`);
        try {
            // Configure transformers.js environment
            const modelsPath = this.getModelsPath();
            env.cacheDir = modelsPath;
            env.allowLocalModels = true;
            env.useFSCache = true;
            // Check if models exist locally (informational only; remote
            // download stays enabled via local_files_only: false below)
            const modelPath = join(modelsPath, ...this.modelName.split('/'));
            const hasLocalModels = existsSync(modelPath);
            if (hasLocalModels) {
                console.log('✅ Using cached models from:', modelPath);
            }
            // Configure pipeline options for the selected precision
            const pipelineOptions = {
                cache_dir: modelsPath,
                local_files_only: false,
                // Specify precision
                dtype: this.precision,
                quantized: this.precision === 'q8',
                // Memory optimizations
                session_options: {
                    enableCpuMemArena: false,
                    enableMemPattern: false,
                    interOpNumThreads: 1,
                    intraOpNumThreads: 1,
                    graphOptimizationLevel: 'disabled'
                }
            };
            // Load the model
            this.model = await pipeline('feature-extraction', this.modelName, pipelineOptions);
            // Lock precision after successful initialization
            this.locked = true;
            this.initialized = true;
            this.initTime = Date.now() - startTime;
            // Log success
            const memoryMB = this.getMemoryUsage();
            console.log(`✅ Model loaded in ${this.initTime}ms`);
            console.log(`📊 Precision: ${this.precision.toUpperCase()} | Memory: ${memoryMB}MB`);
            console.log(`🔒 Configuration locked`);
        }
        catch (error) {
            this.initialized = false;
            this.model = null;
            // Keep the original error reachable for debugging.
            throw new Error(`Failed to initialize embedding model: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
        }
    }
    /**
     * Generate an embedding.
     * @param {string|string[]} text - input text; an array is joined with
     *   single spaces and embedded as one string
     * @returns {Promise<number[]>} vector of exactly 384 numbers
     * @throws {Error} if the model failed to initialize
     */
    async embed(text) {
        // Check for unit test environment - use mocks to prevent ONNX conflicts
        if (process.env.BRAINY_UNIT_TEST === 'true' || globalThis.__BRAINY_UNIT_TEST__) {
            return this.getMockEmbedding(text);
        }
        // Ensure initialized
        await this.init();
        if (!this.model) {
            throw new Error('Model not initialized');
        }
        // Handle array input
        const input = Array.isArray(text) ? text.join(' ') : text;
        // Generate embedding
        const output = await this.model(input, {
            pooling: 'mean',
            normalize: true
        });
        // Extract embedding vector
        const embedding = Array.from(output.data);
        // Count every produced vector, including ones repaired below
        // (previously the pad/truncate early returns skipped the counter).
        this.embedCount++;
        if (embedding.length === EMBEDDING_DIMENSION) {
            return embedding;
        }
        console.warn(`Unexpected embedding dimension: ${embedding.length}`);
        // Pad with zeros or truncate so callers always get a fixed length
        if (embedding.length < EMBEDDING_DIMENSION) {
            return [...embedding, ...new Array(EMBEDDING_DIMENSION - embedding.length).fill(0)];
        }
        return embedding.slice(0, EMBEDDING_DIMENSION);
    }
    /**
     * Generate a deterministic mock embedding for unit tests.
     * @param {*} text - string, array of strings (joined with spaces), or any
     *   JSON-serialisable value
     * @returns {number[]} vector of 384 numbers derived from the input text
     */
    getMockEmbedding(text) {
        // Use the same mock logic as setup-unit.ts for consistency
        const input = Array.isArray(text) ? text.join(' ') : text;
        const str = typeof input === 'string' ? input : JSON.stringify(input);
        const vector = new Array(EMBEDDING_DIMENSION).fill(0);
        // Create semi-realistic embeddings based on text content
        const seeded = Math.min(str.length, EMBEDDING_DIMENSION);
        for (let i = 0; i < seeded; i++) {
            vector[i] = (str.charCodeAt(i) % 256) / 256;
        }
        // Add position-based variation
        for (let i = 0; i < EMBEDDING_DIMENSION; i++) {
            vector[i] += Math.sin(i * 0.1 + str.length) * 0.1;
        }
        // Track mock embedding count
        this.embedCount++;
        return vector;
    }
    /**
     * Get embedding function for compatibility.
     * @returns {(data: string|string[]) => Promise<number[]>}
     */
    getEmbeddingFunction() {
        return async (data) => {
            return await this.embed(data);
        };
    }
    /**
     * Determine model precision from the environment.
     * `BRAINY_MODEL_PRECISION` ('fp32' | 'q8') wins over `BRAINY_FORCE_FP32`.
     * @returns {'q8'|'fp32'}
     */
    determinePrecision() {
        // Check environment variable overrides
        if (process.env.BRAINY_MODEL_PRECISION === 'fp32') {
            return 'fp32';
        }
        if (process.env.BRAINY_MODEL_PRECISION === 'q8') {
            return 'q8';
        }
        if (process.env.BRAINY_FORCE_FP32 === 'true') {
            return 'fp32';
        }
        // Default to Q8 - optimal for most use cases
        return 'q8';
    }
    /**
     * Get models directory path: the first existing candidate directory, or
     * `<cwd>/models` when none exists.
     * @returns {string}
     */
    getModelsPath() {
        // Check various possible locations
        const paths = [
            process.env.BRAINY_MODELS_PATH,
            './models',
            join(process.cwd(), 'models'),
            join(process.env.HOME || '', '.brainy', 'models')
        ];
        for (const path of paths) {
            if (path && existsSync(path)) {
                return path;
            }
        }
        // Default
        return join(process.cwd(), 'models');
    }
    /**
     * Get the process heap usage in MB (rounded).
     * @returns {number|null} null when `process.memoryUsage` is unavailable
     */
    getMemoryUsage() {
        if (typeof process !== 'undefined' && process.memoryUsage) {
            const usage = process.memoryUsage();
            return Math.round(usage.heapUsed / 1024 / 1024);
        }
        return null;
    }
    /**
     * Get current statistics.
     * @returns {{initialized: boolean, precision: string, modelName: string,
     *   embedCount: number, initTime: number|null, memoryMB: number|null}}
     */
    getStats() {
        return {
            initialized: this.initialized,
            precision: this.precision,
            modelName: this.modelName,
            embedCount: this.embedCount,
            initTime: this.initTime,
            memoryMB: this.getMemoryUsage()
        };
    }
    /**
     * Check if initialized.
     * @returns {boolean}
     */
    isInitialized() {
        return this.initialized;
    }
    /**
     * Get current precision.
     * @returns {'q8'|'fp32'}
     */
    getPrecision() {
        return this.precision;
    }
    /**
     * Validate that the requested precision matches the locked one.
     * Does nothing until a model has been loaded (precision not yet locked).
     * @param {string} expected - precision the caller wants to use
     * @throws {Error} if the precision is locked and differs from `expected`
     */
    validatePrecision(expected) {
        if (this.locked && expected !== this.precision) {
            // String() keeps a missing/non-string argument from masking the
            // real mismatch with a TypeError.
            throw new Error(`Precision mismatch! System using ${this.precision.toUpperCase()} ` +
                `but ${String(expected).toUpperCase()} was requested. Cannot mix precisions.`);
        }
    }
}
|
|
276
|
+
// Shared singleton instance, obtained at module load via EmbeddingManager.getInstance().
// The convenience functions exported from this module delegate to it.
export const embeddingManager = EmbeddingManager.getInstance();
|
|
278
|
+
/**
 * Embed text using the shared embedding manager.
 * @param {string|string[]} text - text to embed
 * @returns {Promise<number[]>} whatever `embeddingManager.embed` resolves to
 */
export async function embed(text) {
    const vector = await embeddingManager.embed(text);
    return vector;
}
|
|
284
|
+
/**
 * Compatibility helper: returns the embedding function produced by the
 * shared embedding manager.
 * @returns {Function} result of `embeddingManager.getEmbeddingFunction()`
 */
export function getEmbeddingFunction() {
    const embedFn = embeddingManager.getEmbeddingFunction();
    return embedFn;
}
|
|
290
|
+
/**
 * Report the shared embedding manager's current statistics.
 * @returns {object} result of `embeddingManager.getStats()`
 */
export function getEmbeddingStats() {
    const stats = embeddingManager.getStats();
    return stats;
}
|
|
296
|
+
//# sourceMappingURL=EmbeddingManager.js.map
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Singleton Model Manager - THE ONLY SOURCE OF EMBEDDING MODELS
|
|
3
|
+
*
|
|
4
|
+
* This is the SINGLE, UNIFIED model initialization system that ensures:
|
|
5
|
+
* - Only ONE model instance exists across the entire system
|
|
6
|
+
* - Precision is configured once and locked
|
|
7
|
+
* - All components share the same model
|
|
8
|
+
* - No possibility of mixed precisions
|
|
9
|
+
*
|
|
10
|
+
* CRITICAL: This manager is used by EVERYTHING:
|
|
11
|
+
* - Storage operations (add, update)
|
|
12
|
+
* - Search operations (search, find)
|
|
13
|
+
* - Public API (embed, cluster)
|
|
14
|
+
* - Neural API (all neural.* methods)
|
|
15
|
+
* - Internal operations (deduplication, indexing)
|
|
16
|
+
*/
|
|
17
|
+
import { TransformerEmbedding } from '../utils/embedding.js';
|
|
18
|
+
import { EmbeddingFunction, Vector } from '../coreTypes.js';
|
|
19
|
+
/**
 * Statistics for monitoring the singleton embedding model.
 */
interface ModelStats {
    /** True once the model has been initialized successfully. */
    initialized: boolean;
    /** Model precision; starts as 'unknown' and is reported from the precision manager. */
    precision: string;
    /** Number of successful model initializations. */
    initCount: number;
    /** Number of embed calls made through the manager. */
    embedCount: number;
    /** Time of the most recent model access or embed call; null before first use. */
    lastUsed: Date | null;
    /** Heap usage in MB recorded right after initialization, when available. */
    memoryFootprint?: number;
}
|
|
30
|
+
/**
 * The ONE TRUE model manager.
 *
 * Obtain it through `SingletonModelManager.getInstance()`; the constructor is
 * private so that only one manager exists.
 */
export declare class SingletonModelManager {
    private static instance;
    private stats;
    private constructor();
    /**
     * Get the singleton instance, creating it on first call.
     */
    static getInstance(): SingletonModelManager;
    /**
     * Get the model instance - creates if needed, reuses if exists.
     * This is THE ONLY way to get a model in the entire system.
     * Rejects if initialization fails; a later call retries.
     */
    getModel(): Promise<TransformerEmbedding>;
    /**
     * Initialize the model - called by getModel() when no model exists yet.
     */
    private initializeModel;
    /**
     * Get an embedding function that uses the singleton model.
     * Resolves after the model is available.
     */
    getEmbeddingFunction(): Promise<EmbeddingFunction>;
    /**
     * Direct embed method for convenience.
     */
    embed(data: string | string[]): Promise<Vector>;
    /**
     * Check if the model is initialized.
     */
    isInitialized(): boolean;
    /**
     * Get a snapshot of the current statistics.
     */
    getStats(): ModelStats;
    /**
     * Validate precision consistency.
     * Throws if `requestedPrecision` is given and differs from the current
     * precision; does nothing when it is omitted.
     */
    validatePrecision(requestedPrecision?: string): void;
    /**
     * Force cleanup (for testing only).
     * WARNING: This will break consistency - use only in tests.
     * Throws unless NODE_ENV is 'test'.
     */
    _testOnlyCleanup(): Promise<void>;
}
|
|
77
|
+
/**
 * The shared SingletonModelManager instance used by the helpers below.
 */
export declare const singletonModelManager: SingletonModelManager;
/**
 * THE ONLY embedding function that should be used anywhere.
 * This ensures all operations use the same model instance.
 * Resolves once the singleton model is available.
 */
export declare function getUnifiedEmbeddingFunction(): Promise<EmbeddingFunction>;
/**
 * Direct embed function for convenience; delegates to the singleton manager.
 */
export declare function unifiedEmbed(data: string | string[]): Promise<Vector>;
/**
 * Check if the singleton model has finished initializing.
 */
export declare function isModelReady(): boolean;
/**
 * Get the singleton manager's model statistics.
 */
export declare function getModelStats(): ModelStats;
export {};
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Singleton Model Manager - THE ONLY SOURCE OF EMBEDDING MODELS
|
|
3
|
+
*
|
|
4
|
+
* This is the SINGLE, UNIFIED model initialization system that ensures:
|
|
5
|
+
* - Only ONE model instance exists across the entire system
|
|
6
|
+
* - Precision is configured once and locked
|
|
7
|
+
* - All components share the same model
|
|
8
|
+
* - No possibility of mixed precisions
|
|
9
|
+
*
|
|
10
|
+
* CRITICAL: This manager is used by EVERYTHING:
|
|
11
|
+
* - Storage operations (add, update)
|
|
12
|
+
* - Search operations (search, find)
|
|
13
|
+
* - Public API (embed, cluster)
|
|
14
|
+
* - Neural API (all neural.* methods)
|
|
15
|
+
* - Internal operations (deduplication, indexing)
|
|
16
|
+
*/
|
|
17
|
+
import { TransformerEmbedding } from '../utils/embedding.js';
|
|
18
|
+
import { getModelPrecision, lockModelPrecision } from '../config/modelPrecisionManager.js';
|
|
19
|
+
// Global state - ensures true singleton across entire process
let globalModelInstance = null;
let globalInitPromise = null;
let globalInitialized = false;
/**
 * The ONE TRUE model manager.
 *
 * Lazily creates a single `TransformerEmbedding` and hands the same instance
 * to every caller. Concurrent callers share one in-flight initialization via
 * the module-level `globalInitPromise`; a failed initialization is reset so a
 * later call can retry.
 */
export class SingletonModelManager {
    /**
     * Use `SingletonModelManager.getInstance()` rather than `new`; the
     * constructor is meant to be private to enforce the singleton.
     */
    constructor() {
        this.stats = {
            initialized: false,
            precision: getModelPrecision(),
            initCount: 0,
            embedCount: 0,
            lastUsed: null
        };
        console.log(`🔐 SingletonModelManager initialized with ${this.stats.precision.toUpperCase()} precision`);
    }
    /**
     * Get the singleton instance, creating it on first call.
     * @returns {SingletonModelManager}
     */
    static getInstance() {
        if (!SingletonModelManager.instance) {
            SingletonModelManager.instance = new SingletonModelManager();
        }
        return SingletonModelManager.instance;
    }
    /**
     * Get the model instance - creates if needed, reuses if exists.
     * This is THE ONLY way to get a model in the entire system.
     * @returns {Promise<TransformerEmbedding>}
     * @throws {Error} if initialization fails (a later call will retry)
     */
    async getModel() {
        // If already initialized, return immediately
        if (globalModelInstance && globalInitialized) {
            this.stats.lastUsed = new Date();
            return globalModelInstance;
        }
        // If initialization is in progress, wait for it
        if (globalInitPromise) {
            console.log('⏳ Model initialization already in progress, waiting...');
            return await globalInitPromise;
        }
        // Start initialization (only happens once ever)
        globalInitPromise = this.initializeModel();
        try {
            const model = await globalInitPromise;
            globalInitialized = true;
            return model;
        }
        catch (error) {
            // Reset on error to allow retry
            globalInitPromise = null;
            throw error;
        }
    }
    /**
     * Initialize the model: build environment-dependent options, create and
     * init the `TransformerEmbedding`, then lock the precision.
     * @returns {Promise<TransformerEmbedding>}
     * @throws {Error} wrapping the underlying failure (available as `cause`)
     */
    async initializeModel() {
        console.log('🚀 Initializing singleton model instance...');
        // Get precision from central manager
        const precision = getModelPrecision();
        console.log(`📊 Using ${precision.toUpperCase()} precision (${precision === 'q8' ? '23MB, 99% accuracy' : '90MB, 100% accuracy'})`);
        // Detect environment for optimal settings. Every read of `process`
        // is guarded: in a browser it does not exist, and an unguarded
        // `process.env` would throw before the model is ever created.
        const hasProcess = typeof process !== 'undefined';
        const envVars = (hasProcess && process.env) || {};
        const isNode = hasProcess && process.versions?.node;
        const isBrowser = typeof window !== 'undefined' && typeof document !== 'undefined';
        const isServerless = Boolean(envVars.VERCEL ||
            envVars.NETLIFY ||
            envVars.AWS_LAMBDA_FUNCTION_NAME ||
            envVars.FUNCTIONS_WORKER_RUNTIME);
        const isTest = globalThis.__BRAINY_TEST_ENV__ || envVars.NODE_ENV === 'test';
        // Create optimized options based on environment
        const options = {
            precision: precision,
            verbose: !isTest && !isServerless && !isBrowser,
            device: 'cpu', // CPU is most compatible
            localFilesOnly: envVars.BRAINY_ALLOW_REMOTE_MODELS === 'false',
            model: 'Xenova/all-MiniLM-L6-v2'
        };
        try {
            // Create the ONE model instance
            globalModelInstance = new TransformerEmbedding(options);
            // Initialize it
            await globalModelInstance.init();
            // CRITICAL: Lock the precision after successful initialization
            // This prevents any future changes to precision
            lockModelPrecision();
            console.log('🔒 Model precision locked at:', precision.toUpperCase());
            // Update stats
            this.stats.initialized = true;
            this.stats.initCount++;
            this.stats.lastUsed = new Date();
            // Log memory usage if available
            if (isNode && process.memoryUsage) {
                const usage = process.memoryUsage();
                this.stats.memoryFootprint = Math.round(usage.heapUsed / 1024 / 1024);
                console.log(`💾 Model loaded, memory usage: ${this.stats.memoryFootprint}MB`);
            }
            console.log('✅ Singleton model initialized successfully');
            return globalModelInstance;
        }
        catch (error) {
            console.error('❌ Failed to initialize singleton model:', error);
            globalModelInstance = null;
            // Keep the original error reachable for debugging.
            throw new Error(`Singleton model initialization failed: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
        }
    }
    /**
     * Get embedding function that uses the singleton model.
     * @returns {Promise<(data: string|string[]) => Promise<number[]>>}
     */
    async getEmbeddingFunction() {
        const model = await this.getModel();
        return async (data) => {
            this.stats.embedCount++;
            this.stats.lastUsed = new Date();
            return await model.embed(data);
        };
    }
    /**
     * Direct embed method for convenience.
     * @param {string|string[]} data - text to embed
     * @returns {Promise<number[]>} result of the model's `embed`
     */
    async embed(data) {
        const model = await this.getModel();
        this.stats.embedCount++;
        this.stats.lastUsed = new Date();
        return await model.embed(data);
    }
    /**
     * Check if model is initialized.
     * @returns {boolean}
     */
    isInitialized() {
        return globalInitialized && globalModelInstance !== null;
    }
    /**
     * Get a copy of the current statistics with the live precision value.
     * @returns {object}
     */
    getStats() {
        return {
            ...this.stats,
            precision: getModelPrecision()
        };
    }
    /**
     * Validate precision consistency.
     * @param {string} [requestedPrecision] - omitted/empty means "no check"
     * @throws {Error} if a different precision than the current one is requested
     */
    validatePrecision(requestedPrecision) {
        const currentPrecision = getModelPrecision();
        if (requestedPrecision && requestedPrecision !== currentPrecision) {
            throw new Error(`❌ Precision mismatch! System is using ${currentPrecision.toUpperCase()} ` +
                `but ${requestedPrecision.toUpperCase()} was requested. ` +
                `All operations must use the same precision.`);
        }
    }
    /**
     * Force cleanup (for testing only).
     * WARNING: This will break consistency - use only in tests.
     * @returns {Promise<void>}
     * @throws {Error} unless NODE_ENV is 'test'
     */
    async _testOnlyCleanup() {
        if (typeof process === 'undefined' || process.env.NODE_ENV !== 'test') {
            throw new Error('Cleanup only allowed in test environment');
        }
        if (globalModelInstance && 'dispose' in globalModelInstance) {
            await globalModelInstance.dispose();
        }
        globalModelInstance = null;
        globalInitPromise = null;
        globalInitialized = false;
        this.stats.initialized = false;
        console.log('🧹 Singleton model cleaned up (test only)');
    }
}
|
|
193
|
+
// Shared singleton manager, obtained at module load via SingletonModelManager.getInstance().
// The convenience functions exported from this module delegate to it.
export const singletonModelManager = SingletonModelManager.getInstance();
|
|
195
|
+
/**
 * Obtain the embedding function backed by the singleton model, so every
 * caller embeds with the same model instance.
 * @returns {Promise<Function>} result of `singletonModelManager.getEmbeddingFunction()`
 */
export async function getUnifiedEmbeddingFunction() {
    const embedFn = await singletonModelManager.getEmbeddingFunction();
    return embedFn;
}
|
|
202
|
+
/**
 * Embed data through the singleton model manager.
 * @param {string|string[]} data - text to embed
 * @returns {Promise<number[]>} whatever `singletonModelManager.embed` resolves to
 */
export async function unifiedEmbed(data) {
    const vector = await singletonModelManager.embed(data);
    return vector;
}
|
|
208
|
+
/**
 * Report whether the singleton model has been initialized.
 * @returns {boolean} result of `singletonModelManager.isInitialized()`
 */
export function isModelReady() {
    const ready = singletonModelManager.isInitialized();
    return ready;
}
|
|
214
|
+
/**
 * Report the singleton model manager's statistics.
 * @returns {object} result of `singletonModelManager.getStats()`
 */
export function getModelStats() {
    const stats = singletonModelManager.getStats();
    return stats;
}
|
|
220
|
+
//# sourceMappingURL=SingletonModelManager.js.map
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embeddings Module - Clean, Unified Architecture
|
|
3
|
+
*
|
|
4
|
+
* This module provides all embedding functionality for Brainy.
|
|
5
|
+
*
|
|
6
|
+
* Main Components:
|
|
7
|
+
* - EmbeddingManager: Core embedding generation with Q8/FP32 support
|
|
8
|
+
* - CachedEmbeddings: Performance optimization layer with pre-computed embeddings
|
|
9
|
+
*/
|
|
10
|
+
export { EmbeddingManager, embeddingManager, embed, getEmbeddingFunction, getEmbeddingStats, type ModelPrecision } from './EmbeddingManager.js';
|
|
11
|
+
export { CachedEmbeddings, cachedEmbeddings } from './CachedEmbeddings.js';
|
|
12
|
+
export { embeddingManager as default } from './EmbeddingManager.js';
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embeddings Module - Clean, Unified Architecture
|
|
3
|
+
*
|
|
4
|
+
* This module provides all embedding functionality for Brainy.
|
|
5
|
+
*
|
|
6
|
+
* Main Components:
|
|
7
|
+
* - EmbeddingManager: Core embedding generation with Q8/FP32 support
|
|
8
|
+
* - CachedEmbeddings: Performance optimization layer with pre-computed embeddings
|
|
9
|
+
*/
|
|
10
|
+
// Core embedding functionality
|
|
11
|
+
export { EmbeddingManager, embeddingManager, embed, getEmbeddingFunction, getEmbeddingStats } from './EmbeddingManager.js';
|
|
12
|
+
// Cached embeddings for performance
|
|
13
|
+
export { CachedEmbeddings, cachedEmbeddings } from './CachedEmbeddings.js';
|
|
14
|
+
// Default export is the singleton manager
|
|
15
|
+
export { embeddingManager as default } from './EmbeddingManager.js';
|
|
16
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
*
|
|
7
7
|
* This reduces memory usage by 90% for typical queries
|
|
8
8
|
*/
|
|
9
|
+
import { singletonModelManager } from './SingletonModelManager.js';
|
|
9
10
|
// Pre-computed embeddings for top 10,000 common terms
|
|
10
11
|
// In production, this would be loaded from a file
|
|
11
12
|
const PRECOMPUTED_EMBEDDINGS = {
|
|
@@ -59,7 +60,6 @@ function computeSimpleEmbedding(text) {
|
|
|
59
60
|
}
|
|
60
61
|
export class LightweightEmbedder {
|
|
61
62
|
constructor() {
|
|
62
|
-
this.onnxEmbedder = null;
|
|
63
63
|
this.stats = {
|
|
64
64
|
precomputedHits: 0,
|
|
65
65
|
simpleComputes: 0,
|
|
@@ -92,18 +92,10 @@ export class LightweightEmbedder {
|
|
|
92
92
|
this.stats.simpleComputes++;
|
|
93
93
|
return computeSimpleEmbedding(normalized);
|
|
94
94
|
}
|
|
95
|
-
// 4. Last resort:
|
|
96
|
-
|
|
97
|
-
console.log('โ ๏ธ Loading ONNX model for complex text...');
|
|
98
|
-
const { TransformerEmbedding } = await import('../utils/embedding.js');
|
|
99
|
-
this.onnxEmbedder = new TransformerEmbedding({
|
|
100
|
-
precision: 'fp32',
|
|
101
|
-
verbose: false
|
|
102
|
-
});
|
|
103
|
-
await this.onnxEmbedder.init();
|
|
104
|
-
}
|
|
95
|
+
// 4. Last resort: Use SingletonModelManager for complex text
|
|
96
|
+
console.log('โ ๏ธ Using singleton model for complex text...');
|
|
105
97
|
this.stats.onnxComputes++;
|
|
106
|
-
return await
|
|
98
|
+
return await singletonModelManager.embed(text);
|
|
107
99
|
}
|
|
108
100
|
getStats() {
|
|
109
101
|
return {
|