@soulcraft/brainy 2.12.0 → 2.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,296 @@
1
+ /**
2
+ * Unified Embedding Manager
3
+ *
4
+ * THE single source of truth for all embedding operations in Brainy.
5
+ * Combines model management, precision configuration, and embedding generation
6
+ * into one clean, maintainable class.
7
+ *
8
+ * Features:
9
+ * - Singleton pattern ensures ONE model instance
10
+ * - Automatic Q8 (default) or FP32 precision
11
+ * - Model downloading and caching
12
+ * - Thread-safe initialization
13
+ * - Memory monitoring
14
+ *
15
+ * This replaces: SingletonModelManager, TransformerEmbedding, ModelPrecisionManager,
16
+ * hybridModelManager, universalMemoryManager, and more.
17
+ */
18
+ import { pipeline, env } from '@huggingface/transformers';
19
+ import { existsSync } from 'fs';
20
+ import { join } from 'path';
21
+ // Global state for true singleton across entire process
22
+ let globalInstance = null;
23
+ let globalInitPromise = null;
24
+ /**
25
+ * Unified Embedding Manager - Clean, simple, reliable
26
+ */
27
+ export class EmbeddingManager {
28
+ constructor() {
29
+ this.model = null;
30
+ this.modelName = 'Xenova/all-MiniLM-L6-v2';
31
+ this.initialized = false;
32
+ this.initTime = null;
33
+ this.embedCount = 0;
34
+ this.locked = false;
35
+ // Determine precision - Q8 by default
36
+ this.precision = this.determinePrecision();
37
+ console.log(`🎯 EmbeddingManager: Using ${this.precision.toUpperCase()} precision`);
38
+ }
39
+ /**
40
+ * Get the singleton instance
41
+ */
42
+ static getInstance() {
43
+ if (!globalInstance) {
44
+ globalInstance = new EmbeddingManager();
45
+ }
46
+ return globalInstance;
47
+ }
48
+ /**
49
+ * Initialize the model (happens once)
50
+ */
51
+ async init() {
52
+ // In unit test mode, skip real model initialization
53
+ if (process.env.BRAINY_UNIT_TEST === 'true' || globalThis.__BRAINY_UNIT_TEST__) {
54
+ if (!this.initialized) {
55
+ this.initialized = true;
56
+ this.initTime = 1; // Mock init time
57
+ console.log('🧪 EmbeddingManager: Using mocked embeddings for unit tests');
58
+ }
59
+ return;
60
+ }
61
+ // Already initialized
62
+ if (this.initialized && this.model) {
63
+ return;
64
+ }
65
+ // Initialization in progress
66
+ if (globalInitPromise) {
67
+ await globalInitPromise;
68
+ return;
69
+ }
70
+ // Start initialization
71
+ globalInitPromise = this.performInit();
72
+ try {
73
+ await globalInitPromise;
74
+ }
75
+ finally {
76
+ globalInitPromise = null;
77
+ }
78
+ }
79
+ /**
80
+ * Perform actual initialization
81
+ */
82
+ async performInit() {
83
+ const startTime = Date.now();
84
+ console.log(`🚀 Initializing embedding model (${this.precision.toUpperCase()})...`);
85
+ try {
86
+ // Configure transformers.js environment
87
+ const modelsPath = this.getModelsPath();
88
+ env.cacheDir = modelsPath;
89
+ env.allowLocalModels = true;
90
+ env.useFSCache = true;
91
+ // Check if models exist locally
92
+ const modelPath = join(modelsPath, ...this.modelName.split('/'));
93
+ const hasLocalModels = existsSync(modelPath);
94
+ if (hasLocalModels) {
95
+ console.log('✅ Using cached models from:', modelPath);
96
+ }
97
+ // Configure pipeline options for the selected precision
98
+ const pipelineOptions = {
99
+ cache_dir: modelsPath,
100
+ local_files_only: false,
101
+ // Specify precision
102
+ dtype: this.precision,
103
+ quantized: this.precision === 'q8',
104
+ // Memory optimizations
105
+ session_options: {
106
+ enableCpuMemArena: false,
107
+ enableMemPattern: false,
108
+ interOpNumThreads: 1,
109
+ intraOpNumThreads: 1,
110
+ graphOptimizationLevel: 'disabled'
111
+ }
112
+ };
113
+ // Load the model
114
+ this.model = await pipeline('feature-extraction', this.modelName, pipelineOptions);
115
+ // Lock precision after successful initialization
116
+ this.locked = true;
117
+ this.initialized = true;
118
+ this.initTime = Date.now() - startTime;
119
+ // Log success
120
+ const memoryMB = this.getMemoryUsage();
121
+ console.log(`✅ Model loaded in ${this.initTime}ms`);
122
+ console.log(`📊 Precision: ${this.precision.toUpperCase()} | Memory: ${memoryMB}MB`);
123
+ console.log(`🔒 Configuration locked`);
124
+ }
125
+ catch (error) {
126
+ this.initialized = false;
127
+ this.model = null;
128
+ throw new Error(`Failed to initialize embedding model: ${error instanceof Error ? error.message : String(error)}`);
129
+ }
130
+ }
131
+ /**
132
+ * Generate embeddings
133
+ */
134
+ async embed(text) {
135
+ // Check for unit test environment - use mocks to prevent ONNX conflicts
136
+ if (process.env.BRAINY_UNIT_TEST === 'true' || globalThis.__BRAINY_UNIT_TEST__) {
137
+ return this.getMockEmbedding(text);
138
+ }
139
+ // Ensure initialized
140
+ await this.init();
141
+ if (!this.model) {
142
+ throw new Error('Model not initialized');
143
+ }
144
+ // Handle array input
145
+ const input = Array.isArray(text) ? text.join(' ') : text;
146
+ // Generate embedding
147
+ const output = await this.model(input, {
148
+ pooling: 'mean',
149
+ normalize: true
150
+ });
151
+ // Extract embedding vector
152
+ const embedding = Array.from(output.data);
153
+ // Validate dimensions
154
+ if (embedding.length !== 384) {
155
+ console.warn(`Unexpected embedding dimension: ${embedding.length}`);
156
+ // Pad or truncate
157
+ if (embedding.length < 384) {
158
+ return [...embedding, ...new Array(384 - embedding.length).fill(0)];
159
+ }
160
+ else {
161
+ return embedding.slice(0, 384);
162
+ }
163
+ }
164
+ this.embedCount++;
165
+ return embedding;
166
+ }
167
+ /**
168
+ * Generate mock embeddings for unit tests
169
+ */
170
+ getMockEmbedding(text) {
171
+ // Use the same mock logic as setup-unit.ts for consistency
172
+ const input = Array.isArray(text) ? text.join(' ') : text;
173
+ const str = typeof input === 'string' ? input : JSON.stringify(input);
174
+ const vector = new Array(384).fill(0);
175
+ // Create semi-realistic embeddings based on text content
176
+ for (let i = 0; i < Math.min(str.length, 384); i++) {
177
+ vector[i] = (str.charCodeAt(i % str.length) % 256) / 256;
178
+ }
179
+ // Add position-based variation
180
+ for (let i = 0; i < 384; i++) {
181
+ vector[i] += Math.sin(i * 0.1 + str.length) * 0.1;
182
+ }
183
+ // Track mock embedding count
184
+ this.embedCount++;
185
+ return vector;
186
+ }
187
+ /**
188
+ * Get embedding function for compatibility
189
+ */
190
+ getEmbeddingFunction() {
191
+ return async (data) => {
192
+ return await this.embed(data);
193
+ };
194
+ }
195
+ /**
196
+ * Determine model precision
197
+ */
198
+ determinePrecision() {
199
+ // Check environment variable overrides
200
+ if (process.env.BRAINY_MODEL_PRECISION === 'fp32') {
201
+ return 'fp32';
202
+ }
203
+ if (process.env.BRAINY_MODEL_PRECISION === 'q8') {
204
+ return 'q8';
205
+ }
206
+ if (process.env.BRAINY_FORCE_FP32 === 'true') {
207
+ return 'fp32';
208
+ }
209
+ // Default to Q8 - optimal for most use cases
210
+ return 'q8';
211
+ }
212
+ /**
213
+ * Get models directory path
214
+ */
215
+ getModelsPath() {
216
+ // Check various possible locations
217
+ const paths = [
218
+ process.env.BRAINY_MODELS_PATH,
219
+ './models',
220
+ join(process.cwd(), 'models'),
221
+ join(process.env.HOME || '', '.brainy', 'models')
222
+ ];
223
+ for (const path of paths) {
224
+ if (path && existsSync(path)) {
225
+ return path;
226
+ }
227
+ }
228
+ // Default
229
+ return join(process.cwd(), 'models');
230
+ }
231
+ /**
232
+ * Get memory usage in MB
233
+ */
234
+ getMemoryUsage() {
235
+ if (typeof process !== 'undefined' && process.memoryUsage) {
236
+ const usage = process.memoryUsage();
237
+ return Math.round(usage.heapUsed / 1024 / 1024);
238
+ }
239
+ return null;
240
+ }
241
+ /**
242
+ * Get current statistics
243
+ */
244
+ getStats() {
245
+ return {
246
+ initialized: this.initialized,
247
+ precision: this.precision,
248
+ modelName: this.modelName,
249
+ embedCount: this.embedCount,
250
+ initTime: this.initTime,
251
+ memoryMB: this.getMemoryUsage()
252
+ };
253
+ }
254
+ /**
255
+ * Check if initialized
256
+ */
257
+ isInitialized() {
258
+ return this.initialized;
259
+ }
260
+ /**
261
+ * Get current precision
262
+ */
263
+ getPrecision() {
264
+ return this.precision;
265
+ }
266
+ /**
267
+ * Validate precision matches expected
268
+ */
269
+ validatePrecision(expected) {
270
+ if (this.locked && expected !== this.precision) {
271
+ throw new Error(`Precision mismatch! System using ${this.precision.toUpperCase()} ` +
272
+ `but ${expected.toUpperCase()} was requested. Cannot mix precisions.`);
273
+ }
274
+ }
275
+ }
276
+ // Export singleton instance and convenience functions
277
+ export const embeddingManager = EmbeddingManager.getInstance();
278
+ /**
279
+ * Direct embed function
280
+ */
281
+ export async function embed(text) {
282
+ return await embeddingManager.embed(text);
283
+ }
284
+ /**
285
+ * Get embedding function for compatibility
286
+ */
287
+ export function getEmbeddingFunction() {
288
+ return embeddingManager.getEmbeddingFunction();
289
+ }
290
+ /**
291
+ * Get statistics
292
+ */
293
+ export function getEmbeddingStats() {
294
+ return embeddingManager.getStats();
295
+ }
296
+ //# sourceMappingURL=EmbeddingManager.js.map
@@ -0,0 +1,95 @@
1
+ /**
2
+ * Singleton Model Manager - THE ONLY SOURCE OF EMBEDDING MODELS
3
+ *
4
+ * This is the SINGLE, UNIFIED model initialization system that ensures:
5
+ * - Only ONE model instance exists across the entire system
6
+ * - Precision is configured once and locked
7
+ * - All components share the same model
8
+ * - No possibility of mixed precisions
9
+ *
10
+ * CRITICAL: This manager is used by EVERYTHING:
11
+ * - Storage operations (add, update)
12
+ * - Search operations (search, find)
13
+ * - Public API (embed, cluster)
14
+ * - Neural API (all neural.* methods)
15
+ * - Internal operations (deduplication, indexing)
16
+ */
17
+ import { TransformerEmbedding } from '../utils/embedding.js';
18
+ import { EmbeddingFunction, Vector } from '../coreTypes.js';
19
+ /**
20
+ * Statistics for monitoring
21
+ */
22
+ interface ModelStats {
23
+ initialized: boolean;
24
+ precision: string;
25
+ initCount: number;
26
+ embedCount: number;
27
+ lastUsed: Date | null;
28
+ memoryFootprint?: number;
29
+ }
30
+ /**
31
+ * The ONE TRUE model manager
32
+ */
33
+ export declare class SingletonModelManager {
34
+ private static instance;
35
+ private stats;
36
+ private constructor();
37
+ /**
38
+ * Get the singleton instance
39
+ */
40
+ static getInstance(): SingletonModelManager;
41
+ /**
42
+ * Get the model instance - creates if needed, reuses if exists
43
+ * This is THE ONLY way to get a model in the entire system
44
+ */
45
+ getModel(): Promise<TransformerEmbedding>;
46
+ /**
47
+ * Initialize the model - happens exactly once
48
+ */
49
+ private initializeModel;
50
+ /**
51
+ * Get embedding function that uses the singleton model
52
+ */
53
+ getEmbeddingFunction(): Promise<EmbeddingFunction>;
54
+ /**
55
+ * Direct embed method for convenience
56
+ */
57
+ embed(data: string | string[]): Promise<Vector>;
58
+ /**
59
+ * Check if model is initialized
60
+ */
61
+ isInitialized(): boolean;
62
+ /**
63
+ * Get current statistics
64
+ */
65
+ getStats(): ModelStats;
66
+ /**
67
+ * Validate precision consistency
68
+ * Throws error if attempting to use different precision
69
+ */
70
+ validatePrecision(requestedPrecision?: string): void;
71
+ /**
72
+ * Force cleanup (for testing only)
73
+ * WARNING: This will break consistency - use only in tests
74
+ */
75
+ _testOnlyCleanup(): Promise<void>;
76
+ }
77
+ export declare const singletonModelManager: SingletonModelManager;
78
+ /**
79
+ * THE ONLY embedding function that should be used anywhere
80
+ * This ensures all operations use the same model instance
81
+ */
82
+ export declare function getUnifiedEmbeddingFunction(): Promise<EmbeddingFunction>;
83
+ /**
84
+ * Direct embed function for convenience
85
+ */
86
+ export declare function unifiedEmbed(data: string | string[]): Promise<Vector>;
87
+ /**
88
+ * Check if model is ready
89
+ */
90
+ export declare function isModelReady(): boolean;
91
+ /**
92
+ * Get model statistics
93
+ */
94
+ export declare function getModelStats(): ModelStats;
95
+ export {};
@@ -0,0 +1,220 @@
1
+ /**
2
+ * Singleton Model Manager - THE ONLY SOURCE OF EMBEDDING MODELS
3
+ *
4
+ * This is the SINGLE, UNIFIED model initialization system that ensures:
5
+ * - Only ONE model instance exists across the entire system
6
+ * - Precision is configured once and locked
7
+ * - All components share the same model
8
+ * - No possibility of mixed precisions
9
+ *
10
+ * CRITICAL: This manager is used by EVERYTHING:
11
+ * - Storage operations (add, update)
12
+ * - Search operations (search, find)
13
+ * - Public API (embed, cluster)
14
+ * - Neural API (all neural.* methods)
15
+ * - Internal operations (deduplication, indexing)
16
+ */
17
+ import { TransformerEmbedding } from '../utils/embedding.js';
18
+ import { getModelPrecision, lockModelPrecision } from '../config/modelPrecisionManager.js';
19
+ // Global state - ensures true singleton across entire process
20
+ let globalModelInstance = null;
21
+ let globalInitPromise = null;
22
+ let globalInitialized = false;
23
+ /**
24
+ * The ONE TRUE model manager
25
+ */
26
+ export class SingletonModelManager {
27
+ constructor() {
28
+ this.stats = {
29
+ initialized: false,
30
+ precision: 'unknown',
31
+ initCount: 0,
32
+ embedCount: 0,
33
+ lastUsed: null
34
+ };
35
+ // Private constructor enforces singleton
36
+ this.stats.precision = getModelPrecision();
37
+ console.log(`🔐 SingletonModelManager initialized with ${this.stats.precision.toUpperCase()} precision`);
38
+ }
39
+ /**
40
+ * Get the singleton instance
41
+ */
42
+ static getInstance() {
43
+ if (!SingletonModelManager.instance) {
44
+ SingletonModelManager.instance = new SingletonModelManager();
45
+ }
46
+ return SingletonModelManager.instance;
47
+ }
48
+ /**
49
+ * Get the model instance - creates if needed, reuses if exists
50
+ * This is THE ONLY way to get a model in the entire system
51
+ */
52
+ async getModel() {
53
+ // If already initialized, return immediately
54
+ if (globalModelInstance && globalInitialized) {
55
+ this.stats.lastUsed = new Date();
56
+ return globalModelInstance;
57
+ }
58
+ // If initialization is in progress, wait for it
59
+ if (globalInitPromise) {
60
+ console.log('⏳ Model initialization already in progress, waiting...');
61
+ return await globalInitPromise;
62
+ }
63
+ // Start initialization (only happens once ever)
64
+ globalInitPromise = this.initializeModel();
65
+ try {
66
+ const model = await globalInitPromise;
67
+ globalInitialized = true;
68
+ return model;
69
+ }
70
+ catch (error) {
71
+ // Reset on error to allow retry
72
+ globalInitPromise = null;
73
+ throw error;
74
+ }
75
+ }
76
+ /**
77
+ * Initialize the model - happens exactly once
78
+ */
79
+ async initializeModel() {
80
+ console.log('🚀 Initializing singleton model instance...');
81
+ // Get precision from central manager
82
+ const precision = getModelPrecision();
83
+ console.log(`📊 Using ${precision.toUpperCase()} precision (${precision === 'q8' ? '23MB, 99% accuracy' : '90MB, 100% accuracy'})`);
84
+ // Detect environment for optimal settings
85
+ const isNode = typeof process !== 'undefined' && process.versions?.node;
86
+ const isBrowser = typeof window !== 'undefined' && typeof document !== 'undefined';
87
+ const isServerless = typeof process !== 'undefined' && (process.env.VERCEL ||
88
+ process.env.NETLIFY ||
89
+ process.env.AWS_LAMBDA_FUNCTION_NAME ||
90
+ process.env.FUNCTIONS_WORKER_RUNTIME);
91
+ const isTest = globalThis.__BRAINY_TEST_ENV__ || process.env.NODE_ENV === 'test';
92
+ // Create optimized options based on environment
93
+ const options = {
94
+ precision: precision,
95
+ verbose: !isTest && !isServerless && !isBrowser,
96
+ device: 'cpu', // CPU is most compatible
97
+ localFilesOnly: process.env.BRAINY_ALLOW_REMOTE_MODELS === 'false',
98
+ model: 'Xenova/all-MiniLM-L6-v2'
99
+ };
100
+ try {
101
+ // Create the ONE model instance
102
+ globalModelInstance = new TransformerEmbedding(options);
103
+ // Initialize it
104
+ await globalModelInstance.init();
105
+ // CRITICAL: Lock the precision after successful initialization
106
+ // This prevents any future changes to precision
107
+ lockModelPrecision();
108
+ console.log('🔒 Model precision locked at:', precision.toUpperCase());
109
+ // Update stats
110
+ this.stats.initialized = true;
111
+ this.stats.initCount++;
112
+ this.stats.lastUsed = new Date();
113
+ // Log memory usage if available
114
+ if (isNode && process.memoryUsage) {
115
+ const usage = process.memoryUsage();
116
+ this.stats.memoryFootprint = Math.round(usage.heapUsed / 1024 / 1024);
117
+ console.log(`💾 Model loaded, memory usage: ${this.stats.memoryFootprint}MB`);
118
+ }
119
+ console.log('✅ Singleton model initialized successfully');
120
+ return globalModelInstance;
121
+ }
122
+ catch (error) {
123
+ console.error('❌ Failed to initialize singleton model:', error);
124
+ globalModelInstance = null;
125
+ throw new Error(`Singleton model initialization failed: ${error instanceof Error ? error.message : String(error)}`);
126
+ }
127
+ }
128
+ /**
129
+ * Get embedding function that uses the singleton model
130
+ */
131
+ async getEmbeddingFunction() {
132
+ const model = await this.getModel();
133
+ return async (data) => {
134
+ this.stats.embedCount++;
135
+ this.stats.lastUsed = new Date();
136
+ return await model.embed(data);
137
+ };
138
+ }
139
+ /**
140
+ * Direct embed method for convenience
141
+ */
142
+ async embed(data) {
143
+ const model = await this.getModel();
144
+ this.stats.embedCount++;
145
+ this.stats.lastUsed = new Date();
146
+ return await model.embed(data);
147
+ }
148
+ /**
149
+ * Check if model is initialized
150
+ */
151
+ isInitialized() {
152
+ return globalInitialized && globalModelInstance !== null;
153
+ }
154
+ /**
155
+ * Get current statistics
156
+ */
157
+ getStats() {
158
+ return {
159
+ ...this.stats,
160
+ precision: getModelPrecision()
161
+ };
162
+ }
163
+ /**
164
+ * Validate precision consistency
165
+ * Throws error if attempting to use different precision
166
+ */
167
+ validatePrecision(requestedPrecision) {
168
+ const currentPrecision = getModelPrecision();
169
+ if (requestedPrecision && requestedPrecision !== currentPrecision) {
170
+ throw new Error(`❌ Precision mismatch! System is using ${currentPrecision.toUpperCase()} ` +
171
+ `but ${requestedPrecision.toUpperCase()} was requested. ` +
172
+ `All operations must use the same precision.`);
173
+ }
174
+ }
175
+ /**
176
+ * Force cleanup (for testing only)
177
+ * WARNING: This will break consistency - use only in tests
178
+ */
179
+ async _testOnlyCleanup() {
180
+ if (process.env.NODE_ENV !== 'test') {
181
+ throw new Error('Cleanup only allowed in test environment');
182
+ }
183
+ if (globalModelInstance && 'dispose' in globalModelInstance) {
184
+ await globalModelInstance.dispose();
185
+ }
186
+ globalModelInstance = null;
187
+ globalInitPromise = null;
188
+ globalInitialized = false;
189
+ this.stats.initialized = false;
190
+ console.log('🧹 Singleton model cleaned up (test only)');
191
+ }
192
+ }
193
+ // Export the singleton instance getter
194
+ export const singletonModelManager = SingletonModelManager.getInstance();
195
+ /**
196
+ * THE ONLY embedding function that should be used anywhere
197
+ * This ensures all operations use the same model instance
198
+ */
199
+ export async function getUnifiedEmbeddingFunction() {
200
+ return await singletonModelManager.getEmbeddingFunction();
201
+ }
202
+ /**
203
+ * Direct embed function for convenience
204
+ */
205
+ export async function unifiedEmbed(data) {
206
+ return await singletonModelManager.embed(data);
207
+ }
208
+ /**
209
+ * Check if model is ready
210
+ */
211
+ export function isModelReady() {
212
+ return singletonModelManager.isInitialized();
213
+ }
214
+ /**
215
+ * Get model statistics
216
+ */
217
+ export function getModelStats() {
218
+ return singletonModelManager.getStats();
219
+ }
220
+ //# sourceMappingURL=SingletonModelManager.js.map
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Embeddings Module - Clean, Unified Architecture
3
+ *
4
+ * This module provides all embedding functionality for Brainy.
5
+ *
6
+ * Main Components:
7
+ * - EmbeddingManager: Core embedding generation with Q8/FP32 support
8
+ * - CachedEmbeddings: Performance optimization layer with pre-computed embeddings
9
+ */
10
+ export { EmbeddingManager, embeddingManager, embed, getEmbeddingFunction, getEmbeddingStats, type ModelPrecision } from './EmbeddingManager.js';
11
+ export { CachedEmbeddings, cachedEmbeddings } from './CachedEmbeddings.js';
12
+ export { embeddingManager as default } from './EmbeddingManager.js';
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Embeddings Module - Clean, Unified Architecture
3
+ *
4
+ * This module provides all embedding functionality for Brainy.
5
+ *
6
+ * Main Components:
7
+ * - EmbeddingManager: Core embedding generation with Q8/FP32 support
8
+ * - CachedEmbeddings: Performance optimization layer with pre-computed embeddings
9
+ */
10
+ // Core embedding functionality
11
+ export { EmbeddingManager, embeddingManager, embed, getEmbeddingFunction, getEmbeddingStats } from './EmbeddingManager.js';
12
+ // Cached embeddings for performance
13
+ export { CachedEmbeddings, cachedEmbeddings } from './CachedEmbeddings.js';
14
+ // Default export is the singleton manager
15
+ export { embeddingManager as default } from './EmbeddingManager.js';
16
+ //# sourceMappingURL=index.js.map
@@ -8,7 +8,6 @@
8
8
  */
9
9
  import { Vector } from '../coreTypes.js';
10
10
  export declare class LightweightEmbedder {
11
- private onnxEmbedder;
12
11
  private stats;
13
12
  embed(text: string | string[]): Promise<Vector | Vector[]>;
14
13
  private embedSingle;
@@ -6,6 +6,7 @@
6
6
  *
7
7
  * This reduces memory usage by 90% for typical queries
8
8
  */
9
+ import { singletonModelManager } from './SingletonModelManager.js';
9
10
  // Pre-computed embeddings for top 10,000 common terms
10
11
  // In production, this would be loaded from a file
11
12
  const PRECOMPUTED_EMBEDDINGS = {
@@ -59,7 +60,6 @@ function computeSimpleEmbedding(text) {
59
60
  }
60
61
  export class LightweightEmbedder {
61
62
  constructor() {
62
- this.onnxEmbedder = null;
63
63
  this.stats = {
64
64
  precomputedHits: 0,
65
65
  simpleComputes: 0,
@@ -92,18 +92,10 @@ export class LightweightEmbedder {
92
92
  this.stats.simpleComputes++;
93
93
  return computeSimpleEmbedding(normalized);
94
94
  }
95
- // 4. Last resort: Load ONNX model (only if really needed)
96
- if (!this.onnxEmbedder) {
97
- console.log('⚠️ Loading ONNX model for complex text...');
98
- const { TransformerEmbedding } = await import('../utils/embedding.js');
99
- this.onnxEmbedder = new TransformerEmbedding({
100
- precision: 'fp32',
101
- verbose: false
102
- });
103
- await this.onnxEmbedder.init();
104
- }
95
+ // 4. Last resort: Use SingletonModelManager for complex text
96
+ console.log('⚠️ Using singleton model for complex text...');
105
97
  this.stats.onnxComputes++;
106
- return await this.onnxEmbedder.embed(text);
98
+ return await singletonModelManager.embed(text);
107
99
  }
108
100
  getStats() {
109
101
  return {