@soulcraft/brainy 1.5.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/CHANGELOG.md +188 -0
  2. package/LICENSE +2 -2
  3. package/README.md +200 -595
  4. package/bin/brainy-interactive.js +564 -0
  5. package/bin/brainy-ts.js +18 -0
  6. package/bin/brainy.js +672 -81
  7. package/dist/augmentationPipeline.d.ts +48 -220
  8. package/dist/augmentationPipeline.js +60 -508
  9. package/dist/augmentationRegistry.d.ts +22 -31
  10. package/dist/augmentationRegistry.js +28 -79
  11. package/dist/augmentations/apiServerAugmentation.d.ts +108 -0
  12. package/dist/augmentations/apiServerAugmentation.js +502 -0
  13. package/dist/augmentations/batchProcessingAugmentation.d.ts +95 -0
  14. package/dist/augmentations/batchProcessingAugmentation.js +567 -0
  15. package/dist/augmentations/brainyAugmentation.d.ts +153 -0
  16. package/dist/augmentations/brainyAugmentation.js +145 -0
  17. package/dist/augmentations/cacheAugmentation.d.ts +105 -0
  18. package/dist/augmentations/cacheAugmentation.js +238 -0
  19. package/dist/augmentations/conduitAugmentations.d.ts +54 -156
  20. package/dist/augmentations/conduitAugmentations.js +156 -1082
  21. package/dist/augmentations/connectionPoolAugmentation.d.ts +62 -0
  22. package/dist/augmentations/connectionPoolAugmentation.js +316 -0
  23. package/dist/augmentations/defaultAugmentations.d.ts +53 -0
  24. package/dist/augmentations/defaultAugmentations.js +88 -0
  25. package/dist/augmentations/entityRegistryAugmentation.d.ts +126 -0
  26. package/dist/augmentations/entityRegistryAugmentation.js +386 -0
  27. package/dist/augmentations/indexAugmentation.d.ts +117 -0
  28. package/dist/augmentations/indexAugmentation.js +284 -0
  29. package/dist/augmentations/intelligentVerbScoringAugmentation.d.ts +152 -0
  30. package/dist/augmentations/intelligentVerbScoringAugmentation.js +554 -0
  31. package/dist/augmentations/metricsAugmentation.d.ts +202 -0
  32. package/dist/augmentations/metricsAugmentation.js +291 -0
  33. package/dist/augmentations/monitoringAugmentation.d.ts +94 -0
  34. package/dist/augmentations/monitoringAugmentation.js +227 -0
  35. package/dist/augmentations/neuralImport.d.ts +50 -117
  36. package/dist/augmentations/neuralImport.js +255 -629
  37. package/dist/augmentations/requestDeduplicatorAugmentation.d.ts +52 -0
  38. package/dist/augmentations/requestDeduplicatorAugmentation.js +162 -0
  39. package/dist/augmentations/serverSearchAugmentations.d.ts +43 -22
  40. package/dist/augmentations/serverSearchAugmentations.js +125 -72
  41. package/dist/augmentations/storageAugmentation.d.ts +54 -0
  42. package/dist/augmentations/storageAugmentation.js +93 -0
  43. package/dist/augmentations/storageAugmentations.d.ts +96 -0
  44. package/dist/augmentations/storageAugmentations.js +182 -0
  45. package/dist/augmentations/synapseAugmentation.d.ts +156 -0
  46. package/dist/augmentations/synapseAugmentation.js +312 -0
  47. package/dist/augmentations/walAugmentation.d.ts +108 -0
  48. package/dist/augmentations/walAugmentation.js +515 -0
  49. package/dist/brainyData.d.ts +404 -130
  50. package/dist/brainyData.js +1331 -853
  51. package/dist/chat/BrainyChat.d.ts +16 -8
  52. package/dist/chat/BrainyChat.js +60 -32
  53. package/dist/chat/ChatCLI.d.ts +1 -1
  54. package/dist/chat/ChatCLI.js +6 -6
  55. package/dist/cli/catalog.d.ts +3 -3
  56. package/dist/cli/catalog.js +116 -70
  57. package/dist/cli/commands/core.d.ts +61 -0
  58. package/dist/cli/commands/core.js +348 -0
  59. package/dist/cli/commands/neural.d.ts +25 -0
  60. package/dist/cli/commands/neural.js +508 -0
  61. package/dist/cli/commands/utility.d.ts +37 -0
  62. package/dist/cli/commands/utility.js +276 -0
  63. package/dist/cli/index.d.ts +7 -0
  64. package/dist/cli/index.js +167 -0
  65. package/dist/cli/interactive.d.ts +164 -0
  66. package/dist/cli/interactive.js +542 -0
  67. package/dist/cortex/neuralImport.js +5 -5
  68. package/dist/critical/model-guardian.js +11 -4
  69. package/dist/embeddings/lightweight-embedder.d.ts +23 -0
  70. package/dist/embeddings/lightweight-embedder.js +136 -0
  71. package/dist/embeddings/universal-memory-manager.d.ts +38 -0
  72. package/dist/embeddings/universal-memory-manager.js +206 -0
  73. package/dist/embeddings/worker-embedding.d.ts +7 -0
  74. package/dist/embeddings/worker-embedding.js +77 -0
  75. package/dist/embeddings/worker-manager.d.ts +28 -0
  76. package/dist/embeddings/worker-manager.js +162 -0
  77. package/dist/examples/basicUsage.js +7 -7
  78. package/dist/graph/pathfinding.d.ts +78 -0
  79. package/dist/graph/pathfinding.js +393 -0
  80. package/dist/hnsw/hnswIndex.d.ts +13 -0
  81. package/dist/hnsw/hnswIndex.js +35 -0
  82. package/dist/hnsw/hnswIndexOptimized.d.ts +1 -0
  83. package/dist/hnsw/hnswIndexOptimized.js +3 -0
  84. package/dist/index.d.ts +9 -11
  85. package/dist/index.js +21 -11
  86. package/dist/indices/fieldIndex.d.ts +76 -0
  87. package/dist/indices/fieldIndex.js +357 -0
  88. package/dist/mcp/brainyMCPAdapter.js +3 -2
  89. package/dist/mcp/mcpAugmentationToolset.js +11 -17
  90. package/dist/neural/embeddedPatterns.d.ts +41 -0
  91. package/dist/neural/embeddedPatterns.js +4044 -0
  92. package/dist/neural/naturalLanguageProcessor.d.ts +94 -0
  93. package/dist/neural/naturalLanguageProcessor.js +317 -0
  94. package/dist/neural/naturalLanguageProcessorStatic.d.ts +64 -0
  95. package/dist/neural/naturalLanguageProcessorStatic.js +151 -0
  96. package/dist/neural/neuralAPI.d.ts +255 -0
  97. package/dist/neural/neuralAPI.js +612 -0
  98. package/dist/neural/patternLibrary.d.ts +101 -0
  99. package/dist/neural/patternLibrary.js +313 -0
  100. package/dist/neural/patterns.d.ts +27 -0
  101. package/dist/neural/patterns.js +68 -0
  102. package/dist/neural/staticPatternMatcher.d.ts +35 -0
  103. package/dist/neural/staticPatternMatcher.js +153 -0
  104. package/dist/scripts/precomputePatternEmbeddings.d.ts +19 -0
  105. package/dist/scripts/precomputePatternEmbeddings.js +100 -0
  106. package/dist/storage/adapters/fileSystemStorage.d.ts +5 -0
  107. package/dist/storage/adapters/fileSystemStorage.js +20 -0
  108. package/dist/storage/adapters/s3CompatibleStorage.d.ts +5 -0
  109. package/dist/storage/adapters/s3CompatibleStorage.js +16 -0
  110. package/dist/storage/enhancedClearOperations.d.ts +83 -0
  111. package/dist/storage/enhancedClearOperations.js +345 -0
  112. package/dist/storage/storageFactory.js +31 -27
  113. package/dist/triple/TripleIntelligence.d.ts +134 -0
  114. package/dist/triple/TripleIntelligence.js +548 -0
  115. package/dist/types/augmentations.d.ts +45 -344
  116. package/dist/types/augmentations.js +5 -2
  117. package/dist/types/brainyDataInterface.d.ts +20 -10
  118. package/dist/types/graphTypes.d.ts +46 -0
  119. package/dist/types/graphTypes.js +16 -2
  120. package/dist/utils/BoundedRegistry.d.ts +29 -0
  121. package/dist/utils/BoundedRegistry.js +54 -0
  122. package/dist/utils/embedding.js +20 -3
  123. package/dist/utils/hybridModelManager.js +10 -5
  124. package/dist/utils/metadataFilter.d.ts +33 -19
  125. package/dist/utils/metadataFilter.js +58 -23
  126. package/dist/utils/metadataIndex.d.ts +37 -6
  127. package/dist/utils/metadataIndex.js +427 -64
  128. package/dist/utils/requestDeduplicator.d.ts +10 -0
  129. package/dist/utils/requestDeduplicator.js +24 -0
  130. package/dist/utils/unifiedCache.d.ts +103 -0
  131. package/dist/utils/unifiedCache.js +311 -0
  132. package/package.json +40 -125
  133. package/scripts/ensure-models.js +108 -0
  134. package/scripts/prepare-models.js +387 -0
  135. package/OFFLINE_MODELS.md +0 -56
  136. package/dist/intelligence/neuralEngine.d.ts +0 -207
  137. package/dist/intelligence/neuralEngine.js +0 -706
  138. package/dist/utils/modelLoader.d.ts +0 -32
  139. package/dist/utils/modelLoader.js +0 -219
  140. package/dist/utils/modelManager.d.ts +0 -77
  141. package/dist/utils/modelManager.js +0 -219
@@ -0,0 +1,136 @@
1
+ /**
2
+ * Lightweight Embedding Alternative
3
+ *
4
+ * Uses pre-computed embeddings for common terms
5
+ * Falls back to a character-trigram hash for short unknown text, and to ONNX only for text of 50+ characters
6
+ *
7
+ * This reduces memory usage by 90% for typical queries
8
+ */
9
// Placeholder embedding table: 20 hand-picked terms mapped to synthetic
// 384-dimensional sine/cosine waves. These are NOT model-derived vectors; real
// pre-computed embeddings are expected to be merged in at runtime (see
// loadPrecomputed on the embedder class below).
//
// The table is created without a prototype because it is indexed with
// arbitrary user text. On a plain object literal, lookups such as
// PRECOMPUTED_EMBEDDINGS['constructor'] or ['toString'] resolve to inherited
// Object.prototype members and would be mistaken for embeddings.
//
// Insertion order is significant: the substring-matching step iterates the
// entries in order and returns the first match.
const PRECOMPUTED_EMBEDDINGS = (() => {
    // Samples fn(i * step) for i in [0, 384).
    const wave = (fn, step) => Array.from({ length: 384 }, (_, i) => fn(i * step));
    return Object.assign(Object.create(null), {
        // Programming languages
        'javascript': wave(Math.sin, 0.1),
        'python': wave(Math.cos, 0.1),
        'typescript': wave(Math.sin, 0.15),
        'java': wave(Math.cos, 0.15),
        'rust': wave(Math.sin, 0.2),
        'go': wave(Math.cos, 0.2),
        // Frameworks
        'react': wave(Math.sin, 0.25),
        'vue': wave(Math.cos, 0.25),
        'angular': wave(Math.sin, 0.3),
        'svelte': wave(Math.cos, 0.3),
        // Databases
        'postgresql': wave(Math.sin, 0.35),
        'mysql': wave(Math.cos, 0.35),
        'mongodb': wave(Math.sin, 0.4),
        'redis': wave(Math.cos, 0.4),
        // Common terms
        'database': wave(Math.sin, 0.45),
        'api': wave(Math.cos, 0.45),
        'server': wave(Math.sin, 0.5),
        'client': wave(Math.cos, 0.5),
        'frontend': wave(Math.sin, 0.55),
        'backend': wave(Math.cos, 0.55),
        // Add more pre-computed embeddings here...
    });
})();
38
/**
 * Builds a cheap, model-free embedding by hashing character trigrams of the
 * lower-cased, trimmed text into a fixed-size vector and L2-normalizing it.
 *
 * Text of one or two characters is hashed as a single gram (missing
 * characters count as code 0). Previously such text produced an all-zero
 * vector, which has no direction and makes cosine similarity meaningless.
 * Empty text still yields the zero vector.
 *
 * @param {string} text - Text to embed.
 * @param {number} [dimensions=384] - Length of the returned vector.
 * @returns {number[]} Unit-length vector (all zeros for empty text).
 */
function computeSimpleEmbedding(text, dimensions = 384) {
    const normalized = text.toLowerCase().trim();
    const vector = new Array(dimensions).fill(0);
    if (normalized.length === 0) {
        return vector;
    }
    // Number of sliding windows; at least one so short text still contributes.
    const gramCount = Math.max(normalized.length - 2, 1);
    const weight = 1 / gramCount;
    for (let i = 0; i < gramCount; i++) {
        // charCodeAt returns NaN past the end of the string; `|| 0` pads it.
        const first = normalized.charCodeAt(i);
        const second = normalized.charCodeAt(i + 1) || 0;
        const third = normalized.charCodeAt(i + 2) || 0;
        // Char codes are non-negative, so the hash is too; no abs() needed.
        const hash = first * 31 + second * 7 + third;
        vector[hash % dimensions] += weight;
    }
    // Normalize vector
    const magnitude = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
    if (magnitude > 0) {
        for (let i = 0; i < vector.length; i++) {
            vector[i] /= magnitude;
        }
    }
    return vector;
}
60
/**
 * Embedder that tries cheap strategies before loading the ONNX model:
 *   1. exact hit in the pre-computed table,
 *   2. substring match against a pre-computed term,
 *   3. trigram-hash embedding for text shorter than 50 characters,
 *   4. the ONNX TransformerEmbedding for everything else.
 *
 * NOTE(review): the strategies produce vectors from different spaces
 * (synthetic waves, trigram hashes, model output), so similarities between
 * results of different steps are presumably not meaningful - confirm intended use.
 */
export class LightweightEmbedder {
    constructor() {
        this.onnxEmbedder = null;
        // Promise of the model load in progress, so concurrent callers share it.
        this.onnxLoading = null;
        this.stats = {
            precomputedHits: 0,
            simpleComputes: 0,
            onnxComputes: 0
        };
    }
    /**
     * Embeds one text or, for an array, each text independently.
     * @param {string|string[]} text
     * @returns {Promise<number[]|number[][]>}
     */
    async embed(text) {
        if (Array.isArray(text)) {
            return Promise.all(text.map(t => this.embedSingle(t)));
        }
        return this.embedSingle(text);
    }
    /**
     * Embeds a single text using the cheapest applicable strategy.
     * @param {string} text
     * @returns {Promise<number[]>} Result of the first strategy that applies.
     */
    async embedSingle(text) {
        const normalized = text.toLowerCase().trim();
        // 1. Exact hit. Own-property check so that text such as "constructor"
        //    cannot resolve to an inherited Object.prototype member.
        if (Object.prototype.hasOwnProperty.call(PRECOMPUTED_EMBEDDINGS, normalized)) {
            this.stats.precomputedHits++;
            // Copy so callers cannot mutate the shared table entry.
            return PRECOMPUTED_EMBEDDINGS[normalized].slice();
        }
        // 2. Substring match in either direction. Skipped for empty text,
        //    because ''.includes / term.includes('') would match every term.
        if (normalized.length > 0) {
            for (const [term, embedding] of Object.entries(PRECOMPUTED_EMBEDDINGS)) {
                if (normalized.includes(term) || term.includes(normalized)) {
                    this.stats.precomputedHits++;
                    // Scaled copy; note that scaling changes magnitude only,
                    // not direction.
                    return embedding.map(v => v * 0.95);
                }
            }
        }
        // 3. For short text, use simple embedding (fast, low memory)
        if (normalized.length < 50) {
            this.stats.simpleComputes++;
            return computeSimpleEmbedding(normalized);
        }
        // 4. Last resort: ONNX model (loaded once, only if really needed)
        const embedder = await this.loadOnnxEmbedder();
        this.stats.onnxComputes++;
        return await embedder.embed(text);
    }
    /**
     * Loads and initializes the ONNX embedder exactly once. Concurrent callers
     * (e.g. embed() on an array of long texts) share one load, and the
     * embedder is only cached after init() succeeded, so a failed load can be
     * retried by a later call.
     * @returns {Promise<object>} The initialized TransformerEmbedding.
     */
    async loadOnnxEmbedder() {
        if (this.onnxEmbedder) {
            return this.onnxEmbedder;
        }
        if (!this.onnxLoading) {
            console.log('⚠️ Loading ONNX model for complex text...');
            this.onnxLoading = (async () => {
                try {
                    const { TransformerEmbedding } = await import('../utils/embedding.js');
                    const embedder = new TransformerEmbedding({
                        dtype: 'q8',
                        verbose: false
                    });
                    await embedder.init();
                    this.onnxEmbedder = embedder;
                    return embedder;
                }
                finally {
                    this.onnxLoading = null;
                }
            })();
        }
        return this.onnxLoading;
    }
    /**
     * @returns {{precomputedHits: number, simpleComputes: number,
     *   onnxComputes: number, totalEmbeddings: number, cacheHitRate: number}}
     *   Counters plus derived totals; cacheHitRate is 0 (not NaN) before the
     *   first embedding.
     */
    getStats() {
        const { precomputedHits, simpleComputes, onnxComputes } = this.stats;
        const totalEmbeddings = precomputedHits + simpleComputes + onnxComputes;
        return {
            ...this.stats,
            totalEmbeddings,
            cacheHitRate: totalEmbeddings === 0 ? 0 : precomputedHits / totalEmbeddings
        };
    }
    /**
     * Merges embeddings from a JSON file ({ term: number[] }) into the shared
     * pre-computed table. Best effort: failures are logged, not thrown.
     * Entries that are not arrays of numbers are skipped, as is a "__proto__"
     * key, which would otherwise replace the table's prototype.
     * @param {string} [filePath] - Path to the JSON file; no-op when falsy.
     * @returns {Promise<void>}
     */
    async loadPrecomputed(filePath) {
        if (!filePath)
            return;
        try {
            const fs = await import('fs/promises');
            const data = await fs.readFile(filePath, 'utf-8');
            const embeddings = JSON.parse(data);
            if (embeddings === null || typeof embeddings !== 'object' || Array.isArray(embeddings)) {
                throw new TypeError('Expected a JSON object mapping terms to number arrays');
            }
            let loaded = 0;
            for (const [term, vector] of Object.entries(embeddings)) {
                const isVector = Array.isArray(vector) && vector.every(v => typeof v === 'number');
                if (term === '__proto__' || !isVector) {
                    continue;
                }
                PRECOMPUTED_EMBEDDINGS[term] = vector;
                loaded++;
            }
            console.log(`✅ Loaded ${loaded} pre-computed embeddings`);
        }
        catch (error) {
            console.warn('Could not load pre-computed embeddings:', error);
        }
    }
}
136
+ //# sourceMappingURL=lightweight-embedder.js.map
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Universal Memory Manager for Embeddings
3
+ *
4
+ * Works in ALL environments: Node.js, browsers, serverless, workers
5
+ * Solves transformers.js memory leak with environment-specific strategies
6
+ */
7
+ import { Vector, EmbeddingFunction } from '../coreTypes.js';
8
+ interface MemoryStats { // Snapshot returned by UniversalMemoryManager.getMemoryStats()
9
+ embeddings: number; // embed() calls completed since the last cleanup (the counter is reset by cleanup)
10
+ memoryUsage: string; // heap usage formatted like "12.34 MB", or 'unknown' when no memory API is available
11
+ restarts: number; // number of cleanups that completed without throwing
12
+ strategy: string; // 'serverless-restart' | 'node-worker' | 'browser-dispose' | 'fallback-dispose'
13
+ }
14
+ export declare class UniversalMemoryManager { // Picks an embedding strategy from the detected runtime and recycles the embedder after a fixed number of embed() calls
15
+ private embeddingFunction; // active embedder object (has embed(); may have dispose()/forceRestart()); null until first use
16
+ private embedCount; // embed() calls since the last cleanup
17
+ private restartCount; // cleanups completed so far
18
+ private lastRestart; // Date.now() of the last completed cleanup (0 if none yet)
19
+ private strategy; // 'serverless-restart' | 'node-worker' | 'browser-dispose' | 'fallback-dispose'
20
+ private maxEmbeddings; // cleanup threshold for embedCount: 50 serverless, 100 Node worker, 25 browser, 75 fallback
21
+ constructor(); // selects strategy/maxEmbeddings and logs the choice via console.log
22
+ getEmbeddingFunction(): Promise<EmbeddingFunction>; // resolves to a function that forwards its argument to embed()
23
+ embed(data: string | string[]): Promise<Vector>; // runs cleanup if the threshold was reached, lazily initializes the embedder, then embeds
24
+ private checkMemoryLimits; // calls cleanup() once embedCount >= maxEmbeddings
25
+ private ensureEmbeddingFunction; // initializes the embedder for the current strategy when none is set
26
+ private initNodeWorker; // uses the worker-thread manager; falls back to initDirect() if importing it fails
27
+ private initServerless; // initDirect() plus a log line
28
+ private initBrowser; // initDirect() plus a log line
29
+ private initFallback; // initDirect() plus a log line
30
+ private initDirect; // creates and init()s an in-process TransformerEmbedding (dtype 'q8'); throws a wrapped Error on failure
31
+ private cleanup; // strategy-specific dispose/restart; resets embedCount and increments restartCount
32
+ getMemoryStats(): MemoryStats; // current counters plus a best-effort heap usage string
33
+ dispose(): Promise<void>; // disposes the embedder if it has dispose() and drops the reference
34
+ }
35
+ export declare const universalMemoryManager: UniversalMemoryManager; // module-level singleton, constructed when the module is imported
36
+ export declare function getUniversalEmbeddingFunction(): Promise<EmbeddingFunction>; // shorthand for universalMemoryManager.getEmbeddingFunction()
37
+ export declare function getEmbeddingMemoryStats(): MemoryStats; // shorthand for universalMemoryManager.getMemoryStats()
38
+ export {};
@@ -0,0 +1,206 @@
1
+ /**
2
+ * Universal Memory Manager for Embeddings
3
+ *
4
+ * Works in ALL environments: Node.js, browsers, serverless, workers
5
+ * Solves transformers.js memory leak with environment-specific strategies
6
+ */
7
// Environment detection. `process` may be missing (browsers) or only partially
// shimmed by bundlers, so every access to it is guarded.
const isNode = typeof process !== 'undefined' && process.versions?.node;
const isBrowser = typeof window !== 'undefined' && typeof document !== 'undefined';
const isServerless = typeof process !== 'undefined' && (process.env?.VERCEL ||
    process.env?.NETLIFY ||
    process.env?.AWS_LAMBDA_FUNCTION_NAME ||
    process.env?.FUNCTIONS_WORKER_RUNTIME);
/**
 * Wraps an embedder and periodically recycles it to bound memory growth.
 *
 * A strategy is chosen once from the detected runtime. After `maxEmbeddings`
 * calls to embed(), cleanup() either restarts the worker ('node-worker') or
 * disposes the in-process embedder so the next call creates a fresh one.
 */
export class UniversalMemoryManager {
    constructor() {
        this.embeddingFunction = null;
        this.embedCount = 0;
        this.restartCount = 0;
        this.lastRestart = 0;
        // Promise of the initialization in progress, shared by concurrent callers.
        this.initializing = null;
        // Choose strategy based on environment
        if (isServerless) {
            this.strategy = 'serverless-restart';
            this.maxEmbeddings = 50; // Restart frequently in serverless
        }
        else if (isNode && !isBrowser) {
            this.strategy = 'node-worker';
            this.maxEmbeddings = 100; // Worker can handle more
        }
        else if (isBrowser) {
            this.strategy = 'browser-dispose';
            this.maxEmbeddings = 25; // Browser memory is limited
        }
        else {
            this.strategy = 'fallback-dispose';
            this.maxEmbeddings = 75;
        }
        console.log(`🧠 Universal Memory Manager: Using ${this.strategy} strategy`);
    }
    /**
     * @returns {Promise<Function>} Resolves to a function forwarding to embed().
     */
    async getEmbeddingFunction() {
        return async (data) => {
            return this.embed(data);
        };
    }
    /**
     * Embeds `data`, first recycling and/or (re)creating the embedder if needed.
     * @param {string|string[]} data
     * @returns {Promise<*>} Whatever the underlying embedder's embed() resolves to.
     * @throws {Error} If the embedder cannot be initialized.
     */
    async embed(data) {
        // Check if we need to restart/cleanup
        await this.checkMemoryLimits();
        // Ensure embedding function is available
        await this.ensureEmbeddingFunction();
        if (!this.embeddingFunction) {
            throw new Error('Embedding function is not available');
        }
        // Perform embedding
        const result = await this.embeddingFunction.embed(data);
        this.embedCount++;
        return result;
    }
    /** Runs cleanup() once the embed counter has reached the strategy's limit. */
    async checkMemoryLimits() {
        if (this.embedCount >= this.maxEmbeddings) {
            console.log(`🔄 Memory cleanup: ${this.embedCount} embeddings processed`);
            await this.cleanup();
        }
    }
    /**
     * Initializes the embedder when none is set. Concurrent callers await the
     * same initialization instead of each loading the model.
     */
    async ensureEmbeddingFunction() {
        if (this.embeddingFunction) {
            return;
        }
        if (!this.initializing) {
            this.initializing = this.initForStrategy().finally(() => {
                this.initializing = null;
            });
        }
        await this.initializing;
    }
    /** Dispatches to the initializer matching the selected strategy. */
    async initForStrategy() {
        switch (this.strategy) {
            case 'node-worker':
                await this.initNodeWorker();
                break;
            case 'serverless-restart':
                await this.initServerless();
                break;
            case 'browser-dispose':
                await this.initBrowser();
                break;
            default:
                await this.initFallback();
        }
    }
    /** Uses the worker-thread manager, falling back to an in-process embedder. */
    async initNodeWorker() {
        if (!isNode) {
            // Not expected for this strategy, but never leave the embedder unset.
            await this.initDirect();
            return;
        }
        try {
            // Try to use worker threads if available
            const { workerEmbeddingManager } = await import('./worker-manager.js');
            this.embeddingFunction = workerEmbeddingManager;
            console.log('✅ Using Node.js worker threads for embeddings');
        }
        catch (error) {
            console.warn('⚠️ Worker threads not available, falling back to direct embedding');
            console.warn('Error:', error instanceof Error ? error.message : String(error));
            await this.initDirect();
        }
    }
    async initServerless() {
        // In serverless, use direct embedding but restart more aggressively
        await this.initDirect();
        console.log('✅ Using serverless strategy with aggressive cleanup');
    }
    async initBrowser() {
        // In browser, use direct embedding with disposal
        await this.initDirect();
        console.log('✅ Using browser strategy with disposal');
    }
    async initFallback() {
        await this.initDirect();
        console.log('✅ Using fallback direct embedding strategy');
    }
    /**
     * Creates and initializes an in-process TransformerEmbedding.
     * Remote model downloads are allowed only when the environment variable
     * BRAINY_ALLOW_REMOTE_MODELS is exactly 'true'; without a `process` object
     * (browsers) the embedder stays restricted to local files.
     * @throws {Error} Wrapping the underlying failure.
     */
    async initDirect() {
        try {
            const allowRemote = typeof process !== 'undefined' &&
                process.env?.BRAINY_ALLOW_REMOTE_MODELS === 'true';
            // Dynamic import to handle different environments
            const { TransformerEmbedding } = await import('../utils/embedding.js');
            const embedder = new TransformerEmbedding({
                verbose: false,
                dtype: 'q8',
                localFilesOnly: !allowRemote
            });
            await embedder.init();
            // Publish only after init() succeeded.
            this.embeddingFunction = embedder;
            console.log('✅ Direct embedding function initialized');
        }
        catch (error) {
            throw new Error(`Failed to initialize embedding function: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
        }
    }
    /** Drops the current embedder and awaits its dispose(), if it has one. */
    async releaseEmbeddingFunction() {
        const current = this.embeddingFunction;
        this.embeddingFunction = null;
        if (current?.dispose) {
            await current.dispose();
        }
    }
    /**
     * Recycles the embedder according to the strategy. Never throws: failures
     * are logged and the embedder reference is dropped so the next embed()
     * starts from scratch.
     */
    async cleanup() {
        const startTime = Date.now();
        try {
            if (this.strategy === 'node-worker') {
                if (this.embeddingFunction?.forceRestart) {
                    await this.embeddingFunction.forceRestart();
                }
            }
            else {
                // All in-process strategies dispose AND drop the instance, so a
                // disposed embedder is never reused (the browser strategy
                // previously kept using it after dispose()).
                await this.releaseEmbeddingFunction();
                // Force garbage collection if available
                if (this.strategy === 'browser-dispose' && typeof window !== 'undefined' && window.gc) {
                    window.gc();
                }
            }
            this.restartCount++;
            this.lastRestart = Date.now();
            const cleanupTime = Date.now() - startTime;
            console.log(`🧹 Memory cleanup completed in ${cleanupTime}ms (strategy: ${this.strategy})`);
        }
        catch (error) {
            console.warn('⚠️ Cleanup failed:', error instanceof Error ? error.message : String(error));
            // Force null assignment as last resort
            this.embeddingFunction = null;
        }
        finally {
            // Reset even on failure so the next embed() does not immediately
            // trigger another cleanup.
            this.embedCount = 0;
        }
    }
    /**
     * @returns {{embeddings: number, memoryUsage: string, restarts: number, strategy: string}}
     *   Counters plus heap usage ("N.NN MB"), or 'unknown' when unavailable.
     */
    getMemoryStats() {
        let memoryUsage = 'unknown';
        // Get memory stats based on environment
        if (isNode && typeof process !== 'undefined') {
            const mem = process.memoryUsage();
            memoryUsage = `${(mem.heapUsed / 1024 / 1024).toFixed(2)} MB`;
        }
        else if (isBrowser && typeof performance !== 'undefined' && performance.memory) {
            const mem = performance.memory;
            memoryUsage = `${(mem.usedJSHeapSize / 1024 / 1024).toFixed(2)} MB`;
        }
        return {
            embeddings: this.embedCount,
            memoryUsage,
            restarts: this.restartCount,
            strategy: this.strategy
        };
    }
    /** Disposes the embedder (if any) and drops the reference. */
    async dispose() {
        await this.releaseEmbeddingFunction();
    }
}
196
+ // Shared singleton, constructed at import time (the constructor selects a strategy and logs it)
197
+ export const universalMemoryManager = new UniversalMemoryManager();
198
+ // Convenience wrapper: resolves to a function that forwards to universalMemoryManager.embed()
199
+ export async function getUniversalEmbeddingFunction() {
200
+ return universalMemoryManager.getEmbeddingFunction();
201
+ }
202
+ // Convenience wrapper: returns the singleton's current counters and memory usage string
203
+ export function getEmbeddingMemoryStats() {
204
+ return universalMemoryManager.getMemoryStats();
205
+ }
206
+ //# sourceMappingURL=universal-memory-manager.js.map
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Worker process for embeddings - Workaround for transformers.js memory leak
3
+ *
4
+ * This worker can be killed and restarted to release memory completely.
5
+ * Based on 2024 research: dispose() doesn't fully free memory in transformers.js
6
+ */
7
+ export {};
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Worker process for embeddings - Workaround for transformers.js memory leak
3
+ *
4
+ * This worker can be killed and restarted to release memory completely.
5
+ * Based on 2024 research: dispose() doesn't fully free memory in transformers.js
6
+ */
7
+ import { TransformerEmbedding } from '../utils/embedding.js';
8
+ import { parentPort } from 'worker_threads';
9
// Initialized model, or null before first use / after 'dispose'.
let model = null;
// Promise of the current model initialization. Shared so that interleaved
// 'embed' messages wait for the same init instead of using a model whose
// init() has not finished yet.
let modelReady = null;
let requestCount = 0;
const MAX_REQUESTS = 100; // Restart worker after 100 requests to prevent memory leak
/**
 * Ensures `model` holds an initialized TransformerEmbedding.
 * The model is published only after init() succeeded; a failed init is
 * forgotten so that a later request can retry.
 * @returns {Promise<void>}
 */
async function initModel() {
    if (modelReady) {
        await modelReady;
        return;
    }
    const pending = (async () => {
        const instance = new TransformerEmbedding({
            verbose: false,
            dtype: 'q8',
            localFilesOnly: process.env.BRAINY_ALLOW_REMOTE_MODELS !== 'true'
        });
        await instance.init();
        return instance;
    })();
    modelReady = pending;
    try {
        const instance = await pending;
        // Skip publishing if a 'dispose' message reset the state meanwhile.
        if (modelReady === pending) {
            model = instance;
            console.log('🔧 Worker: Model initialized');
        }
    }
    catch (error) {
        if (modelReady === pending) {
            modelReady = null;
        }
        throw error;
    }
}
if (parentPort) {
    // Message protocol: { id, type: 'embed' | 'dispose' | 'restart', data }.
    // Every reply echoes `id` with { success, result? , error? }.
    parentPort.on('message', async (message) => {
        try {
            const { id, type, data } = message;
            switch (type) {
                case 'embed': {
                    await initModel();
                    const embeddings = await model.embed(data);
                    parentPort.postMessage({ id, success: true, result: embeddings });
                    requestCount++;
                    // Proactively restart worker to prevent memory leak.
                    // In a worker thread process.exit() stops this thread only.
                    if (requestCount >= MAX_REQUESTS) {
                        console.log(`🔄 Worker: Restarting after ${requestCount} requests (memory leak prevention)`);
                        process.exit(0); // Parent will restart us
                    }
                    break;
                }
                case 'dispose': {
                    if (model) {
                        // This doesn't fully free memory (known issue), but try anyway
                        if ('dispose' in model && typeof model.dispose === 'function') {
                            model.dispose();
                        }
                        model = null;
                    }
                    // Force a fresh initialization on the next 'embed'.
                    modelReady = null;
                    parentPort.postMessage({ id, success: true });
                    break;
                }
                case 'restart':
                    // Force restart to clear memory
                    console.log('🔄 Worker: Force restart requested');
                    process.exit(0);
                    break;
                default:
                    parentPort.postMessage({
                        id,
                        success: false,
                        error: `Unknown message type: ${type}`
                    });
            }
        }
        catch (error) {
            // `message` may be null/undefined (destructuring it is what threw),
            // so read the id defensively instead of throwing again here.
            parentPort.postMessage({
                id: message?.id,
                success: false,
                error: error instanceof Error ? error.message : String(error)
            });
        }
    });
    console.log('🚀 Embedding worker started');
    parentPort.postMessage({ type: 'ready' });
}
else {
    console.error('❌ Worker: parentPort is null, cannot communicate with main thread');
    process.exit(1);
}
77
+ //# sourceMappingURL=worker-embedding.js.map
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Worker Manager for Memory-Safe Embeddings
3
+ *
4
+ * Manages worker lifecycle to prevent transformers.js memory leaks
5
+ * Workers are restarted after a fixed number of requests or on demand (forceRestart), not by measuring memory usage
6
+ */
7
+ import { Vector, EmbeddingFunction } from '../coreTypes.js';
8
+ export declare class WorkerEmbeddingManager { // Runs embeddings in a worker thread and correlates replies with requests by id
9
+ private worker; // current Worker instance, or null when none is running
10
+ private requestId; // last request id handed out (incremented per embed() call)
11
+ private pendingRequests; // Map of id -> { resolve, reject, timeout } for requests awaiting a reply
12
+ private isRestarting; // true from the start of createWorker() until the worker posts { type: 'ready' }
13
+ private totalRequests; // number of embed() requests issued over the manager's lifetime
14
+ getEmbeddingFunction(): Promise<EmbeddingFunction>; // resolves to a function that forwards to embed()
15
+ embed(data: string | string[]): Promise<Vector>; // posts an 'embed' message to the worker; rejects after a 120s timeout
16
+ private ensureWorker; // starts a worker if none exists, or waits for an in-progress restart
17
+ private createWorker; // terminates any existing worker, rejects pending requests, starts a new worker and waits for 'ready' (30s timeout)
18
+ dispose(): Promise<void>; // terminates the worker and rejects all pending requests with 'Manager disposed'
19
+ forceRestart(): Promise<void>; // logs and calls createWorker()
20
+ getStats(): { // point-in-time counters
21
+ totalRequests: number; // same as the totalRequests field
22
+ pendingRequests: number; // size of the pendingRequests map
23
+ workerActive: boolean; // whether a worker reference is currently held
24
+ isRestarting: boolean; // same as the isRestarting field
25
+ };
26
+ }
27
+ export declare const workerEmbeddingManager: WorkerEmbeddingManager; // module-level singleton; no worker is started until the first embed()
28
+ export declare function getWorkerEmbeddingFunction(): Promise<EmbeddingFunction>; // shorthand for workerEmbeddingManager.getEmbeddingFunction()
@@ -0,0 +1,162 @@
1
+ /**
2
+ * Worker Manager for Memory-Safe Embeddings
3
+ *
4
+ * Manages worker lifecycle to prevent transformers.js memory leaks
5
+ * Workers are restarted after a fixed number of requests or on demand (forceRestart), not by measuring memory usage
6
+ */
7
+ import { Worker } from 'worker_threads';
8
+ import { join, dirname } from 'path';
9
+ import { fileURLToPath } from 'url';
10
+ // ES modules provide no __dirname, so derive it from import.meta.url; it is used to locate worker-embedding.js next to this file
11
+ const __filename = fileURLToPath(import.meta.url);
12
+ const __dirname = dirname(__filename);
13
/**
 * Runs embeddings in a worker thread so that the model's memory can be
 * reclaimed by replacing the thread.
 *
 * Requests are correlated with replies through a numeric id. Whenever the
 * worker goes away (restart, crash, self-exit, dispose) every request still
 * waiting for a reply is rejected immediately instead of timing out.
 */
export class WorkerEmbeddingManager {
    constructor() {
        this.worker = null;
        this.requestId = 0;
        this.pendingRequests = new Map();
        this.isRestarting = false;
        this.totalRequests = 0;
        // Promise of the worker start in progress (null when idle); shared by
        // all callers that arrive while the worker is starting.
        this.startPromise = null;
    }
    /**
     * @returns {Promise<Function>} Resolves to a function forwarding to embed().
     */
    async getEmbeddingFunction() {
        return async (data) => {
            return this.embed(data);
        };
    }
    /**
     * Sends `data` to the worker and resolves with the worker's result.
     * @param {string|string[]} data
     * @returns {Promise<*>} The `result` field of the worker's reply.
     * @throws {Error} On worker failure, worker exit/restart, or after 120s
     *   without a reply.
     */
    async embed(data) {
        await this.ensureWorker();
        const worker = this.worker;
        if (!worker) {
            throw new Error('Embedding worker is not available');
        }
        const id = ++this.requestId;
        this.totalRequests++;
        return new Promise((resolve, reject) => {
            const timeout = setTimeout(() => {
                this.pendingRequests.delete(id);
                reject(new Error('Embedding request timed out (120s)'));
            }, 120000);
            this.pendingRequests.set(id, { resolve, reject, timeout });
            try {
                worker.postMessage({
                    id,
                    type: 'embed',
                    data
                });
            }
            catch (error) {
                // e.g. data that cannot be cloned: release the timer and entry
                clearTimeout(timeout);
                this.pendingRequests.delete(id);
                reject(error);
            }
        });
    }
    /** Resolves once a ready worker exists, starting one if necessary. */
    async ensureWorker() {
        if (this.startPromise) {
            // Wait for restart to complete (rejects if that start fails)
            await this.startPromise;
            return;
        }
        if (this.worker) {
            return;
        }
        await this.createWorker();
    }
    /**
     * Replaces the current worker with a fresh one and resolves when the new
     * worker reported ready. If a start is already in progress, that start is
     * returned instead of beginning another.
     * @throws {Error} If the worker fails, exits, or stays silent for 30s
     *   before reporting ready.
     */
    async createWorker() {
        if (this.startPromise) {
            return this.startPromise;
        }
        this.isRestarting = true;
        this.startPromise = this.startWorker().finally(() => {
            // Always leave the "restarting" state, including on failure, so
            // waiting callers are released and a later call can retry.
            this.isRestarting = false;
            this.startPromise = null;
        });
        return this.startPromise;
    }
    /** Creates the worker thread that runs worker-embedding.js. */
    spawnWorker() {
        return new Worker(join(__dirname, 'worker-embedding.js'));
    }
    /** Terminates the old worker, spawns a new one and waits for its 'ready'. */
    async startWorker() {
        // Kill existing worker if any
        const previous = this.worker;
        this.worker = null;
        if (previous) {
            Promise.resolve(previous.terminate()).catch(() => { });
        }
        // Requests sent to the old worker will never be answered
        this.rejectPending('Worker restarted');
        console.log('🔄 Starting embedding worker...');
        const worker = this.spawnWorker();
        this.worker = worker;
        return new Promise((resolve, reject) => {
            let settled = false;
            const startupTimer = setTimeout(() => {
                fail(new Error('Worker startup timeout'));
            }, 30000);
            // Fails the startup (no-op once startup has settled).
            const fail = (error) => {
                if (settled) {
                    return;
                }
                settled = true;
                clearTimeout(startupTimer);
                if (this.worker === worker) {
                    this.worker = null;
                }
                Promise.resolve(worker.terminate()).catch(() => { });
                reject(error);
            };
            // Handle worker messages
            worker.on('message', (message) => {
                if (message?.type === 'ready') {
                    console.log('✅ Embedding worker ready');
                    if (!settled) {
                        settled = true;
                        clearTimeout(startupTimer);
                        resolve();
                    }
                    return;
                }
                this.settleRequest(message);
            });
            // Without an 'error' listener an exception in the worker would be
            // re-thrown in the host process as an uncaught exception.
            worker.on('error', (error) => {
                console.warn('⚠️ Embedding worker error:', error instanceof Error ? error.message : String(error));
                fail(error instanceof Error ? error : new Error(String(error)));
            });
            // Handle worker exit
            worker.on('exit', (code) => {
                console.log(`🔄 Embedding worker exited with code ${code}`);
                fail(new Error(`Embedding worker exited with code ${code} before becoming ready`));
                if (this.worker !== worker) {
                    // Late exit of a worker that was already replaced: it must
                    // not clear the reference to its successor.
                    return;
                }
                if (code !== 0) {
                    console.log('🔄 Worker crashed, will restart on next request');
                }
                this.worker = null;
                this.rejectPending(`Embedding worker exited with code ${code}`);
            });
        });
    }
    /** Resolves or rejects the pending request a worker reply belongs to. */
    settleRequest(message) {
        const { id, success, result, error } = message ?? {};
        const request = this.pendingRequests.get(id);
        if (!request) {
            return;
        }
        if (request.timeout) {
            clearTimeout(request.timeout);
        }
        this.pendingRequests.delete(id);
        if (success) {
            request.resolve(result);
        }
        else {
            request.reject(new Error(error));
        }
    }
    /** Rejects every pending request with `reason` and clears their timers. */
    rejectPending(reason) {
        for (const request of this.pendingRequests.values()) {
            if (request.timeout) {
                clearTimeout(request.timeout);
            }
            request.reject(new Error(reason));
        }
        this.pendingRequests.clear();
    }
    /** Terminates the worker and rejects all pending requests. */
    async dispose() {
        const worker = this.worker;
        this.worker = null;
        // Clear pending requests
        this.rejectPending('Manager disposed');
        if (worker) {
            await worker.terminate();
        }
    }
    /** Replaces the worker to release the memory held by the old thread. */
    async forceRestart() {
        console.log('🔄 Force restarting embedding worker (memory cleanup)');
        await this.createWorker();
    }
    /**
     * @returns {{totalRequests: number, pendingRequests: number,
     *   workerActive: boolean, isRestarting: boolean}}
     */
    getStats() {
        return {
            totalRequests: this.totalRequests,
            pendingRequests: this.pendingRequests.size,
            workerActive: this.worker !== null,
            isRestarting: this.isRestarting
        };
    }
}
156
+ // Shared singleton; constructing it does not start a worker (the worker is created on the first embed())
157
+ export const workerEmbeddingManager = new WorkerEmbeddingManager();
158
+ // Convenience wrapper: resolves to a function that forwards to workerEmbeddingManager.embed()
159
+ export async function getWorkerEmbeddingFunction() {
160
+ return workerEmbeddingManager.getEmbeddingFunction();
161
+ }
162
+ //# sourceMappingURL=worker-manager.js.map