@soulcraft/brainy 2.11.0 → 2.14.0
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between versions as they appear in the public registry.
- package/CHANGELOG.md +15 -0
- package/dist/brainyData.d.ts +5 -8
- package/dist/brainyData.js +56 -39
- package/dist/config/index.d.ts +1 -0
- package/dist/config/index.js +2 -0
- package/dist/config/modelAutoConfig.d.ts +1 -0
- package/dist/config/modelAutoConfig.js +27 -22
- package/dist/config/modelPrecisionManager.d.ts +42 -0
- package/dist/config/modelPrecisionManager.js +98 -0
- package/dist/config/zeroConfig.js +1 -1
- package/dist/embeddings/CachedEmbeddings.d.ts +40 -0
- package/dist/embeddings/CachedEmbeddings.js +146 -0
- package/dist/embeddings/EmbeddingManager.d.ts +106 -0
- package/dist/embeddings/EmbeddingManager.js +296 -0
- package/dist/embeddings/SingletonModelManager.d.ts +95 -0
- package/dist/embeddings/SingletonModelManager.js +220 -0
- package/dist/embeddings/index.d.ts +12 -0
- package/dist/embeddings/index.js +16 -0
- package/dist/embeddings/lightweight-embedder.d.ts +0 -1
- package/dist/embeddings/lightweight-embedder.js +4 -12
- package/dist/embeddings/universal-memory-manager.js +13 -50
- package/dist/embeddings/worker-embedding.js +4 -8
- package/dist/neural/improvedNeuralAPI.d.ts +346 -0
- package/dist/neural/improvedNeuralAPI.js +2439 -0
- package/dist/neural/types.d.ts +267 -0
- package/dist/neural/types.js +24 -0
- package/dist/utils/embedding.d.ts +7 -2
- package/dist/utils/embedding.js +51 -33
- package/dist/utils/hybridModelManager.d.ts +19 -28
- package/dist/utils/hybridModelManager.js +36 -200
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
@@ -2,6 +2,21 @@
 
 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
 
+## [2.14.0](https://github.com/soulcraftlabs/brainy/compare/v2.13.0...v2.14.0) (2025-09-02)
+
+
+### Features
+
+* implement clean embedding architecture with Q8/FP32 precision control ([b55c454](https://github.com/soulcraftlabs/brainy/commit/b55c454))
+
+## [2.13.0](https://github.com/soulcraftlabs/brainy/compare/v2.12.0...v2.13.0) (2025-09-02)
+
+
+### Features
+
+* implement comprehensive neural clustering system ([7345e53](https://github.com/soulcraftlabs/brainy/commit/7345e53))
+* implement comprehensive type safety system with BrainyTypes API ([0f4ab52](https://github.com/soulcraftlabs/brainy/commit/0f4ab52))
+
 ## [2.10.0](https://github.com/soulcraftlabs/brainy/compare/v2.9.0...v2.10.0) (2025-08-29)
 
 ## [2.8.0](https://github.com/soulcraftlabs/brainy/compare/v2.7.4...v2.8.0) (2025-08-29)
package/dist/brainyData.d.ts
CHANGED
@@ -12,6 +12,7 @@ import { WebSocketConnection } from './types/augmentations.js';
 import { BrainyDataInterface } from './types/brainyDataInterface.js';
 import { DistributedConfig } from './types/distributedTypes.js';
 import { SearchCacheConfig } from './utils/searchCache.js';
+import { ImprovedNeuralAPI } from './neural/improvedNeuralAPI.js';
 import { TripleQuery, TripleResult } from './triple/TripleIntelligence.js';
 export interface BrainyDataConfig {
     /**
@@ -1687,23 +1688,19 @@ export declare class BrainyData<T = any> implements BrainyDataInterface<T> {
      * - brain.neural.clusterStream() - Progressive streaming
      * - brain.neural.getLOD() - Level-of-detail for scale
      */
-    get neural():
+    get neural(): ImprovedNeuralAPI;
     /**
      * Simple similarity check (shorthand for neural.similar)
      */
-    similar(a: any, b: any): Promise<number>;
+    similar(a: any, b: any, options?: any): Promise<number>;
     /**
      * Get semantic clusters (shorthand for neural.clusters)
      */
-    clusters(options?: any): Promise<any[]>;
+    clusters(items?: any, options?: any): Promise<any[]>;
     /**
      * Get related items (shorthand for neural.neighbors)
     */
-    related(id: string,
-    /**
-     * Get visualization data (shorthand for neural.visualize)
-     */
-    visualize(options?: any): Promise<any>;
+    related(id: string, options?: any): Promise<any[]>;
     /**
      * 🚀 TRIPLE INTELLIGENCE SEARCH - Natural Language & Complex Queries
      * The revolutionary search that combines vector, graph, and metadata intelligence!
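Note: the declaration changes above type the neural getter as ImprovedNeuralAPI, widen the convenience shortcuts to accept an options argument, and drop the visualize() shorthand. A minimal usage sketch against these signatures (assumes an already-initialized BrainyData instance named brain; only the limit field appears elsewhere in this diff, so treat it as the one confirmed option):

    const score = await brain.similar('coffee', 'espresso')        // plain number
    const nearby = await brain.related('doc-123', { limit: 10 })   // shorthand for neural.neighbors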
package/dist/brainyData.js
CHANGED
@@ -29,7 +29,7 @@ import { EntityRegistryAugmentation, AutoRegisterEntitiesAugmentation } from './
 import { createDefaultAugmentations } from './augmentations/defaultAugmentations.js';
 // import { RealtimeStreamingAugmentation } from './augmentations/realtimeStreamingAugmentation.js'
 import { IntelligentVerbScoringAugmentation } from './augmentations/intelligentVerbScoringAugmentation.js';
-import {
+import { ImprovedNeuralAPI } from './neural/improvedNeuralAPI.js';
 import { TripleIntelligenceEngine } from './triple/TripleIntelligence.js';
 export class BrainyData {
     // REMOVED: HealthMonitor is now handled by MonitoringAugmentation
@@ -935,23 +935,10 @@
             // Continue with existing config
         }
     }
-        //
-        //
-        if (
-
-            const { universalMemoryManager } = await import('./embeddings/universal-memory-manager.js');
-            this.embeddingFunction = await universalMemoryManager.getEmbeddingFunction();
-            console.log('✅ UNIVERSAL: Memory-safe embedding system initialized');
-        }
-        catch (error) {
-            console.error('🚨 CRITICAL: Universal memory manager initialization failed!');
-            console.error('Falling back to standard embedding with potential memory issues.');
-            console.warn('Consider reducing usage or restarting process periodically.');
-            // Continue with default function - better than crashing
-        }
-    }
-    else if (this.embeddingFunction !== defaultEmbeddingFunction) {
-        console.log('✅ CUSTOM: Using custom embedding function (test or production override)');
+        // The embedding function is already set (either custom or default)
+        // EmbeddingManager handles all initialization internally
+        if (this.embeddingFunction !== defaultEmbeddingFunction) {
+            console.log('✅ Using custom embedding function');
         }
         try {
             // Pre-load the embedding model early to ensure it's always available
@@ -3011,9 +2998,16 @@
      */
     async addVerbs(verbs) {
         const ids = [];
-
-
-
+        const chunkSize = 10; // Conservative chunk size for parallel processing
+        // Process verbs in parallel chunks to improve performance
+        for (let i = 0; i < verbs.length; i += chunkSize) {
+            const chunk = verbs.slice(i, i + chunkSize);
+            // Process chunk in parallel
+            const chunkPromises = chunk.map(verb => this.addVerb(verb.source, verb.target, verb.type, verb.metadata));
+            // Wait for all in chunk to complete
+            const chunkIds = await Promise.all(chunkPromises);
+            // Maintain order by adding chunk results
+            ids.push(...chunkIds);
         }
         return ids;
     }
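The verb objects passed to addVerbs() are destructured into source, target, type, and metadata by the chunked map() above, and the returned ids keep the input order. A brief caller-side sketch (values illustrative):

    const ids = await brain.addVerbs([
      { source: 'alice', target: 'acme', type: 'worksAt', metadata: { since: 2021 } },
      { source: 'bob', target: 'acme', type: 'worksAt' }
    ])
    // ids[0] belongs to the first verb, ids[1] to the second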
@@ -3024,8 +3018,16 @@
      */
     async deleteVerbs(ids) {
         const results = [];
-
-
+        const chunkSize = 10; // Conservative chunk size for parallel processing
+        // Process deletions in parallel chunks to improve performance
+        for (let i = 0; i < ids.length; i += chunkSize) {
+            const chunk = ids.slice(i, i + chunkSize);
+            // Process chunk in parallel
+            const chunkPromises = chunk.map(id => this.deleteVerb(id));
+            // Wait for all in chunk to complete
+            const chunkResults = await Promise.all(chunkPromises);
+            // Maintain order by adding chunk results
+            results.push(...chunkResults);
         }
         return results;
     }
@@ -5749,8 +5751,16 @@
      */
     async deleteNouns(ids) {
         const results = [];
-
-
+        const chunkSize = 10; // Conservative chunk size for parallel processing
+        // Process deletions in parallel chunks to improve performance
+        for (let i = 0; i < ids.length; i += chunkSize) {
+            const chunk = ids.slice(i, i + chunkSize);
+            // Process chunk in parallel
+            const chunkPromises = chunk.map(id => this.deleteNoun(id));
+            // Wait for all in chunk to complete
+            const chunkResults = await Promise.all(chunkPromises);
+            // Maintain order by adding chunk results
+            results.push(...chunkResults);
         }
         return results;
     }
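The same chunked-parallel pattern now backs addVerbs(), deleteVerbs(), and deleteNouns(): at most 10 operations are in flight at once and results are pushed back in input order. A standalone sketch of the pattern (the helper name is illustrative, not part of the package):

    async function processInChunks<T, R>(
      items: T[],
      worker: (item: T) => Promise<R>,
      chunkSize = 10
    ): Promise<R[]> {
      const results: R[] = []
      for (let i = 0; i < items.length; i += chunkSize) {
        const chunk = items.slice(i, i + chunkSize)
        // At most chunkSize operations run concurrently; Promise.all preserves input order.
        results.push(...(await Promise.all(chunk.map(worker))))
      }
      return results
    }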
@@ -5931,34 +5941,41 @@
     get neural() {
         if (!this._neural) {
             // Create the unified Neural API instance
-            this._neural = new
+            this._neural = new ImprovedNeuralAPI(this);
         }
         return this._neural;
     }
     /**
      * Simple similarity check (shorthand for neural.similar)
      */
-    async similar(a, b) {
-
+    async similar(a, b, options) {
+        const result = await this.neural.similar(a, b, options);
+        // Always return simple number for main class shortcut
+        return typeof result === 'object' ? result.score : result;
     }
     /**
      * Get semantic clusters (shorthand for neural.clusters)
      */
-    async clusters(options) {
-
+    async clusters(items, options) {
+        // Support both (items, options) and (options) patterns
+        if (typeof items === 'object' && !Array.isArray(items) && options === undefined) {
+            // First argument is options object
+            return this.neural.clusters(items);
+        }
+        // Standard (items, options) pattern
+        if (options) {
+            return this.neural.clusters({ ...options, items });
+        }
+        return this.neural.clusters(items);
    }
     /**
      * Get related items (shorthand for neural.neighbors)
      */
-    async related(id,
-        const
-
-
-
-     * Get visualization data (shorthand for neural.visualize)
-     */
-    async visualize(options) {
-        return this.neural.visualize(options);
+    async related(id, options) {
+        const limit = typeof options === 'number' ? options : options?.limit;
+        const fullOptions = typeof options === 'number' ? { limit } : options;
+        const result = await this.neural.neighbors(id, fullOptions);
+        return result.neighbors || [];
     }
     /**
      * 🚀 TRIPLE INTELLIGENCE SEARCH - Natural Language & Complex Queries
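As implemented above, the clusters() shortcut accepts either a single options object or an (items, options) pair, and related() accepts either a bare numeric limit or an options object. A short sketch (field names other than limit are illustrative):

    await brain.clusters({ maxClusters: 5 })          // options-only form
    await brain.clusters(['a', 'b', 'c'], { k: 3 })   // items + options form
    await brain.related('node-1', 5)                  // numeric shorthand for { limit: 5 }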
package/dist/config/index.d.ts
CHANGED
@@ -4,6 +4,7 @@
  */
 export { autoSelectModelPrecision, ModelPrecision as ModelPrecisionType, // Avoid conflict
 ModelPreset, shouldAutoDownloadModels, getModelPath, logModelConfig } from './modelAutoConfig.js';
+export { ModelPrecisionManager, getModelPrecision, setModelPrecision, lockModelPrecision, validateModelPrecision } from './modelPrecisionManager.js';
 export { autoDetectStorage, StorageType, StoragePreset, StorageConfigResult, logStorageConfig, type StorageTypeString, type StoragePresetString } from './storageAutoConfig.js';
 export { SharedConfig, SharedConfigManager } from './sharedConfigManager.js';
 export { BrainyZeroConfig, processZeroConfig, createEmbeddingFunctionWithPrecision } from './zeroConfig.js';
package/dist/config/index.js
CHANGED
@@ -4,6 +4,8 @@
  */
 // Model configuration
 export { autoSelectModelPrecision, shouldAutoDownloadModels, getModelPath, logModelConfig } from './modelAutoConfig.js';
+// Model precision manager
+export { ModelPrecisionManager, getModelPrecision, setModelPrecision, lockModelPrecision, validateModelPrecision } from './modelPrecisionManager.js';
 // Storage configuration
 export { autoDetectStorage, StorageType, StoragePreset, logStorageConfig } from './storageAutoConfig.js';
 // Shared configuration for multi-instance
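Both index files now re-export the precision manager API next to the existing model-config helpers, so (assuming the config barrel is reachable from your build, as in the dist layout shown here) one import covers both:

    import { autoSelectModelPrecision, getModelPrecision, ModelPrecisionManager } from './config/index.js'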
package/dist/config/modelAutoConfig.d.ts
CHANGED
@@ -12,6 +12,7 @@ interface ModelConfigResult {
 }
 /**
  * Auto-select model precision based on environment and resources
+ * DEFAULT: Q8 for optimal size/performance balance
  * @param override - Manual override: 'fp32', 'q8', 'fast' (fp32), 'small' (q8), or 'auto'
  */
 export declare function autoSelectModelPrecision(override?: ModelPrecision | ModelPreset): ModelConfigResult;
package/dist/config/modelAutoConfig.js
CHANGED
@@ -4,13 +4,16 @@
  * while allowing manual override
  */
 import { isBrowser, isNode } from '../utils/environment.js';
+import { setModelPrecision } from './modelPrecisionManager.js';
 /**
  * Auto-select model precision based on environment and resources
+ * DEFAULT: Q8 for optimal size/performance balance
  * @param override - Manual override: 'fp32', 'q8', 'fast' (fp32), 'small' (q8), or 'auto'
  */
 export function autoSelectModelPrecision(override) {
     // Handle direct precision override
     if (override === 'fp32' || override === 'q8') {
+        setModelPrecision(override); // Update central config
         return {
             precision: override,
             reason: `Manually specified: ${override}`,
@@ -19,6 +22,7 @@ export function autoSelectModelPrecision(override) {
     }
     // Handle preset overrides
     if (override === 'fast') {
+        setModelPrecision('fp32'); // Update central config
         return {
             precision: 'fp32',
             reason: 'Preset: fast (fp32 for best quality)',
@@ -26,6 +30,7 @@ export function autoSelectModelPrecision(override) {
         };
     }
     if (override === 'small') {
+        setModelPrecision('q8'); // Update central config
         return {
             precision: 'q8',
             reason: 'Preset: small (q8 for reduced size)',
@@ -37,53 +42,53 @@ export function autoSelectModelPrecision(override) {
 }
 /**
  * Automatically detect the best model precision for the environment
+ * NEW DEFAULT: Q8 for optimal size/performance (75% smaller, 99% accuracy)
  */
 function autoDetectBestPrecision() {
+    // Check if user explicitly wants FP32 via environment variable
+    if (process.env.BRAINY_FORCE_FP32 === 'true') {
+        setModelPrecision('fp32');
+        return {
+            precision: 'fp32',
+            reason: 'FP32 forced via BRAINY_FORCE_FP32 environment variable',
+            autoSelected: false
+        };
+    }
     // Browser environment - use Q8 for smaller download/memory
     if (isBrowser()) {
+        setModelPrecision('q8');
         return {
             precision: 'q8',
-            reason: 'Browser environment
+            reason: 'Browser environment - using Q8 (23MB vs 90MB)',
             autoSelected: true
         };
     }
     // Serverless environments - use Q8 for faster cold starts
     if (isServerlessEnvironment()) {
+        setModelPrecision('q8');
         return {
             precision: 'q8',
-            reason: 'Serverless environment
+            reason: 'Serverless environment - using Q8 for 75% faster cold starts',
             autoSelected: true
         };
     }
     // Check available memory
     const memoryMB = getAvailableMemoryMB();
-    if
-
-
-            reason: `Low memory detected (${memoryMB}MB) - using Q8`,
-            autoSelected: true
-        };
-    }
-    // Development environment - use FP32 for best quality
-    if (process.env.NODE_ENV === 'development') {
-        return {
-            precision: 'fp32',
-            reason: 'Development environment - using FP32 for best quality',
-            autoSelected: true
-        };
-    }
-    // Production with adequate memory - use FP32
-    if (memoryMB >= 2048) {
+    // Only use FP32 if explicitly high memory AND user opts in
+    if (memoryMB >= 4096 && process.env.BRAINY_PREFER_QUALITY === 'true') {
+        setModelPrecision('fp32');
         return {
             precision: 'fp32',
-            reason: `
+            reason: `High memory (${memoryMB}MB) + quality preference - using FP32`,
             autoSelected: true
         };
     }
-    //
+    // DEFAULT TO Q8 - Optimal for 99% of use cases
+    // Q8 provides 99% accuracy at 25% of the size
+    setModelPrecision('q8');
     return {
         precision: 'q8',
-        reason:
+        reason: 'Default: Q8 model (23MB, 99% accuracy, 4x faster loads)',
         autoSelected: true
     };
 }
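Auto-detection now defaults to Q8 everywhere; FP32 is only chosen when forced via BRAINY_FORCE_FP32, when memory is at least 4096MB and BRAINY_PREFER_QUALITY is set, or when the caller passes an explicit 'fp32' override or the 'fast' preset. A behavior sketch (import path mirrors the dist layout shown in this diff):

    import { autoSelectModelPrecision } from './config/modelAutoConfig.js'

    autoSelectModelPrecision().precision        // 'q8'  (default, no env vars set)
    autoSelectModelPrecision('fast').precision  // 'fp32' (preset override)
    autoSelectModelPrecision('fp32').precision  // 'fp32' (direct override)
    // With BRAINY_FORCE_FP32=true in the environment, auto-detection also returns 'fp32'.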
package/dist/config/modelPrecisionManager.d.ts
ADDED
@@ -0,0 +1,42 @@
+/**
+ * Central Model Precision Manager
+ *
+ * Single source of truth for model precision configuration.
+ * Ensures consistent usage of Q8 or FP32 models throughout the system.
+ */
+import { ModelPrecision } from './modelAutoConfig.js';
+export declare class ModelPrecisionManager {
+    private static instance;
+    private precision;
+    private isLocked;
+    private constructor();
+    static getInstance(): ModelPrecisionManager;
+    /**
+     * Get the current model precision
+     */
+    getPrecision(): ModelPrecision;
+    /**
+     * Set the model precision (can only be done before first model load)
+     */
+    setPrecision(precision: ModelPrecision): void;
+    /**
+     * Lock the precision (called after first model load)
+     */
+    lock(): void;
+    /**
+     * Check if precision is locked
+     */
+    isConfigLocked(): boolean;
+    /**
+     * Get precision info for logging
+     */
+    getInfo(): string;
+    /**
+     * Validate that a given precision matches the configured one
+     */
+    validatePrecision(precision: ModelPrecision): boolean;
+}
+export declare const getModelPrecision: () => ModelPrecision;
+export declare const setModelPrecision: (precision: ModelPrecision) => void;
+export declare const lockModelPrecision: () => void;
+export declare const validateModelPrecision: (precision: ModelPrecision) => boolean;
package/dist/config/modelPrecisionManager.js
ADDED
@@ -0,0 +1,98 @@
+/**
+ * Central Model Precision Manager
+ *
+ * Single source of truth for model precision configuration.
+ * Ensures consistent usage of Q8 or FP32 models throughout the system.
+ */
+export class ModelPrecisionManager {
+    constructor() {
+        this.precision = 'q8'; // DEFAULT TO Q8
+        this.isLocked = false;
+        // Check environment variable override
+        const envPrecision = process.env.BRAINY_MODEL_PRECISION;
+        if (envPrecision === 'fp32' || envPrecision === 'q8') {
+            this.precision = envPrecision;
+            console.log(`Model precision set from environment: ${envPrecision.toUpperCase()}`);
+        }
+        else {
+            console.log('Using default model precision: Q8 (75% smaller, 99% accuracy)');
+        }
+    }
+    static getInstance() {
+        if (!ModelPrecisionManager.instance) {
+            ModelPrecisionManager.instance = new ModelPrecisionManager();
+        }
+        return ModelPrecisionManager.instance;
+    }
+    /**
+     * Get the current model precision
+     */
+    getPrecision() {
+        return this.precision;
+    }
+    /**
+     * Set the model precision (can only be done before first model load)
+     */
+    setPrecision(precision) {
+        if (this.isLocked) {
+            console.warn(`⚠️ Cannot change precision after model initialization. Current: ${this.precision.toUpperCase()}`);
+            return;
+        }
+        if (precision !== this.precision) {
+            console.log(`Model precision changed: ${this.precision.toUpperCase()} → ${precision.toUpperCase()}`);
+            this.precision = precision;
+        }
+    }
+    /**
+     * Lock the precision (called after first model load)
+     */
+    lock() {
+        if (!this.isLocked) {
+            this.isLocked = true;
+            console.log(`Model precision locked: ${this.precision.toUpperCase()}`);
+        }
+    }
+    /**
+     * Check if precision is locked
+     */
+    isConfigLocked() {
+        return this.isLocked;
+    }
+    /**
+     * Get precision info for logging
+     */
+    getInfo() {
+        const info = this.precision === 'q8'
+            ? 'Q8 (quantized, 23MB, 99% accuracy)'
+            : 'FP32 (full precision, 90MB, 100% accuracy)';
+        return `${info}${this.isLocked ? ' [LOCKED]' : ''}`;
+    }
+    /**
+     * Validate that a given precision matches the configured one
+     */
+    validatePrecision(precision) {
+        if (precision !== this.precision) {
+            console.error(`❌ Precision mismatch! Expected: ${this.precision.toUpperCase()}, Got: ${precision.toUpperCase()}`);
+            console.error('This will cause incompatible embeddings!');
+            return false;
+        }
+        return true;
+    }
+}
+// Export singleton instance getter
+export const getModelPrecision = () => {
+    return ModelPrecisionManager.getInstance().getPrecision();
+};
+// Export setter (for configuration phase)
+export const setModelPrecision = (precision) => {
+    ModelPrecisionManager.getInstance().setPrecision(precision);
+};
+// Export lock function (for after model initialization)
+export const lockModelPrecision = () => {
+    ModelPrecisionManager.getInstance().lock();
+};
+// Export validation function
+export const validateModelPrecision = (precision) => {
+    return ModelPrecisionManager.getInstance().validatePrecision(precision);
+};
+//# sourceMappingURL=modelPrecisionManager.js.map
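The manager is a process-wide singleton: precision can be changed freely until lock() is called after the first model load, after which the setter only warns. A lifecycle sketch using the exported helpers (import path mirrors the dist layout):

    import {
      getModelPrecision,
      setModelPrecision,
      lockModelPrecision,
      validateModelPrecision
    } from './config/modelPrecisionManager.js'

    setModelPrecision('fp32')        // allowed while unlocked
    lockModelPrecision()             // typically called once the model is initialized
    setModelPrecision('q8')          // only logs a warning now; precision stays FP32
    validateModelPrecision('q8')     // false - logs a precision-mismatch error
    getModelPrecision()              // 'fp32'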
package/dist/embeddings/CachedEmbeddings.d.ts
ADDED
@@ -0,0 +1,40 @@
+/**
+ * Cached Embeddings - Performance Optimization Layer
+ *
+ * Provides pre-computed embeddings for common terms to avoid
+ * unnecessary model calls. Falls back to EmbeddingManager for
+ * unknown terms.
+ *
+ * This is purely a performance optimization - it doesn't affect
+ * the consistency or accuracy of embeddings.
+ */
+import { Vector } from '../coreTypes.js';
+/**
+ * Cached Embeddings with fallback to EmbeddingManager
+ */
+export declare class CachedEmbeddings {
+    private stats;
+    /**
+     * Generate embedding with caching
+     */
+    embed(text: string | string[]): Promise<Vector | Vector[]>;
+    /**
+     * Embed single text with cache lookup
+     */
+    private embedSingle;
+    /**
+     * Get cache statistics
+     */
+    getStats(): {
+        totalEmbeddings: number;
+        cacheHitRate: number;
+        cacheHits: number;
+        simpleComputes: number;
+        modelCalls: number;
+    };
+    /**
+     * Add custom pre-computed embeddings
+     */
+    addPrecomputed(term: string, embedding: Vector): void;
+}
+export declare const cachedEmbeddings: CachedEmbeddings;
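Per these declarations, CachedEmbeddings exposes embed() for single strings or batches, getStats() for cache hit-rate inspection, and addPrecomputed() for seeding the cache, with the cachedEmbeddings singleton exported ready to use. A brief sketch (assumes Vector is an array of numbers; embedding values come from the underlying EmbeddingManager):

    import { cachedEmbeddings } from './embeddings/CachedEmbeddings.js'

    const vec = await cachedEmbeddings.embed('hello world')        // single Vector
    const batch = await cachedEmbeddings.embed(['alpha', 'beta'])  // Vector[]

    cachedEmbeddings.addPrecomputed('hello world', vec as number[]) // seed the cache for a common term
    console.log(cachedEmbeddings.getStats())  // { totalEmbeddings, cacheHitRate, cacheHits, simpleComputes, modelCalls }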