@soulcraft/brainy 2.11.0 → 2.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,267 @@
1
+ /**
2
+ * Neural API Type Definitions
3
+ * Comprehensive interfaces for clustering, similarity, and analysis
4
+ */
5
+ export interface Vector {
6
+ [index: number]: number;
7
+ length: number;
8
+ }
9
+ export interface SemanticCluster {
10
+ id: string;
11
+ centroid: Vector;
12
+ members: string[];
13
+ size: number;
14
+ confidence: number;
15
+ label?: string;
16
+ metadata?: Record<string, any>;
17
+ cohesion?: number;
18
+ level?: number;
19
+ }
20
+ export interface DomainCluster extends SemanticCluster {
21
+ domain: string;
22
+ domainConfidence: number;
23
+ crossDomainMembers?: string[];
24
+ }
25
+ export interface TemporalCluster extends SemanticCluster {
26
+ timeWindow: TimeWindow;
27
+ trend?: 'increasing' | 'decreasing' | 'stable';
28
+ temporal: {
29
+ startTime: Date;
30
+ endTime: Date;
31
+ peakTime?: Date;
32
+ frequency?: number;
33
+ };
34
+ }
35
+ export interface ExplainableCluster extends SemanticCluster {
36
+ explanation: {
37
+ primaryFeatures: string[];
38
+ commonTerms: string[];
39
+ reasoning: string;
40
+ confidence: number;
41
+ };
42
+ subClusters?: ExplainableCluster[];
43
+ }
44
+ export interface ConfidentCluster extends SemanticCluster {
45
+ minConfidence: number;
46
+ uncertainMembers: string[];
47
+ certainMembers: string[];
48
+ }
49
+ export interface BaseClusteringOptions {
50
+ maxClusters?: number;
51
+ minClusterSize?: number;
52
+ threshold?: number;
53
+ cacheResults?: boolean;
54
+ }
55
+ export interface ClusteringOptions extends BaseClusteringOptions {
56
+ algorithm?: 'auto' | 'hierarchical' | 'kmeans' | 'dbscan' | 'sample' | 'semantic' | 'graph' | 'multimodal';
57
+ sampleSize?: number;
58
+ strategy?: 'random' | 'diverse' | 'recent' | 'important';
59
+ memoryLimit?: string;
60
+ includeOutliers?: boolean;
61
+ maxIterations?: number;
62
+ tolerance?: number;
63
+ }
64
+ export interface DomainClusteringOptions extends BaseClusteringOptions {
65
+ domainField?: string;
66
+ crossDomainThreshold?: number;
67
+ preserveDomainBoundaries?: boolean;
68
+ }
69
+ export interface TemporalClusteringOptions extends BaseClusteringOptions {
70
+ timeField: string;
71
+ windows: TimeWindow[];
72
+ overlapStrategy?: 'merge' | 'separate' | 'hierarchical';
73
+ trendAnalysis?: boolean;
74
+ }
75
+ export interface StreamClusteringOptions extends BaseClusteringOptions {
76
+ batchSize?: number;
77
+ updateInterval?: number;
78
+ adaptiveThreshold?: boolean;
79
+ decayFactor?: number;
80
+ }
81
+ export interface SimilarityOptions {
82
+ detailed?: boolean;
83
+ metric?: 'cosine' | 'euclidean' | 'manhattan' | 'jaccard';
84
+ normalized?: boolean;
85
+ }
86
+ export interface SimilarityResult {
87
+ score: number;
88
+ confidence: number;
89
+ explanation?: string;
90
+ metric?: string;
91
+ }
92
+ export interface NeighborOptions {
93
+ limit?: number;
94
+ radius?: number;
95
+ minSimilarity?: number;
96
+ includeMetadata?: boolean;
97
+ sortBy?: 'similarity' | 'importance' | 'recency';
98
+ }
99
+ export interface Neighbor {
100
+ id: string;
101
+ similarity: number;
102
+ data?: any;
103
+ metadata?: Record<string, any>;
104
+ distance?: number;
105
+ }
106
+ export interface NeighborsResult {
107
+ neighbors: Neighbor[];
108
+ queryId: string;
109
+ totalFound: number;
110
+ averageSimilarity: number;
111
+ }
112
+ export interface SemanticHierarchy {
113
+ self: {
114
+ id: string;
115
+ vector?: Vector;
116
+ metadata?: any;
117
+ };
118
+ parent?: {
119
+ id: string;
120
+ similarity: number;
121
+ };
122
+ children?: Array<{
123
+ id: string;
124
+ similarity: number;
125
+ }>;
126
+ siblings?: Array<{
127
+ id: string;
128
+ similarity: number;
129
+ }>;
130
+ level?: number;
131
+ depth?: number;
132
+ }
133
+ export interface HierarchyOptions {
134
+ maxDepth?: number;
135
+ minSimilarity?: number;
136
+ includeMetadata?: boolean;
137
+ buildStrategy?: 'similarity' | 'metadata' | 'mixed';
138
+ }
139
+ export interface VisualizationOptions {
140
+ maxNodes?: number;
141
+ dimensions?: 2 | 3;
142
+ algorithm?: 'force' | 'spring' | 'circular' | 'hierarchical';
143
+ includeEdges?: boolean;
144
+ clusterColors?: boolean;
145
+ nodeSize?: 'uniform' | 'importance' | 'connections';
146
+ }
147
+ export interface VisualizationNode {
148
+ id: string;
149
+ x: number;
150
+ y: number;
151
+ z?: number;
152
+ cluster?: string;
153
+ size?: number;
154
+ color?: string;
155
+ metadata?: Record<string, any>;
156
+ }
157
+ export interface VisualizationEdge {
158
+ source: string;
159
+ target: string;
160
+ weight: number;
161
+ color?: string;
162
+ type?: string;
163
+ }
164
+ export interface VisualizationResult {
165
+ nodes: VisualizationNode[];
166
+ edges: VisualizationEdge[];
167
+ clusters?: Array<{
168
+ id: string;
169
+ color: string;
170
+ size: number;
171
+ label?: string;
172
+ }>;
173
+ metadata: {
174
+ algorithm: string;
175
+ dimensions: number;
176
+ totalNodes: number;
177
+ totalEdges: number;
178
+ generatedAt: Date;
179
+ };
180
+ }
181
+ export interface TimeWindow {
182
+ start: Date;
183
+ end: Date;
184
+ label?: string;
185
+ weight?: number;
186
+ }
187
+ export interface ClusterFeedback {
188
+ clusterId: string;
189
+ action: 'merge' | 'split' | 'relabel' | 'adjust';
190
+ parameters?: Record<string, any>;
191
+ confidence?: number;
192
+ }
193
+ export interface OutlierOptions {
194
+ threshold?: number;
195
+ method?: 'isolation' | 'statistical' | 'cluster-based';
196
+ minNeighbors?: number;
197
+ includeReasons?: boolean;
198
+ }
199
+ export interface Outlier {
200
+ id: string;
201
+ score: number;
202
+ reasons?: string[];
203
+ nearestNeighbors?: Neighbor[];
204
+ metadata?: Record<string, any>;
205
+ }
206
+ export interface PerformanceMetrics {
207
+ executionTime: number;
208
+ memoryUsed: number;
209
+ itemsProcessed: number;
210
+ cacheHits: number;
211
+ cacheMisses: number;
212
+ algorithm: string;
213
+ }
214
+ export interface ClusteringResult<T = SemanticCluster> {
215
+ clusters: T[];
216
+ metrics: PerformanceMetrics;
217
+ metadata: {
218
+ totalItems: number;
219
+ clustersFound: number;
220
+ averageClusterSize: number;
221
+ silhouetteScore?: number;
222
+ timestamp: Date;
223
+ semanticTypes?: number;
224
+ hnswLevel?: number;
225
+ kValue?: number;
226
+ hasConverged?: boolean;
227
+ outlierCount?: number;
228
+ eps?: number;
229
+ minPts?: number;
230
+ averageModularity?: number;
231
+ fusionMethod?: string;
232
+ componentAlgorithms?: string[];
233
+ sampleSize?: number;
234
+ samplingStrategy?: string;
235
+ };
236
+ }
237
+ export interface StreamingBatch<T = SemanticCluster> {
238
+ clusters: T[];
239
+ batchNumber: number;
240
+ isComplete: boolean;
241
+ progress: {
242
+ processed: number;
243
+ total: number;
244
+ percentage: number;
245
+ };
246
+ metrics: PerformanceMetrics;
247
+ }
248
+ export declare class NeuralAPIError extends Error {
249
+ code: string;
250
+ context?: Record<string, any> | undefined;
251
+ constructor(message: string, code: string, context?: Record<string, any> | undefined);
252
+ }
253
+ export declare class ClusteringError extends NeuralAPIError {
254
+ constructor(message: string, context?: Record<string, any>);
255
+ }
256
+ export declare class SimilarityError extends NeuralAPIError {
257
+ constructor(message: string, context?: Record<string, any>);
258
+ }
259
+ export interface NeuralAPIConfig {
260
+ cacheSize?: number;
261
+ defaultAlgorithm?: string;
262
+ similarityMetric?: 'cosine' | 'euclidean' | 'manhattan';
263
+ performanceTracking?: boolean;
264
+ maxMemoryUsage?: string;
265
+ parallelProcessing?: boolean;
266
+ streamingBatchSize?: number;
267
+ }
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Neural API Type Definitions
3
+ * Comprehensive interfaces for clustering, similarity, and analysis
4
+ */
5
+ // ===== ERROR TYPES =====
6
+ export class NeuralAPIError extends Error {
7
+ constructor(message, code, context) {
8
+ super(message);
9
+ this.code = code;
10
+ this.context = context;
11
+ this.name = 'NeuralAPIError';
12
+ }
13
+ }
14
+ export class ClusteringError extends NeuralAPIError {
15
+ constructor(message, context) {
16
+ super(message, 'CLUSTERING_ERROR', context);
17
+ }
18
+ }
19
+ export class SimilarityError extends NeuralAPIError {
20
+ constructor(message, context) {
21
+ super(message, 'SIMILARITY_ERROR', context);
22
+ }
23
+ }
24
+ //# sourceMappingURL=types.js.map
@@ -51,6 +51,10 @@ export declare class TransformerEmbedding implements EmbeddingModel {
51
51
  * Log message only if verbose mode is enabled
52
52
  */
53
53
  private logger;
54
+ /**
55
+ * Generate mock embeddings for unit tests
56
+ */
57
+ private getMockEmbedding;
54
58
  /**
55
59
  * Initialize the embedding model
56
60
  */
@@ -78,12 +82,13 @@ export declare const UniversalSentenceEncoder: typeof TransformerEmbedding;
78
82
  */
79
83
  export declare function createEmbeddingModel(options?: TransformerEmbeddingOptions): EmbeddingModel;
80
84
  /**
81
- * Default embedding function using the hybrid model manager (BEST OF BOTH WORLDS)
82
- * Prevents multiple model loads while supporting multi-source downloading
85
+ * Default embedding function using the unified EmbeddingManager
86
+ * Simple, clean, reliable - no more layers of indirection
83
87
  */
84
88
  export declare const defaultEmbeddingFunction: EmbeddingFunction;
85
89
  /**
86
90
  * Create an embedding function with custom options
91
+ * NOTE: Options are validated but the singleton EmbeddingManager is always used
87
92
  */
88
93
  export declare function createEmbeddingFunction(options?: TransformerEmbeddingOptions): EmbeddingFunction;
89
94
  /**
@@ -3,7 +3,6 @@
3
3
  * Complete rewrite to eliminate TensorFlow.js and use ONNX-based models
4
4
  */
5
5
  import { isBrowser } from './environment.js';
6
- import { ModelManager } from '../embeddings/model-manager.js';
7
6
  import { join } from 'path';
8
7
  import { existsSync } from 'fs';
9
8
  // @ts-ignore - Transformers.js is now the primary embedding library
@@ -208,6 +207,24 @@ export class TransformerEmbedding {
208
207
  console[level](`[TransformerEmbedding] ${message}`, ...args);
209
208
  }
210
209
  }
210
+ /**
211
+ * Generate mock embeddings for unit tests
212
+ */
213
+ getMockEmbedding(data) {
214
+ // Use the same mock logic as setup-unit.ts for consistency
215
+ const input = Array.isArray(data) ? data.join(' ') : data;
216
+ const str = typeof input === 'string' ? input : JSON.stringify(input);
217
+ const vector = new Array(384).fill(0);
218
+ // Create semi-realistic embeddings based on text content
219
+ for (let i = 0; i < Math.min(str.length, 384); i++) {
220
+ vector[i] = (str.charCodeAt(i % str.length) % 256) / 256;
221
+ }
222
+ // Add position-based variation
223
+ for (let i = 0; i < 384; i++) {
224
+ vector[i] += Math.sin(i * 0.1 + str.length) * 0.1;
225
+ }
226
+ return vector;
227
+ }
211
228
  /**
212
229
  * Initialize the embedding model
213
230
  */
@@ -215,11 +232,13 @@ export class TransformerEmbedding {
215
232
  if (this.initialized) {
216
233
  return;
217
234
  }
218
- // Always use real implementation - no mocking
235
+ // In unit test mode, skip real model initialization to prevent ONNX conflicts
236
+ if (process.env.BRAINY_UNIT_TEST === 'true' || globalThis.__BRAINY_UNIT_TEST__) {
237
+ this.initialized = true;
238
+ this.logger('log', '🧪 Using mocked embeddings for unit tests');
239
+ return;
240
+ }
219
241
  try {
220
- // Ensure models are available (downloads if needed)
221
- const modelManager = ModelManager.getInstance();
222
- await modelManager.ensureModels(this.options.model);
223
242
  // Resolve device configuration and cache directory
224
243
  const device = await resolveDevice(this.options.device);
225
244
  const cacheDir = this.options.cacheDir === './models'
@@ -227,35 +246,26 @@ export class TransformerEmbedding {
227
246
  : this.options.cacheDir;
228
247
  this.logger('log', `Loading Transformer model: ${this.options.model} on device: ${device}`);
229
248
  const startTime = Date.now();
230
- // Check model availability and select appropriate variant
231
- const available = modelManager.getAvailableModels(this.options.model);
232
- let actualType = modelManager.getBestAvailableModel(this.options.precision, this.options.model);
233
- if (!actualType) {
234
- throw new Error(`No model variants available for ${this.options.model}. Run 'npm run download-models' to download models.`);
235
- }
236
- if (actualType !== this.options.precision) {
237
- this.logger('log', `Using ${actualType} model (${this.options.precision} not available)`);
238
- }
239
- // CRITICAL FIX: Control which model file transformers.js loads
240
- // When both model.onnx and model_quantized.onnx exist, transformers.js defaults to model.onnx
241
- // We need to explicitly control this based on the precision setting
242
- // Set environment to control model selection BEFORE creating pipeline
249
+ // Use the configured precision from EmbeddingManager
250
+ const { embeddingManager } = await import('../embeddings/EmbeddingManager.js');
251
+ let actualType = embeddingManager.getPrecision();
252
+ // CRITICAL: Control which model precision transformers.js uses
253
+ // Q8 models use quantized int8 weights for 75% size reduction
254
+ // FP32 models use full precision floating point
243
255
  if (actualType === 'q8') {
244
- // For Q8, we want to use the quantized model
245
- // transformers.js v3 doesn't have a direct flag, so we need to work around this
246
- // HACK: Temporarily modify the model file preference
247
- // This forces transformers.js to look for model_quantized.onnx first
248
- const originalModelFileName = env.onnxModelFileName(env).onnxModelFileName = 'model_quantized';
249
- this.logger('log', '🎯 Selecting Q8 quantized model (75% smaller)');
256
+ this.logger('log', '🎯 Selecting Q8 quantized model (75% smaller, 99% accuracy)');
250
257
  }
251
258
  else {
252
- this.logger('log', '📦 Using FP32 model (full precision)');
259
+ this.logger('log', '📦 Using FP32 model (full precision, larger size)');
253
260
  }
254
261
  // Load the feature extraction pipeline with memory optimizations
255
262
  const pipelineOptions = {
256
263
  cache_dir: cacheDir,
257
264
  local_files_only: isBrowser() ? false : this.options.localFilesOnly,
258
- // Remove the quantized flag - it doesn't work in transformers.js v3
265
+ // CRITICAL: Specify dtype for model precision
266
+ dtype: actualType === 'q8' ? 'q8' : 'fp32',
267
+ // CRITICAL: For Q8, explicitly use quantized model
268
+ quantized: actualType === 'q8',
259
269
  // CRITICAL: ONNX memory optimizations
260
270
  session_options: {
261
271
  enableCpuMemArena: false, // Disable pre-allocated memory arena
@@ -336,6 +346,10 @@ export class TransformerEmbedding {
336
346
  * Generate embeddings for text data
337
347
  */
338
348
  async embed(data) {
349
+ // In unit test mode, return mock embeddings
350
+ if (process.env.BRAINY_UNIT_TEST === 'true' || globalThis.__BRAINY_UNIT_TEST__) {
351
+ return this.getMockEmbedding(data);
352
+ }
339
353
  if (!this.initialized) {
340
354
  await this.init();
341
355
  }
@@ -433,21 +447,25 @@ export function createEmbeddingModel(options) {
433
447
  return new TransformerEmbedding(options);
434
448
  }
435
449
  /**
436
- * Default embedding function using the hybrid model manager (BEST OF BOTH WORLDS)
437
- * Prevents multiple model loads while supporting multi-source downloading
450
+ * Default embedding function using the unified EmbeddingManager
451
+ * Simple, clean, reliable - no more layers of indirection
438
452
  */
439
453
  export const defaultEmbeddingFunction = async (data) => {
440
- const { getHybridEmbeddingFunction } = await import('./hybridModelManager.js');
441
- const embeddingFn = await getHybridEmbeddingFunction();
442
- return await embeddingFn(data);
454
+ const { embed } = await import('../embeddings/EmbeddingManager.js');
455
+ return await embed(data);
443
456
  };
444
457
  /**
445
458
  * Create an embedding function with custom options
459
+ * NOTE: Options are validated but the singleton EmbeddingManager is always used
446
460
  */
447
461
  export function createEmbeddingFunction(options = {}) {
448
- const embedder = new TransformerEmbedding(options);
449
462
  return async (data) => {
450
- return await embedder.embed(data);
463
+ const { embeddingManager } = await import('../embeddings/EmbeddingManager.js');
464
+ // Validate precision if specified
465
+ if (options.precision) {
466
+ embeddingManager.validatePrecision(options.precision);
467
+ }
468
+ return await embeddingManager.embed(data);
451
469
  };
452
470
  }
453
471
  /**
@@ -1,55 +1,44 @@
1
1
  /**
2
2
  * Hybrid Model Manager - BEST OF BOTH WORLDS
3
3
  *
4
- * Combines:
4
+ * NOW A WRAPPER AROUND SingletonModelManager
5
+ * Maintained for backward compatibility
6
+ *
7
+ * Previously combined:
5
8
  * 1. Multi-source downloading strategy (GitHub → CDN → Hugging Face)
6
9
  * 2. Singleton pattern preventing multiple ONNX model loads
7
10
  * 3. Environment-specific optimizations
8
11
  * 4. Graceful fallbacks and error handling
12
+ *
13
+ * Now delegates all operations to SingletonModelManager for true unification
9
14
  */
10
- import { TransformerEmbedding } from './embedding.js';
11
15
  import { EmbeddingFunction } from '../coreTypes.js';
12
16
  /**
13
- * Global singleton model manager - PREVENTS MULTIPLE MODEL LOADS
17
+ * HybridModelManager - Now a wrapper around SingletonModelManager
18
+ * Maintained for backward compatibility
14
19
  */
15
20
  declare class HybridModelManager {
16
21
  private static instance;
17
- private primaryModel;
18
- private modelPromise;
19
- private isInitialized;
20
- private modelsPath;
21
22
  private constructor();
22
23
  static getInstance(): HybridModelManager;
23
24
  /**
24
- * Get the primary embedding model - LOADS ONCE, REUSES FOREVER
25
- */
26
- getPrimaryModel(): Promise<TransformerEmbedding>;
27
- /**
28
- * Smart model path detection
29
- */
30
- private getModelsPath;
31
- /**
32
- * Initialize with BEST OF BOTH: Multi-source + Singleton
33
- */
34
- private initializePrimaryModel;
35
- /**
36
- * Create model with multi-source fallback strategy
25
+ * Get the primary embedding model - delegates to SingletonModelManager
37
26
  */
38
- private createModelWithFallbacks;
27
+ getPrimaryModel(): Promise<any>;
39
28
  /**
40
- * Get embedding function that reuses the singleton model
29
+ * Get embedding function - delegates to SingletonModelManager
41
30
  */
42
31
  getEmbeddingFunction(): Promise<EmbeddingFunction>;
43
32
  /**
44
- * Check if model is ready (loaded and initialized)
33
+ * Check if model is ready - delegates to SingletonModelManager
45
34
  */
46
35
  isModelReady(): boolean;
47
36
  /**
48
- * Force model reload (for testing or recovery)
37
+ * Force model reload - not supported with SingletonModelManager
49
38
  */
50
39
  reloadModel(): Promise<void>;
51
40
  /**
52
- * Get model status for debugging
41
+ * Get model status - delegates to SingletonModelManager
53
42
  */
54
43
  getModelStatus(): {
55
44
  loaded: boolean;
@@ -59,15 +48,17 @@ declare class HybridModelManager {
59
48
  }
60
49
  export declare const hybridModelManager: HybridModelManager;
61
50
  /**
62
- * Get the hybrid singleton embedding function - USE THIS EVERYWHERE!
51
+ * Get the hybrid singleton embedding function - Now delegates to SingletonModelManager
52
+ * Maintained for backward compatibility
63
53
  */
64
54
  export declare function getHybridEmbeddingFunction(): Promise<EmbeddingFunction>;
65
55
  /**
66
- * Optimized hybrid embedding function that uses multi-source + singleton
56
+ * Hybrid embedding function - Now delegates to SingletonModelManager
57
+ * Maintained for backward compatibility
67
58
  */
68
59
  export declare const hybridEmbeddingFunction: EmbeddingFunction;
69
60
  /**
70
- * Preload model for tests or production - CALL THIS ONCE AT START
61
+ * Preload model for tests or production - Now delegates to SingletonModelManager
71
62
  */
72
63
  export declare function preloadHybridModel(): Promise<void>;
73
64
  export {};