npm - @soulcraft/brainy - Versions diffs - 2.11.0 → 2.14.0 - Mend

@soulcraft/brainy 2.11.0 → 2.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/CHANGELOG.md +15 -0
package/dist/brainyData.d.ts +5 -8
package/dist/brainyData.js +56 -39
package/dist/config/index.d.ts +1 -0
package/dist/config/index.js +2 -0
package/dist/config/modelAutoConfig.d.ts +1 -0
package/dist/config/modelAutoConfig.js +27 -22
package/dist/config/modelPrecisionManager.d.ts +42 -0
package/dist/config/modelPrecisionManager.js +98 -0
package/dist/config/zeroConfig.js +1 -1
package/dist/embeddings/CachedEmbeddings.d.ts +40 -0
package/dist/embeddings/CachedEmbeddings.js +146 -0
package/dist/embeddings/EmbeddingManager.d.ts +106 -0
package/dist/embeddings/EmbeddingManager.js +296 -0
package/dist/embeddings/SingletonModelManager.d.ts +95 -0
package/dist/embeddings/SingletonModelManager.js +220 -0
package/dist/embeddings/index.d.ts +12 -0
package/dist/embeddings/index.js +16 -0
package/dist/embeddings/lightweight-embedder.d.ts +0 -1
package/dist/embeddings/lightweight-embedder.js +4 -12
package/dist/embeddings/universal-memory-manager.js +13 -50
package/dist/embeddings/worker-embedding.js +4 -8
package/dist/neural/improvedNeuralAPI.d.ts +346 -0
package/dist/neural/improvedNeuralAPI.js +2439 -0
package/dist/neural/types.d.ts +267 -0
package/dist/neural/types.js +24 -0
package/dist/utils/embedding.d.ts +7 -2
package/dist/utils/embedding.js +51 -33
package/dist/utils/hybridModelManager.d.ts +19 -28
package/dist/utils/hybridModelManager.js +36 -200
package/package.json +1 -1

package/dist/neural/types.d.ts ADDED Viewed

@@ -0,0 +1,267 @@
+/**
+ * Neural API Type Definitions
+ * Comprehensive interfaces for clustering, similarity, and analysis
+ */
+export interface Vector { // Array-like numeric vector: number-indexed entries plus a length
+    [index: number]: number;
+    length: number;
+}
+export interface SemanticCluster { // Base cluster shape; DomainCluster, TemporalCluster, ExplainableCluster and ConfidentCluster extend it
+    id: string;
+    centroid: Vector;
+    members: string[]; // presumably the ids of the clustered items — confirm against the implementation
+    size: number; // presumably equals members.length — confirm
+    confidence: number; // value range is not stated in this declaration
+    label?: string;
+    metadata?: Record<string, any>;
+    cohesion?: number;
+    level?: number;
+}
+export interface DomainCluster extends SemanticCluster { // SemanticCluster plus a required domain tag and its confidence
+    domain: string;
+    domainConfidence: number;
+    crossDomainMembers?: string[]; // presumably member ids that also relate to other domains — confirm
+}
+export interface TemporalCluster extends SemanticCluster { // SemanticCluster plus a required time window and temporal details
+    timeWindow: TimeWindow;
+    trend?: 'increasing' | 'decreasing' | 'stable';
+    temporal: { // required object; only startTime and endTime are mandatory inside it
+        startTime: Date;
+        endTime: Date;
+        peakTime?: Date;
+        frequency?: number;
+    };
+}
+export interface ExplainableCluster extends SemanticCluster { // SemanticCluster plus a required explanation; can nest via subClusters
+    explanation: {
+        primaryFeatures: string[];
+        commonTerms: string[];
+        reasoning: string;
+        confidence: number; // separate field from the inherited SemanticCluster.confidence
+    };
+    subClusters?: ExplainableCluster[]; // recursive: each sub-cluster has the same shape
+}
+export interface ConfidentCluster extends SemanticCluster { // SemanticCluster plus a confidence floor and two member lists
+    minConfidence: number;
+    uncertainMembers: string[]; // presumably members scoring below minConfidence — confirm
+    certainMembers: string[]; // presumably members scoring at or above minConfidence — confirm
+}
+export interface BaseClusteringOptions { // Shared base of ClusteringOptions, DomainClusteringOptions, TemporalClusteringOptions and StreamClusteringOptions; every field is optional
+    maxClusters?: number;
+    minClusterSize?: number;
+    threshold?: number;
+    cacheResults?: boolean;
+}
+export interface ClusteringOptions extends BaseClusteringOptions { // General clustering options; every field is optional
+    algorithm?: 'auto' | 'hierarchical' | 'kmeans' | 'dbscan' | 'sample' | 'semantic' | 'graph' | 'multimodal';
+    sampleSize?: number;
+    strategy?: 'random' | 'diverse' | 'recent' | 'important';
+    memoryLimit?: string; // string-typed; the accepted format is not visible in this declaration
+    includeOutliers?: boolean;
+    maxIterations?: number;
+    tolerance?: number;
+}
+export interface DomainClusteringOptions extends BaseClusteringOptions { // Options for domain-oriented clustering; every field is optional
+    domainField?: string;
+    crossDomainThreshold?: number;
+    preserveDomainBoundaries?: boolean;
+}
+export interface TemporalClusteringOptions extends BaseClusteringOptions { // Unlike the sibling option interfaces, timeField and windows are required here
+    timeField: string;
+    windows: TimeWindow[];
+    overlapStrategy?: 'merge' | 'separate' | 'hierarchical';
+    trendAnalysis?: boolean;
+}
+export interface StreamClusteringOptions extends BaseClusteringOptions { // Options for streaming clustering; every field is optional
+    batchSize?: number;
+    updateInterval?: number; // unit is not stated in this declaration
+    adaptiveThreshold?: boolean;
+    decayFactor?: number;
+}
+export interface SimilarityOptions { // Per-call similarity options; every field is optional
+    detailed?: boolean;
+    metric?: 'cosine' | 'euclidean' | 'manhattan' | 'jaccard'; // includes 'jaccard', which NeuralAPIConfig.similarityMetric does not list
+    normalized?: boolean;
+}
+export interface SimilarityResult { // Similarity outcome: required score and confidence, optional explanation and metric
+    score: number;
+    confidence: number;
+    explanation?: string;
+    metric?: string; // plain string here, not the literal union used by SimilarityOptions.metric
+}
+export interface NeighborOptions { // Options for a neighbor lookup; every field is optional
+    limit?: number;
+    radius?: number;
+    minSimilarity?: number;
+    includeMetadata?: boolean;
+    sortBy?: 'similarity' | 'importance' | 'recency';
+}
+export interface Neighbor { // One neighbor entry: required id and similarity, optional payload and distance
+    id: string;
+    similarity: number;
+    data?: any;
+    metadata?: Record<string, any>;
+    distance?: number;
+}
+export interface NeighborsResult { // Result wrapper around a list of Neighbor entries for one query
+    neighbors: Neighbor[];
+    queryId: string;
+    totalFound: number; // presumably can exceed neighbors.length when a limit applies — confirm
+    averageSimilarity: number;
+}
+export interface SemanticHierarchy { // Hierarchy view of one item: self is required; parent, children and siblings are optional
+    self: {
+        id: string;
+        vector?: Vector;
+        metadata?: any;
+    };
+    parent?: { // single { id, similarity } reference
+        id: string;
+        similarity: number;
+    };
+    children?: Array<{ // same { id, similarity } shape as parent
+        id: string;
+        similarity: number;
+    }>;
+    siblings?: Array<{ // same { id, similarity } shape as parent
+        id: string;
+        similarity: number;
+    }>;
+    level?: number;
+    depth?: number;
+}
+export interface HierarchyOptions { // Options for building a hierarchy; every field is optional
+    maxDepth?: number;
+    minSimilarity?: number;
+    includeMetadata?: boolean;
+    buildStrategy?: 'similarity' | 'metadata' | 'mixed';
+}
+export interface VisualizationOptions { // Options for producing a visualization; every field is optional
+    maxNodes?: number;
+    dimensions?: 2 | 3; // only the literals 2 and 3 are accepted by the type
+    algorithm?: 'force' | 'spring' | 'circular' | 'hierarchical';
+    includeEdges?: boolean;
+    clusterColors?: boolean;
+    nodeSize?: 'uniform' | 'importance' | 'connections';
+}
+export interface VisualizationNode { // One positioned node: id, x and y are required
+    id: string;
+    x: number;
+    y: number;
+    z?: number; // optional; presumably set only for 3-dimensional layouts — confirm
+    cluster?: string;
+    size?: number;
+    color?: string;
+    metadata?: Record<string, any>;
+}
+export interface VisualizationEdge { // One weighted edge between two endpoints
+    source: string; // presumably a VisualizationNode id — confirm
+    target: string; // presumably a VisualizationNode id — confirm
+    weight: number;
+    color?: string;
+    type?: string;
+}
+export interface VisualizationResult { // Visualization output: nodes, edges, optional cluster summaries and required run metadata
+    nodes: VisualizationNode[];
+    edges: VisualizationEdge[];
+    clusters?: Array<{
+        id: string;
+        color: string;
+        size: number;
+        label?: string;
+    }>;
+    metadata: {
+        algorithm: string;
+        dimensions: number; // typed as number here, whereas VisualizationOptions.dimensions is 2 | 3
+        totalNodes: number;
+        totalEdges: number;
+        generatedAt: Date;
+    };
+}
+export interface TimeWindow { // Date-bounded window used by TemporalCluster.timeWindow and TemporalClusteringOptions.windows
+    start: Date;
+    end: Date;
+    label?: string;
+    weight?: number;
+}
+export interface ClusterFeedback { // Feedback record naming a cluster and one of four actions
+    clusterId: string;
+    action: 'merge' | 'split' | 'relabel' | 'adjust';
+    parameters?: Record<string, any>; // per-action parameter shape is not specified in this declaration
+    confidence?: number;
+}
+export interface OutlierOptions { // Options for outlier detection; every field is optional
+    threshold?: number;
+    method?: 'isolation' | 'statistical' | 'cluster-based';
+    minNeighbors?: number;
+    includeReasons?: boolean;
+}
+export interface Outlier { // One detected outlier: id and score are required
+    id: string;
+    score: number;
+    reasons?: string[];
+    nearestNeighbors?: Neighbor[]; // reuses the Neighbor interface declared earlier in this file
+    metadata?: Record<string, any>;
+}
+export interface PerformanceMetrics { // Run statistics attached to ClusteringResult and StreamingBatch; every field is required
+    executionTime: number; // unit is not stated in this declaration
+    memoryUsed: number; // unit is not stated in this declaration
+    itemsProcessed: number;
+    cacheHits: number;
+    cacheMisses: number;
+    algorithm: string;
+}
+export interface ClusteringResult<T = SemanticCluster> { // Generic clustering result; the cluster type T defaults to SemanticCluster
+    clusters: T[];
+    metrics: PerformanceMetrics;
+    metadata: { // totalItems, clustersFound, averageClusterSize and timestamp are required; the rest are optional
+        totalItems: number;
+        clustersFound: number;
+        averageClusterSize: number;
+        silhouetteScore?: number;
+        timestamp: Date;
+        semanticTypes?: number; // this and the optional fields below are presumably algorithm-specific — confirm
+        hnswLevel?: number;
+        kValue?: number;
+        hasConverged?: boolean;
+        outlierCount?: number;
+        eps?: number;
+        minPts?: number;
+        averageModularity?: number;
+        fusionMethod?: string;
+        componentAlgorithms?: string[];
+        sampleSize?: number;
+        samplingStrategy?: string;
+    };
+}
+export interface StreamingBatch<T = SemanticCluster> { // One batch of streamed clustering output; T defaults to SemanticCluster
+    clusters: T[];
+    batchNumber: number;
+    isComplete: boolean;
+    progress: {
+        processed: number;
+        total: number;
+        percentage: number; // scale (0-1 vs 0-100) is not stated in this declaration
+    };
+    metrics: PerformanceMetrics;
+}
+export declare class NeuralAPIError extends Error { // Error subclass carrying a string code and an optional context record
+    code: string;
+    context?: Record<string, any> | undefined;
+    constructor(message: string, code: string, context?: Record<string, any> | undefined);
+}
+export declare class ClusteringError extends NeuralAPIError { // NeuralAPIError subclass whose constructor takes no code argument
+    constructor(message: string, context?: Record<string, any>);
+}
+export declare class SimilarityError extends NeuralAPIError { // NeuralAPIError subclass whose constructor takes no code argument
+    constructor(message: string, context?: Record<string, any>);
+}
+export interface NeuralAPIConfig { // Top-level Neural API configuration; every field is optional
+    cacheSize?: number;
+    defaultAlgorithm?: string; // plain string, not the literal union used by ClusteringOptions.algorithm
+    similarityMetric?: 'cosine' | 'euclidean' | 'manhattan'; // omits 'jaccard', which SimilarityOptions.metric accepts
+    performanceTracking?: boolean;
+    maxMemoryUsage?: string; // string-typed; the accepted format is not visible in this declaration
+    parallelProcessing?: boolean;
+    streamingBatchSize?: number;
+}

package/dist/neural/types.js ADDED Viewed

@@ -0,0 +1,24 @@
+/**
+ * Neural API Type Definitions
+ * Comprehensive interfaces for clustering, similarity, and analysis
+ */
+// ===== ERROR TYPES =====
+export class NeuralAPIError extends Error { // Base Neural API error: adds code and context on top of Error
+    constructor(message, code, context) {
+        super(message);
+        this.code = code;
+        this.context = context; // stored as passed; undefined when the caller omits it
+        this.name = 'NeuralAPIError'; // ClusteringError and SimilarityError do not override this, so their instances report the same name
+    }
+}
+export class ClusteringError extends NeuralAPIError { // NeuralAPIError with the code fixed to 'CLUSTERING_ERROR'
+    constructor(message, context) {
+        super(message, 'CLUSTERING_ERROR', context); // name is left as set by the base constructor
+    }
+}
+export class SimilarityError extends NeuralAPIError { // NeuralAPIError with the code fixed to 'SIMILARITY_ERROR'
+    constructor(message, context) {
+        super(message, 'SIMILARITY_ERROR', context); // name is left as set by the base constructor
+    }
+}
+//# sourceMappingURL=types.js.map

package/dist/utils/embedding.d.ts CHANGED Viewed

@@ -51,6 +51,10 @@ export declare class TransformerEmbedding implements EmbeddingModel {
      * Log message only if verbose mode is enabled
      */
     private logger;
+    /**
+     * Generate mock embeddings for unit tests
+     */
+    private getMockEmbedding;
     /**
      * Initialize the embedding model
      */
@@ -78,12 +82,13 @@ export declare const UniversalSentenceEncoder: typeof TransformerEmbedding;
  */
 export declare function createEmbeddingModel(options?: TransformerEmbeddingOptions): EmbeddingModel;
 /**
- * Default embedding function using the hybrid model manager (BEST OF BOTH WORLDS)
- * Prevents multiple model loads while supporting multi-source downloading
+ * Default embedding function using the unified EmbeddingManager
+ * Simple, clean, reliable - no more layers of indirection
  */
 export declare const defaultEmbeddingFunction: EmbeddingFunction;
 /**
  * Create an embedding function with custom options
+ * NOTE: Options are validated but the singleton EmbeddingManager is always used
  */
 export declare function createEmbeddingFunction(options?: TransformerEmbeddingOptions): EmbeddingFunction;
 /**

package/dist/utils/embedding.js CHANGED Viewed

@@ -3,7 +3,6 @@
  * Complete rewrite to eliminate TensorFlow.js and use ONNX-based models
  */
 import { isBrowser } from './environment.js';
-import { ModelManager } from '../embeddings/model-manager.js';
 import { join } from 'path';
 import { existsSync } from 'fs';
 // @ts-ignore - Transformers.js is now the primary embedding library
@@ -208,6 +207,24 @@ export class TransformerEmbedding {
             console[level](`[TransformerEmbedding] ${message}`, ...args);
         }
     }
+    /**
+     * Generate a deterministic 384-element mock embedding for unit tests, computed only from the input text
+     */
+    getMockEmbedding(data) {
+        // Use the same mock logic as setup-unit.ts for consistency (that file is not visible in this diff)
+        const input = Array.isArray(data) ? data.join(' ') : data; // an array yields ONE vector for the space-joined text, not one per element
+        const str = typeof input === 'string' ? input : JSON.stringify(input); // NOTE(review): JSON.stringify(undefined) returns undefined, so str.length below would throw — confirm callers never pass undefined
+        const vector = new Array(384).fill(0);
+        // Seed the first min(str.length, 384) slots with the char code mod 256, scaled into [0, 1)
+        for (let i = 0; i < Math.min(str.length, 384); i++) {
+            vector[i] = (str.charCodeAt(i % str.length) % 256) / 256; // i < str.length in this loop, so i % str.length equals i
+        }
+        // Add a sine offset in [-0.1, 0.1] that depends on the slot index and the text length
+        for (let i = 0; i < 384; i++) {
+            vector[i] += Math.sin(i * 0.1 + str.length) * 0.1;
+        }
+        return vector; // plain number[] of length 384; not normalized
+    }
     /**
      * Initialize the embedding model
      */
@@ -215,11 +232,13 @@ export class TransformerEmbedding {
         if (this.initialized) {
             return;
         }
-        // Always use real implementation - no mocking
+        // In unit test mode, skip real model initialization to prevent ONNX conflicts
+        if (process.env.BRAINY_UNIT_TEST === 'true' || globalThis.__BRAINY_UNIT_TEST__) {
+            this.initialized = true;
+            this.logger('log', '🧪 Using mocked embeddings for unit tests');
+            return;
+        }
         try {
-            // Ensure models are available (downloads if needed)
-            const modelManager = ModelManager.getInstance();
-            await modelManager.ensureModels(this.options.model);
             // Resolve device configuration and cache directory
             const device = await resolveDevice(this.options.device);
             const cacheDir = this.options.cacheDir === './models'
@@ -227,35 +246,26 @@ export class TransformerEmbedding {
                 : this.options.cacheDir;
             this.logger('log', `Loading Transformer model: ${this.options.model} on device: ${device}`);
             const startTime = Date.now();
-            // Check model availability and select appropriate variant
-            const available = modelManager.getAvailableModels(this.options.model);
-            let actualType = modelManager.getBestAvailableModel(this.options.precision, this.options.model);
-            if (!actualType) {
-                throw new Error(`No model variants available for ${this.options.model}. Run 'npm run download-models' to download models.`);
-            }
-            if (actualType !== this.options.precision) {
-                this.logger('log', `Using ${actualType} model (${this.options.precision} not available)`);
-            }
-            // CRITICAL FIX: Control which model file transformers.js loads
-            // When both model.onnx and model_quantized.onnx exist, transformers.js defaults to model.onnx
-            // We need to explicitly control this based on the precision setting
-            // Set environment to control model selection BEFORE creating pipeline
+            // Use the configured precision from EmbeddingManager
+            const { embeddingManager } = await import('../embeddings/EmbeddingManager.js');
+            let actualType = embeddingManager.getPrecision();
+            // CRITICAL: Control which model precision transformers.js uses
+            // Q8 models use quantized int8 weights for 75% size reduction
+            // FP32 models use full precision floating point
             if (actualType === 'q8') {
-                // For Q8, we want to use the quantized model
-                // transformers.js v3 doesn't have a direct flag, so we need to work around this
-                // HACK: Temporarily modify the model file preference
-                // This forces transformers.js to look for model_quantized.onnx first
-                const originalModelFileName = env.onnxModelFileName(env).onnxModelFileName = 'model_quantized';
-                this.logger('log', '🎯 Selecting Q8 quantized model (75% smaller)');
+                this.logger('log', '🎯 Selecting Q8 quantized model (75% smaller, 99% accuracy)');
             }
             else {
-                this.logger('log', '📦 Using FP32 model (full precision)');
+                this.logger('log', '📦 Using FP32 model (full precision, larger size)');
             }
             // Load the feature extraction pipeline with memory optimizations
             const pipelineOptions = {
                 cache_dir: cacheDir,
                 local_files_only: isBrowser() ? false : this.options.localFilesOnly,
-                // Remove the quantized flag - it doesn't work in transformers.js v3
+                // CRITICAL: Specify dtype for model precision
+                dtype: actualType === 'q8' ? 'q8' : 'fp32',
+                // CRITICAL: For Q8, explicitly use quantized model
+                quantized: actualType === 'q8',
                 // CRITICAL: ONNX memory optimizations
                 session_options: {
                     enableCpuMemArena: false, // Disable pre-allocated memory arena
@@ -336,6 +346,10 @@ export class TransformerEmbedding {
      * Generate embeddings for text data
      */
     async embed(data) {
+        // In unit test mode, return mock embeddings
+        if (process.env.BRAINY_UNIT_TEST === 'true' || globalThis.__BRAINY_UNIT_TEST__) {
+            return this.getMockEmbedding(data);
+        }
         if (!this.initialized) {
             await this.init();
         }
@@ -433,21 +447,25 @@ export function createEmbeddingModel(options) {
     return new TransformerEmbedding(options);
 }
 /**
- * Default embedding function using the hybrid model manager (BEST OF BOTH WORLDS)
- * Prevents multiple model loads while supporting multi-source downloading
+ * Default embedding function using the unified EmbeddingManager
+ * Simple, clean, reliable - no more layers of indirection
  */
 export const defaultEmbeddingFunction = async (data) => {
-    const { getHybridEmbeddingFunction } = await import('./hybridModelManager.js');
-    const embeddingFn = await getHybridEmbeddingFunction();
-    return await embeddingFn(data);
+    const { embed } = await import('../embeddings/EmbeddingManager.js');
+    return await embed(data);
 };
 /**
  * Create an embedding function with custom options
+ * NOTE: Options are validated but the singleton EmbeddingManager is always used
  */
 export function createEmbeddingFunction(options = {}) {
-    const embedder = new TransformerEmbedding(options);
     return async (data) => {
-        return await embedder.embed(data);
+        const { embeddingManager } = await import('../embeddings/EmbeddingManager.js');
+        // Validate precision if specified
+        if (options.precision) {
+            embeddingManager.validatePrecision(options.precision);
+        }
+        return await embeddingManager.embed(data);
     };
 }
 /**

package/dist/utils/hybridModelManager.d.ts CHANGED Viewed

@@ -1,55 +1,44 @@
 /**
  * Hybrid Model Manager - BEST OF BOTH WORLDS
  *
- * Combines:
+ * NOW A WRAPPER AROUND SingletonModelManager
+ * Maintained for backward compatibility
+ *
+ * Previously combined:
  * 1. Multi-source downloading strategy (GitHub → CDN → Hugging Face)
  * 2. Singleton pattern preventing multiple ONNX model loads
  * 3. Environment-specific optimizations
  * 4. Graceful fallbacks and error handling
+ *
+ * Now delegates all operations to SingletonModelManager for true unification
  */
-import { TransformerEmbedding } from './embedding.js';
 import { EmbeddingFunction } from '../coreTypes.js';
 /**
- * Global singleton model manager - PREVENTS MULTIPLE MODEL LOADS
+ * HybridModelManager - Now a wrapper around SingletonModelManager
+ * Maintained for backward compatibility
  */
 declare class HybridModelManager {
     private static instance;
-    private primaryModel;
-    private modelPromise;
-    private isInitialized;
-    private modelsPath;
     private constructor();
     static getInstance(): HybridModelManager;
     /**
-     * Get the primary embedding model - LOADS ONCE, REUSES FOREVER
-     */
-    getPrimaryModel(): Promise<TransformerEmbedding>;
-    /**
-     * Smart model path detection
-     */
-    private getModelsPath;
-    /**
-     * Initialize with BEST OF BOTH: Multi-source + Singleton
-     */
-    private initializePrimaryModel;
-    /**
-     * Create model with multi-source fallback strategy
+     * Get the primary embedding model - delegates to SingletonModelManager
      */
-    private createModelWithFallbacks;
+    getPrimaryModel(): Promise<any>;
     /**
-     * Get embedding function that reuses the singleton model
+     * Get embedding function - delegates to SingletonModelManager
      */
     getEmbeddingFunction(): Promise<EmbeddingFunction>;
     /**
-     * Check if model is ready (loaded and initialized)
+     * Check if model is ready - delegates to SingletonModelManager
      */
     isModelReady(): boolean;
     /**
-     * Force model reload (for testing or recovery)
+     * Force model reload - not supported with SingletonModelManager
      */
     reloadModel(): Promise<void>;
     /**
-     * Get model status for debugging
+     * Get model status - delegates to SingletonModelManager
      */
     getModelStatus(): {
         loaded: boolean;
@@ -59,15 +48,17 @@ declare class HybridModelManager {
 }
 export declare const hybridModelManager: HybridModelManager;
 /**
- * Get the hybrid singleton embedding function - USE THIS EVERYWHERE!
+ * Get the hybrid singleton embedding function - Now delegates to SingletonModelManager
+ * Maintained for backward compatibility
  */
 export declare function getHybridEmbeddingFunction(): Promise<EmbeddingFunction>;
 /**
- * Optimized hybrid embedding function that uses multi-source + singleton
+ * Hybrid embedding function - Now delegates to SingletonModelManager
+ * Maintained for backward compatibility
  */
 export declare const hybridEmbeddingFunction: EmbeddingFunction;
 /**
- * Preload model for tests or production - CALL THIS ONCE AT START
+ * Preload model for tests or production - Now delegates to SingletonModelManager
  */
 export declare function preloadHybridModel(): Promise<void>;
 export {};