@soulcraft/brainy 0.46.0 → 0.48.0
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/OFFLINE_MODELS.md +56 -0
- package/README.md +46 -1
- package/dist/brainyData.js +7 -9
- package/dist/brainyData.js.map +1 -1
- package/dist/demo.js +2 -2
- package/dist/demo.js.map +1 -1
- package/dist/hnsw/hnswIndex.d.ts +1 -1
- package/dist/hnsw/hnswIndex.js +4 -4
- package/dist/hnsw/hnswIndex.js.map +1 -1
- package/dist/index.d.ts +2 -3
- package/dist/index.js +3 -9
- package/dist/index.js.map +1 -1
- package/dist/setup.d.ts +3 -3
- package/dist/setup.js +6 -6
- package/dist/setup.js.map +1 -1
- package/dist/utils/distance.d.ts +4 -4
- package/dist/utils/distance.js +67 -140
- package/dist/utils/distance.js.map +1 -1
- package/dist/utils/embedding.d.ts +58 -84
- package/dist/utils/embedding.js +250 -594
- package/dist/utils/embedding.js.map +1 -1
- package/dist/utils/robustModelLoader.d.ts +4 -0
- package/dist/utils/robustModelLoader.js +58 -7
- package/dist/utils/robustModelLoader.js.map +1 -1
- package/dist/utils/textEncoding.d.ts +2 -3
- package/dist/utils/textEncoding.js +31 -274
- package/dist/utils/textEncoding.js.map +1 -1
- package/package.json +10 -19
- package/scripts/download-models.cjs +190 -0
--- package/dist/utils/embedding.d.ts (0.46.0)
+++ package/dist/utils/embedding.d.ts (0.48.0)
@@ -1,128 +1,102 @@
 /**
- * Embedding functions for converting data to vectors
+ * Embedding functions for converting data to vectors using Transformers.js
+ * Complete rewrite to eliminate TensorFlow.js and use ONNX-based models
  */
 import { EmbeddingFunction, EmbeddingModel, Vector } from '../coreTypes.js';
-import { ModelLoadOptions } from './robustModelLoader.js';
 /**
- *
- * This model provides high-quality text embeddings using TensorFlow.js
- * The required TensorFlow.js dependencies are automatically installed with this package
- *
- * This implementation attempts to use GPU processing when available for better performance,
- * falling back to CPU processing for compatibility across all environments.
+ * Detect the best available GPU device for the current environment
  */
-export
+export declare function detectBestDevice(): Promise<'cpu' | 'webgpu' | 'cuda'>;
+/**
+ * Resolve device string to actual device configuration
+ */
+export declare function resolveDevice(device?: string): Promise<string>;
+/**
+ * Transformers.js Sentence Encoder embedding model
+ * Uses ONNX Runtime for fast, offline embeddings with smaller models
+ * Default model: all-MiniLM-L6-v2 (384 dimensions, ~90MB)
+ */
+export interface TransformerEmbeddingOptions {
+    /** Model name/path to use - defaults to all-MiniLM-L6-v2 */
+    model?: string;
     /** Whether to enable verbose logging */
     verbose?: boolean;
+    /** Custom cache directory for models */
+    cacheDir?: string;
+    /** Force local files only (no downloads) */
+    localFilesOnly?: boolean;
+    /** Quantization setting (fp32, fp16, q8, q4) */
+    dtype?: 'fp32' | 'fp16' | 'q8' | 'q4';
+    /** Device to run inference on - 'auto' detects best available */
+    device?: 'auto' | 'cpu' | 'webgpu' | 'cuda' | 'gpu';
 }
-export declare class
-    private
+export declare class TransformerEmbedding implements EmbeddingModel {
+    private extractor;
     private initialized;
-    private tf;
-    private use;
-    private backend;
     private verbose;
-    private
+    private options;
     /**
-     * Create a new
-     * @param options Configuration options including reliability settings
+     * Create a new TransformerEmbedding instance
      */
-    constructor(options?:
+    constructor(options?: TransformerEmbeddingOptions);
     /**
-     *
-     * This addresses issues with TensorFlow.js across all server environments
-     * (Node.js, serverless, and other server environments)
-     *
-     * Note: The main TensorFlow.js patching is now centralized in textEncoding.ts
-     * and applied through setup.ts. This method only adds additional utility functions
-     * that might be needed by TensorFlow.js.
+     * Get the default cache directory for models
      */
-    private
+    private getDefaultCacheDir;
     /**
      * Check if we're running in a test environment
     */
     private isTestEnvironment;
     /**
-     * Log message only if verbose mode is enabled
-     * This helps suppress non-essential log messages
+     * Log message only if verbose mode is enabled
      */
     private logger;
-    /**
-     * Load the Universal Sentence Encoder model with robust retry and fallback mechanisms
-     * @param loadFunction The function to load the model from TensorFlow Hub
-     */
-    private loadModelFromLocal;
     /**
      * Initialize the embedding model
      */
     init(): Promise<void>;
     /**
-     *
-     * @param data Text to embed
+     * Generate embeddings for text data
      */
     embed(data: string | string[]): Promise<Vector>;
     /**
-     *
-     * This is more efficient than calling embed() multiple times
-     * @param dataArray Array of texts to embed
-     * @returns Array of embedding vectors
+     * Dispose of the model and free resources
      */
-
+    dispose(): Promise<void>;
     /**
-     *
+     * Get the dimension of embeddings produced by this model
      */
-
+    getDimension(): number;
+    /**
+     * Check if the model is initialized
+     */
+    isInitialized(): boolean;
 }
+export declare const UniversalSentenceEncoder: typeof TransformerEmbedding;
 /**
- * Create
- * @param model Embedding model to use (optional, defaults to UniversalSentenceEncoder)
- */
-export declare function createEmbeddingFunction(model?: EmbeddingModel): EmbeddingFunction;
-export declare function createTensorFlowEmbeddingFunction(options?: {
-    verbose?: boolean;
-}): EmbeddingFunction;
-/**
- * Default embedding function
- * Uses UniversalSentenceEncoder for all text embeddings
- * TensorFlow.js is required for this to work
- * Uses CPU for compatibility
- * @param options Configuration options
- * @param options.verbose Whether to log non-essential messages (default: true)
+ * Create a new embedding model instance
  */
-export declare function
-    verbose?: boolean;
-}): EmbeddingFunction;
+export declare function createEmbeddingModel(options?: TransformerEmbeddingOptions): EmbeddingModel;
 /**
- * Default embedding function
- * Uses UniversalSentenceEncoder for all text embeddings
- * TensorFlow.js is required for this to work
- * Uses CPU for compatibility
+ * Default embedding function using the lightweight transformer model
  */
 export declare const defaultEmbeddingFunction: EmbeddingFunction;
-export declare function createBatchEmbeddingFunction(options?: {
-    verbose?: boolean;
-}): (dataArray: string[]) => Promise<Vector[]>;
 /**
- *
- * Uses UniversalSentenceEncoder for all text embeddings
- * TensorFlow.js is required for this to work
- * Processes all items in a single batch operation
- * @param options Configuration options
- * @param options.verbose Whether to log non-essential messages (default: true)
+ * Create an embedding function with custom options
  */
-export declare function
-    verbose?: boolean;
-}): (dataArray: string[]) => Promise<Vector[]>;
+export declare function createEmbeddingFunction(options?: TransformerEmbeddingOptions): EmbeddingFunction;
 /**
- *
- * Uses UniversalSentenceEncoder for all text embeddings
- * TensorFlow.js is required for this to work
- * Processes all items in a single batch operation
+ * Batch embedding function for processing multiple texts efficiently
  */
-export declare
+export declare function batchEmbed(texts: string[], options?: TransformerEmbeddingOptions): Promise<Vector[]>;
 /**
- *
- * This is a wrapper around createEmbeddingFunction that uses executeInThread
- * @param model Embedding model to use
+ * Embedding functions for specific model types
  */
-export declare
+export declare const embeddingFunctions: {
+    /** Default lightweight model (all-MiniLM-L6-v2, 384 dimensions) */
+    default: EmbeddingFunction;
+    /** Create custom embedding function */
+    create: typeof createEmbeddingFunction;
+    /** Batch processing */
+    batch: typeof batchEmbed;
+};
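For reference, a minimal usage sketch of the new embedding API declared in this hunk. This is an illustration built only from the declarations above, not from the package docs: it assumes the symbols are re-exported from the package root (`dist/index.d.ts` also changed in this release, but its contents are not shown) and that `EmbeddingFunction` accepts a string and resolves to a `Vector`.

```ts
// Sketch only: the import path and the EmbeddingFunction call shape are
// assumptions inferred from the .d.ts hunk above, not from documentation.
import {
  TransformerEmbedding,
  createEmbeddingFunction,
  batchEmbed,
} from '@soulcraft/brainy';

async function main(): Promise<void> {
  // Class-based usage with the new Transformers.js/ONNX options.
  // Leaving `model` unset falls back to the default noted in the doc
  // comment (all-MiniLM-L6-v2, 384 dimensions).
  const model = new TransformerEmbedding({
    device: 'auto', // resolved to cpu, webgpu, or cuda (see detectBestDevice)
    dtype: 'q8',    // quantization setting from TransformerEmbeddingOptions
    verbose: false,
  });
  await model.init();
  const vector = await model.embed('What is a vector database?');
  console.log(model.getDimension(), vector.length); // 384 expected for the default model
  await model.dispose();

  // Functional helpers from the same module.
  const embed = createEmbeddingFunction({ localFilesOnly: true }); // offline, no downloads
  const one = await embed('hello world'); // assumes EmbeddingFunction: (text) => Promise<Vector>

  const many = await batchEmbed(['first', 'second', 'third'], { dtype: 'q8' });
  console.log(one.length, many.length);
}

main().catch(console.error);
```

Note that `UniversalSentenceEncoder` is kept as an alias of `TransformerEmbedding`, so code written against the 0.46.0 class name should still type-check against 0.48.0, even though the TensorFlow.js-specific helpers such as `createTensorFlowEmbeddingFunction` and `createBatchEmbeddingFunction` are gone.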