@soulcraft/brainy 0.46.0 → 0.48.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
@@ -1,128 +1,102 @@
1
1
  /**
2
- * Embedding functions for converting data to vectors
2
+ * Embedding functions for converting data to vectors using Transformers.js
3
+ * Complete rewrite to eliminate TensorFlow.js and use ONNX-based models
3
4
  */
4
5
  import { EmbeddingFunction, EmbeddingModel, Vector } from '../coreTypes.js';
5
- import { ModelLoadOptions } from './robustModelLoader.js';
6
6
  /**
7
- * TensorFlow Universal Sentence Encoder embedding model
8
- * This model provides high-quality text embeddings using TensorFlow.js
9
- * The required TensorFlow.js dependencies are automatically installed with this package
10
- *
11
- * This implementation attempts to use GPU processing when available for better performance,
12
- * falling back to CPU processing for compatibility across all environments.
7
+ * Detect the best available GPU device for the current environment
13
8
  */
14
- export interface UniversalSentenceEncoderOptions extends ModelLoadOptions {
9
+ export declare function detectBestDevice(): Promise<'cpu' | 'webgpu' | 'cuda'>;
10
+ /**
11
+ * Resolve device string to actual device configuration
12
+ */
13
+ export declare function resolveDevice(device?: string): Promise<string>;
14
+ /**
15
+ * Transformers.js Sentence Encoder embedding model
16
+ * Uses ONNX Runtime for fast, offline embeddings with smaller models
17
+ * Default model: all-MiniLM-L6-v2 (384 dimensions, ~90MB)
18
+ */
19
+ export interface TransformerEmbeddingOptions {
20
+ /** Model name/path to use - defaults to all-MiniLM-L6-v2 */
21
+ model?: string;
15
22
  /** Whether to enable verbose logging */
16
23
  verbose?: boolean;
24
+ /** Custom cache directory for models */
25
+ cacheDir?: string;
26
+ /** Force local files only (no downloads) */
27
+ localFilesOnly?: boolean;
28
+ /** Quantization setting (fp32, fp16, q8, q4) */
29
+ dtype?: 'fp32' | 'fp16' | 'q8' | 'q4';
30
+ /** Device to run inference on - 'auto' detects best available */
31
+ device?: 'auto' | 'cpu' | 'webgpu' | 'cuda' | 'gpu';
17
32
  }
18
- export declare class UniversalSentenceEncoder implements EmbeddingModel {
19
- private model;
33
+ export declare class TransformerEmbedding implements EmbeddingModel {
34
+ private extractor;
20
35
  private initialized;
21
- private tf;
22
- private use;
23
- private backend;
24
36
  private verbose;
25
- private robustLoader;
37
+ private options;
26
38
  /**
27
- * Create a new UniversalSentenceEncoder instance
28
- * @param options Configuration options including reliability settings
39
+ * Create a new TransformerEmbedding instance
29
40
  */
30
- constructor(options?: UniversalSentenceEncoderOptions);
41
+ constructor(options?: TransformerEmbeddingOptions);
31
42
  /**
32
- * Add polyfills and patches for TensorFlow.js compatibility
33
- * This addresses issues with TensorFlow.js across all server environments
34
- * (Node.js, serverless, and other server environments)
35
- *
36
- * Note: The main TensorFlow.js patching is now centralized in textEncoding.ts
37
- * and applied through setup.ts. This method only adds additional utility functions
38
- * that might be needed by TensorFlow.js.
43
+ * Get the default cache directory for models
39
44
  */
40
- private addServerCompatibilityPolyfills;
45
+ private getDefaultCacheDir;
41
46
  /**
42
47
  * Check if we're running in a test environment
43
48
  */
44
49
  private isTestEnvironment;
45
50
  /**
46
- * Log message only if verbose mode is enabled or if it's an error
47
- * This helps suppress non-essential log messages
51
+ * Log message only if verbose mode is enabled
48
52
  */
49
53
  private logger;
50
- /**
51
- * Load the Universal Sentence Encoder model with robust retry and fallback mechanisms
52
- * @param loadFunction The function to load the model from TensorFlow Hub
53
- */
54
- private loadModelFromLocal;
55
54
  /**
56
55
  * Initialize the embedding model
57
56
  */
58
57
  init(): Promise<void>;
59
58
  /**
60
- * Embed text into a vector using Universal Sentence Encoder
61
- * @param data Text to embed
59
+ * Generate embeddings for text data
62
60
  */
63
61
  embed(data: string | string[]): Promise<Vector>;
64
62
  /**
65
- * Embed multiple texts into vectors using Universal Sentence Encoder
66
- * This is more efficient than calling embed() multiple times
67
- * @param dataArray Array of texts to embed
68
- * @returns Array of embedding vectors
63
+ * Dispose of the model and free resources
69
64
  */
70
- embedBatch(dataArray: string[]): Promise<Vector[]>;
65
+ dispose(): Promise<void>;
71
66
  /**
72
- * Dispose of the model resources
67
+ * Get the dimension of embeddings produced by this model
73
68
  */
74
- dispose(): Promise<void>;
69
+ getDimension(): number;
70
+ /**
71
+ * Check if the model is initialized
72
+ */
73
+ isInitialized(): boolean;
75
74
  }
75
+ export declare const UniversalSentenceEncoder: typeof TransformerEmbedding;
76
76
  /**
77
- * Create an embedding function from an embedding model
78
- * @param model Embedding model to use (optional, defaults to UniversalSentenceEncoder)
79
- */
80
- export declare function createEmbeddingFunction(model?: EmbeddingModel): EmbeddingFunction;
81
- export declare function createTensorFlowEmbeddingFunction(options?: {
82
- verbose?: boolean;
83
- }): EmbeddingFunction;
84
- /**
85
- * Default embedding function
86
- * Uses UniversalSentenceEncoder for all text embeddings
87
- * TensorFlow.js is required for this to work
88
- * Uses CPU for compatibility
89
- * @param options Configuration options
90
- * @param options.verbose Whether to log non-essential messages (default: true)
77
+ * Create a new embedding model instance
91
78
  */
92
- export declare function getDefaultEmbeddingFunction(options?: {
93
- verbose?: boolean;
94
- }): EmbeddingFunction;
79
+ export declare function createEmbeddingModel(options?: TransformerEmbeddingOptions): EmbeddingModel;
95
80
  /**
96
- * Default embedding function with default options
97
- * Uses UniversalSentenceEncoder for all text embeddings
98
- * TensorFlow.js is required for this to work
99
- * Uses CPU for compatibility
81
+ * Default embedding function using the lightweight transformer model
100
82
  */
101
83
  export declare const defaultEmbeddingFunction: EmbeddingFunction;
102
- export declare function createBatchEmbeddingFunction(options?: {
103
- verbose?: boolean;
104
- }): (dataArray: string[]) => Promise<Vector[]>;
105
84
  /**
106
- * Get a batch embedding function with custom options
107
- * Uses UniversalSentenceEncoder for all text embeddings
108
- * TensorFlow.js is required for this to work
109
- * Processes all items in a single batch operation
110
- * @param options Configuration options
111
- * @param options.verbose Whether to log non-essential messages (default: true)
85
+ * Create an embedding function with custom options
112
86
  */
113
- export declare function getDefaultBatchEmbeddingFunction(options?: {
114
- verbose?: boolean;
115
- }): (dataArray: string[]) => Promise<Vector[]>;
87
+ export declare function createEmbeddingFunction(options?: TransformerEmbeddingOptions): EmbeddingFunction;
116
88
  /**
117
- * Default batch embedding function with default options
118
- * Uses UniversalSentenceEncoder for all text embeddings
119
- * TensorFlow.js is required for this to work
120
- * Processes all items in a single batch operation
89
+ * Batch embedding function for processing multiple texts efficiently
121
90
  */
122
- export declare const defaultBatchEmbeddingFunction: (dataArray: string[]) => Promise<Vector[]>;
91
+ export declare function batchEmbed(texts: string[], options?: TransformerEmbeddingOptions): Promise<Vector[]>;
123
92
  /**
124
- * Creates an embedding function that runs in a separate thread
125
- * This is a wrapper around createEmbeddingFunction that uses executeInThread
126
- * @param model Embedding model to use
93
+ * Embedding functions for specific model types
127
94
  */
128
- export declare function createThreadedEmbeddingFunction(model: EmbeddingModel): EmbeddingFunction;
95
+ export declare const embeddingFunctions: {
96
+ /** Default lightweight model (all-MiniLM-L6-v2, 384 dimensions) */
97
+ default: EmbeddingFunction;
98
+ /** Create custom embedding function */
99
+ create: typeof createEmbeddingFunction;
100
+ /** Batch processing */
101
+ batch: typeof batchEmbed;
102
+ };