@soulcraft/brainy 6.5.0 → 6.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/models/all-MiniLM-L6-v2-q8/config.json +25 -0
- package/assets/models/all-MiniLM-L6-v2-q8/model.onnx +0 -0
- package/assets/models/all-MiniLM-L6-v2-q8/tokenizer.json +30686 -0
- package/assets/models/all-MiniLM-L6-v2-q8/vocab.json +1 -0
- package/dist/critical/model-guardian.d.ts +5 -22
- package/dist/critical/model-guardian.js +38 -210
- package/dist/embeddings/EmbeddingManager.d.ts +7 -17
- package/dist/embeddings/EmbeddingManager.js +28 -136
- package/dist/embeddings/wasm/AssetLoader.d.ts +67 -0
- package/dist/embeddings/wasm/AssetLoader.js +238 -0
- package/dist/embeddings/wasm/EmbeddingPostProcessor.d.ts +60 -0
- package/dist/embeddings/wasm/EmbeddingPostProcessor.js +123 -0
- package/dist/embeddings/wasm/ONNXInferenceEngine.d.ts +55 -0
- package/dist/embeddings/wasm/ONNXInferenceEngine.js +154 -0
- package/dist/embeddings/wasm/WASMEmbeddingEngine.d.ts +82 -0
- package/dist/embeddings/wasm/WASMEmbeddingEngine.js +231 -0
- package/dist/embeddings/wasm/WordPieceTokenizer.d.ts +71 -0
- package/dist/embeddings/wasm/WordPieceTokenizer.js +264 -0
- package/dist/embeddings/wasm/index.d.ts +13 -0
- package/dist/embeddings/wasm/index.js +15 -0
- package/dist/embeddings/wasm/types.d.ts +114 -0
- package/dist/embeddings/wasm/types.js +25 -0
- package/dist/setup.d.ts +11 -11
- package/dist/setup.js +17 -31
- package/dist/utils/embedding.d.ts +45 -62
- package/dist/utils/embedding.js +61 -440
- package/package.json +10 -3
- package/scripts/download-model.cjs +175 -0
|
@@ -1,60 +1,37 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Embedding functions for converting data to vectors
|
|
3
|
-
*
|
|
2
|
+
* Embedding functions for converting data to vectors
|
|
3
|
+
*
|
|
4
|
+
* Uses direct ONNX WASM for universal compatibility.
|
|
5
|
+
* No transformers.js dependency - clean, production-grade implementation.
|
|
4
6
|
*/
|
|
5
7
|
import { EmbeddingFunction, EmbeddingModel, Vector } from '../coreTypes.js';
|
|
6
8
|
/**
|
|
7
|
-
*
|
|
8
|
-
*/
|
|
9
|
-
export declare function detectBestDevice(): Promise<'cpu' | 'webgpu' | 'cuda'>;
|
|
10
|
-
/**
|
|
11
|
-
* Resolve device string to actual device configuration
|
|
12
|
-
*/
|
|
13
|
-
export declare function resolveDevice(device?: string): Promise<string>;
|
|
14
|
-
/**
|
|
15
|
-
* Transformers.js Sentence Encoder embedding model
|
|
16
|
-
* Uses ONNX Runtime for fast, offline embeddings with smaller models
|
|
17
|
-
* Default model: all-MiniLM-L6-v2 (384 dimensions, ~90MB)
|
|
9
|
+
* TransformerEmbedding options (kept for backward compatibility)
|
|
18
10
|
*/
|
|
19
11
|
export interface TransformerEmbeddingOptions {
|
|
20
|
-
/** Model name
|
|
12
|
+
/** Model name - only all-MiniLM-L6-v2 is supported */
|
|
21
13
|
model?: string;
|
|
22
14
|
/** Whether to enable verbose logging */
|
|
23
15
|
verbose?: boolean;
|
|
24
|
-
/** Custom cache directory
|
|
16
|
+
/** Custom cache directory - ignored (model is bundled) */
|
|
25
17
|
cacheDir?: string;
|
|
26
|
-
/** Force local files only (
|
|
18
|
+
/** Force local files only - ignored (model is bundled) */
|
|
27
19
|
localFilesOnly?: boolean;
|
|
28
|
-
/** Model precision
|
|
20
|
+
/** Model precision - always q8 */
|
|
29
21
|
precision?: 'fp32' | 'q8';
|
|
30
|
-
/** Device
|
|
22
|
+
/** Device - always WASM */
|
|
31
23
|
device?: 'auto' | 'cpu' | 'webgpu' | 'cuda' | 'gpu';
|
|
32
24
|
}
|
|
25
|
+
/**
|
|
26
|
+
* TransformerEmbedding - Sentence embeddings using WASM ONNX
|
|
27
|
+
*
|
|
28
|
+
* This class delegates all work to EmbeddingManager which uses
|
|
29
|
+
* the direct ONNX WASM engine. Kept for backward compatibility.
|
|
30
|
+
*/
|
|
33
31
|
export declare class TransformerEmbedding implements EmbeddingModel {
|
|
34
|
-
private extractor;
|
|
35
32
|
private initialized;
|
|
36
33
|
private verbose;
|
|
37
|
-
private options;
|
|
38
|
-
/**
|
|
39
|
-
* Create a new TransformerEmbedding instance
|
|
40
|
-
*/
|
|
41
34
|
constructor(options?: TransformerEmbeddingOptions);
|
|
42
|
-
/**
|
|
43
|
-
* Get the default cache directory for models
|
|
44
|
-
*/
|
|
45
|
-
private getDefaultCacheDir;
|
|
46
|
-
/**
|
|
47
|
-
* Check if we're running in a test environment
|
|
48
|
-
*/
|
|
49
|
-
private isTestEnvironment;
|
|
50
|
-
/**
|
|
51
|
-
* Log message only if verbose mode is enabled
|
|
52
|
-
*/
|
|
53
|
-
private logger;
|
|
54
|
-
/**
|
|
55
|
-
* Generate mock embeddings for unit tests
|
|
56
|
-
*/
|
|
57
|
-
private getMockEmbedding;
|
|
58
35
|
/**
|
|
59
36
|
* Initialize the embedding model
|
|
60
37
|
*/
|
|
@@ -64,45 +41,51 @@ export declare class TransformerEmbedding implements EmbeddingModel {
|
|
|
64
41
|
*/
|
|
65
42
|
embed(data: string | string[]): Promise<Vector>;
|
|
66
43
|
/**
|
|
67
|
-
*
|
|
44
|
+
* Get the embedding function
|
|
68
45
|
*/
|
|
69
|
-
|
|
46
|
+
getEmbeddingFunction(): EmbeddingFunction;
|
|
70
47
|
/**
|
|
71
|
-
*
|
|
48
|
+
* Check if initialized
|
|
72
49
|
*/
|
|
73
|
-
|
|
50
|
+
isInitialized(): boolean;
|
|
74
51
|
/**
|
|
75
|
-
*
|
|
52
|
+
* Dispose resources (no-op for WASM engine)
|
|
76
53
|
*/
|
|
77
|
-
|
|
54
|
+
dispose(): Promise<void>;
|
|
78
55
|
}
|
|
79
|
-
export declare const UniversalSentenceEncoder: typeof TransformerEmbedding;
|
|
80
56
|
/**
|
|
81
|
-
* Create a
|
|
57
|
+
* Create a simple embedding function using the default TransformerEmbedding
|
|
58
|
+
* This is the recommended way to create an embedding function for Brainy
|
|
82
59
|
*/
|
|
83
|
-
export declare function
|
|
60
|
+
export declare function createEmbeddingFunction(options?: TransformerEmbeddingOptions): EmbeddingFunction;
|
|
84
61
|
/**
|
|
85
|
-
*
|
|
86
|
-
|
|
62
|
+
* Create a TransformerEmbedding instance (backward compatibility)
|
|
63
|
+
*/
|
|
64
|
+
export declare function createTransformerEmbedding(options?: TransformerEmbeddingOptions): TransformerEmbedding;
|
|
65
|
+
/**
|
|
66
|
+
* Convenience function to detect best device (always returns 'wasm')
|
|
67
|
+
*/
|
|
68
|
+
export declare function detectBestDevice(): Promise<'cpu' | 'webgpu' | 'cuda' | 'wasm'>;
|
|
69
|
+
/**
|
|
70
|
+
* Resolve device string (always returns 'wasm')
|
|
71
|
+
*/
|
|
72
|
+
export declare function resolveDevice(_device?: string): Promise<string>;
|
|
73
|
+
/**
|
|
74
|
+
* Default embedding function (backward compatibility)
|
|
87
75
|
*/
|
|
88
76
|
export declare const defaultEmbeddingFunction: EmbeddingFunction;
|
|
89
77
|
/**
|
|
90
|
-
*
|
|
91
|
-
* NOTE: Options are validated but the singleton EmbeddingManager is always used
|
|
78
|
+
* UniversalSentenceEncoder alias (backward compatibility)
|
|
92
79
|
*/
|
|
93
|
-
export declare
|
|
80
|
+
export declare const UniversalSentenceEncoder: typeof TransformerEmbedding;
|
|
94
81
|
/**
|
|
95
|
-
* Batch
|
|
82
|
+
* Batch embed function (backward compatibility)
|
|
96
83
|
*/
|
|
97
|
-
export declare function batchEmbed(texts: string[]
|
|
84
|
+
export declare function batchEmbed(texts: string[]): Promise<Vector[]>;
|
|
98
85
|
/**
|
|
99
|
-
* Embedding functions
|
|
86
|
+
* Embedding functions registry (backward compatibility)
|
|
100
87
|
*/
|
|
101
88
|
export declare const embeddingFunctions: {
|
|
102
|
-
|
|
103
|
-
default:
|
|
104
|
-
/** Create custom embedding function */
|
|
105
|
-
create: typeof createEmbeddingFunction;
|
|
106
|
-
/** Batch processing */
|
|
107
|
-
batch: typeof batchEmbed;
|
|
89
|
+
transformer: typeof createEmbeddingFunction;
|
|
90
|
+
default: typeof createEmbeddingFunction;
|
|
108
91
|
};
|