@soulcraft/brainy 0.45.0 → 0.47.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/OFFLINE_MODELS.md +56 -0
- package/README.md +45 -0
- package/dist/brainyData.js +7 -9
- package/dist/brainyData.js.map +1 -1
- package/dist/demo.js +2 -2
- package/dist/demo.js.map +1 -1
- package/dist/index.d.ts +2 -3
- package/dist/index.js +3 -9
- package/dist/index.js.map +1 -1
- package/dist/setup.d.ts +3 -3
- package/dist/setup.js +6 -6
- package/dist/setup.js.map +1 -1
- package/dist/utils/distance.js +63 -136
- package/dist/utils/distance.js.map +1 -1
- package/dist/utils/embedding.d.ts +48 -84
- package/dist/utils/embedding.js +182 -598
- package/dist/utils/embedding.js.map +1 -1
- package/dist/utils/robustModelLoader.d.ts +4 -0
- package/dist/utils/robustModelLoader.js +105 -53
- package/dist/utils/robustModelLoader.js.map +1 -1
- package/dist/utils/textEncoding.d.ts +2 -3
- package/dist/utils/textEncoding.js +31 -274
- package/dist/utils/textEncoding.js.map +1 -1
- package/package.json +7 -16
- package/scripts/download-models.cjs +190 -0
package/dist/index.js
CHANGED
|
@@ -2,13 +2,7 @@
|
|
|
2
2
|
* Brainy
|
|
3
3
|
* A vector and graph database using HNSW
|
|
4
4
|
*/
|
|
5
|
-
//
|
|
6
|
-
// We import setup.js below which applies the necessary patches through textEncoding.js
|
|
7
|
-
// This ensures a consistent patching approach and avoids conflicts
|
|
8
|
-
// Import the setup file for its side-effects.
|
|
9
|
-
// This MUST be the very first import to ensure patches are applied
|
|
10
|
-
// before any other module (like TensorFlow.js) is loaded.
|
|
11
|
-
import './setup.js';
|
|
5
|
+
// No setup needed - using clean ONNX Runtime with Transformers.js
|
|
12
6
|
// Export main BrainyData class and related types
|
|
13
7
|
import { BrainyData } from './brainyData.js';
|
|
14
8
|
export { BrainyData };
|
|
@@ -16,14 +10,14 @@ export { BrainyData };
|
|
|
16
10
|
import { euclideanDistance, cosineDistance, manhattanDistance, dotProductDistance, getStatistics } from './utils/index.js';
|
|
17
11
|
export { euclideanDistance, cosineDistance, manhattanDistance, dotProductDistance, getStatistics };
|
|
18
12
|
// Export embedding functionality
|
|
19
|
-
import { UniversalSentenceEncoder, createEmbeddingFunction,
|
|
13
|
+
import { UniversalSentenceEncoder, TransformerEmbedding, createEmbeddingFunction, defaultEmbeddingFunction, batchEmbed, embeddingFunctions } from './utils/embedding.js';
|
|
20
14
|
// Export worker utilities
|
|
21
15
|
import { executeInThread, cleanupWorkerPools } from './utils/workerUtils.js';
|
|
22
16
|
// Export logging utilities
|
|
23
17
|
import { logger, LogLevel, configureLogger, createModuleLogger } from './utils/logger.js';
|
|
24
18
|
// Export environment utilities
|
|
25
19
|
import { isBrowser, isNode, isWebWorker, areWebWorkersAvailable, areWorkerThreadsAvailable, areWorkerThreadsAvailableSync, isThreadingAvailable, isThreadingAvailableAsync } from './utils/environment.js';
|
|
26
|
-
export { UniversalSentenceEncoder, createEmbeddingFunction,
|
|
20
|
+
export { UniversalSentenceEncoder, TransformerEmbedding, createEmbeddingFunction, defaultEmbeddingFunction, batchEmbed, embeddingFunctions,
|
|
27
21
|
// Worker utilities
|
|
28
22
|
executeInThread, cleanupWorkerPools,
|
|
29
23
|
// Environment utilities
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,kEAAkE;AAElE,iDAAiD;AACjD,OAAO,EAAE,UAAU,EAAoB,MAAM,iBAAiB,CAAA;AAE9D,OAAO,EAAE,UAAU,EAAE,CAAA;AAGrB,4CAA4C;AAC5C,OAAO,EACL,iBAAiB,EACjB,cAAc,EACd,iBAAiB,EACjB,kBAAkB,EAClB,aAAa,EACd,MAAM,kBAAkB,CAAA;AAEzB,OAAO,EACL,iBAAiB,EACjB,cAAc,EACd,iBAAiB,EACjB,kBAAkB,EAClB,aAAa,EACd,CAAA;AAED,iCAAiC;AACjC,OAAO,EACL,wBAAwB,EACxB,oBAAoB,EACpB,uBAAuB,EACvB,wBAAwB,EACxB,UAAU,EACV,kBAAkB,EACnB,MAAM,sBAAsB,CAAA;AAE7B,0BAA0B;AAC1B,OAAO,EAAE,eAAe,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AAE5E,2BAA2B;AAC3B,OAAO,EACL,MAAM,EACN,QAAQ,EACR,eAAe,EACf,kBAAkB,EACnB,MAAM,mBAAmB,CAAA;AAE1B,+BAA+B;AAC/B,OAAO,EACL,SAAS,EACT,MAAM,EACN,WAAW,EACX,sBAAsB,EACtB,yBAAyB,EACzB,6BAA6B,EAC7B,oBAAoB,EACpB,yBAAyB,EAC1B,MAAM,wBAAwB,CAAA;AAE/B,OAAO,EACL,wBAAwB,EACxB,oBAAoB,EACpB,uBAAuB,EACvB,wBAAwB,EACxB,UAAU,EACV,kBAAkB;AAElB,mBAAmB;AACnB,eAAe,EACf,kBAAkB;AAElB,wBAAwB;AACxB,SAAS,EACT,MAAM,EACN,WAAW,EACX,sBAAsB,EACtB,yBAAyB,EACzB,6BAA6B,EAC7B,oBAAoB,EACpB,yBAAyB;AAEzB,oBAAoB;AACpB,MAAM,EACN,QAAQ,EACR,eAAe,EACf,kBAAkB,EACnB,CAAA;AAED,0BAA0B;AAC1B,OAAO,EACL,WAAW,EACX,aAAa,EACb,SAAS,EACT,mBAAmB,EACnB,aAAa,EACd,MAAM,6BAA6B,CAAA;AAEpC,OAAO,EACL,WAAW,EACX,aAAa,EACb,SAAS,EACT,mBAAmB,EACnB,aAAa,EACd,CAAA;AAED,yEAAyE;AACzE,OAAO,EAAE,iBAAiB,EAAE,MAAM,yCAAyC,CAAA;AAE3E,0BAA0B;AAC1B,OAAO,EACL,QAAQ,EACR,QAAQ,EACR,oBAAoB,EACpB,aAAa,EAGb,kBAAkB,EAClB,aAAa,EACb,aAAa,EACb,iBAAiB,EACjB,oBAAoB,EACpB,cAAc,EACd,uBAAuB,EACvB,wBAAwB,EAGzB,MAAM,eAAe,CAAA;AAEtB,0DAA0D;AAC1D,OAAO,EACL,kBAAkB,EAClB,kBAAkB,EAEnB,MAAM,yBAAyB,CAAA;AAEhC,8BAA8B;AAC9B,OAAO,EACL,uBAAuB,EACvB,mBAAmB,EACnB,mBAAmB,EACnB,sBAAsB,EAEvB,MAAM,0BAA0B,CAAA;AAEjC,OAAO;AACL,2BAA2B;AAC3B,QAAQ,EACR,QAAQ,EACR,oBAAoB,EACpB,aAAa,EACb,kBAAkB,EAClB,kBAAkB;AAElB,8DAA8D;AAC9D,kBAAkB,EAClB,aAAa,EACb,aAAa,EACb,iBAAiB,EACjB,oBAAoB,EACpB,cAAc,EACd,uBAAuB,EACvB,wBAAwB;AAExB,+BAA+B;AAC/B,uBAAuB,EACvB,mBAAmB,EACnB,mBAAmB,EACnB,sBAAsB,EACvB,CAAA;AAUD,sDAAsD;AACtD,OAAO,EACL,sBAAsB,EACtB,oBAAoB,EACpB,8BAA8B,EAC9B,sBAAsB,EACtB,sBAAsB,EACvB,MAAM,2BAA2B,CAAA;AAElC,OAAO,EACL,sBAAsB,EACtB,oBAAoB,EACpB,8BAA8B,EAC9B,sBAAsB,EACtB,sBAAsB,EACvB,CAAA;AAED,sDAAsD;AACtD,OAAO,EACL,4BAA4B,EAC5B,gCAAgC,EAChC,sCAAsC,EACvC,MAAM,iCAAiC,CAAA;AAMxC,OAAO,EACL,4BAA4B,EAC5B,gCAAgC,EAChC,sCAAsC,EACvC,CAAA;AAGD,sCAAsC;AACtC,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACzB,MAAM,wCAAwC,CAAA;AAC/C,OAAO,EACL,4BAA4B,EAC5B,yBAAyB,EACzB,yBAAyB,EAC1B,MAAM,yCAAyC,CAAA;AAChD,OAAO,EACL,+BAA+B,EAC/B,kCAAkC,EAClC,+BAA+B,EAChC,MAAM,8CAA8C,CAAA;AAErD,kBAAkB;AAClB,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACxB,4BAA4B,EAC5B,yBAAyB,EACzB,yBAAyB,EACzB,+BAA+B,EAC/B,kCAAkC,EAClC,+BAA+B,EAChC,CAAA;AAoBD,0CAA0C;AAC1C,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAA;AAC/C,OAAO,EACL,kBAAkB,EAEnB,MAAM,8BAA8B,CAAA;AAErC,OAAO,EAAE,SAAS,EAAE,kBAAkB,EAAE,CAAA;AA6BxC,OAAO,EAAE,gBAAgB,EAAuB,MAAM,0BAA0B,CAAA;AAGhF,OAAO,EACL,gBAAgB,EASjB,CAAA;AA4CD,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAA;AAgC1D,gCAAgC;AAChC,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AAEjG,OAAO,EACL,QAAQ,EACR,QAAQ,EACR,YAAY,EACZ,YAAY,EACZ,cAAc,EACd,cAAc,EACf,CAAA;AAED,iDAAiD;AACjD,OAAO,EACL,gBAAgB,EAChB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,gBAAgB,CAAA,CAAC,2BAA2B;AACnD,OAAO,EAOL,cAAc,EAGd,WAAW,EACZ,MAAM,qBAAqB,CAAA;AAE5B,OAAO;AACL,cAAc;AACd,gBAAgB,EAChB,sBAAsB,EACtB,gBAAgB;AAEhB,YAAY;AACZ,cAAc,EACd,WAAW,EACZ,CAAA"}
|
package/dist/setup.d.ts
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* CRITICAL: This file is imported for its side effects to patch the environment
|
|
3
|
-
* for
|
|
3
|
+
* for Node.js compatibility before any other library code runs.
|
|
4
4
|
*
|
|
5
|
-
* It ensures that by the time
|
|
5
|
+
* It ensures that by the time Transformers.js/ONNX Runtime is imported by any other
|
|
6
6
|
* module, the necessary compatibility fixes for the current Node.js
|
|
7
7
|
* environment are already in place.
|
|
8
8
|
*
|
|
9
9
|
* This file MUST be imported as the first import in unified.ts to prevent
|
|
10
|
-
* race conditions with
|
|
10
|
+
* race conditions with library initialization. Failure to do so may
|
|
11
11
|
* result in errors like "TextEncoder is not a constructor" when the package
|
|
12
12
|
* is used in Node.js environments.
|
|
13
13
|
*
|
package/dist/setup.js
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* CRITICAL: This file is imported for its side effects to patch the environment
|
|
3
|
-
* for
|
|
3
|
+
* for Node.js compatibility before any other library code runs.
|
|
4
4
|
*
|
|
5
|
-
* It ensures that by the time
|
|
5
|
+
* It ensures that by the time Transformers.js/ONNX Runtime is imported by any other
|
|
6
6
|
* module, the necessary compatibility fixes for the current Node.js
|
|
7
7
|
* environment are already in place.
|
|
8
8
|
*
|
|
9
9
|
* This file MUST be imported as the first import in unified.ts to prevent
|
|
10
|
-
* race conditions with
|
|
10
|
+
* race conditions with library initialization. Failure to do so may
|
|
11
11
|
* result in errors like "TextEncoder is not a constructor" when the package
|
|
12
12
|
* is used in Node.js environments.
|
|
13
13
|
*
|
|
@@ -33,14 +33,14 @@ if (globalObj) {
|
|
|
33
33
|
if (!globalObj.TextDecoder) {
|
|
34
34
|
globalObj.TextDecoder = TextDecoder;
|
|
35
35
|
}
|
|
36
|
-
// Create
|
|
36
|
+
// Create special global constructors for library compatibility
|
|
37
37
|
;
|
|
38
38
|
globalObj.__TextEncoder__ = TextEncoder;
|
|
39
39
|
globalObj.__TextDecoder__ = TextDecoder;
|
|
40
40
|
}
|
|
41
41
|
// Also import normally for ES modules environments
|
|
42
42
|
import { applyTensorFlowPatch } from './utils/textEncoding.js';
|
|
43
|
-
// Apply the
|
|
43
|
+
// Apply the TextEncoder/TextDecoder compatibility patch
|
|
44
44
|
applyTensorFlowPatch();
|
|
45
|
-
console.log('Applied
|
|
45
|
+
console.log('Applied TextEncoder/TextDecoder patch via ES modules in setup.ts');
|
|
46
46
|
//# sourceMappingURL=setup.js.map
|
package/dist/setup.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"setup.js","sourceRoot":"","sources":["../src/setup.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,gEAAgE;AAChE,MAAM,SAAS,GAAG,CAAC,GAAG,EAAE;IACtB,IAAI,OAAO,UAAU,KAAK,WAAW;QAAE,OAAO,UAAU,CAAA;IACxD,IAAI,OAAO,MAAM,KAAK,WAAW;QAAE,OAAO,MAAM,CAAA;IAChD,IAAI,OAAO,IAAI,KAAK,WAAW;QAAE,OAAO,IAAI,CAAA;IAC5C,OAAO,IAAI,CAAA,CAAC,6BAA6B;AAC3C,CAAC,CAAC,EAAE,CAAA;AAEJ,6EAA6E;AAC7E,wFAAwF;AACxF,IAAI,SAAS,EAAE,CAAC;IACd,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC;QAC3B,SAAS,CAAC,WAAW,GAAG,WAAW,CAAA;IACrC,CAAC;IACD,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC;QAC3B,SAAS,CAAC,WAAW,GAAG,WAAW,CAAA;IACrC,CAAC;IAED
|
|
1
|
+
{"version":3,"file":"setup.js","sourceRoot":"","sources":["../src/setup.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,gEAAgE;AAChE,MAAM,SAAS,GAAG,CAAC,GAAG,EAAE;IACtB,IAAI,OAAO,UAAU,KAAK,WAAW;QAAE,OAAO,UAAU,CAAA;IACxD,IAAI,OAAO,MAAM,KAAK,WAAW;QAAE,OAAO,MAAM,CAAA;IAChD,IAAI,OAAO,IAAI,KAAK,WAAW;QAAE,OAAO,IAAI,CAAA;IAC5C,OAAO,IAAI,CAAA,CAAC,6BAA6B;AAC3C,CAAC,CAAC,EAAE,CAAA;AAEJ,6EAA6E;AAC7E,wFAAwF;AACxF,IAAI,SAAS,EAAE,CAAC;IACd,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC;QAC3B,SAAS,CAAC,WAAW,GAAG,WAAW,CAAA;IACrC,CAAC;IACD,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC;QAC3B,SAAS,CAAC,WAAW,GAAG,WAAW,CAAA;IACrC,CAAC;IAED,+DAA+D;IAC/D,CAAC;IAAC,SAAiB,CAAC,eAAe,GAAG,WAAW,CAChD;IAAC,SAAiB,CAAC,eAAe,GAAG,WAAW,CAAA;AACnD,CAAC;AAED,mDAAmD;AACnD,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAA;AAE9D,wDAAwD;AACxD,oBAAoB,EAAE,CAAA;AACtB,OAAO,CAAC,GAAG,CAAC,kEAAkE,CAAC,CAAA"}
|
package/dist/utils/distance.js
CHANGED
|
@@ -86,149 +86,76 @@ export async function calculateDistancesBatch(queryVector, vectors, distanceFunc
|
|
|
86
86
|
return vectors.map((vector) => distanceFunction(queryVector, vector));
|
|
87
87
|
}
|
|
88
88
|
try {
|
|
89
|
-
// Function
|
|
90
|
-
const distanceCalculator =
|
|
89
|
+
// Function for optimized batch distance calculation
|
|
90
|
+
const distanceCalculator = (args) => {
|
|
91
91
|
const { queryVector, vectors, distanceFnString } = args;
|
|
92
|
-
//
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
//
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
tf = await self.importTensorFlow();
|
|
102
|
-
}
|
|
103
|
-
else {
|
|
104
|
-
// CRITICAL: Ensure TextEncoder/TextDecoder are available before TensorFlow.js loads
|
|
105
|
-
try {
|
|
106
|
-
// Use dynamic imports for all environments to ensure TensorFlow loads after patch
|
|
107
|
-
if (typeof process !== 'undefined' && process.versions && process.versions.node) {
|
|
108
|
-
// Ensure TextEncoder/TextDecoder are globally available in Node.js
|
|
109
|
-
const util = await import('util');
|
|
110
|
-
if (typeof global.TextEncoder === 'undefined') {
|
|
111
|
-
global.TextEncoder = util.TextEncoder;
|
|
112
|
-
}
|
|
113
|
-
if (typeof global.TextDecoder === 'undefined') {
|
|
114
|
-
global.TextDecoder = util.TextDecoder;
|
|
115
|
-
}
|
|
116
|
-
}
|
|
117
|
-
// Apply the TensorFlow.js patch
|
|
118
|
-
const { applyTensorFlowPatch } = await import('./textEncoding.js');
|
|
119
|
-
await applyTensorFlowPatch();
|
|
120
|
-
// Now load TensorFlow.js core module using dynamic imports
|
|
121
|
-
tf = await import('@tensorflow/tfjs-core');
|
|
122
|
-
await import('@tensorflow/tfjs-backend-cpu');
|
|
123
|
-
await tf.setBackend('cpu');
|
|
92
|
+
// Optimized JavaScript implementations for different distance functions
|
|
93
|
+
let distances;
|
|
94
|
+
if (distanceFnString.includes('euclideanDistance')) {
|
|
95
|
+
// Euclidean distance: sqrt(sum((a - b)^2))
|
|
96
|
+
distances = vectors.map((vector) => {
|
|
97
|
+
let sum = 0;
|
|
98
|
+
for (let i = 0; i < queryVector.length; i++) {
|
|
99
|
+
const diff = queryVector[i] - vector[i];
|
|
100
|
+
sum += diff * diff;
|
|
124
101
|
}
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
102
|
+
return Math.sqrt(sum);
|
|
103
|
+
});
|
|
104
|
+
}
|
|
105
|
+
else if (distanceFnString.includes('cosineDistance')) {
|
|
106
|
+
// Cosine distance: 1 - (a·b / (||a|| * ||b||))
|
|
107
|
+
distances = vectors.map((vector) => {
|
|
108
|
+
let dotProduct = 0;
|
|
109
|
+
let queryNorm = 0;
|
|
110
|
+
let vectorNorm = 0;
|
|
111
|
+
for (let i = 0; i < queryVector.length; i++) {
|
|
112
|
+
dotProduct += queryVector[i] * vector[i];
|
|
113
|
+
queryNorm += queryVector[i] * queryVector[i];
|
|
114
|
+
vectorNorm += vector[i] * vector[i];
|
|
115
|
+
}
|
|
116
|
+
queryNorm = Math.sqrt(queryNorm);
|
|
117
|
+
vectorNorm = Math.sqrt(vectorNorm);
|
|
118
|
+
if (queryNorm === 0 || vectorNorm === 0) {
|
|
119
|
+
return 1; // Maximum distance for zero vectors
|
|
120
|
+
}
|
|
121
|
+
const cosineSimilarity = dotProduct / (queryNorm * vectorNorm);
|
|
122
|
+
return 1 - cosineSimilarity;
|
|
123
|
+
});
|
|
124
|
+
}
|
|
125
|
+
else if (distanceFnString.includes('manhattanDistance')) {
|
|
126
|
+
// Manhattan distance: sum(|a - b|)
|
|
127
|
+
distances = vectors.map((vector) => {
|
|
128
|
+
let sum = 0;
|
|
129
|
+
for (let i = 0; i < queryVector.length; i++) {
|
|
130
|
+
sum += Math.abs(queryVector[i] - vector[i]);
|
|
128
131
|
}
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
const queryTensor = tf.tensor2d([queryVector]);
|
|
132
|
-
const vectorsTensor = tf.tensor2d(vectors);
|
|
133
|
-
let distances;
|
|
134
|
-
// Calculate distances based on the distance function type
|
|
135
|
-
if (distanceFnString.includes('euclideanDistance')) {
|
|
136
|
-
// Euclidean distance using GPU-optimized operations
|
|
137
|
-
// Formula: sqrt(sum((a - b)^2))
|
|
138
|
-
const expanded = tf.sub(queryTensor.expandDims(1), vectorsTensor.expandDims(0));
|
|
139
|
-
const squaredDiff = tf.square(expanded);
|
|
140
|
-
const sumSquaredDiff = tf.sum(squaredDiff, -1);
|
|
141
|
-
const distancesTensor = tf.sqrt(sumSquaredDiff);
|
|
142
|
-
distances = (await distancesTensor
|
|
143
|
-
.squeeze()
|
|
144
|
-
.array());
|
|
145
|
-
// Clean up tensors
|
|
146
|
-
queryTensor.dispose();
|
|
147
|
-
vectorsTensor.dispose();
|
|
148
|
-
expanded.dispose();
|
|
149
|
-
squaredDiff.dispose();
|
|
150
|
-
sumSquaredDiff.dispose();
|
|
151
|
-
distancesTensor.dispose();
|
|
152
|
-
}
|
|
153
|
-
else if (distanceFnString.includes('cosineDistance')) {
|
|
154
|
-
// Cosine distance using GPU-optimized operations
|
|
155
|
-
// Formula: 1 - (a·b / (||a|| * ||b||))
|
|
156
|
-
const dotProduct = tf.matMul(queryTensor, vectorsTensor.transpose());
|
|
157
|
-
const queryNorm = tf.norm(queryTensor, 2, 1);
|
|
158
|
-
const vectorsNorm = tf.norm(vectorsTensor, 2, 1);
|
|
159
|
-
const normProduct = tf.outerProduct(queryNorm, vectorsNorm);
|
|
160
|
-
const cosineSimilarity = tf.div(dotProduct, normProduct);
|
|
161
|
-
const distancesTensor = tf.sub(tf.scalar(1), cosineSimilarity);
|
|
162
|
-
distances = (await distancesTensor
|
|
163
|
-
.squeeze()
|
|
164
|
-
.array());
|
|
165
|
-
// Clean up tensors
|
|
166
|
-
queryTensor.dispose();
|
|
167
|
-
vectorsTensor.dispose();
|
|
168
|
-
dotProduct.dispose();
|
|
169
|
-
queryNorm.dispose();
|
|
170
|
-
vectorsNorm.dispose();
|
|
171
|
-
normProduct.dispose();
|
|
172
|
-
cosineSimilarity.dispose();
|
|
173
|
-
distancesTensor.dispose();
|
|
174
|
-
}
|
|
175
|
-
else if (distanceFnString.includes('manhattanDistance')) {
|
|
176
|
-
// Manhattan distance using GPU-optimized operations
|
|
177
|
-
// Formula: sum(|a - b|)
|
|
178
|
-
const diff = tf.sub(queryTensor.expandDims(1), vectorsTensor.expandDims(0));
|
|
179
|
-
const absDiff = tf.abs(diff);
|
|
180
|
-
const distancesTensor = tf.sum(absDiff, -1);
|
|
181
|
-
distances = (await distancesTensor
|
|
182
|
-
.squeeze()
|
|
183
|
-
.array());
|
|
184
|
-
// Clean up tensors
|
|
185
|
-
queryTensor.dispose();
|
|
186
|
-
vectorsTensor.dispose();
|
|
187
|
-
diff.dispose();
|
|
188
|
-
absDiff.dispose();
|
|
189
|
-
distancesTensor.dispose();
|
|
190
|
-
}
|
|
191
|
-
else if (distanceFnString.includes('dotProductDistance')) {
|
|
192
|
-
// Dot product distance using GPU-optimized operations
|
|
193
|
-
// Formula: -sum(a * b)
|
|
194
|
-
const dotProduct = tf.matMul(queryTensor, vectorsTensor.transpose());
|
|
195
|
-
const distancesTensor = tf.neg(dotProduct);
|
|
196
|
-
distances = (await distancesTensor
|
|
197
|
-
.squeeze()
|
|
198
|
-
.array());
|
|
199
|
-
// Clean up tensors
|
|
200
|
-
queryTensor.dispose();
|
|
201
|
-
vectorsTensor.dispose();
|
|
202
|
-
dotProduct.dispose();
|
|
203
|
-
distancesTensor.dispose();
|
|
204
|
-
}
|
|
205
|
-
else {
|
|
206
|
-
// For unknown distance functions, fall back to direct CPU implementation
|
|
207
|
-
throw new Error('Unsupported distance function for TensorFlow optimization');
|
|
208
|
-
}
|
|
209
|
-
return {
|
|
210
|
-
distances
|
|
211
|
-
};
|
|
212
|
-
};
|
|
213
|
-
// Try to use TensorFlow.js with CPU optimization
|
|
214
|
-
try {
|
|
215
|
-
return await useTensorFlow();
|
|
132
|
+
return sum;
|
|
133
|
+
});
|
|
216
134
|
}
|
|
217
|
-
|
|
218
|
-
//
|
|
219
|
-
|
|
135
|
+
else if (distanceFnString.includes('dotProductDistance')) {
|
|
136
|
+
// Dot product distance: -sum(a * b)
|
|
137
|
+
distances = vectors.map((vector) => {
|
|
138
|
+
let dotProduct = 0;
|
|
139
|
+
for (let i = 0; i < queryVector.length; i++) {
|
|
140
|
+
dotProduct += queryVector[i] * vector[i];
|
|
141
|
+
}
|
|
142
|
+
return -dotProduct;
|
|
143
|
+
});
|
|
144
|
+
}
|
|
145
|
+
else {
|
|
146
|
+
// For unknown distance functions, use the provided function
|
|
220
147
|
const distanceFunction = new Function('return ' + distanceFnString)();
|
|
221
|
-
|
|
222
|
-
const distances = vectors.map((vector) => distanceFunction(queryVector, vector));
|
|
223
|
-
return {
|
|
224
|
-
distances
|
|
225
|
-
};
|
|
148
|
+
distances = vectors.map((vector) => distanceFunction(queryVector, vector));
|
|
226
149
|
}
|
|
150
|
+
return { distances };
|
|
227
151
|
};
|
|
228
|
-
//
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
152
|
+
// Use the optimized distance calculator
|
|
153
|
+
const result = distanceCalculator({
|
|
154
|
+
queryVector,
|
|
155
|
+
vectors,
|
|
156
|
+
distanceFnString: distanceFunction.toString()
|
|
157
|
+
});
|
|
158
|
+
return result.distances;
|
|
232
159
|
}
|
|
233
160
|
catch (error) {
|
|
234
161
|
// If anything fails, fall back to the standard distance function
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"distance.js","sourceRoot":"","sources":["../../src/utils/distance.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAMH;;;;GAIG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAqB,CACjD,CAAS,EACT,CAAS,EACD,EAAE;IACV,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAA;IAC1D,CAAC;IAED,4DAA4D;IAC5D,MAAM,GAAG,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE;QACnC,MAAM,IAAI,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;QACvB,OAAO,GAAG,GAAG,IAAI,GAAG,IAAI,CAAA;IAC1B,CAAC,EAAE,CAAC,CAAC,CAAA;IAEL,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AACvB,CAAC,CAAA;AAED;;;;;GAKG;AACH,MAAM,CAAC,MAAM,cAAc,GAAqB,CAC9C,CAAS,EACT,CAAS,EACD,EAAE;IACV,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAA;IAC1D,CAAC;IAED,4DAA4D;IAC5D,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,CAAC,CAAC,MAAM,CAC3C,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE;QACd,OAAO;YACL,UAAU,EAAE,GAAG,CAAC,UAAU,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;YACvC,KAAK,EAAE,GAAG,CAAC,KAAK,GAAG,GAAG,GAAG,GAAG;YAC5B,KAAK,EAAE,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;SAC/B,CAAA;IACH,CAAC,EACD,EAAE,UAAU,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CACtC,CAAA;IAED,IAAI,KAAK,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,CAAA,CAAC,oCAAoC;IAC/C,CAAC;IAED,MAAM,UAAU,GAAG,UAAU,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAA;IACrE,2DAA2D;IAC3D,OAAO,CAAC,GAAG,UAAU,CAAA;AACvB,CAAC,CAAA;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAqB,CACjD,CAAS,EACT,CAAS,EACD,EAAE;IACV,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAA;IAC1D,CAAC;IAED,4DAA4D;IAC5D,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;AACjE,CAAC,CAAA;AAED;;;;;GAKG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAqB,CAClD,CAAS,EACT,CAAS,EACD,EAAE;IACV,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAA;IAC1D,CAAC;IAED,4DAA4D;IAC5D,MAAM,UAAU,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;IAEjE,iDAAiD;IACjD,OAAO,CAAC,UAAU,CAAA;AACpB,CAAC,CAAA;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,WAAmB,EACnB,OAAiB,EACjB,mBAAqC,iBAAiB;IAEtD,wDAAwD;IACxD,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;QACxB,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,gBAAgB,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC,CAAA;IACvE,CAAC;IAED,IAAI,CAAC;QACH,
|
|
1
|
+
{"version":3,"file":"distance.js","sourceRoot":"","sources":["../../src/utils/distance.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAMH;;;;GAIG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAqB,CACjD,CAAS,EACT,CAAS,EACD,EAAE;IACV,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAA;IAC1D,CAAC;IAED,4DAA4D;IAC5D,MAAM,GAAG,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE;QACnC,MAAM,IAAI,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;QACvB,OAAO,GAAG,GAAG,IAAI,GAAG,IAAI,CAAA;IAC1B,CAAC,EAAE,CAAC,CAAC,CAAA;IAEL,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AACvB,CAAC,CAAA;AAED;;;;;GAKG;AACH,MAAM,CAAC,MAAM,cAAc,GAAqB,CAC9C,CAAS,EACT,CAAS,EACD,EAAE;IACV,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAA;IAC1D,CAAC;IAED,4DAA4D;IAC5D,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,CAAC,CAAC,MAAM,CAC3C,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE;QACd,OAAO;YACL,UAAU,EAAE,GAAG,CAAC,UAAU,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;YACvC,KAAK,EAAE,GAAG,CAAC,KAAK,GAAG,GAAG,GAAG,GAAG;YAC5B,KAAK,EAAE,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;SAC/B,CAAA;IACH,CAAC,EACD,EAAE,UAAU,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CACtC,CAAA;IAED,IAAI,KAAK,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,CAAA,CAAC,oCAAoC;IAC/C,CAAC;IAED,MAAM,UAAU,GAAG,UAAU,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAA;IACrE,2DAA2D;IAC3D,OAAO,CAAC,GAAG,UAAU,CAAA;AACvB,CAAC,CAAA;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAqB,CACjD,CAAS,EACT,CAAS,EACD,EAAE;IACV,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAA;IAC1D,CAAC;IAED,4DAA4D;IAC5D,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;AACjE,CAAC,CAAA;AAED;;;;;GAKG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAqB,CAClD,CAAS,EACT,CAAS,EACD,EAAE;IACV,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAA;IAC1D,CAAC;IAED,4DAA4D;IAC5D,MAAM,UAAU,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;IAEjE,iDAAiD;IACjD,OAAO,CAAC,UAAU,CAAA;AACpB,CAAC,CAAA;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,WAAmB,EACnB,OAAiB,EACjB,mBAAqC,iBAAiB;IAEtD,wDAAwD;IACxD,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;QACxB,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,gBAAgB,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC,CAAA;IACvE,CAAC;IAED,IAAI,CAAC;QACH,oDAAoD;QACpD,MAAM,kBAAkB,GAAG,CAAC,IAI3B,EAAE,EAAE;YACH,MAAM,EAAE,WAAW,EAAE,OAAO,EAAE,gBAAgB,EAAE,GAAG,IAAI,CAAA;YAEvD,wEAAwE;YACxE,IAAI,SAAmB,CAAA;YAEvB,IAAI,gBAAgB,CAAC,QAAQ,CAAC,mBAAmB,CAAC,EAAE,CAAC;gBACnD,2CAA2C;gBAC3C,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE;oBACjC,IAAI,GAAG,GAAG,CAAC,CAAA;oBACX,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;wBAC5C,MAAM,IAAI,GAAG,WAAW,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAA;wBACvC,GAAG,IAAI,IAAI,GAAG,IAAI,CAAA;oBACpB,CAAC;oBACD,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;gBACvB,CAAC,CAAC,CAAA;YACJ,CAAC;iBAAM,IAAI,gBAAgB,CAAC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;gBACvD,+CAA+C;gBAC/C,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE;oBACjC,IAAI,UAAU,GAAG,CAAC,CAAA;oBAClB,IAAI,SAAS,GAAG,CAAC,CAAA;oBACjB,IAAI,UAAU,GAAG,CAAC,CAAA;oBAElB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;wBAC5C,UAAU,IAAI,WAAW,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAA;wBACxC,SAAS,IAAI,WAAW,CAAC,CAAC,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAA;wBAC5C,UAAU,IAAI,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAA;oBACrC,CAAC;oBAED,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;oBAChC,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAA;oBAElC,IAAI,SAAS,KAAK,CAAC,IAAI,UAAU,KAAK,CAAC,EAAE,CAAC;wBACxC,OAAO,CAAC,CAAA,CAAC,oCAAoC;oBAC/C,CAAC;oBAED,MAAM,gBAAgB,GAAG,UAAU,GAAG,CAAC,SAAS,GAAG,UAAU,CAAC,CAAA;oBAC9D,OAAO,CAAC,GAAG,gBAAgB,CAAA;gBAC7B,CAAC,CAAC,CAAA;YACJ,CAAC;iBAAM,IAAI,gBAAgB,CAAC,QAAQ,CAAC,mBAAmB,CAAC,EAAE,CAAC;gBAC1D,mCAAmC;gBACnC,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE;oBACjC,IAAI,GAAG,GAAG,CAAC,CAAA;oBACX,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;wBAC5C,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;oBAC7C,CAAC;oBACD,OAAO,GAAG,CAAA;gBACZ,CAAC,CAAC,CAAA;YACJ,CAAC;iBAAM,IAAI,gBAAgB,CAAC,QAAQ,CAAC,oBAAoB,CAAC,EAAE,CAAC;gBAC3D,oCAAoC;gBACpC,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE;oBACjC,IAAI,UAAU,GAAG,CAAC,CAAA;oBAClB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;wBAC5C,UAAU,IAAI,WAAW,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAA;oBAC1C,CAAC;oBACD,OAAO,CAAC,UAAU,CAAA;gBACpB,CAAC,CAAC,CAAA;YACJ,CAAC;iBAAM,CAAC;gBACN,4DAA4D;gBAC5D,MAAM,gBAAgB,GAAG,IAAI,QAAQ,CACnC,SAAS,GAAG,gBAAgB,CAC7B,EAAsB,CAAA;gBAEvB,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CACjC,gBAAgB,CAAC,WAAW,EAAE,MAAM,CAAC,CACtC,CAAA;YACH,CAAC;YAED,OAAO,EAAE,SAAS,EAAE,CAAA;QACtB,CAAC,CAAA;QAED,wCAAwC;QACxC,MAAM,MAAM,GAAG,kBAAkB,CAAC;YAChC,WAAW;YACX,OAAO;YACP,gBAAgB,EAAE,gBAAgB,CAAC,QAAQ,EAAE;SAC9C,CAAC,CAAA;QAEF,OAAO,MAAM,CAAC,SAAS,CAAA;IACzB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,iEAAiE;QACjE,OAAO,CAAC,KAAK,CAAC,oCAAoC,EAAE,KAAK,CAAC,CAAA;QAC1D,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,gBAAgB,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC,CAAA;IACvE,CAAC;AACH,CAAC"}
|
|
@@ -1,128 +1,92 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Embedding functions for converting data to vectors
|
|
2
|
+
* Embedding functions for converting data to vectors using Transformers.js
|
|
3
|
+
* Complete rewrite to eliminate TensorFlow.js and use ONNX-based models
|
|
3
4
|
*/
|
|
4
5
|
import { EmbeddingFunction, EmbeddingModel, Vector } from '../coreTypes.js';
|
|
5
|
-
import { ModelLoadOptions } from './robustModelLoader.js';
|
|
6
6
|
/**
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
* This implementation attempts to use GPU processing when available for better performance,
|
|
12
|
-
* falling back to CPU processing for compatibility across all environments.
|
|
7
|
+
* Transformers.js Sentence Encoder embedding model
|
|
8
|
+
* Uses ONNX Runtime for fast, offline embeddings with smaller models
|
|
9
|
+
* Default model: all-MiniLM-L6-v2 (384 dimensions, ~90MB)
|
|
13
10
|
*/
|
|
14
|
-
export interface
|
|
11
|
+
export interface TransformerEmbeddingOptions {
|
|
12
|
+
/** Model name/path to use - defaults to all-MiniLM-L6-v2 */
|
|
13
|
+
model?: string;
|
|
15
14
|
/** Whether to enable verbose logging */
|
|
16
15
|
verbose?: boolean;
|
|
16
|
+
/** Custom cache directory for models */
|
|
17
|
+
cacheDir?: string;
|
|
18
|
+
/** Force local files only (no downloads) */
|
|
19
|
+
localFilesOnly?: boolean;
|
|
20
|
+
/** Quantization setting (fp32, fp16, q8, q4) */
|
|
21
|
+
dtype?: 'fp32' | 'fp16' | 'q8' | 'q4';
|
|
17
22
|
}
|
|
18
|
-
export declare class
|
|
19
|
-
private
|
|
23
|
+
export declare class TransformerEmbedding implements EmbeddingModel {
|
|
24
|
+
private extractor;
|
|
20
25
|
private initialized;
|
|
21
|
-
private tf;
|
|
22
|
-
private use;
|
|
23
|
-
private backend;
|
|
24
26
|
private verbose;
|
|
25
|
-
private
|
|
27
|
+
private options;
|
|
26
28
|
/**
|
|
27
|
-
* Create a new
|
|
28
|
-
* @param options Configuration options including reliability settings
|
|
29
|
+
* Create a new TransformerEmbedding instance
|
|
29
30
|
*/
|
|
30
|
-
constructor(options?:
|
|
31
|
+
constructor(options?: TransformerEmbeddingOptions);
|
|
31
32
|
/**
|
|
32
|
-
*
|
|
33
|
-
* This addresses issues with TensorFlow.js across all server environments
|
|
34
|
-
* (Node.js, serverless, and other server environments)
|
|
35
|
-
*
|
|
36
|
-
* Note: The main TensorFlow.js patching is now centralized in textEncoding.ts
|
|
37
|
-
* and applied through setup.ts. This method only adds additional utility functions
|
|
38
|
-
* that might be needed by TensorFlow.js.
|
|
33
|
+
* Get the default cache directory for models
|
|
39
34
|
*/
|
|
40
|
-
private
|
|
35
|
+
private getDefaultCacheDir;
|
|
41
36
|
/**
|
|
42
37
|
* Check if we're running in a test environment
|
|
43
38
|
*/
|
|
44
39
|
private isTestEnvironment;
|
|
45
40
|
/**
|
|
46
|
-
* Log message only if verbose mode is enabled
|
|
47
|
-
* This helps suppress non-essential log messages
|
|
41
|
+
* Log message only if verbose mode is enabled
|
|
48
42
|
*/
|
|
49
43
|
private logger;
|
|
50
|
-
/**
|
|
51
|
-
* Load the Universal Sentence Encoder model with robust retry and fallback mechanisms
|
|
52
|
-
* @param loadFunction The function to load the model from TensorFlow Hub
|
|
53
|
-
*/
|
|
54
|
-
private loadModelFromLocal;
|
|
55
44
|
/**
|
|
56
45
|
* Initialize the embedding model
|
|
57
46
|
*/
|
|
58
47
|
init(): Promise<void>;
|
|
59
48
|
/**
|
|
60
|
-
*
|
|
61
|
-
* @param data Text to embed
|
|
49
|
+
* Generate embeddings for text data
|
|
62
50
|
*/
|
|
63
51
|
embed(data: string | string[]): Promise<Vector>;
|
|
64
52
|
/**
|
|
65
|
-
*
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
53
|
+
* Dispose of the model and free resources
|
|
54
|
+
*/
|
|
55
|
+
dispose(): Promise<void>;
|
|
56
|
+
/**
|
|
57
|
+
* Get the dimension of embeddings produced by this model
|
|
69
58
|
*/
|
|
70
|
-
|
|
59
|
+
getDimension(): number;
|
|
71
60
|
/**
|
|
72
|
-
*
|
|
61
|
+
* Check if the model is initialized
|
|
73
62
|
*/
|
|
74
|
-
|
|
63
|
+
isInitialized(): boolean;
|
|
75
64
|
}
|
|
65
|
+
export declare const UniversalSentenceEncoder: typeof TransformerEmbedding;
|
|
76
66
|
/**
|
|
77
|
-
* Create
|
|
78
|
-
* @param model Embedding model to use (optional, defaults to UniversalSentenceEncoder)
|
|
67
|
+
* Create a new embedding model instance
|
|
79
68
|
*/
|
|
80
|
-
export declare function
|
|
81
|
-
export declare function createTensorFlowEmbeddingFunction(options?: {
|
|
82
|
-
verbose?: boolean;
|
|
83
|
-
}): EmbeddingFunction;
|
|
84
|
-
/**
|
|
85
|
-
* Default embedding function
|
|
86
|
-
* Uses UniversalSentenceEncoder for all text embeddings
|
|
87
|
-
* TensorFlow.js is required for this to work
|
|
88
|
-
* Uses CPU for compatibility
|
|
89
|
-
* @param options Configuration options
|
|
90
|
-
* @param options.verbose Whether to log non-essential messages (default: true)
|
|
91
|
-
*/
|
|
92
|
-
export declare function getDefaultEmbeddingFunction(options?: {
|
|
93
|
-
verbose?: boolean;
|
|
94
|
-
}): EmbeddingFunction;
|
|
69
|
+
export declare function createEmbeddingModel(options?: TransformerEmbeddingOptions): EmbeddingModel;
|
|
95
70
|
/**
|
|
96
|
-
* Default embedding function
|
|
97
|
-
* Uses UniversalSentenceEncoder for all text embeddings
|
|
98
|
-
* TensorFlow.js is required for this to work
|
|
99
|
-
* Uses CPU for compatibility
|
|
71
|
+
* Default embedding function using the lightweight transformer model
|
|
100
72
|
*/
|
|
101
73
|
export declare const defaultEmbeddingFunction: EmbeddingFunction;
|
|
102
|
-
export declare function createBatchEmbeddingFunction(options?: {
|
|
103
|
-
verbose?: boolean;
|
|
104
|
-
}): (dataArray: string[]) => Promise<Vector[]>;
|
|
105
74
|
/**
|
|
106
|
-
*
|
|
107
|
-
* Uses UniversalSentenceEncoder for all text embeddings
|
|
108
|
-
* TensorFlow.js is required for this to work
|
|
109
|
-
* Processes all items in a single batch operation
|
|
110
|
-
* @param options Configuration options
|
|
111
|
-
* @param options.verbose Whether to log non-essential messages (default: true)
|
|
75
|
+
* Create an embedding function with custom options
|
|
112
76
|
*/
|
|
113
|
-
export declare function
|
|
114
|
-
verbose?: boolean;
|
|
115
|
-
}): (dataArray: string[]) => Promise<Vector[]>;
|
|
77
|
+
export declare function createEmbeddingFunction(options?: TransformerEmbeddingOptions): EmbeddingFunction;
|
|
116
78
|
/**
|
|
117
|
-
*
|
|
118
|
-
* Uses UniversalSentenceEncoder for all text embeddings
|
|
119
|
-
* TensorFlow.js is required for this to work
|
|
120
|
-
* Processes all items in a single batch operation
|
|
79
|
+
* Batch embedding function for processing multiple texts efficiently
|
|
121
80
|
*/
|
|
122
|
-
export declare
|
|
81
|
+
export declare function batchEmbed(texts: string[], options?: TransformerEmbeddingOptions): Promise<Vector[]>;
|
|
123
82
|
/**
|
|
124
|
-
*
|
|
125
|
-
* This is a wrapper around createEmbeddingFunction that uses executeInThread
|
|
126
|
-
* @param model Embedding model to use
|
|
83
|
+
* Embedding functions for specific model types
|
|
127
84
|
*/
|
|
128
|
-
export declare
|
|
85
|
+
export declare const embeddingFunctions: {
|
|
86
|
+
/** Default lightweight model (all-MiniLM-L6-v2, 384 dimensions) */
|
|
87
|
+
default: EmbeddingFunction;
|
|
88
|
+
/** Create custom embedding function */
|
|
89
|
+
create: typeof createEmbeddingFunction;
|
|
90
|
+
/** Batch processing */
|
|
91
|
+
batch: typeof batchEmbed;
|
|
92
|
+
};
|