simile-search 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -1
- package/dist/ann.d.ts +110 -0
- package/dist/ann.js +374 -0
- package/dist/cache.d.ts +94 -0
- package/dist/cache.js +179 -0
- package/dist/embedder.d.ts +55 -4
- package/dist/embedder.js +144 -12
- package/dist/engine.d.ts +16 -3
- package/dist/engine.js +164 -20
- package/dist/engine.test.js +49 -1
- package/dist/index.d.ts +5 -0
- package/dist/index.js +5 -0
- package/dist/quantization.d.ts +50 -0
- package/dist/quantization.js +271 -0
- package/dist/similarity.d.ts +24 -0
- package/dist/similarity.js +105 -0
- package/dist/types.d.ts +35 -0
- package/dist/updater.d.ts +172 -0
- package/dist/updater.js +336 -0
- package/package.json +1 -1

package/dist/engine.js
CHANGED

@@ -2,9 +2,14 @@ import { embed, embedBatch, vectorToBase64, base64ToVector } from "./embedder.js
 import { cosine, fuzzyScore, keywordScore, calculateScoreStats } from "./similarity.js";
 import { hybridScore, getDefaultWeights } from "./ranker.js";
 import { extractText, normalizeScore } from "./utils.js";
-
+import { VectorCache, createCacheKey } from "./cache.js";
+import { HNSWIndex } from "./ann.js";
+import { BackgroundUpdater } from "./updater.js";
+const PACKAGE_VERSION = "0.4.0";
 export class Simile {
     constructor(items, vectors, config = {}) {
+        this.cache = null;
+        this.annIndex = null;
         this.items = items;
         this.vectors = vectors;
         this.itemIndex = new Map(items.map((item, i) => [item.id, i]));
@@ -13,7 +18,31 @@ export class Simile {
             model: config.model ?? "Xenova/all-MiniLM-L6-v2",
             textPaths: config.textPaths ?? [],
             normalizeScores: config.normalizeScores ?? true,
+            cache: config.cache ?? true,
+            quantization: config.quantization ?? 'float32',
+            useANN: config.useANN ?? false,
+            annThreshold: config.annThreshold ?? 1000,
         };
+        // Initialize Cache
+        if (this.config.cache) {
+            this.cache = new VectorCache(typeof this.config.cache === 'object' ? this.config.cache : {});
+        }
+        // Initialize ANN Index if threshold reached or forced
+        if (this.config.useANN || this.items.length >= this.config.annThreshold) {
+            this.buildANNIndex();
+        }
+        // Initialize Updater
+        this.updater = new BackgroundUpdater(this);
+    }
+    buildANNIndex() {
+        if (this.vectors.length === 0)
+            return;
+        const dims = this.vectors[0].length;
+        const hnswConfig = typeof this.config.useANN === 'object' ? this.config.useANN : {};
+        this.annIndex = new HNSWIndex(dims, hnswConfig);
+        for (let i = 0; i < this.vectors.length; i++) {
+            this.annIndex.add(i, this.vectors[i]);
+        }
     }
     /**
      * Extract searchable text from an item using configured paths.
@@ -28,10 +57,57 @@ export class Simile {
     static async from(items, config = {}) {
         const model = config.model ?? "Xenova/all-MiniLM-L6-v2";
         const textPaths = config.textPaths ?? [];
-        //
+        // For initialization, we create a temporary cache to avoid duplicate embeddings
+        // even if caching is disabled in config, it's useful during bulk init
+        const tempCache = new VectorCache({ maxSize: items.length });
         const texts = items.map((item) => extractText(item, textPaths.length > 0 ? textPaths : undefined));
-        const vectors =
-
+        const vectors = [];
+        const textsToEmbed = [];
+        const textToVectorIdx = new Map();
+        for (let i = 0; i < texts.length; i++) {
+            const text = texts[i];
+            const cacheKey = createCacheKey(text, model);
+            const cached = tempCache.get(cacheKey);
+            if (cached) {
+                vectors[i] = cached;
+            }
+            else {
+                textToVectorIdx.set(textsToEmbed.length, i);
+                textsToEmbed.push(text);
+            }
+        }
+        if (textsToEmbed.length > 0) {
+            const newVectors = await embedBatch(textsToEmbed, model);
+            for (let i = 0; i < newVectors.length; i++) {
+                const originalIdx = textToVectorIdx.get(i);
+                vectors[originalIdx] = newVectors[i];
+                tempCache.set(createCacheKey(textsToEmbed[i], model), newVectors[i]);
+            }
+        }
+        const engine = new Simile(items, vectors, config);
+        // Warm up the engine's cache with the vectors we just computed
+        if (engine.cache) {
+            for (let i = 0; i < texts.length; i++) {
+                engine.cache.set(createCacheKey(texts[i], model), vectors[i]);
+            }
+        }
+        return engine;
+    }
+    /**
+     * Internal helper for embedding text with caching.
+     */
+    async embedWithCache(text) {
+        const cacheKey = createCacheKey(text, this.config.model);
+        if (this.cache) {
+            const cached = this.cache.get(cacheKey);
+            if (cached)
+                return cached;
+        }
+        const vector = await embed(text, this.config.model);
+        if (this.cache) {
+            this.cache.set(cacheKey, vector);
+        }
+        return vector;
     }
     /**
      * Load a Simile instance from a previously saved snapshot.
@@ -72,29 +148,76 @@ export class Simile {
     toJSON() {
         return JSON.stringify(this.save());
     }
-    /**
-     * Add new items to the index
-     */
     async add(items) {
         const texts = items.map((item) => this.getSearchableText(item));
-
+        // Use embedBatch with cache optimization
+        const newVectors = [];
+        const textsToEmbed = [];
+        const textToIdx = new Map();
+        for (let i = 0; i < texts.length; i++) {
+            const cacheKey = createCacheKey(texts[i], this.config.model);
+            const cached = this.cache?.get(cacheKey);
+            if (cached) {
+                newVectors[i] = cached;
+            }
+            else {
+                textToIdx.set(textsToEmbed.length, i);
+                textsToEmbed.push(texts[i]);
+            }
+        }
+        if (textsToEmbed.length > 0) {
+            const embedded = await embedBatch(textsToEmbed, this.config.model);
+            for (let i = 0; i < embedded.length; i++) {
+                const originalIdx = textToIdx.get(i);
+                newVectors[originalIdx] = embedded[i];
+                this.cache?.set(createCacheKey(textsToEmbed[i], this.config.model), embedded[i]);
+            }
+        }
         for (let i = 0; i < items.length; i++) {
             const item = items[i];
             const existingIdx = this.itemIndex.get(item.id);
             if (existingIdx !== undefined) {
-                // Update existing item
                 this.items[existingIdx] = item;
                 this.vectors[existingIdx] = newVectors[i];
+                this.annIndex?.remove(existingIdx);
+                this.annIndex?.add(existingIdx, newVectors[i]);
             }
             else {
-                // Add new item
                 const newIdx = this.items.length;
                 this.items.push(item);
                 this.vectors.push(newVectors[i]);
                 this.itemIndex.set(item.id, newIdx);
+                // Auto-enable ANN if threshold reached
+                if (!this.annIndex && this.items.length >= this.config.annThreshold) {
+                    this.buildANNIndex();
+                }
+                else {
+                    this.annIndex?.add(newIdx, newVectors[i]);
+                }
             }
         }
     }
+    /**
+     * Queue items for background indexing (non-blocking).
+     */
+    enqueue(items) {
+        this.updater.enqueue(items);
+    }
+    /**
+     * Get indexing information and stats.
+     */
+    getIndexInfo() {
+        let memoryBytes = 0;
+        for (const v of this.vectors)
+            memoryBytes += v.byteLength;
+        return {
+            type: this.annIndex ? 'hnsw' : 'linear',
+            size: this.items.length,
+            memory: `${(memoryBytes / 1024 / 1024).toFixed(2)} MB`,
+            cacheStats: this.cache?.getStats(),
+            annStats: this.annIndex?.getStats(),
+        };
+    }
     /**
      * Remove items by ID
      */
@@ -111,6 +234,10 @@ export class Simile {
         this.items = newItems;
         this.vectors = newVectors;
         this.itemIndex = new Map(this.items.map((item, i) => [item.id, i]));
+        // Rebuild ANN index if it exists
+        if (this.annIndex) {
+            this.buildANNIndex();
+        }
     }
     /**
      * Get item by ID
@@ -150,18 +277,35 @@ export class Simile {
         if (query.length < minLength) {
             return [];
         }
-        const qVector = await
+        const qVector = await this.embedWithCache(query);
         // First pass: calculate raw scores
         const rawResults = [];
-
-
-
-
-
-
-
-
-
+        // Use ANN if enabled and available
+        if (this.annIndex && (options.useANN ?? true)) {
+            const annResults = this.annIndex.search(qVector, topK * 2); // Get more for filtering
+            for (const res of annResults) {
+                const item = this.items[res.id];
+                if (filter && !filter(item.metadata))
+                    continue;
+                const searchableText = this.getSearchableText(item);
+                const semantic = 1 - res.distance; // distance to similarity
+                const fuzzy = fuzzyScore(query, searchableText);
+                const keyword = keywordScore(query, searchableText);
+                rawResults.push({ index: res.id, item, semantic, fuzzy, keyword });
+            }
+        }
+        else {
+            // Fallback to linear scan
+            for (let i = 0; i < this.items.length; i++) {
+                const item = this.items[i];
+                if (filter && !filter(item.metadata))
+                    continue;
+                const searchableText = this.getSearchableText(item);
+                const semantic = cosine(qVector, this.vectors[i]);
+                const fuzzy = fuzzyScore(query, searchableText);
+                const keyword = keywordScore(query, searchableText);
+                rawResults.push({ index: i, item, semantic, fuzzy, keyword });
+            }
+        }
         // Calculate score statistics for normalization
         const stats = calculateScoreStats(rawResults);
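
Taken together, the engine.js changes add an embedding cache, optional HNSW-based ANN search, and a background updater behind four new config options (cache, quantization, useANN, annThreshold). A minimal usage sketch, assuming the package entry point re-exports Simile as shown in the index.js diff below; the item shape follows the test fixtures and the option values are only illustrative:

    import { Simile } from "simile-search";

    // Caching defaults to on; the HNSW index is built once the index holds
    // >= annThreshold items (1000 by default) or immediately when useANN is set.
    const engine = await Simile.from(
        [{ id: "1", text: "robot vacuum cleaner", metadata: { category: "home" } }],
        { cache: { enableStats: true }, useANN: false, annThreshold: 1000 },
    );

    const results = await engine.search("vacuum");                  // repeat queries hit the vector cache
    engine.enqueue([{ id: "2", text: "cordless stick vacuum" }]);   // non-blocking background indexing
    console.log(engine.getIndexInfo());                             // { type: 'linear' | 'hnsw', size, memory, cacheStats, annStats }
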
package/dist/engine.test.js
CHANGED

@@ -178,7 +178,7 @@ describe("simile persistence", () => {
         // Create engine and save
         const engine = await Simile.from(testItems);
         const snapshot = engine.save();
-        expect(snapshot.version).toBe("0.
+        expect(snapshot.version).toBe("0.4.0");
         expect(snapshot.items.length).toBe(5);
         expect(snapshot.vectors.length).toBe(5);
         expect(snapshot.model).toBe("Xenova/all-MiniLM-L6-v2");
@@ -268,3 +268,51 @@ describe("simile custom weights", () => {
         expect(keywordResults[0].text).toContain("floor");
     }, 30000);
 });
+describe("simile performance features", () => {
+    it("enables ANN index for large datasets", async () => {
+        // Generate many items to trigger ANN threshold
+        const manyItems = Array.from({ length: 100 }, (_, i) => ({
+            id: `many-${i}`,
+            text: `Item number ${i} for testing ANN index`,
+            metadata: { index: i },
+        }));
+        const engine = await Simile.from(manyItems, {
+            annThreshold: 50, // Set low to trigger for 100 items
+            useANN: true,
+        });
+        const info = engine.getIndexInfo();
+        expect(info.type).toBe("hnsw");
+        expect(info.annStats).toBeDefined();
+        expect(info.annStats?.size).toBe(100);
+        const results = await engine.search("Item number 42");
+        expect(results[0].id).toBe("many-42");
+    }, 30000);
+    it("uses cache for redundant embeddings", async () => {
+        const engine = await Simile.from(testItems, {
+            cache: { enableStats: true },
+        });
+        // Reset stats
+        const info1 = engine.getIndexInfo();
+        // Search for same query twice
+        await engine.search("cleaner");
+        await engine.search("cleaner");
+        const info2 = engine.getIndexInfo();
+        expect(info2.cacheStats?.hits).toBeGreaterThan(0);
+        expect(info2.cacheStats?.hitRate).toBeGreaterThan(0);
+    }, 30000);
+    it("adds items with cache optimization", async () => {
+        const engine = await Simile.from(testItems.slice(0, 1));
+        const info1 = engine.getIndexInfo();
+        // Add same item again (should hit cache)
+        await engine.add(testItems.slice(0, 1));
+        const info2 = engine.getIndexInfo();
+        // Cache stats might be null if not enabled, let's enable it
+        engine.remove(['1']);
+        const engineWithStats = await Simile.from(testItems.slice(0, 1), {
+            cache: { enableStats: true }
+        });
+        await engineWithStats.add([{ id: 'new', text: testItems[0].text }]);
+        const stats = engineWithStats.getIndexInfo().cacheStats;
+        expect(stats?.hits).toBe(1);
+    }, 30000);
+});

package/dist/index.d.ts
CHANGED

@@ -2,4 +2,9 @@ export * from "./types.js";
 export { embed, embedBatch, vectorToBase64, base64ToVector } from "./embedder.js";
 export { cosine, fuzzyScore, keywordScore, calculateScoreStats } from "./similarity.js";
 export { hybridScore, getDefaultWeights } from "./ranker.js";
+export { Simile } from "./engine.js";
 export { getByPath, extractText, normalizeScore } from "./utils.js";
+export { HNSWIndex } from "./ann.js";
+export { VectorCache, createCacheKey } from "./cache.js";
+export { BackgroundUpdater, DebouncedUpdater, PriorityUpdater } from "./updater.js";
+export { quantizeVector, dequantizeVector, cosineQuantized, quantizedToBase64, base64ToQuantized } from "./quantization.js";

package/dist/index.js
CHANGED

@@ -2,4 +2,9 @@ export * from "./types.js";
 export { embed, embedBatch, vectorToBase64, base64ToVector } from "./embedder.js";
 export { cosine, fuzzyScore, keywordScore, calculateScoreStats } from "./similarity.js";
 export { hybridScore, getDefaultWeights } from "./ranker.js";
+export { Simile } from "./engine.js";
 export { getByPath, extractText, normalizeScore } from "./utils.js";
+export { HNSWIndex } from "./ann.js";
+export { VectorCache, createCacheKey } from "./cache.js";
+export { BackgroundUpdater, DebouncedUpdater, PriorityUpdater } from "./updater.js";
+export { quantizeVector, dequantizeVector, cosineQuantized, quantizedToBase64, base64ToQuantized } from "./quantization.js";
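
Both index.d.ts and index.js now re-export the new modules from the package root, so (assuming the published entry point maps to dist/index.js) consumers can import them directly:

    import {
        Simile,
        HNSWIndex,
        VectorCache,
        createCacheKey,
        BackgroundUpdater,
        quantizeVector,
        dequantizeVector,
        cosineQuantized,
    } from "simile-search";
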

package/dist/quantization.d.ts
ADDED

@@ -0,0 +1,50 @@
+/**
+ * Vector Quantization - Reduce memory footprint by 50-75%.
+ *
+ * Quantization levels:
+ * - Float32: Full precision (4 bytes per dimension) - default
+ * - Float16: Half precision (2 bytes, ~0.1% accuracy loss)
+ * - Int8: 8-bit quantization (1 byte, ~1% accuracy loss)
+ */
+export type QuantizationType = 'float32' | 'float16' | 'int8';
+export interface QuantizedVector {
+    data: ArrayBuffer;
+    type: QuantizationType;
+    /** Scale factor for Int8 quantization */
+    scale?: number;
+    /** Offset for Int8 quantization */
+    offset?: number;
+}
+/**
+ * Quantize a Float32Array to a more memory-efficient format.
+ */
+export declare function quantizeVector(vector: Float32Array, type: QuantizationType): QuantizedVector;
+/**
+ * Dequantize back to Float32Array for computation.
+ */
+export declare function dequantizeVector(quantized: QuantizedVector): Float32Array;
+/**
+ * Compute cosine similarity directly on quantized vectors.
+ * More efficient than dequantizing first for large batches.
+ */
+export declare function cosineQuantized(a: QuantizedVector, b: QuantizedVector): number;
+/**
+ * Serialize quantized vector to base64 for storage.
+ */
+export declare function quantizedToBase64(quantized: QuantizedVector): string;
+/**
+ * Deserialize quantized vector from base64.
+ */
+export declare function base64ToQuantized(base64: string): QuantizedVector;
+/**
+ * Get bytes per dimension for a quantization type.
+ */
+export declare function getBytesPerDimension(type: QuantizationType): number;
+/**
+ * Estimate memory savings compared to Float32.
+ */
+export declare function estimateMemorySavings(vectorCount: number, dimensions: number, type: QuantizationType): {
+    original: number;
+    quantized: number;
+    savings: number;
+};
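
To make the declared 50-75% figure concrete, here is a small worked sketch of estimateMemorySavings, written in the relative-import style of the dist modules (the 10,000 x 384 numbers are illustrative; 384 is the output dimension of the default all-MiniLM-L6-v2 model used elsewhere in the package):

    import { estimateMemorySavings } from "./quantization.js";

    // float32: 10000 * 384 * 4 = 15,360,000 bytes (~14.65 MB)
    // int8:    10000 * 384 * 1 =  3,840,000 bytes (~3.66 MB)
    const { original, quantized, savings } = estimateMemorySavings(10000, 384, "int8");
    console.log(original, quantized, savings); // 15360000 3840000 0.75
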
package/dist/quantization.js
ADDED

@@ -0,0 +1,271 @@
+/**
+ * Vector Quantization - Reduce memory footprint by 50-75%.
+ *
+ * Quantization levels:
+ * - Float32: Full precision (4 bytes per dimension) - default
+ * - Float16: Half precision (2 bytes, ~0.1% accuracy loss)
+ * - Int8: 8-bit quantization (1 byte, ~1% accuracy loss)
+ */
+/**
+ * Quantize a Float32Array to a more memory-efficient format.
+ */
+export function quantizeVector(vector, type) {
+    switch (type) {
+        case 'float32':
+            // Ensure we get a proper ArrayBuffer (not SharedArrayBuffer)
+            const float32Data = new ArrayBuffer(vector.byteLength);
+            new Float32Array(float32Data).set(vector);
+            return {
+                data: float32Data,
+                type: 'float32',
+            };
+        case 'float16':
+            return {
+                data: float32ToFloat16(vector),
+                type: 'float16',
+            };
+        case 'int8':
+            return quantizeToInt8(vector);
+        default:
+            throw new Error(`Unknown quantization type: ${type}`);
+    }
+}
+/**
+ * Dequantize back to Float32Array for computation.
+ */
+export function dequantizeVector(quantized) {
+    switch (quantized.type) {
+        case 'float32':
+            return new Float32Array(quantized.data);
+        case 'float16':
+            return float16ToFloat32(quantized.data);
+        case 'int8':
+            return dequantizeFromInt8(quantized);
+        default:
+            throw new Error(`Unknown quantization type: ${quantized.type}`);
+    }
+}
+/**
+ * Compute cosine similarity directly on quantized vectors.
+ * More efficient than dequantizing first for large batches.
+ */
+export function cosineQuantized(a, b) {
+    if (a.type !== b.type) {
+        throw new Error('Cannot compute similarity between different quantization types');
+    }
+    switch (a.type) {
+        case 'float32': {
+            const va = new Float32Array(a.data);
+            const vb = new Float32Array(b.data);
+            return cosineFloat32(va, vb);
+        }
+        case 'float16': {
+            // For Float16, dequantize and compute (hardware Float16 not widely supported)
+            const va = float16ToFloat32(a.data);
+            const vb = float16ToFloat32(b.data);
+            return cosineFloat32(va, vb);
+        }
+        case 'int8': {
+            // For Int8, use integer arithmetic then scale
+            return cosineInt8(a, b);
+        }
+        default:
+            throw new Error(`Unknown quantization type: ${a.type}`);
+    }
+}
+/**
+ * Serialize quantized vector to base64 for storage.
+ */
+export function quantizedToBase64(quantized) {
+    const meta = JSON.stringify({
+        type: quantized.type,
+        scale: quantized.scale,
+        offset: quantized.offset,
+    });
+    const metaBuffer = Buffer.from(meta);
+    const dataBuffer = Buffer.from(quantized.data);
+    // Format: [2 bytes meta length][meta][data]
+    const combined = Buffer.alloc(2 + metaBuffer.length + dataBuffer.length);
+    combined.writeUInt16LE(metaBuffer.length, 0);
+    metaBuffer.copy(combined, 2);
+    dataBuffer.copy(combined, 2 + metaBuffer.length);
+    return combined.toString('base64');
+}
+/**
+ * Deserialize quantized vector from base64.
+ */
+export function base64ToQuantized(base64) {
+    const combined = Buffer.from(base64, 'base64');
+    const metaLength = combined.readUInt16LE(0);
+    const metaBuffer = combined.subarray(2, 2 + metaLength);
+    const dataBuffer = combined.subarray(2 + metaLength);
+    const meta = JSON.parse(metaBuffer.toString());
+    return {
+        data: dataBuffer.buffer.slice(dataBuffer.byteOffset, dataBuffer.byteOffset + dataBuffer.length),
+        type: meta.type,
+        scale: meta.scale,
+        offset: meta.offset,
+    };
+}
+/**
+ * Get bytes per dimension for a quantization type.
+ */
+export function getBytesPerDimension(type) {
+    switch (type) {
+        case 'float32': return 4;
+        case 'float16': return 2;
+        case 'int8': return 1;
+        default: return 4;
+    }
+}
+/**
+ * Estimate memory savings compared to Float32.
+ */
+export function estimateMemorySavings(vectorCount, dimensions, type) {
+    const original = vectorCount * dimensions * 4; // Float32
+    const quantized = vectorCount * dimensions * getBytesPerDimension(type);
+    return {
+        original,
+        quantized,
+        savings: 1 - (quantized / original),
+    };
+}
+// ============ Internal Helpers ============
+/**
+ * Convert Float32Array to Float16 (stored as Uint16Array buffer).
+ */
+function float32ToFloat16(float32) {
+    const uint16 = new Uint16Array(float32.length);
+    for (let i = 0; i < float32.length; i++) {
+        uint16[i] = floatToHalf(float32[i]);
+    }
+    return uint16.buffer;
+}
+/**
+ * Convert Float16 buffer back to Float32Array.
+ */
+function float16ToFloat32(buffer) {
+    const uint16 = new Uint16Array(buffer);
+    const float32 = new Float32Array(uint16.length);
+    for (let i = 0; i < uint16.length; i++) {
+        float32[i] = halfToFloat(uint16[i]);
+    }
+    return float32;
+}
+/**
+ * Quantize Float32Array to Int8 with scale/offset.
+ */
+function quantizeToInt8(vector) {
+    let min = Infinity;
+    let max = -Infinity;
+    for (let i = 0; i < vector.length; i++) {
+        if (vector[i] < min)
+            min = vector[i];
+        if (vector[i] > max)
+            max = vector[i];
+    }
+    const scale = (max - min) / 255;
+    const offset = min;
+    const int8 = new Int8Array(vector.length);
+    for (let i = 0; i < vector.length; i++) {
+        // Map to 0-255 range, then shift to -128 to 127
+        const normalized = scale > 0 ? (vector[i] - offset) / scale : 0;
+        int8[i] = Math.round(normalized) - 128;
+    }
+    return {
+        data: int8.buffer,
+        type: 'int8',
+        scale,
+        offset,
+    };
+}
+/**
+ * Dequantize Int8 back to Float32Array.
+ */
+function dequantizeFromInt8(quantized) {
+    const int8 = new Int8Array(quantized.data);
+    const float32 = new Float32Array(int8.length);
+    const scale = quantized.scale ?? 1;
+    const offset = quantized.offset ?? 0;
+    for (let i = 0; i < int8.length; i++) {
+        float32[i] = (int8[i] + 128) * scale + offset;
+    }
+    return float32;
+}
+/**
+ * Cosine similarity for Float32 vectors.
+ */
+function cosineFloat32(a, b) {
+    let dot = 0;
+    for (let i = 0; i < a.length; i++) {
+        dot += a[i] * b[i];
+    }
+    return dot;
+}
+/**
+ * Cosine similarity for Int8 quantized vectors.
+ * Uses integer arithmetic for speed.
+ */
+function cosineInt8(a, b) {
+    const int8A = new Int8Array(a.data);
+    const int8B = new Int8Array(b.data);
+    // Compute dot product in integer space
+    let dotInt = 0;
+    let normAInt = 0;
+    let normBInt = 0;
+    for (let i = 0; i < int8A.length; i++) {
+        const valA = int8A[i] + 128;
+        const valB = int8B[i] + 128;
+        dotInt += valA * valB;
+        normAInt += valA * valA;
+        normBInt += valB * valB;
+    }
+    // For normalized vectors, we can simplify
+    // Since embeddings are already normalized, dot product ≈ cosine
+    const scaleA = a.scale ?? 1;
+    const scaleB = b.scale ?? 1;
+    return (dotInt * scaleA * scaleB) / (Math.sqrt(normAInt) * Math.sqrt(normBInt) * scaleA * scaleB);
+}
+/**
+ * Convert a 32-bit float to 16-bit half precision.
+ */
+function floatToHalf(val) {
+    const floatView = new Float32Array(1);
+    const int32View = new Int32Array(floatView.buffer);
+    floatView[0] = val;
+    const x = int32View[0];
+    let bits = (x >> 16) & 0x8000; // Sign
+    let m = (x >> 12) & 0x07ff; // Mantissa
+    const e = (x >> 23) & 0xff; // Exponent
+    if (e < 103) {
+        return bits;
+    }
+    if (e > 142) {
+        bits |= 0x7c00;
+        bits |= (e === 255 ? 0 : 1) && (x & 0x007fffff);
+        return bits;
+    }
+    if (e < 113) {
+        m |= 0x0800;
+        bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
+        return bits;
+    }
+    bits |= ((e - 112) << 10) | (m >> 1);
+    bits += m & 1;
+    return bits;
+}
+/**
+ * Convert a 16-bit half precision to 32-bit float.
+ */
+function halfToFloat(val) {
+    const s = (val & 0x8000) >> 15;
+    const e = (val & 0x7c00) >> 10;
+    const f = val & 0x03ff;
+    if (e === 0) {
+        return (s ? -1 : 1) * Math.pow(2, -14) * (f / Math.pow(2, 10));
+    }
+    else if (e === 0x1f) {
+        return f ? NaN : (s ? -Infinity : Infinity);
+    }
+    return (s ? -1 : 1) * Math.pow(2, e - 15) * (1 + f / Math.pow(2, 10));
+}
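
A quick round-trip sketch of the implementation above (these five helpers are also re-exported from the package root per the index.js diff; note the base64 helpers use Buffer, so they assume a Node runtime):

    import { quantizeVector, dequantizeVector, cosineQuantized, quantizedToBase64, base64ToQuantized } from "./quantization.js";

    const v = new Float32Array([0.12, -0.5, 0.33, 0.08]);

    // Int8 reconstruction is lossy: each value comes back within about half a
    // quantization step, i.e. (max - min) / 255 / 2.
    const q = quantizeVector(v, "int8");
    const approx = dequantizeVector(q);

    // Identical quantized inputs score 1; the scale factors cancel in cosineInt8.
    const selfSim = cosineQuantized(q, q);

    // Serialization stores type/scale/offset ahead of the raw bytes.
    const restored = base64ToQuantized(quantizedToBase64(q));
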
package/dist/similarity.d.ts
CHANGED

@@ -4,6 +4,25 @@
  * Returns a value between -1 and 1, where 1 is identical.
  */
 export declare function cosine(a: Float32Array, b: Float32Array): number;
+/**
+ * SIMD-style unrolled cosine similarity for better performance.
+ * Processes 4 elements at a time for ~2-4x speedup.
+ */
+export declare function cosineFast(a: Float32Array, b: Float32Array): number;
+/**
+ * Early-exit cosine similarity with threshold.
+ * Returns null if the result would definitely be below threshold.
+ * Useful for filtering out low-scoring candidates quickly.
+ */
+export declare function cosineWithThreshold(a: Float32Array, b: Float32Array, threshold: number): number | null;
+/**
+ * Batch cosine similarity with built-in top-K selection.
+ * More efficient than computing all similarities then sorting.
+ */
+export declare function batchCosine(query: Float32Array, vectors: Float32Array[], topK: number, threshold?: number): Array<{
+    index: number;
+    score: number;
+}>;
 /**
  * Compute fuzzy similarity score using Levenshtein distance.
  * Returns a value between 0 and 1, where 1 is an exact match.
@@ -14,6 +33,11 @@ export declare function fuzzyScore(a: string, b: string): number;
  * Returns the proportion of query words found in the text (0 to 1).
  */
 export declare function keywordScore(query: string, text: string): number;
+/**
+ * Fast keyword score with early exit.
+ * Stops as soon as all query words are found.
+ */
+export declare function keywordScoreFast(query: string, text: string): number;
 /**
  * Score normalization statistics for a batch of results.
  */
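
The new similarity helpers (cosineFast, cosineWithThreshold, batchCosine, keywordScoreFast) are declared here but, per the index.d.ts/index.js diffs above, not re-exported from the package root, so this sketch uses the dist-internal relative import; the signatures come from the declarations and the behavior notes from their doc comments, everything else is illustrative:

    import { batchCosine, cosineWithThreshold } from "./similarity.js";

    const query = new Float32Array([0.1, 0.2, 0.3]);
    const docs = [new Float32Array([0.1, 0.2, 0.3]), new Float32Array([0.9, -0.1, 0.0])];

    // Single-pass top-K selection instead of scoring everything and sorting afterwards.
    const top = batchCosine(query, docs, 1);                 // Array<{ index, score }>

    // Early-exit variant: null means the score is definitely below the 0.8 threshold.
    const maybe = cosineWithThreshold(query, docs[1], 0.8);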