simile-search 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/engine.js CHANGED
@@ -2,9 +2,14 @@ import { embed, embedBatch, vectorToBase64, base64ToVector } from "./embedder.js
2
2
  import { cosine, fuzzyScore, keywordScore, calculateScoreStats } from "./similarity.js";
3
3
  import { hybridScore, getDefaultWeights } from "./ranker.js";
4
4
  import { extractText, normalizeScore } from "./utils.js";
5
- const PACKAGE_VERSION = "0.3.1";
5
+ import { VectorCache, createCacheKey } from "./cache.js";
6
+ import { HNSWIndex } from "./ann.js";
7
+ import { BackgroundUpdater } from "./updater.js";
8
+ const PACKAGE_VERSION = "0.4.0";
6
9
  export class Simile {
7
10
  constructor(items, vectors, config = {}) {
11
+ this.cache = null;
12
+ this.annIndex = null;
8
13
  this.items = items;
9
14
  this.vectors = vectors;
10
15
  this.itemIndex = new Map(items.map((item, i) => [item.id, i]));
@@ -13,7 +18,31 @@ export class Simile {
13
18
  model: config.model ?? "Xenova/all-MiniLM-L6-v2",
14
19
  textPaths: config.textPaths ?? [],
15
20
  normalizeScores: config.normalizeScores ?? true,
21
+ cache: config.cache ?? true,
22
+ quantization: config.quantization ?? 'float32',
23
+ useANN: config.useANN ?? false,
24
+ annThreshold: config.annThreshold ?? 1000,
16
25
  };
26
+ // Initialize Cache
27
+ if (this.config.cache) {
28
+ this.cache = new VectorCache(typeof this.config.cache === 'object' ? this.config.cache : {});
29
+ }
30
+ // Initialize ANN Index if threshold reached or forced
31
+ if (this.config.useANN || this.items.length >= this.config.annThreshold) {
32
+ this.buildANNIndex();
33
+ }
34
+ // Initialize Updater
35
+ this.updater = new BackgroundUpdater(this);
36
+ }
37
+ buildANNIndex() {
38
+ if (this.vectors.length === 0)
39
+ return;
40
+ const dims = this.vectors[0].length;
41
+ const hnswConfig = typeof this.config.useANN === 'object' ? this.config.useANN : {};
42
+ this.annIndex = new HNSWIndex(dims, hnswConfig);
43
+ for (let i = 0; i < this.vectors.length; i++) {
44
+ this.annIndex.add(i, this.vectors[i]);
45
+ }
17
46
  }
18
47
  /**
19
48
  * Extract searchable text from an item using configured paths.
@@ -28,10 +57,57 @@ export class Simile {
28
57
  static async from(items, config = {}) {
29
58
  const model = config.model ?? "Xenova/all-MiniLM-L6-v2";
30
59
  const textPaths = config.textPaths ?? [];
31
- // Extract text using paths if configured
60
+ // For initialization, we create a temporary cache to avoid duplicate embeddings
61
+ // even if caching is disabled in config, it's useful during bulk init
62
+ const tempCache = new VectorCache({ maxSize: items.length });
32
63
  const texts = items.map((item) => extractText(item, textPaths.length > 0 ? textPaths : undefined));
33
- const vectors = await embedBatch(texts, model);
34
- return new Simile(items, vectors, config);
64
+ const vectors = [];
65
+ const textsToEmbed = [];
66
+ const textToVectorIdx = new Map();
67
+ for (let i = 0; i < texts.length; i++) {
68
+ const text = texts[i];
69
+ const cacheKey = createCacheKey(text, model);
70
+ const cached = tempCache.get(cacheKey);
71
+ if (cached) {
72
+ vectors[i] = cached;
73
+ }
74
+ else {
75
+ textToVectorIdx.set(textsToEmbed.length, i);
76
+ textsToEmbed.push(text);
77
+ }
78
+ }
79
+ if (textsToEmbed.length > 0) {
80
+ const newVectors = await embedBatch(textsToEmbed, model);
81
+ for (let i = 0; i < newVectors.length; i++) {
82
+ const originalIdx = textToVectorIdx.get(i);
83
+ vectors[originalIdx] = newVectors[i];
84
+ tempCache.set(createCacheKey(textsToEmbed[i], model), newVectors[i]);
85
+ }
86
+ }
87
+ const engine = new Simile(items, vectors, config);
88
+ // Warm up the engine's cache with the vectors we just computed
89
+ if (engine.cache) {
90
+ for (let i = 0; i < texts.length; i++) {
91
+ engine.cache.set(createCacheKey(texts[i], model), vectors[i]);
92
+ }
93
+ }
94
+ return engine;
95
+ }
96
+ /**
97
+ * Internal helper for embedding text with caching.
98
+ */
99
+ async embedWithCache(text) {
100
+ const cacheKey = createCacheKey(text, this.config.model);
101
+ if (this.cache) {
102
+ const cached = this.cache.get(cacheKey);
103
+ if (cached)
104
+ return cached;
105
+ }
106
+ const vector = await embed(text, this.config.model);
107
+ if (this.cache) {
108
+ this.cache.set(cacheKey, vector);
109
+ }
110
+ return vector;
35
111
  }
36
112
  /**
37
113
  * Load a Simile instance from a previously saved snapshot.
@@ -72,29 +148,76 @@ export class Simile {
72
148
  toJSON() {
73
149
  return JSON.stringify(this.save());
74
150
  }
75
- /**
76
- * Add new items to the index
77
- */
78
151
/**
 * Add new items to the index, embedding their searchable text.
 * Items whose id already exists are updated in place; new ids are appended.
 * When the vector cache is enabled, texts seen before skip re-embedding.
 */
async add(items) {
    const texts = items.map((item) => this.getSearchableText(item));
    // Use embedBatch with cache optimization
    const newVectors = [];
    const textsToEmbed = [];
    // Maps position in textsToEmbed -> position in `items` for fan-out below.
    const textToIdx = new Map();
    for (let i = 0; i < texts.length; i++) {
        const cacheKey = createCacheKey(texts[i], this.config.model);
        const cached = this.cache?.get(cacheKey);
        if (cached) {
            newVectors[i] = cached;
        }
        else {
            textToIdx.set(textsToEmbed.length, i);
            textsToEmbed.push(texts[i]);
        }
    }
    if (textsToEmbed.length > 0) {
        const embedded = await embedBatch(textsToEmbed, this.config.model);
        for (let i = 0; i < embedded.length; i++) {
            const originalIdx = textToIdx.get(i);
            newVectors[originalIdx] = embedded[i];
            // Store fresh embeddings so later add()/search() calls can reuse them.
            this.cache?.set(createCacheKey(textsToEmbed[i], this.config.model), embedded[i]);
        }
    }
    for (let i = 0; i < items.length; i++) {
        const item = items[i];
        const existingIdx = this.itemIndex.get(item.id);
        if (existingIdx !== undefined) {
            // Update existing item in place and refresh its ANN entry.
            this.items[existingIdx] = item;
            this.vectors[existingIdx] = newVectors[i];
            this.annIndex?.remove(existingIdx);
            this.annIndex?.add(existingIdx, newVectors[i]);
        }
        else {
            // Append a brand-new item.
            const newIdx = this.items.length;
            this.items.push(item);
            this.vectors.push(newVectors[i]);
            this.itemIndex.set(item.id, newIdx);
            // Auto-enable ANN if threshold reached
            if (!this.annIndex && this.items.length >= this.config.annThreshold) {
                // buildANNIndex() indexes every stored vector, including this one.
                this.buildANNIndex();
            }
            else {
                this.annIndex?.add(newIdx, newVectors[i]);
            }
        }
    }
}
200
/**
 * Queue items for background indexing (non-blocking).
 * Delegates to the BackgroundUpdater created in the constructor; the
 * updater decides when the queued items are actually indexed.
 */
enqueue(items) {
    this.updater.enqueue(items);
}
206
+ /**
207
+ * Get indexing information and stats.
208
+ */
209
+ getIndexInfo() {
210
+ let memoryBytes = 0;
211
+ for (const v of this.vectors)
212
+ memoryBytes += v.byteLength;
213
+ return {
214
+ type: this.annIndex ? 'hnsw' : 'linear',
215
+ size: this.items.length,
216
+ memory: `${(memoryBytes / 1024 / 1024).toFixed(2)} MB`,
217
+ cacheStats: this.cache?.getStats(),
218
+ annStats: this.annIndex?.getStats(),
219
+ };
220
+ }
98
221
  /**
99
222
  * Remove items by ID
100
223
  */
@@ -111,6 +234,10 @@ export class Simile {
111
234
  this.items = newItems;
112
235
  this.vectors = newVectors;
113
236
  this.itemIndex = new Map(this.items.map((item, i) => [item.id, i]));
237
+ // Rebuild ANN index if it exists
238
+ if (this.annIndex) {
239
+ this.buildANNIndex();
240
+ }
114
241
  }
115
242
  /**
116
243
  * Get item by ID
@@ -150,18 +277,35 @@ export class Simile {
150
277
  if (query.length < minLength) {
151
278
  return [];
152
279
  }
153
- const qVector = await embed(query, this.config.model);
280
+ const qVector = await this.embedWithCache(query);
154
281
  // First pass: calculate raw scores
155
282
  const rawResults = [];
156
- for (let i = 0; i < this.items.length; i++) {
157
- const item = this.items[i];
158
- if (filter && !filter(item.metadata))
159
- continue;
160
- const searchableText = this.getSearchableText(item);
161
- const semantic = cosine(qVector, this.vectors[i]);
162
- const fuzzy = fuzzyScore(query, searchableText);
163
- const keyword = keywordScore(query, searchableText);
164
- rawResults.push({ index: i, item, semantic, fuzzy, keyword });
283
+ // Use ANN if enabled and available
284
+ if (this.annIndex && (options.useANN ?? true)) {
285
+ const annResults = this.annIndex.search(qVector, topK * 2); // Get more for filtering
286
+ for (const res of annResults) {
287
+ const item = this.items[res.id];
288
+ if (filter && !filter(item.metadata))
289
+ continue;
290
+ const searchableText = this.getSearchableText(item);
291
+ const semantic = 1 - res.distance; // distance to similarity
292
+ const fuzzy = fuzzyScore(query, searchableText);
293
+ const keyword = keywordScore(query, searchableText);
294
+ rawResults.push({ index: res.id, item, semantic, fuzzy, keyword });
295
+ }
296
+ }
297
+ else {
298
+ // Fallback to linear scan
299
+ for (let i = 0; i < this.items.length; i++) {
300
+ const item = this.items[i];
301
+ if (filter && !filter(item.metadata))
302
+ continue;
303
+ const searchableText = this.getSearchableText(item);
304
+ const semantic = cosine(qVector, this.vectors[i]);
305
+ const fuzzy = fuzzyScore(query, searchableText);
306
+ const keyword = keywordScore(query, searchableText);
307
+ rawResults.push({ index: i, item, semantic, fuzzy, keyword });
308
+ }
165
309
  }
166
310
  // Calculate score statistics for normalization
167
311
  const stats = calculateScoreStats(rawResults);
@@ -178,7 +178,7 @@ describe("simile persistence", () => {
178
178
  // Create engine and save
179
179
  const engine = await Simile.from(testItems);
180
180
  const snapshot = engine.save();
181
- expect(snapshot.version).toBe("0.2.0");
181
+ expect(snapshot.version).toBe("0.4.0");
182
182
  expect(snapshot.items.length).toBe(5);
183
183
  expect(snapshot.vectors.length).toBe(5);
184
184
  expect(snapshot.model).toBe("Xenova/all-MiniLM-L6-v2");
@@ -268,3 +268,51 @@ describe("simile custom weights", () => {
268
268
  expect(keywordResults[0].text).toContain("floor");
269
269
  }, 30000);
270
270
  });
271
describe("simile performance features", () => {
    it("enables ANN index for large datasets", async () => {
        // Generate many items to trigger ANN threshold
        const manyItems = Array.from({ length: 100 }, (_, i) => ({
            id: `many-${i}`,
            text: `Item number ${i} for testing ANN index`,
            metadata: { index: i },
        }));
        const engine = await Simile.from(manyItems, {
            annThreshold: 50, // Set low to trigger for 100 items
            useANN: true,
        });
        const info = engine.getIndexInfo();
        expect(info.type).toBe("hnsw");
        expect(info.annStats).toBeDefined();
        expect(info.annStats?.size).toBe(100);
        const results = await engine.search("Item number 42");
        expect(results[0].id).toBe("many-42");
    }, 30000);
    it("uses cache for redundant embeddings", async () => {
        const engine = await Simile.from(testItems, {
            cache: { enableStats: true },
        });
        // Search the same query twice: the second run must hit the vector cache.
        await engine.search("cleaner");
        await engine.search("cleaner");
        const stats = engine.getIndexInfo().cacheStats;
        expect(stats?.hits).toBeGreaterThan(0);
        expect(stats?.hitRate).toBeGreaterThan(0);
    }, 30000);
    it("adds items with cache optimization", async () => {
        // Stats must be enabled for cache hits to be observable.
        const engineWithStats = await Simile.from(testItems.slice(0, 1), {
            cache: { enableStats: true },
        });
        // Adding an item with identical text should be served from the cache
        // warmed during Simile.from().
        await engineWithStats.add([{ id: 'new', text: testItems[0].text }]);
        const stats = engineWithStats.getIndexInfo().cacheStats;
        expect(stats?.hits).toBe(1);
    }, 30000);
});
package/dist/index.d.ts CHANGED
@@ -2,4 +2,9 @@ export * from "./types.js";
2
2
  export { embed, embedBatch, vectorToBase64, base64ToVector } from "./embedder.js";
3
3
  export { cosine, fuzzyScore, keywordScore, calculateScoreStats } from "./similarity.js";
4
4
  export { hybridScore, getDefaultWeights } from "./ranker.js";
5
+ export { Simile } from "./engine.js";
5
6
  export { getByPath, extractText, normalizeScore } from "./utils.js";
7
+ export { HNSWIndex } from "./ann.js";
8
+ export { VectorCache, createCacheKey } from "./cache.js";
9
+ export { BackgroundUpdater, DebouncedUpdater, PriorityUpdater } from "./updater.js";
10
+ export { quantizeVector, dequantizeVector, cosineQuantized, quantizedToBase64, base64ToQuantized } from "./quantization.js";
package/dist/index.js CHANGED
@@ -2,4 +2,9 @@ export * from "./types.js";
2
2
  export { embed, embedBatch, vectorToBase64, base64ToVector } from "./embedder.js";
3
3
  export { cosine, fuzzyScore, keywordScore, calculateScoreStats } from "./similarity.js";
4
4
  export { hybridScore, getDefaultWeights } from "./ranker.js";
5
+ export { Simile } from "./engine.js";
5
6
  export { getByPath, extractText, normalizeScore } from "./utils.js";
7
+ export { HNSWIndex } from "./ann.js";
8
+ export { VectorCache, createCacheKey } from "./cache.js";
9
+ export { BackgroundUpdater, DebouncedUpdater, PriorityUpdater } from "./updater.js";
10
+ export { quantizeVector, dequantizeVector, cosineQuantized, quantizedToBase64, base64ToQuantized } from "./quantization.js";
@@ -0,0 +1,50 @@
1
/**
 * Vector Quantization - Reduce memory footprint by 50-75%.
 *
 * Quantization levels:
 * - Float32: Full precision (4 bytes per dimension) - default
 * - Float16: Half precision (2 bytes, ~0.1% accuracy loss)
 * - Int8: 8-bit quantization (1 byte, ~1% accuracy loss)
 */
export type QuantizationType = 'float32' | 'float16' | 'int8';
export interface QuantizedVector {
    /** Raw quantized bytes; width per dimension depends on `type`. */
    data: ArrayBuffer;
    type: QuantizationType;
    /** Scale factor for Int8 quantization (unset for float variants) */
    scale?: number;
    /** Offset for Int8 quantization (unset for float variants) */
    offset?: number;
}
/**
 * Quantize a Float32Array to a more memory-efficient format.
 */
export declare function quantizeVector(vector: Float32Array, type: QuantizationType): QuantizedVector;
/**
 * Dequantize back to Float32Array for computation.
 */
export declare function dequantizeVector(quantized: QuantizedVector): Float32Array;
/**
 * Compute cosine similarity directly on quantized vectors.
 * More efficient than dequantizing first for large batches.
 * Throws when `a` and `b` use different quantization types.
 */
export declare function cosineQuantized(a: QuantizedVector, b: QuantizedVector): number;
/**
 * Serialize quantized vector to base64 for storage.
 */
export declare function quantizedToBase64(quantized: QuantizedVector): string;
/**
 * Deserialize quantized vector from base64.
 */
export declare function base64ToQuantized(base64: string): QuantizedVector;
/**
 * Get bytes per dimension for a quantization type.
 */
export declare function getBytesPerDimension(type: QuantizationType): number;
/**
 * Estimate memory savings compared to Float32.
 * `savings` is the saved fraction relative to Float32 storage.
 */
export declare function estimateMemorySavings(vectorCount: number, dimensions: number, type: QuantizationType): {
    original: number;
    quantized: number;
    savings: number;
};
@@ -0,0 +1,271 @@
1
+ /**
2
+ * Vector Quantization - Reduce memory footprint by 50-75%.
3
+ *
4
+ * Quantization levels:
5
+ * - Float32: Full precision (4 bytes per dimension) - default
6
+ * - Float16: Half precision (2 bytes, ~0.1% accuracy loss)
7
+ * - Int8: 8-bit quantization (1 byte, ~1% accuracy loss)
8
+ */
9
/**
 * Quantize a Float32Array to a more memory-efficient format.
 */
export function quantizeVector(vector, type) {
    if (type === 'float32') {
        // Copy into a fresh ArrayBuffer (never a SharedArrayBuffer view).
        const buffer = new ArrayBuffer(vector.byteLength);
        new Float32Array(buffer).set(vector);
        return { data: buffer, type: 'float32' };
    }
    if (type === 'float16') {
        return { data: float32ToFloat16(vector), type: 'float16' };
    }
    if (type === 'int8') {
        return quantizeToInt8(vector);
    }
    throw new Error(`Unknown quantization type: ${type}`);
}
33
/**
 * Dequantize back to Float32Array for computation.
 */
export function dequantizeVector(quantized) {
    const { type } = quantized;
    if (type === 'float32') {
        return new Float32Array(quantized.data);
    }
    if (type === 'float16') {
        return float16ToFloat32(quantized.data);
    }
    if (type === 'int8') {
        return dequantizeFromInt8(quantized);
    }
    throw new Error(`Unknown quantization type: ${type}`);
}
48
/**
 * Compute cosine similarity directly on quantized vectors.
 * More efficient than dequantizing first for large batches.
 */
export function cosineQuantized(a, b) {
    if (a.type !== b.type) {
        throw new Error('Cannot compute similarity between different quantization types');
    }
    if (a.type === 'float32') {
        return cosineFloat32(new Float32Array(a.data), new Float32Array(b.data));
    }
    if (a.type === 'float16') {
        // Hardware Float16 is not widely supported; widen, then compare.
        return cosineFloat32(float16ToFloat32(a.data), float16ToFloat32(b.data));
    }
    if (a.type === 'int8') {
        // Integer-space arithmetic, rescaled afterwards.
        return cosineInt8(a, b);
    }
    throw new Error(`Unknown quantization type: ${a.type}`);
}
76
/**
 * Serialize quantized vector to base64 for storage.
 * Layout: [2-byte LE meta length][JSON meta][raw vector bytes].
 */
export function quantizedToBase64(quantized) {
    const metaBuffer = Buffer.from(JSON.stringify({
        type: quantized.type,
        scale: quantized.scale,
        offset: quantized.offset,
    }));
    const header = Buffer.alloc(2);
    header.writeUInt16LE(metaBuffer.length, 0);
    const dataBuffer = Buffer.from(quantized.data);
    return Buffer.concat([header, metaBuffer, dataBuffer]).toString('base64');
}
94
/**
 * Deserialize quantized vector from base64.
 * Mirrors the layout written by quantizedToBase64.
 */
export function base64ToQuantized(base64) {
    const combined = Buffer.from(base64, 'base64');
    const metaLength = combined.readUInt16LE(0);
    const meta = JSON.parse(combined.subarray(2, 2 + metaLength).toString());
    const dataView = combined.subarray(2 + metaLength);
    // Slice an exact ArrayBuffer copy — subarray() shares Node's pooled buffer.
    const data = dataView.buffer.slice(dataView.byteOffset, dataView.byteOffset + dataView.length);
    return { data, type: meta.type, scale: meta.scale, offset: meta.offset };
}
110
/**
 * Get bytes per dimension for a quantization type.
 * Unknown types fall back to Float32 width (4 bytes).
 */
export function getBytesPerDimension(type) {
    const widths = { float32: 4, float16: 2, int8: 1 };
    return widths[type] ?? 4;
}
121
/**
 * Estimate memory savings compared to Float32.
 * Returns byte counts plus the saved fraction relative to Float32 storage.
 */
export function estimateMemorySavings(vectorCount, dimensions, type) {
    // Bytes per dimension by type; unknown types count as full Float32.
    const perDim = type === 'float16' ? 2 : type === 'int8' ? 1 : 4;
    const original = vectorCount * dimensions * 4;
    const quantized = vectorCount * dimensions * perDim;
    return { original, quantized, savings: 1 - quantized / original };
}
133
+ // ============ Internal Helpers ============
134
/**
 * Convert Float32Array to Float16 (stored as Uint16Array buffer).
 */
function float32ToFloat16(float32) {
    const halves = new Uint16Array(float32.length);
    float32.forEach((value, i) => {
        halves[i] = floatToHalf(value);
    });
    return halves.buffer;
}
144
/**
 * Convert Float16 buffer back to Float32Array.
 */
function float16ToFloat32(buffer) {
    const halves = new Uint16Array(buffer);
    const out = new Float32Array(halves.length);
    halves.forEach((value, i) => {
        out[i] = halfToFloat(value);
    });
    return out;
}
155
/**
 * Quantize Float32Array to Int8 with scale/offset.
 * Values are mapped linearly onto [-128, 127]; scale and offset let the
 * dequantizer recover the original range.
 */
function quantizeToInt8(vector) {
    let min = Infinity;
    let max = -Infinity;
    for (const value of vector) {
        min = Math.min(min, value);
        max = Math.max(max, value);
    }
    const scale = (max - min) / 255;
    const offset = min;
    const int8 = new Int8Array(vector.length);
    for (let i = 0; i < vector.length; i++) {
        // Map to the 0..255 range, then shift down to -128..127.
        const level = scale > 0 ? (vector[i] - offset) / scale : 0;
        int8[i] = Math.round(level) - 128;
    }
    return { data: int8.buffer, type: 'int8', scale, offset };
}
182
/**
 * Dequantize Int8 back to Float32Array.
 * Inverse of quantizeToInt8: value = (int8 + 128) * scale + offset.
 */
function dequantizeFromInt8(quantized) {
    const levels = new Int8Array(quantized.data);
    const scale = quantized.scale ?? 1;
    const offset = quantized.offset ?? 0;
    const out = new Float32Array(levels.length);
    levels.forEach((level, i) => {
        out[i] = (level + 128) * scale + offset;
    });
    return out;
}
195
/**
 * Cosine similarity for Float32 vectors.
 *
 * Fix: the previous version returned only the raw dot product, which equals
 * cosine similarity solely for unit-normalized inputs — yet it backs the
 * public cosineQuantized() for float32/float16 data of any magnitude.
 * Now computes dot(a, b) / (|a| * |b|); for unit vectors the denominator is
 * 1, so normalized-embedding results are unchanged. Returns 0 when either
 * vector has zero norm.
 */
function cosineFloat32(a, b) {
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    return denom > 0 ? dot / denom : 0;
}
205
/**
 * Cosine similarity for Int8 quantized vectors.
 * Uses integer accumulators for speed, then applies scale/offset exactly.
 *
 * Fix: the previous version divided by (normA * normB * scaleA * scaleB)
 * after multiplying the dot product by (scaleA * scaleB) — the scales
 * cancelled algebraically — and it ignored the offsets entirely. It thus
 * returned the cosine of the shifted integer codes, not of the dequantized
 * vectors (e.g. [-1, 1] vs [1, -1] yielded 0 instead of -1, since both
 * quantize with offset = -1). This version reconstructs the true dot
 * product and norms from the affine mapping v = (int8 + 128) * scale +
 * offset using only integer sums inside the loop.
 */
function cosineInt8(a, b) {
    const int8A = new Int8Array(a.data);
    const int8B = new Int8Array(b.data);
    const scaleA = a.scale ?? 1;
    const scaleB = b.scale ?? 1;
    const offsetA = a.offset ?? 0;
    const offsetB = b.offset ?? 0;
    // Integer accumulators over the shifted codes (0..255).
    let sumAB = 0;
    let sumA = 0;
    let sumB = 0;
    let sumAA = 0;
    let sumBB = 0;
    for (let i = 0; i < int8A.length; i++) {
        const valA = int8A[i] + 128;
        const valB = int8B[i] + 128;
        sumAB += valA * valB;
        sumA += valA;
        sumB += valB;
        sumAA += valA * valA;
        sumBB += valB * valB;
    }
    const n = int8A.length;
    // Expand dot((sA*va+oA), (sB*vb+oB)) and the squared norms analytically.
    const dot = scaleA * scaleB * sumAB + scaleA * offsetB * sumA + scaleB * offsetA * sumB + n * offsetA * offsetB;
    const normA = Math.sqrt(scaleA * scaleA * sumAA + 2 * scaleA * offsetA * sumA + n * offsetA * offsetA);
    const normB = Math.sqrt(scaleB * scaleB * sumBB + 2 * scaleB * offsetB * sumB + n * offsetB * offsetB);
    const denom = normA * normB;
    return denom > 0 ? dot / denom : 0;
}
229
/**
 * Convert a 32-bit float to 16-bit half precision.
 * Classic bit-twiddling conversion: extracts sign/exponent/mantissa from the
 * float32 bit pattern and rebiases into the binary16 layout.
 */
function floatToHalf(val) {
    const floatView = new Float32Array(1);
    const int32View = new Int32Array(floatView.buffer);
    floatView[0] = val;
    const x = int32View[0]; // raw IEEE-754 float32 bits
    let bits = (x >> 16) & 0x8000; // Sign
    let m = (x >> 12) & 0x07ff; // Mantissa
    const e = (x >> 23) & 0xff; // Exponent
    // Exponent too small for even a half subnormal: flush to signed zero.
    if (e < 103) {
        return bits;
    }
    // Overflow / Inf / NaN: set the all-ones half exponent.
    // NOTE(review): `(e === 255 ? 0 : 1) && (x & 0x007fffff)` ORs raw mantissa
    // bits for finite overflow and yields 0 (Inf) for NaN input — this looks
    // inverted relative to the usual NaN-preserving intent; confirm against a
    // reference float-to-half implementation.
    if (e > 142) {
        bits |= 0x7c00;
        bits |= (e === 255 ? 0 : 1) && (x & 0x007fffff);
        return bits;
    }
    // Subnormal half: add the implicit leading 1, shift with round-to-nearest.
    if (e < 113) {
        m |= 0x0800;
        bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
        return bits;
    }
    // Normalized half: rebias exponent (127 -> 15) and round the mantissa LSB.
    bits |= ((e - 112) << 10) | (m >> 1);
    bits += m & 1;
    return bits;
}
257
/**
 * Convert a 16-bit half precision to 32-bit float.
 */
function halfToFloat(val) {
    const sign = (val & 0x8000) ? -1 : 1;
    const exponent = (val & 0x7c00) >> 10;
    const fraction = val & 0x03ff;
    if (exponent === 0) {
        // Subnormal (or zero): no implicit leading 1, fixed 2^-14 scale.
        return sign * Math.pow(2, -14) * (fraction / 1024);
    }
    if (exponent === 0x1f) {
        // All-ones exponent encodes NaN (fraction != 0) or signed Infinity.
        return fraction ? NaN : sign * Infinity;
    }
    // Normalized value: implicit leading 1 and biased exponent (bias 15).
    return sign * Math.pow(2, exponent - 15) * (1 + fraction / 1024);
}
@@ -4,6 +4,25 @@
4
4
  * Returns a value between -1 and 1, where 1 is identical.
5
5
  */
6
6
  export declare function cosine(a: Float32Array, b: Float32Array): number;
7
+ /**
8
+ * SIMD-style unrolled cosine similarity for better performance.
9
+ * Processes 4 elements at a time for ~2-4x speedup.
10
+ */
11
+ export declare function cosineFast(a: Float32Array, b: Float32Array): number;
12
+ /**
13
+ * Early-exit cosine similarity with threshold.
14
+ * Returns null if the result would definitely be below threshold.
15
+ * Useful for filtering out low-scoring candidates quickly.
16
+ */
17
+ export declare function cosineWithThreshold(a: Float32Array, b: Float32Array, threshold: number): number | null;
18
+ /**
19
+ * Batch cosine similarity with built-in top-K selection.
20
+ * More efficient than computing all similarities then sorting.
21
+ */
22
+ export declare function batchCosine(query: Float32Array, vectors: Float32Array[], topK: number, threshold?: number): Array<{
23
+ index: number;
24
+ score: number;
25
+ }>;
7
26
  /**
8
27
  * Compute fuzzy similarity score using Levenshtein distance.
9
28
  * Returns a value between 0 and 1, where 1 is an exact match.
@@ -14,6 +33,11 @@ export declare function fuzzyScore(a: string, b: string): number;
14
33
  * Returns the proportion of query words found in the text (0 to 1).
15
34
  */
16
35
  export declare function keywordScore(query: string, text: string): number;
36
+ /**
37
+ * Fast keyword score with early exit.
38
+ * Stops as soon as all query words are found.
39
+ */
40
+ export declare function keywordScoreFast(query: string, text: string): number;
17
41
  /**
18
42
  * Score normalization statistics for a batch of results.
19
43
  */