simile-search 0.3.2 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,270 +1,84 @@
1
1
  import { describe, it, expect } from "vitest";
2
2
  import { Simile } from "./engine";
3
- import { getByPath, extractText } from "./utils";
4
- import * as fs from "fs";
5
- import * as path from "path";
6
- const testItems = [
7
- {
8
- id: "1",
9
- text: "Bathroom floor cleaner",
10
- metadata: { category: "Cleaning" },
11
- },
12
- {
13
- id: "2",
14
- text: "Dishwashing liquid",
15
- metadata: { category: "Kitchen" },
16
- },
17
- {
18
- id: "3",
19
- text: "Ipod Charger",
20
- metadata: { category: "Electronics" },
21
- },
22
- {
23
- id: "4",
24
- text: "Kitchen cleaning spray",
25
- metadata: { category: "Cleaning" },
26
- },
27
- {
28
- id: "5",
29
- text: "USB-C phone charger cable",
30
- metadata: { category: "Electronics" },
31
- },
32
- ];
33
3
  describe("simile search", () => {
34
4
  it("returns semantically similar items", async () => {
35
- const engine = await Simile.from(testItems.slice(0, 3));
36
- const results = await engine.search("cleaner", { explain: true });
37
- console.log("Search for 'cleaner':", results);
38
- expect(results.length).toBeGreaterThan(0);
39
- expect(results[0].id).toBe("1");
40
- expect(results[0].score).toBeGreaterThan(0.5);
41
- }, 30000);
42
- it("differentiates between unrelated items", async () => {
43
- const engine = await Simile.from(testItems);
44
- // Search for "phone charger" - should clearly prefer electronics
45
- const results = await engine.search("phone charger", { explain: true });
46
- console.log("Search for 'phone charger':", results);
47
- // Both chargers should be in top 2 (order may vary based on model)
48
- const topTwoIds = [results[0].id, results[1].id];
49
- expect(topTwoIds).toContain("5"); // USB-C phone charger
50
- expect(topTwoIds).toContain("3"); // iPod Charger
51
- // Both chargers should score significantly higher than cleaning products
52
- const chargerScores = results.filter((r) => r.metadata?.category === "Electronics");
53
- const cleaningScores = results.filter((r) => r.metadata?.category === "Cleaning");
54
- // Electronics should score higher than cleaning items
55
- expect(chargerScores[0].score).toBeGreaterThan(cleaningScores[0].score);
56
- }, 30000);
57
- it("applies threshold filtering", async () => {
58
- const engine = await Simile.from(testItems);
59
- // With high threshold, should filter out low-scoring results
60
- const results = await engine.search("cleaner", { threshold: 0.5 });
61
- console.log("Search with threshold 0.5:", results);
62
- results.forEach((r) => {
63
- expect(r.score).toBeGreaterThanOrEqual(0.5);
64
- });
65
- }, 30000);
66
- it("sorts results by relevance (highest score first)", async () => {
67
- const engine = await Simile.from(testItems);
68
- const results = await engine.search("cleaning products");
69
- // Verify results are sorted by score descending
70
- for (let i = 1; i < results.length; i++) {
71
- expect(results[i - 1].score).toBeGreaterThanOrEqual(results[i].score);
72
- }
73
- }, 30000);
74
- });
75
- describe("min character limit", () => {
76
- it("returns empty results when query is below minLength", async () => {
77
- const engine = await Simile.from(testItems);
78
- // Default minLength is 1
79
- const results1 = await engine.search("c");
80
- expect(results1.length).toBeGreaterThan(0);
81
- // With minLength: 3, short queries return empty
82
- const results2 = await engine.search("cl", { minLength: 3 });
83
- expect(results2.length).toBe(0);
84
- // Exactly 3 characters should work
85
- const results3 = await engine.search("usb", { minLength: 3 });
86
- expect(results3.length).toBeGreaterThan(0);
87
- }, 30000);
88
- });
89
- describe("nested path search", () => {
90
- const nestedItems = [
91
- {
92
- id: "1",
93
- text: "",
94
- metadata: {
95
- author: { firstName: "John", lastName: "Doe" },
96
- title: "The Art of Programming",
97
- tags: ["coding", "javascript"],
5
+ const engine = await Simile.from([
6
+ {
7
+ id: "1",
8
+ text: "Bathroom floor cleaner",
9
+ metadata: { category: "Cleaning" },
98
10
  },
99
- },
100
- {
101
- id: "2",
102
- text: "",
103
- metadata: {
104
- author: { firstName: "Jane", lastName: "Smith" },
105
- title: "Machine Learning Basics",
106
- tags: ["ai", "python"],
11
+ {
12
+ id: "2",
13
+ text: "Dishwashing liquid",
14
+ metadata: { category: "Kitchen" },
107
15
  },
108
- },
109
- {
110
- id: "3",
111
- text: "",
112
- metadata: {
113
- author: { firstName: "John", lastName: "Smith" },
114
- title: "Advanced JavaScript",
115
- tags: ["coding", "javascript", "advanced"],
16
+ {
17
+ id: "3",
18
+ text: "Ipod Charger",
19
+ metadata: { category: "Electronics" },
116
20
  },
117
- },
118
- ];
119
- it("extracts text from nested paths", () => {
120
- const item = nestedItems[0];
121
- expect(getByPath(item, "metadata.author.firstName")).toBe("John");
122
- expect(getByPath(item, "metadata.title")).toBe("The Art of Programming");
123
- expect(getByPath(item, "metadata.tags[0]")).toBe("coding");
124
- expect(getByPath(item, "metadata.tags[1]")).toBe("javascript");
125
- });
126
- it("combines multiple paths into searchable text", () => {
127
- const text = extractText(nestedItems[0], [
128
- "metadata.author.firstName",
129
- "metadata.author.lastName",
130
- "metadata.title",
131
21
  ]);
132
- expect(text).toBe("John Doe The Art of Programming");
133
- });
134
- it("searches using nested paths", async () => {
135
- const engine = await Simile.from(nestedItems, {
136
- textPaths: [
137
- "metadata.author.firstName",
138
- "metadata.author.lastName",
139
- "metadata.title",
140
- ],
141
- });
142
- // Search by author name
143
- const johnResults = await engine.search("John");
144
- expect(johnResults.length).toBeGreaterThan(0);
145
- expect(johnResults[0].metadata?.author.firstName).toBe("John");
146
- // Search by title
147
- const jsResults = await engine.search("JavaScript programming");
148
- expect(jsResults.length).toBeGreaterThan(0);
22
+ const results = await engine.search("cleaner");
23
+ console.log(results);
24
+ expect(results.length).toBeGreaterThan(0);
25
+ expect(results[0].id).toBe("1");
26
+ expect(results[0].score).toBeGreaterThan(0.5);
149
27
  }, 30000);
150
- it("includes tags in nested path search", async () => {
151
- const engine = await Simile.from(nestedItems, {
152
- textPaths: ["metadata.title", "metadata.tags"],
28
+ it("performance test: 10K items should search in <100ms", async () => {
29
+ // Generate 10K test items
30
+ const items = Array.from({ length: 10000 }, (_, i) => ({
31
+ id: `item-${i}`,
32
+ text: `Product ${i} - ${[
33
+ "cleaner",
34
+ "charger",
35
+ "liquid",
36
+ "cable",
37
+ "headphones",
38
+ "keyboard",
39
+ "mouse",
40
+ "monitor",
41
+ ][i % 8]}`,
42
+ metadata: { category: ["Electronics", "Cleaning", "Kitchen"][i % 3] },
43
+ }));
44
+ // Create engine with optimized ANN settings
45
+ const engine = await Simile.from(items, {
46
+ useANN: {
47
+ efSearch: 20, // Fast search
48
+ M: 16,
49
+ efConstruction: 200,
50
+ },
51
+ annThreshold: 100, // Enable ANN early
153
52
  });
154
- const pythonResults = await engine.search("python ai");
155
- expect(pythonResults[0].id).toBe("2"); // Machine Learning Basics
156
- }, 30000);
157
- });
158
- describe("score normalization", () => {
159
- it("includes raw scores in explain output", async () => {
160
- const engine = await Simile.from(testItems);
161
- const results = await engine.search("cleaner", { explain: true });
162
- expect(results[0].explain).toBeDefined();
163
- expect(results[0].explain?.raw).toBeDefined();
164
- expect(results[0].explain?.raw?.semantic).toBeDefined();
165
- expect(results[0].explain?.raw?.fuzzy).toBeDefined();
166
- expect(results[0].explain?.raw?.keyword).toBeDefined();
167
- }, 30000);
168
- it("can disable score normalization", async () => {
169
- const engine = await Simile.from(testItems, { normalizeScores: false });
170
- const results = await engine.search("cleaner", { explain: true });
171
- // Without normalization, normalized scores should equal raw scores
172
- expect(results[0].explain?.semantic).toBe(results[0].explain?.raw?.semantic);
173
- }, 30000);
174
- });
175
- describe("simile persistence", () => {
176
- const snapshotPath = path.join(__dirname, "../.test-snapshot.json");
177
- it("saves and loads from snapshot", async () => {
178
- // Create engine and save
179
- const engine = await Simile.from(testItems);
180
- const snapshot = engine.save();
181
- expect(snapshot.version).toBe("0.2.0");
182
- expect(snapshot.items.length).toBe(5);
183
- expect(snapshot.vectors.length).toBe(5);
184
- expect(snapshot.model).toBe("Xenova/all-MiniLM-L6-v2");
185
- // Load from snapshot (instant - no embedding!)
186
- const loadedEngine = Simile.load(snapshot);
187
- expect(loadedEngine.size).toBe(5);
188
- // Search should work the same
189
- const results = await loadedEngine.search("cleaner");
190
- expect(results[0].text).toContain("cleaner");
191
- }, 30000);
192
- it("saves and loads from JSON file", async () => {
193
- // Create and save to file
194
- const engine = await Simile.from(testItems);
195
- const json = engine.toJSON();
196
- fs.writeFileSync(snapshotPath, json);
197
- // Load from file (instant!)
198
- const loadedJson = fs.readFileSync(snapshotPath, "utf-8");
199
- const loadedEngine = Simile.loadFromJSON(loadedJson);
200
- expect(loadedEngine.size).toBe(5);
201
- // Cleanup
202
- fs.unlinkSync(snapshotPath);
203
- }, 30000);
204
- it("preserves textPaths in snapshot", async () => {
205
- const nestedItems = [
206
- { id: "1", text: "", metadata: { title: "Hello World" } },
207
- ];
208
- const engine = await Simile.from(nestedItems, {
209
- textPaths: ["metadata.title"],
53
+ // Verify ANN is enabled
54
+ const info = engine.getIndexInfo();
55
+ expect(info.type).toBe("hnsw");
56
+ expect(info.size).toBe(10000);
57
+ // Warm up: first search includes embedding time
58
+ await engine.search("cleaner");
59
+ // Performance test: search should be <100ms (excluding first-time embedding)
60
+ const query = "phone charger";
61
+ const startTime = performance.now();
62
+ const results = await engine.search(query, {
63
+ topK: 5,
64
+ semanticOnly: true, // Fast mode: skip fuzzy/keyword
210
65
  });
211
- const snapshot = engine.save();
212
- expect(snapshot.textPaths).toEqual(["metadata.title"]);
213
- const loaded = Simile.load(snapshot);
214
- const results = await loaded.search("Hello");
66
+ const endTime = performance.now();
67
+ const searchTime = endTime - startTime;
68
+ console.log(`Search time for 10K items: ${searchTime.toFixed(2)}ms`);
69
+ console.log(`Results: ${results.length}`);
70
+ console.log(`Index info:`, info);
215
71
  expect(results.length).toBeGreaterThan(0);
216
- }, 30000);
217
- });
218
- describe("simile dynamic items", () => {
219
- it("adds new items", async () => {
220
- const engine = await Simile.from(testItems.slice(0, 2));
221
- expect(engine.size).toBe(2);
222
- await engine.add([testItems[2], testItems[3]]);
223
- expect(engine.size).toBe(4);
224
- const results = await engine.search("charger");
225
- expect(results.some((r) => r.id === "3")).toBe(true);
226
- }, 30000);
227
- it("removes items", async () => {
228
- const engine = await Simile.from(testItems);
229
- expect(engine.size).toBe(5);
230
- engine.remove(["1", "2"]);
231
- expect(engine.size).toBe(3);
232
- expect(engine.get("1")).toBeUndefined();
233
- expect(engine.get("3")).toBeDefined();
234
- }, 30000);
235
- it("updates existing items", async () => {
236
- const engine = await Simile.from(testItems.slice(0, 2));
237
- // Update item with same ID but different text
238
- await engine.add([
239
- { id: "1", text: "Wireless Bluetooth headphones", metadata: { category: "Electronics" } },
240
- ]);
241
- expect(engine.size).toBe(2); // Still 2 items, not 3
242
- expect(engine.get("1")?.text).toBe("Wireless Bluetooth headphones");
243
- }, 30000);
244
- });
245
- describe("simile custom weights", () => {
246
- it("respects custom weights", async () => {
247
- // Engine with high semantic weight
248
- const semanticEngine = await Simile.from(testItems, {
249
- weights: { semantic: 0.9, fuzzy: 0.05, keyword: 0.05 },
72
+ expect(searchTime).toBeLessThan(100); // Should be <100ms
73
+ // Also test with full hybrid search
74
+ const startTime2 = performance.now();
75
+ const results2 = await engine.search(query, {
76
+ topK: 5,
77
+ semanticOnly: false, // Full hybrid search
250
78
  });
251
- // Engine with high keyword weight
252
- const keywordEngine = await Simile.from(testItems, {
253
- weights: { semantic: 0.1, fuzzy: 0.1, keyword: 0.8 },
254
- });
255
- const query = "floor";
256
- const semanticResults = await semanticEngine.search(query, { explain: true });
257
- const keywordResults = await keywordEngine.search(query, { explain: true });
258
- console.log("Semantic-weighted results:", semanticResults.map((r) => ({
259
- text: r.text,
260
- score: r.score,
261
- })));
262
- console.log("Keyword-weighted results:", keywordResults.map((r) => ({
263
- text: r.text,
264
- score: r.score,
265
- })));
266
- // Both should find floor cleaner first (it has "floor" in text)
267
- expect(semanticResults[0].text).toContain("floor");
268
- expect(keywordResults[0].text).toContain("floor");
269
- }, 30000);
79
+ const endTime2 = performance.now();
80
+ const hybridTime = endTime2 - startTime2;
81
+ console.log(`Hybrid search time: ${hybridTime.toFixed(2)}ms`);
82
+ expect(hybridTime).toBeLessThan(200);
83
+ }, 300000); // Longer timeout for 10K items (embedding takes ~3 minutes)
270
84
  });
package/dist/index.d.ts CHANGED
@@ -4,3 +4,7 @@ export { cosine, fuzzyScore, keywordScore, calculateScoreStats } from "./similar
4
4
  export { hybridScore, getDefaultWeights } from "./ranker.js";
5
5
  export { Simile } from "./engine.js";
6
6
  export { getByPath, extractText, normalizeScore } from "./utils.js";
7
+ export { HNSWIndex } from "./ann.js";
8
+ export { VectorCache, createCacheKey } from "./cache.js";
9
+ export { BackgroundUpdater, DebouncedUpdater, PriorityUpdater } from "./updater.js";
10
+ export { quantizeVector, dequantizeVector, cosineQuantized, quantizedToBase64, base64ToQuantized } from "./quantization.js";
package/dist/index.js CHANGED
@@ -4,3 +4,7 @@ export { cosine, fuzzyScore, keywordScore, calculateScoreStats } from "./similar
4
4
  export { hybridScore, getDefaultWeights } from "./ranker.js";
5
5
  export { Simile } from "./engine.js";
6
6
  export { getByPath, extractText, normalizeScore } from "./utils.js";
7
+ export { HNSWIndex } from "./ann.js";
8
+ export { VectorCache, createCacheKey } from "./cache.js";
9
+ export { BackgroundUpdater, DebouncedUpdater, PriorityUpdater } from "./updater.js";
10
+ export { quantizeVector, dequantizeVector, cosineQuantized, quantizedToBase64, base64ToQuantized } from "./quantization.js";
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Vector Quantization - Reduce memory footprint by 50-75%.
3
+ *
4
+ * Quantization levels:
5
+ * - Float32: Full precision (4 bytes per dimension) - default
6
+ * - Float16: Half precision (2 bytes, ~0.1% accuracy loss)
7
+ * - Int8: 8-bit quantization (1 byte, ~1% accuracy loss)
8
+ */
9
+ export type QuantizationType = 'float32' | 'float16' | 'int8';
10
+ export interface QuantizedVector {
11
+ data: ArrayBuffer;
12
+ type: QuantizationType;
13
+ /** Scale factor for Int8 quantization */
14
+ scale?: number;
15
+ /** Offset for Int8 quantization */
16
+ offset?: number;
17
+ }
18
+ /**
19
+ * Quantize a Float32Array to a more memory-efficient format.
20
+ */
21
+ export declare function quantizeVector(vector: Float32Array, type: QuantizationType): QuantizedVector;
22
+ /**
23
+ * Dequantize back to Float32Array for computation.
24
+ */
25
+ export declare function dequantizeVector(quantized: QuantizedVector): Float32Array;
26
+ /**
27
+ * Compute cosine similarity directly on quantized vectors.
28
+ * More efficient than dequantizing first for large batches.
29
+ */
30
+ export declare function cosineQuantized(a: QuantizedVector, b: QuantizedVector): number;
31
+ /**
32
+ * Serialize quantized vector to base64 for storage.
33
+ */
34
+ export declare function quantizedToBase64(quantized: QuantizedVector): string;
35
+ /**
36
+ * Deserialize quantized vector from base64.
37
+ */
38
+ export declare function base64ToQuantized(base64: string): QuantizedVector;
39
+ /**
40
+ * Get bytes per dimension for a quantization type.
41
+ */
42
+ export declare function getBytesPerDimension(type: QuantizationType): number;
43
+ /**
44
+ * Estimate memory savings compared to Float32.
45
+ */
46
+ export declare function estimateMemorySavings(vectorCount: number, dimensions: number, type: QuantizationType): {
47
+ original: number;
48
+ quantized: number;
49
+ savings: number;
50
+ };
@@ -0,0 +1,271 @@
1
+ /**
2
+ * Vector Quantization - Reduce memory footprint by 50-75%.
3
+ *
4
+ * Quantization levels:
5
+ * - Float32: Full precision (4 bytes per dimension) - default
6
+ * - Float16: Half precision (2 bytes, ~0.1% accuracy loss)
7
+ * - Int8: 8-bit quantization (1 byte, ~1% accuracy loss)
8
+ */
9
+ /**
10
+ * Quantize a Float32Array to a more memory-efficient format.
11
+ */
12
+ export function quantizeVector(vector, type) {
13
+ switch (type) {
14
+ case 'float32':
15
+ // Ensure we get a proper ArrayBuffer (not SharedArrayBuffer)
16
+ const float32Data = new ArrayBuffer(vector.byteLength);
17
+ new Float32Array(float32Data).set(vector);
18
+ return {
19
+ data: float32Data,
20
+ type: 'float32',
21
+ };
22
+ case 'float16':
23
+ return {
24
+ data: float32ToFloat16(vector),
25
+ type: 'float16',
26
+ };
27
+ case 'int8':
28
+ return quantizeToInt8(vector);
29
+ default:
30
+ throw new Error(`Unknown quantization type: ${type}`);
31
+ }
32
+ }
33
+ /**
34
+ * Dequantize back to Float32Array for computation.
35
+ */
36
+ export function dequantizeVector(quantized) {
37
+ switch (quantized.type) {
38
+ case 'float32':
39
+ return new Float32Array(quantized.data);
40
+ case 'float16':
41
+ return float16ToFloat32(quantized.data);
42
+ case 'int8':
43
+ return dequantizeFromInt8(quantized);
44
+ default:
45
+ throw new Error(`Unknown quantization type: ${quantized.type}`);
46
+ }
47
+ }
48
+ /**
49
+ * Compute cosine similarity directly on quantized vectors.
50
+ * More efficient than dequantizing first for large batches.
51
+ */
52
+ export function cosineQuantized(a, b) {
53
+ if (a.type !== b.type) {
54
+ throw new Error('Cannot compute similarity between different quantization types');
55
+ }
56
+ switch (a.type) {
57
+ case 'float32': {
58
+ const va = new Float32Array(a.data);
59
+ const vb = new Float32Array(b.data);
60
+ return cosineFloat32(va, vb);
61
+ }
62
+ case 'float16': {
63
+ // For Float16, dequantize and compute (hardware Float16 not widely supported)
64
+ const va = float16ToFloat32(a.data);
65
+ const vb = float16ToFloat32(b.data);
66
+ return cosineFloat32(va, vb);
67
+ }
68
+ case 'int8': {
69
+ // For Int8, use integer arithmetic then scale
70
+ return cosineInt8(a, b);
71
+ }
72
+ default:
73
+ throw new Error(`Unknown quantization type: ${a.type}`);
74
+ }
75
+ }
76
+ /**
77
+ * Serialize quantized vector to base64 for storage.
78
+ */
79
+ export function quantizedToBase64(quantized) {
80
+ const meta = JSON.stringify({
81
+ type: quantized.type,
82
+ scale: quantized.scale,
83
+ offset: quantized.offset,
84
+ });
85
+ const metaBuffer = Buffer.from(meta);
86
+ const dataBuffer = Buffer.from(quantized.data);
87
+ // Format: [2 bytes meta length][meta][data]
88
+ const combined = Buffer.alloc(2 + metaBuffer.length + dataBuffer.length);
89
+ combined.writeUInt16LE(metaBuffer.length, 0);
90
+ metaBuffer.copy(combined, 2);
91
+ dataBuffer.copy(combined, 2 + metaBuffer.length);
92
+ return combined.toString('base64');
93
+ }
94
+ /**
95
+ * Deserialize quantized vector from base64.
96
+ */
97
+ export function base64ToQuantized(base64) {
98
+ const combined = Buffer.from(base64, 'base64');
99
+ const metaLength = combined.readUInt16LE(0);
100
+ const metaBuffer = combined.subarray(2, 2 + metaLength);
101
+ const dataBuffer = combined.subarray(2 + metaLength);
102
+ const meta = JSON.parse(metaBuffer.toString());
103
+ return {
104
+ data: dataBuffer.buffer.slice(dataBuffer.byteOffset, dataBuffer.byteOffset + dataBuffer.length),
105
+ type: meta.type,
106
+ scale: meta.scale,
107
+ offset: meta.offset,
108
+ };
109
+ }
110
+ /**
111
+ * Get bytes per dimension for a quantization type.
112
+ */
113
+ export function getBytesPerDimension(type) {
114
+ switch (type) {
115
+ case 'float32': return 4;
116
+ case 'float16': return 2;
117
+ case 'int8': return 1;
118
+ default: return 4;
119
+ }
120
+ }
121
+ /**
122
+ * Estimate memory savings compared to Float32.
123
+ */
124
+ export function estimateMemorySavings(vectorCount, dimensions, type) {
125
+ const original = vectorCount * dimensions * 4; // Float32
126
+ const quantized = vectorCount * dimensions * getBytesPerDimension(type);
127
+ return {
128
+ original,
129
+ quantized,
130
+ savings: 1 - (quantized / original),
131
+ };
132
+ }
133
+ // ============ Internal Helpers ============
134
+ /**
135
+ * Convert Float32Array to Float16 (stored as Uint16Array buffer).
136
+ */
137
+ function float32ToFloat16(float32) {
138
+ const uint16 = new Uint16Array(float32.length);
139
+ for (let i = 0; i < float32.length; i++) {
140
+ uint16[i] = floatToHalf(float32[i]);
141
+ }
142
+ return uint16.buffer;
143
+ }
144
+ /**
145
+ * Convert Float16 buffer back to Float32Array.
146
+ */
147
+ function float16ToFloat32(buffer) {
148
+ const uint16 = new Uint16Array(buffer);
149
+ const float32 = new Float32Array(uint16.length);
150
+ for (let i = 0; i < uint16.length; i++) {
151
+ float32[i] = halfToFloat(uint16[i]);
152
+ }
153
+ return float32;
154
+ }
155
+ /**
156
+ * Quantize Float32Array to Int8 with scale/offset.
157
+ */
158
+ function quantizeToInt8(vector) {
159
+ let min = Infinity;
160
+ let max = -Infinity;
161
+ for (let i = 0; i < vector.length; i++) {
162
+ if (vector[i] < min)
163
+ min = vector[i];
164
+ if (vector[i] > max)
165
+ max = vector[i];
166
+ }
167
+ const scale = (max - min) / 255;
168
+ const offset = min;
169
+ const int8 = new Int8Array(vector.length);
170
+ for (let i = 0; i < vector.length; i++) {
171
+ // Map to 0-255 range, then shift to -128 to 127
172
+ const normalized = scale > 0 ? (vector[i] - offset) / scale : 0;
173
+ int8[i] = Math.round(normalized) - 128;
174
+ }
175
+ return {
176
+ data: int8.buffer,
177
+ type: 'int8',
178
+ scale,
179
+ offset,
180
+ };
181
+ }
182
+ /**
183
+ * Dequantize Int8 back to Float32Array.
184
+ */
185
+ function dequantizeFromInt8(quantized) {
186
+ const int8 = new Int8Array(quantized.data);
187
+ const float32 = new Float32Array(int8.length);
188
+ const scale = quantized.scale ?? 1;
189
+ const offset = quantized.offset ?? 0;
190
+ for (let i = 0; i < int8.length; i++) {
191
+ float32[i] = (int8[i] + 128) * scale + offset;
192
+ }
193
+ return float32;
194
+ }
195
+ /**
196
+ * Cosine similarity for Float32 vectors.
197
+ */
198
+ function cosineFloat32(a, b) {
199
+ let dot = 0;
200
+ for (let i = 0; i < a.length; i++) {
201
+ dot += a[i] * b[i];
202
+ }
203
+ return dot;
204
+ }
205
+ /**
206
+ * Cosine similarity for Int8 quantized vectors.
207
+ * Uses integer arithmetic for speed.
208
+ */
209
+ function cosineInt8(a, b) {
210
+ const int8A = new Int8Array(a.data);
211
+ const int8B = new Int8Array(b.data);
212
+ // Compute dot product in integer space
213
+ let dotInt = 0;
214
+ let normAInt = 0;
215
+ let normBInt = 0;
216
+ for (let i = 0; i < int8A.length; i++) {
217
+ const valA = int8A[i] + 128;
218
+ const valB = int8B[i] + 128;
219
+ dotInt += valA * valB;
220
+ normAInt += valA * valA;
221
+ normBInt += valB * valB;
222
+ }
223
+ // For normalized vectors, we can simplify
224
+ // Since embeddings are already normalized, dot product ≈ cosine
225
+ const scaleA = a.scale ?? 1;
226
+ const scaleB = b.scale ?? 1;
227
+ return (dotInt * scaleA * scaleB) / (Math.sqrt(normAInt) * Math.sqrt(normBInt) * scaleA * scaleB);
228
+ }
229
+ /**
230
+ * Convert a 32-bit float to 16-bit half precision.
231
+ */
232
+ function floatToHalf(val) {
233
+ const floatView = new Float32Array(1);
234
+ const int32View = new Int32Array(floatView.buffer);
235
+ floatView[0] = val;
236
+ const x = int32View[0];
237
+ let bits = (x >> 16) & 0x8000; // Sign
238
+ let m = (x >> 12) & 0x07ff; // Mantissa
239
+ const e = (x >> 23) & 0xff; // Exponent
240
+ if (e < 103) {
241
+ return bits;
242
+ }
243
+ if (e > 142) {
244
+ bits |= 0x7c00;
245
+ bits |= (e === 255 ? 0 : 1) && (x & 0x007fffff);
246
+ return bits;
247
+ }
248
+ if (e < 113) {
249
+ m |= 0x0800;
250
+ bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
251
+ return bits;
252
+ }
253
+ bits |= ((e - 112) << 10) | (m >> 1);
254
+ bits += m & 1;
255
+ return bits;
256
+ }
257
+ /**
258
+ * Convert a 16-bit half precision to 32-bit float.
259
+ */
260
+ function halfToFloat(val) {
261
+ const s = (val & 0x8000) >> 15;
262
+ const e = (val & 0x7c00) >> 10;
263
+ const f = val & 0x03ff;
264
+ if (e === 0) {
265
+ return (s ? -1 : 1) * Math.pow(2, -14) * (f / Math.pow(2, 10));
266
+ }
267
+ else if (e === 0x1f) {
268
+ return f ? NaN : (s ? -Infinity : Infinity);
269
+ }
270
+ return (s ? -1 : 1) * Math.pow(2, e - 15) * (1 + f / Math.pow(2, 10));
271
+ }