simile-search 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/engine.js CHANGED
@@ -1,5 +1,5 @@
1
- import { embed, embedBatch, vectorToBase64, base64ToVector } from "./embedder.js";
2
- import { cosine, fuzzyScore, keywordScore, calculateScoreStats } from "./similarity.js";
1
+ import { embed, embedBatch, vectorToBase64, base64ToVector, } from "./embedder.js";
2
+ import { cosine, fuzzyScore, keywordScore, calculateScoreStats, } from "./similarity.js";
3
3
  import { hybridScore, getDefaultWeights } from "./ranker.js";
4
4
  import { extractText, normalizeScore } from "./utils.js";
5
5
  import { VectorCache, createCacheKey } from "./cache.js";
@@ -19,26 +19,35 @@ export class Simile {
19
19
  textPaths: config.textPaths ?? [],
20
20
  normalizeScores: config.normalizeScores ?? true,
21
21
  cache: config.cache ?? true,
22
- quantization: config.quantization ?? 'float32',
22
+ quantization: config.quantization ?? "float32",
23
23
  useANN: config.useANN ?? false,
24
24
  annThreshold: config.annThreshold ?? 1000,
25
25
  };
26
26
  // Initialize Cache
27
27
  if (this.config.cache) {
28
- this.cache = new VectorCache(typeof this.config.cache === 'object' ? this.config.cache : {});
28
+ this.cache = new VectorCache(typeof this.config.cache === "object" ? this.config.cache : {});
29
29
  }
30
30
  // Initialize ANN Index if threshold reached or forced
31
31
  if (this.config.useANN || this.items.length >= this.config.annThreshold) {
32
- this.buildANNIndex();
32
+ // Optimize HNSW for speed when not explicitly configured
33
+ const hnswConfig = typeof this.config.useANN === "object"
34
+ ? this.config.useANN
35
+ : {
36
+ efSearch: 20, // Reduced from default 50 for faster search
37
+ M: 16, // Keep default
38
+ efConstruction: 200, // Keep default for build quality
39
+ };
40
+ this.buildANNIndex(hnswConfig);
33
41
  }
34
42
  // Initialize Updater
35
43
  this.updater = new BackgroundUpdater(this);
36
44
  }
37
- buildANNIndex() {
45
+ buildANNIndex(config) {
38
46
  if (this.vectors.length === 0)
39
47
  return;
40
48
  const dims = this.vectors[0].length;
41
- const hnswConfig = typeof this.config.useANN === 'object' ? this.config.useANN : {};
49
+ const hnswConfig = config ||
50
+ (typeof this.config.useANN === "object" ? this.config.useANN : {});
42
51
  this.annIndex = new HNSWIndex(dims, hnswConfig);
43
52
  for (let i = 0; i < this.vectors.length; i++) {
44
53
  this.annIndex.add(i, this.vectors[i]);
@@ -211,7 +220,7 @@ export class Simile {
211
220
  for (const v of this.vectors)
212
221
  memoryBytes += v.byteLength;
213
222
  return {
214
- type: this.annIndex ? 'hnsw' : 'linear',
223
+ type: this.annIndex ? "hnsw" : "linear",
215
224
  size: this.items.length,
216
225
  memory: `${(memoryBytes / 1024 / 1024).toFixed(2)} MB`,
217
226
  cacheStats: this.cache?.getStats(),
@@ -272,17 +281,39 @@ export class Simile {
272
281
  * @returns Sorted results by relevance (highest score first)
273
282
  */
274
283
  async search(query, options = {}) {
275
- const { topK = 5, explain = false, filter, threshold = 0, minLength = 1, } = options;
284
+ const { topK = 5, explain = false, filter, threshold = 0, minLength = 1, semanticOnly = false, } = options;
276
285
  // Min character limit - don't search until query meets minimum length
277
286
  if (query.length < minLength) {
278
287
  return [];
279
288
  }
280
289
  const qVector = await this.embedWithCache(query);
281
- // First pass: calculate raw scores
282
- const rawResults = [];
283
290
  // Use ANN if enabled and available
284
291
  if (this.annIndex && (options.useANN ?? true)) {
285
- const annResults = this.annIndex.search(qVector, topK * 2); // Get more for filtering
292
+ // Optimize: get fewer candidates for faster search
293
+ const candidateCount = semanticOnly ? topK : Math.min(topK * 2, 20);
294
+ const annResults = this.annIndex.search(qVector, candidateCount);
295
+ // Fast path: semantic-only search (no fuzzy/keyword)
296
+ if (semanticOnly) {
297
+ const results = [];
298
+ for (const res of annResults) {
299
+ const item = this.items[res.id];
300
+ if (filter && !filter(item.metadata))
301
+ continue;
302
+ const semantic = 1 - res.distance;
303
+ if (semantic < threshold)
304
+ continue;
305
+ results.push({
306
+ id: item.id,
307
+ text: item.text,
308
+ metadata: item.metadata,
309
+ score: semantic,
310
+ explain: explain ? { semantic, fuzzy: 0, keyword: 0 } : undefined,
311
+ });
312
+ }
313
+ return results.sort((a, b) => b.score - a.score).slice(0, topK);
314
+ }
315
+ // Full hybrid search path
316
+ const rawResults = [];
286
317
  for (const res of annResults) {
287
318
  const item = this.items[res.id];
288
319
  if (filter && !filter(item.metadata))
@@ -293,9 +324,49 @@ export class Simile {
293
324
  const keyword = keywordScore(query, searchableText);
294
325
  rawResults.push({ index: res.id, item, semantic, fuzzy, keyword });
295
326
  }
327
+ // Calculate score statistics for normalization
328
+ const stats = calculateScoreStats(rawResults);
329
+ // Second pass: normalize scores and compute hybrid score
330
+ const results = [];
331
+ for (const raw of rawResults) {
332
+ let semantic = raw.semantic;
333
+ let fuzzy = raw.fuzzy;
334
+ let keyword = raw.keyword;
335
+ // Normalize scores if enabled
336
+ if (this.config.normalizeScores) {
337
+ semantic = normalizeScore(raw.semantic, stats.semantic.min, stats.semantic.max);
338
+ fuzzy = normalizeScore(raw.fuzzy, stats.fuzzy.min, stats.fuzzy.max);
339
+ keyword = normalizeScore(raw.keyword, stats.keyword.min, stats.keyword.max);
340
+ }
341
+ const score = hybridScore(semantic, fuzzy, keyword, this.config.weights);
342
+ // Apply threshold filter
343
+ if (score < threshold)
344
+ continue;
345
+ results.push({
346
+ id: raw.item.id,
347
+ text: raw.item.text,
348
+ metadata: raw.item.metadata,
349
+ score,
350
+ explain: explain
351
+ ? {
352
+ semantic,
353
+ fuzzy,
354
+ keyword,
355
+ raw: {
356
+ semantic: raw.semantic,
357
+ fuzzy: raw.fuzzy,
358
+ keyword: raw.keyword,
359
+ },
360
+ }
361
+ : undefined,
362
+ });
363
+ }
364
+ // Sort by relevance (highest score first)
365
+ return results.sort((a, b) => b.score - a.score).slice(0, topK);
296
366
  }
297
367
  else {
298
368
  // Fallback to linear scan
369
+ const rawResults = [];
299
370
  for (let i = 0; i < this.items.length; i++) {
300
371
  const item = this.items[i];
301
372
  if (filter && !filter(item.metadata))
@@ -306,45 +377,45 @@ export class Simile {
306
377
  const keyword = keywordScore(query, searchableText);
307
378
  rawResults.push({ index: i, item, semantic, fuzzy, keyword });
308
379
  }
309
- }
310
- // Calculate score statistics for normalization
311
- const stats = calculateScoreStats(rawResults);
312
- // Second pass: normalize scores and compute hybrid score
313
- const results = [];
314
- for (const raw of rawResults) {
315
- let semantic = raw.semantic;
316
- let fuzzy = raw.fuzzy;
317
- let keyword = raw.keyword;
318
- // Normalize scores if enabled
319
- if (this.config.normalizeScores) {
320
- semantic = normalizeScore(raw.semantic, stats.semantic.min, stats.semantic.max);
321
- fuzzy = normalizeScore(raw.fuzzy, stats.fuzzy.min, stats.fuzzy.max);
322
- keyword = normalizeScore(raw.keyword, stats.keyword.min, stats.keyword.max);
380
+ // Calculate score statistics for normalization
381
+ const stats = calculateScoreStats(rawResults);
382
+ // Second pass: normalize scores and compute hybrid score
383
+ const results = [];
384
+ for (const raw of rawResults) {
385
+ let semantic = raw.semantic;
386
+ let fuzzy = raw.fuzzy;
387
+ let keyword = raw.keyword;
388
+ // Normalize scores if enabled
389
+ if (this.config.normalizeScores) {
390
+ semantic = normalizeScore(raw.semantic, stats.semantic.min, stats.semantic.max);
391
+ fuzzy = normalizeScore(raw.fuzzy, stats.fuzzy.min, stats.fuzzy.max);
392
+ keyword = normalizeScore(raw.keyword, stats.keyword.min, stats.keyword.max);
393
+ }
394
+ const score = hybridScore(semantic, fuzzy, keyword, this.config.weights);
395
+ // Apply threshold filter
396
+ if (score < threshold)
397
+ continue;
398
+ results.push({
399
+ id: raw.item.id,
400
+ text: raw.item.text,
401
+ metadata: raw.item.metadata,
402
+ score,
403
+ explain: explain
404
+ ? {
405
+ semantic,
406
+ fuzzy,
407
+ keyword,
408
+ raw: {
409
+ semantic: raw.semantic,
410
+ fuzzy: raw.fuzzy,
411
+ keyword: raw.keyword,
412
+ },
413
+ }
414
+ : undefined,
415
+ });
323
416
  }
324
- const score = hybridScore(semantic, fuzzy, keyword, this.config.weights);
325
- // Apply threshold filter
326
- if (score < threshold)
327
- continue;
328
- results.push({
329
- id: raw.item.id,
330
- text: raw.item.text,
331
- metadata: raw.item.metadata,
332
- score,
333
- explain: explain
334
- ? {
335
- semantic,
336
- fuzzy,
337
- keyword,
338
- raw: {
339
- semantic: raw.semantic,
340
- fuzzy: raw.fuzzy,
341
- keyword: raw.keyword,
342
- },
343
- }
344
- : undefined,
345
- });
417
+ // Sort by relevance (highest score first)
418
+ return results.sort((a, b) => b.score - a.score).slice(0, topK);
346
419
  }
347
- // Sort by relevance (highest score first)
348
- return results.sort((a, b) => b.score - a.score).slice(0, topK);
349
420
  }
350
421
  }
@@ -1,318 +1,84 @@
1
1
  import { describe, it, expect } from "vitest";
2
2
  import { Simile } from "./engine";
3
- import { getByPath, extractText } from "./utils";
4
- import * as fs from "fs";
5
- import * as path from "path";
6
- const testItems = [
7
- {
8
- id: "1",
9
- text: "Bathroom floor cleaner",
10
- metadata: { category: "Cleaning" },
11
- },
12
- {
13
- id: "2",
14
- text: "Dishwashing liquid",
15
- metadata: { category: "Kitchen" },
16
- },
17
- {
18
- id: "3",
19
- text: "Ipod Charger",
20
- metadata: { category: "Electronics" },
21
- },
22
- {
23
- id: "4",
24
- text: "Kitchen cleaning spray",
25
- metadata: { category: "Cleaning" },
26
- },
27
- {
28
- id: "5",
29
- text: "USB-C phone charger cable",
30
- metadata: { category: "Electronics" },
31
- },
32
- ];
33
3
  describe("simile search", () => {
34
4
  it("returns semantically similar items", async () => {
35
- const engine = await Simile.from(testItems.slice(0, 3));
36
- const results = await engine.search("cleaner", { explain: true });
37
- console.log("Search for 'cleaner':", results);
38
- expect(results.length).toBeGreaterThan(0);
39
- expect(results[0].id).toBe("1");
40
- expect(results[0].score).toBeGreaterThan(0.5);
41
- }, 30000);
42
- it("differentiates between unrelated items", async () => {
43
- const engine = await Simile.from(testItems);
44
- // Search for "phone charger" - should clearly prefer electronics
45
- const results = await engine.search("phone charger", { explain: true });
46
- console.log("Search for 'phone charger':", results);
47
- // Both chargers should be in top 2 (order may vary based on model)
48
- const topTwoIds = [results[0].id, results[1].id];
49
- expect(topTwoIds).toContain("5"); // USB-C phone charger
50
- expect(topTwoIds).toContain("3"); // iPod Charger
51
- // Both chargers should score significantly higher than cleaning products
52
- const chargerScores = results.filter((r) => r.metadata?.category === "Electronics");
53
- const cleaningScores = results.filter((r) => r.metadata?.category === "Cleaning");
54
- // Electronics should score higher than cleaning items
55
- expect(chargerScores[0].score).toBeGreaterThan(cleaningScores[0].score);
56
- }, 30000);
57
- it("applies threshold filtering", async () => {
58
- const engine = await Simile.from(testItems);
59
- // With high threshold, should filter out low-scoring results
60
- const results = await engine.search("cleaner", { threshold: 0.5 });
61
- console.log("Search with threshold 0.5:", results);
62
- results.forEach((r) => {
63
- expect(r.score).toBeGreaterThanOrEqual(0.5);
64
- });
65
- }, 30000);
66
- it("sorts results by relevance (highest score first)", async () => {
67
- const engine = await Simile.from(testItems);
68
- const results = await engine.search("cleaning products");
69
- // Verify results are sorted by score descending
70
- for (let i = 1; i < results.length; i++) {
71
- expect(results[i - 1].score).toBeGreaterThanOrEqual(results[i].score);
72
- }
73
- }, 30000);
74
- });
75
- describe("min character limit", () => {
76
- it("returns empty results when query is below minLength", async () => {
77
- const engine = await Simile.from(testItems);
78
- // Default minLength is 1
79
- const results1 = await engine.search("c");
80
- expect(results1.length).toBeGreaterThan(0);
81
- // With minLength: 3, short queries return empty
82
- const results2 = await engine.search("cl", { minLength: 3 });
83
- expect(results2.length).toBe(0);
84
- // Exactly 3 characters should work
85
- const results3 = await engine.search("usb", { minLength: 3 });
86
- expect(results3.length).toBeGreaterThan(0);
87
- }, 30000);
88
- });
89
- describe("nested path search", () => {
90
- const nestedItems = [
91
- {
92
- id: "1",
93
- text: "",
94
- metadata: {
95
- author: { firstName: "John", lastName: "Doe" },
96
- title: "The Art of Programming",
97
- tags: ["coding", "javascript"],
5
+ const engine = await Simile.from([
6
+ {
7
+ id: "1",
8
+ text: "Bathroom floor cleaner",
9
+ metadata: { category: "Cleaning" },
98
10
  },
99
- },
100
- {
101
- id: "2",
102
- text: "",
103
- metadata: {
104
- author: { firstName: "Jane", lastName: "Smith" },
105
- title: "Machine Learning Basics",
106
- tags: ["ai", "python"],
11
+ {
12
+ id: "2",
13
+ text: "Dishwashing liquid",
14
+ metadata: { category: "Kitchen" },
107
15
  },
108
- },
109
- {
110
- id: "3",
111
- text: "",
112
- metadata: {
113
- author: { firstName: "John", lastName: "Smith" },
114
- title: "Advanced JavaScript",
115
- tags: ["coding", "javascript", "advanced"],
16
+ {
17
+ id: "3",
18
+ text: "Ipod Charger",
19
+ metadata: { categoryq: "Electronics" },
116
20
  },
117
- },
118
- ];
119
- it("extracts text from nested paths", () => {
120
- const item = nestedItems[0];
121
- expect(getByPath(item, "metadata.author.firstName")).toBe("John");
122
- expect(getByPath(item, "metadata.title")).toBe("The Art of Programming");
123
- expect(getByPath(item, "metadata.tags[0]")).toBe("coding");
124
- expect(getByPath(item, "metadata.tags[1]")).toBe("javascript");
125
- });
126
- it("combines multiple paths into searchable text", () => {
127
- const text = extractText(nestedItems[0], [
128
- "metadata.author.firstName",
129
- "metadata.author.lastName",
130
- "metadata.title",
131
21
  ]);
132
- expect(text).toBe("John Doe The Art of Programming");
133
- });
134
- it("searches using nested paths", async () => {
135
- const engine = await Simile.from(nestedItems, {
136
- textPaths: [
137
- "metadata.author.firstName",
138
- "metadata.author.lastName",
139
- "metadata.title",
140
- ],
141
- });
142
- // Search by author name
143
- const johnResults = await engine.search("John");
144
- expect(johnResults.length).toBeGreaterThan(0);
145
- expect(johnResults[0].metadata?.author.firstName).toBe("John");
146
- // Search by title
147
- const jsResults = await engine.search("JavaScript programming");
148
- expect(jsResults.length).toBeGreaterThan(0);
149
- }, 30000);
150
- it("includes tags in nested path search", async () => {
151
- const engine = await Simile.from(nestedItems, {
152
- textPaths: ["metadata.title", "metadata.tags"],
153
- });
154
- const pythonResults = await engine.search("python ai");
155
- expect(pythonResults[0].id).toBe("2"); // Machine Learning Basics
156
- }, 30000);
157
- });
158
- describe("score normalization", () => {
159
- it("includes raw scores in explain output", async () => {
160
- const engine = await Simile.from(testItems);
161
- const results = await engine.search("cleaner", { explain: true });
162
- expect(results[0].explain).toBeDefined();
163
- expect(results[0].explain?.raw).toBeDefined();
164
- expect(results[0].explain?.raw?.semantic).toBeDefined();
165
- expect(results[0].explain?.raw?.fuzzy).toBeDefined();
166
- expect(results[0].explain?.raw?.keyword).toBeDefined();
167
- }, 30000);
168
- it("can disable score normalization", async () => {
169
- const engine = await Simile.from(testItems, { normalizeScores: false });
170
- const results = await engine.search("cleaner", { explain: true });
171
- // Without normalization, normalized scores should equal raw scores
172
- expect(results[0].explain?.semantic).toBe(results[0].explain?.raw?.semantic);
173
- }, 30000);
174
- });
175
- describe("simile persistence", () => {
176
- const snapshotPath = path.join(__dirname, "../.test-snapshot.json");
177
- it("saves and loads from snapshot", async () => {
178
- // Create engine and save
179
- const engine = await Simile.from(testItems);
180
- const snapshot = engine.save();
181
- expect(snapshot.version).toBe("0.4.0");
182
- expect(snapshot.items.length).toBe(5);
183
- expect(snapshot.vectors.length).toBe(5);
184
- expect(snapshot.model).toBe("Xenova/all-MiniLM-L6-v2");
185
- // Load from snapshot (instant - no embedding!)
186
- const loadedEngine = Simile.load(snapshot);
187
- expect(loadedEngine.size).toBe(5);
188
- // Search should work the same
189
- const results = await loadedEngine.search("cleaner");
190
- expect(results[0].text).toContain("cleaner");
191
- }, 30000);
192
- it("saves and loads from JSON file", async () => {
193
- // Create and save to file
194
- const engine = await Simile.from(testItems);
195
- const json = engine.toJSON();
196
- fs.writeFileSync(snapshotPath, json);
197
- // Load from file (instant!)
198
- const loadedJson = fs.readFileSync(snapshotPath, "utf-8");
199
- const loadedEngine = Simile.loadFromJSON(loadedJson);
200
- expect(loadedEngine.size).toBe(5);
201
- // Cleanup
202
- fs.unlinkSync(snapshotPath);
203
- }, 30000);
204
- it("preserves textPaths in snapshot", async () => {
205
- const nestedItems = [
206
- { id: "1", text: "", metadata: { title: "Hello World" } },
207
- ];
208
- const engine = await Simile.from(nestedItems, {
209
- textPaths: ["metadata.title"],
210
- });
211
- const snapshot = engine.save();
212
- expect(snapshot.textPaths).toEqual(["metadata.title"]);
213
- const loaded = Simile.load(snapshot);
214
- const results = await loaded.search("Hello");
22
+ const results = await engine.search("cleaner");
23
+ console.log(results);
215
24
  expect(results.length).toBeGreaterThan(0);
25
+ expect(results[0].id).toBe("1");
26
+ expect(results[0].score).toBeGreaterThan(0.5);
216
27
  }, 30000);
217
- });
218
- describe("simile dynamic items", () => {
219
- it("adds new items", async () => {
220
- const engine = await Simile.from(testItems.slice(0, 2));
221
- expect(engine.size).toBe(2);
222
- await engine.add([testItems[2], testItems[3]]);
223
- expect(engine.size).toBe(4);
224
- const results = await engine.search("charger");
225
- expect(results.some((r) => r.id === "3")).toBe(true);
226
- }, 30000);
227
- it("removes items", async () => {
228
- const engine = await Simile.from(testItems);
229
- expect(engine.size).toBe(5);
230
- engine.remove(["1", "2"]);
231
- expect(engine.size).toBe(3);
232
- expect(engine.get("1")).toBeUndefined();
233
- expect(engine.get("3")).toBeDefined();
234
- }, 30000);
235
- it("updates existing items", async () => {
236
- const engine = await Simile.from(testItems.slice(0, 2));
237
- // Update item with same ID but different text
238
- await engine.add([
239
- { id: "1", text: "Wireless Bluetooth headphones", metadata: { category: "Electronics" } },
240
- ]);
241
- expect(engine.size).toBe(2); // Still 2 items, not 3
242
- expect(engine.get("1")?.text).toBe("Wireless Bluetooth headphones");
243
- }, 30000);
244
- });
245
- describe("simile custom weights", () => {
246
- it("respects custom weights", async () => {
247
- // Engine with high semantic weight
248
- const semanticEngine = await Simile.from(testItems, {
249
- weights: { semantic: 0.9, fuzzy: 0.05, keyword: 0.05 },
250
- });
251
- // Engine with high keyword weight
252
- const keywordEngine = await Simile.from(testItems, {
253
- weights: { semantic: 0.1, fuzzy: 0.1, keyword: 0.8 },
254
- });
255
- const query = "floor";
256
- const semanticResults = await semanticEngine.search(query, { explain: true });
257
- const keywordResults = await keywordEngine.search(query, { explain: true });
258
- console.log("Semantic-weighted results:", semanticResults.map((r) => ({
259
- text: r.text,
260
- score: r.score,
261
- })));
262
- console.log("Keyword-weighted results:", keywordResults.map((r) => ({
263
- text: r.text,
264
- score: r.score,
265
- })));
266
- // Both should find floor cleaner first (it has "floor" in text)
267
- expect(semanticResults[0].text).toContain("floor");
268
- expect(keywordResults[0].text).toContain("floor");
269
- }, 30000);
270
- });
271
- describe("simile performance features", () => {
272
- it("enables ANN index for large datasets", async () => {
273
- // Generate many items to trigger ANN threshold
274
- const manyItems = Array.from({ length: 100 }, (_, i) => ({
275
- id: `many-${i}`,
276
- text: `Item number ${i} for testing ANN index`,
277
- metadata: { index: i },
28
+ it("performance test: 10K items should search in <100ms", async () => {
29
+ // Generate 10K test items
30
+ const items = Array.from({ length: 10000 }, (_, i) => ({
31
+ id: `item-${i}`,
32
+ text: `Product ${i} - ${[
33
+ "cleaner",
34
+ "charger",
35
+ "liquid",
36
+ "cable",
37
+ "headphones",
38
+ "keyboard",
39
+ "mouse",
40
+ "monitor",
41
+ ][i % 8]}`,
42
+ metadata: { category: ["Electronics", "Cleaning", "Kitchen"][i % 3] },
278
43
  }));
279
- const engine = await Simile.from(manyItems, {
280
- annThreshold: 50, // Set low to trigger for 100 items
281
- useANN: true,
44
+ // Create engine with optimized ANN settings
45
+ const engine = await Simile.from(items, {
46
+ useANN: {
47
+ efSearch: 20, // Fast search
48
+ M: 16,
49
+ efConstruction: 200,
50
+ },
51
+ annThreshold: 100, // Enable ANN early
282
52
  });
53
+ // Verify ANN is enabled
283
54
  const info = engine.getIndexInfo();
284
55
  expect(info.type).toBe("hnsw");
285
- expect(info.annStats).toBeDefined();
286
- expect(info.annStats?.size).toBe(100);
287
- const results = await engine.search("Item number 42");
288
- expect(results[0].id).toBe("many-42");
289
- }, 30000);
290
- it("uses cache for redundant embeddings", async () => {
291
- const engine = await Simile.from(testItems, {
292
- cache: { enableStats: true },
293
- });
294
- // Reset stats
295
- const info1 = engine.getIndexInfo();
296
- // Search for same query twice
56
+ expect(info.size).toBe(10000);
57
+ // Warm up: first search includes embedding time
297
58
  await engine.search("cleaner");
298
- await engine.search("cleaner");
299
- const info2 = engine.getIndexInfo();
300
- expect(info2.cacheStats?.hits).toBeGreaterThan(0);
301
- expect(info2.cacheStats?.hitRate).toBeGreaterThan(0);
302
- }, 30000);
303
- it("adds items with cache optimization", async () => {
304
- const engine = await Simile.from(testItems.slice(0, 1));
305
- const info1 = engine.getIndexInfo();
306
- // Add same item again (should hit cache)
307
- await engine.add(testItems.slice(0, 1));
308
- const info2 = engine.getIndexInfo();
309
- // Cache stats might be null if not enabled, let's enable it
310
- engine.remove(['1']);
311
- const engineWithStats = await Simile.from(testItems.slice(0, 1), {
312
- cache: { enableStats: true }
59
+ // Performance test: search should be <100ms (excluding first-time embedding)
60
+ const query = "phone charger";
61
+ const startTime = performance.now();
62
+ const results = await engine.search(query, {
63
+ topK: 5,
64
+ semanticOnly: true, // Fast mode: skip fuzzy/keyword
313
65
  });
314
- await engineWithStats.add([{ id: 'new', text: testItems[0].text }]);
315
- const stats = engineWithStats.getIndexInfo().cacheStats;
316
- expect(stats?.hits).toBe(1);
317
- }, 30000);
66
+ const endTime = performance.now();
67
+ const searchTime = endTime - startTime;
68
+ console.log(`Search time for 10K items: ${searchTime.toFixed(2)}ms`);
69
+ console.log(`Results: ${results.length}`);
70
+ console.log(`Index info:`, info);
71
+ expect(results.length).toBeGreaterThan(0);
72
+ expect(searchTime).toBeLessThan(100); // Should be <100ms
73
+ // Also test with full hybrid search
74
+ const startTime2 = performance.now();
75
+ const results2 = await engine.search(query, {
76
+ topK: 5,
77
+ semanticOnly: false, // Full hybrid search
78
+ });
79
+ const endTime2 = performance.now();
80
+ const hybridTime = endTime2 - startTime2;
81
+ console.log(`Hybrid search time: ${hybridTime.toFixed(2)}ms`);
82
+ expect(hybridTime).toBeLessThan(200);
83
+ }, 300000); // Longer timeout for 10K items (embedding takes ~3 minutes)
318
84
  });
package/dist/types.d.ts CHANGED
@@ -2,7 +2,7 @@ import { HNSWConfig } from "./ann.js";
2
2
  import { CacheOptions, CacheStats } from "./cache.js";
3
3
  import { QuantizationType } from "./quantization.js";
4
4
  import { UpdaterConfig } from "./updater.js";
5
- export { HNSWConfig, CacheOptions, CacheStats, QuantizationType, UpdaterConfig };
5
+ export { HNSWConfig, CacheOptions, CacheStats, QuantizationType, UpdaterConfig, };
6
6
  export interface SearchItem<T = any> {
7
7
  id: string;
8
8
  text: string;
@@ -37,6 +37,8 @@ export interface SearchOptions {
37
37
  useFastSimilarity?: boolean;
38
38
  /** Use ANN index if available (default: true) */
39
39
  useANN?: boolean;
40
+ /** Semantic-only search (skip fuzzy/keyword for maximum speed) */
41
+ semanticOnly?: boolean;
40
42
  }
41
43
  export interface HybridWeights {
42
44
  /** Semantic similarity weight (0-1), default: 0.7 */
@@ -86,7 +88,7 @@ export interface SimileSnapshot<T = any> {
86
88
  cache?: any;
87
89
  }
88
90
  export interface IndexInfo {
89
- type: 'linear' | 'hnsw';
91
+ type: "linear" | "hnsw";
90
92
  size: number;
91
93
  memory: string;
92
94
  annStats?: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "simile-search",
3
- "version": "0.4.0",
3
+ "version": "0.4.1",
4
4
  "description": "Offline-first semantic + fuzzy search engine for catalogs, names, and products",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",