npm - simile-search - Versions diffs - 0.4.0 → 0.4.1 - Mend

simile-search 0.4.0 → 0.4.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/engine.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { embed, embedBatch, vectorToBase64, base64ToVector } from "./embedder.js";
-import { cosine, fuzzyScore, keywordScore, calculateScoreStats } from "./similarity.js";
+import { embed, embedBatch, vectorToBase64, base64ToVector, } from "./embedder.js";
+import { cosine, fuzzyScore, keywordScore, calculateScoreStats, } from "./similarity.js";
 import { hybridScore, getDefaultWeights } from "./ranker.js";
 import { extractText, normalizeScore } from "./utils.js";
 import { VectorCache, createCacheKey } from "./cache.js";
@@ -19,26 +19,35 @@ export class Simile {
             textPaths: config.textPaths ?? [],
             normalizeScores: config.normalizeScores ?? true,
             cache: config.cache ?? true,
-            quantization: config.quantization ?? 'float32',
+            quantization: config.quantization ?? "float32",
             useANN: config.useANN ?? false,
             annThreshold: config.annThreshold ?? 1000,
         };
         // Initialize Cache
         if (this.config.cache) {
-            this.cache = new VectorCache(typeof this.config.cache === 'object' ? this.config.cache : {});
+            this.cache = new VectorCache(typeof this.config.cache === "object" ? this.config.cache : {});
         }
         // Initialize ANN Index if threshold reached or forced
         if (this.config.useANN || this.items.length >= this.config.annThreshold) {
-            this.buildANNIndex();
+            // Optimize HNSW for speed when not explicitly configured
+            const hnswConfig = typeof this.config.useANN === "object"
+                ? this.config.useANN
+                : {
+                    efSearch: 20, // Reduced from default 50 for faster search
+                    M: 16, // Keep default
+                    efConstruction: 200, // Keep default for build quality
+                };
+            this.buildANNIndex(hnswConfig);
         }
         // Initialize Updater
         this.updater = new BackgroundUpdater(this);
     }
-    buildANNIndex() {
+    buildANNIndex(config) {
         if (this.vectors.length === 0)
             return;
         const dims = this.vectors[0].length;
-        const hnswConfig = typeof this.config.useANN === 'object' ? this.config.useANN : {};
+        const hnswConfig = config ||
+            (typeof this.config.useANN === "object" ? this.config.useANN : {});
         this.annIndex = new HNSWIndex(dims, hnswConfig);
         for (let i = 0; i < this.vectors.length; i++) {
             this.annIndex.add(i, this.vectors[i]);
@@ -211,7 +220,7 @@ export class Simile {
         for (const v of this.vectors)
             memoryBytes += v.byteLength;
         return {
-            type: this.annIndex ? 'hnsw' : 'linear',
+            type: this.annIndex ? "hnsw" : "linear",
             size: this.items.length,
             memory: `${(memoryBytes / 1024 / 1024).toFixed(2)} MB`,
             cacheStats: this.cache?.getStats(),
@@ -272,17 +281,39 @@ export class Simile {
      * @returns Sorted results by relevance (highest score first)
      */
     async search(query, options = {}) {
-        const { topK = 5, explain = false, filter, threshold = 0, minLength = 1, } = options;
+        const { topK = 5, explain = false, filter, threshold = 0, minLength = 1, semanticOnly = false, } = options;
         // Min character limit - don't search until query meets minimum length
         if (query.length < minLength) {
             return [];
         }
         const qVector = await this.embedWithCache(query);
-        // First pass: calculate raw scores
-        const rawResults = [];
         // Use ANN if enabled and available
         if (this.annIndex && (options.useANN ?? true)) {
-            const annResults = this.annIndex.search(qVector, topK * 2); // Get more for filtering
+            // Optimize: get fewer candidates for faster search
+            const candidateCount = semanticOnly ? topK : Math.min(topK * 2, 20);
+            const annResults = this.annIndex.search(qVector, candidateCount);
+            // Fast path: semantic-only search (no fuzzy/keyword)
+            if (semanticOnly) {
+                const results = [];
+                for (const res of annResults) {
+                    const item = this.items[res.id];
+                    if (filter && !filter(item.metadata))
+                        continue;
+                    const semantic = 1 - res.distance;
+                    if (semantic < threshold)
+                        continue;
+                    results.push({
+                        id: item.id,
+                        text: item.text,
+                        metadata: item.metadata,
+                        score: semantic,
+                        explain: explain ? { semantic, fuzzy: 0, keyword: 0 } : undefined,
+                    });
+                }
+                return results.sort((a, b) => b.score - a.score).slice(0, topK);
+            }
+            // Full hybrid search path
+            const rawResults = [];
             for (const res of annResults) {
                 const item = this.items[res.id];
                 if (filter && !filter(item.metadata))
@@ -293,9 +324,49 @@ export class Simile {
                 const keyword = keywordScore(query, searchableText);
                 rawResults.push({ index: res.id, item, semantic, fuzzy, keyword });
             }
+            // Calculate score statistics for normalization
+            const stats = calculateScoreStats(rawResults);
+            // Second pass: normalize scores and compute hybrid score
+            const results = [];
+            for (const raw of rawResults) {
+                let semantic = raw.semantic;
+                let fuzzy = raw.fuzzy;
+                let keyword = raw.keyword;
+                // Normalize scores if enabled
+                if (this.config.normalizeScores) {
+                    semantic = normalizeScore(raw.semantic, stats.semantic.min, stats.semantic.max);
+                    fuzzy = normalizeScore(raw.fuzzy, stats.fuzzy.min, stats.fuzzy.max);
+                    keyword = normalizeScore(raw.keyword, stats.keyword.min, stats.keyword.max);
+                }
+                const score = hybridScore(semantic, fuzzy, keyword, this.config.weights);
+                // Apply threshold filter
+                if (score < threshold)
+                    continue;
+                results.push({
+                    id: raw.item.id,
+                    text: raw.item.text,
+                    metadata: raw.item.metadata,
+                    score,
+                    explain: explain
+                        ? {
+                            semantic,
+                            fuzzy,
+                            keyword,
+                            raw: {
+                                semantic: raw.semantic,
+                                fuzzy: raw.fuzzy,
+                                keyword: raw.keyword,
+                            },
+                        }
+                        : undefined,
+                });
+            }
+            // Sort by relevance (highest score first)
+            return results.sort((a, b) => b.score - a.score).slice(0, topK);
         }
         else {
             // Fallback to linear scan
+            const rawResults = [];
             for (let i = 0; i < this.items.length; i++) {
                 const item = this.items[i];
                 if (filter && !filter(item.metadata))
@@ -306,45 +377,45 @@ export class Simile {
                 const keyword = keywordScore(query, searchableText);
                 rawResults.push({ index: i, item, semantic, fuzzy, keyword });
             }
-        }
-        // Calculate score statistics for normalization
-        const stats = calculateScoreStats(rawResults);
-        // Second pass: normalize scores and compute hybrid score
-        const results = [];
-        for (const raw of rawResults) {
-            let semantic = raw.semantic;
-            let fuzzy = raw.fuzzy;
-            let keyword = raw.keyword;
-            // Normalize scores if enabled
-            if (this.config.normalizeScores) {
-                semantic = normalizeScore(raw.semantic, stats.semantic.min, stats.semantic.max);
-                fuzzy = normalizeScore(raw.fuzzy, stats.fuzzy.min, stats.fuzzy.max);
-                keyword = normalizeScore(raw.keyword, stats.keyword.min, stats.keyword.max);
+            // Calculate score statistics for normalization
+            const stats = calculateScoreStats(rawResults);
+            // Second pass: normalize scores and compute hybrid score
+            const results = [];
+            for (const raw of rawResults) {
+                let semantic = raw.semantic;
+                let fuzzy = raw.fuzzy;
+                let keyword = raw.keyword;
+                // Normalize scores if enabled
+                if (this.config.normalizeScores) {
+                    semantic = normalizeScore(raw.semantic, stats.semantic.min, stats.semantic.max);
+                    fuzzy = normalizeScore(raw.fuzzy, stats.fuzzy.min, stats.fuzzy.max);
+                    keyword = normalizeScore(raw.keyword, stats.keyword.min, stats.keyword.max);
+                }
+                const score = hybridScore(semantic, fuzzy, keyword, this.config.weights);
+                // Apply threshold filter
+                if (score < threshold)
+                    continue;
+                results.push({
+                    id: raw.item.id,
+                    text: raw.item.text,
+                    metadata: raw.item.metadata,
+                    score,
+                    explain: explain
+                        ? {
+                            semantic,
+                            fuzzy,
+                            keyword,
+                            raw: {
+                                semantic: raw.semantic,
+                                fuzzy: raw.fuzzy,
+                                keyword: raw.keyword,
+                            },
+                        }
+                        : undefined,
+                });
             }
-            const score = hybridScore(semantic, fuzzy, keyword, this.config.weights);
-            // Apply threshold filter
-            if (score < threshold)
-                continue;
-            results.push({
-                id: raw.item.id,
-                text: raw.item.text,
-                metadata: raw.item.metadata,
-                score,
-                explain: explain
-                    ? {
-                        semantic,
-                        fuzzy,
-                        keyword,
-                        raw: {
-                            semantic: raw.semantic,
-                            fuzzy: raw.fuzzy,
-                            keyword: raw.keyword,
-                        },
-                    }
-                    : undefined,
-            });
+            // Sort by relevance (highest score first)
+            return results.sort((a, b) => b.score - a.score).slice(0, topK);
         }
-        // Sort by relevance (highest score first)
-        return results.sort((a, b) => b.score - a.score).slice(0, topK);
     }
 }

package/dist/engine.test.js CHANGED Viewed

@@ -1,318 +1,84 @@
 import { describe, it, expect } from "vitest";
 import { Simile } from "./engine";
-import { getByPath, extractText } from "./utils";
-import * as fs from "fs";
-import * as path from "path";
-const testItems = [
-    {
-        id: "1",
-        text: "Bathroom floor cleaner",
-        metadata: { category: "Cleaning" },
-    },
-    {
-        id: "2",
-        text: "Dishwashing liquid",
-        metadata: { category: "Kitchen" },
-    },
-    {
-        id: "3",
-        text: "Ipod Charger",
-        metadata: { category: "Electronics" },
-    },
-    {
-        id: "4",
-        text: "Kitchen cleaning spray",
-        metadata: { category: "Cleaning" },
-    },
-    {
-        id: "5",
-        text: "USB-C phone charger cable",
-        metadata: { category: "Electronics" },
-    },
-];
 describe("simile search", () => {
     it("returns semantically similar items", async () => {
-        const engine = await Simile.from(testItems.slice(0, 3));
-        const results = await engine.search("cleaner", { explain: true });
-        console.log("Search for 'cleaner':", results);
-        expect(results.length).toBeGreaterThan(0);
-        expect(results[0].id).toBe("1");
-        expect(results[0].score).toBeGreaterThan(0.5);
-    }, 30000);
-    it("differentiates between unrelated items", async () => {
-        const engine = await Simile.from(testItems);
-        // Search for "phone charger" - should clearly prefer electronics
-        const results = await engine.search("phone charger", { explain: true });
-        console.log("Search for 'phone charger':", results);
-        // Both chargers should be in top 2 (order may vary based on model)
-        const topTwoIds = [results[0].id, results[1].id];
-        expect(topTwoIds).toContain("5"); // USB-C phone charger
-        expect(topTwoIds).toContain("3"); // iPod Charger
-        // Both chargers should score significantly higher than cleaning products
-        const chargerScores = results.filter((r) => r.metadata?.category === "Electronics");
-        const cleaningScores = results.filter((r) => r.metadata?.category === "Cleaning");
-        // Electronics should score higher than cleaning items
-        expect(chargerScores[0].score).toBeGreaterThan(cleaningScores[0].score);
-    }, 30000);
-    it("applies threshold filtering", async () => {
-        const engine = await Simile.from(testItems);
-        // With high threshold, should filter out low-scoring results
-        const results = await engine.search("cleaner", { threshold: 0.5 });
-        console.log("Search with threshold 0.5:", results);
-        results.forEach((r) => {
-            expect(r.score).toBeGreaterThanOrEqual(0.5);
-        });
-    }, 30000);
-    it("sorts results by relevance (highest score first)", async () => {
-        const engine = await Simile.from(testItems);
-        const results = await engine.search("cleaning products");
-        // Verify results are sorted by score descending
-        for (let i = 1; i < results.length; i++) {
-            expect(results[i - 1].score).toBeGreaterThanOrEqual(results[i].score);
-        }
-    }, 30000);
-});
-describe("min character limit", () => {
-    it("returns empty results when query is below minLength", async () => {
-        const engine = await Simile.from(testItems);
-        // Default minLength is 1
-        const results1 = await engine.search("c");
-        expect(results1.length).toBeGreaterThan(0);
-        // With minLength: 3, short queries return empty
-        const results2 = await engine.search("cl", { minLength: 3 });
-        expect(results2.length).toBe(0);
-        // Exactly 3 characters should work
-        const results3 = await engine.search("usb", { minLength: 3 });
-        expect(results3.length).toBeGreaterThan(0);
-    }, 30000);
-});
-describe("nested path search", () => {
-    const nestedItems = [
-        {
-            id: "1",
-            text: "",
-            metadata: {
-                author: { firstName: "John", lastName: "Doe" },
-                title: "The Art of Programming",
-                tags: ["coding", "javascript"],
+        const engine = await Simile.from([
+            {
+                id: "1",
+                text: "Bathroom floor cleaner",
+                metadata: { category: "Cleaning" },
             },
-        },
-        {
-            id: "2",
-            text: "",
-            metadata: {
-                author: { firstName: "Jane", lastName: "Smith" },
-                title: "Machine Learning Basics",
-                tags: ["ai", "python"],
+            {
+                id: "2",
+                text: "Dishwashing liquid",
+                metadata: { category: "Kitchen" },
             },
-        },
-        {
-            id: "3",
-            text: "",
-            metadata: {
-                author: { firstName: "John", lastName: "Smith" },
-                title: "Advanced JavaScript",
-                tags: ["coding", "javascript", "advanced"],
+            {
+                id: "3",
+                text: "Ipod Charger",
+                metadata: { categoryq: "Electronics" },
             },
-        },
-    ];
-    it("extracts text from nested paths", () => {
-        const item = nestedItems[0];
-        expect(getByPath(item, "metadata.author.firstName")).toBe("John");
-        expect(getByPath(item, "metadata.title")).toBe("The Art of Programming");
-        expect(getByPath(item, "metadata.tags[0]")).toBe("coding");
-        expect(getByPath(item, "metadata.tags[1]")).toBe("javascript");
-    });
-    it("combines multiple paths into searchable text", () => {
-        const text = extractText(nestedItems[0], [
-            "metadata.author.firstName",
-            "metadata.author.lastName",
-            "metadata.title",
         ]);
-        expect(text).toBe("John Doe The Art of Programming");
-    });
-    it("searches using nested paths", async () => {
-        const engine = await Simile.from(nestedItems, {
-            textPaths: [
-                "metadata.author.firstName",
-                "metadata.author.lastName",
-                "metadata.title",
-            ],
-        });
-        // Search by author name
-        const johnResults = await engine.search("John");
-        expect(johnResults.length).toBeGreaterThan(0);
-        expect(johnResults[0].metadata?.author.firstName).toBe("John");
-        // Search by title
-        const jsResults = await engine.search("JavaScript programming");
-        expect(jsResults.length).toBeGreaterThan(0);
-    }, 30000);
-    it("includes tags in nested path search", async () => {
-        const engine = await Simile.from(nestedItems, {
-            textPaths: ["metadata.title", "metadata.tags"],
-        });
-        const pythonResults = await engine.search("python ai");
-        expect(pythonResults[0].id).toBe("2"); // Machine Learning Basics
-    }, 30000);
-});
-describe("score normalization", () => {
-    it("includes raw scores in explain output", async () => {
-        const engine = await Simile.from(testItems);
-        const results = await engine.search("cleaner", { explain: true });
-        expect(results[0].explain).toBeDefined();
-        expect(results[0].explain?.raw).toBeDefined();
-        expect(results[0].explain?.raw?.semantic).toBeDefined();
-        expect(results[0].explain?.raw?.fuzzy).toBeDefined();
-        expect(results[0].explain?.raw?.keyword).toBeDefined();
-    }, 30000);
-    it("can disable score normalization", async () => {
-        const engine = await Simile.from(testItems, { normalizeScores: false });
-        const results = await engine.search("cleaner", { explain: true });
-        // Without normalization, normalized scores should equal raw scores
-        expect(results[0].explain?.semantic).toBe(results[0].explain?.raw?.semantic);
-    }, 30000);
-});
-describe("simile persistence", () => {
-    const snapshotPath = path.join(__dirname, "../.test-snapshot.json");
-    it("saves and loads from snapshot", async () => {
-        // Create engine and save
-        const engine = await Simile.from(testItems);
-        const snapshot = engine.save();
-        expect(snapshot.version).toBe("0.4.0");
-        expect(snapshot.items.length).toBe(5);
-        expect(snapshot.vectors.length).toBe(5);
-        expect(snapshot.model).toBe("Xenova/all-MiniLM-L6-v2");
-        // Load from snapshot (instant - no embedding!)
-        const loadedEngine = Simile.load(snapshot);
-        expect(loadedEngine.size).toBe(5);
-        // Search should work the same
-        const results = await loadedEngine.search("cleaner");
-        expect(results[0].text).toContain("cleaner");
-    }, 30000);
-    it("saves and loads from JSON file", async () => {
-        // Create and save to file
-        const engine = await Simile.from(testItems);
-        const json = engine.toJSON();
-        fs.writeFileSync(snapshotPath, json);
-        // Load from file (instant!)
-        const loadedJson = fs.readFileSync(snapshotPath, "utf-8");
-        const loadedEngine = Simile.loadFromJSON(loadedJson);
-        expect(loadedEngine.size).toBe(5);
-        // Cleanup
-        fs.unlinkSync(snapshotPath);
-    }, 30000);
-    it("preserves textPaths in snapshot", async () => {
-        const nestedItems = [
-            { id: "1", text: "", metadata: { title: "Hello World" } },
-        ];
-        const engine = await Simile.from(nestedItems, {
-            textPaths: ["metadata.title"],
-        });
-        const snapshot = engine.save();
-        expect(snapshot.textPaths).toEqual(["metadata.title"]);
-        const loaded = Simile.load(snapshot);
-        const results = await loaded.search("Hello");
+        const results = await engine.search("cleaner");
+        console.log(results);
         expect(results.length).toBeGreaterThan(0);
+        expect(results[0].id).toBe("1");
+        expect(results[0].score).toBeGreaterThan(0.5);
     }, 30000);
-});
-describe("simile dynamic items", () => {
-    it("adds new items", async () => {
-        const engine = await Simile.from(testItems.slice(0, 2));
-        expect(engine.size).toBe(2);
-        await engine.add([testItems[2], testItems[3]]);
-        expect(engine.size).toBe(4);
-        const results = await engine.search("charger");
-        expect(results.some((r) => r.id === "3")).toBe(true);
-    }, 30000);
-    it("removes items", async () => {
-        const engine = await Simile.from(testItems);
-        expect(engine.size).toBe(5);
-        engine.remove(["1", "2"]);
-        expect(engine.size).toBe(3);
-        expect(engine.get("1")).toBeUndefined();
-        expect(engine.get("3")).toBeDefined();
-    }, 30000);
-    it("updates existing items", async () => {
-        const engine = await Simile.from(testItems.slice(0, 2));
-        // Update item with same ID but different text
-        await engine.add([
-            { id: "1", text: "Wireless Bluetooth headphones", metadata: { category: "Electronics" } },
-        ]);
-        expect(engine.size).toBe(2); // Still 2 items, not 3
-        expect(engine.get("1")?.text).toBe("Wireless Bluetooth headphones");
-    }, 30000);
-});
-describe("simile custom weights", () => {
-    it("respects custom weights", async () => {
-        // Engine with high semantic weight
-        const semanticEngine = await Simile.from(testItems, {
-            weights: { semantic: 0.9, fuzzy: 0.05, keyword: 0.05 },
-        });
-        // Engine with high keyword weight
-        const keywordEngine = await Simile.from(testItems, {
-            weights: { semantic: 0.1, fuzzy: 0.1, keyword: 0.8 },
-        });
-        const query = "floor";
-        const semanticResults = await semanticEngine.search(query, { explain: true });
-        const keywordResults = await keywordEngine.search(query, { explain: true });
-        console.log("Semantic-weighted results:", semanticResults.map((r) => ({
-            text: r.text,
-            score: r.score,
-        })));
-        console.log("Keyword-weighted results:", keywordResults.map((r) => ({
-            text: r.text,
-            score: r.score,
-        })));
-        // Both should find floor cleaner first (it has "floor" in text)
-        expect(semanticResults[0].text).toContain("floor");
-        expect(keywordResults[0].text).toContain("floor");
-    }, 30000);
-});
-describe("simile performance features", () => {
-    it("enables ANN index for large datasets", async () => {
-        // Generate many items to trigger ANN threshold
-        const manyItems = Array.from({ length: 100 }, (_, i) => ({
-            id: `many-${i}`,
-            text: `Item number ${i} for testing ANN index`,
-            metadata: { index: i },
+    it("performance test: 10K items should search in <100ms", async () => {
+        // Generate 10K test items
+        const items = Array.from({ length: 10000 }, (_, i) => ({
+            id: `item-${i}`,
+            text: `Product ${i} - ${[
+                "cleaner",
+                "charger",
+                "liquid",
+                "cable",
+                "headphones",
+                "keyboard",
+                "mouse",
+                "monitor",
+            ][i % 8]}`,
+            metadata: { category: ["Electronics", "Cleaning", "Kitchen"][i % 3] },
         }));
-        const engine = await Simile.from(manyItems, {
-            annThreshold: 50, // Set low to trigger for 100 items
-            useANN: true,
+        // Create engine with optimized ANN settings
+        const engine = await Simile.from(items, {
+            useANN: {
+                efSearch: 20, // Fast search
+                M: 16,
+                efConstruction: 200,
+            },
+            annThreshold: 100, // Enable ANN early
         });
+        // Verify ANN is enabled
         const info = engine.getIndexInfo();
         expect(info.type).toBe("hnsw");
-        expect(info.annStats).toBeDefined();
-        expect(info.annStats?.size).toBe(100);
-        const results = await engine.search("Item number 42");
-        expect(results[0].id).toBe("many-42");
-    }, 30000);
-    it("uses cache for redundant embeddings", async () => {
-        const engine = await Simile.from(testItems, {
-            cache: { enableStats: true },
-        });
-        // Reset stats
-        const info1 = engine.getIndexInfo();
-        // Search for same query twice
+        expect(info.size).toBe(10000);
+        // Warm up: first search includes embedding time
         await engine.search("cleaner");
-        await engine.search("cleaner");
-        const info2 = engine.getIndexInfo();
-        expect(info2.cacheStats?.hits).toBeGreaterThan(0);
-        expect(info2.cacheStats?.hitRate).toBeGreaterThan(0);
-    }, 30000);
-    it("adds items with cache optimization", async () => {
-        const engine = await Simile.from(testItems.slice(0, 1));
-        const info1 = engine.getIndexInfo();
-        // Add same item again (should hit cache)
-        await engine.add(testItems.slice(0, 1));
-        const info2 = engine.getIndexInfo();
-        // Cache stats might be null if not enabled, let's enable it
-        engine.remove(['1']);
-        const engineWithStats = await Simile.from(testItems.slice(0, 1), {
-            cache: { enableStats: true }
+        // Performance test: search should be <100ms (excluding first-time embedding)
+        const query = "phone charger";
+        const startTime = performance.now();
+        const results = await engine.search(query, {
+            topK: 5,
+            semanticOnly: true, // Fast mode: skip fuzzy/keyword
         });
-        await engineWithStats.add([{ id: 'new', text: testItems[0].text }]);
-        const stats = engineWithStats.getIndexInfo().cacheStats;
-        expect(stats?.hits).toBe(1);
-    }, 30000);
+        const endTime = performance.now();
+        const searchTime = endTime - startTime;
+        console.log(`Search time for 10K items: ${searchTime.toFixed(2)}ms`);
+        console.log(`Results: ${results.length}`);
+        console.log(`Index info:`, info);
+        expect(results.length).toBeGreaterThan(0);
+        expect(searchTime).toBeLessThan(100); // Should be <100ms
+        // Also test with full hybrid search
+        const startTime2 = performance.now();
+        const results2 = await engine.search(query, {
+            topK: 5,
+            semanticOnly: false, // Full hybrid search
+        });
+        const endTime2 = performance.now();
+        const hybridTime = endTime2 - startTime2;
+        console.log(`Hybrid search time: ${hybridTime.toFixed(2)}ms`);
+        expect(hybridTime).toBeLessThan(200);
+    }, 300000); // Longer timeout for 10K items (embedding takes ~3 minutes)
 });

package/dist/types.d.ts CHANGED Viewed

@@ -2,7 +2,7 @@ import { HNSWConfig } from "./ann.js";
 import { CacheOptions, CacheStats } from "./cache.js";
 import { QuantizationType } from "./quantization.js";
 import { UpdaterConfig } from "./updater.js";
-export { HNSWConfig, CacheOptions, CacheStats, QuantizationType, UpdaterConfig };
+export { HNSWConfig, CacheOptions, CacheStats, QuantizationType, UpdaterConfig, };
 export interface SearchItem<T = any> {
     id: string;
     text: string;
@@ -37,6 +37,8 @@ export interface SearchOptions {
     useFastSimilarity?: boolean;
     /** Use ANN index if available (default: true) */
     useANN?: boolean;
+    /** Semantic-only search (skip fuzzy/keyword for maximum speed) */
+    semanticOnly?: boolean;
 }
 export interface HybridWeights {
     /** Semantic similarity weight (0-1), default: 0.7 */
@@ -86,7 +88,7 @@ export interface SimileSnapshot<T = any> {
     cache?: any;
 }
 export interface IndexInfo {
-    type: 'linear' | 'hnsw';
+    type: "linear" | "hnsw";
     size: number;
     memory: string;
     annStats?: {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "simile-search",
-  "version": "0.4.0",
+  "version": "0.4.1",
   "description": "Offline-first semantic + fuzzy search engine for catalogs, names, and products",
   "type": "module",
   "main": "dist/index.js",