npm - simile-search - Versions diffs - 0.2.0 → 0.3.1 - Mend

simile-search 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/README.md CHANGED Viewed

@@ -21,6 +21,9 @@ Simile combines the power of AI embeddings with fuzzy string matching and keywor
 - ⚡ **Batch Processing** - Optimized for large catalogs
 - 🔧 **Configurable** - Tune scoring weights for your use case
 - 📦 **Zero API Calls** - Everything runs locally with Transformers.js
+- 🔗 **Nested Path Search** - Search `author.firstName` instead of flat strings
+- 📊 **Score Normalization** - Consistent scoring across different methods
+- ✂️ **Min Character Limit** - Control when search triggers
 ## 📦 Installation
@@ -45,8 +48,8 @@ const engine = await Simile.from([
 const results = await engine.search('phone charger');
 console.log(results);
 // [
-//   { id: '3', text: 'iPhone Charger', score: 0.72, ... },
-//   { id: '4', text: 'USB-C phone charger cable', score: 0.68, ... },
+//   { id: '3', text: 'iPhone Charger', score: 0.92, ... },
+//   { id: '4', text: 'USB-C phone charger cable', score: 0.87, ... },
 //   ...
 // ]
 ```
@@ -81,13 +84,69 @@ const snapshot = engine.save();
 //   model: 'Xenova/all-MiniLM-L6-v2',
 //   items: [...],
 //   vectors: ['base64...', 'base64...'],
-//   createdAt: '2024-12-28T...'
+//   createdAt: '2024-12-28T...',
+//   textPaths: ['metadata.title', ...]  // if configured
 // }
 // Load from snapshot object
 const restored = Simile.load(snapshot);
 ```
+## 🔗 Nested Path Search
+Search complex objects by specifying paths to extract text from:
+```typescript
+const books = [
+  {
+    id: '1',
+    text: '',  // Can be empty when using textPaths
+    metadata: {
+      author: { firstName: 'John', lastName: 'Doe' },
+      title: 'The Art of Programming',
+      tags: ['coding', 'javascript'],
+    },
+  },
+  {
+    id: '2',
+    text: '',
+    metadata: {
+      author: { firstName: 'Jane', lastName: 'Smith' },
+      title: 'Machine Learning Basics',
+      tags: ['ai', 'python'],
+    },
+  },
+];
+// Configure which paths to extract and search
+const engine = await Simile.from(books, {
+  textPaths: [
+    'metadata.author.firstName',
+    'metadata.author.lastName',
+    'metadata.title',
+    'metadata.tags',  // Arrays are joined with spaces
+  ],
+});
+// Now you can search by author name!
+const results = await engine.search('John programming');
+// Finds "The Art of Programming" by John Doe
+```
+### Supported Path Formats
+```typescript
+// Dot notation for nested objects
+'metadata.author.firstName'  // → "John"
+// Array index access
+'metadata.tags[0]'           // → "coding"
+'items[0].name'              // → nested array access
+// Arrays without index (joins all elements)
+'metadata.tags'              // → "coding javascript"
+```
 ## 🔧 Configuration
 ### Custom Scoring Weights
@@ -107,6 +166,38 @@ const engine = await Simile.from(items, {
 engine.setWeights({ semantic: 0.9, fuzzy: 0.05, keyword: 0.05 });
 ```
+### Score Normalization
+By default, scores are normalized so that a "0.8" semantic score means the same as a "0.8" fuzzy score. This ensures fair comparison across different scoring methods.
+```typescript
+// Enabled by default
+const engine = await Simile.from(items, {
+  normalizeScores: true,  // default
+});
+// Disable if you want raw scores
+const rawEngine = await Simile.from(items, {
+  normalizeScores: false,
+});
+// With explain: true, you can see both normalized and raw scores
+const results = await engine.search('cleaner', { explain: true });
+// {
+//   score: 1.0,
+//   explain: {
+//     semantic: 1.0,    // normalized
+//     fuzzy: 1.0,       // normalized
+//     keyword: 1.0,     // normalized
+//     raw: {
+//       semantic: 0.62, // original score
+//       fuzzy: 0.32,    // original score
+//       keyword: 1.0,   // original score
+//     }
+//   }
+// }
+```
 ### Search Options
 ```typescript
@@ -115,17 +206,25 @@ const results = await engine.search('cleaner', {
   threshold: 0.5,     // Minimum score (default: 0)
   explain: true,      // Include score breakdown
   filter: (meta) => meta.category === 'Cleaning',  // Filter by metadata
+  minLength: 3,       // Don't search until 3+ characters typed (default: 1)
 });
+```
-// With explain: true
-// {
-//   id: '1',
-//   text: 'Bathroom floor cleaner',
-//   score: 0.63,
-//   explain: { semantic: 0.62, fuzzy: 0.32, keyword: 1.0 }
-// }
+### Min Character Limit
+Prevent unnecessary searches on very short queries:
+```typescript
+// Don't trigger search until user types at least 3 characters
+const results = await engine.search('cl', { minLength: 3 });
+// Returns [] because query length (2) < minLength (3)
+const results2 = await engine.search('cle', { minLength: 3 });
+// Returns results because query length (3) >= minLength (3)
 ```
+This is useful for autocomplete/typeahead UIs where you don't want to search on every keystroke.
 ## 📝 Dynamic Catalog Management
 Add, update, or remove items without rebuilding:
@@ -167,7 +266,11 @@ import {
   keywordScore,
   hybridScore,
   vectorToBase64,
-  base64ToVector
+  base64ToVector,
+  getByPath,
+  extractText,
+  normalizeScore,
+  calculateScoreStats,
 } from 'simile-search';
 // Embed text directly
@@ -183,6 +286,10 @@ const keyword = keywordScore('phone charger', 'USB phone charger cable');
 // Combine scores
 const score = hybridScore(0.8, 0.6, 0.5, { semantic: 0.7, fuzzy: 0.15, keyword: 0.15 });
+// Extract nested values
+const firstName = getByPath(obj, 'author.firstName');
+const text = extractText(item, ['metadata.title', 'metadata.tags']);
 ```
 ## 📊 API Reference
@@ -197,7 +304,7 @@ Load from a saved snapshot (instant, no embedding).
 Load from JSON string.
 ### `engine.search(query, options?)`
-Search for similar items.
+Search for similar items. **Results are always sorted by relevance (highest score first).**
 ### `engine.save()`
 Export snapshot object for persistence.
@@ -237,19 +344,27 @@ interface SearchResult<T = any> {
   text: string;
   score: number;
   metadata?: T;
-  explain?: { semantic: number; fuzzy: number; keyword: number };
+  explain?: {
+    semantic: number;
+    fuzzy: number;
+    keyword: number;
+    raw?: { semantic: number; fuzzy: number; keyword: number };
+  };
 }
 interface SearchOptions {
   topK?: number;
   explain?: boolean;
   threshold?: number;
+  minLength?: number;  // Min query length to trigger search
   filter?: (metadata: any) => boolean;
 }
 interface SimileConfig {
   weights?: { semantic?: number; fuzzy?: number; keyword?: number };
   model?: string;
+  textPaths?: string[];       // Paths for nested object search
+  normalizeScores?: boolean;  // Enable score normalization (default: true)
 }
 ```
@@ -266,3 +381,4 @@ MIT © [Aavash Baral](https://github.com/iaavas)
 <p align="center">
   Made with ❤️ by <a href="https://github.com/iaavas">Aavash Baral</a>
 </p>

package/dist/engine.d.ts CHANGED Viewed

@@ -1,10 +1,14 @@
-import { SearchItem, SearchResult, SearchOptions, SimileConfig, SimileSnapshot, HybridWeights } from "./types";
+import { SearchItem, SearchResult, SearchOptions, SimileConfig, SimileSnapshot, HybridWeights } from "./types.js";
 export declare class Simile<T = any> {
     private items;
     private vectors;
     private itemIndex;
     private config;
     private constructor();
+    /**
+     * Extract searchable text from an item using configured paths.
+     */
+    private getSearchableText;
     /**
      * Create a new Simile instance from items.
      * This will embed all items (slow for first run, but cached after).
@@ -53,7 +57,11 @@ export declare class Simile<T = any> {
      */
     setWeights(weights: HybridWeights): void;
     /**
-     * Search for similar items
+     * Search for similar items.
+     *
+     * @param query - The search query
+     * @param options - Search options
+     * @returns Results sorted by relevance (highest score first)
      */
     search(query: string, options?: SearchOptions): Promise<SearchResult<T>[]>;
 }

package/dist/engine.js CHANGED Viewed

@@ -1,7 +1,8 @@
-import { embed, embedBatch, vectorToBase64, base64ToVector } from "./embedder";
-import { cosine, fuzzyScore, keywordScore } from "./similarity";
-import { hybridScore, getDefaultWeights } from "./ranker";
-const PACKAGE_VERSION = "0.2.0";
+import { embed, embedBatch, vectorToBase64, base64ToVector } from "./embedder.js";
+import { cosine, fuzzyScore, keywordScore, calculateScoreStats } from "./similarity.js";
+import { hybridScore, getDefaultWeights } from "./ranker.js";
+import { extractText, normalizeScore } from "./utils.js";
+const PACKAGE_VERSION = "0.3.1";
 export class Simile {
     constructor(items, vectors, config = {}) {
         this.items = items;
@@ -10,15 +11,25 @@ export class Simile {
         this.config = {
             weights: config.weights ?? getDefaultWeights(),
             model: config.model ?? "Xenova/all-MiniLM-L6-v2",
+            textPaths: config.textPaths ?? [],
+            normalizeScores: config.normalizeScores ?? true,
         };
     }
+    /**
+     * Extract searchable text from an item using configured paths.
+     */
+    getSearchableText(item) {
+        return extractText(item, this.config.textPaths.length > 0 ? this.config.textPaths : undefined);
+    }
     /**
      * Create a new Simile instance from items.
      * This will embed all items (slow for first run, but cached after).
      */
     static async from(items, config = {}) {
         const model = config.model ?? "Xenova/all-MiniLM-L6-v2";
-        const texts = items.map((item) => item.text);
+        const textPaths = config.textPaths ?? [];
+        // Extract text using paths if configured
+        const texts = items.map((item) => extractText(item, textPaths.length > 0 ? textPaths : undefined));
         const vectors = await embedBatch(texts, model);
         return new Simile(items, vectors, config);
     }
@@ -28,7 +39,11 @@ export class Simile {
      */
     static load(snapshot, config = {}) {
         const vectors = snapshot.vectors.map(base64ToVector);
-        return new Simile(snapshot.items, vectors, { ...config, model: snapshot.model });
+        return new Simile(snapshot.items, vectors, {
+            ...config,
+            model: snapshot.model,
+            textPaths: snapshot.textPaths ?? config.textPaths ?? [],
+        });
     }
     /**
      * Load from JSON string (e.g., from file or localStorage)
@@ -48,6 +63,7 @@ export class Simile {
             items: this.items,
             vectors: this.vectors.map(vectorToBase64),
             createdAt: new Date().toISOString(),
+            textPaths: this.config.textPaths.length > 0 ? this.config.textPaths : undefined,
         };
     }
     /**
@@ -60,7 +76,7 @@ export class Simile {
      * Add new items to the index
      */
     async add(items) {
-        const texts = items.map((item) => item.text);
+        const texts = items.map((item) => this.getSearchableText(item));
         const newVectors = await embedBatch(texts, this.config.model);
         for (let i = 0; i < items.length; i++) {
             const item = items[i];
@@ -122,31 +138,69 @@ export class Simile {
         this.config.weights = { ...this.config.weights, ...weights };
     }
     /**
-     * Search for similar items
+     * Search for similar items.
+     *
+     * @param query - The search query
+     * @param options - Search options
+     * @returns Results sorted by relevance (highest score first)
      */
     async search(query, options = {}) {
-        const { topK = 5, explain = false, filter, threshold = 0, } = options;
+        const { topK = 5, explain = false, filter, threshold = 0, minLength = 1, } = options;
+        // Min character limit - don't search until query meets minimum length
+        if (query.length < minLength) {
+            return [];
+        }
         const qVector = await embed(query, this.config.model);
-        const results = [];
+        // First pass: calculate raw scores
+        const rawResults = [];
         for (let i = 0; i < this.items.length; i++) {
             const item = this.items[i];
             if (filter && !filter(item.metadata))
                 continue;
+            const searchableText = this.getSearchableText(item);
             const semantic = cosine(qVector, this.vectors[i]);
-            const fuzzy = fuzzyScore(query, item.text);
-            const keyword = keywordScore(query, item.text);
+            const fuzzy = fuzzyScore(query, searchableText);
+            const keyword = keywordScore(query, searchableText);
+            rawResults.push({ index: i, item, semantic, fuzzy, keyword });
+        }
+        // Calculate score statistics for normalization
+        const stats = calculateScoreStats(rawResults);
+        // Second pass: normalize scores and compute hybrid score
+        const results = [];
+        for (const raw of rawResults) {
+            let semantic = raw.semantic;
+            let fuzzy = raw.fuzzy;
+            let keyword = raw.keyword;
+            // Normalize scores if enabled
+            if (this.config.normalizeScores) {
+                semantic = normalizeScore(raw.semantic, stats.semantic.min, stats.semantic.max);
+                fuzzy = normalizeScore(raw.fuzzy, stats.fuzzy.min, stats.fuzzy.max);
+                keyword = normalizeScore(raw.keyword, stats.keyword.min, stats.keyword.max);
+            }
             const score = hybridScore(semantic, fuzzy, keyword, this.config.weights);
             // Apply threshold filter
             if (score < threshold)
                 continue;
             results.push({
-                id: item.id,
-                text: item.text,
-                metadata: item.metadata,
+                id: raw.item.id,
+                text: raw.item.text,
+                metadata: raw.item.metadata,
                 score,
-                explain: explain ? { semantic, fuzzy, keyword } : undefined,
+                explain: explain
+                    ? {
+                        semantic,
+                        fuzzy,
+                        keyword,
+                        raw: {
+                            semantic: raw.semantic,
+                            fuzzy: raw.fuzzy,
+                            keyword: raw.keyword,
+                        },
+                    }
+                    : undefined,
             });
         }
+        // Sort by relevance (highest score first)
         return results.sort((a, b) => b.score - a.score).slice(0, topK);
     }
 }

package/dist/engine.test.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { describe, it, expect } from "vitest";
 import { Simile } from "./engine";
+import { getByPath, extractText } from "./utils";
 import * as fs from "fs";
 import * as path from "path";
 const testItems = [
@@ -50,8 +51,8 @@ describe("simile search", () => {
         // Both chargers should score significantly higher than cleaning products
         const chargerScores = results.filter((r) => r.metadata?.category === "Electronics");
         const cleaningScores = results.filter((r) => r.metadata?.category === "Cleaning");
-        // Electronics should score at least 0.4 higher than cleaning items
-        expect(chargerScores[0].score).toBeGreaterThan(cleaningScores[0].score + 0.4);
+        // Electronics should score higher than cleaning items
+        expect(chargerScores[0].score).toBeGreaterThan(cleaningScores[0].score);
     }, 30000);
     it("applies threshold filtering", async () => {
         const engine = await Simile.from(testItems);
@@ -62,6 +63,114 @@ describe("simile search", () => {
             expect(r.score).toBeGreaterThanOrEqual(0.5);
         });
     }, 30000);
+    it("sorts results by relevance (highest score first)", async () => {
+        const engine = await Simile.from(testItems);
+        const results = await engine.search("cleaning products");
+        // Verify results are sorted by score descending
+        for (let i = 1; i < results.length; i++) {
+            expect(results[i - 1].score).toBeGreaterThanOrEqual(results[i].score);
+        }
+    }, 30000);
+});
+describe("min character limit", () => {
+    it("returns empty results when query is below minLength", async () => {
+        const engine = await Simile.from(testItems);
+        // Default minLength is 1
+        const results1 = await engine.search("c");
+        expect(results1.length).toBeGreaterThan(0);
+        // With minLength: 3, short queries return empty
+        const results2 = await engine.search("cl", { minLength: 3 });
+        expect(results2.length).toBe(0);
+        // Exactly 3 characters should work
+        const results3 = await engine.search("usb", { minLength: 3 });
+        expect(results3.length).toBeGreaterThan(0);
+    }, 30000);
+});
+describe("nested path search", () => {
+    const nestedItems = [
+        {
+            id: "1",
+            text: "",
+            metadata: {
+                author: { firstName: "John", lastName: "Doe" },
+                title: "The Art of Programming",
+                tags: ["coding", "javascript"],
+            },
+        },
+        {
+            id: "2",
+            text: "",
+            metadata: {
+                author: { firstName: "Jane", lastName: "Smith" },
+                title: "Machine Learning Basics",
+                tags: ["ai", "python"],
+            },
+        },
+        {
+            id: "3",
+            text: "",
+            metadata: {
+                author: { firstName: "John", lastName: "Smith" },
+                title: "Advanced JavaScript",
+                tags: ["coding", "javascript", "advanced"],
+            },
+        },
+    ];
+    it("extracts text from nested paths", () => {
+        const item = nestedItems[0];
+        expect(getByPath(item, "metadata.author.firstName")).toBe("John");
+        expect(getByPath(item, "metadata.title")).toBe("The Art of Programming");
+        expect(getByPath(item, "metadata.tags[0]")).toBe("coding");
+        expect(getByPath(item, "metadata.tags[1]")).toBe("javascript");
+    });
+    it("combines multiple paths into searchable text", () => {
+        const text = extractText(nestedItems[0], [
+            "metadata.author.firstName",
+            "metadata.author.lastName",
+            "metadata.title",
+        ]);
+        expect(text).toBe("John Doe The Art of Programming");
+    });
+    it("searches using nested paths", async () => {
+        const engine = await Simile.from(nestedItems, {
+            textPaths: [
+                "metadata.author.firstName",
+                "metadata.author.lastName",
+                "metadata.title",
+            ],
+        });
+        // Search by author name
+        const johnResults = await engine.search("John");
+        expect(johnResults.length).toBeGreaterThan(0);
+        expect(johnResults[0].metadata?.author.firstName).toBe("John");
+        // Search by title
+        const jsResults = await engine.search("JavaScript programming");
+        expect(jsResults.length).toBeGreaterThan(0);
+    }, 30000);
+    it("includes tags in nested path search", async () => {
+        const engine = await Simile.from(nestedItems, {
+            textPaths: ["metadata.title", "metadata.tags"],
+        });
+        const pythonResults = await engine.search("python ai");
+        expect(pythonResults[0].id).toBe("2"); // Machine Learning Basics
+    }, 30000);
+});
+describe("score normalization", () => {
+    it("includes raw scores in explain output", async () => {
+        const engine = await Simile.from(testItems);
+        const results = await engine.search("cleaner", { explain: true });
+        expect(results[0].explain).toBeDefined();
+        expect(results[0].explain?.raw).toBeDefined();
+        expect(results[0].explain?.raw?.semantic).toBeDefined();
+        expect(results[0].explain?.raw?.fuzzy).toBeDefined();
+        expect(results[0].explain?.raw?.keyword).toBeDefined();
+    }, 30000);
+    it("can disable score normalization", async () => {
+        const engine = await Simile.from(testItems, { normalizeScores: false });
+        const results = await engine.search("cleaner", { explain: true });
+        // With normalization disabled, the reported scores should equal the raw scores
+        expect(results[0].explain?.semantic).toBe(results[0].explain?.raw?.semantic);
+    }, 30000);
 });
 describe("simile persistence", () => {
     const snapshotPath = path.join(__dirname, "../.test-snapshot.json");
@@ -92,6 +201,19 @@ describe("simile persistence", () => {
         // Cleanup
         fs.unlinkSync(snapshotPath);
     }, 30000);
+    it("preserves textPaths in snapshot", async () => {
+        const nestedItems = [
+            { id: "1", text: "", metadata: { title: "Hello World" } },
+        ];
+        const engine = await Simile.from(nestedItems, {
+            textPaths: ["metadata.title"],
+        });
+        const snapshot = engine.save();
+        expect(snapshot.textPaths).toEqual(["metadata.title"]);
+        const loaded = Simile.load(snapshot);
+        const results = await loaded.search("Hello");
+        expect(results.length).toBeGreaterThan(0);
+    }, 30000);
 });
 describe("simile dynamic items", () => {
     it("adds new items", async () => {

package/dist/index.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
-export { Simile } from "./engine";
-export * from "./types";
-export { embed, embedBatch, vectorToBase64, base64ToVector } from "./embedder";
-export { cosine, fuzzyScore, keywordScore } from "./similarity";
-export { hybridScore, getDefaultWeights } from "./ranker";
+export * from "./types.js";
+export { embed, embedBatch, vectorToBase64, base64ToVector } from "./embedder.js";
+export { cosine, fuzzyScore, keywordScore, calculateScoreStats } from "./similarity.js";
+export { hybridScore, getDefaultWeights } from "./ranker.js";
+export { getByPath, extractText, normalizeScore } from "./utils.js";

package/dist/index.js CHANGED Viewed

@@ -1,5 +1,5 @@
-export { Simile } from "./engine";
-export * from "./types";
-export { embed, embedBatch, vectorToBase64, base64ToVector } from "./embedder";
-export { cosine, fuzzyScore, keywordScore } from "./similarity";
-export { hybridScore, getDefaultWeights } from "./ranker";
+export * from "./types.js";
+export { embed, embedBatch, vectorToBase64, base64ToVector } from "./embedder.js";
+export { cosine, fuzzyScore, keywordScore, calculateScoreStats } from "./similarity.js";
+export { hybridScore, getDefaultWeights } from "./ranker.js";
+export { getByPath, extractText, normalizeScore } from "./utils.js";

package/dist/ranker.d.ts CHANGED Viewed

@@ -1,3 +1,3 @@
-import { HybridWeights } from "./types";
+import { HybridWeights } from "./types.js";
 export declare function hybridScore(semantic: number, fuzzy: number, keyword: number, weights?: HybridWeights): number;
 export declare function getDefaultWeights(): Required<HybridWeights>;

package/dist/similarity.d.ts CHANGED Viewed

@@ -1,3 +1,41 @@
+/**
+ * Compute cosine similarity between two vectors.
+ * Both vectors must be unit-normalized (as the embedder's output is): only the dot product is computed.
+ * Returns a value between -1 and 1, where 1 is identical.
+ */
 export declare function cosine(a: Float32Array, b: Float32Array): number;
+/**
+ * Compute fuzzy similarity score using Levenshtein distance.
+ * Returns a value between 0 and 1, where 1 is an exact match.
+ */
 export declare function fuzzyScore(a: string, b: string): number;
+/**
+ * Compute keyword match score.
+ * Returns the proportion of query words found in the text (0 to 1).
+ */
 export declare function keywordScore(query: string, text: string): number;
+/**
+ * Score normalization statistics for a batch of results.
+ */
+export interface ScoreStats {
+    semantic: {
+        min: number;
+        max: number;
+    };
+    fuzzy: {
+        min: number;
+        max: number;
+    };
+    keyword: {
+        min: number;
+        max: number;
+    };
+}
+/**
+ * Calculate min/max statistics for score normalization.
+ */
+export declare function calculateScoreStats(scores: Array<{
+    semantic: number;
+    fuzzy: number;
+    keyword: number;
+}>): ScoreStats;

package/dist/similarity.js CHANGED Viewed

@@ -1,18 +1,63 @@
 import levenshtein from "fast-levenshtein";
+/**
+ * Compute cosine similarity between two vectors.
+ * Both vectors must be unit-normalized (as the embedder's output is): only the dot product is computed.
+ * Returns a value between -1 and 1, where 1 is identical.
+ */
 export function cosine(a, b) {
     let dot = 0;
     for (let i = 0; i < a.length; i++)
         dot += a[i] * b[i];
     return dot;
 }
+/**
+ * Compute fuzzy similarity score using Levenshtein distance.
+ * Returns a value between 0 and 1, where 1 is an exact match.
+ */
 export function fuzzyScore(a, b) {
-    const dist = levenshtein.get(a.toLowerCase(), b.toLowerCase());
+    const aLower = a.toLowerCase();
+    const bLower = b.toLowerCase();
+    const dist = levenshtein.get(aLower, bLower);
     const maxLen = Math.max(a.length, b.length);
+    if (maxLen === 0)
+        return 1;
     return 1 - dist / maxLen;
 }
+/**
+ * Compute keyword match score.
+ * Returns the proportion of query words found in the text (0 to 1).
+ */
 export function keywordScore(query, text) {
-    const q = query.toLowerCase().split(" ");
-    const t = text.toLowerCase();
-    const hits = q.filter((w) => t.includes(w)).length;
-    return hits / q.length;
+    const queryWords = query.toLowerCase().split(/\s+/).filter(Boolean);
+    if (queryWords.length === 0)
+        return 0;
+    const textLower = text.toLowerCase();
+    const hits = queryWords.filter((w) => textLower.includes(w)).length;
+    return hits / queryWords.length;
+}
+/**
+ * Calculate min/max statistics for score normalization.
+ */
+export function calculateScoreStats(scores) {
+    if (scores.length === 0) {
+        return {
+            semantic: { min: 0, max: 1 },
+            fuzzy: { min: 0, max: 1 },
+            keyword: { min: 0, max: 1 },
+        };
+    }
+    const stats = {
+        semantic: { min: Infinity, max: -Infinity },
+        fuzzy: { min: Infinity, max: -Infinity },
+        keyword: { min: Infinity, max: -Infinity },
+    };
+    for (const score of scores) {
+        stats.semantic.min = Math.min(stats.semantic.min, score.semantic);
+        stats.semantic.max = Math.max(stats.semantic.max, score.semantic);
+        stats.fuzzy.min = Math.min(stats.fuzzy.min, score.fuzzy);
+        stats.fuzzy.max = Math.max(stats.fuzzy.max, score.fuzzy);
+        stats.keyword.min = Math.min(stats.keyword.min, score.keyword);
+        stats.keyword.max = Math.max(stats.keyword.max, score.keyword);
+    }
+    return stats;
 }

package/dist/types.d.ts CHANGED Viewed

@@ -12,6 +12,12 @@ export interface SearchResult<T = any> {
         semantic: number;
         fuzzy: number;
         keyword: number;
+        /** Raw scores before normalization */
+        raw?: {
+            semantic: number;
+            fuzzy: number;
+            keyword: number;
+        };
     };
 }
 export interface SearchOptions {
@@ -20,6 +26,8 @@ export interface SearchOptions {
     filter?: (metadata: any) => boolean;
     /** Minimum score threshold (0-1). Results below this are filtered out */
     threshold?: number;
+    /** Minimum query length to trigger search (default: 1) */
+    minLength?: number;
 }
 export interface HybridWeights {
     /** Semantic similarity weight (0-1), default: 0.7 */
@@ -34,6 +42,14 @@ export interface SimileConfig {
     weights?: HybridWeights;
     /** Model to use for embeddings (default: "Xenova/all-MiniLM-L6-v2") */
     model?: string;
+    /**
+     * Paths to extract searchable text from items.
+     * Supports nested paths like "author.firstName" or "tags[0]".
+     * If not provided, uses the 'text' field directly.
+     */
+    textPaths?: string[];
+    /** Whether to normalize scores across different scoring methods (default: true) */
+    normalizeScores?: boolean;
 }
 /** Serialized state for persistence */
 export interface SimileSnapshot<T = any> {
@@ -43,4 +59,6 @@ export interface SimileSnapshot<T = any> {
     /** Base64-encoded Float32Array vectors */
     vectors: string[];
     createdAt: string;
+    /** Text paths used for extraction */
+    textPaths?: string[];
 }

package/dist/utils.d.ts ADDED Viewed

@@ -0,0 +1,31 @@
+/**
+ * Extract a value from an object using a dot-notation path.
+ * Supports nested paths like "author.firstName" and array access like "tags[0]".
+ *
+ * @example
+ * getByPath({ author: { firstName: "John" } }, "author.firstName") // "John"
+ * getByPath({ tags: ["a", "b"] }, "tags[1]") // "b"
+ * getByPath({ items: [{ name: "x" }] }, "items[0].name") // "x"
+ */
+export declare function getByPath(obj: any, path: string): any;
+/**
+ * Extract searchable text from an item using configured paths.
+ * If paths are provided, extracts and joins values from those paths.
+ * Otherwise, returns the item's 'text' field directly.
+ *
+ * @example
+ * // With paths
+ * extractText(
+ *   { id: "1", text: "", metadata: { author: { name: "John" }, title: "Hello" } },
+ *   ["metadata.author.name", "metadata.title"]
+ * ) // "John Hello"
+ *
+ * // Without paths
+ * extractText({ id: "1", text: "Hello World" }) // "Hello World"
+ */
+export declare function extractText(item: any, paths?: string[]): string;
+/**
+ * Normalize a score to a 0-1 range using min-max normalization.
+ * When min equals max, returns 1 if the value is positive and 0 otherwise.
+ */
+export declare function normalizeScore(value: number, min: number, max: number): number;

package/dist/utils.js ADDED Viewed

@@ -0,0 +1,66 @@
+/**
+ * Extract a value from an object using a dot-notation path.
+ * Supports nested paths like "author.firstName" and array access like "tags[0]".
+ *
+ * @example
+ * getByPath({ author: { firstName: "John" } }, "author.firstName") // "John"
+ * getByPath({ tags: ["a", "b"] }, "tags[1]") // "b"
+ * getByPath({ items: [{ name: "x" }] }, "items[0].name") // "x"
+ */
+export function getByPath(obj, path) {
+    if (!obj || !path)
+        return undefined;
+    // Handle array notation: convert "items[0].name" to "items.0.name"
+    const normalizedPath = path.replace(/\[(\d+)\]/g, ".$1");
+    const keys = normalizedPath.split(".");
+    let current = obj;
+    for (const key of keys) {
+        if (current === null || current === undefined) {
+            return undefined;
+        }
+        current = current[key];
+    }
+    return current;
+}
+/**
+ * Extract searchable text from an item using configured paths.
+ * If paths are provided, extracts and joins values from those paths.
+ * Otherwise, returns the item's 'text' field directly.
+ *
+ * @example
+ * // With paths
+ * extractText(
+ *   { id: "1", text: "", metadata: { author: { name: "John" }, title: "Hello" } },
+ *   ["metadata.author.name", "metadata.title"]
+ * ) // "John Hello"
+ *
+ * // Without paths
+ * extractText({ id: "1", text: "Hello World" }) // "Hello World"
+ */
+export function extractText(item, paths) {
+    if (!paths || paths.length === 0) {
+        return item.text || "";
+    }
+    const parts = [];
+    for (const path of paths) {
+        const value = getByPath(item, path);
+        if (value !== null && value !== undefined) {
+            if (Array.isArray(value)) {
+                parts.push(value.filter((v) => v != null).join(" "));
+            }
+            else {
+                parts.push(String(value));
+            }
+        }
+    }
+    return parts.join(" ").trim();
+}
+/**
+ * Normalize a score to a 0-1 range using min-max normalization.
+ * When min equals max, returns 1 if the value is positive and 0 otherwise.
+ */
+export function normalizeScore(value, min, max) {
+    if (max === min)
+        return value > 0 ? 1 : 0;
+    return (value - min) / (max - min);
+}

package/package.json CHANGED Viewed

@@ -1,7 +1,8 @@
 {
   "name": "simile-search",
-  "version": "0.2.0",
+  "version": "0.3.1",
   "description": "Offline-first semantic + fuzzy search engine for catalogs, names, and products",
+  "type": "module",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
   "scripts": {
@@ -34,5 +35,11 @@
     "ts-node": "^10.9.2",
     "typescript": "^5.0.0",
     "vitest": "^4.0.16"
+  },
+  "exports": {
+    ".": {
+      "import": "./dist/index.js",
+      "require": "./dist/index.cjs"
+    }
   }
 }