simile-search 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +174 -236
- package/dist/utils.d.ts +1 -1
- package/dist/utils.js +4 -3
- package/package.json +4 -4
package/README.md
CHANGED
|
@@ -1,45 +1,48 @@
|
|
|
1
1
|
<div align="center">
|
|
2
|
-
<img src="assets/logo.
|
|
2
|
+
<img src="assets/logo.svg" alt="Simile Logo" width="200">
|
|
3
|
+
|
|
4
|
+
# Simile
|
|
5
|
+
|
|
6
|
+
**Intelligent offline-first semantic search for modern applications**
|
|
7
|
+
|
|
8
|
+
[](https://www.npmjs.com/package/simile-search)
|
|
9
|
+
[](https://www.npmjs.com/package/simile-search)
|
|
10
|
+
[](https://github.com/iaavas/simile/blob/main/LICENSE)
|
|
11
|
+
|
|
3
12
|
</div>
|
|
4
13
|
|
|
5
|
-
|
|
14
|
+
---
|
|
6
15
|
|
|
7
|
-
|
|
8
|
-

|
|
9
|
-

|
|
16
|
+
## Overview
|
|
10
17
|
|
|
11
|
-
|
|
18
|
+
Simile is a high-performance search engine that combines semantic understanding, fuzzy matching, and keyword search to deliver highly relevant results—entirely offline. Built with Transformers.js, it requires no API calls, runs completely locally, and scales to handle large datasets efficiently.
|
|
12
19
|
|
|
13
|
-
|
|
20
|
+
Perfect for product catalogs, content libraries, user directories, and any application requiring intelligent search without external dependencies.
|
|
14
21
|
|
|
15
|
-
##
|
|
22
|
+
## Key Features
|
|
16
23
|
|
|
17
|
-
-
|
|
18
|
-
-
|
|
19
|
-
-
|
|
20
|
-
-
|
|
21
|
-
-
|
|
22
|
-
-
|
|
23
|
-
-
|
|
24
|
-
-
|
|
25
|
-
-
|
|
26
|
-
- 📦 **Zero API Calls** - Everything runs locally with Transformers.js
|
|
27
|
-
- π **Nested Path Search** - Search `author.firstName` instead of flat strings
|
|
28
|
-
- π **Score Normalization** - Consistent scoring across different methods
|
|
29
|
-
- ⚙️ **Min Character Limit** - Control when search triggers
|
|
24
|
+
- **🧠 Semantic Understanding** — Finds conceptually similar items, not just keyword matches ("phone charger" → "USB-C cable")
|
|
25
|
+
- **🔤 Typo Tolerance** — Fuzzy matching handles misspellings and partial queries gracefully
|
|
26
|
+
- **⚡ Lightning Fast** — O(log n) search with HNSW indexing for datasets of 10k+ items
|
|
27
|
+
- **💾 Memory Efficient** — Quantization support (float16/int8) reduces memory usage by up to 75%
|
|
28
|
+
- **🔄 Non-blocking Updates** — Asynchronous indexing keeps your application responsive
|
|
29
|
+
- **📦 Zero Dependencies on APIs** — Runs entirely locally with Transformers.js
|
|
30
|
+
- **🔍 Deep Object Search** — Query nested fields with dot notation (`author.firstName`)
|
|
31
|
+
- **💾 Persistent Storage** — Save and load embeddings to avoid recomputation
|
|
32
|
+
- **🎯 Highly Configurable** — Tune scoring weights, thresholds, and search behavior
|
|
30
33
|
|
|
31
|
-
##
|
|
34
|
+
## Installation
|
|
32
35
|
|
|
33
36
|
```bash
|
|
34
37
|
npm install simile-search
|
|
35
38
|
```
|
|
36
39
|
|
|
37
|
-
##
|
|
40
|
+
## Quick Start
|
|
38
41
|
|
|
39
42
|
```typescript
|
|
40
43
|
import { Simile } from 'simile-search';
|
|
41
44
|
|
|
42
|
-
//
|
|
45
|
+
// Initialize search engine
|
|
43
46
|
const engine = await Simile.from([
|
|
44
47
|
{ id: '1', text: 'Bathroom floor cleaner', metadata: { category: 'Cleaning' } },
|
|
45
48
|
{ id: '2', text: 'Dishwashing liquid', metadata: { category: 'Kitchen' } },
|
|
@@ -47,144 +50,140 @@ const engine = await Simile.from([
|
|
|
47
50
|
{ id: '4', text: 'USB-C phone charger cable', metadata: { category: 'Electronics' } },
|
|
48
51
|
]);
|
|
49
52
|
|
|
50
|
-
// Search
|
|
53
|
+
// Search with natural language
|
|
51
54
|
const results = await engine.search('phone charger');
|
|
52
55
|
console.log(results);
|
|
53
56
|
// [
|
|
54
57
|
// { id: '3', text: 'iPhone Charger', score: 0.92, ... },
|
|
55
|
-
// { id: '4', text: 'USB-C phone charger cable', score: 0.87, ... }
|
|
56
|
-
// ...
|
|
58
|
+
// { id: '4', text: 'USB-C phone charger cable', score: 0.87, ... }
|
|
57
59
|
// ]
|
|
58
60
|
```
|
|
59
61
|
|
|
60
|
-
##
|
|
62
|
+
## Core Concepts
|
|
63
|
+
|
|
64
|
+
### Persistence
|
|
61
65
|
|
|
62
|
-
|
|
66
|
+
Avoid re-embedding on every startup by saving your index:
|
|
63
67
|
|
|
64
68
|
```typescript
|
|
65
69
|
import { Simile } from 'simile-search';
|
|
66
70
|
import * as fs from 'fs';
|
|
67
71
|
|
|
68
|
-
//
|
|
72
|
+
// Initial setup: embed and save
|
|
69
73
|
const engine = await Simile.from(items);
|
|
70
|
-
fs.writeFileSync('
|
|
74
|
+
fs.writeFileSync('search-index.json', engine.toJSON());
|
|
71
75
|
|
|
72
|
-
//
|
|
73
|
-
const json = fs.readFileSync('
|
|
76
|
+
// Subsequent loads: instant startup
|
|
77
|
+
const json = fs.readFileSync('search-index.json', 'utf-8');
|
|
74
78
|
const loadedEngine = Simile.loadFromJSON(json);
|
|
75
79
|
|
|
76
|
-
//
|
|
77
|
-
const results = await loadedEngine.search('
|
|
80
|
+
// Functionally identical to the original
|
|
81
|
+
const results = await loadedEngine.search('query');
|
|
78
82
|
```
|
|
79
83
|
|
|
80
|
-
|
|
84
|
+
**Snapshot Format** for database storage:
|
|
81
85
|
|
|
82
86
|
```typescript
|
|
83
|
-
// For database storage
|
|
84
87
|
const snapshot = engine.save();
|
|
85
88
|
// {
|
|
86
89
|
// version: '0.2.0',
|
|
87
90
|
// model: 'Xenova/all-MiniLM-L6-v2',
|
|
88
91
|
// items: [...],
|
|
89
|
-
// vectors: ['base64...'
|
|
92
|
+
// vectors: ['base64...'],
|
|
90
93
|
// createdAt: '2024-12-28T...',
|
|
91
|
-
// textPaths: [
|
|
94
|
+
// textPaths: [...]
|
|
92
95
|
// }
|
|
93
96
|
|
|
94
|
-
// Load from snapshot object
|
|
95
97
|
const restored = Simile.load(snapshot);
|
|
96
98
|
```
|
|
97
99
|
|
|
98
|
-
|
|
100
|
+
### Nested Object Search
|
|
99
101
|
|
|
100
|
-
Search complex
|
|
102
|
+
Search complex data structures by specifying extraction paths:
|
|
101
103
|
|
|
102
104
|
```typescript
|
|
103
105
|
const books = [
|
|
104
106
|
{
|
|
105
107
|
id: '1',
|
|
106
|
-
text: '', // Can be empty when using textPaths
|
|
107
108
|
metadata: {
|
|
108
109
|
author: { firstName: 'John', lastName: 'Doe' },
|
|
109
110
|
title: 'The Art of Programming',
|
|
110
111
|
tags: ['coding', 'javascript'],
|
|
111
112
|
},
|
|
112
113
|
},
|
|
113
|
-
{
|
|
114
|
-
id: '2',
|
|
115
|
-
text: '',
|
|
116
|
-
metadata: {
|
|
117
|
-
author: { firstName: 'Jane', lastName: 'Smith' },
|
|
118
|
-
title: 'Machine Learning Basics',
|
|
119
|
-
tags: ['ai', 'python'],
|
|
120
|
-
},
|
|
121
|
-
},
|
|
122
114
|
];
|
|
123
115
|
|
|
124
|
-
// Configure which paths to extract and search
|
|
125
116
|
const engine = await Simile.from(books, {
|
|
126
117
|
textPaths: [
|
|
127
118
|
'metadata.author.firstName',
|
|
128
119
|
'metadata.author.lastName',
|
|
129
120
|
'metadata.title',
|
|
130
|
-
'metadata.tags', // Arrays are joined
|
|
121
|
+
'metadata.tags', // Arrays are automatically joined
|
|
131
122
|
],
|
|
132
123
|
});
|
|
133
124
|
|
|
134
|
-
//
|
|
125
|
+
// Search across all configured paths
|
|
135
126
|
const results = await engine.search('John programming');
|
|
136
|
-
// Finds "The Art of Programming" by John Doe
|
|
137
127
|
```
|
|
138
128
|
|
|
139
|
-
|
|
129
|
+
**Supported path formats:**
|
|
130
|
+
- Nested objects: `metadata.author.firstName`
|
|
131
|
+
- Array indexing: `items[0].name`
|
|
132
|
+
- Array joining: `metadata.tags` (joins all elements)
|
|
133
|
+
|
|
134
|
+
### Dynamic Catalog Management
|
|
135
|
+
|
|
136
|
+
Update your search index without rebuilding:
|
|
140
137
|
|
|
141
138
|
```typescript
|
|
142
|
-
//
|
|
143
|
-
|
|
139
|
+
// Add new items
|
|
140
|
+
await engine.add([
|
|
141
|
+
{ id: '5', text: 'Wireless headphones', metadata: { category: 'Electronics' } }
|
|
142
|
+
]);
|
|
143
|
+
|
|
144
|
+
// Update existing items (by ID)
|
|
145
|
+
await engine.add([
|
|
146
|
+
{ id: '1', text: 'Premium bathroom cleaner', metadata: { category: 'Cleaning' } }
|
|
147
|
+
]);
|
|
144
148
|
|
|
145
|
-
//
|
|
146
|
-
|
|
147
|
-
'items[0].name' // β nested array access
|
|
149
|
+
// Remove items
|
|
150
|
+
engine.remove(['2', '3']);
|
|
148
151
|
|
|
149
|
-
//
|
|
150
|
-
|
|
152
|
+
// Retrieve items
|
|
153
|
+
const item = engine.get('1');
|
|
154
|
+
const allItems = engine.getAll();
|
|
155
|
+
console.log(engine.size); // Current item count
|
|
151
156
|
```
|
|
152
157
|
|
|
153
|
-
##
|
|
158
|
+
## Configuration
|
|
154
159
|
|
|
155
|
-
###
|
|
160
|
+
### Scoring Weights
|
|
156
161
|
|
|
157
|
-
|
|
162
|
+
Customize how different matching strategies contribute to the final score:
|
|
158
163
|
|
|
159
164
|
```typescript
|
|
160
165
|
const engine = await Simile.from(items, {
|
|
161
166
|
weights: {
|
|
162
|
-
semantic: 0.7, // AI embedding similarity (default
|
|
163
|
-
fuzzy: 0.15, // Levenshtein distance
|
|
164
|
-
keyword: 0.15, // Exact keyword
|
|
167
|
+
semantic: 0.7, // AI embedding similarity (default)
|
|
168
|
+
fuzzy: 0.15, // Levenshtein distance
|
|
169
|
+
keyword: 0.15, // Exact keyword matching
|
|
165
170
|
}
|
|
166
171
|
});
|
|
167
172
|
|
|
168
|
-
//
|
|
173
|
+
// Adjust weights dynamically
|
|
169
174
|
engine.setWeights({ semantic: 0.9, fuzzy: 0.05, keyword: 0.05 });
|
|
170
175
|
```
|
|
171
176
|
|
|
172
177
|
### Score Normalization
|
|
173
178
|
|
|
174
|
-
|
|
179
|
+
Simile normalizes scores across different matching methods for fair comparison:
|
|
175
180
|
|
|
176
181
|
```typescript
|
|
177
|
-
// Enabled by default
|
|
178
182
|
const engine = await Simile.from(items, {
|
|
179
|
-
normalizeScores: true, // default
|
|
180
|
-
});
|
|
181
|
-
|
|
182
|
-
// Disable if you want raw scores
|
|
183
|
-
const rawEngine = await Simile.from(items, {
|
|
184
|
-
normalizeScores: false,
|
|
183
|
+
normalizeScores: true, // Enabled by default
|
|
185
184
|
});
|
|
186
185
|
|
|
187
|
-
//
|
|
186
|
+
// View normalized and raw scores
|
|
188
187
|
const results = await engine.search('cleaner', { explain: true });
|
|
189
188
|
// {
|
|
190
189
|
// score: 1.0,
|
|
@@ -193,9 +192,9 @@ const results = await engine.search('cleaner', { explain: true });
|
|
|
193
192
|
// fuzzy: 1.0, // normalized
|
|
194
193
|
// keyword: 1.0, // normalized
|
|
195
194
|
// raw: {
|
|
196
|
-
// semantic: 0.62,
|
|
197
|
-
// fuzzy: 0.32,
|
|
198
|
-
// keyword: 1.0
|
|
195
|
+
// semantic: 0.62,
|
|
196
|
+
// fuzzy: 0.32,
|
|
197
|
+
// keyword: 1.0
|
|
199
198
|
// }
|
|
200
199
|
// }
|
|
201
200
|
// }
|
|
@@ -203,60 +202,82 @@ const results = await engine.search('cleaner', { explain: true });
|
|
|
203
202
|
|
|
204
203
|
### Search Options
|
|
205
204
|
|
|
205
|
+
Fine-tune search behavior per query:
|
|
206
|
+
|
|
206
207
|
```typescript
|
|
207
208
|
const results = await engine.search('cleaner', {
|
|
208
|
-
topK: 10,
|
|
209
|
-
threshold: 0.5,
|
|
210
|
-
explain: true,
|
|
211
|
-
filter: (meta) => meta.category === 'Cleaning',
|
|
212
|
-
minLength: 3,
|
|
209
|
+
topK: 10, // Maximum results (default: 5)
|
|
210
|
+
threshold: 0.5, // Minimum score cutoff
|
|
211
|
+
explain: true, // Include score breakdown
|
|
212
|
+
filter: (meta) => meta.category === 'Cleaning', // Metadata filtering
|
|
213
|
+
minLength: 3, // Minimum query length (default: 1)
|
|
213
214
|
});
|
|
214
215
|
```
|
|
215
216
|
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
Prevent unnecessary searches on very short queries:
|
|
217
|
+
**Minimum character limit** prevents unnecessary searches on partial input:
|
|
219
218
|
|
|
220
219
|
```typescript
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
// Returns [] because query length (2) < minLength (3)
|
|
224
|
-
|
|
225
|
-
const results2 = await engine.search('cle', { minLength: 3 });
|
|
226
|
-
// Returns results because query length (3) >= minLength (3)
|
|
220
|
+
await engine.search('cl', { minLength: 3 }); // Returns [] (too short)
|
|
221
|
+
await engine.search('cle', { minLength: 3 }); // Returns results
|
|
227
222
|
```
|
|
228
223
|
|
|
229
|
-
|
|
224
|
+
## Performance Optimization
|
|
230
225
|
|
|
231
|
-
|
|
226
|
+
Simile is designed to scale efficiently from hundreds to hundreds of thousands of items.
|
|
232
227
|
|
|
233
|
-
|
|
228
|
+
### Quantization
|
|
229
|
+
|
|
230
|
+
Reduce memory usage with lower-precision vector representations:
|
|
234
231
|
|
|
235
232
|
```typescript
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
233
|
+
const engine = await Simile.from(items, {
|
|
234
|
+
quantization: 'float16', // 50% memory reduction, minimal accuracy loss
|
|
235
|
+
// OR
|
|
236
|
+
quantization: 'int8', // 75% memory reduction, slight accuracy trade-off
|
|
237
|
+
});
|
|
238
|
+
```
|
|
240
239
|
|
|
241
|
-
|
|
242
|
-
await engine.add([
|
|
243
|
-
{ id: '1', text: 'Premium bathroom cleaner', metadata: { category: 'Cleaning' } }
|
|
244
|
-
]);
|
|
240
|
+
### Approximate Nearest Neighbor (ANN) Search
|
|
245
241
|
|
|
246
|
-
|
|
247
|
-
engine.remove(['2', '3']);
|
|
242
|
+
For large datasets, HNSW indexing provides logarithmic search time:
|
|
248
243
|
|
|
249
|
-
|
|
250
|
-
const
|
|
244
|
+
```typescript
|
|
245
|
+
const engine = await Simile.from(items, {
|
|
246
|
+
useANN: true, // Enable ANN indexing
|
|
247
|
+
annThreshold: 1000, // Auto-enable when items > threshold (default: 1000)
|
|
248
|
+
});
|
|
249
|
+
```
|
|
251
250
|
|
|
252
|
-
|
|
253
|
-
|
|
251
|
+
### Vector Caching
|
|
252
|
+
|
|
253
|
+
LRU cache eliminates redundant embeddings for duplicate texts:
|
|
254
|
+
|
|
255
|
+
```typescript
|
|
256
|
+
const engine = await Simile.from(items, {
|
|
257
|
+
cache: {
|
|
258
|
+
maxSize: 5000, // Cache up to 5000 embeddings
|
|
259
|
+
enableStats: true, // Track cache performance
|
|
260
|
+
}
|
|
261
|
+
});
|
|
254
262
|
|
|
255
|
-
//
|
|
256
|
-
|
|
263
|
+
// Monitor cache efficiency
|
|
264
|
+
const stats = engine.getIndexInfo().cacheStats;
|
|
265
|
+
console.log(`Hit rate: ${stats.hitRate}%`);
|
|
257
266
|
```
|
|
258
267
|
|
|
259
|
-
|
|
268
|
+
### Background Indexing
|
|
269
|
+
|
|
270
|
+
Updates are processed asynchronously to maintain responsiveness:
|
|
271
|
+
|
|
272
|
+
```typescript
|
|
273
|
+
// Returns immediately, processes in background
|
|
274
|
+
await engine.add(newItems);
|
|
275
|
+
await engine.add(moreItems);
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
## Advanced Usage
|
|
279
|
+
|
|
280
|
+
### Direct Utility Access
|
|
260
281
|
|
|
261
282
|
For custom implementations:
|
|
262
283
|
|
|
@@ -268,18 +289,12 @@ import {
|
|
|
268
289
|
fuzzyScore,
|
|
269
290
|
keywordScore,
|
|
270
291
|
hybridScore,
|
|
271
|
-
vectorToBase64,
|
|
272
|
-
base64ToVector,
|
|
273
292
|
getByPath,
|
|
274
293
|
extractText,
|
|
275
|
-
normalizeScore,
|
|
276
|
-
calculateScoreStats,
|
|
277
294
|
} from 'simile-search';
|
|
278
295
|
|
|
279
|
-
//
|
|
296
|
+
// Generate embeddings
|
|
280
297
|
const vector = await embed('hello world');
|
|
281
|
-
|
|
282
|
-
// Batch embed for performance
|
|
283
298
|
const vectors = await embedBatch(['text1', 'text2', 'text3']);
|
|
284
299
|
|
|
285
300
|
// Calculate similarities
|
|
@@ -288,52 +303,36 @@ const fuzzy = fuzzyScore('cleaner', 'cleenr');
|
|
|
288
303
|
const keyword = keywordScore('phone charger', 'USB phone charger cable');
|
|
289
304
|
|
|
290
305
|
// Combine scores
|
|
291
|
-
const
|
|
306
|
+
const finalScore = hybridScore(
|
|
307
|
+
0.8, 0.6, 0.5,
|
|
308
|
+
{ semantic: 0.7, fuzzy: 0.15, keyword: 0.15 }
|
|
309
|
+
);
|
|
292
310
|
|
|
293
|
-
// Extract nested
|
|
311
|
+
// Extract nested data
|
|
294
312
|
const firstName = getByPath(obj, 'author.firstName');
|
|
295
313
|
const text = extractText(item, ['metadata.title', 'metadata.tags']);
|
|
296
314
|
```
|
|
297
315
|
|
|
298
|
-
##
|
|
299
|
-
|
|
300
|
-
### `Simile.from(items, config?)`
|
|
301
|
-
Create a new engine from items. Embeds all items (async).
|
|
302
|
-
|
|
303
|
-
### `Simile.load(snapshot, config?)`
|
|
304
|
-
Load from a saved snapshot (instant, no embedding).
|
|
305
|
-
|
|
306
|
-
### `Simile.loadFromJSON(json, config?)`
|
|
307
|
-
Load from JSON string.
|
|
308
|
-
|
|
309
|
-
### `engine.search(query, options?)`
|
|
310
|
-
Search for similar items. **Results are always sorted by relevance (highest score first).**
|
|
311
|
-
|
|
312
|
-
### `engine.save()`
|
|
313
|
-
Export snapshot object for persistence.
|
|
314
|
-
|
|
315
|
-
### `engine.toJSON()`
|
|
316
|
-
Export as JSON string.
|
|
317
|
-
|
|
318
|
-
### `engine.add(items)`
|
|
319
|
-
Add or update items (async).
|
|
320
|
-
|
|
321
|
-
### `engine.remove(ids)`
|
|
322
|
-
Remove items by ID.
|
|
323
|
-
|
|
324
|
-
### `engine.get(id)`
|
|
325
|
-
Get single item by ID.
|
|
316
|
+
## API Reference
|
|
326
317
|
|
|
327
|
-
###
|
|
328
|
-
Get all items.
|
|
318
|
+
### Class Methods
|
|
329
319
|
|
|
330
|
-
|
|
331
|
-
|
|
320
|
+
| Method | Description |
|
|
321
|
+
|--------|-------------|
|
|
322
|
+
| `Simile.from(items, config?)` | Create engine from items (async, embeds all) |
|
|
323
|
+
| `Simile.load(snapshot, config?)` | Load from snapshot object (instant) |
|
|
324
|
+
| `Simile.loadFromJSON(json, config?)` | Load from JSON string |
|
|
325
|
+
| `engine.search(query, options?)` | Search for similar items (sorted by relevance) |
|
|
326
|
+
| `engine.save()` | Export snapshot object |
|
|
327
|
+
| `engine.toJSON()` | Export as JSON string |
|
|
328
|
+
| `engine.add(items)` | Add or update items (async) |
|
|
329
|
+
| `engine.remove(ids)` | Remove items by ID |
|
|
330
|
+
| `engine.get(id)` | Retrieve single item |
|
|
331
|
+
| `engine.getAll()` | Retrieve all items |
|
|
332
|
+
| `engine.setWeights(weights)` | Update scoring weights |
|
|
333
|
+
| `engine.size` | Current item count |
|
|
332
334
|
|
|
333
|
-
|
|
334
|
-
Update scoring weights.
|
|
335
|
-
|
|
336
|
-
## 🧪 Types
|
|
335
|
+
## TypeScript Types
|
|
337
336
|
|
|
338
337
|
```typescript
|
|
339
338
|
interface SearchItem<T = any> {
|
|
@@ -359,99 +358,38 @@ interface SearchOptions {
|
|
|
359
358
|
topK?: number;
|
|
360
359
|
explain?: boolean;
|
|
361
360
|
threshold?: number;
|
|
362
|
-
minLength?: number;
|
|
361
|
+
minLength?: number;
|
|
363
362
|
filter?: (metadata: any) => boolean;
|
|
364
363
|
}
|
|
365
364
|
|
|
366
365
|
interface SimileConfig {
|
|
367
366
|
weights?: { semantic?: number; fuzzy?: number; keyword?: number };
|
|
368
367
|
model?: string;
|
|
369
|
-
textPaths?: string[];
|
|
370
|
-
normalizeScores?: boolean;
|
|
368
|
+
textPaths?: string[];
|
|
369
|
+
normalizeScores?: boolean;
|
|
371
370
|
cache?: boolean | CacheOptions;
|
|
372
371
|
quantization?: 'float32' | 'float16' | 'int8';
|
|
373
372
|
useANN?: boolean | HNSWConfig;
|
|
374
373
|
annThreshold?: number;
|
|
375
374
|
}
|
|
376
|
-
|
|
377
|
-
interface CacheOptions {
|
|
378
|
-
maxSize?: number;
|
|
379
|
-
enableStats?: boolean;
|
|
380
|
-
}
|
|
381
|
-
|
|
382
|
-
interface HNSWConfig {
|
|
383
|
-
M?: number;
|
|
384
|
-
efConstruction?: number;
|
|
385
|
-
efSearch?: number;
|
|
386
|
-
}
|
|
387
375
|
```
|
|
388
376
|
|
|
389
|
-
##
|
|
390
|
-
|
|
391
|
-
Simile uses [Xenova/all-MiniLM-L6-v2](https://huggingface.co/Xenova/all-MiniLM-L6-v2) via Transformers.js by default. This model runs entirely in JavaScript—no Python or external APIs required.
|
|
392
|
-
|
|
393
|
-
## π License
|
|
377
|
+
## Technical Details
|
|
394
378
|
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
## ⚡ Performance Optimization
|
|
379
|
+
**Embedding Model:** [Xenova/all-MiniLM-L6-v2](https://huggingface.co/Xenova/all-MiniLM-L6-v2) via Transformers.js
|
|
398
380
|
|
|
399
|
-
|
|
381
|
+
This model runs entirely in JavaScript with no Python runtime or external API dependencies.
|
|
400
382
|
|
|
401
|
-
|
|
383
|
+
## License
|
|
402
384
|
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
```typescript
|
|
406
|
-
const engine = await Simile.from(items, {
|
|
407
|
-
quantization: 'float16', // 50% memory reduction, minimal accuracy loss
|
|
408
|
-
// OR
|
|
409
|
-
quantization: 'int8', // 75% memory reduction, slight accuracy loss
|
|
410
|
-
});
|
|
411
|
-
```
|
|
412
|
-
|
|
413
|
-
### ⚡ O(log n) Search (ANN)
|
|
414
|
-
|
|
415
|
-
For datasets larger than 1,000 items, Simile automatically builds an HNSW (Hierarchical Navigable Small World) index for near-instant search.
|
|
416
|
-
|
|
417
|
-
```typescript
|
|
418
|
-
const engine = await Simile.from(items, {
|
|
419
|
-
useANN: true, // Force enable ANN
|
|
420
|
-
annThreshold: 500, // Enable ANN if items > 500 (default: 1000)
|
|
421
|
-
});
|
|
422
|
-
```
|
|
423
|
-
|
|
424
|
-
### π Vector Caching
|
|
425
|
-
|
|
426
|
-
Avoid redundant AI embedding calls for duplicate texts with built-in LRU caching.
|
|
427
|
-
|
|
428
|
-
```typescript
|
|
429
|
-
const engine = await Simile.from(items, {
|
|
430
|
-
cache: {
|
|
431
|
-
maxSize: 5000, // Cache up to 5000 unique embeddings
|
|
432
|
-
enableStats: true,
|
|
433
|
-
}
|
|
434
|
-
});
|
|
435
|
-
|
|
436
|
-
// Check cache performance
|
|
437
|
-
const stats = engine.getIndexInfo().cacheStats;
|
|
438
|
-
console.log(`Cache Hit Rate: ${stats.hitRate}%`);
|
|
439
|
-
```
|
|
440
|
-
|
|
441
|
-
### 🔄 Non-blocking Background Updates
|
|
385
|
+
MIT © [Aavash Baral](https://github.com/iaavas)
|
|
442
386
|
|
|
443
|
-
|
|
387
|
+
## Contributing
|
|
444
388
|
|
|
445
|
-
|
|
446
|
-
// These return immediately/nearly immediately and process in batches
|
|
447
|
-
engine.add(newItems);
|
|
448
|
-
engine.add(moreItems);
|
|
449
|
-
```
|
|
389
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
450
390
|
|
|
451
391
|
---
|
|
452
392
|
|
|
453
|
-
|
|
454
|
-
<
|
|
455
|
-
|
|
456
|
-
</p>
|
|
457
|
-
|
|
393
|
+
<div align="center">
|
|
394
|
+
<sub>Built with ❤️ by <a href="https://github.com/iaavas">Aavash Baral</a></sub>
|
|
395
|
+
</div>
|
package/dist/utils.d.ts
CHANGED
|
@@ -28,4 +28,4 @@ export declare function extractText(item: any, paths?: string[]): string;
|
|
|
28
28
|
* Normalize a score to a 0-1 range using min-max normalization.
|
|
29
29
|
* Handles edge cases where min equals max.
|
|
30
30
|
*/
|
|
31
|
-
export declare function normalizeScore(value: number, min: number, max: number): number;
|
|
31
|
+
export declare function normalizeScore(value: number, min: number, max: number, floorMax?: number): number;
|
package/dist/utils.js
CHANGED
|
@@ -59,8 +59,9 @@ export function extractText(item, paths) {
|
|
|
59
59
|
* Normalize a score to a 0-1 range using min-max normalization.
|
|
60
60
|
* Handles edge cases where min equals max.
|
|
61
61
|
*/
|
|
62
|
-
export function normalizeScore(value, min, max) {
|
|
63
|
-
|
|
62
|
+
export function normalizeScore(value, min, max, floorMax = 0) {
|
|
63
|
+
const effectiveMax = Math.max(max, floorMax);
|
|
64
|
+
if (effectiveMax <= min)
|
|
64
65
|
return value > 0 ? 1 : 0;
|
|
65
|
-
return (value - min) / (
|
|
66
|
+
return Math.max(0, Math.min(1, (value - min) / (effectiveMax - min)));
|
|
66
67
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "simile-search",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.3",
|
|
4
4
|
"description": "Offline-first semantic + fuzzy search engine for catalogs, names, and products",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -44,10 +44,10 @@
|
|
|
44
44
|
},
|
|
45
45
|
"repository": {
|
|
46
46
|
"type": "git",
|
|
47
|
-
"url": "github.com/iaavas/simile-search"
|
|
47
|
+
"url": "https://github.com/iaavas/simile-search.git"
|
|
48
48
|
},
|
|
49
49
|
"bugs": {
|
|
50
|
-
"url": "github.com/iaavas/simile-search/issues"
|
|
50
|
+
"url": "https://github.com/iaavas/simile-search/issues"
|
|
51
51
|
},
|
|
52
|
-
"homepage": "github.com/iaavas/simile-search"
|
|
52
|
+
"homepage": "https://github.com/iaavas/simile-search"
|
|
53
53
|
}
|