@crashbytes/semantic-text-toolkit 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +24 -0
- package/.github/dependabot.yml +50 -0
- package/.github/workflows/ci.yml +42 -0
- package/.github/workflows/release.yml +50 -0
- package/LICENSE +5 -0
- package/README.md +6 -1
- package/jest.config.js +66 -0
- package/package.json +2 -3
- package/src/__tests__/setup.ts +43 -0
- package/src/__tests__/types.test.ts +128 -0
- package/src/engine/__tests__/SemanticEngine.test.ts +398 -0
- package/src/search/__tests__/SemanticSearch.test.ts +582 -0
- package/src/utils/__tests__/vector.test.ts +354 -0
|
@@ -0,0 +1,582 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SemanticSearch Test Suite
|
|
3
|
+
*
|
|
4
|
+
* Comprehensive validation of semantic search functionality including
|
|
5
|
+
* indexing, searching, filtering, and index management.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Mock the transformers library before any imports
|
|
9
|
+
jest.mock('@xenova/transformers', () => ({
|
|
10
|
+
pipeline: jest.fn(),
|
|
11
|
+
}));
|
|
12
|
+
|
|
13
|
+
import { SemanticSearch, IndexedItem } from '../SemanticSearch';
|
|
14
|
+
import { SemanticEngine } from '../../engine/SemanticEngine';
|
|
15
|
+
import { SemanticError, SemanticErrorCode } from '../../types';
|
|
16
|
+
|
|
17
|
+
// Mock the SemanticEngine
|
|
18
|
+
jest.mock('../../engine/SemanticEngine');
|
|
19
|
+
|
|
20
|
+
const MockedSemanticEngine = SemanticEngine as jest.MockedClass<typeof SemanticEngine>;
|
|
21
|
+
|
|
22
|
+
describe('SemanticSearch', () => {
|
|
23
|
+
let mockEngine: jest.Mocked<SemanticEngine>;
|
|
24
|
+
let search: SemanticSearch<string>;
|
|
25
|
+
|
|
26
|
+
// Helper to create mock embeddings: a deterministic 384-element vector whose
// i-th entry is Math.sin(seed + i) * 0.1.
const createMockEmbedding = (seed: number) =>
  Array.from({ length: 384 }, (_, i) => Math.sin(seed + i) * 0.1);
|
|
29
|
+
|
|
30
|
+
beforeEach(() => {
  jest.clearAllMocks();

  // Both mocked engine calls resolve to the same result shape; only the
  // embedding seed and the reported processing time differ.
  const buildResult = (text: string, seed: number, processingTime: number) => ({
    embedding: createMockEmbedding(seed),
    text,
    metadata: {
      dimensions: 384,
      modelName: 'test-model',
      processingTime,
    },
  });

  // Create mock engine
  mockEngine = new MockedSemanticEngine() as jest.Mocked<SemanticEngine>;

  // Batch embedding: one result per input, seeded by text length + position
  mockEngine.embedBatch = jest.fn().mockImplementation((texts: string[]) =>
    Promise.resolve(texts.map((text, idx) => buildResult(text, text.length + idx, 10)))
  );

  // Single-text embedding: seeded by text length only
  mockEngine.embed = jest.fn().mockImplementation((text: string) =>
    Promise.resolve(buildResult(text, text.length, 5))
  );

  search = new SemanticSearch(mockEngine);
});
|
|
66
|
+
|
|
67
|
+
// Constructor: default construction plus each optional configuration hook.
// `new` can never yield undefined, so these tests assert on the instance type
// and on the fresh (empty) index rather than on toBeDefined().
describe('constructor', () => {
  it('creates search with default configuration', () => {
    expect(search.getStats().itemCount).toBe(0);
  });

  it('accepts custom configuration', () => {
    const customSearch = new SemanticSearch(mockEngine, {
      topK: 5,
      threshold: 0.5,
    });

    expect(customSearch).toBeInstanceOf(SemanticSearch);
    expect(customSearch.getStats().itemCount).toBe(0);
  });

  it('accepts custom text extractor', () => {
    interface Document {
      title: string;
      content: string;
    }

    const docSearch = new SemanticSearch<Document>(mockEngine, {
      textExtractor: (doc) => `${doc.title} ${doc.content}`,
    });

    expect(docSearch).toBeInstanceOf(SemanticSearch);
    expect(docSearch.getStats().itemCount).toBe(0);
  });

  it('accepts custom metadata extractor', () => {
    interface Document {
      id: number;
      text: string;
    }

    const docSearch = new SemanticSearch<Document>(mockEngine, {
      textExtractor: (doc) => doc.text,
      metadataExtractor: (doc) => ({ id: doc.id }),
    });

    expect(docSearch).toBeInstanceOf(SemanticSearch);
    expect(docSearch.getStats().itemCount).toBe(0);
  });
});
|
|
109
|
+
|
|
110
|
+
// index(): input validation, append vs. replace, the batch size handed to the
// engine, and the text/metadata extractor hooks.
describe('index', () => {
  it('indexes array of strings', async () => {
    await search.index(['Hello', 'World', 'Test']);

    const { itemCount, dimensions } = search.getStats();
    expect(itemCount).toBe(3);
    expect(dimensions).toBe(384);
  });

  it('throws on empty array', async () => {
    // Capture the rejection once so the error-code assertion always runs.
    // If the promise resolves instead, `error` is undefined and the
    // instance check below fails the test.
    const error: unknown = await search.index([]).catch((reason: unknown) => reason);

    expect(error).toBeInstanceOf(SemanticError);
    expect((error as SemanticError).code).toBe(SemanticErrorCode.INVALID_INPUT);
  });

  it('throws on non-array input', async () => {
    // Deliberately bypass the type checker to exercise runtime validation.
    await expect(search.index(null as any)).rejects.toThrow(SemanticError);
    await expect(search.index('test' as any)).rejects.toThrow(SemanticError);
  });

  it('appends to existing index by default', async () => {
    await search.index(['A', 'B']);
    await search.index(['C', 'D']);

    expect(search.getStats().itemCount).toBe(4);
  });

  it('replaces index when replace=true', async () => {
    await search.index(['A', 'B', 'C']);
    await search.index(['X', 'Y'], true);

    expect(search.getStats().itemCount).toBe(2);
  });

  it('calls embedBatch with batch size 32', async () => {
    await search.index(['test']);

    expect(mockEngine.embedBatch).toHaveBeenCalledWith(['test'], { batchSize: 32 });
  });

  it('uses custom text extractor', async () => {
    interface Doc {
      title: string;
    }

    const docSearch = new SemanticSearch<Doc>(mockEngine, {
      textExtractor: (doc) => doc.title,
    });

    await docSearch.index([{ title: 'Hello' }, { title: 'World' }]);

    // The engine must receive the extracted titles, not the raw objects.
    expect(mockEngine.embedBatch).toHaveBeenCalledWith(['Hello', 'World'], { batchSize: 32 });
  });

  it('uses custom metadata extractor', async () => {
    interface Doc {
      id: number;
      text: string;
    }

    const docSearch = new SemanticSearch<Doc>(mockEngine, {
      textExtractor: (doc) => doc.text,
      metadataExtractor: (doc) => ({ docId: doc.id }),
    });

    await docSearch.index([
      { id: 1, text: 'Hello' },
      { id: 2, text: 'World' },
    ]);

    const exported = docSearch.exportIndex();
    expect(exported[0].metadata).toEqual({ docId: 1 });
    expect(exported[1].metadata).toEqual({ docId: 2 });
  });
});
|
|
197
|
+
|
|
198
|
+
// search(): result shape, empty-index failure, topK/threshold overrides,
// rank numbering, and score ordering. Every test starts from a five-item index.
describe('search', () => {
  beforeEach(async () => {
    await search.index(['apple', 'banana', 'cherry', 'date', 'elderberry']);
  });

  it('returns search results', async () => {
    const results = await search.search('fruit');

    expect(results.length).toBeGreaterThan(0);
    expect(results[0]).toHaveProperty('item');
    expect(results[0]).toHaveProperty('score');
    expect(results[0]).toHaveProperty('rank');
  });

  it('throws on empty index', async () => {
    const emptySearch = new SemanticSearch(mockEngine);

    // Capture the rejection once so the code/message assertions always run.
    // If search() resolves instead, the resolved value is not a
    // SemanticError and the instance check fails the test.
    const error: unknown = await emptySearch
      .search('test')
      .catch((reason: unknown) => reason);

    expect(error).toBeInstanceOf(SemanticError);
    expect((error as SemanticError).code).toBe(SemanticErrorCode.INVALID_INPUT);
    expect((error as SemanticError).message).toContain('Index is empty');
  });

  it('respects topK configuration', async () => {
    const results = await search.search('fruit', { topK: 3 });

    expect(results.length).toBeLessThanOrEqual(3);
  });

  it('respects threshold configuration', async () => {
    const results = await search.search('fruit', { threshold: 0.99 });

    // High threshold likely filters out all results; whatever survives
    // must meet it.
    results.forEach((result) => {
      expect(result.score).toBeGreaterThanOrEqual(0.99);
    });
  });

  it('assigns correct ranks', async () => {
    const results = await search.search('fruit', { topK: 5 });

    // Ranks are 1-based and follow result order.
    results.forEach((result, idx) => {
      expect(result.rank).toBe(idx + 1);
    });
  });

  it('orders results by score descending', async () => {
    const results = await search.search('fruit', { topK: 5 });

    for (let i = 0; i < results.length - 1; i++) {
      expect(results[i].score).toBeGreaterThanOrEqual(results[i + 1].score);
    }
  });

  it('uses default topK of 10', async () => {
    // Replace the index with more than 10 items
    await search.index(
      Array.from({ length: 15 }, (_, i) => `item${i}`),
      true
    );

    const results = await search.search('item');

    expect(results.length).toBeLessThanOrEqual(10);
  });
});
|
|
268
|
+
|
|
269
|
+
// searchWithFilter(): metadata predicates, index size after filtering, config
// overrides, and items that carry no metadata.
describe('searchWithFilter', () => {
  interface Document {
    text: string;
    category: string;
  }

  let docSearch: SemanticSearch<Document>;

  beforeEach(async () => {
    docSearch = new SemanticSearch<Document>(mockEngine, {
      textExtractor: (doc) => doc.text,
      metadataExtractor: (doc) => ({ category: doc.category }),
    });

    await docSearch.index([
      { text: 'apple pie', category: 'dessert' },
      { text: 'banana bread', category: 'dessert' },
      { text: 'chicken soup', category: 'main' },
      { text: 'beef stew', category: 'main' },
      { text: 'fruit salad', category: 'appetizer' },
    ]);
  });

  it('filters results by metadata', async () => {
    const results = await docSearch.searchWithFilter(
      'food',
      (metadata) => metadata.category === 'dessert'
    );

    results.forEach((result) => {
      expect(result.item.category).toBe('dessert');
    });
  });

  it('restores original index after filtering', async () => {
    const beforeCount = docSearch.getStats().itemCount;

    await docSearch.searchWithFilter(
      'food',
      (metadata) => metadata.category === 'dessert'
    );

    expect(docSearch.getStats().itemCount).toBe(beforeCount);
  });

  it('handles filter that matches no items', async () => {
    // Two outcomes are accepted: an empty result list, or a SemanticError
    // with INVALID_INPUT for the empty filtered index.
    // NOTE(review): pin this to one outcome once the intended contract of
    // searchWithFilter is confirmed.
    // The outcome is captured first so that an assertion failure is reported
    // directly instead of being swallowed by a surrounding catch.
    const outcome: unknown = await docSearch
      .searchWithFilter('food', (metadata) => metadata.category === 'nonexistent')
      .catch((reason: unknown) => reason);

    if (Array.isArray(outcome)) {
      expect(outcome).toEqual([]);
    } else {
      expect(outcome).toBeInstanceOf(SemanticError);
      expect((outcome as SemanticError).code).toBe(SemanticErrorCode.INVALID_INPUT);
    }
  });

  it('handles empty metadata', async () => {
    await search.index(['a', 'b', 'c']);

    const results = await search.searchWithFilter('test', () => true);

    expect(results.length).toBeGreaterThan(0);
  });

  it('respects config overrides', async () => {
    const results = await docSearch.searchWithFilter('food', () => true, { topK: 2 });

    expect(results.length).toBeLessThanOrEqual(2);
  });

  it('handles items with undefined metadata', async () => {
    // Create index with items that have no metadata property
    const itemsWithNoMetadata: IndexedItem<string>[] = [
      { item: 'test1', embedding: createMockEmbedding(1) },
      { item: 'test2', embedding: createMockEmbedding(2) },
    ];

    search.importIndex(itemsWithNoMetadata);

    // Filter should handle undefined metadata gracefully via ?? {}
    const results = await search.searchWithFilter('test', () => true);

    expect(Array.isArray(results)).toBe(true);
  });
});
|
|
371
|
+
|
|
372
|
+
// findSimilar(): embeds the given item (through the text extractor when one
// is configured) and honours per-call config overrides.
describe('findSimilar', () => {
  beforeEach(async () => {
    await search.index(['apple', 'banana', 'cherry']);
  });

  it('finds items similar to given item', async () => {
    const matches = await search.findSimilar('apple');

    expect(matches.length).toBeGreaterThan(0);
    expect(mockEngine.embed).toHaveBeenCalledWith('apple');
  });

  it('respects config overrides', async () => {
    const matches = await search.findSimilar('apple', { topK: 1 });

    expect(matches.length).toBeLessThanOrEqual(1);
  });

  it('uses text extractor for complex types', async () => {
    interface Doc {
      title: string;
    }

    const indexed: Doc[] = [{ title: 'Hello' }, { title: 'World' }];
    const probe: Doc = { title: 'Test' };
    const docSearch = new SemanticSearch<Doc>(mockEngine, {
      textExtractor: (doc) => doc.title,
    });

    await docSearch.index(indexed);
    await docSearch.findSimilar(probe);

    // The probe's extracted title, not the object, is what gets embedded.
    expect(mockEngine.embed).toHaveBeenCalledWith('Test');
  });
});
|
|
405
|
+
|
|
406
|
+
// getStats(): item count, embedding dimensions, and the KB/MB memory label.
describe('getStats', () => {
  it('returns zero stats for empty index', () => {
    const { itemCount, dimensions, memoryEstimate } = search.getStats();

    expect(itemCount).toBe(0);
    expect(dimensions).toBe(0);
    expect(memoryEstimate).toBe('0.00 KB');
  });

  it('returns correct item count', async () => {
    await search.index(['a', 'b', 'c', 'd', 'e']);

    expect(search.getStats().itemCount).toBe(5);
  });

  it('returns correct dimensions', async () => {
    await search.index(['test']);

    expect(search.getStats().dimensions).toBe(384);
  });

  it('formats memory in KB for small indexes', async () => {
    await search.index(['test']);

    expect(search.getStats().memoryEstimate).toMatch(/KB$/);
  });

  it('formats memory in MB for large indexes', async () => {
    // Create many items to exceed 1MB
    // 384 dimensions * 8 bytes = 3072 bytes per item
    // Need ~350 items for 1MB, so 400 leaves some headroom
    const manyItems = Array.from({ length: 400 }, () => 'item');
    await search.index(manyItems);

    expect(search.getStats().memoryEstimate).toMatch(/MB$/);
  });
});
|
|
447
|
+
|
|
448
|
+
// clear(): empties a populated index.
describe('clear', () => {
  it('removes all items from index', async () => {
    const countItems = () => search.getStats().itemCount;

    await search.index(['a', 'b', 'c']);
    expect(countItems()).toBe(3);

    search.clear();

    expect(countItems()).toBe(0);
  });
});
|
|
458
|
+
|
|
459
|
+
// exportIndex(): returns the indexed items in insertion order, as a new array
// on every call.
describe('exportIndex', () => {
  it('returns copy of indexed items', async () => {
    await search.index(['apple', 'banana']);

    const snapshot = search.exportIndex();

    expect(snapshot).toHaveLength(2);
    expect(snapshot[0].item).toBe('apple');
    expect(snapshot[1].item).toBe('banana');
    expect(snapshot[0].embedding).toHaveLength(384);
  });

  it('returns copy, not reference', async () => {
    await search.index(['test']);

    const firstExport = search.exportIndex();
    const secondExport = search.exportIndex();

    // toBe compares identity: two exports must be distinct array objects.
    expect(firstExport).not.toBe(secondExport);
  });
});
|
|
480
|
+
|
|
481
|
+
// importIndex(): replaces the current index, is unaffected by later mutation
// of the caller's array, and leaves the instance searchable.
describe('importIndex', () => {
  // Builds an IndexedItem with empty metadata and a seeded mock embedding.
  const makeEntry = (item: string, seed: number): IndexedItem<string> => ({
    item,
    embedding: createMockEmbedding(seed),
    metadata: {},
  });

  it('replaces current index', async () => {
    await search.index(['old1', 'old2']);

    const replacement: IndexedItem<string>[] = [
      makeEntry('new1', 1),
      makeEntry('new2', 2),
      makeEntry('new3', 3),
    ];

    search.importIndex(replacement);

    expect(search.getStats().itemCount).toBe(3);
    expect(search.exportIndex()[0].item).toBe('new1');
  });

  it('creates copy of imported data', async () => {
    const source: IndexedItem<string>[] = [makeEntry('test', 1)];

    search.importIndex(source);
    // Growing the caller's array afterwards must not grow the index.
    source.push(makeEntry('added', 2));

    expect(search.getStats().itemCount).toBe(1);
  });

  it('allows searching after import', async () => {
    const source: IndexedItem<string>[] = [makeEntry('apple', 1), makeEntry('banana', 2)];

    search.importIndex(source);
    const results = await search.search('fruit');

    // Results may be empty if threshold filters them out, but search should work
    expect(Array.isArray(results)).toBe(true);
    // Verify the query text was embedded
    expect(mockEngine.embed).toHaveBeenCalledWith('fruit');
  });
});
|
|
528
|
+
|
|
529
|
+
// End-to-end pass over one SemanticSearch instance: index, plain search,
// filtered search, findSimilar, then an export / clear / import round trip.
describe('integration scenarios', () => {
  it('handles full workflow: index -> search -> filter -> clear', async () => {
    interface Product {
      name: string;
      category: string;
      price: number;
    }

    const catalog: Product[] = [
      { name: 'iPhone', category: 'electronics', price: 999 },
      { name: 'Samsung Galaxy', category: 'electronics', price: 899 },
      { name: 'Nike Shoes', category: 'apparel', price: 150 },
      { name: 'Adidas Sneakers', category: 'apparel', price: 120 },
    ];

    const productSearch = new SemanticSearch<Product>(mockEngine, {
      textExtractor: (p) => `${p.name} ${p.category}`,
      metadataExtractor: (p) => ({ category: p.category, price: p.price }),
    });
    const countItems = () => productSearch.getStats().itemCount;

    // Index products
    await productSearch.index(catalog);
    expect(countItems()).toBe(4);

    // Search all
    const allResults = await productSearch.search('phone');
    expect(allResults.length).toBeGreaterThan(0);

    // Search with filter
    const electronicsResults = await productSearch.searchWithFilter(
      'phone',
      (m) => m.category === 'electronics'
    );
    for (const hit of electronicsResults) {
      expect(hit.item.category).toBe('electronics');
    }

    // Find similar
    const similar = await productSearch.findSimilar({
      name: 'Google Pixel',
      category: 'electronics',
      price: 799,
    });
    expect(similar.length).toBeGreaterThan(0);

    // Export, clear, then import the exported snapshot back
    const exported = productSearch.exportIndex();
    productSearch.clear();
    expect(countItems()).toBe(0);

    productSearch.importIndex(exported);
    expect(countItems()).toBe(4);
  });
});
|
|
582
|
+
});
|