vectra 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,158 @@
1
+ import { MetadataFilter, MetadataTypes } from './LocalIndex';
2
+
3
/**
 * Static helpers for scoring vectors with cosine similarity and for matching
 * item metadata against a MetadataFilter.
 */
export class ItemSelector {
    /**
     * Returns the cosine similarity between two vectors.
     * @remarks
     * The norm of both vectors is computed on every call. Use
     * normalizedCosineSimilarity() when the norms are already known.
     * @param vector1 Vector 1
     * @param vector2 Vector 2
     * @returns Similarity between the two vectors, or 0 when either vector has a zero norm
     */
    public static cosineSimilarity(vector1: number[], vector2: number[]): number {
        return this.normalizedCosineSimilarity(
            vector1,
            this.normalize(vector1),
            vector2,
            this.normalize(vector2)
        );
    }

    /**
     * Computes the Euclidean norm (magnitude) of a vector.
     * @remarks
     * Despite its name this method does not return a normalized vector. It returns the
     * square root of the sum of the squares of the elements, which is the value cached
     * on every index item so that queries do not have to recompute it.
     * @param vector Vector to measure
     * @returns Norm of the vector (0 for an empty vector)
     */
    public static normalize(vector: number[]): number {
        let sumOfSquares = 0;
        for (const component of vector) {
            sumOfSquares += component * component;
        }
        return Math.sqrt(sumOfSquares);
    }

    /**
     * Returns the cosine similarity between two vectors whose norms are already known.
     * @param vector1 Vector 1
     * @param norm1 Norm of vector 1
     * @param vector2 Vector 2
     * @param norm2 Norm of vector 2
     * @returns Similarity between the two vectors, or 0 when the product of the norms is 0
     */
    public static normalizedCosineSimilarity(vector1: number[], norm1: number, vector2: number[], norm2: number): number {
        const denominator = norm1 * norm2;
        if (denominator === 0) {
            // A zero-length vector has no direction. Dividing would produce NaN, which
            // cannot be ordered by a sort comparator, so report "no similarity" instead.
            return 0;
        }
        return this.dotProduct(vector1, vector2) / denominator;
    }

    /**
     * Applies a filter to the metadata of an item.
     * @remarks
     * Every key of the filter must match for the item to be selected. `$and` requires all
     * nested filters to match and `$or` requires at least one. Any other key names a
     * metadata field whose value is compared either directly (primitive filter value) or
     * through operators such as `$eq` or `$gt` (object filter value).
     * @param metadata Metadata of the item
     * @param filter Filter to apply
     * @returns True if the item matches the filter, false otherwise
     */
    public static select(metadata: Record<string, MetadataTypes>, filter: MetadataFilter): boolean {
        if (filter === undefined || filter === null) {
            // No filter selects everything
            return true;
        }

        for (const key of Object.keys(filter)) {
            switch (key) {
                case '$and':
                    if (!filter.$and.every((f) => this.select(metadata, f))) {
                        return false;
                    }
                    break;
                case '$or':
                    if (!filter.$or.some((f) => this.select(metadata, f))) {
                        return false;
                    }
                    break;
                default: {
                    const expected = filter[key];
                    if (expected === undefined || expected === null) {
                        // A field cannot be matched against a missing filter value
                        return false;
                    }
                    if (typeof expected === 'object') {
                        // Operator object, e.g. { $gt: 5 }
                        if (!this.metadataFilter(metadata[key], expected as MetadataFilter)) {
                            return false;
                        }
                    } else if (metadata[key] !== expected) {
                        return false;
                    }
                    break;
                }
            }
        }
        return true;
    }

    /**
     * Returns the dot product of two vectors.
     * @remarks
     * Iterates over the length of `arr1`; `arr2` is expected to have the same length.
     * @param arr1 First vector
     * @param arr2 Second vector
     * @returns Sum of the element-wise products
     */
    private static dotProduct(arr1: number[], arr2: number[]): number {
        let sum = 0;
        for (let i = 0; i < arr1.length; i++) {
            sum += arr1[i] * arr2[i];
        }
        return sum;
    }

    /**
     * Tests a single metadata value against an operator object.
     * @remarks
     * All keys of the operator object must be satisfied. A missing value never matches,
     * the ordering operators only match numbers, and boolean values never match `$in`
     * or `$nin`. Unrecognized keys are compared to the value with strict equality.
     * @param value Metadata value of the item
     * @param filter Operator object to apply
     * @returns True if the value satisfies every operator, false otherwise
     */
    private static metadataFilter(value: MetadataTypes, filter: MetadataFilter): boolean {
        if (value === undefined || value === null) {
            return false;
        }

        for (const key of Object.keys(filter)) {
            switch (key) {
                case '$eq':
                    if (value !== filter.$eq) {
                        return false;
                    }
                    break;
                case '$ne':
                    if (value === filter.$ne) {
                        return false;
                    }
                    break;
                case '$gt':
                    if (typeof value !== 'number' || value <= filter.$gt) {
                        return false;
                    }
                    break;
                case '$gte':
                    if (typeof value !== 'number' || value < filter.$gte) {
                        return false;
                    }
                    break;
                case '$lt':
                    if (typeof value !== 'number' || value >= filter.$lt) {
                        return false;
                    }
                    break;
                case '$lte':
                    if (typeof value !== 'number' || value > filter.$lte) {
                        return false;
                    }
                    break;
                case '$in':
                    if (typeof value === 'boolean' || !filter.$in.includes(value)) {
                        return false;
                    }
                    break;
                case '$nin':
                    if (typeof value === 'boolean' || filter.$nin.includes(value)) {
                        return false;
                    }
                    break;
                default:
                    // Only fail on a mismatch. Returning the comparison result here would
                    // skip every operator that follows this key.
                    if (value !== filter[key]) {
                        return false;
                    }
                    break;
            }
        }
        return true;
    }
}
@@ -0,0 +1,450 @@
1
+ import * as fs from 'fs/promises';
2
+ import * as path from 'path';
3
+ import { v4 } from 'uuid';
4
+ import { ItemSelector } from './ItemSelector';
5
+
6
/**
 * Options accepted by LocalIndex.createIndex().
 */
export interface CreateIndexConfig {
    /**
     * Version number stored in the new index.json file.
     */
    version: number;

    /**
     * When true, an index that already exists in the folder is deleted before the new
     * one is created. Otherwise creating over an existing index is an error.
     */
    deleteIfExists?: boolean;

    /**
     * Optional metadata settings stored with the index.
     */
    metadata_config?: {
        /**
         * Names of the metadata fields kept inline on each item in index.json. When set
         * (and non-empty), an item's full metadata is written to a separate file.
         */
        indexed?: string[];
    };
}
13
+
14
/**
 * Summary of an index as returned by LocalIndex.getIndexStats().
 */
export interface IndexStats {
    /**
     * Version number read from the index.
     */
    version: number;

    /**
     * Metadata settings of the index.
     */
    metadata_config: {
        /**
         * Names of the metadata fields kept inline on each item, if configured.
         */
        indexed?: string[];
    };

    /**
     * Number of items in the index.
     */
    items: number;
}
21
+
22
/**
 * A single entry of the index.
 */
export interface IndexItem<TMetadata = Record<string, MetadataTypes>> {
    /**
     * Identifier of the item.
     */
    id: string;

    /**
     * Metadata stored inline with the item.
     */
    metadata: TMetadata;

    /**
     * The item's vector.
     */
    vector: number[];

    /**
     * Norm of the vector, computed when the item is added so queries can reuse it.
     */
    norm: number;

    /**
     * Name of a JSON file inside the index folder holding the item's full metadata.
     * Only present when the index is configured with indexed metadata fields.
     */
    metadataFile?: string;
}
29
+
30
/**
 * One match returned by LocalIndex.queryItems().
 */
export interface QueryResult<TMetadata = Record<string, MetadataTypes>> {
    /**
     * The matching item.
     */
    item: IndexItem<TMetadata>;

    /**
     * Cosine similarity between the query vector and the item's vector.
     */
    score: number;
}
34
+
35
/**
 * Filter applied to item metadata.
 * @remarks
 * A filter is either a set of field names mapped to the value (or operator object) they
 * must match, or an operator object itself. All entries of a filter must match.
 *
 * NOTE(review): every operator below is declared as a required member, so an object
 * literal that only uses some of them does not type-check without a cast. Confirm
 * whether the operators were meant to be optional.
 */
export interface MetadataFilter {
    /**
     * Field name mapped to an exact value, a nested operator object, or operator operands.
     */
    [key: string]: MetadataTypes | MetadataFilter | (number | string)[] | MetadataFilter[];

    /**
     * Equal to (number, string, boolean)
     */
    '$eq': number | string | boolean;

    /**
     * Not equal to (number, string, boolean)
     */
    '$ne': number | string | boolean;

    /**
     * Greater than (number)
     */
    '$gt': number;

    /**
     * Greater than or equal to (number)
     */
    '$gte': number;

    /**
     * Less than (number)
     */
    '$lt': number;

    /**
     * Less than or equal to (number)
     */
    '$lte': number;

    /**
     * In array (string or number)
     */
    '$in': (number | string)[];

    /**
     * Not in array (string or number)
     */
    '$nin': (number | string)[];

    /**
     * AND (MetadataFilter[]) - every nested filter must match
     */
    '$and': MetadataFilter[];

    /**
     * OR (MetadataFilter[]) - at least one nested filter must match
     */
    '$or': MetadataFilter[];
}
88
+
89
/**
 * Primitive value types that can be stored in, and filtered on, item metadata.
 */
export type MetadataTypes = string | number | boolean;
90
+
91
/**
 * Local vector index instance.
 * @remarks
 * This class is used to create, update, and query a local vector index.
 * Each index is a folder on disk containing an index.json file and an optional set of metadata files.
 *
 * Changes are staged in an in-memory update (see beginUpdate()) and only become visible to
 * the read methods, and get written to disk, when endUpdate() succeeds.
 */
export class LocalIndex {
    private readonly _folderPath: string;
    private _data?: IndexData;
    private _update?: IndexData;

    /**
     * Creates a new instance of LocalIndex.
     * @param folderPath - Path to the index folder
     */
    public constructor(folderPath: string) {
        this._folderPath = folderPath;
    }

    /**
     * Begins an update to the index.
     * @remarks
     * This method loads the index into memory and prepares it for updates.
     * @throws Error if an update is already in progress or the index does not exist
     */
    public async beginUpdate(): Promise<void> {
        if (this._update) {
            throw new Error('Update already in progress');
        }

        const data = await this.loadIndexData();

        // Stage changes on a private copy of the items array so that cancelUpdate() (or a
        // failed save) leaves the committed data untouched.
        this._update = { ...data, items: data.items.slice() };
    }

    /**
     * Cancels an update to the index.
     * @remarks
     * This method discards any changes made to the index since the update began.
     */
    public cancelUpdate(): void {
        this._update = undefined;
    }

    /**
     * Creates a new index.
     * @remarks
     * This method creates a new folder on disk containing an index.json file.
     * @param config - Index configuration
     * @throws Error if the index already exists and `deleteIfExists` is not set, or if the
     * folder or index.json file cannot be written
     */
    public async createIndex(config: CreateIndexConfig = {version: 1}): Promise<void> {
        // Delete if exists
        if (await this.isIndexCreated()) {
            if (config.deleteIfExists) {
                await this.deleteIndex();
            } else {
                throw new Error('Index already exists');
            }
        }

        try {
            // Create folder for index
            await fs.mkdir(this._folderPath, { recursive: true });

            // Initialize index.json file
            const data: IndexData = {
                version: config.version,
                metadata_config: config.metadata_config ?? {},
                items: []
            };
            await fs.writeFile(path.join(this._folderPath, 'index.json'), JSON.stringify(data));
            this._data = data;
        } catch (err: unknown) {
            // Best-effort cleanup: the folder may not exist at all, and a cleanup failure
            // must not hide the error that actually prevented the index from being created.
            try {
                await this.deleteIndex();
            } catch {
                // ignored
            }
            throw new Error(`Error creating index: ${String(err)}`);
        }
    }

    /**
     * Deletes the index.
     * @remarks
     * This method deletes the index folder from disk and drops the data cached in memory.
     */
    public deleteIndex(): Promise<void> {
        this._data = undefined;
        return fs.rm(this._folderPath, {
            recursive: true,
            maxRetries: 3
        });
    }

    /**
     * Deletes an item from the index.
     * @remarks
     * A new update is started (and saved) if one is not already in progress. Deleting an
     * id that does not exist is a no-op.
     * @param id - Item id
     */
    public async deleteItem(id: string): Promise<void> {
        await this.runInUpdate(async () => this.removeItemFromUpdate(id));
    }

    /**
     * Ends an update to the index.
     * @remarks
     * This method saves the index to disk. If saving fails the update stays in progress
     * and the committed data is left unchanged.
     * @throws Error if no update is in progress or the index cannot be saved
     */
    public async endUpdate(): Promise<void> {
        const update = this._update;
        if (!update) {
            throw new Error('No update in progress');
        }

        try {
            // Save index
            await fs.writeFile(path.join(this._folderPath, 'index.json'), JSON.stringify(update));
        } catch (err: unknown) {
            throw new Error(`Error saving index: ${String(err)}`);
        }

        // Commit only after the write succeeded
        this._data = update;
        this._update = undefined;
    }

    /**
     * Loads an index from disk and returns its stats.
     * @returns Index stats
     */
    public async getIndexStats(): Promise<IndexStats> {
        const data = await this.loadIndexData();
        return {
            version: data.version,
            metadata_config: data.metadata_config,
            items: data.items.length
        };
    }

    /**
     * Returns an item from the index given its ID.
     * @param id Item id
     * @returns Item or undefined if not found
     */
    public async getItem<TMetadata = Record<string,MetadataTypes>>(id: string): Promise<IndexItem<TMetadata> | undefined> {
        const data = await this.loadIndexData();
        return data.items.find(i => i.id === id) as any | undefined;
    }

    /**
     * Adds an item to the index.
     * @remarks
     * A new update is started if one is not already in progress. If an item with the same ID
     * already exists, an error will be thrown.
     * @param item Item to insert
     * @returns Inserted item
     */
    public async insertItem<TMetadata = Record<string,MetadataTypes>>(item: Partial<IndexItem<TMetadata>>): Promise<IndexItem<TMetadata>> {
        return await this.runInUpdate(() => this.addItemToUpdate(item, true)) as any;
    }

    /**
     * Returns true if the index exists.
     * @remarks
     * The index exists when its index.json file can be accessed.
     */
    public async isIndexCreated(): Promise<boolean> {
        try {
            await fs.access(path.join(this._folderPath, 'index.json'));
            return true;
        } catch (err: unknown) {
            return false;
        }
    }

    /**
     * Returns all items in the index.
     * @remarks
     * This method loads the index into memory and returns all its items. A copy of the items
     * array is returned so no modifications should be made to the array.
     * @returns All items in the index
     */
    public async listItems<TMetadata = Record<string,MetadataTypes>>(): Promise<IndexItem<TMetadata>[]> {
        const data = await this.loadIndexData();
        return data.items.slice() as any;
    }

    /**
     * Returns all items in the index matching the filter.
     * @remarks
     * This method loads the index into memory and returns all its items matching the filter.
     * @param filter Filter to apply
     * @returns Items matching the filter
     */
    public async listItemsByMetadata<TMetadata = Record<string,MetadataTypes>>(filter: MetadataFilter): Promise<IndexItem<TMetadata>[]> {
        const data = await this.loadIndexData();
        return data.items.filter(i => ItemSelector.select(i.metadata, filter)) as any;
    }

    /**
     * Finds the top k items in the index that are most similar to the vector.
     * @remarks
     * This method loads the index into memory and returns the top k items that are most similar.
     * An optional filter can be applied to the metadata of the items. Items that store their
     * full metadata in a separate file have it loaded into the returned copies.
     * @param vector Vector to query against
     * @param topK Number of items to return
     * @param filter Optional filter to apply
     * @returns Similar items to the vector that matches the filter
     */
    public async queryItems<TMetadata = Record<string,MetadataTypes>>(vector: number[], topK: number, filter?: MetadataFilter): Promise<QueryResult<TMetadata>[]> {
        const data = await this.loadIndexData();

        // Filter items
        const candidates = filter
            ? data.items.filter(i => ItemSelector.select(i.metadata, filter))
            : data.items;

        // Score every candidate; the query norm is computed once and item norms are cached
        const norm = ItemSelector.normalize(vector);
        const scored = candidates.map(item => ({
            item: item,
            score: ItemSelector.normalizedCosineSimilarity(vector, norm, item.vector, item.norm)
        }));

        // Sort by score DESCENDING
        scored.sort((a, b) => b.score - a.score);

        // Find top k, returning copies so callers cannot modify the indexed items
        const top: QueryResult<TMetadata>[] = scored.slice(0, topK).map(s => {
            return {
                item: Object.assign({}, s.item) as any,
                score: s.score
            };
        });

        // Load external metadata
        for (const result of top) {
            if (result.item.metadataFile) {
                const metadataPath = path.join(this._folderPath, result.item.metadataFile);
                const metadata = await fs.readFile(metadataPath);
                result.item.metadata = JSON.parse(metadata.toString());
            }
        }

        return top;
    }

    /**
     * Adds or replaces an item in the index.
     * @remarks
     * A new update is started if one is not already in progress. If an item with the same ID
     * already exists, it will be replaced.
     * @param item Item to insert or replace
     * @returns Upserted item
     */
    public async upsertItem<TMetadata = Record<string,MetadataTypes>>(item: Partial<IndexItem<TMetadata>>): Promise<IndexItem<TMetadata>> {
        return await this.runInUpdate(() => this.addItemToUpdate(item, false)) as any;
    }

    /**
     * Returns the committed index data, reading index.json on first use.
     * @throws Error if the index does not exist
     */
    private async loadIndexData(): Promise<IndexData> {
        if (this._data) {
            return this._data;
        }

        if (!await this.isIndexCreated()) {
            throw new Error('Index does not exist');
        }

        const raw = await fs.readFile(path.join(this._folderPath, 'index.json'));
        const data: IndexData = JSON.parse(raw.toString());
        this._data = data;
        return data;
    }

    /**
     * Runs an action against the current update.
     * @remarks
     * If an update is already in progress the action simply runs inside it. Otherwise a
     * temporary update is started, saved when the action succeeds, and cancelled when the
     * action or the save fails so the index is never left stuck in update mode.
     */
    private async runInUpdate<T>(action: () => Promise<T>): Promise<T> {
        if (this._update) {
            return await action();
        }

        await this.beginUpdate();
        try {
            const result = await action();
            await this.endUpdate();
            return result;
        } catch (err: unknown) {
            this.cancelUpdate();
            throw err;
        }
    }

    /**
     * Removes the item with the given id from the update in progress, if present.
     */
    private removeItemFromUpdate(id: string): void {
        const update = this._update;
        if (!update) {
            throw new Error('No update in progress');
        }

        const position = update.items.findIndex(i => i.id === id);
        if (position >= 0) {
            update.items.splice(position, 1);
        }
    }

    /**
     * Builds an index item and stages it in the update in progress.
     * @param item Item to add; `vector` is required and `id` defaults to a new UUID
     * @param unique When true an existing item with the same id is an error, otherwise it is replaced
     * @returns The item that was added to the update
     */
    private async addItemToUpdate(item: Partial<IndexItem<any>>, unique: boolean): Promise<IndexItem> {
        const update = this._update;
        if (!update) {
            throw new Error('No update in progress');
        }

        // Ensure vector is provided
        const vector = item.vector;
        if (!vector) {
            throw new Error('Vector is required');
        }

        // Ensure unique
        const id = item.id ?? v4();
        if (unique && update.items.some(i => i.id === id)) {
            throw new Error(`Item with id ${id} already exists`);
        }

        // Check for indexed metadata
        let metadata: Record<string,any> = {};
        let metadataFile: string | undefined;
        const indexed = update.metadata_config.indexed;
        if (indexed && indexed.length > 0 && item.metadata) {
            // Copy only indexed metadata. Compare against undefined/null rather than testing
            // truthiness so that legitimate values such as 0, false and '' are kept.
            for (const key of indexed) {
                const value = item.metadata[key];
                if (value !== undefined && value !== null) {
                    metadata[key] = value;
                }
            }

            // Save the full metadata to disk under a freshly generated file name
            metadataFile = `${v4()}.json`;
            const metadataPath = path.join(this._folderPath, metadataFile);
            await fs.writeFile(metadataPath, JSON.stringify(item.metadata));
        } else if (item.metadata) {
            metadata = item.metadata;
        }

        // Create new item
        const newItem: IndexItem = {
            id: id,
            metadata: metadata,
            vector: vector,
            norm: ItemSelector.normalize(vector)
        };
        if (metadataFile) {
            newItem.metadataFile = metadataFile;
        }

        // Add item to index. An existing entry is replaced as a whole: its norm follows the
        // new vector, and the object shared with the committed data is never mutated.
        const position = unique ? -1 : update.items.findIndex(i => i.id === id);
        if (position >= 0) {
            update.items[position] = newItem;
        } else {
            update.items.push(newItem);
        }
        return newItem;
    }
}
443
+
444
/**
 * Shape of the data kept in memory and serialized to index.json.
 */
interface IndexData {
    /**
     * Version number of the index.
     */
    version: number;

    /**
     * Metadata settings of the index.
     */
    metadata_config: {
        /**
         * Names of the metadata fields kept inline on each item, if configured.
         */
        indexed?: string[];
    };

    /**
     * All items of the index.
     */
    items: IndexItem[];
}
package/src/index.ts ADDED
@@ -0,0 +1,2 @@
1
+ export * from './ItemSelector';
2
+ export * from './LocalIndex';