vectra 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +10 -0
- package/lib/ItemSelector.d.ts +41 -0
- package/lib/ItemSelector.d.ts.map +1 -0
- package/lib/ItemSelector.js +156 -0
- package/lib/ItemSelector.js.map +1 -0
- package/lib/LocalIndex.d.ts +184 -0
- package/lib/LocalIndex.d.ts.map +1 -0
- package/lib/LocalIndex.js +392 -0
- package/lib/LocalIndex.js.map +1 -0
- package/lib/index.d.ts +3 -0
- package/lib/index.d.ts.map +1 -0
- package/lib/index.js +15 -0
- package/lib/index.js.map +1 -0
- package/lib/utilities.d.ts +5 -0
- package/lib/utilities.d.ts.map +1 -0
- package/lib/utilities.js +38 -0
- package/lib/utilities.js.map +1 -0
- package/package.json +51 -0
- package/src/ItemSelector.ts +158 -0
- package/src/LocalIndex.ts +450 -0
- package/src/index.ts +2 -0
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import { MetadataFilter, MetadataTypes } from './LocalIndex';
|
|
2
|
+
|
|
3
|
+
/**
 * Static helpers for scoring vector similarity and for matching item metadata against filters.
 */
export class ItemSelector {
    /**
     * Returns the similarity between two vectors using the cosine similarity.
     * @remarks
     * The norm of each vector is computed on every call. When the norms are already known,
     * use `normalizedCosineSimilarity()` to avoid recomputing them.
     * @param vector1 Vector 1
     * @param vector2 Vector 2
     * @returns Similarity between the two vectors, or 0 when either vector has a norm of 0
     */
    public static cosineSimilarity(vector1: number[], vector2: number[]): number {
        return this.normalizedCosineSimilarity(
            vector1,
            this.normalize(vector1),
            vector2,
            this.normalize(vector2)
        );
    }

    /**
     * Computes the norm (magnitude) of a vector.
     * @remarks
     * The norm of a vector is the square root of the sum of the squares of the elements.
     * Despite its name, this method does not return a rescaled vector; it returns the single
     * number that `normalizedCosineSimilarity()` expects as a norm.
     * @param vector Vector to measure
     * @returns Norm of the vector (0 for an empty or all-zero vector)
     */
    public static normalize(vector: number[]): number {
        let sumOfSquares = 0;
        for (const component of vector) {
            sumOfSquares += component * component;
        }
        return Math.sqrt(sumOfSquares);
    }

    /**
     * Returns the similarity between two vectors using cosine similarity and pre-calculated norms.
     * @param vector1 Vector 1
     * @param norm1 Norm of vector 1
     * @param vector2 Vector 2
     * @param norm2 Norm of vector 2
     * @returns Similarity between the two vectors, or 0 when the product of the norms is 0
     */
    public static normalizedCosineSimilarity(vector1: number[], norm1: number, vector2: number[], norm2: number): number {
        const denominator = norm1 * norm2;
        if (denominator === 0) {
            // A zero vector has no direction. Returning 0 avoids the NaN produced by 0 / 0,
            // which would otherwise make score comparisons meaningless.
            return 0;
        }
        return this.dotProduct(vector1, vector2) / denominator;
    }

    /**
     * Applies a filter to the metadata of an item.
     * @remarks
     * Every key of the filter must match. `$and` and `$or` combine nested filters; any other
     * key names a metadata field that is compared either by strict equality (primitive value)
     * or by an operator object such as `{ $gt: 1 }`.
     * @param metadata Metadata of the item
     * @param filter Filter to apply
     * @returns True if the item matches the filter, false otherwise
     */
    public static select(metadata: Record<string, MetadataTypes>, filter: MetadataFilter): boolean {
        // A missing filter matches everything.
        if (filter === undefined || filter === null) {
            return true;
        }

        for (const key in filter) {
            switch (key) {
                case '$and':
                    if (!filter[key].every((f: MetadataFilter) => this.select(metadata, f))) {
                        return false;
                    }
                    break;
                case '$or':
                    if (!filter[key].some((f: MetadataFilter) => this.select(metadata, f))) {
                        return false;
                    }
                    break;
                default: {
                    const value = filter[key];
                    if (value === undefined || value === null) {
                        // A field filter without a value can never match.
                        return false;
                    } else if (typeof value === 'object') {
                        // Operator object: evaluate each operator against the field's value.
                        if (!this.metadataFilter(metadata[key], value as MetadataFilter)) {
                            return false;
                        }
                    } else if (metadata[key] !== value) {
                        return false;
                    }
                    break;
                }
            }
        }
        return true;
    }

    /**
     * Computes the dot product of two vectors.
     * @remarks
     * Iterates over the length of `arr1`; both arrays are expected to have the same length.
     * @param arr1 First vector
     * @param arr2 Second vector
     * @returns Sum of the products of the corresponding elements
     */
    private static dotProduct(arr1: number[], arr2: number[]): number {
        let sum = 0;
        for (let i = 0; i < arr1.length; i++) {
            sum += arr1[i] * arr2[i];
        }
        return sum;
    }

    /**
     * Evaluates an operator filter against a single metadata value.
     * @remarks
     * All entries of the filter must hold for the value to match, regardless of the order in
     * which the keys appear. Keys that are not known operators are compared to the value by
     * strict equality.
     * @param value Metadata value to test
     * @param filter Operator filter to evaluate
     * @returns True if the value satisfies every entry of the filter, false otherwise
     */
    private static metadataFilter(value: MetadataTypes, filter: MetadataFilter): boolean {
        // A missing metadata value never matches an operator filter.
        if (value === undefined || value === null) {
            return false;
        }

        for (const key in filter) {
            switch (key) {
                case '$eq':
                    if (value !== filter[key]) {
                        return false;
                    }
                    break;
                case '$ne':
                    if (value === filter[key]) {
                        return false;
                    }
                    break;
                case '$gt':
                    if (typeof value !== 'number' || value <= filter[key]) {
                        return false;
                    }
                    break;
                case '$gte':
                    if (typeof value !== 'number' || value < filter[key]) {
                        return false;
                    }
                    break;
                case '$lt':
                    if (typeof value !== 'number' || value >= filter[key]) {
                        return false;
                    }
                    break;
                case '$lte':
                    if (typeof value !== 'number' || value > filter[key]) {
                        return false;
                    }
                    break;
                case '$in':
                    if (typeof value === 'boolean' || !filter[key].includes(value)) {
                        return false;
                    }
                    break;
                case '$nin':
                    if (typeof value === 'boolean' || filter[key].includes(value)) {
                        return false;
                    }
                    break;
                default:
                    // Keep checking the remaining keys on a match; returning here would make
                    // the outcome depend on the enumeration order of the filter's keys.
                    if (value !== filter[key]) {
                        return false;
                    }
                    break;
            }
        }
        return true;
    }
}
|
|
@@ -0,0 +1,450 @@
|
|
|
1
|
+
import * as fs from 'fs/promises';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
import { v4 } from 'uuid';
|
|
4
|
+
import { ItemSelector } from './ItemSelector';
|
|
5
|
+
|
|
6
|
+
/**
 * Options accepted by `LocalIndex.createIndex()`.
 */
export interface CreateIndexConfig {
    /**
     * Version number stored in the created index.
     */
    version: number;

    /**
     * Optional metadata settings stored in the created index.
     */
    metadata_config?: {
        /**
         * Metadata keys that are kept in the index itself.
         */
        indexed?: Array<string>;
    };

    /**
     * When true, an index that already exists is deleted first; otherwise `createIndex()`
     * throws if the index already exists.
     */
    deleteIfExists?: boolean;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Summary of an index as returned by `LocalIndex.getIndexStats()`.
 */
export interface IndexStats {
    /**
     * Number of items currently in the index.
     */
    items: number;

    /**
     * Version number stored in the index.
     */
    version: number;

    /**
     * Metadata settings stored in the index.
     */
    metadata_config: {
        /**
         * Metadata keys that are kept in the index itself.
         */
        indexed?: Array<string>;
    };
}
|
|
21
|
+
|
|
22
|
+
/**
 * A single entry of the index.
 */
export interface IndexItem<TMetadata = Record<string, MetadataTypes>> {
    /**
     * Identifier of the item.
     */
    id: string;

    /**
     * The item's vector.
     */
    vector: Array<number>;

    /**
     * Pre-calculated norm of `vector`.
     */
    norm: number;

    /**
     * Metadata stored with the item in the index.
     */
    metadata: TMetadata;

    /**
     * Name of a file in the index folder that holds the item's metadata, when one was written.
     */
    metadataFile?: string;
}
|
|
29
|
+
|
|
30
|
+
/**
 * One entry of the results returned by `LocalIndex.queryItems()`.
 */
export interface QueryResult<TMetadata = Record<string, MetadataTypes>> {
    /**
     * Similarity score of the item for the queried vector.
     */
    score: number;

    /**
     * The matching item.
     */
    item: IndexItem<TMetadata>;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Filter applied to item metadata.
 * @remarks
 * NOTE(review): every operator below is declared as a required property, so an object
 * literal that sets only some of them is not assignable to this type without a cast.
 * Confirm whether the operators were meant to be optional.
 */
export interface MetadataFilter {
    /**
     * AND (MetadataFilter[])
     */
    '$and': Array<MetadataFilter>;

    /**
     * OR (MetadataFilter[])
     */
    '$or': Array<MetadataFilter>;

    /**
     * Equal to (number, string, boolean)
     */
    '$eq': number | string | boolean;

    /**
     * Not equal to (number, string, boolean)
     */
    '$ne': number | string | boolean;

    /**
     * Greater than (number)
     */
    '$gt': number;

    /**
     * Greater than or equal to (number)
     */
    '$gte': number;

    /**
     * Less than (number)
     */
    '$lt': number;

    /**
     * Less than or equal to (number)
     */
    '$lte': number;

    /**
     * In array (string or number)
     */
    '$in': Array<number | string>;

    /**
     * Not in array (string or number)
     */
    '$nin': Array<number | string>;

    /**
     * Any other key: a plain value, a nested filter, a list of values, or a list of filters.
     */
    [key: string]: MetadataTypes | MetadataFilter | Array<number | string> | Array<MetadataFilter>;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Primitive value types used for item metadata.
 */
export type MetadataTypes = string | number | boolean;
|
|
90
|
+
|
|
91
|
+
/**
 * Local vector index instance.
 * @remarks
 * This class is used to create, update, and query a local vector index.
 * Each index is a folder on disk containing an index.json file and an optional set of metadata files.
 */
export class LocalIndex {
    private readonly _folderPath: string;
    /** Committed index data, loaded on first use and cached afterwards. */
    private _data?: IndexData;
    /** Working copy of the index while an update is in progress. */
    private _update?: IndexData;

    /**
     * Creates a new instance of LocalIndex.
     * @param folderPath - Path to the index folder
     */
    public constructor(folderPath: string) {
        this._folderPath = folderPath;
    }

    /**
     * Begins an update to the index.
     * @remarks
     * This method loads the index into memory and prepares a working copy for updates.
     * @throws Error if an update is already in progress or the index does not exist
     */
    public async beginUpdate(): Promise<void> {
        await this.startUpdate();
    }

    /**
     * Cancels an update to the index.
     * @remarks
     * This method discards any changes made to the index since the update began.
     */
    public cancelUpdate(): void {
        this._update = undefined;
    }

    /**
     * Creates a new index.
     * @remarks
     * This method creates a new folder on disk containing an index.json file.
     * @param config - Index configuration
     * @throws Error if the index already exists and `deleteIfExists` is not set, or if the
     * index cannot be created
     */
    public async createIndex(config: CreateIndexConfig = {version: 1}): Promise<void> {
        // Delete if exists
        if (await this.isIndexCreated()) {
            if (!config.deleteIfExists) {
                throw new Error('Index already exists');
            }
            await this.deleteIndex();
        }

        try {
            // Create folder for index
            await fs.mkdir(this._folderPath, { recursive: true });

            // Initialize index.json file
            const data: IndexData = {
                version: config.version,
                metadata_config: config.metadata_config ?? {},
                items: []
            };
            await fs.writeFile(this.indexFilePath, JSON.stringify(data));
            this._data = data;
        } catch (err: unknown) {
            // Cleanup is best-effort: a failure here (for example, the folder was never
            // created) must not replace the error reported to the caller.
            try {
                await this.deleteIndex();
            } catch {
                // Nothing more to clean up.
            }
            throw new Error('Error creating index');
        }
    }

    /**
     * Deletes the index.
     * @remarks
     * This method deletes the index folder from disk and drops the cached index data.
     */
    public deleteIndex(): Promise<void> {
        this._data = undefined;
        return fs.rm(this._folderPath, {
            recursive: true,
            maxRetries: 3
        });
    }

    /**
     * Deletes an item from the index.
     * @remarks
     * A new update is started and saved if one is not already in progress. Nothing happens
     * if no item has the given id.
     * @param id - Item id
     */
    public async deleteItem(id: string): Promise<void> {
        await this.runInUpdate(async (update) => {
            const position = update.items.findIndex(i => i.id === id);
            if (position >= 0) {
                update.items.splice(position, 1);
            }
        });
    }

    /**
     * Ends an update to the index.
     * @remarks
     * This method saves the index to disk. If saving fails the update stays in progress.
     * @throws Error if no update is in progress or the index cannot be saved
     */
    public async endUpdate(): Promise<void> {
        const update = this._update;
        if (!update) {
            throw new Error('No update in progress');
        }

        try {
            // Save index
            await fs.writeFile(this.indexFilePath, JSON.stringify(update));
        } catch (err: unknown) {
            throw new Error(`Error saving index: ${String(err)}`);
        }

        // Only promote the working copy once it is safely on disk.
        this._data = update;
        this._update = undefined;
    }

    /**
     * Loads an index from disk and returns its stats.
     * @returns Index stats
     */
    public async getIndexStats(): Promise<IndexStats> {
        const data = await this.loadIndexData();
        return {
            version: data.version,
            metadata_config: data.metadata_config,
            items: data.items.length
        };
    }

    /**
     * Returns an item from the index given its ID.
     * @param id Item id
     * @returns Item or undefined if not found
     */
    public async getItem<TMetadata = Record<string,MetadataTypes>>(id: string): Promise<IndexItem<TMetadata> | undefined> {
        const data = await this.loadIndexData();
        // The stored metadata shape is not validated against TMetadata.
        return data.items.find(i => i.id === id) as unknown as IndexItem<TMetadata> | undefined;
    }

    /**
     * Adds an item to the index.
     * @remarks
     * A new update is started if one is not already in progress. If an item with the same ID
     * already exists, an error will be thrown.
     * @param item Item to insert
     * @returns Inserted item
     */
    public async insertItem<TMetadata = Record<string,MetadataTypes>>(item: Partial<IndexItem<TMetadata>>): Promise<IndexItem<TMetadata>> {
        const inserted = await this.runInUpdate((update) => this.addItemToUpdate(update, item, true));
        return inserted as unknown as IndexItem<TMetadata>;
    }

    /**
     * Returns true if the index exists.
     * @remarks
     * The index exists when the index.json file in the index folder is accessible.
     */
    public async isIndexCreated(): Promise<boolean> {
        try {
            await fs.access(this.indexFilePath);
            return true;
        } catch {
            return false;
        }
    }

    /**
     * Returns all items in the index.
     * @remarks
     * This method loads the index into memory and returns all its items. A copy of the items
     * array is returned, but the items themselves are shared with the index and should not
     * be modified.
     * @returns All items in the index
     */
    public async listItems<TMetadata = Record<string,MetadataTypes>>(): Promise<IndexItem<TMetadata>[]> {
        const data = await this.loadIndexData();
        return data.items.slice() as unknown as IndexItem<TMetadata>[];
    }

    /**
     * Returns all items in the index matching the filter.
     * @remarks
     * This method loads the index into memory and returns all its items matching the filter.
     * @param filter Filter to apply
     * @returns Items matching the filter
     */
    public async listItemsByMetadata<TMetadata = Record<string,MetadataTypes>>(filter: MetadataFilter): Promise<IndexItem<TMetadata>[]> {
        const data = await this.loadIndexData();
        const matches = data.items.filter(i => ItemSelector.select(i.metadata, filter));
        return matches as unknown as IndexItem<TMetadata>[];
    }

    /**
     * Finds the top k items in the index that are most similar to the vector.
     * @remarks
     * This method loads the index into memory and returns the top k items that are most similar.
     * An optional filter can be applied to the metadata of the items. For returned items that
     * have a metadata file, the metadata is replaced by the contents of that file.
     * @param vector Vector to query against
     * @param topK Number of items to return
     * @param filter Optional filter to apply
     * @returns Similar items to the vector that matches the filter
     */
    public async queryItems<TMetadata = Record<string,MetadataTypes>>(vector: number[], topK: number, filter?: MetadataFilter): Promise<QueryResult<TMetadata>[]> {
        const data = await this.loadIndexData();

        // Filter items
        const candidates = filter
            ? data.items.filter(i => ItemSelector.select(i.metadata, filter))
            : data.items;

        // Score every candidate, reusing the norm of the query vector
        const norm = ItemSelector.normalize(vector);
        const scored = candidates.map((item) => ({
            item,
            score: ItemSelector.normalizedCosineSimilarity(vector, norm, item.vector, item.norm)
        }));

        // Highest similarity first
        scored.sort((a, b) => b.score - a.score);

        // Copy the top k items so loading external metadata does not alter the index
        const top: QueryResult<TMetadata>[] = scored.slice(0, topK).map((entry) => ({
            item: Object.assign({}, entry.item) as unknown as IndexItem<TMetadata>,
            score: entry.score
        }));

        // Load external metadata
        for (const result of top) {
            if (result.item.metadataFile) {
                const metadataPath = path.join(this._folderPath, result.item.metadataFile);
                const metadata = await fs.readFile(metadataPath);
                result.item.metadata = JSON.parse(metadata.toString());
            }
        }

        return top;
    }

    /**
     * Adds or replaces an item in the index.
     * @remarks
     * A new update is started if one is not already in progress. If an item with the same ID
     * already exists, it will be replaced.
     * @param item Item to insert or replace
     * @returns Upserted item
     */
    public async upsertItem<TMetadata = Record<string,MetadataTypes>>(item: Partial<IndexItem<TMetadata>>): Promise<IndexItem<TMetadata>> {
        const upserted = await this.runInUpdate((update) => this.addItemToUpdate(update, item, false));
        return upserted as unknown as IndexItem<TMetadata>;
    }

    /** Path of the index.json file inside the index folder. */
    private get indexFilePath(): string {
        return path.join(this._folderPath, 'index.json');
    }

    /**
     * Starts an update and returns its working copy.
     * @remarks
     * The items array is copied so that changes made during the update do not touch the
     * committed data until `endUpdate()` succeeds; this is what lets `cancelUpdate()` discard them.
     */
    private async startUpdate(): Promise<IndexData> {
        if (this._update) {
            throw new Error('Update already in progress');
        }

        const data = await this.loadIndexData();
        const update: IndexData = Object.assign({}, data, { items: data.items.slice() });
        this._update = update;
        return update;
    }

    /**
     * Runs an action against the update in progress, or inside a temporary update.
     * @remarks
     * When no update is in progress one is started, saved once the action succeeds, and
     * cancelled if the action or the save fails, so a failed call never leaves a
     * half-finished update behind.
     */
    private async runInUpdate<T>(action: (update: IndexData) => Promise<T>): Promise<T> {
        const active = this._update;
        if (active) {
            return action(active);
        }

        const update = await this.startUpdate();
        try {
            const result = await action(update);
            await this.endUpdate();
            return result;
        } catch (err: unknown) {
            this.cancelUpdate();
            throw err;
        }
    }

    /**
     * Loads the index data from disk unless it is already cached.
     * @returns The committed index data
     * @throws Error if the index does not exist
     */
    private async loadIndexData(): Promise<IndexData> {
        if (this._data) {
            return this._data;
        }

        if (!await this.isIndexCreated()) {
            throw new Error('Index does not exist');
        }

        const raw = await fs.readFile(this.indexFilePath);
        const data: IndexData = JSON.parse(raw.toString());
        this._data = data;
        return data;
    }

    /**
     * Builds an index item and adds it to the given update.
     * @param update Working copy to modify
     * @param item Item to add; `vector` is required and `id` defaults to a new UUID
     * @param unique When true an existing item with the same id is an error; when false it is replaced
     * @returns The item stored in the update
     */
    private async addItemToUpdate(update: IndexData, item: Partial<IndexItem<unknown>>, unique: boolean): Promise<IndexItem> {
        // Ensure vector is provided
        if (!item.vector) {
            throw new Error('Vector is required');
        }

        // Ensure unique
        const id = item.id ?? v4();
        if (unique && update.items.some(i => i.id === id)) {
            throw new Error(`Item with id ${id} already exists`);
        }

        // Check for indexed metadata
        const source = item.metadata as Record<string, MetadataTypes> | undefined;
        const indexedKeys = update.metadata_config.indexed;
        let metadata: Record<string, MetadataTypes> = {};
        let metadataFile: string | undefined;
        if (source && indexedKeys && indexedKeys.length > 0) {
            // Copy only indexed metadata. Test for presence rather than truthiness so that
            // values such as 0, false and '' are kept.
            for (const key of indexedKeys) {
                const value = source[key];
                if (value !== undefined && value !== null) {
                    metadata[key] = value;
                }
            }

            // Save the complete metadata to its own, uniquely named file
            metadataFile = `${v4()}.json`;
            await fs.writeFile(path.join(this._folderPath, metadataFile), JSON.stringify(source));
        } else if (source) {
            metadata = source;
        }

        // Create new item
        const newItem: IndexItem = {
            id: id,
            metadata: metadata,
            vector: item.vector,
            norm: ItemSelector.normalize(item.vector)
        };
        if (metadataFile) {
            newItem.metadataFile = metadataFile;
        }

        // Add item to index. Replacing the whole entry keeps the stored norm in step with the
        // new vector and leaves the object held by the committed data untouched.
        const position = unique ? -1 : update.items.findIndex(i => i.id === id);
        if (position >= 0) {
            update.items[position] = newItem;
        } else {
            update.items.push(newItem);
        }
        return newItem;
    }
}
|
|
443
|
+
|
|
444
|
+
/**
 * Shape of the index data that is held in memory and serialized to index.json.
 */
interface IndexData {
    /**
     * Version number of the index.
     */
    version: number;

    /**
     * All items in the index.
     */
    items: Array<IndexItem>;

    /**
     * Metadata settings of the index.
     */
    metadata_config: {
        /**
         * Metadata keys that are kept in the index itself.
         */
        indexed?: Array<string>;
    };
}
|
package/src/index.ts
ADDED