vectra 0.5.4 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/ItemSelector.d.ts.map +1 -1
- package/lib/ItemSelector.js +7 -1
- package/lib/ItemSelector.js.map +1 -1
- package/lib/LocalDocument.d.ts +40 -2
- package/lib/LocalDocument.d.ts.map +1 -1
- package/lib/LocalDocument.js +50 -3
- package/lib/LocalDocument.js.map +1 -1
- package/lib/LocalDocumentIndex.d.ts +79 -2
- package/lib/LocalDocumentIndex.d.ts.map +1 -1
- package/lib/LocalDocumentIndex.js +90 -19
- package/lib/LocalDocumentIndex.js.map +1 -1
- package/lib/LocalDocumentResult.d.ts +32 -1
- package/lib/LocalDocumentResult.d.ts.map +1 -1
- package/lib/LocalDocumentResult.js +105 -23
- package/lib/LocalDocumentResult.js.map +1 -1
- package/lib/LocalIndex.d.ts +18 -18
- package/lib/LocalIndex.js +18 -18
- package/lib/vectra-cli.d.ts.map +1 -1
- package/lib/vectra-cli.js +16 -22
- package/lib/vectra-cli.js.map +1 -1
- package/package.json +1 -1
- package/src/ItemSelector.ts +6 -2
- package/src/LocalDocument.ts +50 -5
- package/src/LocalDocumentIndex.ts +124 -16
- package/src/LocalDocumentResult.ts +110 -23
- package/src/LocalIndex.ts +18 -18
- package/src/vectra-cli.ts +15 -20
|
@@ -8,19 +8,58 @@ import { MetadataFilter, EmbeddingsModel, Tokenizer, MetadataTypes, EmbeddingsRe
|
|
|
8
8
|
import { LocalDocumentResult } from './LocalDocumentResult';
|
|
9
9
|
import { LocalDocument } from './LocalDocument';
|
|
10
10
|
|
|
11
|
+
/**
|
|
12
|
+
* Options for querying documents in the index.
|
|
13
|
+
*/
|
|
11
14
|
export interface DocumentQueryOptions {
|
|
15
|
+
/**
|
|
16
|
+
* Optional. Maximum number of documents to return.
|
|
17
|
+
* @remarks
|
|
18
|
+
* Default is 10.
|
|
19
|
+
*/
|
|
12
20
|
maxDocuments?: number;
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Maximum number of chunks to return per document.
|
|
24
|
+
* @remarks
|
|
25
|
+
* Default is 50.
|
|
26
|
+
*/
|
|
13
27
|
maxChunks?: number;
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Optional. Filter to apply to the document metadata.
|
|
31
|
+
*/
|
|
14
32
|
filter?: MetadataFilter;
|
|
15
33
|
}
|
|
16
34
|
|
|
35
|
+
/**
|
|
36
|
+
* Configuration settings for a local document index.
|
|
37
|
+
*/
|
|
17
38
|
export interface LocalDocumentIndexConfig {
|
|
39
|
+
/**
|
|
40
|
+
* Folder path where the index is stored.
|
|
41
|
+
*/
|
|
18
42
|
folderPath: string;
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Optional. Embeddings model to use for generating document embeddings.
|
|
46
|
+
*/
|
|
19
47
|
embeddings?: EmbeddingsModel;
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Optional. Tokenizer to use for splitting text into tokens.
|
|
51
|
+
*/
|
|
20
52
|
tokenizer?: Tokenizer;
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Optional. Configuration settings for splitting text into chunks.
|
|
56
|
+
*/
|
|
21
57
|
chunkingConfig?: Partial<TextSplitterConfig>;
|
|
22
58
|
}
|
|
23
59
|
|
|
60
|
+
/**
|
|
61
|
+
* Represents a local index of documents stored on disk.
|
|
62
|
+
*/
|
|
24
63
|
export class LocalDocumentIndex extends LocalIndex {
|
|
25
64
|
private readonly _embeddings?: EmbeddingsModel;
|
|
26
65
|
private readonly _tokenizer: Tokenizer;
|
|
@@ -28,7 +67,10 @@ export class LocalDocumentIndex extends LocalIndex {
|
|
|
28
67
|
private _catalog?: DocumentCatalog;
|
|
29
68
|
private _newCatalog?: DocumentCatalog;
|
|
30
69
|
|
|
31
|
-
|
|
70
|
+
/**
|
|
71
|
+
* Creates a new `LocalDocumentIndex` instance.
|
|
72
|
+
* @param config Configuration settings for the document index.
|
|
73
|
+
*/
|
|
32
74
|
public constructor(config: LocalDocumentIndexConfig) {
|
|
33
75
|
super(config.folderPath);
|
|
34
76
|
this._embeddings = config.embeddings;
|
|
@@ -41,6 +83,20 @@ export class LocalDocumentIndex extends LocalIndex {
|
|
|
41
83
|
this._chunkingConfig.tokenizer = this._tokenizer;
|
|
42
84
|
}
|
|
43
85
|
|
|
86
|
+
/**
|
|
87
|
+
* Returns the embeddings model used by the index (if configured).
|
|
88
|
+
*/
|
|
89
|
+
public get embeddings(): EmbeddingsModel | undefined {
|
|
90
|
+
return this._embeddings;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
/**
|
|
94
|
+
* Returns the tokenizer used by the index.
|
|
95
|
+
*/
|
|
96
|
+
public get tokenizer(): Tokenizer {
|
|
97
|
+
return this._tokenizer;
|
|
98
|
+
}
|
|
99
|
+
|
|
44
100
|
/**
|
|
45
101
|
* Returns true if the document catalog exists.
|
|
46
102
|
*/
|
|
@@ -53,21 +109,44 @@ export class LocalDocumentIndex extends LocalIndex {
|
|
|
53
109
|
}
|
|
54
110
|
}
|
|
55
111
|
|
|
112
|
+
/**
|
|
113
|
+
* Returns the document ID for the given URI.
|
|
114
|
+
* @param uri URI of the document to lookup.
|
|
115
|
+
* @returns Document ID or undefined if not found.
|
|
116
|
+
*/
|
|
56
117
|
public async getDocumentId(uri: string): Promise<string | undefined> {
|
|
57
118
|
await this.loadIndexData();
|
|
58
119
|
return this._catalog?.uriToId[uri];
|
|
59
120
|
}
|
|
60
121
|
|
|
122
|
+
/**
|
|
123
|
+
* Returns the document URI for the given ID.
|
|
124
|
+
* @param documentId ID of the document to lookup.
|
|
125
|
+
* @returns Document URI or undefined if not found.
|
|
126
|
+
*/
|
|
61
127
|
public async getDocumentUri(documentId: string): Promise<string | undefined> {
|
|
62
128
|
await this.loadIndexData();
|
|
63
129
|
return this._catalog?.idToUri[documentId];
|
|
64
130
|
}
|
|
65
131
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
132
|
+
/**
|
|
133
|
+
* Loads the document catalog from disk and returns its stats.
|
|
134
|
+
* @returns Catalog stats.
|
|
135
|
+
*/
|
|
136
|
+
public async getCatalogStats(): Promise<DocumentCatalogStats> {
|
|
137
|
+
const stats = await this.getIndexStats()
|
|
138
|
+
return {
|
|
139
|
+
version: this._catalog!.version,
|
|
140
|
+
documents: this._catalog!.count,
|
|
141
|
+
chunks: stats.items,
|
|
142
|
+
metadata_config: stats.metadata_config
|
|
143
|
+
};
|
|
69
144
|
}
|
|
70
145
|
|
|
146
|
+
/**
|
|
147
|
+
* Deletes a document from the index.
|
|
148
|
+
* @param uri URI of the document to delete.
|
|
149
|
+
*/
|
|
71
150
|
public async deleteDocument(uri: string): Promise<void> {
|
|
72
151
|
// Lookup document ID
|
|
73
152
|
const documentId = await this.getDocumentId(uri);
|
|
@@ -114,16 +193,6 @@ export class LocalDocumentIndex extends LocalIndex {
|
|
|
114
193
|
}
|
|
115
194
|
}
|
|
116
195
|
|
|
117
|
-
public async getCatalogStats(): Promise<DocumentCatalogStats> {
|
|
118
|
-
const stats = await this.getIndexStats()
|
|
119
|
-
return {
|
|
120
|
-
version: this._catalog!.version,
|
|
121
|
-
documents: this._catalog!.count,
|
|
122
|
-
chunks: stats.items,
|
|
123
|
-
metadata_config: stats.metadata_config
|
|
124
|
-
};
|
|
125
|
-
}
|
|
126
|
-
|
|
127
196
|
/**
|
|
128
197
|
* Adds a document to the catalog.
|
|
129
198
|
* @remarks
|
|
@@ -245,10 +314,44 @@ export class LocalDocumentIndex extends LocalIndex {
|
|
|
245
314
|
}
|
|
246
315
|
|
|
247
316
|
// Return document
|
|
248
|
-
return new LocalDocument(this
|
|
317
|
+
return new LocalDocument(this, documentId, uri);
|
|
249
318
|
}
|
|
319
|
+
|
|
320
|
+
/**
|
|
321
|
+
* Returns all documents in the index.
|
|
322
|
+
* @remarks
|
|
323
|
+
* Each document will contain all of the document's indexed chunks.
|
|
324
|
+
* @returns Array of documents.
|
|
325
|
+
*/
|
|
326
|
+
public async listDocuments(): Promise<LocalDocumentResult[]> {
|
|
327
|
+
// Sort chunks by document ID
|
|
328
|
+
const docs: { [documentId: string]: QueryResult<DocumentChunkMetadata>[]; } = {};
|
|
329
|
+
const chunks = await this.listItems<DocumentChunkMetadata>();
|
|
330
|
+
chunks.forEach(chunk => {
|
|
331
|
+
const metadata = chunk.metadata;
|
|
332
|
+
if (docs[metadata.documentId] == undefined) {
|
|
333
|
+
docs[metadata.documentId] = [];
|
|
334
|
+
}
|
|
335
|
+
docs[metadata.documentId].push({ item: chunk, score: 1.0 });
|
|
336
|
+
});
|
|
337
|
+
|
|
338
|
+
// Create document results
|
|
339
|
+
const results: LocalDocumentResult[] = [];
|
|
340
|
+
for (const documentId in docs) {
|
|
341
|
+
const uri = await this.getDocumentUri(documentId) as string;
|
|
342
|
+
const documentResult = new LocalDocumentResult(this, documentId, uri, docs[documentId], this._tokenizer);
|
|
343
|
+
results.push(documentResult);
|
|
344
|
+
}
|
|
250
345
|
|
|
346
|
+
return results;
|
|
347
|
+
}
|
|
251
348
|
|
|
349
|
+
/**
|
|
350
|
+
* Queries the index for documents similar to the given query.
|
|
351
|
+
* @param query Text to query for.
|
|
352
|
+
* @param options Optional. Query options.
|
|
353
|
+
* @returns Array of document results.
|
|
354
|
+
*/
|
|
252
355
|
public async queryDocuments(query: string, options?: DocumentQueryOptions): Promise<LocalDocumentResult[]> {
|
|
253
356
|
// Ensure embeddings configured
|
|
254
357
|
if (!this._embeddings) {
|
|
@@ -292,7 +395,7 @@ export class LocalDocumentIndex extends LocalIndex {
|
|
|
292
395
|
for (const documentId in documentChunks) {
|
|
293
396
|
const chunks = documentChunks[documentId];
|
|
294
397
|
const uri = await this.getDocumentUri(documentId) as string;
|
|
295
|
-
const documentResult = new LocalDocumentResult(this
|
|
398
|
+
const documentResult = new LocalDocumentResult(this, documentId, uri, chunks, this._tokenizer);
|
|
296
399
|
documentResults.push(documentResult);
|
|
297
400
|
}
|
|
298
401
|
|
|
@@ -312,6 +415,11 @@ export class LocalDocumentIndex extends LocalIndex {
|
|
|
312
415
|
this._newCatalog = undefined;
|
|
313
416
|
}
|
|
314
417
|
|
|
418
|
+
public async createIndex(config?: CreateIndexConfig): Promise<void> {
|
|
419
|
+
await super.createIndex(config);
|
|
420
|
+
await this.loadIndexData();
|
|
421
|
+
}
|
|
422
|
+
|
|
315
423
|
public async endUpdate(): Promise<void> {
|
|
316
424
|
await super.endUpdate();
|
|
317
425
|
|
|
@@ -1,13 +1,21 @@
|
|
|
1
1
|
import { LocalDocument } from "./LocalDocument";
|
|
2
|
+
import { LocalDocumentIndex } from "./LocalDocumentIndex";
|
|
2
3
|
import { QueryResult, DocumentChunkMetadata, Tokenizer, DocumentTextSection } from "./types";
|
|
3
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Represents a search result for a document stored on disk.
|
|
7
|
+
*/
|
|
4
8
|
export class LocalDocumentResult extends LocalDocument {
|
|
5
9
|
private readonly _chunks: QueryResult<DocumentChunkMetadata>[];
|
|
6
10
|
private readonly _tokenizer: Tokenizer;
|
|
7
11
|
private readonly _score: number;
|
|
8
12
|
|
|
9
|
-
|
|
10
|
-
|
|
13
|
+
/**
|
|
14
|
+
* @private
|
|
15
|
+
* Internal constructor for `LocalDocumentResult` instances.
|
|
16
|
+
*/
|
|
17
|
+
public constructor(index: LocalDocumentIndex, id: string, uri: string, chunks: QueryResult<DocumentChunkMetadata>[], tokenizer: Tokenizer) {
|
|
18
|
+
super(index, id, uri);
|
|
11
19
|
this._chunks = chunks;
|
|
12
20
|
this._tokenizer = tokenizer;
|
|
13
21
|
|
|
@@ -17,30 +25,112 @@ export class LocalDocumentResult extends LocalDocument {
|
|
|
17
25
|
this._score = score / this._chunks.length;
|
|
18
26
|
}
|
|
19
27
|
|
|
28
|
+
/**
|
|
29
|
+
* Returns the chunks of the document that matched the query.
|
|
30
|
+
*/
|
|
20
31
|
public get chunks(): QueryResult<DocumentChunkMetadata>[] {
|
|
21
32
|
return this._chunks;
|
|
22
33
|
}
|
|
23
34
|
|
|
35
|
+
/**
|
|
36
|
+
* Returns the average score of the document result.
|
|
37
|
+
*/
|
|
24
38
|
public get score(): number {
|
|
25
39
|
return this._score;
|
|
26
40
|
}
|
|
27
41
|
|
|
28
|
-
|
|
42
|
+
/**
|
|
43
|
+
* Renders all of the result's chunks as spans of text (sections).
|
|
44
|
+
* @remarks
|
|
45
|
+
* The returned sections will be sorted by document order and limited to maxTokens in length.
|
|
46
|
+
* @param maxTokens Maximum number of tokens per section.
|
|
47
|
+
* @returns Array of rendered text sections.
|
|
48
|
+
*/
|
|
49
|
+
public async renderAllSections(maxTokens: number): Promise<DocumentTextSection[]> {
|
|
29
50
|
// Load text from disk
|
|
30
51
|
const text = await this.loadText();
|
|
31
52
|
|
|
32
|
-
//
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
53
|
+
// Add chunks to a temp array and split any chunks that are longer than maxTokens.
|
|
54
|
+
const chunks: SectionChunk[] = [];
|
|
55
|
+
for (let i = 0; i < this._chunks.length; i++) {
|
|
56
|
+
const chunk = this._chunks[i];
|
|
57
|
+
const startPos = chunk.item.metadata.startPos;
|
|
58
|
+
const endPos = chunk.item.metadata.endPos;
|
|
59
|
+
const chunkText = text.substring(startPos, endPos + 1);
|
|
60
|
+
const tokens = this._tokenizer.encode(chunkText);
|
|
61
|
+
let offset = 0;
|
|
62
|
+
while (offset < tokens.length) {
|
|
63
|
+
const chunkLength = Math.min(maxTokens, tokens.length - offset);
|
|
64
|
+
chunks.push({
|
|
65
|
+
text: this._tokenizer.decode(tokens.slice(offset, offset + chunkLength)),
|
|
66
|
+
startPos: startPos + offset,
|
|
67
|
+
endPos: startPos + offset + chunkLength - 1,
|
|
68
|
+
score: chunk.score,
|
|
69
|
+
tokenCount: chunkLength
|
|
70
|
+
});
|
|
71
|
+
offset += chunkLength;
|
|
41
72
|
}
|
|
42
73
|
}
|
|
43
74
|
|
|
75
|
+
// Sort chunks by startPos
|
|
76
|
+
const sorted = chunks.sort((a, b) => a.startPos - b.startPos);
|
|
77
|
+
|
|
78
|
+
// Generate sections
|
|
79
|
+
const sections: Section[] = [];
|
|
80
|
+
for (let i = 0; i < sorted.length; i++) {
|
|
81
|
+
const chunk = sorted[i];
|
|
82
|
+
let section = sections[sections.length - 1];
|
|
83
|
+
if (!section || section.tokenCount + chunk.tokenCount > maxTokens) {
|
|
84
|
+
section = {
|
|
85
|
+
chunks: [],
|
|
86
|
+
score: 0,
|
|
87
|
+
tokenCount: 0
|
|
88
|
+
};
|
|
89
|
+
sections.push(section);
|
|
90
|
+
}
|
|
91
|
+
section.chunks.push(chunk);
|
|
92
|
+
section.score += chunk.score;
|
|
93
|
+
section.tokenCount += chunk.tokenCount;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// Normalize section scores
|
|
97
|
+
sections.forEach(section => section.score /= section.chunks.length);
|
|
98
|
+
|
|
99
|
+
// Return final rendered sections
|
|
100
|
+
return sections.map(section => {
|
|
101
|
+
let text = '';
|
|
102
|
+
section.chunks.forEach(chunk => text += chunk.text);
|
|
103
|
+
return {
|
|
104
|
+
text: text,
|
|
105
|
+
tokenCount: section.tokenCount,
|
|
106
|
+
score: section.score
|
|
107
|
+
};
|
|
108
|
+
});
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Renders the top spans of text (sections) of the document based on the query result.
|
|
113
|
+
* @remarks
|
|
114
|
+
* The returned sections will be sorted by relevance and limited to the top `maxSections`.
|
|
115
|
+
* @param maxTokens Maximum number of tokens per section.
|
|
116
|
+
* @param maxSections Maximum number of sections to return.
|
|
117
|
+
* @param overlappingChunks Optional. If true, overlapping chunks of text will be added to each section until the maxTokens is reached.
|
|
118
|
+
* @returns Array of rendered text sections.
|
|
119
|
+
*/
|
|
120
|
+
public async renderSections(maxTokens: number, maxSections: number, overlappingChunks = true): Promise<DocumentTextSection[]> {
|
|
121
|
+
// Load text from disk
|
|
122
|
+
const text = await this.loadText();
|
|
123
|
+
|
|
124
|
+
// First check to see if the entire document is shorter than maxTokens
|
|
125
|
+
const length = await this.getLength();
|
|
126
|
+
if (length <= maxTokens) {
|
|
127
|
+
return [{
|
|
128
|
+
text,
|
|
129
|
+
tokenCount: length,
|
|
130
|
+
score: 1.0
|
|
131
|
+
}];
|
|
132
|
+
}
|
|
133
|
+
|
|
44
134
|
// Otherwise, we need to split the document into sections
|
|
45
135
|
// - Add each chunk to a temp array and filter out any chunk that's longer than maxTokens.
|
|
46
136
|
// - Sort the array by startPos to arrange chunks in document order.
|
|
@@ -78,24 +168,21 @@ export class LocalDocumentResult extends LocalDocument {
|
|
|
78
168
|
}
|
|
79
169
|
|
|
80
170
|
// Generate sections
|
|
81
|
-
const sections: Section[] = [
|
|
82
|
-
chunks: [],
|
|
83
|
-
score: 0,
|
|
84
|
-
tokenCount: 0
|
|
85
|
-
}];
|
|
171
|
+
const sections: Section[] = [];
|
|
86
172
|
for (let i = 0; i < chunks.length; i++) {
|
|
87
173
|
const chunk = chunks[i];
|
|
88
174
|
let section = sections[sections.length - 1];
|
|
89
|
-
if (section.tokenCount + chunk.tokenCount > maxTokens) {
|
|
90
|
-
|
|
175
|
+
if (!section || section.tokenCount + chunk.tokenCount > maxTokens) {
|
|
176
|
+
section = {
|
|
91
177
|
chunks: [],
|
|
92
178
|
score: 0,
|
|
93
179
|
tokenCount: 0
|
|
94
|
-
}
|
|
180
|
+
};
|
|
181
|
+
sections.push(section);
|
|
95
182
|
}
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
183
|
+
section.chunks.push(chunk);
|
|
184
|
+
section.score += chunk.score;
|
|
185
|
+
section.tokenCount += chunk.tokenCount;
|
|
99
186
|
}
|
|
100
187
|
|
|
101
188
|
// Normalize section scores
|
package/src/LocalIndex.ts
CHANGED
|
@@ -27,8 +27,8 @@ export class LocalIndex {
|
|
|
27
27
|
|
|
28
28
|
/**
|
|
29
29
|
* Creates a new instance of LocalIndex.
|
|
30
|
-
* @param folderPath
|
|
31
|
-
* @param indexName
|
|
30
|
+
* @param folderPath Path to the index folder.
|
|
31
|
+
* @param indexName Optional name of the index file. Defaults to index.json.
|
|
32
32
|
*/
|
|
33
33
|
public constructor(folderPath: string, indexName?: string) {
|
|
34
34
|
this._folderPath = folderPath;
|
|
@@ -76,7 +76,7 @@ export class LocalIndex {
|
|
|
76
76
|
* Creates a new index.
|
|
77
77
|
* @remarks
|
|
78
78
|
* This method creates a new folder on disk containing an index.json file.
|
|
79
|
-
* @param config
|
|
79
|
+
* @param config Index configuration.
|
|
80
80
|
*/
|
|
81
81
|
public async createIndex(config: CreateIndexConfig = {version: 1}): Promise<void> {
|
|
82
82
|
// Delete if exists
|
|
@@ -121,7 +121,7 @@ export class LocalIndex {
|
|
|
121
121
|
|
|
122
122
|
/**
|
|
123
123
|
* Deletes an item from the index.
|
|
124
|
-
* @param id
|
|
124
|
+
* @param id ID of item to delete.
|
|
125
125
|
*/
|
|
126
126
|
public async deleteItem(id: string): Promise<void> {
|
|
127
127
|
if (this._update) {
|
|
@@ -161,7 +161,7 @@ export class LocalIndex {
|
|
|
161
161
|
|
|
162
162
|
/**
|
|
163
163
|
* Loads an index from disk and returns its stats.
|
|
164
|
-
* @returns Index stats
|
|
164
|
+
* @returns Index stats.
|
|
165
165
|
*/
|
|
166
166
|
public async getIndexStats(): Promise<IndexStats> {
|
|
167
167
|
await this.loadIndexData();
|
|
@@ -174,8 +174,8 @@ export class LocalIndex {
|
|
|
174
174
|
|
|
175
175
|
/**
|
|
176
176
|
* Returns an item from the index given its ID.
|
|
177
|
-
* @param id
|
|
178
|
-
* @returns Item or undefined if not found
|
|
177
|
+
* @param id ID of the item to retrieve.
|
|
178
|
+
* @returns Item or undefined if not found.
|
|
179
179
|
*/
|
|
180
180
|
public async getItem<TMetadata = Record<string,MetadataTypes>>(id: string): Promise<IndexItem<TMetadata> | undefined> {
|
|
181
181
|
await this.loadIndexData();
|
|
@@ -187,8 +187,8 @@ export class LocalIndex {
|
|
|
187
187
|
* @remarks
|
|
188
188
|
* A new update is started if one is not already in progress. If an item with the same ID
|
|
189
189
|
* already exists, an error will be thrown.
|
|
190
|
-
* @param item Item to insert
|
|
191
|
-
* @returns Inserted item
|
|
190
|
+
* @param item Item to insert.
|
|
191
|
+
* @returns Inserted item.
|
|
192
192
|
*/
|
|
193
193
|
public async insertItem<TMetadata = Record<string,MetadataTypes>>(item: Partial<IndexItem<TMetadata>>): Promise<IndexItem<TMetadata>> {
|
|
194
194
|
if (this._update) {
|
|
@@ -218,7 +218,7 @@ export class LocalIndex {
|
|
|
218
218
|
* @remarks
|
|
219
219
|
* This method loads the index into memory and returns all its items. A copy of the items
|
|
220
220
|
* array is returned so no modifications should be made to the array.
|
|
221
|
-
* @returns
|
|
221
|
+
* @returns Array of all items in the index.
|
|
222
222
|
*/
|
|
223
223
|
public async listItems<TMetadata = Record<string,MetadataTypes>>(): Promise<IndexItem<TMetadata>[]> {
|
|
224
224
|
await this.loadIndexData();
|
|
@@ -229,8 +229,8 @@ export class LocalIndex {
|
|
|
229
229
|
* Returns all items in the index matching the filter.
|
|
230
230
|
* @remarks
|
|
231
231
|
* This method loads the index into memory and returns all its items matching the filter.
|
|
232
|
-
* @param filter Filter to apply
|
|
233
|
-
* @returns
|
|
232
|
+
* @param filter Filter to apply.
|
|
233
|
+
* @returns Array of items matching the filter.
|
|
234
234
|
*/
|
|
235
235
|
public async listItemsByMetadata<TMetadata = Record<string,MetadataTypes>>(filter: MetadataFilter): Promise<IndexItem<TMetadata>[]> {
|
|
236
236
|
await this.loadIndexData();
|
|
@@ -242,10 +242,10 @@ export class LocalIndex {
|
|
|
242
242
|
* @remarks
|
|
243
243
|
* This method loads the index into memory and returns the top k items that are most similar.
|
|
244
244
|
* An optional filter can be applied to the metadata of the items.
|
|
245
|
-
* @param vector Vector to query against
|
|
246
|
-
* @param topK Number of items to return
|
|
247
|
-
* @param filter Optional
|
|
248
|
-
* @returns Similar items to the vector that
|
|
245
|
+
* @param vector Vector to query against.
|
|
246
|
+
* @param topK Number of items to return.
|
|
247
|
+
* @param filter Optional. Filter to apply.
|
|
248
|
+
* @returns Similar items to the vector that match the supplied filter.
|
|
249
249
|
*/
|
|
250
250
|
public async queryItems<TMetadata = Record<string,MetadataTypes>>(vector: number[], topK: number, filter?: MetadataFilter): Promise<QueryResult<TMetadata>[]> {
|
|
251
251
|
await this.loadIndexData();
|
|
@@ -293,8 +293,8 @@ export class LocalIndex {
|
|
|
293
293
|
* @remarks
|
|
294
294
|
* A new update is started if one is not already in progress. If an item with the same ID
|
|
295
295
|
* already exists, it will be replaced.
|
|
296
|
-
* @param item Item to insert or replace
|
|
297
|
-
* @returns Upserted item
|
|
296
|
+
* @param item Item to insert or replace.
|
|
297
|
+
* @returns Upserted item.
|
|
298
298
|
*/
|
|
299
299
|
public async upsertItem<TMetadata = Record<string,MetadataTypes>>(item: Partial<IndexItem<TMetadata>>): Promise<IndexItem<TMetadata>> {
|
|
300
300
|
if (this._update) {
|
package/src/vectra-cli.ts
CHANGED
|
@@ -82,27 +82,22 @@ export async function run() {
|
|
|
82
82
|
// Get list of URLs
|
|
83
83
|
const uris = await getItemList(args.uri as string[], args.list as string, 'web page');
|
|
84
84
|
|
|
85
|
-
// Fetch
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
await
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
} catch (err: unknown) {
|
|
101
|
-
console.log(Colorize.replaceLine(Colorize.error(`Error adding: ${path}\n${(err as Error).message}`)));
|
|
102
|
-
}
|
|
85
|
+
// Fetch documents
|
|
86
|
+
const fileFetcher = new FileFetcher();
|
|
87
|
+
const webFetcher = args.cookie ? new WebFetcher({ headers: { "cookie": args.cookie }}) : new WebFetcher();
|
|
88
|
+
for (const path of uris) {
|
|
89
|
+
try {
|
|
90
|
+
console.log(Colorize.progress(`fetching ${path}`));
|
|
91
|
+
const fetcher = path.startsWith('http') ? webFetcher : fileFetcher;
|
|
92
|
+
await fetcher.fetch(path, async (uri, text, docType) => {
|
|
93
|
+
console.log(Colorize.replaceLine(Colorize.progress(`indexing ${uri}`)));
|
|
94
|
+
await index.upsertDocument(uri, text, docType);
|
|
95
|
+
console.log(Colorize.replaceLine(Colorize.success(`added ${uri}`)));
|
|
96
|
+
return true;
|
|
97
|
+
});
|
|
98
|
+
} catch (err: unknown) {
|
|
99
|
+
console.log(Colorize.replaceLine(Colorize.error(`Error adding: ${path}\n${(err as Error).message}`)));
|
|
103
100
|
}
|
|
104
|
-
} finally {
|
|
105
|
-
await index.endUpdate();
|
|
106
101
|
}
|
|
107
102
|
})
|
|
108
103
|
.command('remove <index>', `removes one or more documents from an index`, (yargs) => {
|