vectra 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/FileFetcher.ts +31 -0
- package/src/LocalIndex.ts +17 -5
- package/src/OpenAIEmbeddings.ts +3 -2
- package/src/WebFetcher.ts +3 -4
- package/src/index.ts +1 -0
- package/src/types.ts +1 -1
- package/src/vectra-cli.ts +13 -8
package/package.json
CHANGED
package/src/FileFetcher.ts
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { TextFetcher } from './types';
|
|
2
|
+
import * as fs from 'fs/promises';
|
|
3
|
+
import * as path from 'path';
|
|
4
|
+
|
|
5
|
+
export class FileFetcher implements TextFetcher {
|
|
6
|
+
public async fetch(uri: string, onDocument: (uri: string, text: string, docType?: string | undefined) => Promise<boolean>): Promise<boolean> {
|
|
7
|
+
// Does path exist and is it a directory?
|
|
8
|
+
let isDirectory: boolean;
|
|
9
|
+
try {
|
|
10
|
+
const stat = await fs.stat(uri);
|
|
11
|
+
isDirectory = stat.isDirectory();
|
|
12
|
+
} catch {
|
|
13
|
+
return true;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
// If directory, read all files and recurse
|
|
17
|
+
if (isDirectory) {
|
|
18
|
+
const files = await fs.readdir(uri);
|
|
19
|
+
for (const file of files) {
|
|
20
|
+
const filePath = path.join(uri, file);
|
|
21
|
+
await this.fetch(filePath, onDocument);
|
|
22
|
+
}
|
|
23
|
+
return true;
|
|
24
|
+
} else {
|
|
25
|
+
// Read file and call onDocument
|
|
26
|
+
const text = await fs.readFile(uri, 'utf8');
|
|
27
|
+
const parts = uri.split('.');
|
|
28
|
+
return await onDocument(uri, text, parts.length > 0 ? parts[parts.length - 1].toLowerCase() : undefined);
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
}
|
package/src/LocalIndex.ts
CHANGED
|
@@ -20,15 +20,19 @@ export interface CreateIndexConfig {
|
|
|
20
20
|
*/
|
|
21
21
|
export class LocalIndex {
|
|
22
22
|
private readonly _folderPath: string;
|
|
23
|
+
private readonly _indexName: string;
|
|
24
|
+
|
|
23
25
|
private _data?: IndexData;
|
|
24
26
|
private _update?: IndexData;
|
|
25
27
|
|
|
26
28
|
/**
|
|
27
29
|
* Creates a new instance of LocalIndex.
|
|
28
30
|
* @param folderPath - Path to the index folder
|
|
31
|
+
* @param indexName - Optional name of the index file. Defaults to index.json
|
|
29
32
|
*/
|
|
30
|
-
public constructor(folderPath: string) {
|
|
33
|
+
public constructor(folderPath: string, indexName?: string) {
|
|
31
34
|
this._folderPath = folderPath;
|
|
35
|
+
this._indexName = indexName || "index.json";
|
|
32
36
|
}
|
|
33
37
|
|
|
34
38
|
/**
|
|
@@ -38,6 +42,13 @@ export class LocalIndex {
|
|
|
38
42
|
return this._folderPath;
|
|
39
43
|
}
|
|
40
44
|
|
|
45
|
+
/**
|
|
46
|
+
* Optional name of the index file.
|
|
47
|
+
*/
|
|
48
|
+
public get indexName(): string {
|
|
49
|
+
return this._indexName;
|
|
50
|
+
}
|
|
51
|
+
|
|
41
52
|
/**
|
|
42
53
|
* Begins an update to the index.
|
|
43
54
|
* @remarks
|
|
@@ -87,7 +98,8 @@ export class LocalIndex {
|
|
|
87
98
|
metadata_config: config.metadata_config ?? {},
|
|
88
99
|
items: []
|
|
89
100
|
};
|
|
90
|
-
|
|
101
|
+
|
|
102
|
+
await fs.writeFile(path.join(this._folderPath, this._indexName), JSON.stringify(this._data));
|
|
91
103
|
} catch (err: unknown) {
|
|
92
104
|
await this.deleteIndex();
|
|
93
105
|
throw new Error('Error creating index');
|
|
@@ -139,7 +151,7 @@ export class LocalIndex {
|
|
|
139
151
|
|
|
140
152
|
try {
|
|
141
153
|
// Save index
|
|
142
|
-
await fs.writeFile(path.join(this._folderPath,
|
|
154
|
+
await fs.writeFile(path.join(this._folderPath, this._indexName), JSON.stringify(this._update));
|
|
143
155
|
this._data = this._update;
|
|
144
156
|
this._update = undefined;
|
|
145
157
|
} catch(err: unknown) {
|
|
@@ -194,7 +206,7 @@ export class LocalIndex {
|
|
|
194
206
|
*/
|
|
195
207
|
public async isIndexCreated(): Promise<boolean> {
|
|
196
208
|
try {
|
|
197
|
-
await fs.access(path.join(this._folderPath,
|
|
209
|
+
await fs.access(path.join(this._folderPath, this.indexName));
|
|
198
210
|
return true;
|
|
199
211
|
} catch (err: unknown) {
|
|
200
212
|
return false;
|
|
@@ -307,7 +319,7 @@ export class LocalIndex {
|
|
|
307
319
|
throw new Error('Index does not exist');
|
|
308
320
|
}
|
|
309
321
|
|
|
310
|
-
const data = await fs.readFile(path.join(this._folderPath,
|
|
322
|
+
const data = await fs.readFile(path.join(this._folderPath, this.indexName));
|
|
311
323
|
this._data = JSON.parse(data.toString());
|
|
312
324
|
}
|
|
313
325
|
|
package/src/OpenAIEmbeddings.ts
CHANGED
|
@@ -140,7 +140,8 @@ export class OpenAIEmbeddings implements EmbeddingsModel {
|
|
|
140
140
|
|
|
141
141
|
// Process response
|
|
142
142
|
if (response.status < 300) {
|
|
143
|
-
|
|
143
|
+
const {data,model,usage} = response.data
|
|
144
|
+
return { status: 'success', output: data.sort((a, b) => a.index - b.index).map((item) => item.embedding), model, usage };
|
|
144
145
|
} else if (response.status == 429) {
|
|
145
146
|
return { status: 'rate_limited', message: `The embeddings API returned a rate limit error.` }
|
|
146
147
|
} else {
|
|
@@ -205,4 +206,4 @@ export class OpenAIEmbeddings implements EmbeddingsModel {
|
|
|
205
206
|
return response;
|
|
206
207
|
}
|
|
207
208
|
}
|
|
208
|
-
}
|
|
209
|
+
}
|
package/src/WebFetcher.ts
CHANGED
|
@@ -3,7 +3,6 @@ import { TextFetcher } from './types';
|
|
|
3
3
|
import * as cheerio from 'cheerio';
|
|
4
4
|
import TurndownService from 'turndown';
|
|
5
5
|
|
|
6
|
-
|
|
7
6
|
const ALLOWED_CONTENT_TYPES = [
|
|
8
7
|
"text/html",
|
|
9
8
|
"application/json",
|
|
@@ -45,7 +44,7 @@ export class WebFetcher implements TextFetcher {
|
|
|
45
44
|
} as WebFetcherConfig, config);
|
|
46
45
|
}
|
|
47
46
|
|
|
48
|
-
public async fetch(uri: string
|
|
47
|
+
public async fetch(uri: string, onDocument: (uri: string, text: string, docType?: string) => Promise<boolean>): Promise<boolean> {
|
|
49
48
|
const httpClient = axios.create({
|
|
50
49
|
validateStatus: () => true,
|
|
51
50
|
});
|
|
@@ -78,10 +77,10 @@ export class WebFetcher implements TextFetcher {
|
|
|
78
77
|
const docType = contentTypeArray[0] != 'text/plain' ? contentTypeArray[0].split('/')[1] : undefined;
|
|
79
78
|
if (docType == 'html' && this._config.htmlToMarkdown) {
|
|
80
79
|
const text = this.htmlToMarkdown(response.data, uri);
|
|
81
|
-
return
|
|
80
|
+
return await onDocument(uri, text, 'md');
|
|
82
81
|
} else {
|
|
83
82
|
const text = response.data;
|
|
84
|
-
return
|
|
83
|
+
return await onDocument(uri, text, docType);
|
|
85
84
|
}
|
|
86
85
|
}
|
|
87
86
|
|
package/src/index.ts
CHANGED
package/src/types.ts
CHANGED
|
@@ -56,7 +56,7 @@ export interface TextChunk {
|
|
|
56
56
|
}
|
|
57
57
|
|
|
58
58
|
export interface TextFetcher {
|
|
59
|
-
fetch(uri: string
|
|
59
|
+
fetch(uri: string, onDocument: (uri: string, text: string, docType?: string) => Promise<boolean>): Promise<boolean>;
|
|
60
60
|
}
|
|
61
61
|
|
|
62
62
|
export interface IndexStats {
|
package/src/vectra-cli.ts
CHANGED
|
@@ -5,6 +5,7 @@ import { LocalDocumentIndex } from "./LocalDocumentIndex";
|
|
|
5
5
|
import { WebFetcher } from './WebFetcher';
|
|
6
6
|
import { OpenAIEmbeddings } from './OpenAIEmbeddings';
|
|
7
7
|
import { Colorize } from './internals';
|
|
8
|
+
import { FileFetcher } from './FileFetcher';
|
|
8
9
|
|
|
9
10
|
export async function run() {
|
|
10
11
|
// prettier-ignore
|
|
@@ -77,16 +78,20 @@ export async function run() {
|
|
|
77
78
|
const uris = await getItemList(args.uri as string[], args.list as string, 'web page');
|
|
78
79
|
|
|
79
80
|
// Fetch web pages
|
|
80
|
-
const
|
|
81
|
-
|
|
81
|
+
const fileFetcher = new FileFetcher();
|
|
82
|
+
const webFetcher = new WebFetcher();
|
|
83
|
+
for (const path of uris) {
|
|
82
84
|
try {
|
|
83
|
-
console.log(Colorize.progress(`fetching ${
|
|
84
|
-
const
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
85
|
+
console.log(Colorize.progress(`fetching ${path}`));
|
|
86
|
+
const fetcher = path.startsWith('http') ? webFetcher : fileFetcher;
|
|
87
|
+
await fetcher.fetch(path, async (uri, text, docType) => {
|
|
88
|
+
console.log(Colorize.replaceLine(Colorize.progress(`indexing ${uri}`)));
|
|
89
|
+
await index.upsertDocument(uri, text, docType);
|
|
90
|
+
console.log(Colorize.replaceLine(Colorize.success(`added ${uri}`)));
|
|
91
|
+
return true;
|
|
92
|
+
});
|
|
88
93
|
} catch (err: unknown) {
|
|
89
|
-
console.log(Colorize.replaceLine(Colorize.error(`Error adding: ${
|
|
94
|
+
console.log(Colorize.replaceLine(Colorize.error(`Error adding: ${path}\n${(err as Error).message}`)));
|
|
90
95
|
}
|
|
91
96
|
}
|
|
92
97
|
})
|