vectra 0.2.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/LocalDocumentIndex.d.ts +5 -2
- package/lib/LocalDocumentIndex.d.ts.map +1 -1
- package/lib/LocalDocumentIndex.js +14 -8
- package/lib/LocalDocumentIndex.js.map +1 -1
- package/lib/OpenAIEmbeddings.d.ts.map +1 -1
- package/lib/OpenAIEmbeddings.js +1 -0
- package/lib/OpenAIEmbeddings.js.map +1 -1
- package/lib/TextSplitter.d.ts +2 -0
- package/lib/TextSplitter.d.ts.map +1 -1
- package/lib/TextSplitter.js +99 -52
- package/lib/TextSplitter.js.map +1 -1
- package/lib/WebFetcher.d.ts +6 -4
- package/lib/WebFetcher.d.ts.map +1 -1
- package/lib/WebFetcher.js +132 -52
- package/lib/WebFetcher.js.map +1 -1
- package/lib/types.d.ts +4 -1
- package/lib/types.d.ts.map +1 -1
- package/lib/vectra-cli.js +7 -7
- package/lib/vectra-cli.js.map +1 -1
- package/package.json +3 -1
- package/src/FileFetcher.ts +31 -0
- package/src/LocalDocumentIndex.ts +14 -8
- package/src/LocalIndex.ts +17 -5
- package/src/OpenAIEmbeddings.ts +4 -2
- package/src/TextSplitter.ts +101 -52
- package/src/WebFetcher.ts +159 -59
- package/src/index.ts +1 -0
- package/src/types.ts +1 -1
- package/src/vectra-cli.ts +18 -13
package/lib/WebFetcher.js
CHANGED
|
@@ -38,6 +38,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
38
38
|
exports.WebFetcher = void 0;
|
|
39
39
|
const axios_1 = __importDefault(require("axios"));
|
|
40
40
|
const cheerio = __importStar(require("cheerio"));
|
|
41
|
+
const turndown_1 = __importDefault(require("turndown"));
|
|
41
42
|
const ALLOWED_CONTENT_TYPES = [
|
|
42
43
|
"text/html",
|
|
43
44
|
"application/json",
|
|
@@ -62,59 +63,11 @@ const DEFAULT_HEADERS = {
|
|
|
62
63
|
class WebFetcher {
|
|
63
64
|
constructor(config) {
|
|
64
65
|
this._config = Object.assign({
|
|
65
|
-
|
|
66
|
+
htmlToMarkdown: true,
|
|
66
67
|
summarizeHtml: false,
|
|
67
68
|
}, config);
|
|
68
69
|
}
|
|
69
70
|
fetch(uri) {
|
|
70
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
71
|
-
const { data, contentType } = yield this.fetchPage(uri);
|
|
72
|
-
if (contentType === "text/html" && this._config.htmlToText) {
|
|
73
|
-
return this.extractText(data, uri, this._config.summarizeHtml);
|
|
74
|
-
}
|
|
75
|
-
else {
|
|
76
|
-
return data;
|
|
77
|
-
}
|
|
78
|
-
});
|
|
79
|
-
}
|
|
80
|
-
extractText(html, baseUrl, summarize) {
|
|
81
|
-
// Parse all elements including <noscript> tags
|
|
82
|
-
const $ = cheerio.load(html, { scriptingEnabled: true });
|
|
83
|
-
// If we want a summary, just get use the <body/>
|
|
84
|
-
let text = '';
|
|
85
|
-
$(`${summarize ? 'body ' : '*'}:not(style):not(script):not(svg)`).each((i, elem) => {
|
|
86
|
-
var _a, _b;
|
|
87
|
-
// Remove any children to avoid duplicate text
|
|
88
|
-
let content = $(elem).clone().children().remove().end().text().trim();
|
|
89
|
-
const $el = $(elem);
|
|
90
|
-
// Print links in markdown format
|
|
91
|
-
let href = $el.attr("href");
|
|
92
|
-
if (((_a = $el.prop("tagName")) === null || _a === void 0 ? void 0 : _a.toLowerCase()) === "a" && href) {
|
|
93
|
-
if (!href.startsWith("http")) {
|
|
94
|
-
// Try converting to a relevant link
|
|
95
|
-
try {
|
|
96
|
-
href = new URL(href, baseUrl).toString();
|
|
97
|
-
}
|
|
98
|
-
catch (_c) {
|
|
99
|
-
// Leave as is
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
// If the link has content, use that as the text
|
|
103
|
-
const altText = (_b = $el.find("img[alt]").attr("alt")) === null || _b === void 0 ? void 0 : _b.trim();
|
|
104
|
-
if (altText) {
|
|
105
|
-
content += ` ${altText}`;
|
|
106
|
-
}
|
|
107
|
-
text += ` [${content}](${href})`;
|
|
108
|
-
}
|
|
109
|
-
// otherwise just print the content
|
|
110
|
-
else if (content !== "") {
|
|
111
|
-
text += ` ${content}`;
|
|
112
|
-
}
|
|
113
|
-
});
|
|
114
|
-
// Remove newlines
|
|
115
|
-
return text.trim().replace(/\n+/g, ' ');
|
|
116
|
-
}
|
|
117
|
-
fetchPage(baseUrl) {
|
|
118
71
|
return __awaiter(this, void 0, void 0, function* () {
|
|
119
72
|
const httpClient = axios_1.default.create({
|
|
120
73
|
validateStatus: () => true,
|
|
@@ -122,11 +75,11 @@ class WebFetcher {
|
|
|
122
75
|
// Clone headers to avoid mutating the original
|
|
123
76
|
const headers = Object.assign({}, DEFAULT_HEADERS, this._config.headers);
|
|
124
77
|
// get hostname from url
|
|
125
|
-
const host = new URL(
|
|
78
|
+
const host = new URL(uri).hostname;
|
|
126
79
|
headers['Host'] = host;
|
|
127
80
|
headers['Alt-Used'] = host;
|
|
128
81
|
// Fetch page and check for errors
|
|
129
|
-
const response = yield httpClient.get(
|
|
82
|
+
const response = yield httpClient.get(uri, Object.assign({ headers }, this._config.requestConfig));
|
|
130
83
|
if (response.status >= 400) {
|
|
131
84
|
throw new Error(`Site returned an HTTP status of ${response.status}`);
|
|
132
85
|
}
|
|
@@ -136,9 +89,136 @@ class WebFetcher {
|
|
|
136
89
|
if (!contentTypeArray[0] || !ALLOWED_CONTENT_TYPES.includes(contentTypeArray[0])) {
|
|
137
90
|
throw new Error(`Site returned an invalid content type of ${contentType}`);
|
|
138
91
|
}
|
|
139
|
-
|
|
92
|
+
// Convert content type to doc type
|
|
93
|
+
const docType = contentTypeArray[0] != 'text/plain' ? contentTypeArray[0].split('/')[1] : undefined;
|
|
94
|
+
if (docType == 'html' && this._config.htmlToMarkdown) {
|
|
95
|
+
const text = this.htmlToMarkdown(response.data, uri);
|
|
96
|
+
return { text, docType: 'md' };
|
|
97
|
+
}
|
|
98
|
+
else {
|
|
99
|
+
const text = response.data;
|
|
100
|
+
return { text, docType };
|
|
101
|
+
}
|
|
140
102
|
});
|
|
141
103
|
}
|
|
104
|
+
htmlToMarkdown(html, baseUrl) {
|
|
105
|
+
var _a;
|
|
106
|
+
// Parse HTML and remove scripts
|
|
107
|
+
const $ = cheerio.load(html, { scriptingEnabled: true });
|
|
108
|
+
// Remove scripts and convert relative links to absolute
|
|
109
|
+
$('script').remove();
|
|
110
|
+
$('a').each((i, elem) => {
|
|
111
|
+
const $el = $(elem);
|
|
112
|
+
const href = $el.attr("href");
|
|
113
|
+
if (href && !href.startsWith("http")) {
|
|
114
|
+
// Try converting to an absolute link
|
|
115
|
+
try {
|
|
116
|
+
$el.attr("href", new URL(href, baseUrl).toString());
|
|
117
|
+
}
|
|
118
|
+
catch (_a) {
|
|
119
|
+
// Leave as is
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
});
|
|
123
|
+
// Convert to markdown
|
|
124
|
+
const body = (_a = $('body').html()) !== null && _a !== void 0 ? _a : '';
|
|
125
|
+
const turndownService = new turndown_1.default({
|
|
126
|
+
hr: '\n\n---\n\n',
|
|
127
|
+
});
|
|
128
|
+
convertTables(turndownService);
|
|
129
|
+
const md = turndownService.turndown(body);
|
|
130
|
+
// Remove any overly long header text
|
|
131
|
+
const contentStart = Math.min(md.indexOf('\n'), md.indexOf(' '));
|
|
132
|
+
if (contentStart > 64) {
|
|
133
|
+
return md.slice(contentStart);
|
|
134
|
+
}
|
|
135
|
+
else {
|
|
136
|
+
return md;
|
|
137
|
+
}
|
|
138
|
+
}
|
|
142
139
|
}
|
|
143
140
|
exports.WebFetcher = WebFetcher;
|
|
141
|
+
function convertTables(turndownService) {
|
|
142
|
+
turndownService.addRule('tableCell', {
|
|
143
|
+
filter: ['th', 'td'],
|
|
144
|
+
replacement: function (content, node) {
|
|
145
|
+
return cell(content, node);
|
|
146
|
+
}
|
|
147
|
+
});
|
|
148
|
+
turndownService.addRule('tableRow', {
|
|
149
|
+
filter: 'tr',
|
|
150
|
+
replacement: function (content, node) {
|
|
151
|
+
var borderCells = '';
|
|
152
|
+
var alignMap = { left: ':--', right: '--:', center: ':-:' };
|
|
153
|
+
if (isHeadingRow(node)) {
|
|
154
|
+
for (var i = 0; i < node.childNodes.length; i++) {
|
|
155
|
+
var border = '---';
|
|
156
|
+
var align = (node.childNodes[i].getAttribute('align') || '').toLowerCase();
|
|
157
|
+
if (align)
|
|
158
|
+
border = alignMap[align] || border;
|
|
159
|
+
borderCells += cell(border, node.childNodes[i]);
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
return '\n' + content + (borderCells ? '\n' + borderCells : '');
|
|
163
|
+
}
|
|
164
|
+
});
|
|
165
|
+
turndownService.addRule('table', {
|
|
166
|
+
filter: ['table'],
|
|
167
|
+
replacement: function (content, node) {
|
|
168
|
+
// Ensure there are no blank lines
|
|
169
|
+
content = content.replace('\n\n', '\n');
|
|
170
|
+
return '\n\n' + content + '\n\n';
|
|
171
|
+
}
|
|
172
|
+
});
|
|
173
|
+
turndownService.addRule('tableSection', {
|
|
174
|
+
filter: ['thead', 'tbody', 'tfoot'],
|
|
175
|
+
replacement: function (content) {
|
|
176
|
+
return content;
|
|
177
|
+
}
|
|
178
|
+
});
|
|
179
|
+
}
|
|
180
|
+
const indexOf = Array.prototype.indexOf;
|
|
181
|
+
const every = Array.prototype.every;
|
|
182
|
+
// A tr is a heading row if:
|
|
183
|
+
// - the parent is a THEAD
|
|
184
|
+
// - or if its the first child of the TABLE or the first TBODY (possibly
|
|
185
|
+
// following a blank THEAD)
|
|
186
|
+
// - and every cell is a TH
|
|
187
|
+
function isHeadingRow(tr) {
|
|
188
|
+
var parentNode = tr.parentNode;
|
|
189
|
+
return (parentNode.nodeName === 'THEAD' ||
|
|
190
|
+
(parentNode.firstChild === tr &&
|
|
191
|
+
(parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) &&
|
|
192
|
+
every.call(tr.childNodes, function (n) { return n.nodeName === 'TH'; })));
|
|
193
|
+
}
|
|
194
|
+
function isFirstTbody(element) {
|
|
195
|
+
var previousSibling = element.previousSibling;
|
|
196
|
+
return (element.nodeName === 'TBODY' && (!previousSibling ||
|
|
197
|
+
(previousSibling.nodeName === 'THEAD' &&
|
|
198
|
+
/^\s*$/i.test(previousSibling.textContent))));
|
|
199
|
+
}
|
|
200
|
+
function cell(content, node) {
|
|
201
|
+
var index = indexOf.call(node.parentNode.childNodes, node);
|
|
202
|
+
var prefix = ' ';
|
|
203
|
+
if (index === 0) {
|
|
204
|
+
prefix = '| ';
|
|
205
|
+
}
|
|
206
|
+
return cleanContent(prefix + content + ' |');
|
|
207
|
+
}
|
|
208
|
+
function cleanContent(content) {
|
|
209
|
+
let output = '';
|
|
210
|
+
const chars = ['\n', '\r', '\t', '\f', '\v', '\u00a0', '\u2028', '\u2029', ' '];
|
|
211
|
+
for (let i = 0; i < content.length; i++) {
|
|
212
|
+
if (chars.includes(content[i])) {
|
|
213
|
+
if (output[output.length - 1] != ' ') {
|
|
214
|
+
output += ' ';
|
|
215
|
+
}
|
|
216
|
+
continue;
|
|
217
|
+
}
|
|
218
|
+
else {
|
|
219
|
+
output += content[i];
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
return output;
|
|
223
|
+
}
|
|
144
224
|
//# sourceMappingURL=WebFetcher.js.map
|
package/lib/WebFetcher.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"WebFetcher.js","sourceRoot":"","sources":["../src/WebFetcher.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,kDAAkD;
|
|
1
|
+
{"version":3,"file":"WebFetcher.js","sourceRoot":"","sources":["../src/WebFetcher.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,kDAAkD;AAElD,iDAAmC;AACnC,wDAAwC;AAGxC,MAAM,qBAAqB,GAAG;IAC1B,WAAW;IACX,kBAAkB;IAClB,iBAAiB;IACjB,wBAAwB;IACxB,YAAY;CACf,CAAC;AAGF,MAAM,eAAe,GAAG;IACpB,MAAM,EAAE,uFAAuF;IAC/F,iBAAiB,EAAE,eAAe;IAClC,iBAAiB,EAAE,gBAAgB;IACnC,UAAU,EAAE,4BAA4B;IACxC,UAAU,EAAE,YAAY;IACxB,IAAI,EAAE,4BAA4B;IAClC,OAAO,EAAE,yBAAyB;IAClC,gBAAgB,EAAE,UAAU;IAC5B,gBAAgB,EAAE,UAAU;IAC5B,gBAAgB,EAAE,YAAY;IAC9B,2BAA2B,EAAE,GAAG;IAChC,YAAY,EAAE,gFAAgF;CACjG,CAAC;AASF,MAAa,UAAU;IAGnB,YAAmB,MAAkC;QACjD,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC;YACzB,cAAc,EAAE,IAAI;YACpB,aAAa,EAAE,KAAK;SACH,EAAE,MAAM,CAAC,CAAC;IACnC,CAAC;IAEY,KAAK,CAAC,GAAW;;YAC1B,MAAM,UAAU,GAAG,eAAK,CAAC,MAAM,CAAC;gBAC5B,cAAc,EAAE,GAAG,EAAE,CAAC,IAAI;aAC7B,CAAC,CAAC;YAEH,+CAA+C;YAC/C,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,eAAe,EAAE,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAA;YAExE,wBAAwB;YACxB,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACnC,OAAO,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC;YACvB,OAAO,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC;YAE3B,kCAAkC;YAClC,MAAM,QAAQ,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,GAAG,kBACrC,OAAO,IACJ,IAAI,CAAC,OAAO,CAAC,aAAa,EAC/B,CAAC;YACH,IAAI,QAAQ,CAAC,MAAM,IAAI,GAAG,EAAE;gBACxB,MAAM,IAAI,KAAK,CAAC,mCAAmC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;aACzE;YAED,+BAA+B;YAC/B,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;YACrD,MAAM,gBAAgB,GAAG,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAChD,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,EAAE;gBAC9E,MAAM,IAAI,KAAK,CAAC,4CAA4C,WAAW,EAAE,CAAC,CAAC;aAC9E;YAED,mCAAmC;YACnC,MAAM,OAAO,GAAG,gBAAgB,CAAC,CAAC,CAAC,IAAI,YAAY,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YACpG,IAAI,OAAO,IAAI,MAAM,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;gBAClD,MAAM,IAAI,GAAG,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;gBACrD,OAAO,EAAC,IAAI,EAAE,OAAO,EAAE,IAAI,EAAC,CAAC;aAChC;
iBAAM;gBACH,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;gBAC3B,OAAO,EAAC,IAAI,EAAE,OAAO,EAAC,CAAC;aAC1B;QACL,CAAC;KAAA;IAGO,cAAc,CAAC,IAAY,EAAE,OAAe;;QAChD,gCAAgC;QAChC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,gBAAgB,EAAE,IAAI,EAAE,CAAC,CAAC;QAEzD,wDAAwD;QACxD,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,CAAC;QACrB,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;YACpB,MAAM,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;YACpB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC9B,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE;gBAClC,qCAAqC;gBACrC,IAAI;oBACA,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;iBACvD;gBAAC,WAAM;oBACJ,cAAc;iBACjB;aACJ;QACL,CAAC,CAAC,CAAC;QAEH,sBAAsB;QACtB,MAAM,IAAI,GAAG,MAAA,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,mCAAI,EAAE,CAAC;QACpC,MAAM,eAAe,GAAG,IAAI,kBAAe,CAAC;YACxC,EAAE,EAAE,aAAa;SACpB,CAAC,CAAC;QACH,aAAa,CAAC,eAAe,CAAC,CAAC;QAC/B,MAAM,EAAE,GAAG,eAAe,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAE1C,qCAAqC;QACrC,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;QACjE,IAAI,YAAY,GAAG,EAAE,EAAE;YACnB,OAAO,EAAE,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;SACjC;aAAM;YACH,OAAO,EAAE,CAAC;SACb;IACL,CAAC;CACJ;AAtFD,gCAsFC;AAED,SAAS,aAAa,CAAC,eAAgC;IACnD,eAAe,CAAC,OAAO,CAAC,WAAW,EAAE;QACjC,MAAM,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC;QACpB,WAAW,EAAE,UAAU,OAAO,EAAE,IAAI;YAChC,OAAO,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAA;QAC9B,CAAC;KACJ,CAAC,CAAC;IAEH,eAAe,CAAC,OAAO,CAAC,UAAU,EAAE;QAChC,MAAM,EAAE,IAAI;QACZ,WAAW,EAAE,UAAU,OAAO,EAAE,IAAI;YAChC,IAAI,WAAW,GAAG,EAAE,CAAA;YACpB,IAAI,QAAQ,GAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,CAAA;YAEhE,IAAI,YAAY,CAAC,IAAI,CAAC,EAAE;gBACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;oBAC7C,IAAI,MAAM,GAAG,KAAK,CAAA;oBAClB,IAAI,KAAK,GAAW,CAChB,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CACjD,CAAC,WAAW,EAAE,CAAA;oBAEf,IAAI,KAAK;wBAAE,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,MAA
M,CAAA;oBAE7C,WAAW,IAAI,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAA;iBAClD;aACJ;YACD,OAAO,IAAI,GAAG,OAAO,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,GAAG,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC,CAAA;QACnE,CAAC;KACJ,CAAC,CAAC;IAEH,eAAe,CAAC,OAAO,CAAC,OAAO,EAAE;QAC7B,MAAM,EAAE,CAAC,OAAO,CAAC;QACjB,WAAW,EAAE,UAAU,OAAO,EAAE,IAAI;YAChC,kCAAkC;YAClC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAA;YACvC,OAAO,MAAM,GAAG,OAAO,GAAG,MAAM,CAAA;QACpC,CAAC;KACJ,CAAC,CAAC;IAEH,eAAe,CAAC,OAAO,CAAC,cAAc,EAAE;QACpC,MAAM,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC;QACnC,WAAW,EAAE,UAAU,OAAO;YAC1B,OAAO,OAAO,CAAA;QAClB,CAAC;KACJ,CAAC,CAAC;AACP,CAAC;AAED,MAAM,OAAO,GAAG,KAAK,CAAC,SAAS,CAAC,OAAO,CAAA;AACvC,MAAM,KAAK,GAAG,KAAK,CAAC,SAAS,CAAC,KAAK,CAAA;AAEnC,4BAA4B;AAC5B,0BAA0B;AAC1B,wEAAwE;AACxE,6BAA6B;AAC7B,2BAA2B;AAC3B,SAAS,YAAY,CAAC,EAAO;IACzB,IAAI,UAAU,GAAG,EAAE,CAAC,UAAU,CAAA;IAC9B,OAAO,CACH,UAAU,CAAC,QAAQ,KAAK,OAAO;QAC/B,CACI,UAAU,CAAC,UAAU,KAAK,EAAE;YAC5B,CAAC,UAAU,CAAC,QAAQ,KAAK,OAAO,IAAI,YAAY,CAAC,UAAU,CAAC,CAAC;YAC7D,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,UAAU,EAAE,UAAU,CAAC,IAAI,OAAO,CAAC,CAAC,QAAQ,KAAK,IAAI,CAAA,CAAC,CAAC,CAAC,CACzE,CACJ,CAAA;AACL,CAAC;AAED,SAAS,YAAY,CAAC,OAAY;IAC9B,IAAI,eAAe,GAAG,OAAO,CAAC,eAAe,CAAA;IAC7C,OAAO,CACH,OAAO,CAAC,QAAQ,KAAK,OAAO,IAAI,CAC5B,CAAC,eAAe;QAChB,CACI,eAAe,CAAC,QAAQ,KAAK,OAAO;YACpC,QAAQ,CAAC,IAAI,CAAC,eAAe,CAAC,WAAW,CAAC,CAC7C,CACJ,CACJ,CAAA;AACL,CAAC;AAED,SAAS,IAAI,CAAC,OAAe,EAAE,IAAS;IACpC,IAAI,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,UAAU,EAAE,IAAI,CAAC,CAAA;IAC1D,IAAI,MAAM,GAAG,GAAG,CAAA;IAChB,IAAI,KAAK,KAAK,CAAC,EAAE;QACb,MAAM,GAAG,IAAI,CAAA;KAChB;IACD,OAAO,YAAY,CAAC,MAAM,GAAG,OAAO,GAAG,IAAI,CAAC,CAAC;AACjD,CAAC;AAED,SAAS,YAAY,CAAC,OAAe;IACjC,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,MAAM,KAAK,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,CAAC,CAAC;IAChF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;QACrC,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE;YAC5B,IAAI,MA
AM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,GAAG,EAAE;gBAClC,MAAM,IAAI,GAAG,CAAC;aACjB;YACD,SAAS;SACZ;aAAM;YACH,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,CAAC;SACxB;KACJ;IACD,OAAO,MAAM,CAAC;AAClB,CAAC"}
|
package/lib/types.d.ts
CHANGED
|
@@ -47,7 +47,10 @@ export interface TextChunk {
|
|
|
47
47
|
endOverlap: number[];
|
|
48
48
|
}
|
|
49
49
|
export interface TextFetcher {
|
|
50
|
-
fetch(uri: string): Promise<
|
|
50
|
+
fetch(uri: string): Promise<{
|
|
51
|
+
text: string;
|
|
52
|
+
docType: string | undefined;
|
|
53
|
+
}>;
|
|
51
54
|
}
|
|
52
55
|
export interface IndexStats {
|
|
53
56
|
version: number;
|
package/lib/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,WAAW,eAAe;IAC5B;;OAEG;IACH,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAE3B;;;;OAIG;IACH,gBAAgB,CAAC,MAAM,EAAE,MAAM,GAAC,MAAM,EAAE,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;CAC1E;AAED;;;;;;GAMG;AACH,MAAM,MAAM,wBAAwB,GAAG,SAAS,GAAG,OAAO,GAAG,cAAc,CAAC;AAE5E;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAC/B;;OAEG;IACH,MAAM,EAAE,wBAAwB,CAAC;IAEjC;;OAEG;IACH,MAAM,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IAEpB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,SAAS;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,UAAU,EAAE,MAAM,EAAE,CAAC;CACxB;AAED,MAAM,WAAW,WAAW;IACxB,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,WAAW,eAAe;IAC5B;;OAEG;IACH,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAE3B;;;;OAIG;IACH,gBAAgB,CAAC,MAAM,EAAE,MAAM,GAAC,MAAM,EAAE,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;CAC1E;AAED;;;;;;GAMG;AACH,MAAM,MAAM,wBAAwB,GAAG,SAAS,GAAG,OAAO,GAAG,cAAc,CAAC;AAE5E;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAC/B;;OAEG;IACH,MAAM,EAAE,wBAAwB,CAAC;IAEjC;;OAEG;IACH,MAAM,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IAEpB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,SAAS;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,UAAU,EAAE,MAAM,EAAE,CAAC;CACxB;AAED,MAAM,WAAW,WAAW;IACxB,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,GAAC,SAAS,CAAC;KAAE,CAAC,CAAC;CAC7E;AAED,MAAM,WAAW,UAAU;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,EAAE;QACb,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;KACtB,CAAC;IACF,KAAK,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,SAAS,CAAC,SAAS,GAAG,MAAM,CAAC,MAAM,EAAC,aAAa,CAAC;IAC/D,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,SAAS,CAAC;IACpB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAE3B;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,GAAC,MAAM,GAAC,OAAO,CAAC;IAE9B;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,GAAC,MAAM,GAAC,OAAO,CAAC;IAE9B;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;OAEG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;OAEG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB;;OAEG;IACH,KAAK,CAAC,EAAE,CAAC,MAAM,GAAC,MAAM,CAAC,EAAE,CAAC;IAE1B;;OAEG;IACH,MAAM,CAAC,EAAE,CAAC,MAAM,GAAC,MAAM,CAAC,EAAE,CAAC;IAE3B;;OAEG;IACH,MAAM,CAAC,EAAE,cAAc,EAAE,CAAC;IAE1B;;OAEG;IACH,KAAK,CAAC,EAAE,cAAc,EAAE,CAAC;IAEzB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CAC1B;AAED,MAAM,MAAM,aAAa,GAAG,MAAM,GAAC,MAAM,GAAC,OAAO,CAAC;AAElD,MAAM,WAAW,WAAW,CAAC,SAAS,GAAG,MAAM,CAAC,MAAM,EAAC,aAAa,CAAC;IACjE,IAAI,EAAE,SAAS,CAAC,SAAS,CAAC,CAAC;IAC3B,KAAK,EAAE,MAAM,CAAC;CACjB;AAED,MA
AM,WAAW,SAAS;IACtB,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC;IACjC,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;CAClC;AAED,MAAM,WAAW,qBAAqB;IAClC,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,CAAC,GAAG,EAAE,MAAM,GAAG,aAAa,CAAC;CAChC;AAED,MAAM,WAAW,oBAAoB;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,eAAe,EAAE;QACb,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;KACtB,CAAC;CACL;AAED,MAAM,WAAW,mBAAmB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;CACjB"}
|
package/lib/vectra-cli.js
CHANGED
|
@@ -117,9 +117,9 @@ function run() {
|
|
|
117
117
|
for (const uri of uris) {
|
|
118
118
|
try {
|
|
119
119
|
console.log(internals_1.Colorize.progress(`fetching ${uri}`));
|
|
120
|
-
const
|
|
120
|
+
const { text, docType } = yield fetcher.fetch(uri);
|
|
121
121
|
console.log(internals_1.Colorize.replaceLine(internals_1.Colorize.progress(`indexing ${uri}`)));
|
|
122
|
-
yield index.upsertDocument(uri,
|
|
122
|
+
yield index.upsertDocument(uri, text, docType);
|
|
123
123
|
console.log(internals_1.Colorize.replaceLine(internals_1.Colorize.success(`added ${uri}`)));
|
|
124
124
|
}
|
|
125
125
|
catch (err) {
|
|
@@ -179,25 +179,25 @@ function run() {
|
|
|
179
179
|
.option('document-count', {
|
|
180
180
|
alias: 'dc',
|
|
181
181
|
describe: 'max number of documents to return (defaults to 10)',
|
|
182
|
-
type: '
|
|
182
|
+
type: 'number',
|
|
183
183
|
default: 10
|
|
184
184
|
})
|
|
185
185
|
.option('chunk-count', {
|
|
186
186
|
alias: 'cc',
|
|
187
187
|
describe: 'max number of chunks to return (defaults to 50)',
|
|
188
|
-
type: '
|
|
188
|
+
type: 'number',
|
|
189
189
|
default: 50
|
|
190
190
|
})
|
|
191
191
|
.option('section-count', {
|
|
192
192
|
alias: 'sc',
|
|
193
193
|
describe: 'max number of document sections to render (defaults to 1)',
|
|
194
|
-
type: '
|
|
194
|
+
type: 'number',
|
|
195
195
|
default: 1
|
|
196
196
|
})
|
|
197
197
|
.option('tokens', {
|
|
198
198
|
alias: 't',
|
|
199
199
|
describe: 'max number of tokens to render for each document section (defaults to 2000)',
|
|
200
|
-
type: '
|
|
200
|
+
type: 'number',
|
|
201
201
|
default: 2000
|
|
202
202
|
})
|
|
203
203
|
.option('format', {
|
|
@@ -233,7 +233,7 @@ function run() {
|
|
|
233
233
|
const sections = yield result.renderSections(args.tokens, args.sectionCount);
|
|
234
234
|
for (let i = 0; i < sections.length; i++) {
|
|
235
235
|
const section = sections[i];
|
|
236
|
-
console.log(internals_1.Colorize.title(args.sectionCount
|
|
236
|
+
console.log(internals_1.Colorize.title(args.sectionCount == 1 ? 'Section' : `Section ${i + 1}`));
|
|
237
237
|
console.log(internals_1.Colorize.value('score', section.score));
|
|
238
238
|
console.log(internals_1.Colorize.value('tokens', section.tokenCount));
|
|
239
239
|
console.log(internals_1.Colorize.output(section.text));
|
package/lib/vectra-cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vectra-cli.js","sourceRoot":"","sources":["../src/vectra-cli.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,wDAAgC;AAChC,2CAAwC;AACxC,6DAA0D;AAC1D,6CAA0C;AAC1C,yDAAsD;AACtD,2CAAuC;AAEvC,SAAsB,GAAG;;QACrB,kBAAkB;QAClB,MAAM,IAAI,GAAG,MAAM,IAAA,eAAK,EAAC,IAAA,iBAAO,EAAC,OAAO,CAAC,IAAI,CAAC,CAAC;aAC1C,UAAU,CAAC,QAAQ,CAAC;aACpB,OAAO,CAAC,gBAAgB,EAAE,0BAA0B,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YACtE,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,CAAC,WAAW,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;QAClE,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,gBAAgB,EAAE,gCAAgC,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YAC5E,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,MAAM,KAAK,CAAC,WAAW,EAAE,CAAC;QAC9B,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,aAAa,EAAE,wCAAwC,EAAE,CAAC,KAAK,EAAE,EAAE;YACxE,OAAO,KAAK;iBACP,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gFAAgF;gBAC1F,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,KAAK,EAAE;gBACX,KAAK,EAAE,GAAG;gBACV,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,sCAAsC;gBAChD,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,sDAAsD;gBAChE,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,YAAY,EAAE;gBAClB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,0DAA0D;gBACpE,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,GAAG;aACf,CAAC;iBACD,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE;oBAChD,OAAO,IAAI,CAAC;iBACf;qBAAM,IAAI,OAAO,IAAI,CAAC,IAAI,IAAI,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;oBACpE,OAAO,IAAI,CAAC;iBACf;qBAAM;oBACH,MAAM,IAAI,KAAK,CAAC,mJAAmJ,CAAC,CAAC;iBACxK;YACL,CAAC,CAAC;iBACD,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAChC,CAAC,EAAE,CAAO,IAAI,
EAAE,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC,CAAC;YAEzD,oBAAoB;YACpB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,OAAO,CAAC,CAAC,CAAC;YACzE,MAAM,UAAU,GAAG,IAAI,mCAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC;YAElG,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC;gBACjC,UAAU;gBACV,UAAU;gBACV,cAAc,EAAE;oBACZ,SAAS,EAAE,IAAI,CAAC,SAAS;iBAC5B;aACJ,CAAC,CAAC;YAEH,oBAAoB;YACpB,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,GAAe,EAAE,IAAI,CAAC,IAAc,EAAE,UAAU,CAAC,CAAC;YAEtF,kBAAkB;YAClB,MAAM,OAAO,GAAG,IAAI,uBAAU,EAAE,CAAC;YACjC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;gBACpB,IAAI;oBACA,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,QAAQ,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC,CAAC;oBAClD,MAAM,OAAO,GAAI,MAAM,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"vectra-cli.js","sourceRoot":"","sources":["../src/vectra-cli.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,wDAAgC;AAChC,2CAAwC;AACxC,6DAA0D;AAC1D,6CAA0C;AAC1C,yDAAsD;AACtD,2CAAuC;AAEvC,SAAsB,GAAG;;QACrB,kBAAkB;QAClB,MAAM,IAAI,GAAG,MAAM,IAAA,eAAK,EAAC,IAAA,iBAAO,EAAC,OAAO,CAAC,IAAI,CAAC,CAAC;aAC1C,UAAU,CAAC,QAAQ,CAAC;aACpB,OAAO,CAAC,gBAAgB,EAAE,0BAA0B,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YACtE,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,CAAC,WAAW,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;QAClE,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,gBAAgB,EAAE,gCAAgC,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YAC5E,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,MAAM,KAAK,CAAC,WAAW,EAAE,CAAC;QAC9B,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,aAAa,EAAE,wCAAwC,EAAE,CAAC,KAAK,EAAE,EAAE;YACxE,OAAO,KAAK;iBACP,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gFAAgF;gBAC1F,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,KAAK,EAAE;gBACX,KAAK,EAAE,GAAG;gBACV,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,sCAAsC;gBAChD,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,sDAAsD;gBAChE,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,YAAY,EAAE;gBAClB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,0DAA0D;gBACpE,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,GAAG;aACf,CAAC;iBACD,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE;oBAChD,OAAO,IAAI,CAAC;iBACf;qBAAM,IAAI,OAAO,IAAI,CAAC,IAAI,IAAI,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;oBACpE,OAAO,IAAI,CAAC;iBACf;qBAAM;oBACH,MAAM,IAAI,KAAK,CAAC,mJAAmJ,CAAC,CAAC;iBACxK;YACL,CAAC,CAAC;iBACD,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAChC,CAAC,EAAE,CAAO,IAAI,
EAAE,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC,CAAC;YAEzD,oBAAoB;YACpB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,OAAO,CAAC,CAAC,CAAC;YACzE,MAAM,UAAU,GAAG,IAAI,mCAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC;YAElG,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC;gBACjC,UAAU;gBACV,UAAU;gBACV,cAAc,EAAE;oBACZ,SAAS,EAAE,IAAI,CAAC,SAAS;iBAC5B;aACJ,CAAC,CAAC;YAEH,oBAAoB;YACpB,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,GAAe,EAAE,IAAI,CAAC,IAAc,EAAE,UAAU,CAAC,CAAC;YAEtF,kBAAkB;YAClB,MAAM,OAAO,GAAG,IAAI,uBAAU,EAAE,CAAC;YACjC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;gBACpB,IAAI;oBACA,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,QAAQ,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC,CAAC;oBAClD,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,GAAI,MAAM,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;oBACpD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,QAAQ,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;oBACxE,MAAM,KAAK,CAAC,cAAc,CAAC,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;oBAC/C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,OAAO,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;iBACvE;gBAAC,OAAO,GAAY,EAAE;oBACnB,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,KAAK,CAAC,iBAAiB,GAAG,KAAM,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;iBACxG;aACJ;QACL,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,gBAAgB,EAAE,6CAA6C,EAAE,CAAC,KAAK,EAAE,EAAE;YAChF,OAAO,KAAK;iBACP,MAAM,CAAC,KAAK,EAAE;gBACX,KAAK,EAAE,GAAG;gBACV,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,6BAA6B;gBACvC,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,yDAAyD;gBACnE,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE;oBAChD,OAAO,IAAI,CAAC;iBACf;qBAAM,IAAI,OAAO,IAAI,CAAC,IAAI,IAAI,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;oBACpE,OAAO,IAAI,CAAC;iBACf;qBAAM;oBACH,MAAM,IAAI,KAAK,CAAC,mJAAmJ,CAAC,CAAC;iBACxK;YACL,CAAC,CAAC,CAAC;QACX,CAAC,EAAE,C
AAO,IAAI,EAAE,EAAE;YACd,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YAErD,oBAAoB;YACpB,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,GAAe,EAAE,IAAI,CAAC,IAAc,EAAE,UAAU,CAAC,CAAC;YAEtF,mBAAmB;YACnB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;gBACpB,OAAO,CAAC,GAAG,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC;gBAC/B,MAAM,KAAK,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;aACnC;QACL,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,eAAe,EAAE,oCAAoC,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YAC/E,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,eAAe,EAAE,CAAC;YAC5C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC;YAC3C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QACxC,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,uBAAuB,EAAE,uBAAuB,EAAE,CAAC,KAAK,EAAE,EAAE;YACjE,OAAO,KAAK;iBACP,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gFAAgF;aAC7F,CAAC;iBACD,MAAM,CAAC,gBAAgB,EAAE;gBACtB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,oDAAoD;gBAC9D,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;aACd,CAAC;iBACD,MAAM,CAAC,aAAa,EAAE;gBACnB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,iDAAiD;gBAC3D,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;aACd,CAAC;iBACD,MAAM,CAAC,eAAe,EAAE;gBACrB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,2DAA2D;gBACrE,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,CAAC;aACb,CAAC;iBACD,MAAM,CAAC,QAAQ,EAAE;gBACd,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,6EAA6E;gBACvF,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,IAAI;aAChB,CAAC;iBACD,MAAM,CAAC,QAAQ,EAAE;gBACd,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,wDAAwD;gBAClE,OAAO,EAAE,CAAC,UAAU,EAAE,OAAO,EAAE,QAAQ,CAAC;gBACxC,OAAO,EAAE,UAAU;aACtB,CAAC;iBACD,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAChC,CAAC,EAAE,CAAO,IAAI,EAAE,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC;YAE9C,oBAAoB;YACpB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,OAAO,CAAC,CAAC,CAAC;YACzE,MAAM,UAAU,GAAG,IAAI,mCAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC;YAElG,mBAAmB;YACnB,MAAM,UAA
U,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC;gBACjC,UAAU;gBACV,UAAU;aACb,CAAC,CAAC;YAEH,cAAc;YACd,MAAM,KAAK,GAAG,IAAI,CAAC,KAAe,CAAC;YACnC,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,cAAc,CAAC,KAAK,EAAE;gBAC9C,YAAY,EAAE,IAAI,CAAC,aAAa;gBAChC,SAAS,EAAE,IAAI,CAAC,UAAU;aAC7B,CAAC,CAAC;YAEH,iBAAiB;YACjB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;gBAC1B,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;gBACzC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;gBACnD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;gBAC5D,IAAI,IAAI,CAAC,MAAM,IAAI,UAAU,EAAE;oBAC3B,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC;oBAC7E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;wBACtC,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;wBAC5B,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;wBACrF,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;wBACpD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;wBAC1D,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;qBAC9C;iBACJ;qBAAM,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ,EAAE;oBAChC,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,QAAQ,EAAE,CAAC;oBACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;wBAC3C,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;wBAC/B,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;wBAC9C,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;wBAC1C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;wBAC9C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;wBAClD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC;wBAClD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,CAAC;wBAC9C,OAAO,CAAC,GAAG,C
AAC,oBAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;qBACtE;iBACJ;aACJ;QACL,CAAC,CAAA,CAAC;aACD,IAAI,EAAE;aACN,aAAa,EAAE;aACf,UAAU,EAAE,CAAC;IACtB,CAAC;CAAA;AAzND,kBAyNC;AAGD,SAAe,WAAW,CAAC,KAAe,EAAE,QAAgB,EAAE,OAAe;;QACzE,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE;YAC1C,OAAO,KAAK,CAAC;SAChB;aAAM,IAAI,OAAO,QAAQ,IAAI,QAAQ,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;YAClE,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAClD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;SACxF;aAAM;YACH,MAAM,IAAI,KAAK,CAAC,+CAA+C,OAAO,6EAA6E,CAAC,CAAA;SACvJ;IACL,CAAC;CAAA"}
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "vectra",
|
|
3
3
|
"author": "Steven Ickman",
|
|
4
4
|
"description": "A vector database that uses the local file system for storage.",
|
|
5
|
-
"version": "0.2.2",
|
|
5
|
+
"version": "0.4.0",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"keywords": [
|
|
8
8
|
"gpt"
|
|
@@ -33,6 +33,7 @@
|
|
|
33
33
|
"gpt-3-encoder": "1.1.4",
|
|
34
34
|
"json-colorizer": "^2.2.2",
|
|
35
35
|
"openai": "^3.2.1",
|
|
36
|
+
"turndown": "^7.1.2",
|
|
36
37
|
"uuid": "^9.0.0",
|
|
37
38
|
"yargs": "^17.7.2"
|
|
38
39
|
},
|
|
@@ -42,6 +43,7 @@
|
|
|
42
43
|
"@types/node": "^14.14.31",
|
|
43
44
|
"@types/mocha": "^8.2.0",
|
|
44
45
|
"@types/assert": "^1.5.3",
|
|
46
|
+
"@types/turndown": "^5.0.1",
|
|
45
47
|
"@types/uuid": "9.0.1",
|
|
46
48
|
"@types/yargs": "17.0.24",
|
|
47
49
|
"mocha": "10.2.0",
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { TextFetcher } from './types';
|
|
2
|
+
import * as fs from 'fs/promises';
|
|
3
|
+
import * as path from 'path';
|
|
4
|
+
|
|
5
|
+
/**
 * Fetches text documents from the local file system.
 *
 * @remarks
 * A `uri` is treated as a file system path. Directories are walked recursively and every
 * file found is read as UTF-8 text and handed to the `onDocument` callback.
 */
export class FileFetcher implements TextFetcher {
    /**
     * Reads the file at `uri`, or every file beneath it when `uri` is a directory.
     * @param uri - Path of the file or directory to read.
     * @param onDocument - Callback invoked once per file with the file's path, its UTF-8 text,
     * and a doc type derived from the lower-cased file extension (`undefined` when the file
     * name has no extension).
     * @returns For a single file, whatever `onDocument` returned. For a directory, or for a
     * path that cannot be stat'ed, always `true`.
     */
    public async fetch(uri: string, onDocument: (uri: string, text: string, docType?: string | undefined) => Promise<boolean>): Promise<boolean> {
        // Does path exist and is it a directory?
        let isDirectory: boolean;
        try {
            const stat = await fs.stat(uri);
            isDirectory = stat.isDirectory();
        } catch {
            // Best effort: a path that can't be stat'ed is skipped rather than reported.
            return true;
        }

        // If directory, read all entries and recurse
        if (isDirectory) {
            const files = await fs.readdir(uri);
            for (const file of files) {
                // NOTE(review): the result of the nested fetch is intentionally left untouched
                // here (it was ignored before as well) - confirm whether a `false` from
                // onDocument is meant to stop the walk.
                await this.fetch(path.join(uri, file), onDocument);
            }
            return true;
        }

        // Read file and call onDocument
        const text = await fs.readFile(uri, 'utf8');

        // Only the final path segment may contribute an extension. Splitting the whole path on
        // '.' would turn "docs/v1.2/README" into the doc type "2/readme".
        const ext = path.extname(uri);
        const docType = ext.length > 1 ? ext.substring(1).toLowerCase() : undefined;
        return await onDocument(uri, text, docType);
    }
}
|
|
@@ -129,10 +129,13 @@ export class LocalDocumentIndex extends LocalIndex {
|
|
|
129
129
|
* @remarks
|
|
130
130
|
* A new update is started if one is not already in progress. If a document with the same uri
|
|
131
131
|
* already exists, it will be replaced.
|
|
132
|
-
* @param
|
|
132
|
+
* @param uri - Document URI
|
|
133
|
+
* @param text - Document text
|
|
134
|
+
* @param docType - Optional. Document type
|
|
135
|
+
* @param metadata - Optional. Document metadata to index
|
|
133
136
|
* @returns Inserted document
|
|
134
137
|
*/
|
|
135
|
-
public async upsertDocument(uri: string, text: string, metadata?: Record<string, MetadataTypes>): Promise<LocalDocument> {
|
|
138
|
+
public async upsertDocument(uri: string, text: string, docType?: string, metadata?: Record<string, MetadataTypes>): Promise<LocalDocument> {
|
|
136
139
|
// Ensure embeddings configured
|
|
137
140
|
if (!this._embeddings) {
|
|
138
141
|
throw new Error(`Embeddings model not configured.`);
|
|
@@ -148,12 +151,15 @@ export class LocalDocumentIndex extends LocalIndex {
|
|
|
148
151
|
documentId = v4();
|
|
149
152
|
}
|
|
150
153
|
|
|
151
|
-
//
|
|
152
|
-
const config = Object.assign({}, this._chunkingConfig);
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
const
|
|
156
|
-
|
|
154
|
+
// Initialize text splitter settings
|
|
155
|
+
const config = Object.assign({ docType }, this._chunkingConfig);
|
|
156
|
+
if (config.docType == undefined) {
|
|
157
|
+
// Populate docType based on extension
|
|
158
|
+
const pos = uri.lastIndexOf('.');
|
|
159
|
+
if (pos >= 0) {
|
|
160
|
+
const ext = uri.substring(pos + 1).toLowerCase();
|
|
161
|
+
config.docType = ext;
|
|
162
|
+
}
|
|
157
163
|
}
|
|
158
164
|
|
|
159
165
|
// Split text into chunks
|
package/src/LocalIndex.ts
CHANGED
|
@@ -20,15 +20,19 @@ export interface CreateIndexConfig {
|
|
|
20
20
|
*/
|
|
21
21
|
export class LocalIndex {
|
|
22
22
|
private readonly _folderPath: string;
|
|
23
|
+
private readonly _indexName: string;
|
|
24
|
+
|
|
23
25
|
private _data?: IndexData;
|
|
24
26
|
private _update?: IndexData;
|
|
25
27
|
|
|
26
28
|
/**
|
|
27
29
|
* Creates a new instance of LocalIndex.
|
|
28
30
|
* @param folderPath - Path to the index folder
|
|
31
|
+
* @param indexName - Optional name of the index file. Defaults to index.json
|
|
29
32
|
*/
|
|
30
|
-
public constructor(folderPath: string) {
|
|
33
|
+
public constructor(folderPath: string, indexName?: string) {
|
|
31
34
|
this._folderPath = folderPath;
|
|
35
|
+
this._indexName = indexName || "index.json";
|
|
32
36
|
}
|
|
33
37
|
|
|
34
38
|
/**
|
|
@@ -38,6 +42,13 @@ export class LocalIndex {
|
|
|
38
42
|
return this._folderPath;
|
|
39
43
|
}
|
|
40
44
|
|
|
45
|
+
/**
|
|
46
|
+
* Optional name of the index file.
|
|
47
|
+
*/
|
|
48
|
+
public get indexName(): string {
|
|
49
|
+
return this._indexName;
|
|
50
|
+
}
|
|
51
|
+
|
|
41
52
|
/**
|
|
42
53
|
* Begins an update to the index.
|
|
43
54
|
* @remarks
|
|
@@ -87,7 +98,8 @@ export class LocalIndex {
|
|
|
87
98
|
metadata_config: config.metadata_config ?? {},
|
|
88
99
|
items: []
|
|
89
100
|
};
|
|
90
|
-
|
|
101
|
+
|
|
102
|
+
await fs.writeFile(path.join(this._folderPath, this._indexName), JSON.stringify(this._data));
|
|
91
103
|
} catch (err: unknown) {
|
|
92
104
|
await this.deleteIndex();
|
|
93
105
|
throw new Error('Error creating index');
|
|
@@ -139,7 +151,7 @@ export class LocalIndex {
|
|
|
139
151
|
|
|
140
152
|
try {
|
|
141
153
|
// Save index
|
|
142
|
-
await fs.writeFile(path.join(this._folderPath,
|
|
154
|
+
await fs.writeFile(path.join(this._folderPath, this._indexName), JSON.stringify(this._update));
|
|
143
155
|
this._data = this._update;
|
|
144
156
|
this._update = undefined;
|
|
145
157
|
} catch(err: unknown) {
|
|
@@ -194,7 +206,7 @@ export class LocalIndex {
|
|
|
194
206
|
*/
|
|
195
207
|
public async isIndexCreated(): Promise<boolean> {
|
|
196
208
|
try {
|
|
197
|
-
await fs.access(path.join(this._folderPath,
|
|
209
|
+
await fs.access(path.join(this._folderPath, this.indexName));
|
|
198
210
|
return true;
|
|
199
211
|
} catch (err: unknown) {
|
|
200
212
|
return false;
|
|
@@ -307,7 +319,7 @@ export class LocalIndex {
|
|
|
307
319
|
throw new Error('Index does not exist');
|
|
308
320
|
}
|
|
309
321
|
|
|
310
|
-
const data = await fs.readFile(path.join(this._folderPath,
|
|
322
|
+
const data = await fs.readFile(path.join(this._folderPath, this.indexName));
|
|
311
323
|
this._data = JSON.parse(data.toString());
|
|
312
324
|
}
|
|
313
325
|
|
package/src/OpenAIEmbeddings.ts
CHANGED
|
@@ -140,10 +140,12 @@ export class OpenAIEmbeddings implements EmbeddingsModel {
|
|
|
140
140
|
|
|
141
141
|
// Process response
|
|
142
142
|
if (response.status < 300) {
|
|
143
|
-
|
|
143
|
+
const {data,model,usage} = response.data
|
|
144
|
+
return { status: 'success', output: data.sort((a, b) => a.index - b.index).map((item) => item.embedding), model, usage };
|
|
144
145
|
} else if (response.status == 429) {
|
|
145
146
|
return { status: 'rate_limited', message: `The embeddings API returned a rate limit error.` }
|
|
146
147
|
} else {
|
|
148
|
+
console.log(inputs);
|
|
147
149
|
return { status: 'error', message: `The embeddings API returned an error status of ${response.status}: ${response.statusText}` };
|
|
148
150
|
}
|
|
149
151
|
}
|
|
@@ -204,4 +206,4 @@ export class OpenAIEmbeddings implements EmbeddingsModel {
|
|
|
204
206
|
return response;
|
|
205
207
|
}
|
|
206
208
|
}
|
|
207
|
-
}
|
|
209
|
+
}
|