vectra 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/bin/vectra.js +3 -0
- package/lib/GPT3Tokenizer.d.ts +9 -0
- package/lib/GPT3Tokenizer.d.ts.map +1 -0
- package/lib/GPT3Tokenizer.js +17 -0
- package/lib/GPT3Tokenizer.js.map +1 -0
- package/lib/ItemSelector.d.ts +41 -0
- package/lib/ItemSelector.d.ts.map +1 -0
- package/lib/ItemSelector.js +156 -0
- package/lib/ItemSelector.js.map +1 -0
- package/lib/LocalDocument.d.ts +16 -0
- package/lib/LocalDocument.d.ts.map +1 -0
- package/lib/LocalDocument.js +99 -0
- package/lib/LocalDocument.js.map +1 -0
- package/lib/LocalDocumentIndex.d.ts +48 -0
- package/lib/LocalDocumentIndex.d.ts.map +1 -0
- package/lib/LocalDocumentIndex.js +367 -0
- package/lib/LocalDocumentIndex.js.map +1 -0
- package/lib/LocalDocumentResult.d.ts +12 -0
- package/lib/LocalDocumentResult.d.ts.map +1 -0
- package/lib/LocalDocumentResult.js +186 -0
- package/lib/LocalDocumentResult.js.map +1 -0
- package/lib/LocalIndex.d.ts +130 -0
- package/lib/LocalIndex.d.ts.map +1 -0
- package/lib/LocalIndex.js +405 -0
- package/lib/LocalIndex.js.map +1 -0
- package/lib/OpenAIEmbeddings.d.ts +98 -0
- package/lib/OpenAIEmbeddings.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.js +139 -0
- package/lib/OpenAIEmbeddings.js.map +1 -0
- package/lib/TextSplitter.d.ts +17 -0
- package/lib/TextSplitter.d.ts.map +1 -0
- package/lib/TextSplitter.js +460 -0
- package/lib/TextSplitter.js.map +1 -0
- package/lib/WebFetcher.d.ts +16 -0
- package/lib/WebFetcher.d.ts.map +1 -0
- package/lib/WebFetcher.js +144 -0
- package/lib/WebFetcher.js.map +1 -0
- package/lib/index.d.ts +11 -0
- package/lib/index.d.ts.map +1 -0
- package/lib/index.js +27 -0
- package/lib/index.js.map +1 -0
- package/lib/internals/Colorize.d.ts +14 -0
- package/lib/internals/Colorize.d.ts.map +1 -0
- package/lib/internals/Colorize.js +64 -0
- package/lib/internals/Colorize.js.map +1 -0
- package/lib/internals/index.d.ts +3 -0
- package/lib/internals/index.d.ts.map +1 -0
- package/lib/internals/index.js +19 -0
- package/lib/internals/index.js.map +1 -0
- package/lib/internals/types.d.ts +42 -0
- package/lib/internals/types.d.ts.map +1 -0
- package/lib/internals/types.js +3 -0
- package/lib/internals/types.js.map +1 -0
- package/lib/types.d.ts +133 -0
- package/lib/types.d.ts.map +1 -0
- package/lib/types.js +3 -0
- package/lib/types.js.map +1 -0
- package/lib/vectra-cli.d.ts +2 -0
- package/lib/vectra-cli.d.ts.map +1 -0
- package/lib/vectra-cli.js +276 -0
- package/lib/vectra-cli.js.map +1 -0
- package/package.json +21 -3
- package/src/GPT3Tokenizer.ts +15 -0
- package/src/ItemSelector.ts +9 -9
- package/src/LocalDocument.ts +70 -0
- package/src/LocalDocumentIndex.ts +355 -0
- package/src/LocalDocumentResult.ts +206 -0
- package/src/LocalIndex.ts +12 -78
- package/src/OpenAIEmbeddings.ts +205 -0
- package/src/TextSplitter.ts +480 -0
- package/src/WebFetcher.ts +128 -0
- package/src/index.ts +8 -0
- package/src/internals/Colorize.ts +64 -0
- package/src/internals/index.ts +2 -0
- package/src/internals/types.ts +46 -0
- package/src/types.ts +160 -0
- package/src/vectra-cli.ts +238 -0
package/README.md
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
#
|
|
1
|
+
# Vectra
|
|
2
2
|
Vectra is a local vector database for Node.js with features similar to [Pinecone](https://www.pinecone.io/) or [Qdrant](https://qdrant.tech/) but built using local files. Each Vectra index is a folder on disk. There's an `index.json` file in the folder that contains all the vectors for the index along with any indexed metadata. When you create an index you can specify which metadata properties to index and only those fields will be stored in the `index.json` file. All of the other metadata for an item will be stored on disk in a separate file keyed by a GUID.
|
|
3
3
|
|
|
4
4
|
When querying Vectra you'll be able to use the same subset of [MongoDB query operators](https://www.mongodb.com/docs/manual/reference/operator/query/) that Pinecone supports and the results will be returned sorted by similarity. Every item in the index will first be filtered by metadata and then ranked for similarity. Even though every item is evaluated it's all in memory so it should be nearly instantaneous. Likely 1ms - 2ms for even a rather large index. Smaller indexes should be <1ms.
|
|
5
5
|
|
|
6
|
-
Keep in mind that your entire Vectra index is loaded into memory so it's not well suited for scenarios like long term chat bot memory. Use a
|
|
6
|
+
Keep in mind that your entire Vectra index is loaded into memory so it's not well suited for scenarios like long term chat bot memory. Use a real vector DB for that. Vectra is intended to be used in scenarios where you have a small corpus of mostly static data that you'd like to include in your prompt. Infinite few shot examples would be a great use case for Vectra or even just a single document you want to ask questions over.
|
|
7
7
|
|
|
8
8
|
Pinecone style namespaces aren't directly supported but you could easily mimic them by creating a separate Vectra index (and folder) for each namespace.
|
|
9
9
|
|
|
@@ -92,4 +92,4 @@ await query('banana');
|
|
|
92
92
|
[0.8493374123092652] oranges
|
|
93
93
|
[0.8415324469533297] blue
|
|
94
94
|
*/
|
|
95
|
-
```
|
|
95
|
+
```
|
package/bin/vectra.js
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { Tokenizer } from "./types";
|
|
2
|
+
/**
|
|
3
|
+
* Tokenizer that uses GPT-3's encoder.
|
|
4
|
+
*/
|
|
5
|
+
export declare class GPT3Tokenizer implements Tokenizer {
|
|
6
|
+
decode(tokens: number[]): string;
|
|
7
|
+
encode(text: string): number[];
|
|
8
|
+
}
|
|
9
|
+
//# sourceMappingURL=GPT3Tokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"GPT3Tokenizer.d.ts","sourceRoot":"","sources":["../src/GPT3Tokenizer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAGpC;;GAEG;AACH,qBAAa,aAAc,YAAW,SAAS;IACpC,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM;IAIhC,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE;CAGxC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.GPT3Tokenizer = void 0;
|
|
4
|
+
const gpt_3_encoder_1 = require("gpt-3-encoder");
|
|
5
|
+
/**
|
|
6
|
+
* Tokenizer that uses GPT-3's encoder.
|
|
7
|
+
*/
|
|
8
|
+
class GPT3Tokenizer {
|
|
9
|
+
decode(tokens) {
|
|
10
|
+
return (0, gpt_3_encoder_1.decode)(tokens);
|
|
11
|
+
}
|
|
12
|
+
encode(text) {
|
|
13
|
+
return (0, gpt_3_encoder_1.encode)(text);
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
exports.GPT3Tokenizer = GPT3Tokenizer;
|
|
17
|
+
//# sourceMappingURL=GPT3Tokenizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"GPT3Tokenizer.js","sourceRoot":"","sources":["../src/GPT3Tokenizer.ts"],"names":[],"mappings":";;;AACA,iDAA+C;AAE/C;;GAEG;AACH,MAAa,aAAa;IACf,MAAM,CAAC,MAAgB;QAC1B,OAAO,IAAA,sBAAM,EAAC,MAAM,CAAC,CAAC;IAC1B,CAAC;IAEM,MAAM,CAAC,IAAY;QACtB,OAAO,IAAA,sBAAM,EAAC,IAAI,CAAC,CAAC;IACxB,CAAC;CACJ;AARD,sCAQC"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { MetadataFilter, MetadataTypes } from './types';
|
|
2
|
+
export declare class ItemSelector {
|
|
3
|
+
/**
|
|
4
|
+
* Returns the similarity between two vectors using the cosine similarity.
|
|
5
|
+
* @param vector1 Vector 1
|
|
6
|
+
* @param vector2 Vector 2
|
|
7
|
+
* @returns Similarity between the two vectors
|
|
8
|
+
*/
|
|
9
|
+
static cosineSimilarity(vector1: number[], vector2: number[]): number;
|
|
10
|
+
/**
|
|
11
|
+
* Normalizes a vector.
|
|
12
|
+
* @remarks
|
|
13
|
+
* The norm of a vector is the square root of the sum of the squares of the elements.
|
|
14
|
+
* The LocalIndex pre-normalizes all vectors to improve performance.
|
|
15
|
+
* @param vector Vector to normalize
|
|
16
|
+
* @returns Norm (Euclidean length) of the vector
|
|
17
|
+
*/
|
|
18
|
+
static normalize(vector: number[]): number;
|
|
19
|
+
/**
|
|
20
|
+
* Returns the similarity between two vectors using cosine similarity.
|
|
21
|
+
* @remarks
|
|
22
|
+
* The LocalIndex pre-normalizes all vectors to improve performance.
|
|
23
|
+
* This method uses the pre-calculated norms to improve performance.
|
|
24
|
+
* @param vector1 Vector 1
|
|
25
|
+
* @param norm1 Norm of vector 1
|
|
26
|
+
* @param vector2 Vector 2
|
|
27
|
+
* @param norm2 Norm of vector 2
|
|
28
|
+
* @returns Similarity between the two vectors
|
|
29
|
+
*/
|
|
30
|
+
static normalizedCosineSimilarity(vector1: number[], norm1: number, vector2: number[], norm2: number): number;
|
|
31
|
+
/**
|
|
32
|
+
* Applies a filter to the metadata of an item.
|
|
33
|
+
* @param metadata Metadata of the item
|
|
34
|
+
* @param filter Filter to apply
|
|
35
|
+
* @returns True if the item matches the filter, false otherwise
|
|
36
|
+
*/
|
|
37
|
+
static select(metadata: Record<string, MetadataTypes>, filter: MetadataFilter): boolean;
|
|
38
|
+
private static dotProduct;
|
|
39
|
+
private static metadataFilter;
|
|
40
|
+
}
|
|
41
|
+
//# sourceMappingURL=ItemSelector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ItemSelector.d.ts","sourceRoot":"","sources":["../src/ItemSelector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAExD,qBAAa,YAAY;IACrB;;;;;OAKG;WACW,gBAAgB,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE;IAKnE;;;;;;;OAOG;WACW,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE;IAYxC;;;;;;;;;;OAUG;WACW,0BAA0B,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM;IAK3G;;;;;OAKG;WACW,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,EAAE,MAAM,EAAE,cAAc,GAAG,OAAO;IAoC9F,OAAO,CAAC,MAAM,CAAC,UAAU;IAYzB,OAAO,CAAC,MAAM,CAAC,cAAc;CAqDhC"}
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ItemSelector = void 0;
|
|
4
|
+
class ItemSelector {
|
|
5
|
+
/**
|
|
6
|
+
* Returns the similarity between two vectors using the cosine similarity.
|
|
7
|
+
* @param vector1 Vector 1
|
|
8
|
+
* @param vector2 Vector 2
|
|
9
|
+
* @returns Similarity between the two vectors
|
|
10
|
+
*/
|
|
11
|
+
static cosineSimilarity(vector1, vector2) {
|
|
12
|
+
// Return the quotient of the dot product and the product of the norms
|
|
13
|
+
return this.dotProduct(vector1, vector2) / (this.normalize(vector1) * this.normalize(vector2));
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Normalizes a vector.
|
|
17
|
+
* @remarks
|
|
18
|
+
* The norm of a vector is the square root of the sum of the squares of the elements.
|
|
19
|
+
* The LocalIndex pre-normalizes all vectors to improve performance.
|
|
20
|
+
* @param vector Vector to normalize
|
|
21
|
+
* @returns Norm (Euclidean length) of the vector
|
|
22
|
+
*/
|
|
23
|
+
static normalize(vector) {
|
|
24
|
+
// Initialize a variable to store the sum of the squares
|
|
25
|
+
let sum = 0;
|
|
26
|
+
// Loop through the elements of the array
|
|
27
|
+
for (let i = 0; i < vector.length; i++) {
|
|
28
|
+
// Square the element and add it to the sum
|
|
29
|
+
sum += vector[i] * vector[i];
|
|
30
|
+
}
|
|
31
|
+
// Return the square root of the sum
|
|
32
|
+
return Math.sqrt(sum);
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* Returns the similarity between two vectors using cosine similarity.
|
|
36
|
+
* @remarks
|
|
37
|
+
* The LocalIndex pre-normalizes all vectors to improve performance.
|
|
38
|
+
* This method uses the pre-calculated norms to improve performance.
|
|
39
|
+
* @param vector1 Vector 1
|
|
40
|
+
* @param norm1 Norm of vector 1
|
|
41
|
+
* @param vector2 Vector 2
|
|
42
|
+
* @param norm2 Norm of vector 2
|
|
43
|
+
* @returns Similarity between the two vectors
|
|
44
|
+
*/
|
|
45
|
+
static normalizedCosineSimilarity(vector1, norm1, vector2, norm2) {
|
|
46
|
+
// Return the quotient of the dot product and the product of the norms
|
|
47
|
+
return this.dotProduct(vector1, vector2) / (norm1 * norm2);
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Applies a filter to the metadata of an item.
|
|
51
|
+
* @param metadata Metadata of the item
|
|
52
|
+
* @param filter Filter to apply
|
|
53
|
+
* @returns True if the item matches the filter, false otherwise
|
|
54
|
+
*/
|
|
55
|
+
static select(metadata, filter) {
|
|
56
|
+
if (filter === undefined || filter === null) {
|
|
57
|
+
return true;
|
|
58
|
+
}
|
|
59
|
+
for (const key in filter) {
|
|
60
|
+
switch (key) {
|
|
61
|
+
case '$and':
|
|
62
|
+
if (!filter[key].every((f) => this.select(metadata, f))) {
|
|
63
|
+
return false;
|
|
64
|
+
}
|
|
65
|
+
break;
|
|
66
|
+
case '$or':
|
|
67
|
+
if (!filter[key].some((f) => this.select(metadata, f))) {
|
|
68
|
+
return false;
|
|
69
|
+
}
|
|
70
|
+
break;
|
|
71
|
+
default:
|
|
72
|
+
const value = filter[key];
|
|
73
|
+
if (value === undefined || value === null) {
|
|
74
|
+
return false;
|
|
75
|
+
}
|
|
76
|
+
else if (typeof value == 'object') {
|
|
77
|
+
if (!this.metadataFilter(metadata[key], value)) {
|
|
78
|
+
return false;
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
if (metadata[key] !== value) {
|
|
83
|
+
return false;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
break;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
return true;
|
|
90
|
+
}
|
|
91
|
+
static dotProduct(arr1, arr2) {
|
|
92
|
+
// Initialize a variable to store the sum of the products
|
|
93
|
+
let sum = 0;
|
|
94
|
+
// Loop through the elements of the arrays
|
|
95
|
+
for (let i = 0; i < arr1.length; i++) {
|
|
96
|
+
// Multiply the corresponding elements and add them to the sum
|
|
97
|
+
sum += arr1[i] * arr2[i];
|
|
98
|
+
}
|
|
99
|
+
// Return the sum
|
|
100
|
+
return sum;
|
|
101
|
+
}
|
|
102
|
+
static metadataFilter(value, filter) {
|
|
103
|
+
if (value === undefined || value === null) {
|
|
104
|
+
return false;
|
|
105
|
+
}
|
|
106
|
+
for (const key in filter) {
|
|
107
|
+
switch (key) {
|
|
108
|
+
case '$eq':
|
|
109
|
+
if (value !== filter[key]) {
|
|
110
|
+
return false;
|
|
111
|
+
}
|
|
112
|
+
break;
|
|
113
|
+
case '$ne':
|
|
114
|
+
if (value === filter[key]) {
|
|
115
|
+
return false;
|
|
116
|
+
}
|
|
117
|
+
break;
|
|
118
|
+
case '$gt':
|
|
119
|
+
if (typeof value != 'number' || value <= filter[key]) {
|
|
120
|
+
return false;
|
|
121
|
+
}
|
|
122
|
+
break;
|
|
123
|
+
case '$gte':
|
|
124
|
+
if (typeof value != 'number' || value < filter[key]) {
|
|
125
|
+
return false;
|
|
126
|
+
}
|
|
127
|
+
break;
|
|
128
|
+
case '$lt':
|
|
129
|
+
if (typeof value != 'number' || value >= filter[key]) {
|
|
130
|
+
return false;
|
|
131
|
+
}
|
|
132
|
+
break;
|
|
133
|
+
case '$lte':
|
|
134
|
+
if (typeof value != 'number' || value > filter[key]) {
|
|
135
|
+
return false;
|
|
136
|
+
}
|
|
137
|
+
break;
|
|
138
|
+
case '$in':
|
|
139
|
+
if (typeof value == 'boolean' || !filter[key].includes(value)) {
|
|
140
|
+
return false;
|
|
141
|
+
}
|
|
142
|
+
break;
|
|
143
|
+
case '$nin':
|
|
144
|
+
if (typeof value == 'boolean' || filter[key].includes(value)) {
|
|
145
|
+
return false;
|
|
146
|
+
}
|
|
147
|
+
break;
|
|
148
|
+
default:
|
|
149
|
+
return value === filter[key];
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
return true;
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
exports.ItemSelector = ItemSelector;
|
|
156
|
+
//# sourceMappingURL=ItemSelector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ItemSelector.js","sourceRoot":"","sources":["../src/ItemSelector.ts"],"names":[],"mappings":";;;AAEA,MAAa,YAAY;IACrB;;;;;OAKG;IACI,MAAM,CAAC,gBAAgB,CAAC,OAAiB,EAAE,OAAiB;QAC/D,sEAAsE;QACtE,OAAO,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;IACnG,CAAC;IAED;;;;;;;OAOG;IACI,MAAM,CAAC,SAAS,CAAC,MAAgB;QACpC,wDAAwD;QACxD,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,yCAAyC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YACpC,2CAA2C;YAC3C,GAAG,IAAI,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;SAChC;QACD,oCAAoC;QACpC,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC1B,CAAC;IAED;;;;;;;;;;OAUG;IACI,MAAM,CAAC,0BAA0B,CAAC,OAAiB,EAAE,KAAa,EAAE,OAAiB,EAAE,KAAa;QACvG,sEAAsE;QACtE,OAAO,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC;IAC/D,CAAC;IAED;;;;;OAKG;IACI,MAAM,CAAC,MAAM,CAAC,QAAuC,EAAE,MAAsB;QAChF,IAAI,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,IAAI,EAAE;YACzC,OAAO,IAAI,CAAC;SACf;QAED,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE;YACtB,QAAQ,GAAG,EAAE;gBACT,KAAK,MAAM;oBACP,IAAI,CAAC,MAAM,CAAC,GAAG,CAAE,CAAC,KAAK,CAAC,CAAC,CAAiB,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE;wBACtE,OAAO,KAAK,CAAC;qBAChB;oBACD,MAAM;gBACV,KAAK,KAAK;oBACN,IAAI,CAAC,MAAM,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,CAAC,CAAiB,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE;wBACrE,OAAO,KAAK,CAAC;qBAChB;oBACD,MAAM;gBACV;oBACI,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;oBAC1B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI,EAAE;wBACvC,OAAO,KAAK,CAAC;qBAChB;yBAAM,IAAI,OAAO,KAAK,IAAI,QAAQ,EAAE;wBACjC,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,KAAuB,CAAC,EAAE;4BAC9D,OAAO,KAAK,CAAC;yBAChB;qBACJ;yBAAM;wBACH,IAAI,QAAQ,CAAC,GAAG,CAAC,KAAK,KAAK,EAAE;4BACzB,OAAO,KAAK,CAAC;yBAChB;qBACJ;oBACD,MAAM;aACb;SACJ;QACD,OAAO,IAAI,CAAC;IAChB,CAAC;IAEO,MAAM,CAAC,UAAU,CAAC,IAAc,EAAE,IAAc;QACpD,yDAAyD;QACzD,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,0CAA0C;QAC1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,
IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YAClC,8DAA8D;YAC9D,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;SAC5B;QACD,iBAAiB;QACjB,OAAO,GAAG,CAAC;IACf,CAAC;IAEO,MAAM,CAAC,cAAc,CAAC,KAAoB,EAAE,MAAsB;QACtE,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI,EAAE;YACvC,OAAO,KAAK,CAAC;SAChB;QAED,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE;YACtB,QAAQ,GAAG,EAAE;gBACT,KAAK,KAAK;oBACN,IAAI,KAAK,KAAK,MAAM,CAAC,GAAG,CAAC,EAAE;wBACvB,OAAO,KAAK,CAAC;qBAChB;oBACD,MAAM;gBACV,KAAK,KAAK;oBACN,IAAI,KAAK,KAAK,MAAM,CAAC,GAAG,CAAC,EAAE;wBACvB,OAAO,KAAK,CAAC;qBAChB;oBACD,MAAM;gBACV,KAAK,KAAK;oBACN,IAAI,OAAO,KAAK,IAAI,QAAQ,IAAI,KAAK,IAAI,MAAM,CAAC,GAAG,CAAE,EAAE;wBACnD,OAAO,KAAK,CAAC;qBAChB;oBACD,MAAM;gBACV,KAAK,MAAM;oBACP,IAAI,OAAO,KAAK,IAAI,QAAQ,IAAI,KAAK,GAAG,MAAM,CAAC,GAAG,CAAE,EAAE;wBAClD,OAAO,KAAK,CAAC;qBAChB;oBACD,MAAM;gBACV,KAAK,KAAK;oBACN,IAAI,OAAO,KAAK,IAAI,QAAQ,IAAI,KAAK,IAAI,MAAM,CAAC,GAAG,CAAE,EAAE;wBACnD,OAAO,KAAK,CAAC;qBAChB;oBACD,MAAM;gBACV,KAAK,MAAM;oBACP,IAAI,OAAO,KAAK,IAAI,QAAQ,IAAI,KAAK,GAAG,MAAM,CAAC,GAAG,CAAE,EAAE;wBAClD,OAAO,KAAK,CAAC;qBAChB;oBACD,MAAM;gBACV,KAAK,KAAK;oBACN,IAAI,OAAO,KAAK,IAAI,SAAS,IAAI,CAAC,MAAM,CAAC,GAAG,CAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE;wBAC5D,OAAO,KAAK,CAAC;qBAChB;oBACD,MAAM;gBACV,KAAK,MAAM;oBACP,IAAI,OAAO,KAAK,IAAI,SAAS,IAAI,MAAM,CAAC,GAAG,CAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE;wBAC3D,OAAO,KAAK,CAAC;qBAChB;oBACD,MAAM;gBACV;oBACI,OAAO,KAAK,KAAK,MAAM,CAAC,GAAG,CAAC,CAAC;aACpC;SACJ;QACD,OAAO,IAAI,CAAC;IAChB,CAAC;CACJ;AA3JD,oCA2JC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { MetadataTypes } from './types';
|
|
2
|
+
export declare class LocalDocument {
|
|
3
|
+
private readonly _folderPath;
|
|
4
|
+
private readonly _id;
|
|
5
|
+
private readonly _uri;
|
|
6
|
+
private _metadata;
|
|
7
|
+
private _text;
|
|
8
|
+
constructor(folderPath: string, id: string, uri: string);
|
|
9
|
+
get folderPath(): string;
|
|
10
|
+
get id(): string;
|
|
11
|
+
get uri(): string;
|
|
12
|
+
hasMetadata(): Promise<boolean>;
|
|
13
|
+
loadMetadata(): Promise<Record<string, MetadataTypes>>;
|
|
14
|
+
loadText(): Promise<string>;
|
|
15
|
+
}
|
|
16
|
+
//# sourceMappingURL=LocalDocument.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"LocalDocument.d.ts","sourceRoot":"","sources":["../src/LocalDocument.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAExC,qBAAa,aAAa;IACtB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAS;IAC7B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,SAAS,CAAyC;IAC1D,OAAO,CAAC,KAAK,CAAmB;gBAEb,UAAU,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM;IAM9D,IAAW,UAAU,IAAI,MAAM,CAE9B;IAED,IAAW,EAAE,IAAI,MAAM,CAEtB;IAED,IAAW,GAAG,IAAI,MAAM,CAEvB;IAEY,WAAW,IAAI,OAAO,CAAC,OAAO,CAAC;IAS/B,YAAY,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,EAAC,aAAa,CAAC,CAAC;IAmBrD,QAAQ,IAAI,OAAO,CAAC,MAAM,CAAC;CAY3C"}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || function (mod) {
|
|
19
|
+
if (mod && mod.__esModule) return mod;
|
|
20
|
+
var result = {};
|
|
21
|
+
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
|
|
22
|
+
__setModuleDefault(result, mod);
|
|
23
|
+
return result;
|
|
24
|
+
};
|
|
25
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
26
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
27
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
28
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
29
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
30
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
31
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
32
|
+
});
|
|
33
|
+
};
|
|
34
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
35
|
+
exports.LocalDocument = void 0;
|
|
36
|
+
const fs = __importStar(require("fs/promises"));
|
|
37
|
+
const path = __importStar(require("path"));
|
|
38
|
+
class LocalDocument {
|
|
39
|
+
constructor(folderPath, id, uri) {
|
|
40
|
+
this._folderPath = folderPath;
|
|
41
|
+
this._id = id;
|
|
42
|
+
this._uri = uri;
|
|
43
|
+
}
|
|
44
|
+
get folderPath() {
|
|
45
|
+
return this._folderPath;
|
|
46
|
+
}
|
|
47
|
+
get id() {
|
|
48
|
+
return this._id;
|
|
49
|
+
}
|
|
50
|
+
get uri() {
|
|
51
|
+
return this._uri;
|
|
52
|
+
}
|
|
53
|
+
hasMetadata() {
|
|
54
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
55
|
+
try {
|
|
56
|
+
yield fs.access(path.join(this.folderPath, `${this.id}.json`));
|
|
57
|
+
return true;
|
|
58
|
+
}
|
|
59
|
+
catch (err) {
|
|
60
|
+
return false;
|
|
61
|
+
}
|
|
62
|
+
});
|
|
63
|
+
}
|
|
64
|
+
loadMetadata() {
|
|
65
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
66
|
+
if (this._metadata == undefined) {
|
|
67
|
+
let json;
|
|
68
|
+
try {
|
|
69
|
+
json = (yield fs.readFile(path.join(this.folderPath, `${this.id}.json`))).toString();
|
|
70
|
+
}
|
|
71
|
+
catch (err) {
|
|
72
|
+
throw new Error(`Error reading metadata for document "${this.uri}": ${err.toString()}`);
|
|
73
|
+
}
|
|
74
|
+
try {
|
|
75
|
+
this._metadata = JSON.parse(json);
|
|
76
|
+
}
|
|
77
|
+
catch (err) {
|
|
78
|
+
throw new Error(`Error parsing metadata for document "${this.uri}": ${err.toString()}`);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
return this._metadata;
|
|
82
|
+
});
|
|
83
|
+
}
|
|
84
|
+
loadText() {
|
|
85
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
86
|
+
if (this._text == undefined) {
|
|
87
|
+
try {
|
|
88
|
+
this._text = (yield fs.readFile(path.join(this.folderPath, `${this.id}.txt`))).toString();
|
|
89
|
+
}
|
|
90
|
+
catch (err) {
|
|
91
|
+
throw new Error(`Error reading text file for document "${this.uri}": ${err.toString()}`);
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
return this._text;
|
|
95
|
+
});
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
exports.LocalDocument = LocalDocument;
|
|
99
|
+
//# sourceMappingURL=LocalDocument.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"LocalDocument.js","sourceRoot":"","sources":["../src/LocalDocument.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,2CAA6B;AAG7B,MAAa,aAAa;IAOtB,YAAmB,UAAkB,EAAE,EAAU,EAAE,GAAW;QAC1D,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC;QAC9B,IAAI,CAAC,GAAG,GAAG,EAAE,CAAC;QACd,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC;IACpB,CAAC;IAED,IAAW,UAAU;QACjB,OAAO,IAAI,CAAC,WAAW,CAAC;IAC5B,CAAC;IAED,IAAW,EAAE;QACT,OAAO,IAAI,CAAC,GAAG,CAAC;IACpB,CAAC;IAED,IAAW,GAAG;QACV,OAAO,IAAI,CAAC,IAAI,CAAC;IACrB,CAAC;IAEY,WAAW;;YACpB,IAAI;gBACA,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,IAAI,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;gBAC/D,OAAO,IAAI,CAAC;aACf;YAAC,OAAO,GAAY,EAAE;gBACnB,OAAO,KAAK,CAAC;aAChB;QACL,CAAC;KAAA;IAEY,YAAY;;YACrB,IAAI,IAAI,CAAC,SAAS,IAAI,SAAS,EAAE;gBAC7B,IAAI,IAAY,CAAC;gBACjB,IAAI;oBACA,IAAI,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,IAAI,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;iBACxF;gBAAC,OAAO,GAAY,EAAE;oBACnB,MAAM,IAAI,KAAK,CAAC,wCAAwC,IAAI,CAAC,GAAG,MAAO,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;iBACpG;gBAED,IAAI;oBACA,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;iBACrC;gBAAC,OAAO,GAAY,EAAE;oBACnB,MAAM,IAAI,KAAK,CAAC,wCAAwC,IAAI,CAAC,GAAG,MAAO,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;iBACpG;aACJ;YAED,OAAO,IAAI,CAAC,SAAU,CAAC;QAC3B,CAAC;KAAA;IAEY,QAAQ;;YACjB,IAAI,IAAI,CAAC,KAAK,IAAI,SAAS,EAAE;gBACzB,IAAI;oBACA,IAAI,CAAC,KAAK,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,IAAI,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;iBAC7F;gBAAC,OAAO,GAAY,EAAE;oBACnB,MAAM,IAAI,KAAK,CAAC,yCAAyC,IAAI,CAAC,GAAG,MAAO,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;iBACrG;aACJ;YAED,OAAO,IAAI,CAAC,KAAK,CAAC;QACtB,CAAC;KAAA;CAEJ;AAjED,sCAiEC"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import { CreateIndexConfig, LocalIndex } from "./LocalIndex";
|
|
2
|
+
import { TextSplitterConfig } from "./TextSplitter";
|
|
3
|
+
import { MetadataFilter, EmbeddingsModel, Tokenizer, MetadataTypes, DocumentCatalogStats } from "./types";
|
|
4
|
+
import { LocalDocumentResult } from './LocalDocumentResult';
|
|
5
|
+
import { LocalDocument } from './LocalDocument';
|
|
6
|
+
export interface DocumentQueryOptions {
|
|
7
|
+
maxDocuments?: number;
|
|
8
|
+
maxChunks?: number;
|
|
9
|
+
filter?: MetadataFilter;
|
|
10
|
+
}
|
|
11
|
+
export interface LocalDocumentIndexConfig {
|
|
12
|
+
folderPath: string;
|
|
13
|
+
embeddings?: EmbeddingsModel;
|
|
14
|
+
tokenizer?: Tokenizer;
|
|
15
|
+
chunkingConfig?: Partial<TextSplitterConfig>;
|
|
16
|
+
}
|
|
17
|
+
export declare class LocalDocumentIndex extends LocalIndex {
|
|
18
|
+
private readonly _embeddings?;
|
|
19
|
+
private readonly _tokenizer;
|
|
20
|
+
private readonly _chunkingConfig?;
|
|
21
|
+
private _catalog?;
|
|
22
|
+
private _newCatalog?;
|
|
23
|
+
constructor(config: LocalDocumentIndexConfig);
|
|
24
|
+
/**
|
|
25
|
+
* Returns true if the document catalog exists.
|
|
26
|
+
*/
|
|
27
|
+
isCatalogCreated(): Promise<boolean>;
|
|
28
|
+
getDocumentId(uri: string): Promise<string | undefined>;
|
|
29
|
+
getDocumentUri(documentId: string): Promise<string | undefined>;
|
|
30
|
+
createIndex(config?: CreateIndexConfig): Promise<void>;
|
|
31
|
+
deleteDocument(uri: string): Promise<void>;
|
|
32
|
+
getCatalogStats(): Promise<DocumentCatalogStats>;
|
|
33
|
+
/**
|
|
34
|
+
* Adds a document to the catalog.
|
|
35
|
+
* @remarks
|
|
36
|
+
* A new update is started if one is not already in progress. If a document with the same uri
|
|
37
|
+
* already exists, it will be replaced.
|
|
38
|
+
* @param item Item to insert
|
|
39
|
+
* @returns Inserted document
|
|
40
|
+
*/
|
|
41
|
+
upsertDocument(uri: string, text: string, metadata?: Record<string, MetadataTypes>): Promise<LocalDocument>;
|
|
42
|
+
queryDocuments(query: string, options?: DocumentQueryOptions): Promise<LocalDocumentResult[]>;
|
|
43
|
+
beginUpdate(): Promise<void>;
|
|
44
|
+
cancelUpdate(): void;
|
|
45
|
+
endUpdate(): Promise<void>;
|
|
46
|
+
protected loadIndexData(): Promise<void>;
|
|
47
|
+
}
|
|
48
|
+
//# sourceMappingURL=LocalDocumentIndex.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"LocalDocumentIndex.d.ts","sourceRoot":"","sources":["../src/LocalDocumentIndex.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC7D,OAAO,EAAgB,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,SAAS,EAAE,aAAa,EAA0D,oBAAoB,EAAE,MAAM,SAAS,CAAC;AAClK,OAAO,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAIhD,MAAM,WAAW,oBAAoB;IACjC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,cAAc,CAAC;CAC3B;AAED,MAAM,WAAW,wBAAwB;IACrC,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,eAAe,CAAC;IAC7B,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,cAAc,CAAC,EAAE,OAAO,CAAC,kBAAkB,CAAC,CAAC;CAChD;AAED,qBAAa,kBAAmB,SAAQ,UAAU;IAC9C,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAkB;IAC/C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAY;IACvC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAqB;IACtD,OAAO,CAAC,QAAQ,CAAC,CAAkB;IACnC,OAAO,CAAC,WAAW,CAAC,CAAkB;gBAGnB,MAAM,EAAE,wBAAwB;IAYnD;;OAEG;IACU,gBAAgB,IAAI,OAAO,CAAC,OAAO,CAAC;IASpC,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;IAKvD,cAAc,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;IAK/D,WAAW,CAAC,MAAM,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAKtD,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IA8C1C,eAAe,IAAI,OAAO,CAAC,oBAAoB,CAAC;IAU7D;;;;;;;OAOG;IACU,cAAc,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,GAAG,OAAO,CAAC,aAAa,CAAC;IA4G3G,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC;IAqD7F,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC;IAKlC,YAAY,IAAI,IAAI;IAKd,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC;cAavB,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;CA2BjD"}
|