vectra 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/LocalDocumentIndex.d.ts +5 -2
- package/lib/LocalDocumentIndex.d.ts.map +1 -1
- package/lib/LocalDocumentIndex.js +20 -12
- package/lib/LocalDocumentIndex.js.map +1 -1
- package/lib/OpenAIEmbeddings.d.ts +1 -0
- package/lib/OpenAIEmbeddings.d.ts.map +1 -1
- package/lib/OpenAIEmbeddings.js +3 -1
- package/lib/OpenAIEmbeddings.js.map +1 -1
- package/lib/TextSplitter.d.ts +2 -0
- package/lib/TextSplitter.d.ts.map +1 -1
- package/lib/TextSplitter.js +101 -49
- package/lib/TextSplitter.js.map +1 -1
- package/lib/WebFetcher.d.ts +6 -4
- package/lib/WebFetcher.d.ts.map +1 -1
- package/lib/WebFetcher.js +132 -52
- package/lib/WebFetcher.js.map +1 -1
- package/lib/types.d.ts +8 -1
- package/lib/types.d.ts.map +1 -1
- package/lib/vectra-cli.js +8 -8
- package/lib/vectra-cli.js.map +1 -1
- package/package.json +3 -1
- package/src/LocalDocumentIndex.ts +20 -13
- package/src/OpenAIEmbeddings.ts +4 -1
- package/src/TextSplitter.ts +104 -49
- package/src/WebFetcher.ts +159 -58
- package/src/types.ts +6 -1
- package/src/vectra-cli.ts +8 -8
|
@@ -35,10 +35,13 @@ export declare class LocalDocumentIndex extends LocalIndex {
|
|
|
35
35
|
* @remarks
|
|
36
36
|
* A new update is started if one is not already in progress. If an document with the same uri
|
|
37
37
|
* already exists, it will be replaced.
|
|
38
|
-
* @param
|
|
38
|
+
* @param uri - Document URI
|
|
39
|
+
* @param text - Document text
|
|
40
|
+
* @param docType - Optional. Document type
|
|
41
|
+
* @param metadata - Optional. Document metadata to index
|
|
39
42
|
* @returns Inserted document
|
|
40
43
|
*/
|
|
41
|
-
upsertDocument(uri: string, text: string, metadata?: Record<string, MetadataTypes>): Promise<LocalDocument>;
|
|
44
|
+
upsertDocument(uri: string, text: string, docType?: string, metadata?: Record<string, MetadataTypes>): Promise<LocalDocument>;
|
|
42
45
|
queryDocuments(query: string, options?: DocumentQueryOptions): Promise<LocalDocumentResult[]>;
|
|
43
46
|
beginUpdate(): Promise<void>;
|
|
44
47
|
cancelUpdate(): void;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LocalDocumentIndex.d.ts","sourceRoot":"","sources":["../src/LocalDocumentIndex.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC7D,OAAO,EAAgB,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,SAAS,EAAE,aAAa,EAA0D,oBAAoB,EAAE,MAAM,SAAS,CAAC;AAClK,OAAO,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"LocalDocumentIndex.d.ts","sourceRoot":"","sources":["../src/LocalDocumentIndex.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC7D,OAAO,EAAgB,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,SAAS,EAAE,aAAa,EAA0D,oBAAoB,EAAE,MAAM,SAAS,CAAC;AAClK,OAAO,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAEhD,MAAM,WAAW,oBAAoB;IACjC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,cAAc,CAAC;CAC3B;AAED,MAAM,WAAW,wBAAwB;IACrC,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,eAAe,CAAC;IAC7B,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,cAAc,CAAC,EAAE,OAAO,CAAC,kBAAkB,CAAC,CAAC;CAChD;AAED,qBAAa,kBAAmB,SAAQ,UAAU;IAC9C,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAkB;IAC/C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAY;IACvC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAqB;IACtD,OAAO,CAAC,QAAQ,CAAC,CAAkB;IACnC,OAAO,CAAC,WAAW,CAAC,CAAkB;gBAGnB,MAAM,EAAE,wBAAwB;IAYnD;;OAEG;IACU,gBAAgB,IAAI,OAAO,CAAC,OAAO,CAAC;IASpC,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;IAKvD,cAAc,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;IAK/D,WAAW,CAAC,MAAM,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAKtD,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IA8C1C,eAAe,IAAI,OAAO,CAAC,oBAAoB,CAAC;IAU7D;;;;;;;;;;OAUG;IACU,cAAc,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,GAAG,OAAO,CAAC,aAAa,CAAC;IAkH7H,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC;IAqD7F,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC;IAKlC,YAAY,IAAI,IAAI;IAKd,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC;cAavB,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;CA2BjD"}
|
|
@@ -41,7 +41,6 @@ const LocalIndex_1 = require("./LocalIndex");
|
|
|
41
41
|
const TextSplitter_1 = require("./TextSplitter");
|
|
42
42
|
const LocalDocumentResult_1 = require("./LocalDocumentResult");
|
|
43
43
|
const LocalDocument_1 = require("./LocalDocument");
|
|
44
|
-
const EMBEDDINGS_BATCH_SIZE = 500;
|
|
45
44
|
class LocalDocumentIndex extends LocalIndex_1.LocalIndex {
|
|
46
45
|
constructor(config) {
|
|
47
46
|
var _a, _b;
|
|
@@ -152,10 +151,13 @@ class LocalDocumentIndex extends LocalIndex_1.LocalIndex {
|
|
|
152
151
|
* @remarks
|
|
153
152
|
* A new update is started if one is not already in progress. If an document with the same uri
|
|
154
153
|
* already exists, it will be replaced.
|
|
155
|
-
* @param
|
|
154
|
+
* @param uri - Document URI
|
|
155
|
+
* @param text - Document text
|
|
156
|
+
* @param docType - Optional. Document type
|
|
157
|
+
* @param metadata - Optional. Document metadata to index
|
|
156
158
|
* @returns Inserted document
|
|
157
159
|
*/
|
|
158
|
-
upsertDocument(uri, text, metadata) {
|
|
160
|
+
upsertDocument(uri, text, docType, metadata) {
|
|
159
161
|
return __awaiter(this, void 0, void 0, function* () {
|
|
160
162
|
// Ensure embeddings configured
|
|
161
163
|
if (!this._embeddings) {
|
|
@@ -171,25 +173,31 @@ class LocalDocumentIndex extends LocalIndex_1.LocalIndex {
|
|
|
171
173
|
// Generate new document ID
|
|
172
174
|
documentId = (0, uuid_1.v4)();
|
|
173
175
|
}
|
|
174
|
-
//
|
|
175
|
-
const config = Object.assign({}, this._chunkingConfig);
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
const
|
|
179
|
-
|
|
176
|
+
// Initialize text splitter settings
|
|
177
|
+
const config = Object.assign({ docType }, this._chunkingConfig);
|
|
178
|
+
if (config.docType == undefined) {
|
|
179
|
+
// Populate docType based on extension
|
|
180
|
+
const pos = uri.lastIndexOf('.');
|
|
181
|
+
if (pos >= 0) {
|
|
182
|
+
const ext = uri.substring(pos + 1).toLowerCase();
|
|
183
|
+
config.docType = ext;
|
|
184
|
+
}
|
|
180
185
|
}
|
|
181
186
|
// Split text into chunks
|
|
182
187
|
const splitter = new TextSplitter_1.TextSplitter(config);
|
|
183
188
|
const chunks = splitter.split(text);
|
|
184
189
|
// Break chunks into batches for embedding generation
|
|
190
|
+
let totalTokens = 0;
|
|
185
191
|
const chunkBatches = [];
|
|
186
192
|
let currentBatch = [];
|
|
187
193
|
for (const chunk of chunks) {
|
|
188
|
-
|
|
189
|
-
if (
|
|
194
|
+
totalTokens += chunk.tokens.length;
|
|
195
|
+
if (totalTokens > this._embeddings.maxTokens) {
|
|
190
196
|
chunkBatches.push(currentBatch);
|
|
191
197
|
currentBatch = [];
|
|
198
|
+
totalTokens = chunk.tokens.length;
|
|
192
199
|
}
|
|
200
|
+
currentBatch.push(chunk.text.replace(/\n/g, ' '));
|
|
193
201
|
}
|
|
194
202
|
if (currentBatch.length > 0) {
|
|
195
203
|
chunkBatches.push(currentBatch);
|
|
@@ -267,7 +275,7 @@ class LocalDocumentIndex extends LocalIndex_1.LocalIndex {
|
|
|
267
275
|
// Generate embeddings for query
|
|
268
276
|
let embeddings;
|
|
269
277
|
try {
|
|
270
|
-
embeddings = yield this._embeddings.createEmbeddings(query);
|
|
278
|
+
embeddings = yield this._embeddings.createEmbeddings(query.replace(/\n/g, ' '));
|
|
271
279
|
}
|
|
272
280
|
catch (err) {
|
|
273
281
|
throw new Error(`Error generating embeddings for query: ${err.toString()}`);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LocalDocumentIndex.js","sourceRoot":"","sources":["../src/LocalDocumentIndex.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,2CAA6B;AAC7B,+BAA0B;AAC1B,mDAAgD;AAChD,6CAA6D;AAC7D,iDAAkE;AAElE,+DAA4D;AAC5D,mDAAgD;
|
|
1
|
+
{"version":3,"file":"LocalDocumentIndex.js","sourceRoot":"","sources":["../src/LocalDocumentIndex.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,2CAA6B;AAC7B,+BAA0B;AAC1B,mDAAgD;AAChD,6CAA6D;AAC7D,iDAAkE;AAElE,+DAA4D;AAC5D,mDAAgD;AAehD,MAAa,kBAAmB,SAAQ,uBAAU;IAQ9C,YAAmB,MAAgC;;QAC/C,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QACzB,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,eAAe,GAAG,MAAM,CAAC,MAAM,CAAC;YACjC,cAAc,EAAE,IAAI;YACpB,SAAS,EAAE,GAAG;YACd,YAAY,EAAE,CAAC;SACI,EAAE,MAAM,CAAC,cAAc,CAAC,CAAC;QAChD,IAAI,CAAC,UAAU,GAAG,MAAA,MAAA,MAAM,CAAC,SAAS,mCAAI,IAAI,CAAC,eAAe,CAAC,SAAS,mCAAI,IAAI,6BAAa,EAAE,CAAC;QAC5F,IAAI,CAAC,eAAe,CAAC,SAAS,GAAG,IAAI,CAAC,UAAU,CAAC;IACrD,CAAC;IAED;;OAEG;IACU,gBAAgB;;YACzB,IAAI;gBACA,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,cAAc,CAAC,CAAC,CAAC;gBAC5D,OAAO,IAAI,CAAC;aACf;YAAC,OAAO,GAAY,EAAE;gBACnB,OAAO,KAAK,CAAC;aAChB;QACL,CAAC;KAAA;IAEY,aAAa,CAAC,GAAW;;;YAClC,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;YAC3B,OAAO,MAAA,IAAI,CAAC,QAAQ,0CAAE,OAAO,CAAC,GAAG,CAAC,CAAC;;KACtC;IAEY,cAAc,CAAC,UAAkB;;;YAC1C,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;YAC3B,OAAO,MAAA,IAAI,CAAC,QAAQ,0CAAE,OAAO,CAAC,UAAU,CAAC,CAAC;;KAC7C;IAEY,WAAW,CAAC,MAA0B;;;;;YAC/C,MAAM,OAAM,WAAW,YAAC,MAAM,CAAC,CAAC;YAChC,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;QAC/B,CAAC;KAAA;IAEY,cAAc,CAAC,GAAW;;YACnC,qBAAqB;YACrB,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;YACjD,IAAI,UAAU,IAAI,SAAS,EAAE;gBACzB,OAAO;aACV;YAED,4DAA4D;YAC5D,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YACzB,IAAI;gBACA,kCAAkC;gBAClC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,mBAAmB,CAAwB,EAAE,UAAU,EAAE,CAAC,CAAC;gBAErF,gBAAgB;gBAChB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE;oBACxB,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;iBACnC;gBAED,4BAA4B;gBAC5B,OAAO,IAAI,CAAC,WAAY,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;gBACtC,OAAO,IAAI,CAAC,WAAY,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;gBAC7C,IAAI,CAAC,WAAY,CAAC,KAAK,EAAE,CAAC;gBAE1B,iBAAiB;gBACjB,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;aAC1B;YAAC,OAAO,GAAY,EAAE;gBACnB,gCAAgC;gBAChC,IAAI,CAAC,YAAY,EAAE,CAAC;gBACpB,MAAM,IAAI,KAAK,CAAC,4BAA4B,GAAG,MAAO,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;aACnF;YAED,6BAA6B;YAC7B,IAAI;gBACA,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,UAAU,MAAM,CAAC,CAAC,CAAC;aACpE;YAAC,OAAO,GAAY,EAAE;gBACnB,MAAM,IAAI,KAAK,CAAC,0CAA0C,GAAG,gBAAiB,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;aAC3G;YAED,iCAAiC;YACjC,IAAI;gBACA,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,UAAU,OAAO,CAAC,CAAC,CAAC;aACrE;YAAC,OAAO,GAAY,EAAE;gBACnB,eAAe;aAClB;QACL,CAAC;KAAA;IAEY,eAAe;;YACxB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,aAAa,EAAE,CAAA;YACxC,OAAO;gBACH,OAAO,EAAE,IAAI,CAAC,QAAS,CAAC,OAAO;gBAC/B,SAAS,EAAE,IAAI,CAAC,QAAS,CAAC,KAAK;gBAC/B,MAAM,EAAE,KAAK,CAAC,KAAK;gBACnB,eAAe,EAAE,KAAK,CAAC,eAAe;aACzC,CAAC;QACN,CAAC;KAAA;IAED;;;;;;;;;;OAUG;IACU,cAAc,CAAC,GAAW,EAAE,IAAY,EAAE,OAAgB,EAAE,QAAwC;;YAC7G,+BAA+B;YAC/B,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE;gBACnB,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;aACvD;YAED,iCAAiC;YACjC,IAAI,UAAU,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;YAC/C,IAAI,UAAU,IAAI,SAAS,EAAE;gBACzB,2BAA2B;gBAC3B,MAAM,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;aAClC;iBAAM;gBACH,2BAA2B;gBAC3B,UAAU,GAAG,IAAA,SAAE,GAAE,CAAC;aACrB;YAED,oCAAoC;YACpC,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,OAAO,EAAE,EAAE,IAAI,CAAC,eAAe,CAAC,CAAC;YAChE,IAAI,MAAM,CAAC,OAAO,IAAI,SAAS,EAAE;gBAC7B,sCAAsC;gBACtC,MAAM,GAAG,GAAG,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;gBACjC,IAAI,GAAG,IAAI,CAAC,EAAE;oBACV,MAAM,GAAG,GAAG,GAAG,CAAC,SAAS,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;oBACjD,MAAM,CAAC,OAAO,GAAG,GAAG,CAAC;iBACxB;aACJ;YAED,yBAAyB;YACzB,MAAM,QAAQ,GAAG,IAAI,2BAAY,CAAC,MAAM,CAAC,CAAC;YAC1C,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAEpC,qDAAqD;YACrD,IAAI,WAAW,GAAG,CAAC,CAAC;YACpB,MAAM,YAAY,GAAe,EAAE,CAAC;YACpC,IAAI,YAAY,GAAa,EAAE,CAAC;YAChC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE;gBACxB,WAAW,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;gBACnC,IAAI,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,SAAS,EAAE;oBAC1C,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;oBAChC,YAAY,GAAG,EAAE,CAAC;oBAClB,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;iBACrC;gBACD,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC;aACrD;YACD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE;gBACzB,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;aACnC;YAED,iCAAiC;YACjC,MAAM,UAAU,GAAe,EAAE,CAAC;YAClC,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE;gBAC9B,IAAI,QAA4B,CAAC;gBACjC,IAAI;oBACA,QAAQ,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;iBAC7D;gBAAC,OAAO,GAAY,EAAE;oBACnB,MAAM,IAAI,KAAK,CAAC,gCAAiC,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;iBAC9E;gBAED,kBAAkB;gBAClB,IAAI,QAAQ,CAAC,MAAM,IAAI,SAAS,EAAE;oBAC9B,MAAM,IAAI,KAAK,CAAC,gCAAgC,QAAQ,CAAC,OAAO,EAAE,CAAC,CAAC;iBACvE;gBAED,2BAA2B;gBAC3B,KAAK,MAAM,SAAS,IAAI,QAAQ,CAAC,MAAO,EAAE;oBACtC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;iBAC9B;aACJ;YAED,+BAA+B;YAC/B,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YACzB,IAAI;gBACA,sBAAsB;gBACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;oBACpC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;oBACxB,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;oBAChC,MAAM,aAAa,GAA0B,MAAM,CAAC,MAAM,CAAC;wBACvD,UAAU;wBACV,QAAQ,EAAE,KAAK,CAAC,QAAQ;wBACxB,MAAM,EAAE,KAAK,CAAC,MAAM;qBACvB,EAAE,QAAQ,CAAC,CAAC;oBACb,MAAM,IAAI,CAAC,UAAU,CAAC;wBAClB,EAAE,EAAE,IAAA,SAAE,GAAE;wBACR,QAAQ,EAAE,aAAa;wBACvB,MAAM,EAAE,SAAS;qBACpB,CAAC,CAAC;iBACN;gBAED,6BAA6B;gBAC7B,IAAI,QAAQ,IAAI,SAAS,EAAE;oBACvB,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,UAAU,OAAO,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC;iBAClG;gBAED,yBAAyB;gBACzB,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,UAAU,MAAM,CAAC,EAAE,IAAI,CAAC,CAAC;gBAE1E,uBAAuB;gBACvB,IAAI,CAAC,WAAY,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,UAAU,CAAC;gBAC5C,IAAI,CAAC,WAAY,CAAC,OAAO,CAAC,UAAU,CAAC,GAAG,GAAG,CAAC;gBAC5C,IAAI,CAAC,WAAY,CAAC,KAAK,EAAE,CAAC;gBAE1B,iBAAiB;gBACjB,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;aAC1B;YAAC,OAAO,GAAY,EAAE;gBACnB,gCAAgC;gBAChC,IAAI,CAAC,YAAY,EAAE,CAAC;gBACpB,MAAM,IAAI,KAAK,CAAC,0BAA0B,GAAG,MAAO,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;aACjF;YAED,kBAAkB;YAClB,OAAO,IAAI,6BAAa,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,EAAE,GAAG,CAAC,CAAC;QAC/D,CAAC;KAAA;IAGY,cAAc,CAAC,KAAa,EAAE,OAA8B;;YACrE,+BAA+B;YAC/B,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE;gBACnB,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;aACvD;YAED,6BAA6B;YAC7B,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC;gBACpB,YAAY,EAAE,EAAE;gBAChB,SAAS,EAAE,EAAE;aAChB,EAAE,OAAO,CAAC,CAAC;YAEZ,gCAAgC;YAChC,IAAI,UAA8B,CAAC;YACnC,IAAI;gBACA,UAAU,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC;aACnF;YAAC,OAAO,GAAY,EAAE;gBACnB,MAAM,IAAI,KAAK,CAAC,0CAA2C,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;aACxF;YAED,kBAAkB;YAClB,IAAI,UAAU,CAAC,MAAM,IAAI,SAAS,EAAE;gBAChC,MAAM,IAAI,KAAK,CAAC,0CAA0C,UAAU,CAAC,OAAO,EAAE,CAAC,CAAC;aACnF;YAED,yBAAyB;YACzB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,UAAU,CAAwB,UAAU,CAAC,MAAO,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,SAAU,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;YAExH,2BAA2B;YAC3B,MAAM,cAAc,GAAoE,EAAE,CAAC;YAC3F,KAAK,MAAM,MAAM,IAAK,OAAO,EAAE;gBAC3B,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC;gBACtC,IAAI,cAAc,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,SAAS,EAAE;oBAClD,cAAc,CAAC,QAAQ,CAAC,UAAU,CAAC,GAAG,EAAE,CAAC;iBAC5C;gBACD,cAAc,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;aACpD;YAED,6CAA6C;YAC7C,MAAM,eAAe,GAA0B,EAAE,CAAC;YAClD,KAAK,MAAM,UAAU,IAAI,cAAc,EAAE;gBACrC,MAAM,MAAM,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;gBAC1C,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,UAAU,CAAW,CAAC;gBAC5D,MAAM,cAAc,GAAG,IAAI,yCAAmB,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;gBAC1G,eAAe,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;aACxC;YAED,wDAAwD;YACxD,OAAO,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,YAAa,CAAC,CAAC;QAC7F,CAAC;KAAA;IAED,YAAY;IAEC,WAAW;;;;;YACpB,MAAM,OAAM,WAAW,WAAE,CAAC;YAC1B,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QACxD,CAAC;KAAA;IAEM,YAAY;QACf,KAAK,CAAC,YAAY,EAAE,CAAC;QACrB,IAAI,CAAC,WAAW,GAAG,SAAS,CAAC;IACjC,CAAC;IAEY,SAAS;;;;;YAClB,MAAM,OAAM,SAAS,WAAE,CAAC;YAExB,IAAI;gBACA,eAAe;gBACf,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,cAAc,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;gBACjG,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC;gBACjC,IAAI,CAAC,WAAW,GAAG,SAAS,CAAC;aAChC;YAAC,OAAM,GAAY,EAAE;gBAClB,MAAM,IAAI,KAAK,CAAC,kCAAmC,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;aAChF;QACL,CAAC;KAAA;IAEe,aAAa;;;;;YACzB,MAAM,OAAM,aAAa,WAAE,CAAC;YAE5B,IAAI,IAAI,CAAC,QAAQ,EAAE;gBACf,OAAO;aACV;YAED,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,cAAc,CAAC,CAAC;YAC/D,IAAI,MAAM,IAAI,CAAC,gBAAgB,EAAE,EAAE;gBAC/B,eAAe;gBACf,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;gBAC9C,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;aACjD;iBAAM;gBACH,IAAI;oBACA,qBAAqB;oBACrB,IAAI,CAAC,QAAQ,GAAG;wBACZ,OAAO,EAAE,CAAC;wBACV,KAAK,EAAE,CAAC;wBACR,OAAO,EAAE,EAAE;wBACX,OAAO,EAAE,EAAE;qBACd,CAAC;oBACF,MAAM,EAAE,CAAC,SAAS,CAAC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;iBAClE;gBAAC,OAAM,GAAY,EAAE;oBAClB,MAAM,IAAI,KAAK,CAAC,oCAAqC,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;iBAClF;aACJ;QACL,CAAC;KAAA;CACJ;AA3UD,gDA2UC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"OpenAIEmbeddings.d.ts","sourceRoot":"","sources":["../src/OpenAIEmbeddings.ts"],"names":[],"mappings":"AAAA,OAAc,EAAiB,aAAa,EAAE,kBAAkB,EAAE,MAAM,OAAO,CAAC;AAChF,OAAO,EAAE,eAAe,EAAE,kBAAkB,EAAE,MAAM,SAAS,CAAC;AAC9D,OAAO,EAAE,sBAAsB,EAAE,uBAAuB,EAAgC,MAAM,aAAa,CAAC;AAE5G,MAAM,WAAW,2BAA2B;IACxC;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IAEvB;;OAEG;IACH,aAAa,CAAC,EAAE,kBAAkB,CAAC;CACtC;AAED;;GAEG;AACH,MAAM,WAAW,uBAAwB,SAAQ,2BAA2B;IACxE;;;;OAIG;IACH,MAAM,EAAE,MAAM,CAAC;IAEf;;;;OAIG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,4BAA6B,SAAQ,2BAA2B;IAC7E;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IAEpB;;OAEG;IACH,aAAa,EAAE,MAAM,CAAC;IAEtB;;OAEG;IACH,eAAe,EAAE,MAAM,CAAC;IAExB;;OAEG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED;;;GAGG;AACH,qBAAa,gBAAiB,YAAW,eAAe;IACpD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAgB;IAC5C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAU;IAEpC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAe;IAEzC;;OAEG;IACH,SAAgB,OAAO,EAAE,uBAAuB,GAAC,4BAA4B,CAAC;IAE9E;;;OAGG;gBACgB,OAAO,EAAE,uBAAuB,GAAC,4BAA4B;IAiChF;;;;;OAKG;IACU,gBAAgB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,OAAO,CAAC,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"file":"OpenAIEmbeddings.d.ts","sourceRoot":"","sources":["../src/OpenAIEmbeddings.ts"],"names":[],"mappings":"AAAA,OAAc,EAAiB,aAAa,EAAE,kBAAkB,EAAE,MAAM,OAAO,CAAC;AAChF,OAAO,EAAE,eAAe,EAAE,kBAAkB,EAAE,MAAM,SAAS,CAAC;AAC9D,OAAO,EAAE,sBAAsB,EAAE,uBAAuB,EAAgC,MAAM,aAAa,CAAC;AAE5G,MAAM,WAAW,2BAA2B;IACxC;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IAEvB;;OAEG;IACH,aAAa,CAAC,EAAE,kBAAkB,CAAC;CACtC;AAED;;GAEG;AACH,MAAM,WAAW,uBAAwB,SAAQ,2BAA2B;IACxE;;;;OAIG;IACH,MAAM,EAAE,MAAM,CAAC;IAEf;;;;OAIG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,4BAA6B,SAAQ,2BAA2B;IAC7E;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IAEpB;;OAEG;IACH,aAAa,EAAE,MAAM,CAAC;IAEtB;;OAEG;IACH,eAAe,EAAE,MAAM,CAAC;IAExB;;OAEG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED;;;GAGG;AACH,qBAAa,gBAAiB,YAAW,eAAe;IACpD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAgB;IAC5C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAU;IAEpC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAe;IAEzC,SAAgB,SAAS,QAAQ;IAEjC;;OAEG;IACH,SAAgB,OAAO,EAAE,uBAAuB,GAAC,4BAA4B,CAAC;IAE9E;;;OAGG;gBACgB,OAAO,EAAE,uBAAuB,GAAC,4BAA4B;IAiChF;;;;;OAKG;IACU,gBAAgB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAgBrF;;OAEG;IACH,SAAS,CAAC,sBAAsB,CAAC,OAAO,EAAE,sBAAsB,GAAG,OAAO,CAAC,aAAa,CAAC,uBAAuB,CAAC,CAAC;IAalH;;OAEG;cACa,IAAI,CAAC,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,UAAU,SAAI,GAAG,OAAO,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;CAqCxG"}
|
package/lib/OpenAIEmbeddings.js
CHANGED
|
@@ -25,6 +25,7 @@ class OpenAIEmbeddings {
|
|
|
25
25
|
*/
|
|
26
26
|
constructor(options) {
|
|
27
27
|
this.UserAgent = 'AlphaWave';
|
|
28
|
+
this.maxTokens = 8000;
|
|
28
29
|
// Check for azure config
|
|
29
30
|
if (options.azureApiKey) {
|
|
30
31
|
this._useAzure = true;
|
|
@@ -50,7 +51,7 @@ class OpenAIEmbeddings {
|
|
|
50
51
|
}
|
|
51
52
|
// Create client
|
|
52
53
|
this._httpClient = axios_1.default.create({
|
|
53
|
-
validateStatus: (status) =>
|
|
54
|
+
validateStatus: (status) => true
|
|
54
55
|
});
|
|
55
56
|
}
|
|
56
57
|
/**
|
|
@@ -72,6 +73,7 @@ class OpenAIEmbeddings {
|
|
|
72
73
|
return { status: 'rate_limited', message: `The embeddings API returned a rate limit error.` };
|
|
73
74
|
}
|
|
74
75
|
else {
|
|
76
|
+
console.log(inputs);
|
|
75
77
|
return { status: 'error', message: `The embeddings API returned an error status of ${response.status}: ${response.statusText}` };
|
|
76
78
|
}
|
|
77
79
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"OpenAIEmbeddings.js","sourceRoot":"","sources":["../src/OpenAIEmbeddings.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,kDAAgF;AA2EhF;;;GAGG;AACH,MAAa,gBAAgB;
|
|
1
|
+
{"version":3,"file":"OpenAIEmbeddings.js","sourceRoot":"","sources":["../src/OpenAIEmbeddings.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,kDAAgF;AA2EhF;;;GAGG;AACH,MAAa,gBAAgB;IAazB;;;OAGG;IACH,YAAmB,OAA6D;QAb/D,cAAS,GAAG,WAAW,CAAC;QAEzB,cAAS,GAAG,IAAI,CAAC;QAY7B,yBAAyB;QACzB,IAAK,OAAwC,CAAC,WAAW,EAAE;YACvD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;YACtB,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC;gBACzB,WAAW,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC;gBACzB,eAAe,EAAE,YAAY;aAChC,EAAE,OAAO,CAAiC,CAAC;YAE5C,gCAAgC;YAChC,IAAI,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;YACjD,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE;gBACxB,QAAQ,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;aACzD;YAED,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE;gBAChD,MAAM,IAAI,KAAK,CAAC,+CAA+C,QAAQ,4CAA4C,CAAC,CAAC;aACxH;YAED,IAAI,CAAC,OAAO,CAAC,aAAa,GAAG,QAAQ,CAAC;SACzC;aAAM;YACH,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;YACvB,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC;gBACzB,WAAW,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC;aAC5B,EAAE,OAAO,CAA4B,CAAC;SAC1C;QAED,gBAAgB;QAChB,IAAI,CAAC,WAAW,GAAG,eAAK,CAAC,MAAM,CAAC;YAC5B,cAAc,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,IAAI;SACnC,CAAC,CAAC;IACP,CAAC;IAED;;;;;OAKG;IACU,gBAAgB,CAAC,MAAyB;;YACnD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,sBAAsB,CAAC;gBAC/C,KAAK,EAAE,MAAM;aAChB,CAAC,CAAC;YAEH,mBAAmB;YACnB,IAAI,QAAQ,CAAC,MAAM,GAAG,GAAG,EAAE;gBACvB,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;aAC5H;iBAAM,IAAI,QAAQ,CAAC,MAAM,IAAI,GAAG,EAAE;gBAC/B,OAAO,EAAE,MAAM,EAAE,cAAc,EAAE,OAAO,EAAE,iDAAiD,EAAE,CAAA;aAChG;iBAAM;gBACH,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;gBACpB,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,kDAAkD,QAAQ,CAAC,MAAM,KAAK,QAAQ,CAAC,UAAU,EAAE,EAAE,CAAC;aACpI;QACL,CAAC;KAAA;IAED;;OAEG;IACO,sBAAsB,CAAC,OAA+B;;QAC5D,IAAI,IAAI,CAAC,SAAS,EAAE;YAChB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAuC,CAAC;YAC7D,MAAM,GAAG,GAAG,GAAG,OAAO,CAAC,aAAa,uBAAuB,OAAO,CAAC,eAAe,2BAA2B,OAAO,CAAC,eAAgB,EAAE,CAAC;YACxI,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;SAClC;aAAM;YACH,MAAM,OAAO,GAAG,IAAI,CAAC,OAAkC,CAAC;YACxD,MAAM,GAAG,GAAG,GAAG,MAAA,OAAO,CAAC,QAAQ,mCAAI,wBAAwB,gBAAgB,CAAC;YAC3E,OAAwC,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;YAChE,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;SAClC;IACL,CAAC;IAED;;OAEG;IACa,IAAI,CAAQ,GAAW,EAAE,IAAY,EAAE,UAAU,GAAG,CAAC;;YACjE,4BAA4B;YAC5B,MAAM,aAAa,GAAuB,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;YAExF,6BAA6B;YAC7B,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE;gBACxB,aAAa,CAAC,OAAO,GAAG,EAAE,CAAC;aAC9B;YACD,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE;gBACxC,aAAa,CAAC,OAAO,CAAC,cAAc,CAAC,GAAG,kBAAkB,CAAC;aAC9D;YACD,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE;gBACtC,aAAa,CAAC,OAAO,CAAC,YAAY,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC;aACxD;YACD,IAAI,IAAI,CAAC,SAAS,EAAE;gBAChB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAuC,CAAC;gBAC7D,aAAa,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC;aAC1D;iBAAM;gBACH,MAAM,OAAO,GAAG,IAAI,CAAC,OAAkC,CAAC;gBACxD,aAAa,CAAC,OAAO,CAAC,eAAe,CAAC,GAAG,UAAU,OAAO,CAAC,MAAM,EAAE,CAAC;gBACpE,IAAI,OAAO,CAAC,YAAY,EAAE;oBACtB,aAAa,CAAC,OAAO,CAAC,qBAAqB,CAAC,GAAG,OAAO,CAAC,YAAY,CAAC;iBACvE;aACJ;YAED,eAAe;YACf,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,aAAa,CAAC,CAAC;YAEvE,6BAA6B;YAC7B,IAAI,QAAQ,CAAC,MAAM,IAAI,GAAG,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,MAAM,EAAE;gBACnH,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;gBACnD,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;gBAC3D,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,UAAU,GAAG,CAAC,CAAC,CAAC;aAC/C;iBAAM;gBACH,OAAO,QAAQ,CAAC;aACnB;QACL,CAAC;KAAA;CACJ;AAhID,4CAgIC"}
|
package/lib/TextSplitter.d.ts
CHANGED
|
@@ -12,6 +12,8 @@ export declare class TextSplitter {
|
|
|
12
12
|
constructor(config?: Partial<TextSplitterConfig>);
|
|
13
13
|
split(text: string): TextChunk[];
|
|
14
14
|
private recursiveSplit;
|
|
15
|
+
private combineChunks;
|
|
16
|
+
private containsAlphanumeric;
|
|
15
17
|
private getSeparators;
|
|
16
18
|
}
|
|
17
19
|
//# sourceMappingURL=TextSplitter.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"TextSplitter.d.ts","sourceRoot":"","sources":["../src/TextSplitter.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"TextSplitter.d.ts","sourceRoot":"","sources":["../src/TextSplitter.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAI/C,MAAM,WAAW,kBAAkB;IAC/B,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,SAAS,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,qBAAa,YAAY;IACrB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAqB;gBAE1B,MAAM,CAAC,EAAE,OAAO,CAAC,kBAAkB,CAAC;IA2BhD,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE;IA4BvC,OAAO,CAAC,cAAc;IAoEtB,OAAO,CAAC,aAAa;IA6BrB,OAAO,CAAC,oBAAoB;IAS5B,OAAO,CAAC,aAAa;CAoWxB"}
|
package/lib/TextSplitter.js
CHANGED
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.TextSplitter = void 0;
|
|
4
4
|
const GPT3Tokenizer_1 = require("./GPT3Tokenizer");
|
|
5
|
+
const ALPHANUMERIC_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
|
|
5
6
|
class TextSplitter {
|
|
6
7
|
constructor(config) {
|
|
7
8
|
this._config = Object.assign({
|
|
8
|
-
separators: ["\n\n", "\n", " ", ""],
|
|
9
9
|
keepSeparators: false,
|
|
10
10
|
chunkSize: 400,
|
|
11
11
|
chunkOverlap: 40,
|
|
@@ -56,10 +56,22 @@ class TextSplitter {
|
|
|
56
56
|
}
|
|
57
57
|
recursiveSplit(text, separators, startPos) {
|
|
58
58
|
const chunks = [];
|
|
59
|
-
if (text.length > 0
|
|
60
|
-
|
|
59
|
+
if (text.length > 0) {
|
|
60
|
+
// Split text into parts
|
|
61
|
+
let parts;
|
|
62
|
+
let separator = '';
|
|
61
63
|
const nextSeparators = separators.length > 1 ? separators.slice(1) : [];
|
|
62
|
-
|
|
64
|
+
if (separators.length > 0) {
|
|
65
|
+
// Split by separator
|
|
66
|
+
separator = separators[0];
|
|
67
|
+
parts = text.split(separator);
|
|
68
|
+
}
|
|
69
|
+
else {
|
|
70
|
+
// Cut text in half
|
|
71
|
+
const half = Math.floor(text.length / 2);
|
|
72
|
+
parts = [text.substring(0, half), text.substring(half)];
|
|
73
|
+
}
|
|
74
|
+
// Iterate over parts
|
|
63
75
|
for (let i = 0; i < parts.length; i++) {
|
|
64
76
|
const lastChunk = (i === parts.length - 1);
|
|
65
77
|
// Get chunk text and endPos
|
|
@@ -68,29 +80,79 @@ class TextSplitter {
|
|
|
68
80
|
if (this._config.keepSeparators && !lastChunk) {
|
|
69
81
|
chunk += separator;
|
|
70
82
|
}
|
|
71
|
-
//
|
|
72
|
-
|
|
73
|
-
|
|
83
|
+
// Ensure chunk contains text
|
|
84
|
+
if (!this.containsAlphanumeric(chunk)) {
|
|
85
|
+
continue;
|
|
86
|
+
}
|
|
87
|
+
// Optimization to avoid encoding really large chunks
|
|
88
|
+
if (chunk.length / 6 > this._config.chunkSize) {
|
|
74
89
|
// Break the text into smaller chunks
|
|
75
90
|
const subChunks = this.recursiveSplit(chunk, nextSeparators, startPos);
|
|
76
91
|
chunks.push(...subChunks);
|
|
77
92
|
}
|
|
78
93
|
else {
|
|
79
|
-
//
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
94
|
+
// Encode chunk text
|
|
95
|
+
const tokens = this._config.tokenizer.encode(chunk);
|
|
96
|
+
if (tokens.length > this._config.chunkSize) {
|
|
97
|
+
// Break the text into smaller chunks
|
|
98
|
+
const subChunks = this.recursiveSplit(chunk, nextSeparators, startPos);
|
|
99
|
+
chunks.push(...subChunks);
|
|
100
|
+
}
|
|
101
|
+
else {
|
|
102
|
+
// Append chunk to output
|
|
103
|
+
chunks.push({
|
|
104
|
+
text: chunk,
|
|
105
|
+
tokens: tokens,
|
|
106
|
+
startPos: startPos,
|
|
107
|
+
endPos: endPos,
|
|
108
|
+
startOverlap: [],
|
|
109
|
+
endOverlap: [],
|
|
110
|
+
});
|
|
111
|
+
}
|
|
88
112
|
}
|
|
89
113
|
// Update startPos
|
|
90
114
|
startPos = endPos + 1;
|
|
91
115
|
}
|
|
92
116
|
}
|
|
93
|
-
return chunks;
|
|
117
|
+
return this.combineChunks(chunks);
|
|
118
|
+
}
|
|
119
|
+
combineChunks(chunks) {
|
|
120
|
+
const combinedChunks = [];
|
|
121
|
+
let currentChunk;
|
|
122
|
+
let currentLength = 0;
|
|
123
|
+
const separator = this._config.keepSeparators ? '' : ' ';
|
|
124
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
125
|
+
const chunk = chunks[i];
|
|
126
|
+
if (currentChunk) {
|
|
127
|
+
const length = currentChunk.tokens.length + chunk.tokens.length;
|
|
128
|
+
if (length > this._config.chunkSize) {
|
|
129
|
+
combinedChunks.push(currentChunk);
|
|
130
|
+
currentChunk = chunk;
|
|
131
|
+
currentLength = chunk.tokens.length;
|
|
132
|
+
}
|
|
133
|
+
else {
|
|
134
|
+
currentChunk.text += separator + chunk.text;
|
|
135
|
+
currentChunk.tokens.push(...chunk.tokens);
|
|
136
|
+
currentLength += chunk.tokens.length;
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
else {
|
|
140
|
+
currentChunk = chunk;
|
|
141
|
+
currentLength = chunk.tokens.length;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
if (currentChunk) {
|
|
145
|
+
combinedChunks.push(currentChunk);
|
|
146
|
+
}
|
|
147
|
+
return combinedChunks;
|
|
148
|
+
}
|
|
149
|
+
containsAlphanumeric(text) {
|
|
150
|
+
for (let i = 0; i < text.length; i++) {
|
|
151
|
+
if (ALPHANUMERIC_CHARS.includes(text[i])) {
|
|
152
|
+
return true;
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
return false;
|
|
94
156
|
}
|
|
95
157
|
getSeparators(docType) {
|
|
96
158
|
switch (docType !== null && docType !== void 0 ? docType : '') {
|
|
@@ -112,8 +174,7 @@ class TextSplitter {
|
|
|
112
174
|
// Split by the normal type of lines
|
|
113
175
|
"\n\n",
|
|
114
176
|
"\n",
|
|
115
|
-
" "
|
|
116
|
-
"",
|
|
177
|
+
" "
|
|
117
178
|
];
|
|
118
179
|
case "go":
|
|
119
180
|
return [
|
|
@@ -130,8 +191,7 @@ class TextSplitter {
|
|
|
130
191
|
// Split by the normal type of lines
|
|
131
192
|
"\n\n",
|
|
132
193
|
"\n",
|
|
133
|
-
" "
|
|
134
|
-
"",
|
|
194
|
+
" "
|
|
135
195
|
];
|
|
136
196
|
case "java":
|
|
137
197
|
case "c#":
|
|
@@ -157,8 +217,7 @@ class TextSplitter {
|
|
|
157
217
|
// Split by the normal type of lines
|
|
158
218
|
"\n\n",
|
|
159
219
|
"\n",
|
|
160
|
-
" "
|
|
161
|
-
"",
|
|
220
|
+
" "
|
|
162
221
|
];
|
|
163
222
|
case "js":
|
|
164
223
|
case "jsx":
|
|
@@ -182,8 +241,7 @@ class TextSplitter {
|
|
|
182
241
|
// Split by the normal type of lines
|
|
183
242
|
"\n\n",
|
|
184
243
|
"\n",
|
|
185
|
-
" "
|
|
186
|
-
"",
|
|
244
|
+
" "
|
|
187
245
|
];
|
|
188
246
|
case "php":
|
|
189
247
|
return [
|
|
@@ -201,8 +259,7 @@ class TextSplitter {
|
|
|
201
259
|
// Split by the normal type of lines
|
|
202
260
|
"\n\n",
|
|
203
261
|
"\n",
|
|
204
|
-
" "
|
|
205
|
-
"",
|
|
262
|
+
" "
|
|
206
263
|
];
|
|
207
264
|
case "proto":
|
|
208
265
|
return [
|
|
@@ -221,8 +278,7 @@ class TextSplitter {
|
|
|
221
278
|
// Split by the normal type of lines
|
|
222
279
|
"\n\n",
|
|
223
280
|
"\n",
|
|
224
|
-
" "
|
|
225
|
-
"",
|
|
281
|
+
" "
|
|
226
282
|
];
|
|
227
283
|
case "python":
|
|
228
284
|
case "py":
|
|
@@ -234,8 +290,7 @@ class TextSplitter {
|
|
|
234
290
|
// Now split by the normal type of lines
|
|
235
291
|
"\n\n",
|
|
236
292
|
"\n",
|
|
237
|
-
" "
|
|
238
|
-
"",
|
|
293
|
+
" "
|
|
239
294
|
];
|
|
240
295
|
case "rst":
|
|
241
296
|
return [
|
|
@@ -248,8 +303,7 @@ class TextSplitter {
|
|
|
248
303
|
// Split by the normal type of lines
|
|
249
304
|
"\n\n",
|
|
250
305
|
"\n",
|
|
251
|
-
" "
|
|
252
|
-
"",
|
|
306
|
+
" "
|
|
253
307
|
];
|
|
254
308
|
case "ruby":
|
|
255
309
|
return [
|
|
@@ -267,8 +321,7 @@ class TextSplitter {
|
|
|
267
321
|
// Split by the normal type of lines
|
|
268
322
|
"\n\n",
|
|
269
323
|
"\n",
|
|
270
|
-
" "
|
|
271
|
-
"",
|
|
324
|
+
" "
|
|
272
325
|
];
|
|
273
326
|
case "rust":
|
|
274
327
|
return [
|
|
@@ -286,8 +339,7 @@ class TextSplitter {
|
|
|
286
339
|
// Split by the normal type of lines
|
|
287
340
|
"\n\n",
|
|
288
341
|
"\n",
|
|
289
|
-
" "
|
|
290
|
-
"",
|
|
342
|
+
" "
|
|
291
343
|
];
|
|
292
344
|
case "scala":
|
|
293
345
|
return [
|
|
@@ -307,8 +359,7 @@ class TextSplitter {
|
|
|
307
359
|
// Split by the normal type of lines
|
|
308
360
|
"\n\n",
|
|
309
361
|
"\n",
|
|
310
|
-
" "
|
|
311
|
-
"",
|
|
362
|
+
" "
|
|
312
363
|
];
|
|
313
364
|
case "swift":
|
|
314
365
|
return [
|
|
@@ -328,9 +379,9 @@ class TextSplitter {
|
|
|
328
379
|
// Split by the normal type of lines
|
|
329
380
|
"\n\n",
|
|
330
381
|
"\n",
|
|
331
|
-
" "
|
|
332
|
-
"",
|
|
382
|
+
" "
|
|
333
383
|
];
|
|
384
|
+
case "md":
|
|
334
385
|
case "markdown":
|
|
335
386
|
return [
|
|
336
387
|
// First, try to split along Markdown headings (starting with level 2)
|
|
@@ -350,10 +401,14 @@ class TextSplitter {
|
|
|
350
401
|
"\n\n___\n\n",
|
|
351
402
|
// Note that this splitter doesn't handle horizontal lines defined
|
|
352
403
|
// by *three or more* of ***, ---, or ___, but this is not handled
|
|
404
|
+
// Github tables
|
|
405
|
+
"<table>",
|
|
406
|
+
// "<tr>",
|
|
407
|
+
// "<td>",
|
|
408
|
+
// "<td ",
|
|
353
409
|
"\n\n",
|
|
354
410
|
"\n",
|
|
355
|
-
" "
|
|
356
|
-
"",
|
|
411
|
+
" "
|
|
357
412
|
];
|
|
358
413
|
case "latex":
|
|
359
414
|
return [
|
|
@@ -378,8 +433,7 @@ class TextSplitter {
|
|
|
378
433
|
// Now split by the normal type of lines
|
|
379
434
|
"\n\n",
|
|
380
435
|
"\n",
|
|
381
|
-
" "
|
|
382
|
-
"",
|
|
436
|
+
" "
|
|
383
437
|
];
|
|
384
438
|
case "html":
|
|
385
439
|
return [
|
|
@@ -412,8 +466,7 @@ class TextSplitter {
|
|
|
412
466
|
"<meta>",
|
|
413
467
|
"<title>",
|
|
414
468
|
// Normal type of lines
|
|
415
|
-
" "
|
|
416
|
-
"",
|
|
469
|
+
" "
|
|
417
470
|
];
|
|
418
471
|
case "sol":
|
|
419
472
|
return [
|
|
@@ -442,8 +495,7 @@ class TextSplitter {
|
|
|
442
495
|
// Split by the normal type of lines
|
|
443
496
|
"\n\n",
|
|
444
497
|
"\n",
|
|
445
|
-
" "
|
|
446
|
-
"",
|
|
498
|
+
" "
|
|
447
499
|
];
|
|
448
500
|
default:
|
|
449
501
|
return [
|
package/lib/TextSplitter.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"TextSplitter.js","sourceRoot":"","sources":["../src/TextSplitter.ts"],"names":[],"mappings":";;;AAAA,mDAAgD;
|
|
1
|
+
{"version":3,"file":"TextSplitter.js","sourceRoot":"","sources":["../src/TextSplitter.ts"],"names":[],"mappings":";;;AAAA,mDAAgD;AAGhD,MAAM,kBAAkB,GAAG,gEAAgE,CAAC;AAW5F,MAAa,YAAY;IAGrB,YAAmB,MAAoC;QACnD,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC;YACzB,cAAc,EAAE,KAAK;YACrB,SAAS,EAAE,GAAG;YACd,YAAY,EAAE,EAAE;SACG,EAAE,MAAM,CAAC,CAAC;QAEjC,iDAAiD;QACjD,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;YACzB,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,IAAI,6BAAa,EAAE,CAAC;SAChD;QAED,8CAA8C;QAC9C,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,IAAI,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE;YAClE,IAAI,CAAC,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;SACtE;QAED,+BAA+B;QAC/B,IAAI,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,CAAC,EAAE;YAC5B,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;SAC7C;aAAM,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,CAAC,EAAE;YACtC,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;SAChD;aAAM,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;YAC3D,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;SACxD;IACL,CAAC;IAEM,KAAK,CAAC,IAAY;QACrB,mBAAmB;QACnB,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;QAErE,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,SAAS,gBAAgB,CAAC,MAAiB;YACvC,IAAI,MAAM,IAAI,SAAS,EAAE;gBACrB,MAAM,GAAG,GAAG,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC;gBAClG,OAAO,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;aACtC;iBAAM;gBACH,OAAO,EAAE,CAAC;aACb;QACL,CAAC;QAED,iEAAiE;QACjE,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,CAAC,EAAE;YAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACpC,MAAM,aAAa,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBACpC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;gBACxB,MAAM,SAAS,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;gBACpE,KAAK,CAAC,YAAY,GAAG,gBAAgB,CAAC,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC;gBAChF,KAAK,CAAC,UAAU,GAAG,gBAAgB,CAAC,SAAS,aAAT,SAAS,uBAAT,SAAS,CAAE,MAAM,CAAC,CAAC;aAC1D;SACJ;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAEO,cAAc,CAAC,IAAY,EAAE,UAAoB,EAAE,QAAgB;QACvE,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;YACjB,wBAAwB;YACxB,IAAI,KAAe,CAAC;YACpB,IAAI,SAAS,GAAG,EAAE,CAAC;YACnB,MAAM,cAAc,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YACxE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE;gBACvB,qBAAqB;gBACrB,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;gBAC1B,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;aACjC;iBAAM;gBACH,mBAAmB;gBACnB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBACzC,KAAK,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC;aAC3D;YAED,qBAAqB;YACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnC,MAAM,SAAS,GAAG,CAAC,CAAC,KAAK,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAE3C,4BAA4B;gBAC5B,IAAI,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACrB,MAAM,MAAM,GAAG,CAAC,QAAQ,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;gBACpF,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,IAAI,CAAC,SAAS,EAAE;oBAC3C,KAAK,IAAI,SAAS,CAAC;iBACtB;gBAED,6BAA6B;gBAC7B,IAAI,CAAC,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC,EAAE;oBACnC,SAAS;iBACZ;gBAED,qDAAqD;gBACrD,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;oBAC3C,qCAAqC;oBACrC,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,cAAc,EAAE,QAAQ,CAAC,CAAC;oBACvE,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;iBAC7B;qBAAM;oBACH,oBAAoB;oBACpB,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;oBACpD,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;wBACxC,qCAAqC;wBACrC,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,cAAc,EAAE,QAAQ,CAAC,CAAC;wBACvE,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;qBAC7B;yBAAM;wBACH,yBAAyB;wBACzB,MAAM,CAAC,IAAI,CAAC;4BACR,IAAI,EAAE,KAAK;4BACX,MAAM,EAAE,MAAM;4BACd,QAAQ,EAAE,QAAQ;4BAClB,MAAM,EAAE,MAAM;4BACd,YAAY,EAAE,EAAE;4BAChB,UAAU,EAAE,EAAE;yBACjB,CAAC,CAAC;qBACN;iBAEJ;gBAGD,kBAAkB;gBAClB,QAAQ,GAAG,MAAM,GAAG,CAAC,CAAC;aACzB;SACJ;QAED,OAAO,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;IACtC,CAAC;IAEO,aAAa,CAAC,MAAmB;QACrC,MAAM,cAAc,GAAgB,EAAE,CAAC;QACvC,IAAI,YAAiC,CAAC;QACtC,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QACzD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YACpC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACxB,IAAI,YAAY,EAAE;gBACd,MAAM,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;gBAChE,IAAI,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;oBACjC,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;oBAClC,YAAY,GAAG,KAAK,CAAC;oBACrB,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;iBACvC;qBAAM;oBACH,YAAY,CAAC,IAAI,IAAI,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC;oBAC5C,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;oBAC1C,aAAa,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;iBACxC;aACJ;iBAAM;gBACH,YAAY,GAAG,KAAK,CAAC;gBACrB,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;aACvC;SACJ;QACD,IAAI,YAAY,EAAE;YACd,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;SACrC;QACD,OAAO,cAAc,CAAC;IAC1B,CAAC;IAEO,oBAAoB,CAAC,IAAY;QACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YAClC,IAAI,kBAAkB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE;gBACtC,OAAO,IAAI,CAAC;aACf;SACJ;QACD,OAAO,KAAK,CAAC;IACjB,CAAC;IAEO,aAAa,CAAC,OAAgB;QAClC,QAAQ,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,EAAE;YACnB,KAAK,KAAK;gBACN,OAAO;oBACH,gCAAgC;oBAChC,UAAU;oBACV,mCAAmC;oBACnC,SAAS;oBACT,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,IAAI;gBACL,OAAO;oBACH,mCAAmC;oBACnC,SAAS;oBACT,QAAQ;oBACR,UAAU;oBACV,SAAS;oBACT,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM,CAAC;YACZ,KAAK,IAAI,CAAC;YACV,KAAK,QAAQ,CAAC;YACd,KAAK,IAAI,CAAC;YACV,KAAK,IAAI,CAAC;YACV,KAAK,KAAK,CAAC;YACX,KAAK,YAAY;gBACb,OAAO;oBACH,gCAAgC;oBAChC,UAAU;oBACV,iCAAiC;oBACjC,WAAW;oBACX,cAAc;oBACd,YAAY;oBACZ,WAAW;oBACX,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,IAAI,CAAC;YACV,KAAK,KAAK,CAAC;YACX,KAAK,YAAY;gBACb,OAAO;oBACH,gCAAgC;oBAChC,UAAU;oBACV,mCAAmC;oBACnC,aAAa;oBACb,UAAU;oBACV,QAAQ;oBACR,QAAQ;oBACR,UAAU;oBACV,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,YAAY;oBACZ,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,KAAK;gBACN,OAAO;oBACH,mCAAmC;oBACnC,aAAa;oBACb,gCAAgC;oBAChC,UAAU;oBACV,sCAAsC;oBACtC,OAAO;oBACP,YAAY;oBACZ,UAAU;oBACV,OAAO;oBACP,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,kCAAkC;oBAClC,YAAY;oBACZ,kCAAkC;oBAClC,YAAY;oBACZ,+BAA+B;oBAC/B,SAAS;oBACT,iCAAiC;oBACjC,WAAW;oBACX,gCAAgC;oBAChC,WAAW;oBACX,kCAAkC;oBAClC,WAAW;oBACX,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,QAAQ,CAAC;YACd,KAAK,IAAI;gBACL,OAAO;oBACH,8CAA8C;oBAC9C,UAAU;oBACV,QAAQ;oBACR,UAAU;oBACV,wCAAwC;oBACxC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,KAAK;gBACN,OAAO;oBACH,6BAA6B;oBAC7B,SAAS;oBACT,SAAS;oBACT,SAAS;oBACT,gCAAgC;oBAChC,OAAO;oBACP,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM;gBACP,OAAO;oBACH,iCAAiC;oBACjC,QAAQ;oBACR,UAAU;oBACV,sCAAsC;oBACtC,OAAO;oBACP,WAAW;oBACX,UAAU;oBACV,QAAQ;oBACR,OAAO;oBACP,UAAU;oBACV,WAAW;oBACX,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM;gBACP,OAAO;oBACH,mCAAmC;oBACnC,OAAO;oBACP,UAAU;oBACV,QAAQ;oBACR,sCAAsC;oBACtC,OAAO;oBACP,UAAU;oBACV,QAAQ;oBACR,SAAS;oBACT,UAAU;oBACV,UAAU;oBACV,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,gCAAgC;oBAChC,UAAU;oBACV,WAAW;oBACX,iCAAiC;oBACjC,QAAQ;oBACR,QAAQ;oBACR,QAAQ;oBACR,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,UAAU;oBACV,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,mCAAmC;oBACnC,SAAS;oBACT,gCAAgC;oBAChC,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,OAAO;oBACP,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,IAAI,CAAC;YACV,KAAK,UAAU;gBACX,OAAO;oBACH,sEAAsE;oBACtE,OAAO;oBACP,QAAQ;oBACR,SAAS;oBACT,UAAU;oBACV,WAAW;oBACX,uEAAuE;oBACvE,kBAAkB;oBAClB,kBAAkB;oBAClB,oBAAoB;oBACpB,SAAS;oBACT,mBAAmB;oBACnB,aAAa;oBACb,aAAa;oBACb,aAAa;oBACb,kEAAkE;oBAClE,kEAAkE;oBAClE,gBAAgB;oBAChB,SAAS;oBACT,UAAU;oBACV,UAAU;oBACV,UAAU;oBACV,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,2CAA2C;oBAC3C,cAAc;oBACd,cAAc;oBACd,iBAAiB;oBACjB,oBAAoB;oBAEpB,4BAA4B;oBAC5B,sBAAsB;oBACtB,oBAAoB;oBACpB,wBAAwB;oBACxB,iBAAiB;oBACjB,kBAAkB;oBAClB,sBAAsB;oBACtB,kBAAkB;oBAClB,qBAAqB;oBAErB,iCAAiC;oBACjC,kBAAkB;oBAClB,IAAI;oBACJ,GAAG;oBAEH,wCAAwC;oBACxC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM;gBACP,OAAO;oBACH,sCAAsC;oBACtC,QAAQ;oBACR,OAAO;oBACP,KAAK;oBACL,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,QAAQ;oBACR,SAAS;oBACT,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,UAAU;oBACV,UAAU;oBACV,OAAO;oBACP,OAAO;oBACP,QAAQ;oBACR,SAAS;oBACT,UAAU;oBACV,QAAQ;oBACR,SAAS;oBACT,uBAAuB;oBACvB,GAAG;iBACN,CAAC;YACN,KAAK,KAAK;gBACN,OAAO;oBACH,gDAAgD;oBAChD,WAAW;oBACX,UAAU;oBACV,mCAAmC;oBACnC,aAAa;oBACb,cAAc;oBACd,YAAY;oBACZ,iCAAiC;oBACjC,gBAAgB;oBAChB,SAAS;oBACT,aAAa;oBACb,UAAU;oBACV,aAAa;oBACb,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,aAAa;oBACb,aAAa;oBACb,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN;gBACI,OAAO;oBACH,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;SACT;IACL,CAAC;CACJ;AAxgBD,oCAwgBC"}
|
package/lib/WebFetcher.d.ts
CHANGED
|
@@ -3,14 +3,16 @@ import { TextFetcher } from './types';
|
|
|
3
3
|
export interface WebFetcherConfig {
|
|
4
4
|
headers?: Record<string, string>;
|
|
5
5
|
requestConfig?: AxiosRequestConfig;
|
|
6
|
-
|
|
6
|
+
htmlToMarkdown: boolean;
|
|
7
7
|
summarizeHtml: boolean;
|
|
8
8
|
}
|
|
9
9
|
export declare class WebFetcher implements TextFetcher {
|
|
10
10
|
private readonly _config;
|
|
11
11
|
constructor(config?: Partial<WebFetcherConfig>);
|
|
12
|
-
fetch(uri: string): Promise<
|
|
13
|
-
|
|
14
|
-
|
|
12
|
+
fetch(uri: string): Promise<{
|
|
13
|
+
text: string;
|
|
14
|
+
docType: string | undefined;
|
|
15
|
+
}>;
|
|
16
|
+
private htmlToMarkdown;
|
|
15
17
|
}
|
|
16
18
|
//# sourceMappingURL=WebFetcher.d.ts.map
|
package/lib/WebFetcher.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"WebFetcher.d.ts","sourceRoot":"","sources":["../src/WebFetcher.ts"],"names":[],"mappings":"AAAA,OAAc,EAAE,kBAAkB,EAAE,MAAM,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"WebFetcher.d.ts","sourceRoot":"","sources":["../src/WebFetcher.ts"],"names":[],"mappings":"AAAA,OAAc,EAAE,kBAAkB,EAAE,MAAM,OAAO,CAAC;AAClD,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AA6BtC,MAAM,WAAW,gBAAgB;IAC7B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAC,MAAM,CAAC,CAAC;IAChC,aAAa,CAAC,EAAE,kBAAkB,CAAC;IACnC,cAAc,EAAE,OAAO,CAAC;IACxB,aAAa,EAAE,OAAO,CAAC;CAC1B;AAED,qBAAa,UAAW,YAAW,WAAW;IAC1C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAmB;gBAExB,MAAM,CAAC,EAAE,OAAO,CAAC,gBAAgB,CAAC;IAOxC,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,GAAC,SAAS,CAAC;KAAE,CAAC;IAyCtF,OAAO,CAAC,cAAc;CAmCzB"}
|