vectra 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/README.md +3 -3
  2. package/bin/vectra.js +3 -0
  3. package/lib/GPT3Tokenizer.d.ts +9 -0
  4. package/lib/GPT3Tokenizer.d.ts.map +1 -0
  5. package/lib/GPT3Tokenizer.js +17 -0
  6. package/lib/GPT3Tokenizer.js.map +1 -0
  7. package/lib/ItemSelector.d.ts +41 -0
  8. package/lib/ItemSelector.d.ts.map +1 -0
  9. package/lib/ItemSelector.js +156 -0
  10. package/lib/ItemSelector.js.map +1 -0
  11. package/lib/LocalDocument.d.ts +16 -0
  12. package/lib/LocalDocument.d.ts.map +1 -0
  13. package/lib/LocalDocument.js +99 -0
  14. package/lib/LocalDocument.js.map +1 -0
  15. package/lib/LocalDocumentIndex.d.ts +48 -0
  16. package/lib/LocalDocumentIndex.d.ts.map +1 -0
  17. package/lib/LocalDocumentIndex.js +367 -0
  18. package/lib/LocalDocumentIndex.js.map +1 -0
  19. package/lib/LocalDocumentResult.d.ts +12 -0
  20. package/lib/LocalDocumentResult.d.ts.map +1 -0
  21. package/lib/LocalDocumentResult.js +186 -0
  22. package/lib/LocalDocumentResult.js.map +1 -0
  23. package/lib/LocalIndex.d.ts +130 -0
  24. package/lib/LocalIndex.d.ts.map +1 -0
  25. package/lib/LocalIndex.js +405 -0
  26. package/lib/LocalIndex.js.map +1 -0
  27. package/lib/OpenAIEmbeddings.d.ts +98 -0
  28. package/lib/OpenAIEmbeddings.d.ts.map +1 -0
  29. package/lib/OpenAIEmbeddings.js +139 -0
  30. package/lib/OpenAIEmbeddings.js.map +1 -0
  31. package/lib/TextSplitter.d.ts +17 -0
  32. package/lib/TextSplitter.d.ts.map +1 -0
  33. package/lib/TextSplitter.js +460 -0
  34. package/lib/TextSplitter.js.map +1 -0
  35. package/lib/WebFetcher.d.ts +16 -0
  36. package/lib/WebFetcher.d.ts.map +1 -0
  37. package/lib/WebFetcher.js +144 -0
  38. package/lib/WebFetcher.js.map +1 -0
  39. package/lib/index.d.ts +11 -0
  40. package/lib/index.d.ts.map +1 -0
  41. package/lib/index.js +27 -0
  42. package/lib/index.js.map +1 -0
  43. package/lib/internals/Colorize.d.ts +14 -0
  44. package/lib/internals/Colorize.d.ts.map +1 -0
  45. package/lib/internals/Colorize.js +64 -0
  46. package/lib/internals/Colorize.js.map +1 -0
  47. package/lib/internals/index.d.ts +3 -0
  48. package/lib/internals/index.d.ts.map +1 -0
  49. package/lib/internals/index.js +19 -0
  50. package/lib/internals/index.js.map +1 -0
  51. package/lib/internals/types.d.ts +42 -0
  52. package/lib/internals/types.d.ts.map +1 -0
  53. package/lib/internals/types.js +3 -0
  54. package/lib/internals/types.js.map +1 -0
  55. package/lib/types.d.ts +133 -0
  56. package/lib/types.d.ts.map +1 -0
  57. package/lib/types.js +3 -0
  58. package/lib/types.js.map +1 -0
  59. package/lib/vectra-cli.d.ts +2 -0
  60. package/lib/vectra-cli.d.ts.map +1 -0
  61. package/lib/vectra-cli.js +276 -0
  62. package/lib/vectra-cli.js.map +1 -0
  63. package/package.json +21 -3
  64. package/src/GPT3Tokenizer.ts +15 -0
  65. package/src/ItemSelector.ts +9 -9
  66. package/src/LocalDocument.ts +70 -0
  67. package/src/LocalDocumentIndex.ts +355 -0
  68. package/src/LocalDocumentResult.ts +206 -0
  69. package/src/LocalIndex.ts +12 -78
  70. package/src/OpenAIEmbeddings.ts +205 -0
  71. package/src/TextSplitter.ts +480 -0
  72. package/src/WebFetcher.ts +128 -0
  73. package/src/index.ts +8 -0
  74. package/src/internals/Colorize.ts +64 -0
  75. package/src/internals/index.ts +2 -0
  76. package/src/internals/types.ts +46 -0
  77. package/src/types.ts +160 -0
  78. package/src/vectra-cli.ts +238 -0
@@ -0,0 +1,367 @@
1
+ "use strict";
2
// Compiler-emitted module interop helper (the accompanying source map names a .ts source).
// Exposes property `k` of module object `m` on object `o` under the name `k2` (defaults to `k`).
// An existing `this.__createBinding` is reused when `this` is defined and already carries one.
// Two implementations are selected by whether `Object.create` is available.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
// Swap in a live getter (reads m[k] on every access) when there is no own descriptor, when the
// property is an accessor on a module not flagged __esModule, or when it is a data property that
// is writable or configurable. Otherwise the original descriptor is re-used as-is.
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
// Fallback when Object.create is unavailable: copy the current value with a plain assignment.
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// Compiler-emitted module interop helper: stores `v` as the `default` property of `o`.
// An existing `this.__setModuleDefault` is reused when `this` is defined and already carries one.
// With Object.create available the property is defined as enumerable via defineProperty
// (no `writable` flag is passed); otherwise it is assigned directly.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// Compiler-emitted helper used for the namespace-style requires of "fs/promises" and "path" below.
// Returns `mod` unchanged when it is flagged `__esModule`; otherwise returns a new object that
// binds every own property of `mod` except "default" and exposes `mod` itself as `default`.
+ var __importStar = (this && this.__importStar) || function (mod) {
19
+ if (mod && mod.__esModule) return mod;
20
+ var result = {};
21
// `mod != null` skips both null and undefined; inherited keys are filtered by hasOwnProperty.
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
22
+ __setModuleDefault(result, mod);
23
+ return result;
24
+ };
25
// Compiler-emitted helper that runs a generator function as an asynchronous routine.
//   thisArg / _arguments: `this` value and argument list the generator function is applied with.
//   P: promise constructor to use; falls back to the global Promise when falsy.
//   generator: generator function whose `yield`s are treated as await points.
// Returns a P that resolves with the generator's return value or rejects with whatever it throws.
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
26
// Wrap a yielded value in a P unless it already is an instance of P.
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
27
+ return new (P || (P = Promise))(function (resolve, reject) {
28
// Resume the generator with the settled value; a synchronous throw rejects the outer promise.
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
29
// Throw the rejection reason into the generator so its own try/catch blocks can handle it.
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
30
// Finish when the generator is done, otherwise wait on the yielded value and continue.
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
31
// Instantiate the generator (rebinding `generator` to the iterator) and start it.
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
32
+ });
33
+ };
34
+ Object.defineProperty(exports, "__esModule", { value: true });
35
+ exports.LocalDocumentIndex = void 0;
36
+ const fs = __importStar(require("fs/promises"));
37
+ const path = __importStar(require("path"));
38
+ const uuid_1 = require("uuid");
39
+ const GPT3Tokenizer_1 = require("./GPT3Tokenizer");
40
+ const LocalIndex_1 = require("./LocalIndex");
41
+ const TextSplitter_1 = require("./TextSplitter");
42
+ const LocalDocumentResult_1 = require("./LocalDocumentResult");
43
+ const LocalDocument_1 = require("./LocalDocument");
44
// Maximum number of chunk texts grouped into one batch passed to createEmbeddings() by upsertDocument().
+ const EMBEDDINGS_BATCH_SIZE = 500;
45
/**
 * Document-level index layered on top of `LocalIndex`.
 *
 * A document is split into chunks with `TextSplitter`, every chunk is embedded with the
 * configured embeddings model and stored as an index item, and a `catalog.json` file in the
 * index folder maps document URIs to document IDs (and back). The document text is stored
 * next to the index as `<documentId>.txt`, optional metadata as `<documentId>.json`.
 */
class LocalDocumentIndex extends LocalIndex_1.LocalIndex {
    /**
     * @param config Settings object. `folderPath` is forwarded to the base class, `embeddings`
     * is the model used by `upsertDocument`/`queryDocuments`, `chunkingConfig` is merged over the
     * defaults (`keepSeparators: true`, `chunkSize: 512`, `chunkOverlap: 0`) and `tokenizer`
     * takes precedence over `chunkingConfig.tokenizer`; a `GPT3Tokenizer` is created when
     * neither is given.
     */
    constructor(config) {
        var _a, _b;
        super(config.folderPath);
        this._embeddings = config.embeddings;
        this._chunkingConfig = Object.assign({
            keepSeparators: true,
            chunkSize: 512,
            chunkOverlap: 0,
        }, config.chunkingConfig);
        this._tokenizer = (_b = (_a = config.tokenizer) !== null && _a !== void 0 ? _a : this._chunkingConfig.tokenizer) !== null && _b !== void 0 ? _b : new GPT3Tokenizer_1.GPT3Tokenizer();
        // The splitter and the result renderer must agree on one tokenizer.
        this._chunkingConfig.tokenizer = this._tokenizer;
    }
    /**
     * Returns true if the document catalog exists.
     * @returns Promise resolving to false when `catalog.json` cannot be accessed for any reason.
     */
    isCatalogCreated() {
        return __awaiter(this, void 0, void 0, function* () {
            try {
                yield fs.access(path.join(this.folderPath, 'catalog.json'));
                return true;
            }
            catch (err) {
                return false;
            }
        });
    }
    /**
     * Looks up the document ID registered for a uri.
     * @param uri Document uri.
     * @returns Promise resolving to the document ID, or undefined when the uri is not in the catalog.
     */
    getDocumentId(uri) {
        var _a;
        return __awaiter(this, void 0, void 0, function* () {
            yield this.loadIndexData();
            return (_a = this._catalog) === null || _a === void 0 ? void 0 : _a.uriToId[uri];
        });
    }
    /**
     * Looks up the uri registered for a document ID.
     * @param documentId Document ID.
     * @returns Promise resolving to the uri, or undefined when the ID is not in the catalog.
     */
    getDocumentUri(documentId) {
        var _a;
        return __awaiter(this, void 0, void 0, function* () {
            yield this.loadIndexData();
            return (_a = this._catalog) === null || _a === void 0 ? void 0 : _a.idToUri[documentId];
        });
    }
    /**
     * Creates the underlying index and then loads index data, which also creates the catalog
     * file when it does not exist yet.
     * @param config Passed through unchanged to the base class `createIndex`.
     */
    createIndex(config) {
        const _super = Object.create(null, {
            createIndex: { get: () => super.createIndex }
        });
        return __awaiter(this, void 0, void 0, function* () {
            yield _super.createIndex.call(this, config);
            yield this.loadIndexData();
        });
    }
    /**
     * Removes a document: its chunks, its catalog entry and its files on disk.
     * Does nothing when the uri is not in the catalog.
     * @param uri Document uri.
     * @throws Error when the index update fails or the document's text file cannot be removed.
     */
    deleteDocument(uri) {
        return __awaiter(this, void 0, void 0, function* () {
            // Lookup document ID
            const documentId = yield this.getDocumentId(uri);
            if (documentId == undefined) {
                return;
            }
            // Delete document chunks from index and remove from catalog
            yield this.beginUpdate();
            try {
                // Get list of chunks for document
                const chunks = yield this.listItemsByMetadata({ documentId });
                // Delete chunks
                for (const chunk of chunks) {
                    yield this.deleteItem(chunk.id);
                }
                // Remove entry from catalog
                delete this._newCatalog.uriToId[uri];
                delete this._newCatalog.idToUri[documentId];
                this._newCatalog.count--;
                // Commit changes
                yield this.endUpdate();
            }
            catch (err) {
                // Cancel update and raise error
                this.cancelUpdate();
                throw new Error(`Error deleting document "${uri}": ${err.toString()}`);
            }
            // Delete text file from disk
            try {
                yield fs.unlink(path.join(this.folderPath, `${documentId}.txt`));
            }
            catch (err) {
                throw new Error(`Error removing text file for document "${uri}" from disk: ${err.toString()}`);
            }
            // Delete metadata file from disk
            try {
                yield fs.unlink(path.join(this.folderPath, `${documentId}.json`));
            }
            catch (err) {
                // Ignore error: the metadata file is only written when metadata was supplied
            }
        });
    }
    /**
     * Returns catalog statistics combined with the base index statistics.
     * @returns Promise resolving to `{ version, documents, chunks, metadata_config }`.
     */
    getCatalogStats() {
        return __awaiter(this, void 0, void 0, function* () {
            const stats = yield this.getIndexStats();
            return {
                version: this._catalog.version,
                documents: this._catalog.count,
                chunks: stats.items,
                metadata_config: stats.metadata_config
            };
        });
    }
    /**
     * Adds a document to the catalog.
     * @remarks
     * If a document with the same uri already exists it is replaced and its document ID is
     * reused. The existing document is only removed after the new embeddings have been
     * generated, so a failing embeddings call leaves the stored document untouched.
     * @param uri Uri of the document; the text after the last '.' (lower-cased) is used as the
     * splitter's `docType`.
     * @param text Full text of the document.
     * @param metadata Optional metadata; merged into every chunk's metadata and saved to
     * `<documentId>.json`.
     * @returns Promise resolving to the inserted document.
     * @throws Error when no embeddings model is configured, when embeddings cannot be generated,
     * or when the index update fails.
     */
    upsertDocument(uri, text, metadata) {
        return __awaiter(this, void 0, void 0, function* () {
            // Ensure embeddings configured
            if (!this._embeddings) {
                throw new Error(`Embeddings model not configured.`);
            }
            // Reuse the ID of an existing document with this uri, otherwise generate a new one
            const existingId = yield this.getDocumentId(uri);
            const documentId = existingId != undefined ? existingId : (0, uuid_1.v4)();
            // Populate docType based on extension
            const config = Object.assign({}, this._chunkingConfig);
            const pos = uri.lastIndexOf('.');
            if (pos >= 0) {
                const ext = uri.substring(pos + 1).toLowerCase();
                config.docType = ext;
            }
            // Split text into chunks
            const splitter = new TextSplitter_1.TextSplitter(config);
            const chunks = splitter.split(text);
            // Break chunks into batches for embedding generation
            const chunkBatches = [];
            let currentBatch = [];
            for (const chunk of chunks) {
                currentBatch.push(chunk.text);
                if (currentBatch.length >= EMBEDDINGS_BATCH_SIZE) {
                    chunkBatches.push(currentBatch);
                    currentBatch = [];
                }
            }
            if (currentBatch.length > 0) {
                chunkBatches.push(currentBatch);
            }
            // Generate embeddings for chunks
            const embeddings = [];
            for (const batch of chunkBatches) {
                let response;
                try {
                    response = yield this._embeddings.createEmbeddings(batch);
                }
                catch (err) {
                    throw new Error(`Error generating embeddings: ${err.toString()}`);
                }
                // Check for error
                if (response.status !== 'success') {
                    throw new Error(`Error generating embeddings: ${response.message}`);
                }
                // Add embeddings to output
                for (const embedding of response.output) {
                    embeddings.push(embedding);
                }
            }
            // Every chunk needs its own vector; refuse to index chunks with a missing embedding
            if (embeddings.length !== chunks.length) {
                throw new Error(`Error generating embeddings: expected ${chunks.length} embeddings but received ${embeddings.length}.`);
            }
            // Delete the existing document now that its replacement is ready to be indexed.
            // deleteDocument() runs its own update, so it has to complete before beginUpdate().
            if (existingId != undefined) {
                yield this.deleteDocument(uri);
            }
            // Add document chunks to index
            yield this.beginUpdate();
            try {
                // Add chunks to index
                for (let i = 0; i < chunks.length; i++) {
                    const chunk = chunks[i];
                    const embedding = embeddings[i];
                    const chunkMetadata = Object.assign({
                        documentId,
                        startPos: chunk.startPos,
                        endPos: chunk.endPos,
                    }, metadata);
                    yield this.insertItem({
                        id: (0, uuid_1.v4)(),
                        metadata: chunkMetadata,
                        vector: embedding,
                    });
                }
                // Save metadata file to disk
                if (metadata != undefined) {
                    yield fs.writeFile(path.join(this.folderPath, `${documentId}.json`), JSON.stringify(metadata));
                }
                // Save text file to disk
                yield fs.writeFile(path.join(this.folderPath, `${documentId}.txt`), text);
                // Add entry to catalog
                this._newCatalog.uriToId[uri] = documentId;
                this._newCatalog.idToUri[documentId] = uri;
                this._newCatalog.count++;
                // Commit changes
                yield this.endUpdate();
            }
            catch (err) {
                // Cancel update and raise error
                this.cancelUpdate();
                throw new Error(`Error adding document "${uri}": ${err.toString()}`);
            }
            // Return document
            return new LocalDocument_1.LocalDocument(this.folderPath, documentId, uri);
        });
    }
    /**
     * Finds the documents whose chunks best match a query.
     * @param query Query text; it is embedded with the configured embeddings model.
     * @param options Optional settings: `maxDocuments` (default 10), `maxChunks` (default 50)
     * and `filter`, which is forwarded to `queryItems`.
     * @returns Promise resolving to document results sorted by descending score, at most
     * `maxDocuments` of them.
     * @throws Error when no embeddings model is configured or the query cannot be embedded.
     */
    queryDocuments(query, options) {
        return __awaiter(this, void 0, void 0, function* () {
            // Ensure embeddings configured
            if (!this._embeddings) {
                throw new Error(`Embeddings model not configured.`);
            }
            // Ensure options are defined
            options = Object.assign({
                maxDocuments: 10,
                maxChunks: 50,
            }, options);
            // Generate embeddings for query
            let embeddings;
            try {
                embeddings = yield this._embeddings.createEmbeddings(query);
            }
            catch (err) {
                throw new Error(`Error generating embeddings for query: ${err.toString()}`);
            }
            // Check for error
            if (embeddings.status !== 'success') {
                throw new Error(`Error generating embeddings for query: ${embeddings.message}`);
            }
            // Query index for chunks
            const results = yield this.queryItems(embeddings.output[0], options.maxChunks, options.filter);
            // Group chunks by document
            const documentChunks = {};
            for (const result of results) {
                const metadata = result.item.metadata;
                if (documentChunks[metadata.documentId] == undefined) {
                    documentChunks[metadata.documentId] = [];
                }
                documentChunks[metadata.documentId].push(result);
            }
            // Create a document result for each document
            const documentResults = [];
            for (const documentId in documentChunks) {
                const chunks = documentChunks[documentId];
                const uri = yield this.getDocumentUri(documentId);
                const documentResult = new LocalDocumentResult_1.LocalDocumentResult(this.folderPath, documentId, uri, chunks, this._tokenizer);
                documentResults.push(documentResult);
            }
            // Sort document results by score and return top results
            return documentResults.sort((a, b) => b.score - a.score).slice(0, options.maxDocuments);
        });
    }
    // Overrides
    /**
     * Begins an update on the base index and prepares a working copy of the catalog.
     */
    beginUpdate() {
        const _super = Object.create(null, {
            beginUpdate: { get: () => super.beginUpdate }
        });
        return __awaiter(this, void 0, void 0, function* () {
            yield _super.beginUpdate.call(this);
            const current = this._catalog;
            this._newCatalog = Object.assign({}, current);
            if (current) {
                // Object.assign copies only one level. The lookup tables are copied as well so that
                // edits made during the update never reach the committed catalog; without this a
                // cancelled update would still have altered `_catalog.uriToId` / `idToUri`.
                this._newCatalog.uriToId = Object.assign({}, current.uriToId);
                this._newCatalog.idToUri = Object.assign({}, current.idToUri);
            }
        });
    }
    /**
     * Cancels the update on the base index and discards the working copy of the catalog.
     */
    cancelUpdate() {
        super.cancelUpdate();
        this._newCatalog = undefined;
    }
    /**
     * Commits the base index update, then writes the working catalog to `catalog.json` and
     * makes it the current catalog.
     * @throws Error when the catalog file cannot be written.
     */
    endUpdate() {
        const _super = Object.create(null, {
            endUpdate: { get: () => super.endUpdate }
        });
        return __awaiter(this, void 0, void 0, function* () {
            yield _super.endUpdate.call(this);
            try {
                // Save catalog
                yield fs.writeFile(path.join(this.folderPath, 'catalog.json'), JSON.stringify(this._newCatalog));
                this._catalog = this._newCatalog;
                this._newCatalog = undefined;
            }
            catch (err) {
                throw new Error(`Error saving document catalog: ${err.toString()}`);
            }
        });
    }
    /**
     * Loads the base index data and, once per instance, the document catalog. A new empty
     * catalog (version 1) is created and written to disk when `catalog.json` does not exist.
     * @throws Error when a new catalog cannot be written.
     */
    loadIndexData() {
        const _super = Object.create(null, {
            loadIndexData: { get: () => super.loadIndexData }
        });
        return __awaiter(this, void 0, void 0, function* () {
            yield _super.loadIndexData.call(this);
            // Catalog already in memory
            if (this._catalog) {
                return;
            }
            const catalogPath = path.join(this.folderPath, 'catalog.json');
            if (yield this.isCatalogCreated()) {
                // Load catalog
                const buffer = yield fs.readFile(catalogPath);
                this._catalog = JSON.parse(buffer.toString());
            }
            else {
                try {
                    // Initialize catalog
                    this._catalog = {
                        version: 1,
                        count: 0,
                        uriToId: {},
                        idToUri: {},
                    };
                    yield fs.writeFile(catalogPath, JSON.stringify(this._catalog));
                }
                catch (err) {
                    throw new Error(`Error creating document catalog: ${err.toString()}`);
                }
            }
        });
    }
}
366
+ exports.LocalDocumentIndex = LocalDocumentIndex;
367
+ //# sourceMappingURL=LocalDocumentIndex.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"LocalDocumentIndex.js","sourceRoot":"","sources":["../src/LocalDocumentIndex.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,2CAA6B;AAC7B,+BAA0B;AAC1B,mDAAgD;AAChD,6CAA6D;AAC7D,iDAAkE;AAElE,+DAA4D;AAC5D,mDAAgD;AAEhD,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAelC,MAAa,kBAAmB,SAAQ,uBAAU;IAQ9C,YAAmB,MAAgC;;QAC/C,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QACzB,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,eAAe,GAAG,MAAM,CAAC,MAAM,CAAC;YACjC,cAAc,EAAE,IAAI;YACpB,SAAS,EAAE,GAAG;YACd,YAAY,EAAE,CAAC;SACI,EAAE,MAAM,CAAC,cAAc,CAAC,CAAC;QAChD,IAAI,CAAC,UAAU,GAAG,MAAA,MAAA,MAAM,CAAC,SAAS,mCAAI,IAAI,CAAC,eAAe,CAAC,SAAS,mCAAI,IAAI,6BAAa,EAAE,CAAC;QAC5F,IAAI,CAAC,eAAe,CAAC,SAAS,GAAG,IAAI,CAAC,UAAU,CAAC;IACrD,CAAC;IAED;;OAEG;IACU,gBAAgB;;YACzB,IAAI;gBACA,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,cAAc,CAAC,CAAC,CAAC;gBAC5D,OAAO,IAAI,CAAC;aACf;YAAC,OAAO,GAAY,EAAE;gBACnB,OAAO,KAAK,CAAC;aAChB;QACL,CAAC;KAAA;IAEY,aAAa,CAAC,GAAW;;;YAClC,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;YAC3B,OAAO,MAAA,IAAI,CAAC,QAAQ,0CAAE,OAAO,CAAC,GAAG,CAAC,CAAC;;KACtC;IAEY,cAAc,CAAC,UAAkB;;;YAC1C,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;YAC3B,OAAO,MAAA,IAAI,CAAC,QAAQ,0CAAE,OAAO,CAAC,UAAU,CAAC,CAAC;;KAC7C;IAEY,WAAW,CAAC,MAA0B;;;;;YAC/C,MAAM,OAAM,WAAW,YAAC,MAAM,CAAC,CAAC;YAChC,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;QAC/B,CAAC;KAAA;IAEY,cAAc,CAAC,GAAW;;YACnC,qBAAqB;YACrB,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;YACjD,IAAI,UAAU,IAAI,SAAS,EAAE;gBACzB,OAAO;aACV;YAED,4DAA4D;YAC5D,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YACzB,IAAI;gBACA,kCAAkC;gBAClC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,mBAAmB,CAAwB,EAAE,UAAU,EAAE,CAAC,CAAC;gBAErF,gBAAgB;gBAChB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE;oBACxB,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;iBACnC;gBAED,4BAA4B;gBAC5B,OAAO,IAAI,CAAC,WAAY,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;gBACtC,OAAO,IAAI,CAAC,WAAY,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;gBAC7C,IAAI,CAAC,WAAY,CAAC,KAAK,EAAE,CAAC;gBAE1B,iBAAiB;gBACjB,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;aAC1B;YAAC,OAAO,GAAY,EAAE;gBACnB,
gCAAgC;gBAChC,IAAI,CAAC,YAAY,EAAE,CAAC;gBACpB,MAAM,IAAI,KAAK,CAAC,4BAA4B,GAAG,MAAO,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;aACnF;YAED,6BAA6B;YAC7B,IAAI;gBACA,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,UAAU,MAAM,CAAC,CAAC,CAAC;aACpE;YAAC,OAAO,GAAY,EAAE;gBACnB,MAAM,IAAI,KAAK,CAAC,0CAA0C,GAAG,gBAAiB,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;aAC3G;YAED,iCAAiC;YACjC,IAAI;gBACA,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,UAAU,OAAO,CAAC,CAAC,CAAC;aACrE;YAAC,OAAO,GAAY,EAAE;gBACnB,eAAe;aAClB;QACL,CAAC;KAAA;IAEY,eAAe;;YACxB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,aAAa,EAAE,CAAA;YACxC,OAAO;gBACH,OAAO,EAAE,IAAI,CAAC,QAAS,CAAC,OAAO;gBAC/B,SAAS,EAAE,IAAI,CAAC,QAAS,CAAC,KAAK;gBAC/B,MAAM,EAAE,KAAK,CAAC,KAAK;gBACnB,eAAe,EAAE,KAAK,CAAC,eAAe;aACzC,CAAC;QACN,CAAC;KAAA;IAED;;;;;;;OAOG;IACU,cAAc,CAAC,GAAW,EAAE,IAAY,EAAE,QAAwC;;YAC3F,+BAA+B;YAC/B,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE;gBACnB,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;aACvD;YAED,iCAAiC;YACjC,IAAI,UAAU,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;YAC/C,IAAI,UAAU,IAAI,SAAS,EAAE;gBACzB,2BAA2B;gBAC3B,MAAM,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;aAClC;iBAAM;gBACH,2BAA2B;gBAC3B,UAAU,GAAG,IAAA,SAAE,GAAE,CAAC;aACrB;YAED,sCAAsC;YACtC,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,IAAI,CAAC,eAAe,CAAC,CAAC;YACvD,MAAM,GAAG,GAAG,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;YACjC,IAAI,GAAG,IAAI,CAAC,EAAE;gBACV,MAAM,GAAG,GAAG,GAAG,CAAC,SAAS,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;gBACjD,MAAM,CAAC,OAAO,GAAG,GAAG,CAAC;aACxB;YAED,yBAAyB;YACzB,MAAM,QAAQ,GAAG,IAAI,2BAAY,CAAC,MAAM,CAAC,CAAC;YAC1C,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAEpC,qDAAqD;YACrD,MAAM,YAAY,GAAe,EAAE,CAAC;YACpC,IAAI,YAAY,GAAa,EAAE,CAAC;YAChC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE;gBACxB,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC9B,IAAI,YAAY,CAAC,MAAM,IAAI,qBAAqB,EAAE;oBAC9C,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;oBAChC,YAAY,GAAG,EAAE,CAAC;iBACrB;aACJ;YACD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE;gBACzB,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;aACnC;YAED,iCAAiC;YACjC,MAAM,UA
AU,GAAe,EAAE,CAAC;YAClC,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE;gBAC9B,IAAI,QAA4B,CAAC;gBACjC,IAAI;oBACA,QAAQ,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;iBAC7D;gBAAC,OAAO,GAAY,EAAE;oBACnB,MAAM,IAAI,KAAK,CAAC,gCAAiC,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;iBAC9E;gBAED,kBAAkB;gBAClB,IAAI,QAAQ,CAAC,MAAM,IAAI,SAAS,EAAE;oBAC9B,MAAM,IAAI,KAAK,CAAC,gCAAgC,QAAQ,CAAC,OAAO,EAAE,CAAC,CAAC;iBACvE;gBAED,2BAA2B;gBAC3B,KAAK,MAAM,SAAS,IAAI,QAAQ,CAAC,MAAO,EAAE;oBACtC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;iBAC9B;aACJ;YAED,+BAA+B;YAC/B,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YACzB,IAAI;gBACA,sBAAsB;gBACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;oBACpC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;oBACxB,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;oBAChC,MAAM,aAAa,GAA0B,MAAM,CAAC,MAAM,CAAC;wBACvD,UAAU;wBACV,QAAQ,EAAE,KAAK,CAAC,QAAQ;wBACxB,MAAM,EAAE,KAAK,CAAC,MAAM;qBACvB,EAAE,QAAQ,CAAC,CAAC;oBACb,MAAM,IAAI,CAAC,UAAU,CAAC;wBAClB,EAAE,EAAE,IAAA,SAAE,GAAE;wBACR,QAAQ,EAAE,aAAa;wBACvB,MAAM,EAAE,SAAS;qBACpB,CAAC,CAAC;iBACN;gBAED,6BAA6B;gBAC7B,IAAI,QAAQ,IAAI,SAAS,EAAE;oBACvB,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,UAAU,OAAO,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC;iBAClG;gBAED,yBAAyB;gBACzB,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,UAAU,MAAM,CAAC,EAAE,IAAI,CAAC,CAAC;gBAE1E,uBAAuB;gBACvB,IAAI,CAAC,WAAY,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,UAAU,CAAC;gBAC5C,IAAI,CAAC,WAAY,CAAC,OAAO,CAAC,UAAU,CAAC,GAAG,GAAG,CAAC;gBAC5C,IAAI,CAAC,WAAY,CAAC,KAAK,EAAE,CAAC;gBAE1B,iBAAiB;gBACjB,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;aAC1B;YAAC,OAAO,GAAY,EAAE;gBACnB,gCAAgC;gBAChC,IAAI,CAAC,YAAY,EAAE,CAAC;gBACpB,MAAM,IAAI,KAAK,CAAC,0BAA0B,GAAG,MAAO,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;aACjF;YAED,kBAAkB;YAClB,OAAO,IAAI,6BAAa,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,EAAE,GAAG,CAAC,CAAC;QAC/D,CAAC;KAAA;IAGY,cAAc,CAAC,KAAa,EAAE,OAA8B;;YACrE,+BAA+B;YAC/B,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE;gBACnB,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;aACvD;YAED,6BAA6B;YAC7B,OAAO,GAAG,MAAM,CAAC,MAAM,C
AAC;gBACpB,YAAY,EAAE,EAAE;gBAChB,SAAS,EAAE,EAAE;aAChB,EAAE,OAAO,CAAC,CAAC;YAEZ,gCAAgC;YAChC,IAAI,UAA8B,CAAC;YACnC,IAAI;gBACA,UAAU,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;aAC/D;YAAC,OAAO,GAAY,EAAE;gBACnB,MAAM,IAAI,KAAK,CAAC,0CAA2C,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;aACxF;YAED,kBAAkB;YAClB,IAAI,UAAU,CAAC,MAAM,IAAI,SAAS,EAAE;gBAChC,MAAM,IAAI,KAAK,CAAC,0CAA0C,UAAU,CAAC,OAAO,EAAE,CAAC,CAAC;aACnF;YAED,yBAAyB;YACzB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,UAAU,CAAwB,UAAU,CAAC,MAAO,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,SAAU,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;YAExH,2BAA2B;YAC3B,MAAM,cAAc,GAAoE,EAAE,CAAC;YAC3F,KAAK,MAAM,MAAM,IAAK,OAAO,EAAE;gBAC3B,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC;gBACtC,IAAI,cAAc,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,SAAS,EAAE;oBAClD,cAAc,CAAC,QAAQ,CAAC,UAAU,CAAC,GAAG,EAAE,CAAC;iBAC5C;gBACD,cAAc,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;aACpD;YAED,6CAA6C;YAC7C,MAAM,eAAe,GAA0B,EAAE,CAAC;YAClD,KAAK,MAAM,UAAU,IAAI,cAAc,EAAE;gBACrC,MAAM,MAAM,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;gBAC1C,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,UAAU,CAAW,CAAC;gBAC5D,MAAM,cAAc,GAAG,IAAI,yCAAmB,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;gBAC1G,eAAe,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;aACxC;YAED,wDAAwD;YACxD,OAAO,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,YAAa,CAAC,CAAC;QAC7F,CAAC;KAAA;IAED,YAAY;IAEC,WAAW;;;;;YACpB,MAAM,OAAM,WAAW,WAAE,CAAC;YAC1B,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QACxD,CAAC;KAAA;IAEM,YAAY;QACf,KAAK,CAAC,YAAY,EAAE,CAAC;QACrB,IAAI,CAAC,WAAW,GAAG,SAAS,CAAC;IACjC,CAAC;IAEY,SAAS;;;;;YAClB,MAAM,OAAM,SAAS,WAAE,CAAC;YAExB,IAAI;gBACA,eAAe;gBACf,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,cAAc,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;gBACjG,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC;gBACjC,IAAI,CAAC,WAAW,GAAG,SAAS,CAAC;aAChC;YAAC,OAAM,GAAY,EAAE;gBAClB,MAAM,IAAI,KAAK,CAAC,kCAAmC,GAAW,CAA
C,QAAQ,EAAE,EAAE,CAAC,CAAC;aAChF;QACL,CAAC;KAAA;IAEe,aAAa;;;;;YACzB,MAAM,OAAM,aAAa,WAAE,CAAC;YAE5B,IAAI,IAAI,CAAC,QAAQ,EAAE;gBACf,OAAO;aACV;YAED,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,cAAc,CAAC,CAAC;YAC/D,IAAI,MAAM,IAAI,CAAC,gBAAgB,EAAE,EAAE;gBAC/B,eAAe;gBACf,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;gBAC9C,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;aACjD;iBAAM;gBACH,IAAI;oBACA,qBAAqB;oBACrB,IAAI,CAAC,QAAQ,GAAG;wBACZ,OAAO,EAAE,CAAC;wBACV,KAAK,EAAE,CAAC;wBACR,OAAO,EAAE,EAAE;wBACX,OAAO,EAAE,EAAE;qBACd,CAAC;oBACF,MAAM,EAAE,CAAC,SAAS,CAAC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;iBAClE;gBAAC,OAAM,GAAY,EAAE;oBAClB,MAAM,IAAI,KAAK,CAAC,oCAAqC,GAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;iBAClF;aACJ;QACL,CAAC;KAAA;CACJ;AAlUD,gDAkUC"}
@@ -0,0 +1,12 @@
1
+ import { LocalDocument } from "./LocalDocument";
2
+ import { QueryResult, DocumentChunkMetadata, Tokenizer, DocumentTextSection } from "./types";
3
/**
 * A document returned from a document query, together with the query results for the
 * chunks of that document and an aggregate score.
 */
+ export declare class LocalDocumentResult extends LocalDocument {
4
+ private readonly _chunks;
5
+ private readonly _tokenizer;
6
+ private readonly _score;
7
/**
 * @param folderPath Forwarded to the LocalDocument constructor.
 * @param id Forwarded to the LocalDocument constructor.
 * @param uri Forwarded to the LocalDocument constructor.
 * @param chunks Query results for the chunks of this document.
 * @param tokenizer Tokenizer used by renderSections() to encode and decode text.
 */
+ constructor(folderPath: string, id: string, uri: string, chunks: QueryResult<DocumentChunkMetadata>[], tokenizer: Tokenizer);
8
/** The chunks passed to the constructor. */
+ get chunks(): QueryResult<DocumentChunkMetadata>[];
9
/** Average of the scores of all chunks, computed once in the constructor. */
+ get score(): number;
10
/**
 * Renders the matched parts of the document as text sections.
 * When the whole document encodes to fewer than maxTokens tokens it is returned as a single
 * section with score 1.0; otherwise sections are assembled from the matched chunks and at
 * most maxSections of the highest scoring sections are returned.
 * @param maxTokens Token budget per section.
 * @param maxSections Maximum number of sections to return.
 */
+ renderSections(maxTokens: number, maxSections: number): Promise<DocumentTextSection[]>;
11
+ }
12
+ //# sourceMappingURL=LocalDocumentResult.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"LocalDocumentResult.d.ts","sourceRoot":"","sources":["../src/LocalDocumentResult.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,qBAAqB,EAAE,SAAS,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAE7F,qBAAa,mBAAoB,SAAQ,aAAa;IAClD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAuC;IAC/D,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAY;IACvC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;gBAEb,UAAU,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,CAAC,qBAAqB,CAAC,EAAE,EAAE,SAAS,EAAE,SAAS;IAWlI,IAAW,MAAM,IAAI,WAAW,CAAC,qBAAqB,CAAC,EAAE,CAExD;IAED,IAAW,KAAK,IAAI,MAAM,CAEzB;IAEY,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC;CAmKtG"}
@@ -0,0 +1,186 @@
1
+ "use strict";
2
// Compiler-emitted helper that runs a generator function as an asynchronous routine.
//   thisArg / _arguments: `this` value and argument list the generator function is applied with.
//   P: promise constructor to use; falls back to the global Promise when falsy.
//   generator: generator function whose `yield`s are treated as await points.
// Returns a P that resolves with the generator's return value or rejects with whatever it throws.
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
// Wrap a yielded value in a P unless it already is an instance of P.
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
// Resume the generator with the settled value; a synchronous throw rejects the outer promise.
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
// Throw the rejection reason into the generator so its own try/catch blocks can handle it.
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
// Finish when the generator is done, otherwise wait on the yielded value and continue.
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
// Instantiate the generator (rebinding `generator` to the iterator) and start it.
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.LocalDocumentResult = void 0;
13
+ const LocalDocument_1 = require("./LocalDocument");
14
/**
 * A document returned from a document query, together with the query results for the chunks
 * of that document and an aggregate score.
 */
class LocalDocumentResult extends LocalDocument_1.LocalDocument {
    /**
     * @param folderPath Forwarded to the LocalDocument constructor.
     * @param id Forwarded to the LocalDocument constructor.
     * @param uri Forwarded to the LocalDocument constructor.
     * @param chunks Query results for this document's chunks. Each entry is read as
     * `{ score, item: { metadata: { startPos, endPos } } }`.
     * @param tokenizer Object providing `encode(text)` and `decode(tokens)`.
     */
    constructor(folderPath, id, uri, chunks, tokenizer) {
        super(folderPath, id, uri);
        this._chunks = chunks;
        this._tokenizer = tokenizer;
        // Compute average score; an empty chunk list scores 0 instead of NaN (0 / 0)
        let score = 0;
        this._chunks.forEach(chunk => score += chunk.score);
        this._score = this._chunks.length > 0 ? score / this._chunks.length : 0;
    }
    /** The chunks passed to the constructor. */
    get chunks() {
        return this._chunks;
    }
    /** Average of the scores of all chunks (0 when there are no chunks). */
    get score() {
        return this._score;
    }
    /**
     * Renders the matched parts of the document as text sections.
     * @param maxTokens Token budget per section.
     * @param maxSections Maximum number of sections to return.
     * @returns Promise resolving to an array of `{ text, tokenCount, score }` sections. The
     * array is empty when the document does not fit into `maxTokens` and there are no chunks.
     */
    renderSections(maxTokens, maxSections) {
        return __awaiter(this, void 0, void 0, function* () {
            // Load text from disk
            const text = yield this.loadText();
            // First check to see if the entire document is less than maxTokens
            const tokens = this._tokenizer.encode(text);
            if (tokens.length < maxTokens) {
                return [{
                        text,
                        tokenCount: tokens.length,
                        score: 1.0
                    }];
            }
            // Otherwise, we need to split the document into sections
            // - Add each chunk to a temp array and filter out any chunk that's longer than maxTokens.
            // - Sort the array by startPos to arrange chunks in document order.
            // - Generate a new array of sections by combining chunks until the maxTokens is reached for each section.
            // - Generate an aggregate score for each section by averaging the score of each chunk in the section.
            // - Sort the sections by score and limit to maxSections.
            // - For each remaining section combine adjacent chunks of text.
            // - Dynamically add overlapping chunks of text to each section until the maxTokens is reached.
            const chunks = this._chunks.map(chunk => {
                const startPos = chunk.item.metadata.startPos;
                const endPos = chunk.item.metadata.endPos;
                // endPos is treated as inclusive, hence the + 1
                const chunkText = text.substring(startPos, endPos + 1);
                return {
                    text: chunkText,
                    startPos,
                    endPos,
                    score: chunk.score,
                    tokenCount: this._tokenizer.encode(chunkText).length
                };
            }).filter(chunk => chunk.tokenCount <= maxTokens).sort((a, b) => a.startPos - b.startPos);
            // Check for no chunks
            if (chunks.length === 0) {
                // Nothing to render when the result holds no chunks at all
                const topChunk = this._chunks[0];
                if (topChunk === undefined) {
                    return [];
                }
                // Every chunk was too long: take the first chunk and return a subset of its text
                const startPos = topChunk.item.metadata.startPos;
                const endPos = topChunk.item.metadata.endPos;
                const chunkText = text.substring(startPos, endPos + 1);
                const tokens = this._tokenizer.encode(chunkText);
                return [{
                        text: this._tokenizer.decode(tokens.slice(0, maxTokens)),
                        tokenCount: maxTokens,
                        score: topChunk.score
                    }];
            }
            // Generate sections
            const sections = [{
                    chunks: [],
                    score: 0,
                    tokenCount: 0
                }];
            for (let i = 0; i < chunks.length; i++) {
                const chunk = chunks[i];
                let section = sections[sections.length - 1];
                // Start a new section when this chunk would overflow the current one
                if (section.tokenCount + chunk.tokenCount > maxTokens) {
                    sections.push({
                        chunks: [],
                        score: 0,
                        tokenCount: 0
                    });
                }
                sections[sections.length - 1].chunks.push(chunk);
                sections[sections.length - 1].score += chunk.score;
                sections[sections.length - 1].tokenCount += chunk.tokenCount;
            }
            // Normalize section scores
            sections.forEach(section => section.score /= section.chunks.length);
            // Sort sections by score and limit to maxSections
            sections.sort((a, b) => b.score - a.score);
            if (sections.length > maxSections) {
                sections.splice(maxSections, sections.length - maxSections);
            }
            // Combine adjacent chunks of text
            sections.forEach(section => {
                for (let i = 0; i < section.chunks.length - 1; i++) {
                    const chunk = section.chunks[i];
                    const nextChunk = section.chunks[i + 1];
                    if (chunk.endPos + 1 === nextChunk.startPos) {
                        chunk.text += nextChunk.text;
                        chunk.endPos = nextChunk.endPos;
                        chunk.tokenCount += nextChunk.tokenCount;
                        section.chunks.splice(i + 1, 1);
                        // Re-examine the merged chunk against its new neighbour
                        i--;
                    }
                }
            });
            // Add overlapping chunks of text to each section until the maxTokens is reached
            const connector = {
                text: '\n\n...\n\n',
                startPos: -1,
                endPos: -1,
                score: 0,
                tokenCount: this._tokenizer.encode('\n\n...\n\n').length
            };
            sections.forEach(section => {
                // Insert connectors between chunks
                if (section.chunks.length > 1) {
                    for (let i = 0; i < section.chunks.length - 1; i++) {
                        section.chunks.splice(i + 1, 0, connector);
                        section.tokenCount += connector.tokenCount;
                        // Skip over the connector that was just inserted
                        i++;
                    }
                }
                // Add chunks to beginning and end of the section until maxTokens is reached
                let budget = maxTokens - section.tokenCount;
                if (budget > 40) {
                    const sectionStart = section.chunks[0].startPos;
                    const sectionEnd = section.chunks[section.chunks.length - 1].endPos;
                    if (sectionStart > 0) {
                        const beforeText = text.substring(0, sectionStart);
                        const beforeTokens = this._tokenizer.encode(beforeText);
                        // Spend only half of the budget here when text also follows the section
                        const beforeBudget = sectionEnd < text.length - 1 ? Math.min(beforeTokens.length, Math.ceil(budget / 2)) : Math.min(beforeTokens.length, budget);
                        // NOTE: startPos subtracts a token count from a character offset, so it is
                        // approximate; positions are not read again after this point.
                        const chunk = {
                            text: this._tokenizer.decode(beforeTokens.slice(-beforeBudget)),
                            startPos: sectionStart - beforeBudget,
                            endPos: sectionStart - 1,
                            score: 0,
                            tokenCount: beforeBudget
                        };
                        section.chunks.unshift(chunk);
                        section.tokenCount += chunk.tokenCount;
                        budget -= chunk.tokenCount;
                    }
                    if (sectionEnd < text.length - 1) {
                        const afterText = text.substring(sectionEnd + 1);
                        const afterTokens = this._tokenizer.encode(afterText);
                        const afterBudget = Math.min(afterTokens.length, budget);
                        const chunk = {
                            text: this._tokenizer.decode(afterTokens.slice(0, afterBudget)),
                            startPos: sectionEnd + 1,
                            endPos: sectionEnd + afterBudget,
                            score: 0,
                            tokenCount: afterBudget
                        };
                        section.chunks.push(chunk);
                        section.tokenCount += chunk.tokenCount;
                        budget -= chunk.tokenCount;
                    }
                }
            });
            // Return final rendered sections
            return sections.map(section => {
                let text = '';
                section.chunks.forEach(chunk => text += chunk.text);
                return {
                    text: text,
                    tokenCount: section.tokenCount,
                    score: section.score
                };
            });
        });
    }
}
185
+ exports.LocalDocumentResult = LocalDocumentResult;
186
+ //# sourceMappingURL=LocalDocumentResult.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"LocalDocumentResult.js","sourceRoot":"","sources":["../src/LocalDocumentResult.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,mDAAgD;AAGhD,MAAa,mBAAoB,SAAQ,6BAAa;IAKlD,YAAmB,UAAkB,EAAE,EAAU,EAAE,GAAW,EAAE,MAA4C,EAAE,SAAoB;QAC9H,KAAK,CAAC,UAAU,EAAE,EAAE,EAAE,GAAG,CAAC,CAAC;QAC3B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC;QACtB,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAE5B,wBAAwB;QACxB,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC;QACpD,IAAI,CAAC,MAAM,GAAG,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC;IAC9C,CAAC;IAED,IAAW,MAAM;QACb,OAAO,IAAI,CAAC,OAAO,CAAC;IACxB,CAAC;IAED,IAAW,KAAK;QACZ,OAAO,IAAI,CAAC,MAAM,CAAC;IACvB,CAAC;IAEY,cAAc,CAAC,SAAiB,EAAE,WAAmB;;YAC9D,sBAAsB;YACtB,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;YAEnC,mEAAmE;YACnE,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAC5C,IAAI,MAAM,CAAC,MAAM,GAAG,SAAS,EAAE;gBAC3B,OAAO,CAAC;wBACJ,IAAI;wBACJ,UAAU,EAAE,MAAM,CAAC,MAAM;wBACzB,KAAK,EAAE,GAAG;qBACb,CAAC,CAAC;aACN;YAED,yDAAyD;YACzD,0FAA0F;YAC1F,oEAAoE;YACpE,0GAA0G;YAC1G,sGAAsG;YACtG,yDAAyD;YACzD,gEAAgE;YAChE,+FAA+F;YAC/F,MAAM,MAAM,GAAmB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE;gBACpD,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;gBAC9C,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC;gBACvD,OAAO;oBACH,IAAI,EAAE,SAAS;oBACf,QAAQ;oBACR,MAAM;oBACN,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,MAAM;iBACvD,CAAC;YACN,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,UAAU,IAAI,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;YAE1F,sBAAsB;YACtB,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE;gBACrB,qDAAqD;gBACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;gBACjC,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;gBACjD,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAC7C,MAAM,SAAS,GAAG,IAAI,CAAC,
SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC;gBACvD,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;gBACjD,OAAO,CAAC;wBACJ,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;wBACxD,UAAU,EAAE,SAAS;wBACrB,KAAK,EAAE,QAAQ,CAAC,KAAK;qBACxB,CAAC,CAAC;aACN;YAED,oBAAoB;YACpB,MAAM,QAAQ,GAAc,CAAC;oBACzB,MAAM,EAAE,EAAE;oBACV,KAAK,EAAE,CAAC;oBACR,UAAU,EAAE,CAAC;iBAChB,CAAC,CAAC;YACH,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACpC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;gBACxB,IAAI,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAC5C,IAAI,OAAO,CAAC,UAAU,GAAG,KAAK,CAAC,UAAU,GAAG,SAAS,EAAE;oBACnD,QAAQ,CAAC,IAAI,CAAC;wBACV,MAAM,EAAE,EAAE;wBACV,KAAK,EAAE,CAAC;wBACR,UAAU,EAAE,CAAC;qBAChB,CAAC,CAAC;iBACN;gBACD,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACjD,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,CAAC;gBACnD,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,KAAK,CAAC,UAAU,CAAC;aAChE;YAED,2BAA2B;YAC3B,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAEpE,kDAAkD;YAClD,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;YAC3C,IAAI,QAAQ,CAAC,MAAM,GAAG,WAAW,EAAE;gBAC/B,QAAQ,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,MAAM,GAAG,WAAW,CAAC,CAAC;aAC/D;YAED,kCAAkC;YAClC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;gBACvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE;oBAChD,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;oBAChC,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;oBACxC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,KAAK,SAAS,CAAC,QAAQ,EAAE;wBACzC,KAAK,CAAC,IAAI,IAAI,SAAS,CAAC,IAAI,CAAC;wBAC7B,KAAK,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC;wBAChC,KAAK,CAAC,UAAU,IAAI,SAAS,CAAC,UAAU,CAAC;wBACzC,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;wBAChC,CAAC,EAAE,CAAC;qBACP;iBACJ;YACL,CAAC,CAAC,CAAC;Y
AEH,gFAAgF;YAChF,MAAM,SAAS,GAAiB;gBAC5B,IAAI,EAAE,aAAa;gBACnB,QAAQ,EAAE,CAAC,CAAC;gBACZ,MAAM,EAAE,CAAC,CAAC;gBACV,KAAK,EAAE,CAAC;gBACR,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,MAAM;aAC3D,CAAC;YACF,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;gBACvB,mCAAmC;gBACnC,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;oBAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE;wBAChD,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,SAAS,CAAC,CAAC;wBAC3C,OAAO,CAAC,UAAU,IAAI,SAAS,CAAC,UAAU,CAAC;wBAC3C,CAAC,EAAE,CAAC;qBACP;iBACJ;gBAED,4EAA4E;gBAC5E,IAAI,MAAM,GAAG,SAAS,GAAG,OAAO,CAAC,UAAU,CAAC;gBAC5C,IAAI,MAAM,GAAG,EAAE,EAAE;oBACb,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;oBAChD,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;oBACpE,IAAI,YAAY,GAAG,CAAC,EAAE;wBAClB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;wBAChE,MAAM,YAAY,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;wBACvD,MAAM,YAAY,GAAG,UAAU,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,MAAM,EAAE,IAAI,CAAC,IAAI,CAAC,MAAM,GAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;wBAC/I,MAAM,KAAK,GAAiB;4BACxB,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC;4BAC/D,QAAQ,EAAE,YAAY,GAAG,YAAY;4BACrC,MAAM,EAAE,YAAY,GAAG,CAAC;4BACxB,KAAK,EAAE,CAAC;4BACR,UAAU,EAAE,YAAY;yBAC3B,CAAC;wBACF,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;wBAC9B,OAAO,CAAC,UAAU,IAAI,KAAK,CAAC,UAAU,CAAC;wBACvC,MAAM,IAAI,KAAK,CAAC,UAAU,CAAC;qBAC9B;oBAED,IAAI,UAAU,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;wBAC9B,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;wBACjD,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;wBACtD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;wBACzD,MAAM,KAAK,GAAiB;4BACxB,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,WA
AW,CAAC,CAAC;4BAC/D,QAAQ,EAAE,UAAU,GAAG,CAAC;4BACxB,MAAM,EAAE,UAAU,GAAG,WAAW;4BAChC,KAAK,EAAE,CAAC;4BACR,UAAU,EAAE,WAAW;yBAC1B,CAAC;wBACF,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;wBAC3B,OAAO,CAAC,UAAU,IAAI,KAAK,CAAC,UAAU,CAAC;wBACvC,MAAM,IAAI,KAAK,CAAC,UAAU,CAAC;qBAC9B;iBACJ;YACL,CAAC,CAAC,CAAC;YAEH,iCAAiC;YACjC,OAAO,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE;gBAC1B,IAAI,IAAI,GAAG,EAAE,CAAC;gBACd,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC;gBACpD,OAAO;oBACH,IAAI,EAAE,IAAI;oBACV,UAAU,EAAE,OAAO,CAAC,UAAU;oBAC9B,KAAK,EAAE,OAAO,CAAC,KAAK;iBACvB,CAAC;YACN,CAAC,CAAC,CAAC;QACP,CAAC;KAAA;CACJ;AA3LD,kDA2LC"}