vectra 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/LocalDocumentResult.d.ts +1 -1
- package/lib/LocalDocumentResult.d.ts.map +1 -1
- package/lib/LocalDocumentResult.js +54 -52
- package/lib/LocalDocumentResult.js.map +1 -1
- package/lib/vectra-cli.d.ts.map +1 -1
- package/lib/vectra-cli.js +7 -1
- package/lib/vectra-cli.js.map +1 -1
- package/package.json +1 -1
- package/src/LocalDocumentResult.ts +55 -53
- package/src/vectra-cli.ts +7 -1
|
@@ -7,7 +7,7 @@ export declare class LocalDocumentResult extends LocalDocument {
|
|
|
7
7
|
constructor(folderPath: string, id: string, uri: string, chunks: QueryResult<DocumentChunkMetadata>[], tokenizer: Tokenizer);
|
|
8
8
|
get chunks(): QueryResult<DocumentChunkMetadata>[];
|
|
9
9
|
get score(): number;
|
|
10
|
-
renderSections(maxTokens: number, maxSections: number): Promise<DocumentTextSection[]>;
|
|
10
|
+
renderSections(maxTokens: number, maxSections: number, overlappingChunks?: boolean): Promise<DocumentTextSection[]>;
|
|
11
11
|
private encodeBeforeText;
|
|
12
12
|
private encodeAfterText;
|
|
13
13
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LocalDocumentResult.d.ts","sourceRoot":"","sources":["../src/LocalDocumentResult.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,qBAAqB,EAAE,SAAS,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAE7F,qBAAa,mBAAoB,SAAQ,aAAa;IAClD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAuC;IAC/D,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAY;IACvC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;gBAEb,UAAU,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,CAAC,qBAAqB,CAAC,EAAE,EAAE,SAAS,EAAE,SAAS;IAWlI,IAAW,MAAM,IAAI,WAAW,CAAC,qBAAqB,CAAC,EAAE,CAExD;IAED,IAAW,KAAK,IAAI,MAAM,CAEzB;IAEY,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"LocalDocumentResult.d.ts","sourceRoot":"","sources":["../src/LocalDocumentResult.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,qBAAqB,EAAE,SAAS,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAE7F,qBAAa,mBAAoB,SAAQ,aAAa;IAClD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAuC;IAC/D,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAY;IACvC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;gBAEb,UAAU,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,CAAC,qBAAqB,CAAC,EAAE,EAAE,SAAS,EAAE,SAAS;IAWlI,IAAW,MAAM,IAAI,WAAW,CAAC,qBAAqB,CAAC,EAAE,CAExD;IAED,IAAW,KAAK,IAAI,MAAM,CAEzB;IAEY,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,iBAAiB,UAAO,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC;IAwK7H,OAAO,CAAC,gBAAgB;IAMxB,OAAO,CAAC,eAAe;CAM1B"}
|
|
@@ -27,7 +27,7 @@ class LocalDocumentResult extends LocalDocument_1.LocalDocument {
|
|
|
27
27
|
get score() {
|
|
28
28
|
return this._score;
|
|
29
29
|
}
|
|
30
|
-
renderSections(maxTokens, maxSections) {
|
|
30
|
+
renderSections(maxTokens, maxSections, overlappingChunks = true) {
|
|
31
31
|
return __awaiter(this, void 0, void 0, function* () {
|
|
32
32
|
// Load text from disk
|
|
33
33
|
const text = yield this.loadText();
|
|
@@ -118,59 +118,61 @@ class LocalDocumentResult extends LocalDocument_1.LocalDocument {
|
|
|
118
118
|
}
|
|
119
119
|
});
|
|
120
120
|
// Add overlapping chunks of text to each section until the maxTokens is reached
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
section.chunks.
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
// Add chunks to beginning and end of the section until maxTokens is reached
|
|
138
|
-
let budget = maxTokens - section.tokenCount;
|
|
139
|
-
if (budget > 40) {
|
|
140
|
-
const sectionStart = section.chunks[0].startPos;
|
|
141
|
-
const sectionEnd = section.chunks[section.chunks.length - 1].endPos;
|
|
142
|
-
if (sectionStart > 0) {
|
|
143
|
-
const beforeTex = text.substring(0, section.chunks[0].startPos);
|
|
144
|
-
const beforeTokens = this.encodeBeforeText(beforeTex, Math.ceil(budget / 2));
|
|
145
|
-
const beforeBudget = sectionEnd < text.length - 1 ? Math.min(beforeTokens.length, Math.ceil(budget / 2)) : Math.min(beforeTokens.length, budget);
|
|
146
|
-
const chunk = {
|
|
147
|
-
text: this._tokenizer.decode(beforeTokens.slice(-beforeBudget)),
|
|
148
|
-
startPos: sectionStart - beforeBudget,
|
|
149
|
-
endPos: sectionStart - 1,
|
|
150
|
-
score: 0,
|
|
151
|
-
tokenCount: beforeBudget
|
|
152
|
-
};
|
|
153
|
-
section.chunks.unshift(chunk);
|
|
154
|
-
section.tokenCount += chunk.tokenCount;
|
|
155
|
-
budget -= chunk.tokenCount;
|
|
121
|
+
if (overlappingChunks) {
|
|
122
|
+
const connector = {
|
|
123
|
+
text: '\n\n...\n\n',
|
|
124
|
+
startPos: -1,
|
|
125
|
+
endPos: -1,
|
|
126
|
+
score: 0,
|
|
127
|
+
tokenCount: this._tokenizer.encode('\n\n...\n\n').length
|
|
128
|
+
};
|
|
129
|
+
sections.forEach(section => {
|
|
130
|
+
// Insert connectors between chunks
|
|
131
|
+
if (section.chunks.length > 1) {
|
|
132
|
+
for (let i = 0; i < section.chunks.length - 1; i++) {
|
|
133
|
+
section.chunks.splice(i + 1, 0, connector);
|
|
134
|
+
section.tokenCount += connector.tokenCount;
|
|
135
|
+
i++;
|
|
136
|
+
}
|
|
156
137
|
}
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
const
|
|
161
|
-
const
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
138
|
+
// Add chunks to beginning and end of the section until maxTokens is reached
|
|
139
|
+
let budget = maxTokens - section.tokenCount;
|
|
140
|
+
if (budget > 40) {
|
|
141
|
+
const sectionStart = section.chunks[0].startPos;
|
|
142
|
+
const sectionEnd = section.chunks[section.chunks.length - 1].endPos;
|
|
143
|
+
if (sectionStart > 0) {
|
|
144
|
+
const beforeTex = text.substring(0, section.chunks[0].startPos);
|
|
145
|
+
const beforeTokens = this.encodeBeforeText(beforeTex, Math.ceil(budget / 2));
|
|
146
|
+
const beforeBudget = sectionEnd < text.length - 1 ? Math.min(beforeTokens.length, Math.ceil(budget / 2)) : Math.min(beforeTokens.length, budget);
|
|
147
|
+
const chunk = {
|
|
148
|
+
text: this._tokenizer.decode(beforeTokens.slice(-beforeBudget)),
|
|
149
|
+
startPos: sectionStart - beforeBudget,
|
|
150
|
+
endPos: sectionStart - 1,
|
|
151
|
+
score: 0,
|
|
152
|
+
tokenCount: beforeBudget
|
|
153
|
+
};
|
|
154
|
+
section.chunks.unshift(chunk);
|
|
155
|
+
section.tokenCount += chunk.tokenCount;
|
|
156
|
+
budget -= chunk.tokenCount;
|
|
157
|
+
}
|
|
158
|
+
if (sectionEnd < text.length - 1) {
|
|
159
|
+
const afterText = text.substring(sectionEnd + 1);
|
|
160
|
+
const afterTokens = this.encodeAfterText(afterText, budget);
|
|
161
|
+
const afterBudget = Math.min(afterTokens.length, budget);
|
|
162
|
+
const chunk = {
|
|
163
|
+
text: this._tokenizer.decode(afterTokens.slice(0, afterBudget)),
|
|
164
|
+
startPos: sectionEnd + 1,
|
|
165
|
+
endPos: sectionEnd + afterBudget,
|
|
166
|
+
score: 0,
|
|
167
|
+
tokenCount: afterBudget
|
|
168
|
+
};
|
|
169
|
+
section.chunks.push(chunk);
|
|
170
|
+
section.tokenCount += chunk.tokenCount;
|
|
171
|
+
budget -= chunk.tokenCount;
|
|
172
|
+
}
|
|
171
173
|
}
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
+
});
|
|
175
|
+
}
|
|
174
176
|
// Return final rendered sections
|
|
175
177
|
return sections.map(section => {
|
|
176
178
|
let text = '';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LocalDocumentResult.js","sourceRoot":"","sources":["../src/LocalDocumentResult.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,mDAAgD;AAGhD,MAAa,mBAAoB,SAAQ,6BAAa;IAKlD,YAAmB,UAAkB,EAAE,EAAU,EAAE,GAAW,EAAE,MAA4C,EAAE,SAAoB;QAC9H,KAAK,CAAC,UAAU,EAAE,EAAE,EAAE,GAAG,CAAC,CAAC;QAC3B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC;QACtB,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAE5B,wBAAwB;QACxB,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC;QACpD,IAAI,CAAC,MAAM,GAAG,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC;IAC9C,CAAC;IAED,IAAW,MAAM;QACb,OAAO,IAAI,CAAC,OAAO,CAAC;IACxB,CAAC;IAED,IAAW,KAAK;QACZ,OAAO,IAAI,CAAC,MAAM,CAAC;IACvB,CAAC;IAEY,cAAc,CAAC,SAAiB,EAAE,WAAmB;;
|
|
1
|
+
{"version":3,"file":"LocalDocumentResult.js","sourceRoot":"","sources":["../src/LocalDocumentResult.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,mDAAgD;AAGhD,MAAa,mBAAoB,SAAQ,6BAAa;IAKlD,YAAmB,UAAkB,EAAE,EAAU,EAAE,GAAW,EAAE,MAA4C,EAAE,SAAoB;QAC9H,KAAK,CAAC,UAAU,EAAE,EAAE,EAAE,GAAG,CAAC,CAAC;QAC3B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC;QACtB,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAE5B,wBAAwB;QACxB,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC;QACpD,IAAI,CAAC,MAAM,GAAG,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC;IAC9C,CAAC;IAED,IAAW,MAAM;QACb,OAAO,IAAI,CAAC,OAAO,CAAC;IACxB,CAAC;IAED,IAAW,KAAK;QACZ,OAAO,IAAI,CAAC,MAAM,CAAC;IACvB,CAAC;IAEY,cAAc,CAAC,SAAiB,EAAE,WAAmB,EAAE,iBAAiB,GAAG,IAAI;;YACxF,sBAAsB;YACtB,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;YAEnC,mEAAmE;YACnE,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC,EAAE;gBAChC,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;gBAC5C,IAAI,MAAM,CAAC,MAAM,GAAG,SAAS,EAAE;oBAC3B,OAAO,CAAC;4BACJ,IAAI;4BACJ,UAAU,EAAE,MAAM,CAAC,MAAM;4BACzB,KAAK,EAAE,GAAG;yBACb,CAAC,CAAC;iBACN;aACJ;YAED,yDAAyD;YACzD,0FAA0F;YAC1F,oEAAoE;YACpE,0GAA0G;YAC1G,sGAAsG;YACtG,yDAAyD;YACzD,gEAAgE;YAChE,+FAA+F;YAC/F,MAAM,MAAM,GAAmB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE;gBACpD,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;gBAC9C,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC;gBACvD,OAAO;oBACH,IAAI,EAAE,SAAS;oBACf,QAAQ;oBACR,MAAM;oBACN,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,MAAM;iBACvD,CAAC;YACN,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,UAAU,IAAI,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;YAE1F,sBAAsB;YACtB,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE;gBACrB,qDAAqD;gBACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;gBACjC,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;gBACjD,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAC7C,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC;gBACvD,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;gBACjD,OAAO,CAAC;wBACJ,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;wBACxD,UAAU,EAAE,SAAS;wBACrB,KAAK,EAAE,QAAQ,CAAC,KAAK;qBACxB,CAAC,CAAC;aACN;YAED,oBAAoB;YACpB,MAAM,QAAQ,GAAc,CAAC;oBACzB,MAAM,EAAE,EAAE;oBACV,KAAK,EAAE,CAAC;oBACR,UAAU,EAAE,CAAC;iBAChB,CAAC,CAAC;YACH,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACpC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;gBACxB,IAAI,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAC5C,IAAI,OAAO,CAAC,UAAU,GAAG,KAAK,CAAC,UAAU,GAAG,SAAS,EAAE;oBACnD,QAAQ,CAAC,IAAI,CAAC;wBACV,MAAM,EAAE,EAAE;wBACV,KAAK,EAAE,CAAC;wBACR,UAAU,EAAE,CAAC;qBAChB,CAAC,CAAC;iBACN;gBACD,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACjD,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,CAAC;gBACnD,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,KAAK,CAAC,UAAU,CAAC;aAChE;YAED,2BAA2B;YAC3B,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAEpE,kDAAkD;YAClD,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;YAC3C,IAAI,QAAQ,CAAC,MAAM,GAAG,WAAW,EAAE;gBAC/B,QAAQ,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,MAAM,GAAG,WAAW,CAAC,CAAC;aAC/D;YAED,kCAAkC;YAClC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;gBACvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE;oBAChD,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;oBAChC,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;oBACxC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,KAAK,SAAS,CAAC,QAAQ,EAAE;wBACzC,KAAK,CAAC,IAAI,IAAI,SAAS,CAAC,IAAI,CAAC;wBAC7B,KAAK,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC;wBAChC,KAAK,CAAC,UAAU,IAAI,SAAS,CAAC,UAAU,CAAC;wBACzC,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;wBAChC,CAAC,EAAE,CAAC;qBACP;iBACJ;YACL,CAAC,CAAC,CAAC;YAEH,gFAAgF;YAChF,IAAI,iBAAiB,EAAE;gBACnB,MAAM,SAAS,GAAiB;oBAC5B,IAAI,EAAE,aAAa;oBACnB,QAAQ,EAAE,CAAC,CAAC;oBACZ,MAAM,EAAE,CAAC,CAAC;oBACV,KAAK,EAAE,CAAC;oBACR,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,MAAM;iBAC3D,CAAC;gBACF,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;oBACvB,mCAAmC;oBACnC,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;wBAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE;4BAChD,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,SAAS,CAAC,CAAC;4BAC3C,OAAO,CAAC,UAAU,IAAI,SAAS,CAAC,UAAU,CAAC;4BAC3C,CAAC,EAAE,CAAC;yBACP;qBACJ;oBAED,4EAA4E;oBAC5E,IAAI,MAAM,GAAG,SAAS,GAAG,OAAO,CAAC,UAAU,CAAC;oBAC5C,IAAI,MAAM,GAAG,EAAE,EAAE;wBACb,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;wBAChD,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;wBACpE,IAAI,YAAY,GAAG,CAAC,EAAE;4BAClB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;4BAChE,MAAM,YAAY,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,MAAM,GAAC,CAAC,CAAC,CAAC,CAAC;4BAC3E,MAAM,YAAY,GAAG,UAAU,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,MAAM,EAAE,IAAI,CAAC,IAAI,CAAC,MAAM,GAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;4BAC/I,MAAM,KAAK,GAAiB;gCACxB,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC;gCAC/D,QAAQ,EAAE,YAAY,GAAG,YAAY;gCACrC,MAAM,EAAE,YAAY,GAAG,CAAC;gCACxB,KAAK,EAAE,CAAC;gCACR,UAAU,EAAE,YAAY;6BAC3B,CAAC;4BACF,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;4BAC9B,OAAO,CAAC,UAAU,IAAI,KAAK,CAAC,UAAU,CAAC;4BACvC,MAAM,IAAI,KAAK,CAAC,UAAU,CAAC;yBAC9B;wBAED,IAAI,UAAU,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;4BAC9B,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;4BACjD,MAAM,WAAW,GAAG,IAAI,CAAC,eAAe,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;4BAC5D,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;4BACzD,MAAM,KAAK,GAAiB;gCACxB,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;gCAC/D,QAAQ,EAAE,UAAU,GAAG,CAAC;gCACxB,MAAM,EAAE,UAAU,GAAG,WAAW;gCAChC,KAAK,EAAE,CAAC;gCACR,UAAU,EAAE,WAAW;6BAC1B,CAAC;4BACF,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;4BAC3B,OAAO,CAAC,UAAU,IAAI,KAAK,CAAC,UAAU,CAAC;4BACvC,MAAM,IAAI,KAAK,CAAC,UAAU,CAAC;yBAC9B;qBACJ;gBACL,CAAC,CAAC,CAAC;aACN;YAED,iCAAiC;YACjC,OAAO,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE;gBAC1B,IAAI,IAAI,GAAG,EAAE,CAAC;gBACd,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC;gBACpD,OAAO;oBACH,IAAI,EAAE,IAAI;oBACV,UAAU,EAAE,OAAO,CAAC,UAAU;oBAC9B,KAAK,EAAE,OAAO,CAAC,KAAK;iBACvB,CAAC;YACN,CAAC,CAAC,CAAC;QACP,CAAC;KAAA;IAEO,gBAAgB,CAAC,IAAY,EAAE,MAAc;QACjD,MAAM,SAAS,GAAG,MAAM,GAAG,CAAC,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC;QACzF,OAAO,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAC1C,CAAC;IAEO,eAAe,CAAC,IAAY,EAAE,MAAc;QAChD,MAAM,SAAS,GAAG,MAAM,GAAG,CAAC,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAC9E,OAAO,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAC1C,CAAC;CAEJ;AA5MD,kDA4MC"}
|
package/lib/vectra-cli.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vectra-cli.d.ts","sourceRoot":"","sources":["../src/vectra-cli.ts"],"names":[],"mappings":"AASA,wBAAsB,GAAG,
|
|
1
|
+
{"version":3,"file":"vectra-cli.d.ts","sourceRoot":"","sources":["../src/vectra-cli.ts"],"names":[],"mappings":"AASA,wBAAsB,GAAG,kBAmOxB"}
|
package/lib/vectra-cli.js
CHANGED
|
@@ -210,6 +210,12 @@ function run() {
|
|
|
210
210
|
describe: `format of the rendered results. Defaults to 'sections'`,
|
|
211
211
|
choices: ['sections', 'stats', 'chunks'],
|
|
212
212
|
default: 'sections'
|
|
213
|
+
})
|
|
214
|
+
.option('overlap', {
|
|
215
|
+
alias: 'o',
|
|
216
|
+
describe: `whether to add overlapping chunks to sections.`,
|
|
217
|
+
type: 'boolean',
|
|
218
|
+
default: true
|
|
213
219
|
})
|
|
214
220
|
.demandOption(['keys']);
|
|
215
221
|
}, (args) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -235,7 +241,7 @@ function run() {
|
|
|
235
241
|
console.log(internals_1.Colorize.value('score', result.score));
|
|
236
242
|
console.log(internals_1.Colorize.value('chunks', result.chunks.length));
|
|
237
243
|
if (args.format == 'sections') {
|
|
238
|
-
const sections = yield result.renderSections(args.tokens, args.sectionCount);
|
|
244
|
+
const sections = yield result.renderSections(args.tokens, args.sectionCount, args.overlap);
|
|
239
245
|
for (let i = 0; i < sections.length; i++) {
|
|
240
246
|
const section = sections[i];
|
|
241
247
|
console.log(internals_1.Colorize.title(args.sectionCount == 1 ? 'Section' : `Section ${i + 1}`));
|
package/lib/vectra-cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vectra-cli.js","sourceRoot":"","sources":["../src/vectra-cli.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,wDAAgC;AAChC,2CAAwC;AACxC,6DAA0D;AAC1D,6CAA0C;AAC1C,yDAAsD;AACtD,2CAAuC;AACvC,+CAA4C;AAE5C,SAAsB,GAAG;;QACrB,kBAAkB;QAClB,MAAM,IAAI,GAAG,MAAM,IAAA,eAAK,EAAC,IAAA,iBAAO,EAAC,OAAO,CAAC,IAAI,CAAC,CAAC;aAC1C,UAAU,CAAC,QAAQ,CAAC;aACpB,OAAO,CAAC,gBAAgB,EAAE,0BAA0B,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YACtE,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,CAAC,WAAW,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;QAClE,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,gBAAgB,EAAE,gCAAgC,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YAC5E,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,MAAM,KAAK,CAAC,WAAW,EAAE,CAAC;QAC9B,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,aAAa,EAAE,wCAAwC,EAAE,CAAC,KAAK,EAAE,EAAE;YACxE,OAAO,KAAK;iBACP,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gFAAgF;gBAC1F,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,KAAK,EAAE;gBACX,KAAK,EAAE,GAAG;gBACV,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,sCAAsC;gBAChD,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,sDAAsD;gBAChE,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,YAAY,EAAE;gBAClB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,0DAA0D;gBACpE,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,GAAG;aACf,CAAC;iBACD,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE;oBAChD,OAAO,IAAI,CAAC;iBACf;qBAAM,IAAI,OAAO,IAAI,CAAC,IAAI,IAAI,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;oBACpE,OAAO,IAAI,CAAC;iBACf;qBAAM;oBACH,MAAM,IAAI,KAAK,CAAC,mJAAmJ,CAAC,CAAC;iBACxK;YACL,CAAC,CAAC;iBACD,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAChC,CAAC,EAAE,CAAO,IAAI,EAAE,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC,CAAC;YAEzD,oBAAoB;YACpB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,OAAO,CAAC,CAAC,CAAC;YACzE,MAAM,UAAU,GAAG,IAAI,mCAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC;YAElG,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC;gBACjC,UAAU;gBACV,UAAU;gBACV,cAAc,EAAE;oBACZ,SAAS,EAAE,IAAI,CAAC,SAAS;iBAC5B;aACJ,CAAC,CAAC;YAEH,oBAAoB;YACpB,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,GAAe,EAAE,IAAI,CAAC,IAAc,EAAE,UAAU,CAAC,CAAC;YAEtF,kBAAkB;YAClB,MAAM,WAAW,GAAG,IAAI,yBAAW,EAAE,CAAC;YACtC,MAAM,UAAU,GAAG,IAAI,uBAAU,EAAE,CAAC;YACpC,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE;gBACrB,IAAI;oBACA,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,QAAQ,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC,CAAC;oBACnD,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC;oBACnE,MAAM,OAAO,CAAC,KAAK,CAAC,IAAI,EAAE,CAAO,GAAG,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE;wBACnD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,QAAQ,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;wBACxE,MAAM,KAAK,CAAC,cAAc,CAAC,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;wBAC/C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,OAAO,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;wBACpE,OAAO,IAAI,CAAC;oBAChB,CAAC,CAAA,CAAC,CAAC;iBACN;gBAAC,OAAO,GAAY,EAAE;oBACnB,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,KAAK,CAAC,iBAAiB,IAAI,KAAM,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;iBACzG;aACJ;QACL,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,gBAAgB,EAAE,6CAA6C,EAAE,CAAC,KAAK,EAAE,EAAE;YAChF,OAAO,KAAK;iBACP,MAAM,CAAC,KAAK,EAAE;gBACX,KAAK,EAAE,GAAG;gBACV,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,6BAA6B;gBACvC,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,yDAAyD;gBACnE,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE;oBAChD,OAAO,IAAI,CAAC;iBACf;qBAAM,IAAI,OAAO,IAAI,CAAC,IAAI,IAAI,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;oBACpE,OAAO,IAAI,CAAC;iBACf;qBAAM;oBACH,MAAM,IAAI,KAAK,CAAC,mJAAmJ,CAAC,CAAC;iBACxK;YACL,CAAC,CAAC,CAAC;QACX,CAAC,EAAE,CAAO,IAAI,EAAE,EAAE;YACd,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YAErD,oBAAoB;YACpB,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,GAAe,EAAE,IAAI,CAAC,IAAc,EAAE,UAAU,CAAC,CAAC;YAEtF,mBAAmB;YACnB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;gBACpB,OAAO,CAAC,GAAG,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC;gBAC/B,MAAM,KAAK,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;aACnC;QACL,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,eAAe,EAAE,oCAAoC,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YAC/E,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,eAAe,EAAE,CAAC;YAC5C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC;YAC3C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QACxC,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,uBAAuB,EAAE,uBAAuB,EAAE,CAAC,KAAK,EAAE,EAAE;YACjE,OAAO,KAAK;iBACP,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gFAAgF;aAC7F,CAAC;iBACD,MAAM,CAAC,gBAAgB,EAAE;gBACtB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,oDAAoD;gBAC9D,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;aACd,CAAC;iBACD,MAAM,CAAC,aAAa,EAAE;gBACnB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,iDAAiD;gBAC3D,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;aACd,CAAC;iBACD,MAAM,CAAC,eAAe,EAAE;gBACrB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,2DAA2D;gBACrE,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,CAAC;aACb,CAAC;iBACD,MAAM,CAAC,QAAQ,EAAE;gBACd,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,6EAA6E;gBACvF,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,IAAI;aAChB,CAAC;iBACD,MAAM,CAAC,QAAQ,EAAE;gBACd,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,wDAAwD;gBAClE,OAAO,EAAE,CAAC,UAAU,EAAE,OAAO,EAAE,QAAQ,CAAC;gBACxC,OAAO,EAAE,UAAU;aACtB,CAAC;iBACD,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAChC,CAAC,EAAE,CAAO,IAAI,EAAE,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC;YAE9C,oBAAoB;YACpB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,OAAO,CAAC,CAAC,CAAC;YACzE,MAAM,UAAU,GAAG,IAAI,mCAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC;YAElG,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC;gBACjC,UAAU;gBACV,UAAU;aACb,CAAC,CAAC;YAEH,cAAc;YACd,MAAM,KAAK,GAAG,IAAI,CAAC,KAAe,CAAC;YACnC,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,cAAc,CAAC,KAAK,EAAE;gBAC9C,YAAY,EAAE,IAAI,CAAC,aAAa;gBAChC,SAAS,EAAE,IAAI,CAAC,UAAU;aAC7B,CAAC,CAAC;YAEH,iBAAiB;YACjB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;gBAC1B,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;gBACzC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;gBACnD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;gBAC5D,IAAI,IAAI,CAAC,MAAM,IAAI,UAAU,EAAE;oBAC3B,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"vectra-cli.js","sourceRoot":"","sources":["../src/vectra-cli.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,wDAAgC;AAChC,2CAAwC;AACxC,6DAA0D;AAC1D,6CAA0C;AAC1C,yDAAsD;AACtD,2CAAuC;AACvC,+CAA4C;AAE5C,SAAsB,GAAG;;QACrB,kBAAkB;QAClB,MAAM,IAAI,GAAG,MAAM,IAAA,eAAK,EAAC,IAAA,iBAAO,EAAC,OAAO,CAAC,IAAI,CAAC,CAAC;aAC1C,UAAU,CAAC,QAAQ,CAAC;aACpB,OAAO,CAAC,gBAAgB,EAAE,0BAA0B,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YACtE,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,CAAC,WAAW,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;QAClE,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,gBAAgB,EAAE,gCAAgC,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YAC5E,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,MAAM,KAAK,CAAC,WAAW,EAAE,CAAC;QAC9B,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,aAAa,EAAE,wCAAwC,EAAE,CAAC,KAAK,EAAE,EAAE;YACxE,OAAO,KAAK;iBACP,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gFAAgF;gBAC1F,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,KAAK,EAAE;gBACX,KAAK,EAAE,GAAG;gBACV,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,sCAAsC;gBAChD,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,sDAAsD;gBAChE,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,YAAY,EAAE;gBAClB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,0DAA0D;gBACpE,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,GAAG;aACf,CAAC;iBACD,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE;oBAChD,OAAO,IAAI,CAAC;iBACf;qBAAM,IAAI,OAAO,IAAI,CAAC,IAAI,IAAI,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;oBACpE,OAAO,IAAI,CAAC;iBACf;qBAAM;oBACH,MAAM,IAAI,KAAK,CAAC,mJAAmJ,CAAC,CAAC;iBACxK;YACL,CAAC,CAAC;iBACD,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAChC,CAAC,EAAE,CAAO,IAAI,EAAE,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC,CAAC;YAEzD,oBAAoB;YACpB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,OAAO,CAAC,CAAC,CAAC;YACzE,MAAM,UAAU,GAAG,IAAI,mCAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC;YAElG,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC;gBACjC,UAAU;gBACV,UAAU;gBACV,cAAc,EAAE;oBACZ,SAAS,EAAE,IAAI,CAAC,SAAS;iBAC5B;aACJ,CAAC,CAAC;YAEH,oBAAoB;YACpB,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,GAAe,EAAE,IAAI,CAAC,IAAc,EAAE,UAAU,CAAC,CAAC;YAEtF,kBAAkB;YAClB,MAAM,WAAW,GAAG,IAAI,yBAAW,EAAE,CAAC;YACtC,MAAM,UAAU,GAAG,IAAI,uBAAU,EAAE,CAAC;YACpC,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE;gBACrB,IAAI;oBACA,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,QAAQ,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC,CAAC;oBACnD,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC;oBACnE,MAAM,OAAO,CAAC,KAAK,CAAC,IAAI,EAAE,CAAO,GAAG,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE;wBACnD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,QAAQ,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;wBACxE,MAAM,KAAK,CAAC,cAAc,CAAC,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;wBAC/C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,OAAO,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;wBACpE,OAAO,IAAI,CAAC;oBAChB,CAAC,CAAA,CAAC,CAAC;iBACN;gBAAC,OAAO,GAAY,EAAE;oBACnB,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,KAAK,CAAC,iBAAiB,IAAI,KAAM,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;iBACzG;aACJ;QACL,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,gBAAgB,EAAE,6CAA6C,EAAE,CAAC,KAAK,EAAE,EAAE;YAChF,OAAO,KAAK;iBACP,MAAM,CAAC,KAAK,EAAE;gBACX,KAAK,EAAE,GAAG;gBACV,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,6BAA6B;gBACvC,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,yDAAyD;gBACnE,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE;oBAChD,OAAO,IAAI,CAAC;iBACf;qBAAM,IAAI,OAAO,IAAI,CAAC,IAAI,IAAI,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;oBACpE,OAAO,IAAI,CAAC;iBACf;qBAAM;oBACH,MAAM,IAAI,KAAK,CAAC,mJAAmJ,CAAC,CAAC;iBACxK;YACL,CAAC,CAAC,CAAC;QACX,CAAC,EAAE,CAAO,IAAI,EAAE,EAAE;YACd,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YAErD,oBAAoB;YACpB,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,GAAe,EAAE,IAAI,CAAC,IAAc,EAAE,UAAU,CAAC,CAAC;YAEtF,mBAAmB;YACnB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;gBACpB,OAAO,CAAC,GAAG,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC;gBAC/B,MAAM,KAAK,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;aACnC;QACL,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,eAAe,EAAE,oCAAoC,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YAC/E,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,eAAe,EAAE,CAAC;YAC5C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC;YAC3C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QACxC,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,uBAAuB,EAAE,uBAAuB,EAAE,CAAC,KAAK,EAAE,EAAE;YACjE,OAAO,KAAK;iBACP,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gFAAgF;aAC7F,CAAC;iBACD,MAAM,CAAC,gBAAgB,EAAE;gBACtB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,oDAAoD;gBAC9D,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;aACd,CAAC;iBACD,MAAM,CAAC,aAAa,EAAE;gBACnB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,iDAAiD;gBAC3D,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;aACd,CAAC;iBACD,MAAM,CAAC,eAAe,EAAE;gBACrB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,2DAA2D;gBACrE,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,CAAC;aACb,CAAC;iBACD,MAAM,CAAC,QAAQ,EAAE;gBACd,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,6EAA6E;gBACvF,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,IAAI;aAChB,CAAC;iBACD,MAAM,CAAC,QAAQ,EAAE;gBACd,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,wDAAwD;gBAClE,OAAO,EAAE,CAAC,UAAU,EAAE,OAAO,EAAE,QAAQ,CAAC;gBACxC,OAAO,EAAE,UAAU;aACtB,CAAC;iBACD,MAAM,CAAC,SAAS,EAAE;gBACf,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gDAAgD;gBAC1D,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,IAAI;aAChB,CAAC;iBACD,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAChC,CAAC,EAAE,CAAO,IAAI,EAAE,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC;YAE9C,oBAAoB;YACpB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,OAAO,CAAC,CAAC,CAAC;YACzE,MAAM,UAAU,GAAG,IAAI,mCAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC;YAElG,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC;gBACjC,UAAU;gBACV,UAAU;aACb,CAAC,CAAC;YAEH,cAAc;YACd,MAAM,KAAK,GAAG,IAAI,CAAC,KAAe,CAAC;YACnC,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,cAAc,CAAC,KAAK,EAAE;gBAC9C,YAAY,EAAE,IAAI,CAAC,aAAa;gBAChC,SAAS,EAAE,IAAI,CAAC,UAAU;aAC7B,CAAC,CAAC;YAEH,iBAAiB;YACjB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;gBAC1B,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;gBACzC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;gBACnD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;gBAC5D,IAAI,IAAI,CAAC,MAAM,IAAI,UAAU,EAAE;oBAC3B,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,YAAY,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;oBAC3F,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;wBACtC,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;wBAC5B,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;wBACrF,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;wBACpD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;wBAC1D,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;qBAC9C;iBACJ;qBAAM,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ,EAAE;oBAChC,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,QAAQ,EAAE,CAAC;oBACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;wBAC3C,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;wBAC/B,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;wBAC9C,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;wBAC1C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;wBAC9C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;wBAClD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC;wBAClD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,CAAC;wBAC9C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;qBACtE;iBACJ;aACJ;QACL,CAAC,CAAA,CAAC;aACD,IAAI,EAAE;aACN,aAAa,EAAE;aACf,UAAU,EAAE,CAAC;IACtB,CAAC;CAAA;AAnOD,kBAmOC;AAGD,SAAe,WAAW,CAAC,KAAe,EAAE,QAAgB,EAAE,OAAe;;QACzE,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE;YAC1C,OAAO,KAAK,CAAC;SAChB;aAAM,IAAI,OAAO,QAAQ,IAAI,QAAQ,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;YAClE,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAClD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;SACxF;aAAM;YACH,MAAM,IAAI,KAAK,CAAC,+CAA+C,OAAO,6EAA6E,CAAC,CAAA;SACvJ;IACL,CAAC;CAAA"}
|
package/package.json
CHANGED
|
@@ -25,7 +25,7 @@ export class LocalDocumentResult extends LocalDocument {
|
|
|
25
25
|
return this._score;
|
|
26
26
|
}
|
|
27
27
|
|
|
28
|
-
public async renderSections(maxTokens: number, maxSections: number): Promise<DocumentTextSection[]> {
|
|
28
|
+
public async renderSections(maxTokens: number, maxSections: number, overlappingChunks = true): Promise<DocumentTextSection[]> {
|
|
29
29
|
// Load text from disk
|
|
30
30
|
const text = await this.loadText();
|
|
31
31
|
|
|
@@ -123,61 +123,63 @@ export class LocalDocumentResult extends LocalDocument {
|
|
|
123
123
|
});
|
|
124
124
|
|
|
125
125
|
// Add overlapping chunks of text to each section until the maxTokens is reached
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
section.chunks.
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
// Add chunks to beginning and end of the section until maxTokens is reached
|
|
144
|
-
let budget = maxTokens - section.tokenCount;
|
|
145
|
-
if (budget > 40) {
|
|
146
|
-
const sectionStart = section.chunks[0].startPos;
|
|
147
|
-
const sectionEnd = section.chunks[section.chunks.length - 1].endPos;
|
|
148
|
-
if (sectionStart > 0) {
|
|
149
|
-
const beforeTex = text.substring(0, section.chunks[0].startPos);
|
|
150
|
-
const beforeTokens = this.encodeBeforeText(beforeTex, Math.ceil(budget/2));
|
|
151
|
-
const beforeBudget = sectionEnd < text.length - 1 ? Math.min(beforeTokens.length, Math.ceil(budget/2)) : Math.min(beforeTokens.length, budget);
|
|
152
|
-
const chunk: SectionChunk = {
|
|
153
|
-
text: this._tokenizer.decode(beforeTokens.slice(-beforeBudget)),
|
|
154
|
-
startPos: sectionStart - beforeBudget,
|
|
155
|
-
endPos: sectionStart - 1,
|
|
156
|
-
score: 0,
|
|
157
|
-
tokenCount: beforeBudget
|
|
158
|
-
};
|
|
159
|
-
section.chunks.unshift(chunk);
|
|
160
|
-
section.tokenCount += chunk.tokenCount;
|
|
161
|
-
budget -= chunk.tokenCount;
|
|
126
|
+
if (overlappingChunks) {
|
|
127
|
+
const connector: SectionChunk = {
|
|
128
|
+
text: '\n\n...\n\n',
|
|
129
|
+
startPos: -1,
|
|
130
|
+
endPos: -1,
|
|
131
|
+
score: 0,
|
|
132
|
+
tokenCount: this._tokenizer.encode('\n\n...\n\n').length
|
|
133
|
+
};
|
|
134
|
+
sections.forEach(section => {
|
|
135
|
+
// Insert connectors between chunks
|
|
136
|
+
if (section.chunks.length > 1) {
|
|
137
|
+
for (let i = 0; i < section.chunks.length - 1; i++) {
|
|
138
|
+
section.chunks.splice(i + 1, 0, connector);
|
|
139
|
+
section.tokenCount += connector.tokenCount;
|
|
140
|
+
i++;
|
|
141
|
+
}
|
|
162
142
|
}
|
|
163
143
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
const
|
|
168
|
-
const
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
144
|
+
// Add chunks to beginning and end of the section until maxTokens is reached
|
|
145
|
+
let budget = maxTokens - section.tokenCount;
|
|
146
|
+
if (budget > 40) {
|
|
147
|
+
const sectionStart = section.chunks[0].startPos;
|
|
148
|
+
const sectionEnd = section.chunks[section.chunks.length - 1].endPos;
|
|
149
|
+
if (sectionStart > 0) {
|
|
150
|
+
const beforeTex = text.substring(0, section.chunks[0].startPos);
|
|
151
|
+
const beforeTokens = this.encodeBeforeText(beforeTex, Math.ceil(budget/2));
|
|
152
|
+
const beforeBudget = sectionEnd < text.length - 1 ? Math.min(beforeTokens.length, Math.ceil(budget/2)) : Math.min(beforeTokens.length, budget);
|
|
153
|
+
const chunk: SectionChunk = {
|
|
154
|
+
text: this._tokenizer.decode(beforeTokens.slice(-beforeBudget)),
|
|
155
|
+
startPos: sectionStart - beforeBudget,
|
|
156
|
+
endPos: sectionStart - 1,
|
|
157
|
+
score: 0,
|
|
158
|
+
tokenCount: beforeBudget
|
|
159
|
+
};
|
|
160
|
+
section.chunks.unshift(chunk);
|
|
161
|
+
section.tokenCount += chunk.tokenCount;
|
|
162
|
+
budget -= chunk.tokenCount;
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
if (sectionEnd < text.length - 1) {
|
|
166
|
+
const afterText = text.substring(sectionEnd + 1);
|
|
167
|
+
const afterTokens = this.encodeAfterText(afterText, budget);
|
|
168
|
+
const afterBudget = Math.min(afterTokens.length, budget);
|
|
169
|
+
const chunk: SectionChunk = {
|
|
170
|
+
text: this._tokenizer.decode(afterTokens.slice(0, afterBudget)),
|
|
171
|
+
startPos: sectionEnd + 1,
|
|
172
|
+
endPos: sectionEnd + afterBudget,
|
|
173
|
+
score: 0,
|
|
174
|
+
tokenCount: afterBudget
|
|
175
|
+
};
|
|
176
|
+
section.chunks.push(chunk);
|
|
177
|
+
section.tokenCount += chunk.tokenCount;
|
|
178
|
+
budget -= chunk.tokenCount;
|
|
179
|
+
}
|
|
178
180
|
}
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
+
});
|
|
182
|
+
}
|
|
181
183
|
|
|
182
184
|
// Return final rendered sections
|
|
183
185
|
return sections.map(section => {
|
package/src/vectra-cli.ts
CHANGED
|
@@ -174,6 +174,12 @@ export async function run() {
|
|
|
174
174
|
choices: ['sections', 'stats', 'chunks'],
|
|
175
175
|
default: 'sections'
|
|
176
176
|
})
|
|
177
|
+
.option('overlap', {
|
|
178
|
+
alias: 'o',
|
|
179
|
+
describe: `whether to add overlapping chunks to sections.`,
|
|
180
|
+
type: 'boolean',
|
|
181
|
+
default: true
|
|
182
|
+
})
|
|
177
183
|
.demandOption(['keys']);
|
|
178
184
|
}, async (args) => {
|
|
179
185
|
console.log(Colorize.title('Querying Index'));
|
|
@@ -202,7 +208,7 @@ export async function run() {
|
|
|
202
208
|
console.log(Colorize.value('score', result.score));
|
|
203
209
|
console.log(Colorize.value('chunks', result.chunks.length));
|
|
204
210
|
if (args.format == 'sections') {
|
|
205
|
-
const sections = await result.renderSections(args.tokens, args.sectionCount);
|
|
211
|
+
const sections = await result.renderSections(args.tokens, args.sectionCount, args.overlap);
|
|
206
212
|
for (let i = 0; i < sections.length; i++) {
|
|
207
213
|
const section = sections[i];
|
|
208
214
|
console.log(Colorize.title(args.sectionCount == 1 ? 'Section' : `Section ${i + 1}`));
|