vectra 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/TextSplitter.d.ts +1 -0
- package/lib/TextSplitter.d.ts.map +1 -1
- package/lib/TextSplitter.js +23 -1
- package/lib/TextSplitter.js.map +1 -1
- package/lib/vectra-cli.d.ts.map +1 -1
- package/lib/vectra-cli.js +26 -15
- package/lib/vectra-cli.js.map +1 -1
- package/package.json +1 -1
- package/src/TextSplitter.ts +23 -1
- package/src/vectra-cli.ts +24 -14
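package/lib/TextSplitter.d.ts
CHANGED
(hunk not captured in this extract; the file list above records +1 -0 for this file)
package/lib/TextSplitter.d.ts.map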
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"TextSplitter.d.ts","sourceRoot":"","sources":["../src/TextSplitter.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAI/C,MAAM,WAAW,kBAAkB;IAC/B,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,SAAS,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,qBAAa,YAAY;IACrB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAqB;gBAE1B,MAAM,CAAC,EAAE,OAAO,CAAC,kBAAkB,CAAC;IA2BhD,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE;IA4BvC,OAAO,CAAC,cAAc;IAoEtB,OAAO,CAAC,aAAa;IA6BrB,OAAO,CAAC,oBAAoB;IAS5B,OAAO,CAAC,aAAa;CA4WxB"}
|
|
1
|
+
{"version":3,"file":"TextSplitter.d.ts","sourceRoot":"","sources":["../src/TextSplitter.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAI/C,MAAM,WAAW,kBAAkB;IAC/B,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,SAAS,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,qBAAa,YAAY;IACrB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAqB;gBAE1B,MAAM,CAAC,EAAE,OAAO,CAAC,kBAAkB,CAAC;IA2BhD,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE;IA4BvC,OAAO,CAAC,cAAc;IAoEtB,OAAO,CAAC,aAAa;IA6BrB,OAAO,CAAC,oBAAoB;IAS5B,OAAO,CAAC,aAAa;IAsBrB,OAAO,CAAC,aAAa;CA4WxB"}
|
package/lib/TextSplitter.js
CHANGED
|
@@ -64,7 +64,7 @@ class TextSplitter {
|
|
|
64
64
|
if (separators.length > 0) {
|
|
65
65
|
// Split by separator
|
|
66
66
|
separator = separators[0];
|
|
67
|
-
parts = text.split(separator);
|
|
67
|
+
parts = separator == ' ' ? this.splitBySpaces(text) : text.split(separator);
|
|
68
68
|
}
|
|
69
69
|
else {
|
|
70
70
|
// Cut text in half
|
|
@@ -154,6 +154,28 @@ class TextSplitter {
|
|
|
154
154
|
}
|
|
155
155
|
return false;
|
|
156
156
|
}
|
|
157
|
+
splitBySpaces(text) {
|
|
158
|
+
const parts = [];
|
|
159
|
+
const words = text.split(' ');
|
|
160
|
+
if (words.length > 0) {
|
|
161
|
+
let part = words[0];
|
|
162
|
+
for (let i = 1; i < words.length; i++) {
|
|
163
|
+
const nextWord = words[i];
|
|
164
|
+
if (this._config.tokenizer.encode(part + ' ' + nextWord).length <= this._config.chunkSize) {
|
|
165
|
+
part += ' ' + nextWord;
|
|
166
|
+
}
|
|
167
|
+
else {
|
|
168
|
+
parts.push(part);
|
|
169
|
+
part = nextWord;
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
parts.push(part);
|
|
173
|
+
}
|
|
174
|
+
else {
|
|
175
|
+
parts.push(text);
|
|
176
|
+
}
|
|
177
|
+
return parts;
|
|
178
|
+
}
|
|
157
179
|
getSeparators(docType) {
|
|
158
180
|
switch (docType !== null && docType !== void 0 ? docType : '') {
|
|
159
181
|
case "cpp":
|
package/lib/TextSplitter.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"TextSplitter.js","sourceRoot":"","sources":["../src/TextSplitter.ts"],"names":[],"mappings":";;;AAAA,mDAAgD;AAGhD,MAAM,kBAAkB,GAAG,gEAAgE,CAAC;AAW5F,MAAa,YAAY;IAGrB,YAAmB,MAAoC;QACnD,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC;YACzB,cAAc,EAAE,KAAK;YACrB,SAAS,EAAE,GAAG;YACd,YAAY,EAAE,EAAE;SACG,EAAE,MAAM,CAAC,CAAC;QAEjC,iDAAiD;QACjD,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;YACzB,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,IAAI,6BAAa,EAAE,CAAC;SAChD;QAED,8CAA8C;QAC9C,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,IAAI,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE;YAClE,IAAI,CAAC,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;SACtE;QAED,+BAA+B;QAC/B,IAAI,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,CAAC,EAAE;YAC5B,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;SAC7C;aAAM,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,CAAC,EAAE;YACtC,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;SAChD;aAAM,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;YAC3D,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;SACxD;IACL,CAAC;IAEM,KAAK,CAAC,IAAY;QACrB,mBAAmB;QACnB,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;QAErE,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,SAAS,gBAAgB,CAAC,MAAiB;YACvC,IAAI,MAAM,IAAI,SAAS,EAAE;gBACrB,MAAM,GAAG,GAAG,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC;gBAClG,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;aAC/B;iBAAM;gBACH,OAAO,EAAE,CAAC;aACb;QACL,CAAC;QAED,iEAAiE;QACjE,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,CAAC,EAAE;YAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACpC,MAAM,aAAa,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBACpC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;gBACxB,MAAM,SAAS,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;gBACpE,KAAK,CAAC,YAAY,GAAG,gBAAgB,CAAC,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC;gBAChF,KAAK,CAAC,UAAU,GAAG,gBAAgB,CAAC,SAAS,aAAT,SAAS,uBAAT,SAAS,C
AAE,MAAM,CAAC,CAAC;aAC1D;SACJ;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAEO,cAAc,CAAC,IAAY,EAAE,UAAoB,EAAE,QAAgB;QACvE,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;YACjB,wBAAwB;YACxB,IAAI,KAAe,CAAC;YACpB,IAAI,SAAS,GAAG,EAAE,CAAC;YACnB,MAAM,cAAc,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YACxE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE;gBACvB,qBAAqB;gBACrB,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;gBAC1B,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;aACjC;iBAAM;gBACH,mBAAmB;gBACnB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBACzC,KAAK,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC;aAC3D;YAED,qBAAqB;YACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnC,MAAM,SAAS,GAAG,CAAC,CAAC,KAAK,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAE3C,4BAA4B;gBAC5B,IAAI,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACrB,MAAM,MAAM,GAAG,CAAC,QAAQ,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;gBACpF,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,IAAI,CAAC,SAAS,EAAE;oBAC3C,KAAK,IAAI,SAAS,CAAC;iBACtB;gBAED,6BAA6B;gBAC7B,IAAI,CAAC,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC,EAAE;oBACnC,SAAS;iBACZ;gBAED,qDAAqD;gBACrD,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;oBAC3C,qCAAqC;oBACrC,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,cAAc,EAAE,QAAQ,CAAC,CAAC;oBACvE,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;iBAC7B;qBAAM;oBACH,oBAAoB;oBACpB,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;oBACpD,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;wBACxC,qCAAqC;wBACrC,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,cAAc,EAAE,QAAQ,CAAC,CAAC;wBACvE,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;qBAC7B;yBAAM;wBACH,yBAAyB;wBACzB,MAAM,CAAC,IAAI,CAAC;4BACR,IAAI,EAAE,KAAK;4BACX,MAAM,EAAE,MAAM;4BACd,QAAQ,EAAE,QAAQ;4BAClB,MAAM,EAAE,MAAM;4BACd,YAAY,EAAE,EAAE;4BAChB,UAAU,EAAE,EAAE;yBACjB,CAAC,CAAC;q
BACN;iBAEJ;gBAGD,kBAAkB;gBAClB,QAAQ,GAAG,MAAM,GAAG,CAAC,CAAC;aACzB;SACJ;QAED,OAAO,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;IACtC,CAAC;IAEO,aAAa,CAAC,MAAmB;QACrC,MAAM,cAAc,GAAgB,EAAE,CAAC;QACvC,IAAI,YAAiC,CAAC;QACtC,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QACzD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YACpC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACxB,IAAI,YAAY,EAAE;gBACd,MAAM,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;gBAChE,IAAI,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;oBACjC,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;oBAClC,YAAY,GAAG,KAAK,CAAC;oBACrB,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;iBACvC;qBAAM;oBACH,YAAY,CAAC,IAAI,IAAI,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC;oBAC5C,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;oBAC1C,aAAa,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;iBACxC;aACJ;iBAAM;gBACH,YAAY,GAAG,KAAK,CAAC;gBACrB,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;aACvC;SACJ;QACD,IAAI,YAAY,EAAE;YACd,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;SACrC;QACD,OAAO,cAAc,CAAC;IAC1B,CAAC;IAEO,oBAAoB,CAAC,IAAY;QACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YAClC,IAAI,kBAAkB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE;gBACtC,OAAO,IAAI,CAAC;aACf;SACJ;QACD,OAAO,KAAK,CAAC;IACjB,CAAC;IAEO,aAAa,CAAC,OAAgB;QAClC,QAAQ,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,EAAE;YACnB,KAAK,KAAK;gBACN,OAAO;oBACH,gCAAgC;oBAChC,UAAU;oBACV,mCAAmC;oBACnC,SAAS;oBACT,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,IAAI;gBACL,OAAO;oBACH,mCAAmC;oBACnC,SAAS;oBACT,QAAQ;oBACR,UAAU;oBACV,SAAS;oBACT,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM,CAAC;YACZ,KAAK,IAAI,CAAC;YACV,KAAK,QAAQ,CAAC;YACd,KAAK,IAAI,CAAC;YACV,KAAK,IAAI,CAAC;YACV,KAAK,KAAK,CAAC;YACX,KAAK,YAAY;gBACb,OAAO;oBACH,sBAAsB
;oBACtB,eAAe;oBACf,eAAe;oBACf,gBAAgB;oBAChB,gCAAgC;oBAChC,UAAU;oBACV,iCAAiC;oBACjC,WAAW;oBACX,cAAc;oBACd,YAAY;oBACZ,WAAW;oBACX,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,IAAI,CAAC;YACV,KAAK,KAAK,CAAC;YACX,KAAK,YAAY;gBACb,OAAO;oBACH,sBAAsB;oBACtB,eAAe;oBACf,eAAe;oBACf,gBAAgB;oBAChB,gCAAgC;oBAChC,UAAU;oBACV,mCAAmC;oBACnC,aAAa;oBACb,UAAU;oBACV,QAAQ;oBACR,QAAQ;oBACR,UAAU;oBACV,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,YAAY;oBACZ,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,KAAK;gBACN,OAAO;oBACH,mCAAmC;oBACnC,aAAa;oBACb,gCAAgC;oBAChC,UAAU;oBACV,sCAAsC;oBACtC,OAAO;oBACP,YAAY;oBACZ,UAAU;oBACV,OAAO;oBACP,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,kCAAkC;oBAClC,YAAY;oBACZ,kCAAkC;oBAClC,YAAY;oBACZ,+BAA+B;oBAC/B,SAAS;oBACT,iCAAiC;oBACjC,WAAW;oBACX,gCAAgC;oBAChC,WAAW;oBACX,kCAAkC;oBAClC,WAAW;oBACX,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,QAAQ,CAAC;YACd,KAAK,IAAI;gBACL,OAAO;oBACH,8CAA8C;oBAC9C,UAAU;oBACV,QAAQ;oBACR,UAAU;oBACV,wCAAwC;oBACxC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,KAAK;gBACN,OAAO;oBACH,6BAA6B;oBAC7B,SAAS;oBACT,SAAS;oBACT,SAAS;oBACT,gCAAgC;oBAChC,OAAO;oBACP,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM;gBACP,OAAO;oBACH,iCAAiC;oBACjC,QAAQ;oBACR,UAAU;oBACV,sCAAsC;oBACtC,OAAO;oBACP,WAAW;oBACX,UAAU;oBACV,QAAQ;oBACR,OAAO;oBACP,UAAU;oBACV,WAAW;oBACX,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM;gBACP,OAAO;oBACH,mCAAmC;oBACnC,OAAO;oBACP,UAAU;oBACV,QAAQ;oBACR,sCAAsC;oBACtC,OAAO;oBACP,UAAU;oBACV,QAAQ;oBACR,SAAS;oBACT,UAAU;oBACV,UAAU;oBACV,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,gCAAgC;oBAChC,UAAU;oBACV,WAAW;oBACX,iCAAiC;oBACjC,QAAQ;oBACR,QAAQ;oBACR,QAAQ;oBACR,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,UAAU;oBACV,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iB
ACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,mCAAmC;oBACnC,SAAS;oBACT,gCAAgC;oBAChC,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,OAAO;oBACP,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,IAAI,CAAC;YACV,KAAK,UAAU;gBACX,OAAO;oBACH,sEAAsE;oBACtE,OAAO;oBACP,QAAQ;oBACR,SAAS;oBACT,UAAU;oBACV,WAAW;oBACX,uEAAuE;oBACvE,kBAAkB;oBAClB,kBAAkB;oBAClB,oBAAoB;oBACpB,SAAS;oBACT,mBAAmB;oBACnB,aAAa;oBACb,aAAa;oBACb,aAAa;oBACb,kEAAkE;oBAClE,kEAAkE;oBAClE,gBAAgB;oBAChB,SAAS;oBACT,UAAU;oBACV,UAAU;oBACV,UAAU;oBACV,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,2CAA2C;oBAC3C,cAAc;oBACd,cAAc;oBACd,iBAAiB;oBACjB,oBAAoB;oBAEpB,4BAA4B;oBAC5B,sBAAsB;oBACtB,oBAAoB;oBACpB,wBAAwB;oBACxB,iBAAiB;oBACjB,kBAAkB;oBAClB,sBAAsB;oBACtB,kBAAkB;oBAClB,qBAAqB;oBAErB,iCAAiC;oBACjC,kBAAkB;oBAClB,IAAI;oBACJ,GAAG;oBAEH,wCAAwC;oBACxC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM;gBACP,OAAO;oBACH,sCAAsC;oBACtC,QAAQ;oBACR,OAAO;oBACP,KAAK;oBACL,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,QAAQ;oBACR,SAAS;oBACT,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,UAAU;oBACV,UAAU;oBACV,OAAO;oBACP,OAAO;oBACP,QAAQ;oBACR,SAAS;oBACT,UAAU;oBACV,QAAQ;oBACR,SAAS;oBACT,uBAAuB;oBACvB,GAAG;iBACN,CAAC;YACN,KAAK,KAAK;gBACN,OAAO;oBACH,gDAAgD;oBAChD,WAAW;oBACX,UAAU;oBACV,mCAAmC;oBACnC,aAAa;oBACb,cAAc;oBACd,YAAY;oBACZ,iCAAiC;oBACjC,gBAAgB;oBAChB,SAAS;oBACT,aAAa;oBACb,UAAU;oBACV,aAAa;oBACb,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,aAAa;oBACb,aAAa;oBACb,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN;gBACI,OAAO;oBACH,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;SACT;IACL,CAAC;CACJ;AAhhBD,oCAghBC"}
|
|
1
|
+
{"version":3,"file":"TextSplitter.js","sourceRoot":"","sources":["../src/TextSplitter.ts"],"names":[],"mappings":";;;AAAA,mDAAgD;AAGhD,MAAM,kBAAkB,GAAG,gEAAgE,CAAC;AAW5F,MAAa,YAAY;IAGrB,YAAmB,MAAoC;QACnD,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC;YACzB,cAAc,EAAE,KAAK;YACrB,SAAS,EAAE,GAAG;YACd,YAAY,EAAE,EAAE;SACG,EAAE,MAAM,CAAC,CAAC;QAEjC,iDAAiD;QACjD,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;YACzB,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,IAAI,6BAAa,EAAE,CAAC;SAChD;QAED,8CAA8C;QAC9C,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,IAAI,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE;YAClE,IAAI,CAAC,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;SACtE;QAED,+BAA+B;QAC/B,IAAI,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,CAAC,EAAE;YAC5B,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;SAC7C;aAAM,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,CAAC,EAAE;YACtC,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;SAChD;aAAM,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;YAC3D,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;SACxD;IACL,CAAC;IAEM,KAAK,CAAC,IAAY;QACrB,mBAAmB;QACnB,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;QAErE,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,SAAS,gBAAgB,CAAC,MAAiB;YACvC,IAAI,MAAM,IAAI,SAAS,EAAE;gBACrB,MAAM,GAAG,GAAG,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC;gBAClG,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;aAC/B;iBAAM;gBACH,OAAO,EAAE,CAAC;aACb;QACL,CAAC;QAED,iEAAiE;QACjE,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,CAAC,EAAE;YAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACpC,MAAM,aAAa,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBACpC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;gBACxB,MAAM,SAAS,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;gBACpE,KAAK,CAAC,YAAY,GAAG,gBAAgB,CAAC,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC;gBAChF,KAAK,CAAC,UAAU,GAAG,gBAAgB,CAAC,SAAS,aAAT,SAAS,uBAAT,SAAS,C
AAE,MAAM,CAAC,CAAC;aAC1D;SACJ;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAEO,cAAc,CAAC,IAAY,EAAE,UAAoB,EAAE,QAAgB;QACvE,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;YACjB,wBAAwB;YACxB,IAAI,KAAe,CAAC;YACpB,IAAI,SAAS,GAAG,EAAE,CAAC;YACnB,MAAM,cAAc,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YACxE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE;gBACvB,qBAAqB;gBACrB,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;gBAC1B,KAAK,GAAG,SAAS,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;aAC/E;iBAAM;gBACH,mBAAmB;gBACnB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBACzC,KAAK,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC;aAC3D;YAED,qBAAqB;YACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnC,MAAM,SAAS,GAAG,CAAC,CAAC,KAAK,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAE3C,4BAA4B;gBAC5B,IAAI,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACrB,MAAM,MAAM,GAAG,CAAC,QAAQ,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;gBACpF,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,IAAI,CAAC,SAAS,EAAE;oBAC3C,KAAK,IAAI,SAAS,CAAC;iBACtB;gBAED,6BAA6B;gBAC7B,IAAI,CAAC,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC,EAAE;oBACnC,SAAS;iBACZ;gBAED,qDAAqD;gBACrD,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;oBAC3C,qCAAqC;oBACrC,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,cAAc,EAAE,QAAQ,CAAC,CAAC;oBACvE,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;iBAC7B;qBAAM;oBACH,oBAAoB;oBACpB,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;oBACpD,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;wBACxC,qCAAqC;wBACrC,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,cAAc,EAAE,QAAQ,CAAC,CAAC;wBACvE,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;qBAC7B;yBAAM;wBACH,yBAAyB;wBACzB,MAAM,CAAC,IAAI,CAAC;4BACR,IAAI,EAAE,KAAK;4BACX,MAAM,EAAE,MAAM;4BACd,QAAQ,EAAE,QAAQ;4BAClB,M
AAM,EAAE,MAAM;4BACd,YAAY,EAAE,EAAE;4BAChB,UAAU,EAAE,EAAE;yBACjB,CAAC,CAAC;qBACN;iBAEJ;gBAGD,kBAAkB;gBAClB,QAAQ,GAAG,MAAM,GAAG,CAAC,CAAC;aACzB;SACJ;QAED,OAAO,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;IACtC,CAAC;IAEO,aAAa,CAAC,MAAmB;QACrC,MAAM,cAAc,GAAgB,EAAE,CAAC;QACvC,IAAI,YAAiC,CAAC;QACtC,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QACzD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YACpC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACxB,IAAI,YAAY,EAAE;gBACd,MAAM,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;gBAChE,IAAI,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;oBACjC,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;oBAClC,YAAY,GAAG,KAAK,CAAC;oBACrB,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;iBACvC;qBAAM;oBACH,YAAY,CAAC,IAAI,IAAI,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC;oBAC5C,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;oBAC1C,aAAa,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;iBACxC;aACJ;iBAAM;gBACH,YAAY,GAAG,KAAK,CAAC;gBACrB,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC;aACvC;SACJ;QACD,IAAI,YAAY,EAAE;YACd,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;SACrC;QACD,OAAO,cAAc,CAAC;IAC1B,CAAC;IAEO,oBAAoB,CAAC,IAAY;QACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YAClC,IAAI,kBAAkB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE;gBACtC,OAAO,IAAI,CAAC;aACf;SACJ;QACD,OAAO,KAAK,CAAC;IACjB,CAAC;IAEO,aAAa,CAAC,IAAY;QAC9B,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC9B,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE;YAClB,IAAI,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnC,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC1B,IAAI,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,GAAG,GAAG,GAAG,QAAQ,CAAC,CAAC,MAAM,IAAI,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;oBACvF,IAAI,IAAI,GAAG,GAAG,QAAQ,CAAC;iBAC1B;qBAAM;oBACH,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACjB,IAAI,GAAG,QAAQ,CAAC;iBACnB;aACJ;YACD,KAAK,CAA
C,IAAI,CAAC,IAAI,CAAC,CAAC;SACpB;aAAM;YACH,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;SACpB;QAED,OAAO,KAAK,CAAC;IACjB,CAAC;IAEO,aAAa,CAAC,OAAgB;QAClC,QAAQ,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,EAAE;YACnB,KAAK,KAAK;gBACN,OAAO;oBACH,gCAAgC;oBAChC,UAAU;oBACV,mCAAmC;oBACnC,SAAS;oBACT,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,IAAI;gBACL,OAAO;oBACH,mCAAmC;oBACnC,SAAS;oBACT,QAAQ;oBACR,UAAU;oBACV,SAAS;oBACT,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM,CAAC;YACZ,KAAK,IAAI,CAAC;YACV,KAAK,QAAQ,CAAC;YACd,KAAK,IAAI,CAAC;YACV,KAAK,IAAI,CAAC;YACV,KAAK,KAAK,CAAC;YACX,KAAK,YAAY;gBACb,OAAO;oBACH,sBAAsB;oBACtB,eAAe;oBACf,eAAe;oBACf,gBAAgB;oBAChB,gCAAgC;oBAChC,UAAU;oBACV,iCAAiC;oBACjC,WAAW;oBACX,cAAc;oBACd,YAAY;oBACZ,WAAW;oBACX,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,IAAI,CAAC;YACV,KAAK,KAAK,CAAC;YACX,KAAK,YAAY;gBACb,OAAO;oBACH,sBAAsB;oBACtB,eAAe;oBACf,eAAe;oBACf,gBAAgB;oBAChB,gCAAgC;oBAChC,UAAU;oBACV,mCAAmC;oBACnC,aAAa;oBACb,UAAU;oBACV,QAAQ;oBACR,QAAQ;oBACR,UAAU;oBACV,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,YAAY;oBACZ,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,KAAK;gBACN,OAAO;oBACH,mCAAmC;oBACnC,aAAa;oBACb,gCAAgC;oBAChC,UAAU;oBACV,sCAAsC;oBACtC,OAAO;oBACP,YAAY;oBACZ,UAAU;oBACV,OAAO;oBACP,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,kCAAkC;oBAClC,YAAY;oBACZ,kCAAkC;oBAClC,YAAY;oBACZ,+BAA+B;oBAC/B,SAAS;oBACT,iCAAiC;oBACjC,WAAW;oBACX,gCAAgC;oBAChC,WAAW;oBACX,kCAAkC;oBAClC,WAAW;oBACX,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,QAAQ,CAAC;YACd,KAAK,IAAI;gBACL,OAAO;oBACH,8CAA8C;oBAC9C,UAAU;oBACV,QAAQ;oBACR,UAAU;oBACV,wCAAwC;oBACxC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,KAAK;gBACN,OA
AO;oBACH,6BAA6B;oBAC7B,SAAS;oBACT,SAAS;oBACT,SAAS;oBACT,gCAAgC;oBAChC,OAAO;oBACP,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM;gBACP,OAAO;oBACH,iCAAiC;oBACjC,QAAQ;oBACR,UAAU;oBACV,sCAAsC;oBACtC,OAAO;oBACP,WAAW;oBACX,UAAU;oBACV,QAAQ;oBACR,OAAO;oBACP,UAAU;oBACV,WAAW;oBACX,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM;gBACP,OAAO;oBACH,mCAAmC;oBACnC,OAAO;oBACP,UAAU;oBACV,QAAQ;oBACR,sCAAsC;oBACtC,OAAO;oBACP,UAAU;oBACV,QAAQ;oBACR,SAAS;oBACT,UAAU;oBACV,UAAU;oBACV,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,gCAAgC;oBAChC,UAAU;oBACV,WAAW;oBACX,iCAAiC;oBACjC,QAAQ;oBACR,QAAQ;oBACR,QAAQ;oBACR,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,UAAU;oBACV,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,mCAAmC;oBACnC,SAAS;oBACT,gCAAgC;oBAChC,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,OAAO;oBACP,WAAW;oBACX,SAAS;oBACT,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,IAAI,CAAC;YACV,KAAK,UAAU;gBACX,OAAO;oBACH,sEAAsE;oBACtE,OAAO;oBACP,QAAQ;oBACR,SAAS;oBACT,UAAU;oBACV,WAAW;oBACX,uEAAuE;oBACvE,kBAAkB;oBAClB,kBAAkB;oBAClB,oBAAoB;oBACpB,SAAS;oBACT,mBAAmB;oBACnB,aAAa;oBACb,aAAa;oBACb,aAAa;oBACb,kEAAkE;oBAClE,kEAAkE;oBAClE,gBAAgB;oBAChB,SAAS;oBACT,UAAU;oBACV,UAAU;oBACV,UAAU;oBACV,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,OAAO;gBACR,OAAO;oBACH,2CAA2C;oBAC3C,cAAc;oBACd,cAAc;oBACd,iBAAiB;oBACjB,oBAAoB;oBAEpB,4BAA4B;oBAC5B,sBAAsB;oBACtB,oBAAoB;oBACpB,wBAAwB;oBACxB,iBAAiB;oBACjB,kBAAkB;oBAClB,sBAAsB;oBACtB,kBAAkB;oBAClB,qBAAqB;oBAErB,iCAAiC;oBACjC,kBAAkB;oBAClB,IAAI;oBACJ,GAAG;oBAEH,wCAAwC;oBACxC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN,KAAK,MAAM;gBACP,OAAO;oBACH,sCAAsC;oBACtC,QAAQ;oBACR,OAAO;oBACP,KAAK;oBACL,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,QAAQ;oBACR,SAAS;oBACT,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,MAAM;oBACN,UAAU;oBACV,UAAU;oBACV,OAAO;oBACP,OAAO;oBACP,QA
AQ;oBACR,SAAS;oBACT,UAAU;oBACV,QAAQ;oBACR,SAAS;oBACT,uBAAuB;oBACvB,GAAG;iBACN,CAAC;YACN,KAAK,KAAK;gBACN,OAAO;oBACH,gDAAgD;oBAChD,WAAW;oBACX,UAAU;oBACV,mCAAmC;oBACnC,aAAa;oBACb,cAAc;oBACd,YAAY;oBACZ,iCAAiC;oBACjC,gBAAgB;oBAChB,SAAS;oBACT,aAAa;oBACb,UAAU;oBACV,aAAa;oBACb,UAAU;oBACV,WAAW;oBACX,SAAS;oBACT,sCAAsC;oBACtC,OAAO;oBACP,QAAQ;oBACR,UAAU;oBACV,aAAa;oBACb,aAAa;oBACb,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;iBACN,CAAC;YACN;gBACI,OAAO;oBACH,oCAAoC;oBACpC,MAAM;oBACN,IAAI;oBACJ,GAAG;oBACH,EAAE;iBACL,CAAC;SACT;IACL,CAAC;CACJ;AAtiBD,oCAsiBC"}
|
package/lib/vectra-cli.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vectra-cli.d.ts","sourceRoot":"","sources":["../src/vectra-cli.ts"],"names":[],"mappings":"AASA,wBAAsB,GAAG,
|
|
1
|
+
{"version":3,"file":"vectra-cli.d.ts","sourceRoot":"","sources":["../src/vectra-cli.ts"],"names":[],"mappings":"AASA,wBAAsB,GAAG,kBA6OxB"}
|
package/lib/vectra-cli.js
CHANGED
|
@@ -78,6 +78,11 @@ function run() {
|
|
|
78
78
|
alias: 'l',
|
|
79
79
|
describe: 'path to a file containing a list of web pages to add',
|
|
80
80
|
type: 'string'
|
|
81
|
+
})
|
|
82
|
+
.option('cookie', {
|
|
83
|
+
alias: 'c',
|
|
84
|
+
describe: 'optional cookies to add to web fetch requests',
|
|
85
|
+
type: 'string'
|
|
81
86
|
})
|
|
82
87
|
.option('chunk-size', {
|
|
83
88
|
alias: 'cs',
|
|
@@ -114,23 +119,29 @@ function run() {
|
|
|
114
119
|
// Get list of url's
|
|
115
120
|
const uris = yield getItemList(args.uri, args.list, 'web page');
|
|
116
121
|
// Fetch web pages
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
yield
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
122
|
+
yield index.beginUpdate();
|
|
123
|
+
try {
|
|
124
|
+
const fileFetcher = new FileFetcher_1.FileFetcher();
|
|
125
|
+
const webFetcher = args.cookie ? new WebFetcher_1.WebFetcher({ headers: { "cookie": args.cookie } }) : new WebFetcher_1.WebFetcher();
|
|
126
|
+
for (const path of uris) {
|
|
127
|
+
try {
|
|
128
|
+
console.log(internals_1.Colorize.progress(`fetching ${path}`));
|
|
129
|
+
const fetcher = path.startsWith('http') ? webFetcher : fileFetcher;
|
|
130
|
+
yield fetcher.fetch(path, (uri, text, docType) => __awaiter(this, void 0, void 0, function* () {
|
|
131
|
+
console.log(internals_1.Colorize.replaceLine(internals_1.Colorize.progress(`indexing ${uri}`)));
|
|
132
|
+
yield index.upsertDocument(uri, text, docType);
|
|
133
|
+
console.log(internals_1.Colorize.replaceLine(internals_1.Colorize.success(`added ${uri}`)));
|
|
134
|
+
return true;
|
|
135
|
+
}));
|
|
136
|
+
}
|
|
137
|
+
catch (err) {
|
|
138
|
+
console.log(internals_1.Colorize.replaceLine(internals_1.Colorize.error(`Error adding: ${path}\n${err.message}`)));
|
|
139
|
+
}
|
|
132
140
|
}
|
|
133
141
|
}
|
|
142
|
+
finally {
|
|
143
|
+
yield index.endUpdate();
|
|
144
|
+
}
|
|
134
145
|
}))
|
|
135
146
|
.command('remove <index>', `removes one or more documents from an index`, (yargs) => {
|
|
136
147
|
return yargs
|
package/lib/vectra-cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vectra-cli.js","sourceRoot":"","sources":["../src/vectra-cli.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,wDAAgC;AAChC,2CAAwC;AACxC,6DAA0D;AAC1D,6CAA0C;AAC1C,yDAAsD;AACtD,2CAAuC;AACvC,+CAA4C;AAE5C,SAAsB,GAAG;;QACrB,kBAAkB;QAClB,MAAM,IAAI,GAAG,MAAM,IAAA,eAAK,EAAC,IAAA,iBAAO,EAAC,OAAO,CAAC,IAAI,CAAC,CAAC;aAC1C,UAAU,CAAC,QAAQ,CAAC;aACpB,OAAO,CAAC,gBAAgB,EAAE,0BAA0B,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YACtE,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,CAAC,WAAW,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;QAClE,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,gBAAgB,EAAE,gCAAgC,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YAC5E,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,MAAM,KAAK,CAAC,WAAW,EAAE,CAAC;QAC9B,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,aAAa,EAAE,wCAAwC,EAAE,CAAC,KAAK,EAAE,EAAE;YACxE,OAAO,KAAK;iBACP,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gFAAgF;gBAC1F,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,KAAK,EAAE;gBACX,KAAK,EAAE,GAAG;gBACV,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,sCAAsC;gBAChD,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,sDAAsD;gBAChE,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,YAAY,EAAE;gBAClB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,0DAA0D;gBACpE,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,GAAG;aACf,CAAC;iBACD,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE;oBAChD,OAAO,IAAI,CAAC;iBACf;qBAAM,IAAI,OAAO,IAAI,CAAC,IAAI,IAAI,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;oBACpE,OAAO,IAAI,CAAC;iBACf;qBAAM;oBACH,MAAM,IAAI,KAAK,CAAC,mJAAmJ,CAAC,CAAC;iBACxK;YACL,CAAC,CAAC;iBACD,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAChC,CAAC,EA
AE,CAAO,IAAI,EAAE,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC,CAAC;YAEzD,oBAAoB;YACpB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,OAAO,CAAC,CAAC,CAAC;YACzE,MAAM,UAAU,GAAG,IAAI,mCAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC;YAElG,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC;gBACjC,UAAU;gBACV,UAAU;gBACV,cAAc,EAAE;oBACZ,SAAS,EAAE,IAAI,CAAC,SAAS;iBAC5B;aACJ,CAAC,CAAC;YAEH,oBAAoB;YACpB,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,GAAe,EAAE,IAAI,CAAC,IAAc,EAAE,UAAU,CAAC,CAAC;YAEtF,kBAAkB;YAClB,MAAM,WAAW,GAAG,IAAI,yBAAW,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"vectra-cli.js","sourceRoot":"","sources":["../src/vectra-cli.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,wDAAgC;AAChC,2CAAwC;AACxC,6DAA0D;AAC1D,6CAA0C;AAC1C,yDAAsD;AACtD,2CAAuC;AACvC,+CAA4C;AAE5C,SAAsB,GAAG;;QACrB,kBAAkB;QAClB,MAAM,IAAI,GAAG,MAAM,IAAA,eAAK,EAAC,IAAA,iBAAO,EAAC,OAAO,CAAC,IAAI,CAAC,CAAC;aAC1C,UAAU,CAAC,QAAQ,CAAC;aACpB,OAAO,CAAC,gBAAgB,EAAE,0BAA0B,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YACtE,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,CAAC,WAAW,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;QAClE,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,gBAAgB,EAAE,gCAAgC,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YAC5E,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,MAAM,KAAK,CAAC,WAAW,EAAE,CAAC;QAC9B,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,aAAa,EAAE,wCAAwC,EAAE,CAAC,KAAK,EAAE,EAAE;YACxE,OAAO,KAAK;iBACP,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gFAAgF;gBAC1F,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,KAAK,EAAE;gBACX,KAAK,EAAE,GAAG;gBACV,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,sCAAsC;gBAChD,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,sDAAsD;gBAChE,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,QAAQ,EAAE;gBACd,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,+CAA+C;gBACzD,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,YAAY,EAAE;gBAClB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,0DAA0D;gBACpE,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,GAAG;aACf,CAAC;iBACD,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE;oBAChD,OAAO,IAAI,CAAC;iBACf;qBAAM,IAAI,OAAO,IAAI,CAAC,IAAI,IAAI,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;oBACpE,OAAO,IAAI,CAAC;iBACf;qBAAM;oBACH,MAAM,IAAI,
KAAK,CAAC,mJAAmJ,CAAC,CAAC;iBACxK;YACL,CAAC,CAAC;iBACD,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAChC,CAAC,EAAE,CAAO,IAAI,EAAE,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC,CAAC;YAEzD,oBAAoB;YACpB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,OAAO,CAAC,CAAC,CAAC;YACzE,MAAM,UAAU,GAAG,IAAI,mCAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC;YAElG,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC;gBACjC,UAAU;gBACV,UAAU;gBACV,cAAc,EAAE;oBACZ,SAAS,EAAE,IAAI,CAAC,SAAS;iBAC5B;aACJ,CAAC,CAAC;YAEH,oBAAoB;YACpB,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,GAAe,EAAE,IAAI,CAAC,IAAc,EAAE,UAAU,CAAC,CAAC;YAEtF,kBAAkB;YAClB,MAAM,KAAK,CAAC,WAAW,EAAE,CAAC;YAC1B,IAAI;gBACA,MAAM,WAAW,GAAG,IAAI,yBAAW,EAAE,CAAC;gBACtC,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,uBAAU,CAAC,EAAE,OAAO,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,MAAM,EAAE,EAAC,CAAC,CAAC,CAAC,CAAC,IAAI,uBAAU,EAAE,CAAC;gBAC1G,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE;oBACrB,IAAI;wBACA,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,QAAQ,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC,CAAC;wBACnD,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC;wBACnE,MAAM,OAAO,CAAC,KAAK,CAAC,IAAI,EAAE,CAAO,GAAG,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE;4BACnD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,QAAQ,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;4BACxE,MAAM,KAAK,CAAC,cAAc,CAAC,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;4BAC/C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,OAAO,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;4BACpE,OAAO,IAAI,CAAC;wBAChB,CAAC,CAAA,CAAC,CAAC;qBACN;oBAAC,OAAO,GAAY,EAAE;wBACnB,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,KAAK,CAAC,iBAAiB,IAAI,KAAM,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;qBACzG;iBACJ;aACJ;oBAAS;gBACN,MAAM,KAAK,CAAC,SAAS,EAAE,CAAC;aAC3B;QACL,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,gBAAgB,EAAE,6CAA6C,EAAE,CAAC,KAAK,EAAE,EAAE;YAChF,OAAO,KAAK;iBACP,MAAM,CAAC,KAAK,EAAE;gBACX,KAAK,EAAE,GAAG;gBACV,KAAK,EAAE,IAAI;gBACX,QAAQ,EA
AE,6BAA6B;gBACvC,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,yDAAyD;gBACnE,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE;oBAChD,OAAO,IAAI,CAAC;iBACf;qBAAM,IAAI,OAAO,IAAI,CAAC,IAAI,IAAI,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;oBACpE,OAAO,IAAI,CAAC;iBACf;qBAAM;oBACH,MAAM,IAAI,KAAK,CAAC,mJAAmJ,CAAC,CAAC;iBACxK;YACL,CAAC,CAAC,CAAC;QACX,CAAC,EAAE,CAAO,IAAI,EAAE,EAAE;YACd,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YAErD,oBAAoB;YACpB,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,GAAe,EAAE,IAAI,CAAC,IAAc,EAAE,UAAU,CAAC,CAAC;YAEtF,mBAAmB;YACnB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;gBACpB,OAAO,CAAC,GAAG,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC;gBAC/B,MAAM,KAAK,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;aACnC;QACL,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,eAAe,EAAE,oCAAoC,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YAC/E,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,eAAe,EAAE,CAAC;YAC5C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC;YAC3C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QACxC,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,uBAAuB,EAAE,uBAAuB,EAAE,CAAC,KAAK,EAAE,EAAE;YACjE,OAAO,KAAK;iBACP,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gFAAgF;aAC7F,CAAC;iBACD,MAAM,CAAC,gBAAgB,EAAE;gBACtB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,oDAAoD;gBAC9D,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;aACd,CAAC;iBACD,MAAM,CAAC,aAAa,EAAE;gBACnB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,iDAAiD;gBAC3D,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;aACd,CAAC;iBACD,MAAM,CAAC,eAAe,EAAE;gBACrB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,2DAA2D;gBACrE,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,CAAC;aACb,CAAC;iBACD,MAAM,CAAC,QAAQ,EAAE;gBACd,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,6EAA6E;gBACvF,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,IAAI;aAChB,CAAC;iBACD,MAAM,CAAC,QAAQ,EAAE;gBA
Cd,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,wDAAwD;gBAClE,OAAO,EAAE,CAAC,UAAU,EAAE,OAAO,EAAE,QAAQ,CAAC;gBACxC,OAAO,EAAE,UAAU;aACtB,CAAC;iBACD,MAAM,CAAC,SAAS,EAAE;gBACf,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gDAAgD;gBAC1D,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,IAAI;aAChB,CAAC;iBACD,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAChC,CAAC,EAAE,CAAO,IAAI,EAAE,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC;YAE9C,oBAAoB;YACpB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,OAAO,CAAC,CAAC,CAAC;YACzE,MAAM,UAAU,GAAG,IAAI,mCAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC;YAElG,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC;gBACjC,UAAU;gBACV,UAAU;aACb,CAAC,CAAC;YAEH,cAAc;YACd,MAAM,KAAK,GAAG,IAAI,CAAC,KAAe,CAAC;YACnC,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,cAAc,CAAC,KAAK,EAAE;gBAC9C,YAAY,EAAE,IAAI,CAAC,aAAa;gBAChC,SAAS,EAAE,IAAI,CAAC,UAAU;aAC7B,CAAC,CAAC;YAEH,iBAAiB;YACjB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;gBAC1B,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;gBACzC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;gBACnD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;gBAC5D,IAAI,IAAI,CAAC,MAAM,IAAI,UAAU,EAAE;oBAC3B,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,YAAY,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;oBAC3F,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;wBACtC,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;wBAC5B,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;wBACrF,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;wBACpD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;wBAC1D,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;qBAC9C;iBACJ;qBAAM,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ,EAAE;oBAChC,MA
AM,IAAI,GAAG,MAAM,MAAM,CAAC,QAAQ,EAAE,CAAC;oBACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;wBAC3C,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;wBAC/B,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;wBAC9C,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;wBAC1C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;wBAC9C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;wBAClD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC;wBAClD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,CAAC;wBAC9C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;qBACtE;iBACJ;aACJ;QACL,CAAC,CAAA,CAAC;aACD,IAAI,EAAE;aACN,aAAa,EAAE;aACf,UAAU,EAAE,CAAC;IACtB,CAAC;CAAA;AA7OD,kBA6OC;AAGD,SAAe,WAAW,CAAC,KAAe,EAAE,QAAgB,EAAE,OAAe;;QACzE,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE;YAC1C,OAAO,KAAK,CAAC;SAChB;aAAM,IAAI,OAAO,QAAQ,IAAI,QAAQ,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;YAClE,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAClD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;SACxF;aAAM;YACH,MAAM,IAAI,KAAK,CAAC,+CAA+C,OAAO,6EAA6E,CAAC,CAAA;SACvJ;IACL,CAAC;CAAA"}
|
package/package.json
CHANGED
package/src/TextSplitter.ts
CHANGED
|
@@ -80,7 +80,7 @@ export class TextSplitter {
|
|
|
80
80
|
if (separators.length > 0) {
|
|
81
81
|
// Split by separator
|
|
82
82
|
separator = separators[0];
|
|
83
|
-
parts = text.split(separator);
|
|
83
|
+
parts = separator == ' ' ? this.splitBySpaces(text) : text.split(separator);
|
|
84
84
|
} else {
|
|
85
85
|
// Cut text in half
|
|
86
86
|
const half = Math.floor(text.length / 2);
|
|
@@ -176,6 +176,28 @@ export class TextSplitter {
|
|
|
176
176
|
return false;
|
|
177
177
|
}
|
|
178
178
|
|
|
179
|
+
/**
 * Splits `text` on single spaces and greedily regroups the words into
 * parts whose encoded length fits within the configured chunk size.
 *
 * Words are appended to the current part (re-joined with one space) for as
 * long as `tokenizer.encode(candidate).length` is at most `chunkSize`. The
 * first word that would exceed the limit closes the current part and starts
 * the next one, so the space at each part boundary is not kept in either part.
 * A word that is over the limit on its own is still emitted as its own part.
 *
 * @param text Text to split.
 * @returns The regrouped parts; always at least one element (the input
 * itself when it contains no spaces, including the empty string).
 */
private splitBySpaces(text: string): string[] {
    const { tokenizer, chunkSize } = this._config;
    const parts: string[] = [];

    // split(' ') always yields at least one element ('' -> ['']), so the
    // first word can seed the current part without an empty-array fallback.
    const words = text.split(' ');
    let part = words[0];
    for (let i = 1; i < words.length; i++) {
        const nextWord = words[i];
        const candidate = part + ' ' + nextWord;
        if (tokenizer.encode(candidate).length <= chunkSize) {
            // Still within the budget: keep growing the current part.
            part = candidate;
        } else {
            // Budget exceeded: close the current part and start a new one.
            parts.push(part);
            part = nextWord;
        }
    }

    // Flush the part that was still being built when the words ran out.
    parts.push(part);
    return parts;
}
|
|
200
|
+
|
|
179
201
|
private getSeparators(docType?: string): string[] {
|
|
180
202
|
switch (docType ?? '') {
|
|
181
203
|
case "cpp":
|
package/src/vectra-cli.ts
CHANGED
|
@@ -41,6 +41,11 @@ export async function run() {
|
|
|
41
41
|
describe: 'path to a file containing a list of web pages to add',
|
|
42
42
|
type: 'string'
|
|
43
43
|
})
|
|
44
|
+
.option('cookie', {
|
|
45
|
+
alias: 'c',
|
|
46
|
+
describe: 'optional cookies to add to web fetch requests',
|
|
47
|
+
type: 'string'
|
|
48
|
+
})
|
|
44
49
|
.option('chunk-size', {
|
|
45
50
|
alias: 'cs',
|
|
46
51
|
describe: 'size of the generated chunks in tokens (defaults to 512)',
|
|
@@ -78,21 +83,26 @@ export async function run() {
|
|
|
78
83
|
const uris = await getItemList(args.uri as string[], args.list as string, 'web page');
|
|
79
84
|
|
|
80
85
|
// Fetch web pages
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
await
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
86
|
+
await index.beginUpdate();
|
|
87
|
+
try {
|
|
88
|
+
const fileFetcher = new FileFetcher();
|
|
89
|
+
const webFetcher = args.cookie ? new WebFetcher({ headers: { "cookie": args.cookie }}) : new WebFetcher();
|
|
90
|
+
for (const path of uris) {
|
|
91
|
+
try {
|
|
92
|
+
console.log(Colorize.progress(`fetching ${path}`));
|
|
93
|
+
const fetcher = path.startsWith('http') ? webFetcher : fileFetcher;
|
|
94
|
+
await fetcher.fetch(path, async (uri, text, docType) => {
|
|
95
|
+
console.log(Colorize.replaceLine(Colorize.progress(`indexing ${uri}`)));
|
|
96
|
+
await index.upsertDocument(uri, text, docType);
|
|
97
|
+
console.log(Colorize.replaceLine(Colorize.success(`added ${uri}`)));
|
|
98
|
+
return true;
|
|
99
|
+
});
|
|
100
|
+
} catch (err: unknown) {
|
|
101
|
+
console.log(Colorize.replaceLine(Colorize.error(`Error adding: ${path}\n${(err as Error).message}`)));
|
|
102
|
+
}
|
|
95
103
|
}
|
|
104
|
+
} finally {
|
|
105
|
+
await index.endUpdate();
|
|
96
106
|
}
|
|
97
107
|
})
|
|
98
108
|
.command('remove <index>', `removes one or more documents from an index`, (yargs) => {
|