vectra 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/vectra.js +3 -0
- package/lib/GPT3Tokenizer.d.ts +9 -0
- package/lib/GPT3Tokenizer.d.ts.map +1 -0
- package/lib/GPT3Tokenizer.js +17 -0
- package/lib/GPT3Tokenizer.js.map +1 -0
- package/lib/ItemSelector.d.ts +1 -1
- package/lib/ItemSelector.d.ts.map +1 -1
- package/lib/ItemSelector.js.map +1 -1
- package/lib/LocalDocument.d.ts +16 -0
- package/lib/LocalDocument.d.ts.map +1 -0
- package/lib/LocalDocument.js +99 -0
- package/lib/LocalDocument.js.map +1 -0
- package/lib/LocalDocumentIndex.d.ts +48 -0
- package/lib/LocalDocumentIndex.d.ts.map +1 -0
- package/lib/LocalDocumentIndex.js +367 -0
- package/lib/LocalDocumentIndex.js.map +1 -0
- package/lib/LocalDocumentResult.d.ts +12 -0
- package/lib/LocalDocumentResult.d.ts.map +1 -0
- package/lib/LocalDocumentResult.js +186 -0
- package/lib/LocalDocumentResult.js.map +1 -0
- package/lib/LocalIndex.d.ts +9 -63
- package/lib/LocalIndex.d.ts.map +1 -1
- package/lib/LocalIndex.js +14 -1
- package/lib/LocalIndex.js.map +1 -1
- package/lib/OpenAIEmbeddings.d.ts +98 -0
- package/lib/OpenAIEmbeddings.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.js +139 -0
- package/lib/OpenAIEmbeddings.js.map +1 -0
- package/lib/TextSplitter.d.ts +17 -0
- package/lib/TextSplitter.d.ts.map +1 -0
- package/lib/TextSplitter.js +460 -0
- package/lib/TextSplitter.js.map +1 -0
- package/lib/WebFetcher.d.ts +16 -0
- package/lib/WebFetcher.d.ts.map +1 -0
- package/lib/WebFetcher.js +144 -0
- package/lib/WebFetcher.js.map +1 -0
- package/lib/index.d.ts +8 -0
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +13 -1
- package/lib/index.js.map +1 -1
- package/lib/internals/Colorize.d.ts +14 -0
- package/lib/internals/Colorize.d.ts.map +1 -0
- package/lib/internals/Colorize.js +64 -0
- package/lib/internals/Colorize.js.map +1 -0
- package/lib/internals/index.d.ts +3 -0
- package/lib/internals/index.d.ts.map +1 -0
- package/lib/internals/index.js +19 -0
- package/lib/internals/index.js.map +1 -0
- package/lib/internals/types.d.ts +42 -0
- package/lib/internals/types.d.ts.map +1 -0
- package/lib/internals/types.js +3 -0
- package/lib/internals/types.js.map +1 -0
- package/lib/types.d.ts +133 -0
- package/lib/types.d.ts.map +1 -0
- package/lib/types.js +3 -0
- package/lib/types.js.map +1 -0
- package/lib/vectra-cli.d.ts +2 -0
- package/lib/vectra-cli.d.ts.map +1 -0
- package/lib/vectra-cli.js +276 -0
- package/lib/vectra-cli.js.map +1 -0
- package/package.json +21 -3
- package/src/GPT3Tokenizer.ts +15 -0
- package/src/ItemSelector.ts +9 -9
- package/src/LocalDocument.ts +70 -0
- package/src/LocalDocumentIndex.ts +355 -0
- package/src/LocalDocumentResult.ts +206 -0
- package/src/LocalIndex.ts +12 -78
- package/src/OpenAIEmbeddings.ts +205 -0
- package/src/TextSplitter.ts +480 -0
- package/src/WebFetcher.ts +128 -0
- package/src/index.ts +8 -0
- package/src/internals/Colorize.ts +64 -0
- package/src/internals/index.ts +2 -0
- package/src/internals/types.ts +46 -0
- package/src/types.ts +160 -0
- package/src/vectra-cli.ts +238 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vectra-cli.js","sourceRoot":"","sources":["../src/vectra-cli.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,wDAAgC;AAChC,2CAAwC;AACxC,6DAA0D;AAC1D,6CAA0C;AAC1C,yDAAsD;AACtD,2CAAuC;AAGvC,SAAsB,GAAG;;QACrB,kBAAkB;QAClB,MAAM,IAAI,GAAG,MAAM,IAAA,eAAK,EAAC,IAAA,iBAAO,EAAC,OAAO,CAAC,IAAI,CAAC,CAAC;aAC1C,OAAO,CAAC,gBAAgB,EAAE,0BAA0B,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YACtE,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,CAAC,WAAW,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;QAClE,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,gBAAgB,EAAE,gCAAgC,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YAC5E,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,MAAM,KAAK,CAAC,WAAW,EAAE,CAAC;QAC9B,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,iBAAiB,EAAE,wCAAwC,EAAE,CAAC,KAAK,EAAE,EAAE;YAC5E,OAAO,KAAK;iBACP,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gFAAgF;gBAC1F,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,KAAK,EAAE;gBACX,KAAK,EAAE,GAAG;gBACV,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,sCAAsC;gBAChD,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,sDAAsD;gBAChE,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,YAAY,EAAE;gBAClB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,0DAA0D;gBACpE,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,GAAG;aACf,CAAC;iBACD,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE;oBAChD,OAAO,IAAI,CAAC;iBACf;qBAAM,IAAI,OAAO,IAAI,CAAC,IAAI,IAAI,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;oBACpE,OAAO,IAAI,CAAC;iBACf;qBAAM;oBACH,MAAM,IAAI,KAAK,CAAC,mJAAmJ,CAAC,CAAC;iBACxK;YACL,CAAC,CAAC;iBACD,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAChC,CAAC,EAAE,CAAO,IAAI,EAAE,EAAE;YACd,OAAO,CAAC
,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC,CAAC;YAEzD,oBAAoB;YACpB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,OAAO,CAAC,CAAC,CAAC;YACzE,MAAM,UAAU,GAAG,IAAI,mCAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC;YAElG,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC;gBACjC,UAAU;gBACV,UAAU;gBACV,cAAc,EAAE;oBACZ,SAAS,EAAE,IAAI,CAAC,SAAS;iBAC5B;aACJ,CAAC,CAAC;YAEH,oBAAoB;YACpB,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,GAAe,EAAE,IAAI,CAAC,IAAc,EAAE,UAAU,CAAC,CAAC;YAEtF,kBAAkB;YAClB,MAAM,OAAO,GAAG,IAAI,uBAAU,EAAE,CAAC;YACjC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;gBACpB,IAAI;oBACA,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,QAAQ,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC,CAAC;oBAClD,MAAM,OAAO,GAAI,MAAM,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;oBAC1C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,QAAQ,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;oBACxE,MAAM,KAAK,CAAC,cAAc,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;oBACzC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,OAAO,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;iBACvE;gBAAC,OAAO,GAAY,EAAE;oBACnB,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,KAAK,CAAC,iBAAiB,GAAG,KAAM,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;iBACxG;aACJ;QACL,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,gBAAgB,EAAE,6CAA6C,EAAE,CAAC,KAAK,EAAE,EAAE;YAChF,OAAO,KAAK;iBACP,MAAM,CAAC,KAAK,EAAE;gBACX,KAAK,EAAE,GAAG;gBACV,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,6BAA6B;gBACvC,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,yDAAyD;gBACnE,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE;oBAChD,OAAO,IAAI,CAAC;iBACf;qBAAM,IAAI,OAAO,IAAI,CAAC,IAAI,IAAI,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;oBACpE,OAAO,IAAI,CAAC;iBACf;qBAAM;oBACH,MAAM,IAAI,KAAK,CAAC,mJAAmJ,CAAC,CAAC;iBACxK;YACL,CAAC,CAAC,CAAC;QACX,CAAC,EAAE,CAAO,IAAI,EAAE,EAAE;YACd,mBAAmB;YACnB,MAAM,UAAU,GAAG,IA
AI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YAErD,oBAAoB;YACpB,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,GAAe,EAAE,IAAI,CAAC,IAAc,EAAE,UAAU,CAAC,CAAC;YAEtF,mBAAmB;YACnB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;gBACpB,OAAO,CAAC,GAAG,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC;gBAC/B,MAAM,KAAK,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;aACnC;QACL,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,eAAe,EAAE,oCAAoC,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YAC/E,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,eAAe,EAAE,CAAC;YAC5C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC;YAC3C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QACxC,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,uBAAuB,EAAE,uBAAuB,EAAE,CAAC,KAAK,EAAE,EAAE;YACjE,OAAO,KAAK;iBACP,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gFAAgF;aAC7F,CAAC;iBACD,MAAM,CAAC,gBAAgB,EAAE;gBACtB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,oDAAoD;gBAC9D,IAAI,EAAE,OAAO;gBACb,OAAO,EAAE,EAAE;aACd,CAAC;iBACD,MAAM,CAAC,aAAa,EAAE;gBACnB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,iDAAiD;gBAC3D,IAAI,EAAE,OAAO;gBACb,OAAO,EAAE,EAAE;aACd,CAAC;iBACD,MAAM,CAAC,eAAe,EAAE;gBACrB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,2DAA2D;gBACrE,IAAI,EAAE,OAAO;gBACb,OAAO,EAAE,CAAC;aACb,CAAC;iBACD,MAAM,CAAC,QAAQ,EAAE;gBACd,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,6EAA6E;gBACvF,IAAI,EAAE,OAAO;gBACb,OAAO,EAAE,IAAI;aAChB,CAAC;iBACD,MAAM,CAAC,QAAQ,EAAE;gBACd,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,wDAAwD;gBAClE,OAAO,EAAE,CAAC,UAAU,EAAE,OAAO,EAAE,QAAQ,CAAC;gBACxC,OAAO,EAAE,UAAU;aACtB,CAAC;iBACD,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAChC,CAAC,EAAE,CAAO,IAAI,EAAE,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC;YAE9C,oBAAoB;YACpB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,OAAO,CAAC,CAAC,CAAC;YACzE,MAAM,UAAU,GAAG,IAAI,mCAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC;YAElG,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,u
CAAkB,CAAC;gBACjC,UAAU;gBACV,UAAU;aACb,CAAC,CAAC;YAEH,cAAc;YACd,MAAM,KAAK,GAAG,IAAI,CAAC,KAAe,CAAC;YACnC,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,cAAc,CAAC,KAAK,EAAE;gBAC9C,YAAY,EAAE,IAAI,CAAC,aAAa;gBAChC,SAAS,EAAE,IAAI,CAAC,UAAU;aAC7B,CAAC,CAAC;YAEH,iBAAiB;YACjB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;gBAC1B,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;gBACzC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;gBACnD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;gBAC5D,IAAI,IAAI,CAAC,MAAM,IAAI,UAAU,EAAE;oBAC3B,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC;oBAC7E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;wBACtC,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;wBAC5B,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,CAAC;wBAChF,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;wBACpD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;wBAC1D,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;qBAC9C;iBACJ;qBAAM,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ,EAAE;oBAChC,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,QAAQ,EAAE,CAAC;oBACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;wBAC3C,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;wBAC/B,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;wBAC9C,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;wBAC1C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;wBAC9C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;wBAClD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC;wBAClD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,CAAC;wBAC9C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG
,CAAC,CAAC,CAAC,CAAC,CAAC;qBACtE;iBACJ;aACJ;QACL,CAAC,CAAA,CAAC;aACD,IAAI,EAAE;aACN,aAAa,EAAE;aACf,UAAU,EAAE,CAAC;IACtB,CAAC;CAAA;AAxND,kBAwNC;AAGD,SAAe,WAAW,CAAC,KAAe,EAAE,QAAgB,EAAE,OAAe;;QACzE,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE;YAC1C,OAAO,KAAK,CAAC;SAChB;aAAM,IAAI,OAAO,QAAQ,IAAI,QAAQ,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;YAClE,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAClD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;SACxF;aAAM;YACH,MAAM,IAAI,KAAK,CAAC,+CAA+C,OAAO,6EAA6E,CAAC,CAAA;SACvJ;IACL,CAAC;CAAA"}
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "vectra",
|
|
3
3
|
"author": "Steven Ickman",
|
|
4
4
|
"description": "A vector database that uses the local file system for storage.",
|
|
5
|
-
"version": "0.
|
|
5
|
+
"version": "0.2.0",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"keywords": [
|
|
8
8
|
"gpt"
|
|
@@ -16,6 +16,9 @@
|
|
|
16
16
|
},
|
|
17
17
|
"main": "./lib/index.js",
|
|
18
18
|
"types": "./lib/index.d.ts",
|
|
19
|
+
"bin": {
|
|
20
|
+
"vectra": "./bin/vectra.js"
|
|
21
|
+
},
|
|
19
22
|
"typesVersions": {
|
|
20
23
|
"<3.9": {
|
|
21
24
|
"*": [
|
|
@@ -24,13 +27,28 @@
|
|
|
24
27
|
}
|
|
25
28
|
},
|
|
26
29
|
"dependencies": {
|
|
27
|
-
"
|
|
30
|
+
"axios": "^1.3.4",
|
|
31
|
+
"cheerio": "^1.0.0-rc.12",
|
|
32
|
+
"dotenv": "^8.2.0",
|
|
33
|
+
"gpt-3-encoder": "1.1.4",
|
|
34
|
+
"json-colorizer": "^2.2.2",
|
|
35
|
+
"openai": "^3.2.1",
|
|
36
|
+
"uuid": "^9.0.0",
|
|
37
|
+
"yargs": "^17.7.2"
|
|
28
38
|
},
|
|
29
39
|
"resolutions": {
|
|
30
40
|
},
|
|
31
41
|
"devDependencies": {
|
|
32
42
|
"@types/node": "^14.14.31",
|
|
33
|
-
"@types/
|
|
43
|
+
"@types/mocha": "^8.2.0",
|
|
44
|
+
"@types/assert": "^1.5.3",
|
|
45
|
+
"@types/uuid": "9.0.1",
|
|
46
|
+
"@types/yargs": "17.0.24",
|
|
47
|
+
"mocha": "10.2.0",
|
|
48
|
+
"nyc": "^15.1.0",
|
|
49
|
+
"shx": "^0.3.2",
|
|
50
|
+
"ts-mocha": "10.0.0",
|
|
51
|
+
"typescript": "^4.2.3"
|
|
34
52
|
},
|
|
35
53
|
"scripts": {
|
|
36
54
|
"build": "tsc -b",
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { Tokenizer } from "./types";
|
|
2
|
+
import { encode, decode } from "gpt-3-encoder";
|
|
3
|
+
|
|
4
|
+
/**
 * `Tokenizer` implementation backed by the `gpt-3-encoder` package.
 *
 * Both methods are thin pass-throughs to the package's `encode()` and
 * `decode()` functions.
 */
export class GPT3Tokenizer implements Tokenizer {
    /**
     * Turns a list of token ids back into text.
     * @param tokens Token ids to decode.
     * @returns The text returned by `gpt-3-encoder`'s `decode()`.
     */
    public decode(tokens: number[]): string {
        const text = decode(tokens);
        return text;
    }

    /**
     * Turns text into a list of token ids.
     * @param text Text to encode.
     * @returns The token ids returned by `gpt-3-encoder`'s `encode()`.
     */
    public encode(text: string): number[] {
        const tokens = encode(text);
        return tokens;
    }
}
|
package/src/ItemSelector.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { MetadataFilter, MetadataTypes } from './
|
|
1
|
+
import { MetadataFilter, MetadataTypes } from './types';
|
|
2
2
|
|
|
3
3
|
export class ItemSelector {
|
|
4
4
|
/**
|
|
@@ -62,12 +62,12 @@ export class ItemSelector {
|
|
|
62
62
|
for (const key in filter) {
|
|
63
63
|
switch (key) {
|
|
64
64
|
case '$and':
|
|
65
|
-
if (!filter[key]
|
|
65
|
+
if (!filter[key]!.every((f: MetadataFilter) => this.select(metadata, f))) {
|
|
66
66
|
return false;
|
|
67
67
|
}
|
|
68
68
|
break;
|
|
69
69
|
case '$or':
|
|
70
|
-
if (!filter[key]
|
|
70
|
+
if (!filter[key]!.some((f: MetadataFilter) => this.select(metadata, f))) {
|
|
71
71
|
return false;
|
|
72
72
|
}
|
|
73
73
|
break;
|
|
@@ -120,32 +120,32 @@ export class ItemSelector {
|
|
|
120
120
|
}
|
|
121
121
|
break;
|
|
122
122
|
case '$gt':
|
|
123
|
-
if (typeof value != 'number' || value <= filter[key]) {
|
|
123
|
+
if (typeof value != 'number' || value <= filter[key]!) {
|
|
124
124
|
return false;
|
|
125
125
|
}
|
|
126
126
|
break;
|
|
127
127
|
case '$gte':
|
|
128
|
-
if (typeof value != 'number' || value < filter[key]) {
|
|
128
|
+
if (typeof value != 'number' || value < filter[key]!) {
|
|
129
129
|
return false;
|
|
130
130
|
}
|
|
131
131
|
break;
|
|
132
132
|
case '$lt':
|
|
133
|
-
if (typeof value != 'number' || value >= filter[key]) {
|
|
133
|
+
if (typeof value != 'number' || value >= filter[key]!) {
|
|
134
134
|
return false;
|
|
135
135
|
}
|
|
136
136
|
break;
|
|
137
137
|
case '$lte':
|
|
138
|
-
if (typeof value != 'number' || value > filter[key]) {
|
|
138
|
+
if (typeof value != 'number' || value > filter[key]!) {
|
|
139
139
|
return false;
|
|
140
140
|
}
|
|
141
141
|
break;
|
|
142
142
|
case '$in':
|
|
143
|
-
if (typeof value == 'boolean' || !filter[key]
|
|
143
|
+
if (typeof value == 'boolean' || !filter[key]!.includes(value)) {
|
|
144
144
|
return false;
|
|
145
145
|
}
|
|
146
146
|
break;
|
|
147
147
|
case '$nin':
|
|
148
|
-
if (typeof value == 'boolean' || filter[key]
|
|
148
|
+
if (typeof value == 'boolean' || filter[key]!.includes(value)) {
|
|
149
149
|
return false;
|
|
150
150
|
}
|
|
151
151
|
break;
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import * as fs from 'fs/promises';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
import { MetadataTypes } from './types';
|
|
4
|
+
|
|
5
|
+
/**
 * Handle to a single document stored in a folder on disk.
 *
 * The document's text is read from `<folderPath>/<id>.txt` and its metadata
 * from `<folderPath>/<id>.json`. Both are loaded lazily and cached on the
 * instance after the first successful read.
 */
export class LocalDocument {
    private readonly _folderPath: string;
    private readonly _id: string;
    private readonly _uri: string;
    private _metadata: Record<string,MetadataTypes>|undefined;
    private _text: string|undefined;

    /**
     * @param folderPath Folder containing the document's files.
     * @param id Document id; used as the base name of the files on disk.
     * @param uri URI of the document; used in error messages.
     */
    public constructor(folderPath: string, id: string, uri: string) {
        this._uri = uri;
        this._id = id;
        this._folderPath = folderPath;
    }

    /** Folder containing the document's files. */
    public get folderPath(): string {
        return this._folderPath;
    }

    /** Document id. */
    public get id(): string {
        return this._id;
    }

    /** Document URI. */
    public get uri(): string {
        return this._uri;
    }

    /**
     * Checks whether a metadata file can be accessed for this document.
     * @returns `true` when `<id>.json` is accessible, `false` on any error.
     */
    public async hasMetadata(): Promise<boolean> {
        let accessible = true;
        try {
            await fs.access(path.join(this.folderPath, `${this.id}.json`));
        } catch {
            // Any failure is reported as "no metadata".
            accessible = false;
        }

        return accessible;
    }

    /**
     * Reads and parses the document's metadata file, caching the result.
     * @returns The parsed contents of `<id>.json`.
     * @throws Error when the file cannot be read or is not valid JSON.
     */
    public async loadMetadata(): Promise<Record<string,MetadataTypes>> {
        // Serve from the cache once populated.
        if (this._metadata != undefined) {
            return this._metadata;
        }

        let raw: string;
        try {
            const contents = await fs.readFile(path.join(this.folderPath, `${this.id}.json`));
            raw = contents.toString();
        } catch (err: unknown) {
            throw new Error(`Error reading metadata for document "${this.uri}": ${(err as any).toString()}`);
        }

        try {
            this._metadata = JSON.parse(raw);
        } catch (err: unknown) {
            throw new Error(`Error parsing metadata for document "${this.uri}": ${(err as any).toString()}`);
        }

        return this._metadata!;
    }

    /**
     * Reads the document's text file, caching the result.
     * @returns The contents of `<id>.txt`.
     * @throws Error when the file cannot be read.
     */
    public async loadText(): Promise<string> {
        // Serve from the cache once populated.
        if (this._text != undefined) {
            return this._text;
        }

        let loaded: string;
        try {
            const contents = await fs.readFile(path.join(this.folderPath, `${this.id}.txt`));
            loaded = contents.toString();
        } catch (err: unknown) {
            throw new Error(`Error reading text file for document "${this.uri}": ${(err as any).toString()}`);
        }

        this._text = loaded;
        return loaded;
    }
}
|
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
import * as fs from 'fs/promises';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
import { v4 } from 'uuid';
|
|
4
|
+
import { GPT3Tokenizer } from "./GPT3Tokenizer";
|
|
5
|
+
import { CreateIndexConfig, LocalIndex } from "./LocalIndex";
|
|
6
|
+
import { TextSplitter, TextSplitterConfig } from "./TextSplitter";
|
|
7
|
+
import { MetadataFilter, EmbeddingsModel, Tokenizer, MetadataTypes, EmbeddingsResponse, QueryResult, DocumentChunkMetadata, DocumentCatalogStats } from "./types";
|
|
8
|
+
import { LocalDocumentResult } from './LocalDocumentResult';
|
|
9
|
+
import { LocalDocument } from './LocalDocument';
|
|
10
|
+
|
|
11
|
+
const EMBEDDINGS_BATCH_SIZE = 500;
|
|
12
|
+
|
|
13
|
+
/**
 * Optional settings accepted by `LocalDocumentIndex.queryDocuments()`.
 */
export interface DocumentQueryOptions {
    /**
     * Maximum number of documents to return.
     * `queryDocuments()` uses 10 when the option is omitted.
     */
    maxDocuments?: number;

    /**
     * Maximum number of chunks to request from the index for the query.
     * `queryDocuments()` uses 50 when the option is omitted.
     */
    maxChunks?: number;

    /**
     * Metadata filter forwarded to the index's `queryItems()` call.
     */
    filter?: MetadataFilter;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Settings accepted by the `LocalDocumentIndex` constructor.
 */
export interface LocalDocumentIndexConfig {
    /**
     * Folder holding the index; passed to the `LocalIndex` constructor and
     * also used for the catalog, text and metadata files.
     */
    folderPath: string;

    /**
     * Embeddings model. `upsertDocument()` and `queryDocuments()` throw when
     * it is not provided.
     */
    embeddings?: EmbeddingsModel;

    /**
     * Tokenizer to use. When omitted, `chunkingConfig.tokenizer` is used, and
     * failing that a new `GPT3Tokenizer`.
     */
    tokenizer?: Tokenizer;

    /**
     * Overrides for the text splitter settings. The constructor's defaults
     * are `keepSeparators: true`, `chunkSize: 512` and `chunkOverlap: 0`.
     */
    chunkingConfig?: Partial<TextSplitterConfig>;
}
|
|
25
|
+
|
|
26
|
+
/**
 * A `LocalIndex` that stores whole documents.
 *
 * Each document is split into chunks, an embedding is generated per chunk and
 * the chunks are inserted into the underlying index. A `catalog.json` file in
 * the index folder maps document URIs to document ids (and back), and the
 * document's text and optional metadata are written next to it as
 * `<documentId>.txt` and `<documentId>.json`.
 */
export class LocalDocumentIndex extends LocalIndex {
    private readonly _embeddings?: EmbeddingsModel;
    private readonly _tokenizer: Tokenizer;
    private readonly _chunkingConfig?: TextSplitterConfig;
    // Catalog as last loaded from, or written to, disk.
    private _catalog?: DocumentCatalog;
    // Working copy of the catalog while an update is in progress.
    private _newCatalog?: DocumentCatalog;

    /**
     * Creates a new `LocalDocumentIndex`.
     * @param config Index settings. Chunking defaults are `keepSeparators: true`,
     * `chunkSize: 512` and `chunkOverlap: 0`; the tokenizer falls back to
     * `chunkingConfig.tokenizer` and then to a new `GPT3Tokenizer`.
     */
    public constructor(config: LocalDocumentIndexConfig) {
        super(config.folderPath);
        this._embeddings = config.embeddings;
        this._chunkingConfig = Object.assign({
            keepSeparators: true,
            chunkSize: 512,
            chunkOverlap: 0,
        } as TextSplitterConfig, config.chunkingConfig);
        this._tokenizer = config.tokenizer ?? this._chunkingConfig.tokenizer ?? new GPT3Tokenizer();
        // The splitter always uses the tokenizer that was finally selected.
        this._chunkingConfig.tokenizer = this._tokenizer;
    }

    /**
     * Returns true if the document catalog exists.
     * @returns `true` when `catalog.json` is accessible in the index folder,
     * `false` on any error.
     */
    public async isCatalogCreated(): Promise<boolean> {
        try {
            await fs.access(path.join(this.folderPath, 'catalog.json'));
            return true;
        } catch (err: unknown) {
            return false;
        }
    }

    /**
     * Looks up the id of a document.
     * @param uri URI of the document.
     * @returns The document id, or `undefined` when the URI is not in the catalog.
     */
    public async getDocumentId(uri: string): Promise<string | undefined> {
        await this.loadIndexData();
        return this._catalog?.uriToId[uri];
    }

    /**
     * Looks up the URI of a document.
     * @param documentId Id of the document.
     * @returns The document URI, or `undefined` when the id is not in the catalog.
     */
    public async getDocumentUri(documentId: string): Promise<string | undefined> {
        await this.loadIndexData();
        return this._catalog?.idToUri[documentId];
    }

    /**
     * Creates the underlying index and then loads (or initializes) the catalog.
     * @param config Optional settings forwarded to `LocalIndex.createIndex()`.
     */
    public async createIndex(config?: CreateIndexConfig): Promise<void> {
        await super.createIndex(config);
        await this.loadIndexData();
    }

    /**
     * Removes a document, its chunks and its files.
     * @remarks
     * Does nothing when the URI is not in the catalog. The chunks and the
     * catalog entry are removed in a single update; the text and metadata files
     * are deleted afterwards.
     * @param uri URI of the document to delete.
     * @throws Error when the index update fails or the text file cannot be removed.
     */
    public async deleteDocument(uri: string): Promise<void> {
        // Lookup document ID
        const documentId = await this.getDocumentId(uri);
        if (documentId == undefined) {
            return;
        }

        // Delete document chunks from index and remove from catalog
        await this.beginUpdate();
        try {
            // Get list of chunks for document
            const chunks = await this.listItemsByMetadata<DocumentChunkMetadata>({ documentId });

            // Delete chunks
            for (const chunk of chunks) {
                await this.deleteItem(chunk.id);
            }

            // Remove entry from catalog
            delete this._newCatalog!.uriToId[uri];
            delete this._newCatalog!.idToUri[documentId];
            this._newCatalog!.count--;

            // Commit changes
            await this.endUpdate();
        } catch (err: unknown) {
            // Cancel update and raise error
            this.cancelUpdate();
            throw new Error(`Error deleting document "${uri}": ${String(err)}`);
        }

        // Delete text file from disk
        try {
            await fs.unlink(path.join(this.folderPath, `${documentId}.txt`));
        } catch (err: unknown) {
            throw new Error(`Error removing text file for document "${uri}" from disk: ${String(err)}`);
        }

        // Delete metadata file from disk
        try {
            await fs.unlink(path.join(this.folderPath, `${documentId}.json`));
        } catch (err: unknown) {
            // Ignore error: the metadata file is only written when metadata was supplied.
        }
    }

    /**
     * Returns statistics about the catalog and the underlying index.
     * @returns Catalog version and document count, plus the item count and
     * `metadata_config` reported by `getIndexStats()`.
     */
    public async getCatalogStats(): Promise<DocumentCatalogStats> {
        const stats = await this.getIndexStats();

        // getIndexStats() lives on the base class, which is not visible here;
        // make sure the catalog is loaded before reading it.
        await this.loadIndexData();
        const catalog = this._catalog!;

        return {
            version: catalog.version,
            documents: catalog.count,
            chunks: stats.items,
            metadata_config: stats.metadata_config
        };
    }

    /**
     * Adds a document to the catalog.
     * @remarks
     * If a document with the same uri already exists, it will be replaced and
     * keeps its document id. Embeddings are generated before the existing
     * document is removed, so a failure while generating them leaves the
     * previous version untouched.
     * @param uri URI of the document. The text after the last `.` is lower-cased
     * and used as the splitter's `docType`.
     * @param text Text of the document.
     * @param metadata Optional metadata; merged into every chunk's metadata and
     * saved as `<documentId>.json`.
     * @returns Inserted document
     * @throws Error when no embeddings model is configured, when embeddings
     * cannot be generated, or when the index update fails.
     */
    public async upsertDocument(uri: string, text: string, metadata?: Record<string, MetadataTypes>): Promise<LocalDocument> {
        // Ensure embeddings configured
        if (!this._embeddings) {
            throw new Error(`Embeddings model not configured.`);
        }

        // Reuse the existing document ID or generate a new one
        const existingId = await this.getDocumentId(uri);
        const documentId = existingId ?? v4();

        // Populate docType based on extension
        const config = Object.assign({}, this._chunkingConfig);
        const pos = uri.lastIndexOf('.');
        if (pos >= 0) {
            const ext = uri.substring(pos + 1).toLowerCase();
            config.docType = ext;
        }

        // Split text into chunks
        const splitter = new TextSplitter(config);
        const chunks = splitter.split(text);

        // Break chunks into batches for embedding generation
        const chunkBatches: string[][] = [];
        let currentBatch: string[] = [];
        for (const chunk of chunks) {
            currentBatch.push(chunk.text);
            if (currentBatch.length >= EMBEDDINGS_BATCH_SIZE) {
                chunkBatches.push(currentBatch);
                currentBatch = [];
            }
        }
        if (currentBatch.length > 0) {
            chunkBatches.push(currentBatch);
        }

        // Generate embeddings for chunks
        const embeddings: number[][] = [];
        for (const batch of chunkBatches) {
            let response: EmbeddingsResponse;
            try {
                response = await this._embeddings.createEmbeddings(batch);
            } catch (err: unknown) {
                throw new Error(`Error generating embeddings: ${String(err)}`);
            }

            // Check for error
            if (response.status != 'success') {
                throw new Error(`Error generating embeddings: ${response.message}`);
            }

            // Add embeddings to output
            for (const embedding of response.output ?? []) {
                embeddings.push(embedding);
            }
        }

        // Every chunk needs a vector; refuse to index a partial result.
        if (embeddings.length != chunks.length) {
            throw new Error(`Error generating embeddings: expected ${chunks.length} embeddings but received ${embeddings.length}.`);
        }

        // Delete the existing document only once its replacement is ready
        if (existingId != undefined) {
            await this.deleteDocument(uri);
        }

        // Add document chunks to index
        await this.beginUpdate();
        try {
            // Add chunks to index
            for (let i = 0; i < chunks.length; i++) {
                const chunk = chunks[i];
                const embedding = embeddings[i];
                const chunkMetadata: DocumentChunkMetadata = Object.assign({
                    documentId,
                    startPos: chunk.startPos,
                    endPos: chunk.endPos,
                }, metadata);
                await this.insertItem({
                    id: v4(),
                    metadata: chunkMetadata,
                    vector: embedding,
                });
            }

            // Save metadata file to disk
            if (metadata != undefined) {
                await fs.writeFile(path.join(this.folderPath, `${documentId}.json`), JSON.stringify(metadata));
            }

            // Save text file to disk
            await fs.writeFile(path.join(this.folderPath, `${documentId}.txt`), text);

            // Add entry to catalog
            this._newCatalog!.uriToId[uri] = documentId;
            this._newCatalog!.idToUri[documentId] = uri;
            this._newCatalog!.count++;

            // Commit changes
            await this.endUpdate();
        } catch (err: unknown) {
            // Cancel update and raise error
            this.cancelUpdate();
            throw new Error(`Error adding document "${uri}": ${String(err)}`);
        }

        // Return document
        return new LocalDocument(this.folderPath, documentId, uri);
    }

    /**
     * Finds the documents that best match a query.
     * @remarks
     * An embedding is generated for the query, up to `maxChunks` chunks are
     * fetched from the index, the chunks are grouped by document, and the
     * resulting documents are sorted by descending score.
     * @param query Text to search for.
     * @param options Optional limits and metadata filter. `maxDocuments`
     * defaults to 10 and `maxChunks` to 50.
     * @returns At most `maxDocuments` document results, best match first.
     * @throws Error when no embeddings model is configured or the query
     * embedding cannot be generated.
     */
    public async queryDocuments(query: string, options?: DocumentQueryOptions): Promise<LocalDocumentResult[]> {
        // Ensure embeddings configured
        if (!this._embeddings) {
            throw new Error(`Embeddings model not configured.`);
        }

        // Resolve options; `??` keeps the defaults when a caller passes an explicit undefined
        const maxDocuments = options?.maxDocuments ?? 10;
        const maxChunks = options?.maxChunks ?? 50;
        const filter = options?.filter;

        // Generate embeddings for query
        let embeddings: EmbeddingsResponse;
        try {
            embeddings = await this._embeddings.createEmbeddings(query);
        } catch (err: unknown) {
            throw new Error(`Error generating embeddings for query: ${String(err)}`);
        }

        // Check for error
        if (embeddings.status != 'success') {
            throw new Error(`Error generating embeddings for query: ${embeddings.message}`);
        }

        const queryVector = embeddings.output?.[0];
        if (queryVector == undefined) {
            throw new Error(`Error generating embeddings for query: no embedding was returned.`);
        }

        // Query index for chunks
        const results = await this.queryItems<DocumentChunkMetadata>(queryVector, maxChunks, filter);

        // Group chunks by document
        const documentChunks: { [documentId: string]: QueryResult<DocumentChunkMetadata>[]; } = {};
        for (const result of results) {
            const metadata = result.item.metadata;
            if (documentChunks[metadata.documentId] == undefined) {
                documentChunks[metadata.documentId] = [];
            }
            documentChunks[metadata.documentId].push(result);
        }

        // Create a document result for each document
        const documentResults: LocalDocumentResult[] = [];
        for (const documentId in documentChunks) {
            const chunks = documentChunks[documentId];
            const uri = await this.getDocumentUri(documentId) as string;
            const documentResult = new LocalDocumentResult(this.folderPath, documentId, uri, chunks, this._tokenizer);
            documentResults.push(documentResult);
        }

        // Sort document results by score and return top results
        return documentResults.sort((a, b) => b.score - a.score).slice(0, maxDocuments);
    }

    // Overrides

    /**
     * Starts an update on the underlying index and creates a working copy of
     * the catalog.
     * @remarks
     * The `uriToId` and `idToUri` maps are copied as well, so changes made
     * during the update do not touch the committed catalog and are discarded
     * by `cancelUpdate()`.
     */
    public async beginUpdate(): Promise<void> {
        await super.beginUpdate();

        // Make sure there is a catalog to copy. If loading fails, undo the
        // update that was just started on the base index.
        if (!this._catalog) {
            try {
                await this.loadIndexData();
            } catch (err: unknown) {
                super.cancelUpdate();
                throw err;
            }
        }

        const current = this._catalog!;
        this._newCatalog = {
            ...current,
            uriToId: { ...current.uriToId },
            idToUri: { ...current.idToUri },
        };
    }

    /**
     * Cancels the update on the underlying index and discards the working copy
     * of the catalog.
     */
    public cancelUpdate(): void {
        super.cancelUpdate();
        this._newCatalog = undefined;
    }

    /**
     * Commits the update on the underlying index, then writes the working copy
     * of the catalog to `catalog.json` and makes it the current catalog.
     * @throws Error when the catalog cannot be saved.
     */
    public async endUpdate(): Promise<void> {
        await super.endUpdate();

        try {
            // Save catalog
            await fs.writeFile(path.join(this.folderPath, 'catalog.json'), JSON.stringify(this._newCatalog));
            this._catalog = this._newCatalog;
            this._newCatalog = undefined;
        } catch(err: unknown) {
            throw new Error(`Error saving document catalog: ${String(err)}`);
        }
    }

    /**
     * Loads the underlying index data and, on first use, the catalog.
     * @remarks
     * When `catalog.json` does not exist yet, an empty version 1 catalog is
     * created and written to disk.
     * @throws Error when a new catalog cannot be created.
     */
    protected async loadIndexData(): Promise<void> {
        await super.loadIndexData();

        // Catalog already loaded
        if (this._catalog) {
            return;
        }

        const catalogPath = path.join(this.folderPath, 'catalog.json');
        if (await this.isCatalogCreated()) {
            // Load catalog
            const buffer = await fs.readFile(catalogPath);
            this._catalog = JSON.parse(buffer.toString());
        } else {
            try {
                // Initialize catalog
                this._catalog = {
                    version: 1,
                    count: 0,
                    uriToId: {},
                    idToUri: {},
                };
                await fs.writeFile(catalogPath, JSON.stringify(this._catalog));
            } catch(err: unknown) {
                throw new Error(`Error creating document catalog: ${String(err)}`);
            }
        }
    }
}
|
|
349
|
+
|
|
350
|
+
/**
 * Shape of the `catalog.json` file kept in the index folder.
 */
interface DocumentCatalog {
    /** Catalog format version; new catalogs are created with version 1. */
    version: number;

    /** Number of documents in the catalog. */
    count: number;

    /** Maps a document URI to its document id. */
    uriToId: Record<string, string>;

    /** Maps a document id to its document URI. */
    idToUri: Record<string, string>;
}
|