vectra 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/README.md +5 -0
  2. package/bin/vectra.js +3 -0
  3. package/lib/GPT3Tokenizer.d.ts +9 -0
  4. package/lib/GPT3Tokenizer.d.ts.map +1 -0
  5. package/lib/GPT3Tokenizer.js +17 -0
  6. package/lib/GPT3Tokenizer.js.map +1 -0
  7. package/lib/ItemSelector.d.ts +1 -1
  8. package/lib/ItemSelector.d.ts.map +1 -1
  9. package/lib/ItemSelector.js.map +1 -1
  10. package/lib/LocalDocument.d.ts +16 -0
  11. package/lib/LocalDocument.d.ts.map +1 -0
  12. package/lib/LocalDocument.js +99 -0
  13. package/lib/LocalDocument.js.map +1 -0
  14. package/lib/LocalDocumentIndex.d.ts +48 -0
  15. package/lib/LocalDocumentIndex.d.ts.map +1 -0
  16. package/lib/LocalDocumentIndex.js +367 -0
  17. package/lib/LocalDocumentIndex.js.map +1 -0
  18. package/lib/LocalDocumentResult.d.ts +12 -0
  19. package/lib/LocalDocumentResult.d.ts.map +1 -0
  20. package/lib/LocalDocumentResult.js +186 -0
  21. package/lib/LocalDocumentResult.js.map +1 -0
  22. package/lib/LocalIndex.d.ts +9 -63
  23. package/lib/LocalIndex.d.ts.map +1 -1
  24. package/lib/LocalIndex.js +14 -1
  25. package/lib/LocalIndex.js.map +1 -1
  26. package/lib/OpenAIEmbeddings.d.ts +98 -0
  27. package/lib/OpenAIEmbeddings.d.ts.map +1 -0
  28. package/lib/OpenAIEmbeddings.js +139 -0
  29. package/lib/OpenAIEmbeddings.js.map +1 -0
  30. package/lib/TextSplitter.d.ts +17 -0
  31. package/lib/TextSplitter.d.ts.map +1 -0
  32. package/lib/TextSplitter.js +460 -0
  33. package/lib/TextSplitter.js.map +1 -0
  34. package/lib/WebFetcher.d.ts +16 -0
  35. package/lib/WebFetcher.d.ts.map +1 -0
  36. package/lib/WebFetcher.js +144 -0
  37. package/lib/WebFetcher.js.map +1 -0
  38. package/lib/index.d.ts +8 -0
  39. package/lib/index.d.ts.map +1 -1
  40. package/lib/index.js +13 -1
  41. package/lib/index.js.map +1 -1
  42. package/lib/internals/Colorize.d.ts +14 -0
  43. package/lib/internals/Colorize.d.ts.map +1 -0
  44. package/lib/internals/Colorize.js +64 -0
  45. package/lib/internals/Colorize.js.map +1 -0
  46. package/lib/internals/index.d.ts +3 -0
  47. package/lib/internals/index.d.ts.map +1 -0
  48. package/lib/internals/index.js +19 -0
  49. package/lib/internals/index.js.map +1 -0
  50. package/lib/internals/types.d.ts +42 -0
  51. package/lib/internals/types.d.ts.map +1 -0
  52. package/lib/internals/types.js +3 -0
  53. package/lib/internals/types.js.map +1 -0
  54. package/lib/types.d.ts +133 -0
  55. package/lib/types.d.ts.map +1 -0
  56. package/lib/types.js +3 -0
  57. package/lib/types.js.map +1 -0
  58. package/lib/vectra-cli.d.ts +2 -0
  59. package/lib/vectra-cli.d.ts.map +1 -0
  60. package/lib/vectra-cli.js +277 -0
  61. package/lib/vectra-cli.js.map +1 -0
  62. package/package.json +21 -3
  63. package/src/GPT3Tokenizer.ts +15 -0
  64. package/src/ItemSelector.ts +9 -9
  65. package/src/LocalDocument.ts +70 -0
  66. package/src/LocalDocumentIndex.ts +355 -0
  67. package/src/LocalDocumentResult.ts +206 -0
  68. package/src/LocalIndex.ts +12 -78
  69. package/src/OpenAIEmbeddings.ts +205 -0
  70. package/src/TextSplitter.ts +480 -0
  71. package/src/WebFetcher.ts +128 -0
  72. package/src/index.ts +8 -0
  73. package/src/internals/Colorize.ts +64 -0
  74. package/src/internals/index.ts +2 -0
  75. package/src/internals/types.ts +46 -0
  76. package/src/types.ts +160 -0
  77. package/src/vectra-cli.ts +238 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vectra-cli.js","sourceRoot":"","sources":["../src/vectra-cli.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,wDAAgC;AAChC,2CAAwC;AACxC,6DAA0D;AAC1D,6CAA0C;AAC1C,yDAAsD;AACtD,2CAAuC;AAEvC,SAAsB,GAAG;;QACrB,kBAAkB;QAClB,MAAM,IAAI,GAAG,MAAM,IAAA,eAAK,EAAC,IAAA,iBAAO,EAAC,OAAO,CAAC,IAAI,CAAC,CAAC;aAC1C,UAAU,CAAC,QAAQ,CAAC;aACpB,OAAO,CAAC,gBAAgB,EAAE,0BAA0B,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YACtE,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,CAAC,WAAW,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;QAClE,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,gBAAgB,EAAE,gCAAgC,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YAC5E,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,MAAM,KAAK,CAAC,WAAW,EAAE,CAAC;QAC9B,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,iBAAiB,EAAE,wCAAwC,EAAE,CAAC,KAAK,EAAE,EAAE;YAC5E,OAAO,KAAK;iBACP,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gFAAgF;gBAC1F,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,KAAK,EAAE;gBACX,KAAK,EAAE,GAAG;gBACV,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,sCAAsC;gBAChD,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,sDAAsD;gBAChE,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,YAAY,EAAE;gBAClB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,0DAA0D;gBACpE,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,GAAG;aACf,CAAC;iBACD,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE;oBAChD,OAAO,IAAI,CAAC;iBACf;qBAAM,IAAI,OAAO,IAAI,CAAC,IAAI,IAAI,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;oBACpE,OAAO,IAAI,CAAC;iBACf;qBAAM;oBACH,MAAM,IAAI,KAAK,CAAC,mJAAmJ,CAAC,CAAC;iBACxK;YACL,CAAC,CAAC;iBACD,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAChC,CAAC,EAAE,CAAO,I
AAI,EAAE,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC,CAAC;YAEzD,oBAAoB;YACpB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,OAAO,CAAC,CAAC,CAAC;YACzE,MAAM,UAAU,GAAG,IAAI,mCAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC;YAElG,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC;gBACjC,UAAU;gBACV,UAAU;gBACV,cAAc,EAAE;oBACZ,SAAS,EAAE,IAAI,CAAC,SAAS;iBAC5B;aACJ,CAAC,CAAC;YAEH,oBAAoB;YACpB,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,GAAe,EAAE,IAAI,CAAC,IAAc,EAAE,UAAU,CAAC,CAAC;YAEtF,kBAAkB;YAClB,MAAM,OAAO,GAAG,IAAI,uBAAU,EAAE,CAAC;YACjC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;gBACpB,IAAI;oBACA,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,QAAQ,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC,CAAC;oBAClD,MAAM,OAAO,GAAI,MAAM,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;oBAC1C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,QAAQ,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;oBACxE,MAAM,KAAK,CAAC,cAAc,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;oBACzC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,OAAO,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;iBACvE;gBAAC,OAAO,GAAY,EAAE;oBACnB,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,WAAW,CAAC,oBAAQ,CAAC,KAAK,CAAC,iBAAiB,GAAG,KAAM,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;iBACxG;aACJ;QACL,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,gBAAgB,EAAE,6CAA6C,EAAE,CAAC,KAAK,EAAE,EAAE;YAChF,OAAO,KAAK;iBACP,MAAM,CAAC,KAAK,EAAE;gBACX,KAAK,EAAE,GAAG;gBACV,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,6BAA6B;gBACvC,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,yDAAyD;gBACnE,IAAI,EAAE,QAAQ;aACjB,CAAC;iBACD,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE;oBAChD,OAAO,IAAI,CAAC;iBACf;qBAAM,IAAI,OAAO,IAAI,CAAC,IAAI,IAAI,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;oBACpE,OAAO,IAAI,CAAC;iBACf;qBAAM;oBACH,MAAM,IAAI,KAAK,CAAC,mJAAmJ,CAAC,CAAC;iBACxK;YACL,CAAC,CAAC,CAAC;QACX,CAAC,EAAE,CAAO,IAAI,EAAE,EAAE;YACd,mB
AAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YAErD,oBAAoB;YACpB,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,GAAe,EAAE,IAAI,CAAC,IAAc,EAAE,UAAU,CAAC,CAAC;YAEtF,mBAAmB;YACnB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;gBACpB,OAAO,CAAC,GAAG,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC;gBAC/B,MAAM,KAAK,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;aACnC;QACL,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,eAAe,EAAE,oCAAoC,EAAE,EAAE,EAAE,CAAO,IAAI,EAAE,EAAE;YAC/E,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,eAAe,EAAE,CAAC;YAC5C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC;YAC3C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QACxC,CAAC,CAAA,CAAC;aACD,OAAO,CAAC,uBAAuB,EAAE,uBAAuB,EAAE,CAAC,KAAK,EAAE,EAAE;YACjE,OAAO,KAAK;iBACP,MAAM,CAAC,MAAM,EAAE;gBACZ,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,gFAAgF;aAC7F,CAAC;iBACD,MAAM,CAAC,gBAAgB,EAAE;gBACtB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,oDAAoD;gBAC9D,IAAI,EAAE,OAAO;gBACb,OAAO,EAAE,EAAE;aACd,CAAC;iBACD,MAAM,CAAC,aAAa,EAAE;gBACnB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,iDAAiD;gBAC3D,IAAI,EAAE,OAAO;gBACb,OAAO,EAAE,EAAE;aACd,CAAC;iBACD,MAAM,CAAC,eAAe,EAAE;gBACrB,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE,2DAA2D;gBACrE,IAAI,EAAE,OAAO;gBACb,OAAO,EAAE,CAAC;aACb,CAAC;iBACD,MAAM,CAAC,QAAQ,EAAE;gBACd,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,6EAA6E;gBACvF,IAAI,EAAE,OAAO;gBACb,OAAO,EAAE,IAAI;aAChB,CAAC;iBACD,MAAM,CAAC,QAAQ,EAAE;gBACd,KAAK,EAAE,GAAG;gBACV,QAAQ,EAAE,wDAAwD;gBAClE,OAAO,EAAE,CAAC,UAAU,EAAE,OAAO,EAAE,QAAQ,CAAC;gBACxC,OAAO,EAAE,UAAU;aACtB,CAAC;iBACD,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAChC,CAAC,EAAE,CAAO,IAAI,EAAE,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC;YAE9C,oBAAoB;YACpB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,OAAO,CAAC,CAAC,CAAC;YACzE,MAAM,UAAU,GAAG,IAAI,mCAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC;YAElG,mBAAmB;YACnB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAe,CAAC
;YACxC,MAAM,KAAK,GAAG,IAAI,uCAAkB,CAAC;gBACjC,UAAU;gBACV,UAAU;aACb,CAAC,CAAC;YAEH,cAAc;YACd,MAAM,KAAK,GAAG,IAAI,CAAC,KAAe,CAAC;YACnC,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,cAAc,CAAC,KAAK,EAAE;gBAC9C,YAAY,EAAE,IAAI,CAAC,aAAa;gBAChC,SAAS,EAAE,IAAI,CAAC,UAAU;aAC7B,CAAC,CAAC;YAEH,iBAAiB;YACjB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;gBAC1B,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;gBACzC,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;gBACnD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;gBAC5D,IAAI,IAAI,CAAC,MAAM,IAAI,UAAU,EAAE;oBAC3B,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC;oBAC7E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;wBACtC,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;wBAC5B,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,CAAC;wBAChF,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;wBACpD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;wBAC1D,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;qBAC9C;iBACJ;qBAAM,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ,EAAE;oBAChC,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,QAAQ,EAAE,CAAC;oBACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;wBAC3C,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;wBAC/B,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;wBAC9C,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;wBAC1C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;wBAC9C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;wBAClD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC;wBAClD,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,CAAC;wBAC9C,OAAO,CAAC,GAAG,CAAC,oBAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,S
AAS,CAAC,QAAQ,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;qBACtE;iBACJ;aACJ;QACL,CAAC,CAAA,CAAC;aACD,IAAI,EAAE;aACN,aAAa,EAAE;aACf,UAAU,EAAE,CAAC;IACtB,CAAC;CAAA;AAzND,kBAyNC;AAGD,SAAe,WAAW,CAAC,KAAe,EAAE,QAAgB,EAAE,OAAe;;QACzE,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE;YAC1C,OAAO,KAAK,CAAC;SAChB;aAAM,IAAI,OAAO,QAAQ,IAAI,QAAQ,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE;YAClE,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAClD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;SACxF;aAAM;YACH,MAAM,IAAI,KAAK,CAAC,+CAA+C,OAAO,6EAA6E,CAAC,CAAA;SACvJ;IACL,CAAC;CAAA"}
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "vectra",
3
3
  "author": "Steven Ickman",
4
4
  "description": "A vector database that uses the local file system for storage.",
5
- "version": "0.1.2",
5
+ "version": "0.2.1",
6
6
  "license": "MIT",
7
7
  "keywords": [
8
8
  "gpt"
@@ -16,6 +16,9 @@
16
16
  },
17
17
  "main": "./lib/index.js",
18
18
  "types": "./lib/index.d.ts",
19
+ "bin": {
20
+ "vectra": "./bin/vectra.js"
21
+ },
19
22
  "typesVersions": {
20
23
  "<3.9": {
21
24
  "*": [
@@ -24,13 +27,28 @@
24
27
  }
25
28
  },
26
29
  "dependencies": {
27
- "uuid": "^8.3.2"
30
+ "axios": "^1.3.4",
31
+ "cheerio": "^1.0.0-rc.12",
32
+ "dotenv": "^8.2.0",
33
+ "gpt-3-encoder": "1.1.4",
34
+ "json-colorizer": "^2.2.2",
35
+ "openai": "^3.2.1",
36
+ "uuid": "^9.0.0",
37
+ "yargs": "^17.7.2"
28
38
  },
29
39
  "resolutions": {
30
40
  },
31
41
  "devDependencies": {
32
42
  "@types/node": "^14.14.31",
33
- "@types/uuid": "^8.3.0"
43
+ "@types/mocha": "^8.2.0",
44
+ "@types/assert": "^1.5.3",
45
+ "@types/uuid": "9.0.1",
46
+ "@types/yargs": "17.0.24",
47
+ "mocha": "10.2.0",
48
+ "nyc": "^15.1.0",
49
+ "shx": "^0.3.2",
50
+ "ts-mocha": "10.0.0",
51
+ "typescript": "^4.2.3"
34
52
  },
35
53
  "scripts": {
36
54
  "build": "tsc -b",
@@ -0,0 +1,15 @@
1
+ import { Tokenizer } from "./types";
2
+ import { encode, decode } from "gpt-3-encoder";
3
+
4
/**
 * `Tokenizer` implementation backed by the `gpt-3-encoder` package.
 */
export class GPT3Tokenizer implements Tokenizer {
    /**
     * Converts a list of token ids back into text.
     * @param tokens Token ids to decode.
     * @returns The decoded text.
     */
    public decode(tokens: number[]): string {
        const text: string = decode(tokens);
        return text;
    }

    /**
     * Converts text into a list of token ids.
     * @param text Text to encode.
     * @returns The encoded token ids.
     */
    public encode(text: string): number[] {
        const tokens: number[] = encode(text);
        return tokens;
    }
}
@@ -1,4 +1,4 @@
1
- import { MetadataFilter, MetadataTypes } from './LocalIndex';
1
+ import { MetadataFilter, MetadataTypes } from './types';
2
2
 
3
3
  export class ItemSelector {
4
4
  /**
@@ -62,12 +62,12 @@ export class ItemSelector {
62
62
  for (const key in filter) {
63
63
  switch (key) {
64
64
  case '$and':
65
- if (!filter[key].every((f) => this.select(metadata, f))) {
65
+ if (!filter[key]!.every((f: MetadataFilter) => this.select(metadata, f))) {
66
66
  return false;
67
67
  }
68
68
  break;
69
69
  case '$or':
70
- if (!filter[key].some((f) => this.select(metadata, f))) {
70
+ if (!filter[key]!.some((f: MetadataFilter) => this.select(metadata, f))) {
71
71
  return false;
72
72
  }
73
73
  break;
@@ -120,32 +120,32 @@ export class ItemSelector {
120
120
  }
121
121
  break;
122
122
  case '$gt':
123
- if (typeof value != 'number' || value <= filter[key]) {
123
+ if (typeof value != 'number' || value <= filter[key]!) {
124
124
  return false;
125
125
  }
126
126
  break;
127
127
  case '$gte':
128
- if (typeof value != 'number' || value < filter[key]) {
128
+ if (typeof value != 'number' || value < filter[key]!) {
129
129
  return false;
130
130
  }
131
131
  break;
132
132
  case '$lt':
133
- if (typeof value != 'number' || value >= filter[key]) {
133
+ if (typeof value != 'number' || value >= filter[key]!) {
134
134
  return false;
135
135
  }
136
136
  break;
137
137
  case '$lte':
138
- if (typeof value != 'number' || value > filter[key]) {
138
+ if (typeof value != 'number' || value > filter[key]!) {
139
139
  return false;
140
140
  }
141
141
  break;
142
142
  case '$in':
143
- if (typeof value == 'boolean' || !filter[key].includes(value)) {
143
+ if (typeof value == 'boolean' || !filter[key]!.includes(value)) {
144
144
  return false;
145
145
  }
146
146
  break;
147
147
  case '$nin':
148
- if (typeof value == 'boolean' || filter[key].includes(value)) {
148
+ if (typeof value == 'boolean' || filter[key]!.includes(value)) {
149
149
  return false;
150
150
  }
151
151
  break;
@@ -0,0 +1,70 @@
1
+ import * as fs from 'fs/promises';
2
+ import * as path from 'path';
3
+ import { MetadataTypes } from './types';
4
+
5
/**
 * Handle to a single document whose files live in an index folder.
 *
 * The document text is read from `<folderPath>/<id>.txt` and its optional
 * metadata from `<folderPath>/<id>.json`. Both are loaded on first request
 * and cached on the instance afterwards.
 */
export class LocalDocument {
    private readonly _folderPath: string;
    private readonly _id: string;
    private readonly _uri: string;
    private _metadata: Record<string,MetadataTypes>|undefined;
    private _text: string|undefined;

    /**
     * @param folderPath Folder that contains the document's files.
     * @param id ID of the document; used as the base name of its files.
     * @param uri URI of the document; used in error messages.
     */
    public constructor(folderPath: string, id: string, uri: string) {
        this._uri = uri;
        this._id = id;
        this._folderPath = folderPath;
    }

    /** Folder that contains the document's files. */
    public get folderPath(): string {
        return this._folderPath;
    }

    /** ID of the document. */
    public get id(): string {
        return this._id;
    }

    /** URI of the document. */
    public get uri(): string {
        return this._uri;
    }

    /**
     * Checks whether the document's `.json` metadata file is accessible.
     * @returns `true` when the file can be accessed, otherwise `false`.
     */
    public async hasMetadata(): Promise<boolean> {
        let found = true;
        try {
            const metadataPath = path.join(this.folderPath, `${this.id}.json`);
            await fs.access(metadataPath);
        } catch (err: unknown) {
            found = false;
        }

        return found;
    }

    /**
     * Reads and parses the document's metadata file, caching the result.
     * @returns The parsed metadata.
     * @throws Error when the file cannot be read or does not contain valid JSON.
     */
    public async loadMetadata(): Promise<Record<string,MetadataTypes>> {
        // Serve from cache once the metadata has been loaded
        if (this._metadata != undefined) {
            return this._metadata!;
        }

        // Read the raw file contents
        let raw: string;
        try {
            const contents = await fs.readFile(path.join(this.folderPath, `${this.id}.json`));
            raw = contents.toString();
        } catch (err: unknown) {
            throw new Error(`Error reading metadata for document "${this.uri}": ${(err as any).toString()}`);
        }

        // Parse separately so that read and parse failures report different messages
        try {
            this._metadata = JSON.parse(raw);
        } catch (err: unknown) {
            throw new Error(`Error parsing metadata for document "${this.uri}": ${(err as any).toString()}`);
        }

        return this._metadata!;
    }

    /**
     * Reads the document's text file, caching the result.
     * @returns The document text.
     * @throws Error when the file cannot be read.
     */
    public async loadText(): Promise<string> {
        // Serve from cache once the text has been loaded
        if (this._text != undefined) {
            return this._text;
        }

        let loaded: string;
        try {
            const contents = await fs.readFile(path.join(this.folderPath, `${this.id}.txt`));
            loaded = contents.toString();
        } catch (err: unknown) {
            throw new Error(`Error reading text file for document "${this.uri}": ${(err as any).toString()}`);
        }

        this._text = loaded;
        return loaded;
    }

}
@@ -0,0 +1,355 @@
1
+ import * as fs from 'fs/promises';
2
+ import * as path from 'path';
3
+ import { v4 } from 'uuid';
4
+ import { GPT3Tokenizer } from "./GPT3Tokenizer";
5
+ import { CreateIndexConfig, LocalIndex } from "./LocalIndex";
6
+ import { TextSplitter, TextSplitterConfig } from "./TextSplitter";
7
+ import { MetadataFilter, EmbeddingsModel, Tokenizer, MetadataTypes, EmbeddingsResponse, QueryResult, DocumentChunkMetadata, DocumentCatalogStats } from "./types";
8
+ import { LocalDocumentResult } from './LocalDocumentResult';
9
+ import { LocalDocument } from './LocalDocument';
10
+
11
+ const EMBEDDINGS_BATCH_SIZE = 500;
12
+
13
/**
 * Options accepted by `LocalDocumentIndex.queryDocuments()`.
 */
export interface DocumentQueryOptions {
    /**
     * Maximum number of documents to return. `queryDocuments()` uses 10 when omitted.
     */
    maxDocuments?: number;

    /**
     * Maximum number of chunks to retrieve from the index before they are grouped
     * by document. `queryDocuments()` uses 50 when omitted.
     */
    maxChunks?: number;

    /**
     * Optional metadata filter that is passed through to the chunk query.
     */
    filter?: MetadataFilter;
}
18
+
19
/**
 * Configuration accepted by the `LocalDocumentIndex` constructor.
 */
export interface LocalDocumentIndexConfig {
    /**
     * Folder that holds the index, the document catalog and the document files.
     */
    folderPath: string;

    /**
     * Model used to generate embeddings. Optional at construction time, but
     * `upsertDocument()` and `queryDocuments()` throw when it is not configured.
     */
    embeddings?: EmbeddingsModel;

    /**
     * Tokenizer to use. When omitted, the tokenizer from `chunkingConfig` is used,
     * falling back to a `GPT3Tokenizer`.
     */
    tokenizer?: Tokenizer;

    /**
     * Overrides for the text splitter settings. The constructor defaults are
     * `keepSeparators: true`, `chunkSize: 512` and `chunkOverlap: 0`.
     */
    chunkingConfig?: Partial<TextSplitterConfig>;
}
25
+
26
/**
 * A `LocalIndex` that stores whole documents.
 *
 * Each document is split into chunks with a `TextSplitter`, an embedding is
 * generated for every chunk, and each chunk is inserted into the underlying
 * index as an item carrying `DocumentChunkMetadata`. The mapping between
 * document URIs and document IDs is kept in a `catalog.json` file inside the
 * index folder, next to one `<documentId>.txt` file (and an optional
 * `<documentId>.json` metadata file) per document.
 */
export class LocalDocumentIndex extends LocalIndex {
    private readonly _embeddings?: EmbeddingsModel;
    private readonly _tokenizer: Tokenizer;
    private readonly _chunkingConfig?: TextSplitterConfig;
    // Committed catalog, as last loaded from or saved to disk
    private _catalog?: DocumentCatalog;
    // Working copy of the catalog that exists only while an update is in progress
    private _newCatalog?: DocumentCatalog;

    /**
     * Creates a new `LocalDocumentIndex` instance.
     * @param config Folder, embeddings model, tokenizer and chunking settings for the index.
     */
    public constructor(config: LocalDocumentIndexConfig) {
        super(config.folderPath);
        this._embeddings = config.embeddings;
        this._chunkingConfig = Object.assign({
            keepSeparators: true,
            chunkSize: 512,
            chunkOverlap: 0,
        } as TextSplitterConfig, config.chunkingConfig);
        this._tokenizer = config.tokenizer ?? this._chunkingConfig.tokenizer ?? new GPT3Tokenizer();
        // Make sure the splitter counts tokens with the same tokenizer as the index
        this._chunkingConfig.tokenizer = this._tokenizer;
    }

    /**
     * Returns true if the document catalog exists.
     */
    public async isCatalogCreated(): Promise<boolean> {
        try {
            await fs.access(path.join(this.folderPath, 'catalog.json'));
            return true;
        } catch (err: unknown) {
            return false;
        }
    }

    /**
     * Looks up the ID of a document.
     * @param uri URI of the document.
     * @returns The document ID, or `undefined` when the URI is not in the catalog.
     */
    public async getDocumentId(uri: string): Promise<string | undefined> {
        await this.loadIndexData();
        const map = this._catalog?.uriToId;
        // Own-property check so that keys like "constructor" never resolve to inherited members
        return map && Object.prototype.hasOwnProperty.call(map, uri) ? map[uri] : undefined;
    }

    /**
     * Looks up the URI of a document.
     * @param documentId ID of the document.
     * @returns The document URI, or `undefined` when the ID is not in the catalog.
     */
    public async getDocumentUri(documentId: string): Promise<string | undefined> {
        await this.loadIndexData();
        const map = this._catalog?.idToUri;
        // Own-property check so that keys like "constructor" never resolve to inherited members
        return map && Object.prototype.hasOwnProperty.call(map, documentId) ? map[documentId] : undefined;
    }

    /**
     * Creates the index and then loads its data, which also creates the document
     * catalog when it does not exist yet.
     * @param config Optional index creation settings, passed to the base class.
     */
    public async createIndex(config?: CreateIndexConfig): Promise<void> {
        await super.createIndex(config);
        await this.loadIndexData();
    }

    /**
     * Deletes a document, its chunks and its files.
     * @remarks
     * Does nothing when the URI is not in the catalog.
     * @param uri URI of the document to delete.
     * @throws Error when the index update fails or the text file cannot be removed.
     */
    public async deleteDocument(uri: string): Promise<void> {
        // Lookup document ID
        const documentId = await this.getDocumentId(uri);
        if (documentId == undefined) {
            return;
        }

        // Delete document chunks from index and remove from catalog
        await this.beginUpdate();
        try {
            // Get list of chunks for document
            const chunks = await this.listItemsByMetadata<DocumentChunkMetadata>({ documentId });

            // Delete chunks
            for (const chunk of chunks) {
                await this.deleteItem(chunk.id);
            }

            // Remove entry from catalog
            delete this._newCatalog!.uriToId[uri];
            delete this._newCatalog!.idToUri[documentId];
            this._newCatalog!.count--;

            // Commit changes
            await this.endUpdate();
        } catch (err: unknown) {
            // Cancel update and raise error
            this.cancelUpdate();
            throw new Error(`Error deleting document "${uri}": ${String(err)}`);
        }

        // Delete text file from disk
        try {
            await fs.unlink(path.join(this.folderPath, `${documentId}.txt`));
        } catch (err: unknown) {
            throw new Error(`Error removing text file for document "${uri}" from disk: ${String(err)}`);
        }

        // Delete metadata file from disk
        try {
            await fs.unlink(path.join(this.folderPath, `${documentId}.json`));
        } catch (err: unknown) {
            // Ignore error: the metadata file is only written when metadata was supplied
        }
    }

    /**
     * Returns statistics about the catalog and the underlying index.
     * @returns Catalog version, number of documents, number of chunks and the index metadata config.
     */
    public async getCatalogStats(): Promise<DocumentCatalogStats> {
        const stats = await this.getIndexStats();
        // Returns immediately when the catalog is already loaded; guarantees `_catalog` is populated below
        await this.loadIndexData();
        return {
            version: this._catalog!.version,
            documents: this._catalog!.count,
            chunks: stats.items,
            metadata_config: stats.metadata_config
        };
    }

    /**
     * Adds a document to the catalog.
     * @remarks
     * If a document with the same uri already exists, it will be replaced. The replacement
     * happens only after embeddings for the new text were generated successfully, so a failing
     * embeddings call leaves the existing document untouched.
     * @param uri URI of the document. The text after the last `.` is used as the splitter's `docType`.
     * @param text Text of the document.
     * @param metadata Optional metadata that is copied onto every chunk and saved to `<documentId>.json`.
     * @returns Inserted document
     * @throws Error when no embeddings model is configured, embeddings cannot be generated, or the update fails.
     */
    public async upsertDocument(uri: string, text: string, metadata?: Record<string, MetadataTypes>): Promise<LocalDocument> {
        // Ensure embeddings configured
        if (!this._embeddings) {
            throw new Error(`Embeddings model not configured.`);
        }

        // Reuse the ID of an existing document, otherwise generate a new one
        const existingId = await this.getDocumentId(uri);
        const documentId = existingId ?? v4();

        // Populate docType based on extension
        const config = Object.assign({}, this._chunkingConfig);
        const pos = uri.lastIndexOf('.');
        if (pos >= 0) {
            const ext = uri.substring(pos + 1).toLowerCase();
            config.docType = ext;
        }

        // Split text into chunks
        const splitter = new TextSplitter(config);
        const chunks = splitter.split(text);

        // Generate embeddings for chunks, one batch of at most EMBEDDINGS_BATCH_SIZE chunks at a time
        const embeddings: number[][] = [];
        for (let start = 0; start < chunks.length; start += EMBEDDINGS_BATCH_SIZE) {
            const batch = chunks.slice(start, start + EMBEDDINGS_BATCH_SIZE).map((chunk) => chunk.text);

            let response: EmbeddingsResponse;
            try {
                response = await this._embeddings.createEmbeddings(batch);
            } catch (err: unknown) {
                throw new Error(`Error generating embeddings: ${String(err)}`);
            }

            // Check for error
            if (response.status != 'success') {
                throw new Error(`Error generating embeddings: ${response.message}`);
            }

            // Add embeddings to output
            embeddings.push(...(response.output ?? []));
        }

        // Every chunk needs its own vector; refuse to index a partially embedded document
        if (embeddings.length != chunks.length) {
            throw new Error(`Error generating embeddings: expected ${chunks.length} embeddings but received ${embeddings.length}`);
        }

        // Only now that the new content is ready, remove the document being replaced
        if (existingId != undefined) {
            await this.deleteDocument(uri);
        }

        // Add document chunks to index
        await this.beginUpdate();
        try {
            // Add chunks to index
            for (let i = 0; i < chunks.length; i++) {
                const chunk = chunks[i];
                // Reserved fields are applied last so caller metadata can never overwrite them
                const chunkMetadata: DocumentChunkMetadata = Object.assign({}, metadata, {
                    documentId,
                    startPos: chunk.startPos,
                    endPos: chunk.endPos,
                });
                await this.insertItem({
                    id: v4(),
                    metadata: chunkMetadata,
                    vector: embeddings[i],
                });
            }

            // Save metadata file to disk
            if (metadata != undefined) {
                await fs.writeFile(path.join(this.folderPath, `${documentId}.json`), JSON.stringify(metadata));
            }

            // Save text file to disk
            await fs.writeFile(path.join(this.folderPath, `${documentId}.txt`), text);

            // Add entry to catalog
            this._newCatalog!.uriToId[uri] = documentId;
            this._newCatalog!.idToUri[documentId] = uri;
            this._newCatalog!.count++;

            // Commit changes
            await this.endUpdate();
        } catch (err: unknown) {
            // Cancel update and raise error
            this.cancelUpdate();
            throw new Error(`Error adding document "${uri}": ${String(err)}`);
        }

        // Return document
        return new LocalDocument(this.folderPath, documentId, uri);
    }

    /**
     * Finds the documents that best match a query.
     * @param query Text to search for.
     * @param options Optional limits and metadata filter. Defaults to 10 documents and 50 chunks.
     * @returns Matching documents sorted by descending score.
     * @throws Error when no embeddings model is configured or embeddings cannot be generated.
     */
    public async queryDocuments(query: string, options?: DocumentQueryOptions): Promise<LocalDocumentResult[]> {
        // Ensure embeddings configured
        if (!this._embeddings) {
            throw new Error(`Embeddings model not configured.`);
        }

        // Resolve options; `??` keeps the defaults even when a key is explicitly undefined
        const maxDocuments = options?.maxDocuments ?? 10;
        const maxChunks = options?.maxChunks ?? 50;

        // Generate embeddings for query
        let embeddings: EmbeddingsResponse;
        try {
            embeddings = await this._embeddings.createEmbeddings(query);
        } catch (err: unknown) {
            throw new Error(`Error generating embeddings for query: ${String(err)}`);
        }

        // Check for error
        if (embeddings.status != 'success') {
            throw new Error(`Error generating embeddings for query: ${embeddings.message}`);
        }

        // Query index for chunks
        const results = await this.queryItems<DocumentChunkMetadata>(embeddings.output![0], maxChunks, options?.filter);

        // Group chunks by document
        const documentChunks = new Map<string, QueryResult<DocumentChunkMetadata>[]>();
        for (const result of results) {
            const documentId = result.item.metadata.documentId;
            const group = documentChunks.get(documentId);
            if (group == undefined) {
                documentChunks.set(documentId, [result]);
            } else {
                group.push(result);
            }
        }

        // Create a document result for each document
        const documentResults: LocalDocumentResult[] = [];
        for (const [documentId, chunks] of documentChunks) {
            const uri = await this.getDocumentUri(documentId) as string;
            documentResults.push(new LocalDocumentResult(this.folderPath, documentId, uri, chunks, this._tokenizer));
        }

        // Sort document results by score and return top results
        return documentResults.sort((a, b) => b.score - a.score).slice(0, maxDocuments);
    }

    // Overrides

    /**
     * Begins an update of the index and creates a working copy of the catalog.
     */
    public async beginUpdate(): Promise<void> {
        await super.beginUpdate();

        // Copy the nested maps as well: a shallow copy would share them with the committed
        // catalog, and cancelUpdate() could then no longer discard the staged changes.
        const current = this._catalog;
        this._newCatalog = {
            version: current?.version ?? 1,
            count: current?.count ?? 0,
            uriToId: Object.assign({}, current?.uriToId),
            idToUri: Object.assign({}, current?.idToUri),
        };
    }

    /**
     * Cancels the update in progress and discards the working copy of the catalog.
     */
    public cancelUpdate(): void {
        super.cancelUpdate();
        this._newCatalog = undefined;
    }

    /**
     * Commits the update in progress and saves the working copy of the catalog to `catalog.json`.
     * @throws Error when the catalog cannot be saved.
     */
    public async endUpdate(): Promise<void> {
        await super.endUpdate();

        try {
            // Save catalog
            await fs.writeFile(path.join(this.folderPath, 'catalog.json'), JSON.stringify(this._newCatalog));
            this._catalog = this._newCatalog;
            this._newCatalog = undefined;
        } catch(err: unknown) {
            throw new Error(`Error saving document catalog: ${String(err)}`);
        }
    }

    /**
     * Loads the index data and, on first use, the document catalog. An empty catalog is
     * created on disk when `catalog.json` does not exist yet.
     * @throws Error when a new catalog cannot be created.
     */
    protected async loadIndexData(): Promise<void> {
        await super.loadIndexData();

        if (this._catalog) {
            return;
        }

        const catalogPath = path.join(this.folderPath, 'catalog.json');
        if (await this.isCatalogCreated()) {
            // Load catalog
            const buffer = await fs.readFile(catalogPath);
            this._catalog = JSON.parse(buffer.toString());
        } else {
            try {
                // Initialize catalog
                this._catalog = {
                    version: 1,
                    count: 0,
                    uriToId: {},
                    idToUri: {},
                };
                await fs.writeFile(catalogPath, JSON.stringify(this._catalog));
            } catch(err: unknown) {
                throw new Error(`Error creating document catalog: ${String(err)}`);
            }
        }
    }
}
349
+
350
/**
 * Shape of the `catalog.json` file that maps document URIs to document IDs and back.
 */
interface DocumentCatalog {
    /** Version of the catalog format; new catalogs are created with version 1. */
    version: number;

    /** Number of documents in the catalog. */
    count: number;

    /** Document ID for each document URI. */
    uriToId: Record<string, string>;

    /** Document URI for each document ID. */
    idToUri: Record<string, string>;
}