vectra 0.12.2 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (392)
  1. package/LICENSE +1 -1
  2. package/README.md +92 -100
  3. package/bin/vectra.js +3 -0
  4. package/lib/BrowserWebFetcher.d.ts +75 -0
  5. package/lib/BrowserWebFetcher.d.ts.map +1 -0
  6. package/lib/BrowserWebFetcher.js +290 -0
  7. package/lib/BrowserWebFetcher.js.map +1 -0
  8. package/lib/FileFetcher.d.ts +5 -0
  9. package/lib/FileFetcher.d.ts.map +1 -0
  10. package/lib/FileFetcher.js +89 -0
  11. package/lib/FileFetcher.js.map +1 -0
  12. package/lib/FileFetcher.spec.d.ts +2 -0
  13. package/lib/FileFetcher.spec.d.ts.map +1 -0
  14. package/lib/FileFetcher.spec.js +244 -0
  15. package/lib/FileFetcher.spec.js.map +1 -0
  16. package/lib/FolderWatcher.d.ts +91 -0
  17. package/lib/FolderWatcher.d.ts.map +1 -0
  18. package/lib/FolderWatcher.js +304 -0
  19. package/lib/FolderWatcher.js.map +1 -0
  20. package/lib/FolderWatcher.spec.d.ts +2 -0
  21. package/lib/FolderWatcher.spec.d.ts.map +1 -0
  22. package/lib/FolderWatcher.spec.js +308 -0
  23. package/lib/FolderWatcher.spec.js.map +1 -0
  24. package/lib/GPT3Tokenizer.d.ts +9 -0
  25. package/lib/GPT3Tokenizer.spec.d.ts +2 -0
  26. package/lib/GPT3Tokenizer.spec.d.ts.map +1 -0
  27. package/lib/GPT3Tokenizer.spec.js +45 -0
  28. package/lib/GPT3Tokenizer.spec.js.map +1 -0
  29. package/lib/ItemSelector.d.ts +41 -0
  30. package/lib/ItemSelector.d.ts.map +1 -0
  31. package/lib/ItemSelector.js +179 -0
  32. package/lib/ItemSelector.js.map +1 -0
  33. package/lib/ItemSelector.spec.d.ts +2 -0
  34. package/lib/ItemSelector.spec.d.ts.map +1 -0
  35. package/lib/ItemSelector.spec.js +204 -0
  36. package/lib/ItemSelector.spec.js.map +1 -0
  37. package/lib/LocalDocument.d.ts +54 -0
  38. package/lib/LocalDocument.d.ts.map +1 -1
  39. package/lib/LocalDocument.js +116 -0
  40. package/lib/LocalDocument.js.map +1 -0
  41. package/lib/LocalDocument.spec.d.ts +2 -0
  42. package/lib/LocalDocument.spec.d.ts.map +1 -0
  43. package/lib/LocalDocument.spec.js +214 -0
  44. package/lib/LocalDocument.spec.js.map +1 -0
  45. package/lib/LocalDocumentIndex.d.ts +152 -0
  46. package/lib/LocalDocumentIndex.d.ts.map +1 -1
  47. package/lib/LocalDocumentIndex.js +420 -0
  48. package/lib/LocalDocumentIndex.js.map +1 -0
  49. package/lib/LocalDocumentIndex.spec.d.ts +2 -0
  50. package/lib/LocalDocumentIndex.spec.d.ts.map +1 -0
  51. package/lib/LocalDocumentIndex.spec.js +494 -0
  52. package/lib/LocalDocumentIndex.spec.js.map +1 -0
  53. package/lib/LocalDocumentResult.d.ts +66 -0
  54. package/lib/LocalDocumentResult.d.ts.map +1 -1
  55. package/lib/LocalDocumentResult.js +376 -0
  56. package/lib/LocalDocumentResult.js.map +1 -0
  57. package/lib/LocalDocumentResult.spec.d.ts +2 -0
  58. package/lib/LocalDocumentResult.spec.d.ts.map +1 -0
  59. package/lib/LocalDocumentResult.spec.js +373 -0
  60. package/lib/LocalDocumentResult.spec.js.map +1 -0
  61. package/lib/LocalEmbeddings.d.ts +59 -0
  62. package/lib/LocalEmbeddings.d.ts.map +1 -0
  63. package/lib/LocalEmbeddings.js +101 -0
  64. package/lib/LocalEmbeddings.js.map +1 -0
  65. package/lib/LocalEmbeddings.spec.d.ts +2 -0
  66. package/lib/LocalEmbeddings.spec.d.ts.map +1 -0
  67. package/lib/LocalEmbeddings.spec.js +155 -0
  68. package/lib/LocalEmbeddings.spec.js.map +1 -0
  69. package/lib/LocalIndex.d.ts +159 -0
  70. package/lib/LocalIndex.d.ts.map +1 -1
  71. package/lib/LocalIndex.js +519 -0
  72. package/lib/LocalIndex.js.map +1 -0
  73. package/lib/LocalIndex.spec.d.ts +2 -0
  74. package/lib/LocalIndex.spec.js +611 -9
  75. package/lib/LocalIndex.spec.js.map +1 -1
  76. package/lib/OpenAIEmbeddings.d.ts +124 -0
  77. package/lib/OpenAIEmbeddings.d.ts.map +1 -0
  78. package/lib/OpenAIEmbeddings.js +166 -0
  79. package/lib/OpenAIEmbeddings.js.map +1 -0
  80. package/lib/OpenAIEmbeddings.spec.d.ts +2 -0
  81. package/lib/OpenAIEmbeddings.spec.d.ts.map +1 -0
  82. package/lib/OpenAIEmbeddings.spec.js +298 -0
  83. package/lib/OpenAIEmbeddings.spec.js.map +1 -0
  84. package/lib/TextSplitter.d.ts +21 -0
  85. package/lib/TextSplitter.d.ts.map +1 -1
  86. package/lib/TextSplitter.js +500 -0
  87. package/lib/TextSplitter.js.map +1 -0
  88. package/lib/TextSplitter.spec.d.ts +2 -0
  89. package/lib/TextSplitter.spec.d.ts.map +1 -0
  90. package/lib/TextSplitter.spec.js +337 -0
  91. package/lib/TextSplitter.spec.js.map +1 -0
  92. package/lib/TransformersEmbeddings.d.ts +121 -0
  93. package/lib/TransformersEmbeddings.d.ts.map +1 -0
  94. package/lib/TransformersEmbeddings.js +176 -0
  95. package/lib/TransformersEmbeddings.js.map +1 -0
  96. package/lib/TransformersEmbeddings.spec.d.ts +2 -0
  97. package/lib/TransformersEmbeddings.spec.d.ts.map +1 -0
  98. package/lib/TransformersEmbeddings.spec.js +198 -0
  99. package/lib/TransformersEmbeddings.spec.js.map +1 -0
  100. package/lib/TransformersTokenizer.d.ts +33 -0
  101. package/lib/TransformersTokenizer.d.ts.map +1 -0
  102. package/lib/TransformersTokenizer.js +44 -0
  103. package/lib/TransformersTokenizer.js.map +1 -0
  104. package/lib/TransformersTokenizer.spec.d.ts +2 -0
  105. package/lib/TransformersTokenizer.spec.d.ts.map +1 -0
  106. package/lib/TransformersTokenizer.spec.js +112 -0
  107. package/lib/TransformersTokenizer.spec.js.map +1 -0
  108. package/lib/WebFetcher.d.ts +14 -0
  109. package/lib/WebFetcher.d.ts.map +1 -0
  110. package/lib/WebFetcher.js +238 -0
  111. package/lib/WebFetcher.js.map +1 -0
  112. package/lib/WebFetcher.spec.d.ts +2 -0
  113. package/lib/WebFetcher.spec.d.ts.map +1 -0
  114. package/lib/WebFetcher.spec.js +263 -0
  115. package/lib/WebFetcher.spec.js.map +1 -0
  116. package/lib/browser.d.ts +30 -0
  117. package/lib/browser.d.ts.map +1 -0
  118. package/lib/browser.js +52 -0
  119. package/lib/browser.js.map +1 -0
  120. package/lib/codecs/IndexCodec.d.ts +37 -0
  121. package/lib/codecs/IndexCodec.d.ts.map +1 -0
  122. package/lib/codecs/IndexCodec.js +3 -0
  123. package/lib/codecs/IndexCodec.js.map +1 -0
  124. package/lib/codecs/JsonCodec.d.ts +19 -0
  125. package/lib/codecs/JsonCodec.d.ts.map +1 -0
  126. package/lib/codecs/JsonCodec.js +35 -0
  127. package/lib/codecs/JsonCodec.js.map +1 -0
  128. package/lib/codecs/JsonCodec.spec.d.ts +2 -0
  129. package/lib/codecs/JsonCodec.spec.d.ts.map +1 -0
  130. package/lib/codecs/JsonCodec.spec.js +66 -0
  131. package/lib/codecs/JsonCodec.spec.js.map +1 -0
  132. package/lib/codecs/LocalIndex.protobuf.spec.d.ts +2 -0
  133. package/lib/codecs/LocalIndex.protobuf.spec.d.ts.map +1 -0
  134. package/lib/codecs/LocalIndex.protobuf.spec.js +108 -0
  135. package/lib/codecs/LocalIndex.protobuf.spec.js.map +1 -0
  136. package/lib/codecs/ProtobufCodec.d.ts +20 -0
  137. package/lib/codecs/ProtobufCodec.d.ts.map +1 -0
  138. package/lib/codecs/ProtobufCodec.js +225 -0
  139. package/lib/codecs/ProtobufCodec.js.map +1 -0
  140. package/lib/codecs/ProtobufCodec.spec.d.ts +2 -0
  141. package/lib/codecs/ProtobufCodec.spec.d.ts.map +1 -0
  142. package/lib/codecs/ProtobufCodec.spec.js +155 -0
  143. package/lib/codecs/ProtobufCodec.spec.js.map +1 -0
  144. package/lib/codecs/index.d.ts +5 -0
  145. package/lib/codecs/index.d.ts.map +1 -0
  146. package/lib/codecs/index.js +21 -0
  147. package/lib/codecs/index.js.map +1 -0
  148. package/lib/codecs/migrateIndex.d.ts +24 -0
  149. package/lib/codecs/migrateIndex.d.ts.map +1 -0
  150. package/lib/codecs/migrateIndex.js +119 -0
  151. package/lib/codecs/migrateIndex.js.map +1 -0
  152. package/lib/codecs/migrateIndex.spec.d.ts +2 -0
  153. package/lib/codecs/migrateIndex.spec.d.ts.map +1 -0
  154. package/lib/codecs/migrateIndex.spec.js +151 -0
  155. package/lib/codecs/migrateIndex.spec.js.map +1 -0
  156. package/lib/codecs/schemas/index.proto +34 -0
  157. package/lib/index.d.ts +20 -0
  158. package/lib/index.d.ts.map +1 -1
  159. package/lib/index.js +36 -0
  160. package/lib/index.js.map +1 -0
  161. package/lib/internals/Colorize.d.ts +14 -0
  162. package/lib/internals/Colorize.d.ts.map +1 -0
  163. package/lib/internals/Colorize.js +69 -0
  164. package/lib/internals/Colorize.js.map +1 -0
  165. package/lib/internals/index.d.ts +3 -0
  166. package/lib/internals/index.d.ts.map +1 -0
  167. package/lib/internals/index.js +19 -0
  168. package/lib/internals/index.js.map +1 -0
  169. package/lib/internals/types.d.ts +43 -0
  170. package/lib/internals/types.d.ts.map +1 -0
  171. package/lib/internals/types.js +3 -0
  172. package/lib/internals/types.js.map +1 -0
  173. package/lib/server/IndexManager.d.ts +78 -0
  174. package/lib/server/IndexManager.d.ts.map +1 -0
  175. package/lib/server/IndexManager.js +259 -0
  176. package/lib/server/IndexManager.js.map +1 -0
  177. package/lib/server/VectraServer.d.ts +40 -0
  178. package/lib/server/VectraServer.d.ts.map +1 -0
  179. package/lib/server/VectraServer.js +151 -0
  180. package/lib/server/VectraServer.js.map +1 -0
  181. package/lib/server/VectraServer.spec.d.ts +2 -0
  182. package/lib/server/VectraServer.spec.d.ts.map +1 -0
  183. package/lib/server/VectraServer.spec.js +322 -0
  184. package/lib/server/VectraServer.spec.js.map +1 -0
  185. package/lib/server/handlers/documentHandlers.d.ts +15 -0
  186. package/lib/server/handlers/documentHandlers.d.ts.map +1 -0
  187. package/lib/server/handlers/documentHandlers.js +95 -0
  188. package/lib/server/handlers/documentHandlers.js.map +1 -0
  189. package/lib/server/handlers/helpers.d.ts +23 -0
  190. package/lib/server/handlers/helpers.d.ts.map +1 -0
  191. package/lib/server/handlers/helpers.js +138 -0
  192. package/lib/server/handlers/helpers.js.map +1 -0
  193. package/lib/server/handlers/index.d.ts +8 -0
  194. package/lib/server/handlers/index.d.ts.map +1 -0
  195. package/lib/server/handlers/index.js +22 -0
  196. package/lib/server/handlers/index.js.map +1 -0
  197. package/lib/server/handlers/indexHandlers.d.ts +14 -0
  198. package/lib/server/handlers/indexHandlers.d.ts.map +1 -0
  199. package/lib/server/handlers/indexHandlers.js +85 -0
  200. package/lib/server/handlers/indexHandlers.js.map +1 -0
  201. package/lib/server/handlers/itemHandlers.d.ts +34 -0
  202. package/lib/server/handlers/itemHandlers.d.ts.map +1 -0
  203. package/lib/server/handlers/itemHandlers.js +166 -0
  204. package/lib/server/handlers/itemHandlers.js.map +1 -0
  205. package/lib/server/handlers/lifecycleHandlers.d.ts +11 -0
  206. package/lib/server/handlers/lifecycleHandlers.d.ts.map +1 -0
  207. package/lib/server/handlers/lifecycleHandlers.js +31 -0
  208. package/lib/server/handlers/lifecycleHandlers.js.map +1 -0
  209. package/lib/server/handlers/queryHandlers.d.ts +27 -0
  210. package/lib/server/handlers/queryHandlers.d.ts.map +1 -0
  211. package/lib/server/handlers/queryHandlers.js +135 -0
  212. package/lib/server/handlers/queryHandlers.js.map +1 -0
  213. package/lib/server/handlers/statsHandlers.d.ts +17 -0
  214. package/lib/server/handlers/statsHandlers.d.ts.map +1 -0
  215. package/lib/server/handlers/statsHandlers.js +81 -0
  216. package/lib/server/handlers/statsHandlers.js.map +1 -0
  217. package/lib/server/index.d.ts +4 -0
  218. package/lib/server/index.d.ts.map +1 -0
  219. package/lib/server/index.js +23 -0
  220. package/lib/server/index.js.map +1 -0
  221. package/lib/storage/FileStorage.d.ts +92 -0
  222. package/lib/storage/FileStorage.d.ts.map +1 -0
  223. package/lib/storage/FileStorage.js +3 -0
  224. package/lib/storage/FileStorage.js.map +1 -0
  225. package/lib/storage/FileStorageUtilities.d.ts +36 -0
  226. package/lib/storage/FileStorageUtilities.d.ts.map +1 -0
  227. package/lib/storage/FileStorageUtilities.js +91 -0
  228. package/lib/storage/FileStorageUtilities.js.map +1 -0
  229. package/lib/storage/FileStorageUtilities.spec.d.ts +2 -0
  230. package/lib/storage/FileStorageUtilities.spec.d.ts.map +1 -0
  231. package/lib/storage/FileStorageUtilities.spec.js +98 -0
  232. package/lib/storage/FileStorageUtilities.spec.js.map +1 -0
  233. package/lib/storage/FileType.d.ts +29 -0
  234. package/lib/storage/FileType.d.ts.map +1 -0
  235. package/lib/storage/FileType.js +38 -0
  236. package/lib/storage/FileType.js.map +1 -0
  237. package/lib/storage/IndexedDBStorage.d.ts +47 -0
  238. package/lib/storage/IndexedDBStorage.d.ts.map +1 -0
  239. package/lib/storage/IndexedDBStorage.js +347 -0
  240. package/lib/storage/IndexedDBStorage.js.map +1 -0
  241. package/lib/storage/LocalFileStorage.browser.d.ts +19 -0
  242. package/lib/storage/LocalFileStorage.browser.d.ts.map +1 -0
  243. package/lib/storage/LocalFileStorage.browser.js +43 -0
  244. package/lib/storage/LocalFileStorage.browser.js.map +1 -0
  245. package/lib/storage/LocalFileStorage.d.ts +23 -0
  246. package/lib/storage/LocalFileStorage.d.ts.map +1 -0
  247. package/lib/storage/LocalFileStorage.js +152 -0
  248. package/lib/storage/LocalFileStorage.js.map +1 -0
  249. package/lib/storage/LocalFileStorage.spec.d.ts +2 -0
  250. package/lib/storage/LocalFileStorage.spec.d.ts.map +1 -0
  251. package/lib/storage/LocalFileStorage.spec.js +249 -0
  252. package/lib/storage/LocalFileStorage.spec.js.map +1 -0
  253. package/lib/storage/VirtualFileStorage.d.ts +18 -0
  254. package/lib/storage/VirtualFileStorage.d.ts.map +1 -0
  255. package/lib/storage/VirtualFileStorage.js +178 -0
  256. package/lib/storage/VirtualFileStorage.js.map +1 -0
  257. package/lib/storage/VirtualFileStorage.spec.d.ts +2 -0
  258. package/lib/storage/VirtualFileStorage.spec.d.ts.map +1 -0
  259. package/lib/storage/VirtualFileStorage.spec.js +302 -0
  260. package/lib/storage/VirtualFileStorage.spec.js.map +1 -0
  261. package/lib/storage/index.d.ts +6 -0
  262. package/lib/storage/index.d.ts.map +1 -0
  263. package/lib/storage/index.js +22 -0
  264. package/lib/storage/index.js.map +1 -0
  265. package/lib/templates/templates/csharp/README.md +48 -0
  266. package/lib/templates/templates/csharp/VectraClient.cs +234 -0
  267. package/lib/templates/templates/go/README.md +71 -0
  268. package/lib/templates/templates/go/vectra_client.go +322 -0
  269. package/lib/templates/templates/java/README.md +81 -0
  270. package/lib/templates/templates/java/VectraClient.java +232 -0
  271. package/lib/templates/templates/python/README.md +37 -0
  272. package/lib/templates/templates/python/vectra_client.py +279 -0
  273. package/lib/templates/templates/rust/Cargo.toml +14 -0
  274. package/lib/templates/templates/rust/README.md +39 -0
  275. package/lib/templates/templates/rust/build.rs +4 -0
  276. package/lib/templates/templates/rust/lib.rs +284 -0
  277. package/lib/templates/templates/typescript/README.md +96 -0
  278. package/lib/templates/templates/typescript/VectraClient.ts +374 -0
  279. package/lib/templates/typescript/VectraClient.d.ts +114 -0
  280. package/lib/templates/typescript/VectraClient.d.ts.map +1 -0
  281. package/lib/templates/typescript/VectraClient.js +328 -0
  282. package/lib/templates/typescript/VectraClient.js.map +1 -0
  283. package/lib/types.d.ts +153 -0
  284. package/lib/types.d.ts.map +1 -0
  285. package/lib/types.js +3 -0
  286. package/lib/types.js.map +1 -0
  287. package/lib/utils/index.d.ts +2 -0
  288. package/lib/utils/index.d.ts.map +1 -0
  289. package/lib/utils/index.js +18 -0
  290. package/lib/utils/index.js.map +1 -0
  291. package/lib/utils/pathUtils.d.ts +40 -0
  292. package/lib/utils/pathUtils.d.ts.map +1 -0
  293. package/lib/utils/pathUtils.js +98 -0
  294. package/lib/utils/pathUtils.js.map +1 -0
  295. package/lib/vectra-cli.d.ts +2 -0
  296. package/lib/vectra-cli.d.ts.map +1 -1
  297. package/lib/vectra-cli.generate.spec.d.ts +2 -0
  298. package/lib/vectra-cli.generate.spec.d.ts.map +1 -0
  299. package/lib/vectra-cli.generate.spec.js +112 -0
  300. package/lib/vectra-cli.generate.spec.js.map +1 -0
  301. package/lib/vectra-cli.js +760 -0
  302. package/lib/vectra-cli.js.map +1 -0
  303. package/lib/vectra-cli.spec.d.ts +1 -0
  304. package/lib/vectra-cli.spec.d.ts.map +1 -0
  305. package/lib/vectra-cli.spec.js +2 -0
  306. package/lib/vectra-cli.spec.js.map +1 -0
  307. package/package.json +91 -16
  308. package/proto/vectra_service.proto +276 -0
  309. package/src/BrowserWebFetcher.ts +345 -0
  310. package/src/FileFetcher.spec.ts +234 -0
  311. package/src/FileFetcher.ts +37 -25
  312. package/src/FolderWatcher.spec.ts +288 -0
  313. package/src/FolderWatcher.ts +304 -0
  314. package/src/GPT3Tokenizer.spec.ts +50 -0
  315. package/src/ItemSelector.spec.ts +252 -0
  316. package/src/ItemSelector.ts +163 -150
  317. package/src/LocalDocument.spec.ts +211 -0
  318. package/src/LocalDocument.ts +88 -94
  319. package/src/LocalDocumentIndex.spec.ts +481 -0
  320. package/src/LocalDocumentIndex.ts +39 -40
  321. package/src/LocalDocumentResult.spec.ts +373 -0
  322. package/src/LocalDocumentResult.ts +489 -319
  323. package/src/LocalEmbeddings.spec.ts +138 -0
  324. package/src/LocalEmbeddings.ts +120 -0
  325. package/src/LocalIndex.spec.ts +808 -66
  326. package/src/LocalIndex.ts +479 -429
  327. package/src/OpenAIEmbeddings.spec.ts +354 -0
  328. package/src/OpenAIEmbeddings.ts +26 -27
  329. package/src/TextSplitter.spec.ts +342 -0
  330. package/src/TextSplitter.ts +517 -532
  331. package/src/TransformersEmbeddings.spec.ts +188 -0
  332. package/src/TransformersEmbeddings.ts +232 -0
  333. package/src/TransformersTokenizer.spec.ts +143 -0
  334. package/src/TransformersTokenizer.ts +45 -0
  335. package/src/WebFetcher.spec.ts +288 -0
  336. package/src/WebFetcher.ts +184 -186
  337. package/src/browser.ts +69 -0
  338. package/src/codecs/IndexCodec.ts +40 -0
  339. package/src/codecs/JsonCodec.spec.ts +70 -0
  340. package/src/codecs/JsonCodec.ts +37 -0
  341. package/src/codecs/LocalIndex.protobuf.spec.ts +115 -0
  342. package/src/codecs/ProtobufCodec.spec.ts +166 -0
  343. package/src/codecs/ProtobufCodec.ts +193 -0
  344. package/src/codecs/index.ts +4 -0
  345. package/src/codecs/migrateIndex.spec.ts +176 -0
  346. package/src/codecs/migrateIndex.ts +125 -0
  347. package/src/codecs/schemas/index.proto +34 -0
  348. package/src/index.ts +9 -1
  349. package/src/internals/Colorize.ts +19 -16
  350. package/src/server/IndexManager.ts +243 -0
  351. package/src/server/VectraServer.spec.ts +303 -0
  352. package/src/server/VectraServer.ts +156 -0
  353. package/src/server/handlers/documentHandlers.ts +59 -0
  354. package/src/server/handlers/helpers.ts +93 -0
  355. package/src/server/handlers/index.ts +7 -0
  356. package/src/server/handlers/indexHandlers.ts +44 -0
  357. package/src/server/handlers/itemHandlers.ts +140 -0
  358. package/src/server/handlers/lifecycleHandlers.ts +26 -0
  359. package/src/server/handlers/queryHandlers.ts +96 -0
  360. package/src/server/handlers/statsHandlers.ts +38 -0
  361. package/src/server/index.ts +3 -0
  362. package/src/storage/FileStorage.ts +105 -0
  363. package/src/storage/FileStorageUtilities.spec.ts +106 -0
  364. package/src/storage/FileStorageUtilities.ts +77 -0
  365. package/src/storage/FileType.ts +61 -0
  366. package/src/storage/IndexedDBStorage.ts +365 -0
  367. package/src/storage/LocalFileStorage.browser.ts +52 -0
  368. package/src/storage/LocalFileStorage.spec.ts +292 -0
  369. package/src/storage/LocalFileStorage.ts +98 -0
  370. package/src/storage/VirtualFileStorage.spec.ts +307 -0
  371. package/src/storage/VirtualFileStorage.ts +169 -0
  372. package/src/storage/index.ts +5 -0
  373. package/src/templates/csharp/README.md +48 -0
  374. package/src/templates/csharp/VectraClient.cs +234 -0
  375. package/src/templates/go/README.md +71 -0
  376. package/src/templates/go/vectra_client.go +322 -0
  377. package/src/templates/java/README.md +81 -0
  378. package/src/templates/java/VectraClient.java +232 -0
  379. package/src/templates/python/README.md +37 -0
  380. package/src/templates/python/vectra_client.py +279 -0
  381. package/src/templates/rust/Cargo.toml +14 -0
  382. package/src/templates/rust/README.md +39 -0
  383. package/src/templates/rust/build.rs +4 -0
  384. package/src/templates/rust/lib.rs +284 -0
  385. package/src/templates/typescript/README.md +96 -0
  386. package/src/templates/typescript/VectraClient.ts +374 -0
  387. package/src/types.ts +131 -123
  388. package/src/utils/index.ts +1 -0
  389. package/src/utils/pathUtils.ts +106 -0
  390. package/src/vectra-cli.generate.spec.ts +72 -0
  391. package/src/vectra-cli.spec.ts +0 -0
  392. package/src/vectra-cli.ts +687 -246
package/src/LocalIndex.ts CHANGED
@@ -1,5 +1,4 @@
1
- import * as fs from 'fs/promises';
2
- import * as path from 'path';
1
+ import { pathUtils as path } from './utils/pathUtils';
3
2
  import { v4 } from 'uuid';
4
3
  import { ItemSelector } from './ItemSelector';
5
4
  import { IndexItem, IndexStats, MetadataFilter, MetadataTypes, QueryResult } from './types';
@@ -8,12 +7,15 @@ import { LocalDocumentIndex } from './LocalDocumentIndex';
8
7
  import bm25 from 'wink-bm25-text-search';
9
8
  import winkNLP from 'wink-nlp';
10
9
  import model from 'wink-eng-lite-web-model';
10
+ import { FileStorage, LocalFileStorage } from './storage';
11
+ import { IndexCodec, JsonCodec } from './codecs';
12
+
11
13
  export interface CreateIndexConfig {
12
- version: number;
13
- deleteIfExists?: boolean;
14
- metadata_config?: {
15
- indexed?: string[];
16
- };
14
+ version: number;
15
+ deleteIfExists?: boolean;
16
+ metadata_config?: {
17
+ indexed?: string[];
18
+ };
17
19
  }
18
20
 
19
21
  /**
@@ -22,468 +24,516 @@ export interface CreateIndexConfig {
22
24
  * This class is used to create, update, and query a local vector index.
23
25
  * Each index is a folder on disk containing an index.json file and an optional set of metadata files.
24
26
  */
25
- export class LocalIndex<TMetadata extends Record<string,MetadataTypes> = Record<string,MetadataTypes>>{
26
- private readonly _folderPath: string;
27
- private readonly _indexName: string;
28
-
29
- private _data?: IndexData;
30
- private _update?: IndexData;
31
- //member fields for BM25
32
- private _bm25Engine: any;
33
-
34
- /**
35
- * Creates a new instance of LocalIndex.
36
- * @param folderPath Path to the index folder.
37
- * @param indexName Optional name of the index file. Defaults to index.json.
38
- */
39
- public constructor(folderPath: string, indexName?: string) {
40
- this._folderPath = folderPath;
41
- this._indexName = indexName || "index.json";
42
- }
43
-
44
- /**
45
- * Path to the index folder.
46
- */
47
- public get folderPath(): string {
48
- return this._folderPath;
27
+ export class LocalIndex<TMetadata extends Record<string, MetadataTypes> = Record<string, MetadataTypes>> {
28
+ private readonly _folderPath: string;
29
+ private readonly _indexName: string = 'index.json';
30
+ private readonly _storage: FileStorage;
31
+ private readonly _codec: IndexCodec;
32
+ private _data?: IndexData;
33
+ private _update?: IndexData;
34
+
35
+ // member fields for BM25
36
+ private _bm25Engine: any;
37
+ private readonly _bm25Factory: () => any;
38
+ private readonly _docReader: (docId: string) => Promise<string>;
39
+
40
+ /**
41
+ * Creates a new instance of LocalIndex.
42
+ * @param folderPath Path to the index folder.
43
+ * @param indexName Optional index file name. Defaults to 'index' + codec.extension.
44
+ * @param storage Optional file storage instance. Defaults to LocalFileStorage.
45
+ * @param codec Optional codec for serialization. Defaults to JsonCodec.
46
+ * @param options Optional constructor options for dependency injection.
47
+ */
48
+ public constructor(
49
+ folderPath: string,
50
+ indexName?: string,
51
+ storage?: FileStorage,
52
+ codec?: IndexCodec,
53
+ options?: {
54
+ bm25Factory?: () => any;
55
+ docReader?: (docId: string) => Promise<string>;
49
56
  }
50
-
51
- /**
52
- * Optional name of the index file.
53
- */
54
- public get indexName(): string {
55
- return this._indexName;
57
+ ) {
58
+ this._folderPath = folderPath;
59
+ this._codec = codec || new JsonCodec();
60
+ if (indexName) {
61
+ this._indexName = indexName;
62
+ } else {
63
+ this._indexName = `index${this._codec.extension}`;
56
64
  }
57
-
58
- /**
59
- * Begins an update to the index.
60
- * @remarks
61
- * This method loads the index into memory and prepares it for updates.
62
- */
63
- public async beginUpdate(): Promise<void> {
64
- if (this._update) {
65
- throw new Error('Update already in progress');
66
- }
67
-
68
- await this.loadIndexData();
69
- this._update = structuredClone(this._data);
70
- }
71
-
72
- /**
73
- * Cancels an update to the index.
74
- * @remarks
75
- * This method discards any changes made to the index since the update began.
76
- */
77
- public cancelUpdate(): void {
78
- this._update = undefined;
79
- }
80
-
81
- /**
82
- * Creates a new index.
83
- * @remarks
84
- * This method creates a new folder on disk containing an index.json file.
85
- * @param config Index configuration.
86
- */
87
- public async createIndex(config: CreateIndexConfig = {version: 1}): Promise<void> {
88
- // Delete if exists
89
- if (await this.isIndexCreated()) {
90
- if (config.deleteIfExists) {
91
- await this.deleteIndex();
92
- } else {
93
- throw new Error('Index already exists');
94
- }
95
- }
96
-
97
- try {
98
- // Create folder for index
99
- await fs.mkdir(this._folderPath, { recursive: true });
100
-
101
- // Initialize index.json file
102
- this._data = {
103
- version: config.version,
104
- metadata_config: config.metadata_config ?? {},
105
- items: []
106
- };
107
-
108
- await fs.writeFile(path.join(this._folderPath, this._indexName), JSON.stringify(this._data));
109
- } catch (err: unknown) {
110
- await this.deleteIndex();
111
- throw new Error('Error creating index');
112
- }
65
+ this._storage = storage || new LocalFileStorage();
66
+ this._bm25Factory = options?.bm25Factory || (() => bm25());
67
+ this._docReader = options?.docReader || (async (docId: string) => {
68
+ const doc = new LocalDocument((this as unknown) as LocalDocumentIndex, docId, '');
69
+ return await doc.loadText();
70
+ });
71
+ }
72
+
73
+ /** Path to the index folder. */
74
+ public get folderPath(): string {
75
+ return this._folderPath;
76
+ }
77
+
78
+ /** Name of the index file. */
79
+ public get indexName(): string {
80
+ return this._indexName;
81
+ }
82
+
83
+ /** Storage provider used to store the index. */
84
+ public get storage(): FileStorage {
85
+ return this._storage;
86
+ }
87
+
88
+ /** Codec used for serialization. */
89
+ public get codec(): IndexCodec {
90
+ return this._codec;
91
+ }
92
+
93
+ /**
94
+ * Begins an update to the index.
95
+ * @remarks
96
+ * This method loads the index into memory and prepares it for updates.
97
+ */
98
+ public async beginUpdate(): Promise<void> {
99
+ if (this._update) {
100
+ throw new Error('Update already in progress');
113
101
  }
114
-
115
- /**
116
- * Deletes the index.
117
- * @remarks
118
- * This method deletes the index folder from disk.
119
- */
120
- public deleteIndex(): Promise<void> {
121
- this._data = undefined;
122
- return fs.rm(this._folderPath, {
123
- recursive: true,
124
- maxRetries: 3
125
- });
102
+ await this.loadIndexData();
103
+ this._update = structuredClone(this._data);
104
+ }
105
+
106
+ /**
107
+ * Cancels an update to the index.
108
+ * @remarks
109
+ * This method discards any changes made to the index since the update began.
110
+ */
111
+ public cancelUpdate(): void {
112
+ this._update = undefined;
113
+ }
114
+
115
+ /**
116
+ * Creates a new index.
117
+ * @remarks
118
+ * This method creates a new folder on disk containing an index.json file.
119
+ * @param config Index configuration.
120
+ */
121
+ public async createIndex(config: CreateIndexConfig = { version: 1 }): Promise<void> {
122
+ // Delete if exists
123
+ if (await this.isIndexCreated()) {
124
+ if (config.deleteIfExists) {
125
+ await this.deleteIndex();
126
+ } else {
127
+ throw new Error('Index already exists');
128
+ }
126
129
  }
127
130
 
128
- /**
129
- * Deletes an item from the index.
130
- * @param id ID of item to delete.
131
- */
132
- public async deleteItem(id: string): Promise<void> {
133
- if (this._update) {
134
- const index = this._update.items.findIndex(i => i.id === id);
135
- if (index >= 0) {
136
- this._update.items.splice(index, 1);
137
- }
138
- } else {
139
- await this.beginUpdate();
140
- const index = this._update!.items.findIndex(i => i.id === id);
141
- if (index >= 0) {
142
- this._update!.items.splice(index, 1);
143
- }
144
- await this.endUpdate();
145
- }
131
+ try {
132
+ // Create folder for index
133
+ await this.storage.createFolder(this._folderPath);
134
+
135
+ // Initialize index.json file
136
+ this._data = {
137
+ version: config.version,
138
+ metadata_config: config.metadata_config ?? {},
139
+ items: []
140
+ };
141
+ await this.storage.upsertFile(path.join(this._folderPath, this._indexName), this._codec.serializeIndex(this._data));
142
+ } catch {
143
+ await this.deleteIndex();
144
+ throw new Error('Error creating index');
146
145
  }
147
-
148
- /**
149
- * Ends an update to the index.
150
- * @remarks
151
- * This method saves the index to disk.
152
- */
153
- public async endUpdate(): Promise<void> {
154
- if (!this._update) {
155
- throw new Error('No update in progress');
156
- }
157
-
158
- try {
159
- // Save index
160
- await fs.writeFile(path.join(this._folderPath, this._indexName), JSON.stringify(this._update));
161
- this._data = this._update;
162
- this._update = undefined;
163
- } catch(err: unknown) {
164
- throw new Error(`Error saving index: ${(err as any).toString()}`);
165
- }
146
+ }
147
+
148
+ /**
149
+ * Deletes the index.
150
+ * @remarks
151
+ * This method deletes the index folder from disk.
152
+ */
153
+ public async deleteIndex(): Promise<void> {
154
+ this._data = undefined;
155
+ return await this.storage.deleteFolder(this._folderPath);
156
+ }
157
+
158
+ /**
159
+ * Deletes an item from the index.
160
+ * @param id ID of item to delete.
161
+ */
162
+ public async deleteItem(id: string): Promise<void> {
163
+ if (this._update) {
164
+ const index = this._update.items.findIndex(i => i.id === id);
165
+ if (index >= 0) {
166
+ this._update.items.splice(index, 1);
167
+ }
168
+ } else {
169
+ await this.beginUpdate();
170
+ const index = this._update!.items.findIndex(i => i.id === id);
171
+ if (index >= 0) {
172
+ this._update!.items.splice(index, 1);
173
+ }
174
+ await this.endUpdate();
166
175
  }
167
-
168
- /**
169
- * Loads an index from disk and returns its stats.
170
- * @returns Index stats.
171
- */
172
- public async getIndexStats(): Promise<IndexStats> {
173
- await this.loadIndexData();
174
- return {
175
- version: this._data!.version,
176
- metadata_config: this._data!.metadata_config,
177
- items: this._data!.items.length
178
- };
176
+ }
177
+
178
/**
 * Ends the update in progress.
 * @remarks
 * The pending update is serialized with the index codec and written to the index file in
 * storage. Once the write succeeds, the pending update becomes the in-memory index and the
 * update is cleared.
 * @throws Error if no update is in progress, or if the index could not be saved.
 */
public async endUpdate(): Promise<void> {
  if (!this._update) {
    throw new Error('No update in progress');
  }

  try {
    // Save index
    const indexPath = path.join(this._folderPath, this._indexName);
    const payload = this._codec.serializeIndex(this._update);
    await this.storage.upsertFile(indexPath, payload);

    // Promote the pending update only after the write has succeeded.
    this._data = this._update;
    this._update = undefined;
  } catch (err: unknown) {
    throw new Error(`Error saving index: ${(err as any).toString()}`);
  }
}
197
+
198
/**
 * Loads the index (if it is not already in memory) and summarizes it.
 * @returns The index version, its metadata configuration, and the number of items.
 */
public async getIndexStats(): Promise<IndexStats> {
  await this.loadIndexData();
  const { version, metadata_config, items } = this._data!;
  return { version, metadata_config, items: items.length };
}
210
+
211
/**
 * Looks up a single item by its ID.
 * @remarks
 * The index is loaded into memory first if needed. The item is returned as stored in the
 * index; this method does not read any external metadata file.
 * @param id ID of the item to retrieve.
 * @returns The matching item, or undefined if no item has that ID.
 */
public async getItem<TItemMetadata extends TMetadata = TMetadata>(id: string): Promise<IndexItem<TItemMetadata> | undefined> {
  await this.loadIndexData();
  for (const candidate of this._data!.items) {
    if (candidate.id === id) {
      return candidate as any;
    }
  }
  return undefined;
}
220
+
221
/**
 * Adds an item to the index.
 * @remarks
 * If an update is already in progress, the item is added to it and the caller remains
 * responsible for ending that update. Otherwise a one-off update is started and ended
 * around the insert. If the insert or the save fails during a one-off update, the update is
 * cancelled before the error is rethrown, so a failed insert cannot leave the index with an
 * update in progress that later calls would silently join.
 * @param item Item to insert.
 * @returns Inserted item.
 * @throws Error if the item has no vector, if an item with the same ID already exists, or
 * if the index could not be saved.
 */
public async insertItem<TItemMetadata extends TMetadata = TMetadata>(item: Partial<IndexItem<TItemMetadata>>): Promise<IndexItem<TItemMetadata>> {
  if (this._update) {
    // The caller owns the update in progress; just stage the item.
    return await this.addItemToUpdate(item, true) as any;
  }

  await this.beginUpdate();
  try {
    const newItem = await this.addItemToUpdate(item, true);
    await this.endUpdate();
    return newItem as any;
  } catch (err: unknown) {
    // Mirror batchInsertItems: roll back the update this call started.
    await this.cancelUpdate();
    throw err;
  }
}
239
+
240
/**
 * Inserts several items as a single update.
 * @remarks
 * The method starts its own update before inserting. Items are added one at a time; if any
 * insert or the final save fails, the whole update is cancelled and the error is rethrown,
 * so a partial batch is never applied to the local index.
 * @param items Items to insert.
 * @returns Inserted items, in the order they were supplied.
 */
public async batchInsertItems<TItemMetadata extends TMetadata = TMetadata>(items: Partial<IndexItem<TItemMetadata>>[]): Promise<IndexItem[]> {
  await this.beginUpdate();

  const inserted: IndexItem[] = [];
  try {
    for (const candidate of items) {
      inserted.push(await this.addItemToUpdate(candidate, true));
    }
    await this.endUpdate();
  } catch (err) {
    // Cancel the update to prevent a partial batch; let the error bubble up.
    await this.cancelUpdate();
    throw err;
  }
  return inserted;
}
265
+
266
/** Returns true when storage reports that the index file exists in the index folder. */
public async isIndexCreated(): Promise<boolean> {
  const indexPath = path.join(this._folderPath, this._indexName);
  return await this.storage.pathExists(indexPath);
}
270
+
271
/**
 * Returns all items in the index.
 * @remarks
 * The index is loaded into memory first if needed. The returned array is a shallow copy, so
 * adding or removing entries on it does not change the index; the item objects themselves
 * are shared with the index and should not be modified.
 * @returns Array of all items in the index.
 */
public async listItems<TItemMetadata extends TMetadata = TMetadata>(): Promise<IndexItem<TItemMetadata>[]> {
  await this.loadIndexData();
  const { items } = this._data!;
  return items.slice() as any;
}
282
+
283
/**
 * Returns every item whose metadata satisfies the filter.
 * @remarks
 * The index is loaded into memory first if needed. The filter is evaluated against the
 * metadata held in the index itself; external metadata files are not read here.
 * @param filter Filter to apply.
 * @returns Array of items matching the filter.
 */
public async listItemsByMetadata<TItemMetadata extends TMetadata = TMetadata>(filter: MetadataFilter): Promise<IndexItem<TItemMetadata>[]> {
  await this.loadIndexData();

  const matches: IndexItem[] = [];
  for (const entry of this._data!.items) {
    if (ItemSelector.select(entry.metadata, filter)) {
      matches.push(entry);
    }
  }
  return matches as any;
}
294
+
295
/**
 * Finds the top k items in the index that are most similar to the vector.
 * @remarks
 * The index is loaded into memory, optionally narrowed by the metadata filter, and every
 * remaining item is scored against the query vector with cosine similarity. The topK best
 * items are returned as shallow copies, with external metadata loaded for those that have a
 * metadata file.
 *
 * When isBm25 is true, up to topK keyword matches are appended after the semantic results.
 * Only items not already selected, and whose metadata carries documentId, startPos and
 * endPos, are offered to the BM25 engine. Appended results have `metadata.isBm25` set to
 * true and carry the engine's score rather than a cosine similarity. External metadata is
 * not loaded for them. Each document's text is read at most once per call, however many of
 * its chunks are indexed.
 * @param vector Vector to query against.
 * @param query Query string (used when isBm25=true).
 * @param topK Number of items to return.
 * @param filter Optional. Filter to apply.
 * @param isBm25 Optional. If true, append BM25 keyword results to semantic results.
 * @returns Similar items to the vector that match the supplied filter.
 */
public async queryItems<TItemMetadata extends TMetadata = TMetadata>(
  vector: number[],
  query: string,
  topK: number,
  filter?: MetadataFilter,
  isBm25?: boolean
): Promise<QueryResult<TItemMetadata>[]> {
  await this.loadIndexData();

  // Filter items
  let items = this._data!.items;
  if (filter) {
    items = items.filter(i => ItemSelector.select(i.metadata, filter));
  }

  // Calculate distances
  const norm = ItemSelector.normalize(vector);
  const distances = items.map((item, index) => ({
    index,
    distance: ItemSelector.normalizedCosineSimilarity(vector, norm, item.vector, item.norm)
  }));

  // Sort by distance DESCENDING
  distances.sort((a, b) => b.distance - a.distance);

  // Find top k (shallow copies, so loading metadata below does not touch the index)
  const top: QueryResult<TItemMetadata>[] = distances.slice(0, topK).map(d => ({
    item: Object.assign({}, items[d.index]) as any,
    score: d.distance
  }));

  // Load external metadata
  for (const result of top) {
    if (result.item.metadataFile) {
      const metadataPath = path.join(this._folderPath, result.item.metadataFile);
      const metadataBuffer = await this.storage.readFile(metadataPath);
      result.item.metadata = this._codec.deserializeMetadata(metadataBuffer) as any;
    }
  }

  // Perform bm25 search only if enabled.
  if (!isBm25) {
    return top;
  }

  // Avoid duplicate chunks that are already selected during semantic search.
  const selectedIds = new Set<string>();
  for (const r of top) selectedIds.add(r.item.id);

  // Set up BM25 engine
  await this.setupBm25();

  // Text of each document, keyed by document ID, so a document with many chunks is read once.
  const documentText = new Map<string, string>();

  // Add docs if we have necessary metadata; guard everything to avoid crashes
  for (let i = 0; i < items.length; i++) {
    if (selectedIds.has(items[i].id)) {
      continue;
    }
    const md = (items[i] as any).metadata || {};
    if (md.documentId == undefined || md.startPos == undefined || md.endPos == undefined) {
      continue;
    }
    try {
      const documentId = String(md.documentId);
      let text = documentText.get(documentId);
      if (text === undefined) {
        // Failed reads are not cached, so they are retried for the next chunk as before.
        text = await this._docReader(documentId);
        documentText.set(documentId, text as string);
      }
      // endPos is treated as inclusive, hence the + 1.
      const chunkText = (text as string).substring(Number(md.startPos), Number(md.endPos) + 1);
      // The item's position in `items` doubles as the BM25 document id.
      this._bm25Engine.addDoc?.({ body: chunkText }, i);
    } catch {
      // Ignore load or engine errors for BM25 doc prep
    }
  }

  this._bm25Engine.consolidate?.();

  const results: any[] = await this.bm25Search(query, items, topK);

  results.forEach((res: any) => {
    // Support both [index, score] tuples and { item, score } objects
    if (Array.isArray(res)) {
      const idx = res[0];
      const score = res[1];
      const match = items[idx];
      if (match) {
        top.push({
          item: { ...match, metadata: { ...match.metadata, isBm25: true } } as any,
          score
        });
      }
    } else if (res && typeof res === 'object' && 'item' in res && 'score' in res) {
      const objItem = { ...(res.item || {}), metadata: { ...(res.item?.metadata || {}), isBm25: true } } as any;
      top.push({ item: objItem, score: res.score });
    }
  });

  return top;
}
402
+
403
/**
 * Adds or replaces an item in the index.
 * @remarks
 * If an update is already in progress, the item is staged in it and the caller remains
 * responsible for ending that update. Otherwise a one-off update is started and ended
 * around the upsert. If the upsert or the save fails during a one-off update, the update is
 * cancelled before the error is rethrown, so the index is not left with an update in
 * progress. If an item with the same ID already exists, it will be replaced.
 * @param item Item to insert or replace.
 * @returns Upserted item.
 * @throws Error if the item has no vector or if the index could not be saved.
 */
public async upsertItem<TItemMetadata extends TMetadata = TMetadata>(item: Partial<IndexItem<TItemMetadata>>): Promise<IndexItem<TItemMetadata>> {
  if (this._update) {
    // The caller owns the update in progress; just stage the item.
    return await this.addItemToUpdate(item, false) as any;
  }

  await this.beginUpdate();
  try {
    const newItem = await this.addItemToUpdate(item, false);
    await this.endUpdate();
    return newItem as any;
  } catch (err: unknown) {
    // Mirror batchInsertItems: roll back the update this call started.
    await this.cancelUpdate();
    throw err;
  }
}
371
421
 
372
- /**
373
- * Ensures that the index has been loaded into memory.
374
- */
375
- protected async loadIndexData(): Promise<void> {
376
- if (this._data) {
377
- return;
378
- }
379
-
380
- if (!await this.isIndexCreated()) {
381
- throw new Error('Index does not exist');
382
- }
383
-
384
- const data = await fs.readFile(path.join(this._folderPath, this.indexName));
385
- this._data = JSON.parse(data.toString());
422
/**
 * Ensures that the index has been loaded into memory.
 * @remarks
 * Does nothing when index data is already cached. Otherwise the index file is read from
 * storage and decoded with the index codec.
 * @throws Error if the index does not exist.
 */
protected async loadIndexData(): Promise<void> {
  const alreadyLoaded = Boolean(this._data);
  if (alreadyLoaded) {
    return;
  }

  const exists = await this.isIndexCreated();
  if (!exists) {
    throw new Error('Index does not exist');
  }

  // NOTE(review): this reads via `indexName` while isIndexCreated() checks `_indexName`;
  // presumably the former is a getter for the latter - confirm they cannot diverge.
  const indexPath = path.join(this._folderPath, this.indexName);
  const raw = await this.storage.readFile(indexPath);
  this._data = this._codec.deserializeIndex(raw);
}
413
434
 
414
- // Save remaining metadata to disk
415
- metadataFile = `${v4()}.json`;
416
- const metadataPath = path.join(this._folderPath, metadataFile);
417
- await fs.writeFile(metadataPath, JSON.stringify(item.metadata));
418
- } else if (item.metadata) {
419
- metadata = item.metadata;
420
- }
435
/**
 * Validates an item and stages it in the update in progress.
 * @remarks
 * Callers must have started an update first. When the index declares indexed metadata keys,
 * only those keys are kept in the index entry; the full metadata is written to a separate
 * file only if the item also has keys that are not indexed. With no indexed keys
 * configured, the metadata is stored in the index entry as given.
 *
 * When replacing an existing item, its vector, norm, metadata and metadata file reference
 * are all refreshed, so later similarity queries use a norm that matches the new vector.
 * NOTE(review): a metadata file written for the item being replaced is not removed here.
 * @param item Item to add. A missing ID is replaced with a generated one.
 * @param unique If true, an existing item with the same ID is an error; if false, it is
 * replaced in place.
 * @returns The item now held by the pending update.
 * @throws Error if the item has no vector, or if `unique` is true and the ID already exists.
 */
private async addItemToUpdate(item: Partial<IndexItem<any>>, unique: boolean): Promise<IndexItem> {
  // Ensure vector is provided
  if (!item.vector) {
    throw new Error('Vector is required');
  }

  // Ensure unique
  const id = item.id ?? v4();
  if (unique && this._update!.items.some(i => i.id === id)) {
    throw new Error(`Item with id ${id} already exists`);
  }

  // Check for indexed metadata
  let metadata: Record<string, any> = {};
  let metadataFile: string | undefined;
  const indexedKeys = this._update!.metadata_config.indexed ?? [];
  if (indexedKeys.length > 0 && item.metadata) {
    // Always store only indexed keys in the index
    for (const key of indexedKeys) {
      if (Object.prototype.hasOwnProperty.call(item.metadata, key)) {
        metadata[key] = (item.metadata as any)[key];
      }
    }

    // Write full metadata externally only if there are non-indexed keys present
    const hasNonIndexed = Object.keys(item.metadata).some(k => !indexedKeys.includes(k));
    if (hasNonIndexed) {
      metadataFile = `${v4()}${this._codec.extension}`;
      const metadataPath = path.join(this._folderPath, metadataFile);
      await this.storage.upsertFile(metadataPath, this._codec.serializeMetadata(item.metadata as Record<string, MetadataTypes>));
    }
  } else if (item.metadata) {
    metadata = item.metadata;
  }

  const norm = ItemSelector.normalize(item.vector);

  // Replace in place when upserting an item that already exists
  if (!unique) {
    const existing = this._update!.items.find(i => i.id === id);
    if (existing) {
      existing.metadata = metadata;
      existing.vector = item.vector;
      // Keep the cached norm in step with the new vector; queryItems relies on it.
      existing.norm = norm;
      existing.metadataFile = metadataFile;
      return existing;
    }
  }

  // Create new item
  const newItem: IndexItem = {
    id: id,
    metadata: metadata,
    vector: item.vector,
    norm: norm
  };
  if (metadataFile) {
    newItem.metadataFile = metadataFile;
  }

  // Add item to index
  this._update!.items.push(newItem);
  return newItem;
}
507
+
508
/**
 * Creates a fresh BM25 engine from the configured factory and configures it for keyword
 * search over a single `body` field.
 */
private async setupBm25(): Promise<any> {
  const engine = this._bm25Factory();
  this._bm25Engine = engine;

  const nlp = winkNLP(model);
  const its = nlp.its;

  // Turns raw text into the token list the engine indexes and searches on.
  const prepTask = (text: string) => {
    const tokens: any[] = [];
    nlp.readDoc(text)
      .tokens()
      // Use only words ignoring punctuations etc and from them remove stop words
      .filter((t: any) => {
        const isWord = t.out(its.type) === 'word';
        return isWord && !t.out(its.stopWordFlag);
      })
      // Handle negation and extract stem of the word
      .each((t: any) => {
        const negated = t.out(its.negationFlag);
        const stem = t.out(its.stem);
        tokens.push(negated ? '!' + stem : stem);
      });
    return tokens;
  };

  // Step I: weight the single text field.
  engine.defineConfig({ fldWeights: { body: 1 } });
  // Step II: Define PrepTasks pipe.
  engine.definePrepTasks([prepTask]);
}
526
+
527
/**
 * Runs the query against the current BM25 engine and keeps the first topK results.
 * @param searchQuery Query text handed to the engine.
 * @param _items Unused.
 * @param topK Maximum number of results to keep.
 * @returns The engine's leading results; presumably [doc-id, score] pairs sorted by score
 * (the caller also accepts { item, score } objects) - confirm against the engine in use.
 */
private async bm25Search(searchQuery: string, _items: any, topK: number): Promise<any> {
  const ranked = this._bm25Engine.search(searchQuery);
  const limited = ranked.slice(0, topK);
  return limited;
}
481
531
  }
482
532
 
483
533
  interface IndexData {
484
- version: number;
485
- metadata_config: {
486
- indexed?: string[];
487
- };
488
- items: IndexItem[];
534
+ version: number;
535
+ metadata_config: {
536
+ indexed?: string[];
537
+ };
538
+ items: IndexItem[];
489
539
  }