vectra 0.12.2 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (392) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +92 -100
  3. package/bin/vectra.js +3 -0
  4. package/lib/BrowserWebFetcher.d.ts +75 -0
  5. package/lib/BrowserWebFetcher.d.ts.map +1 -0
  6. package/lib/BrowserWebFetcher.js +290 -0
  7. package/lib/BrowserWebFetcher.js.map +1 -0
  8. package/lib/FileFetcher.d.ts +5 -0
  9. package/lib/FileFetcher.d.ts.map +1 -0
  10. package/lib/FileFetcher.js +89 -0
  11. package/lib/FileFetcher.js.map +1 -0
  12. package/lib/FileFetcher.spec.d.ts +2 -0
  13. package/lib/FileFetcher.spec.d.ts.map +1 -0
  14. package/lib/FileFetcher.spec.js +244 -0
  15. package/lib/FileFetcher.spec.js.map +1 -0
  16. package/lib/FolderWatcher.d.ts +91 -0
  17. package/lib/FolderWatcher.d.ts.map +1 -0
  18. package/lib/FolderWatcher.js +304 -0
  19. package/lib/FolderWatcher.js.map +1 -0
  20. package/lib/FolderWatcher.spec.d.ts +2 -0
  21. package/lib/FolderWatcher.spec.d.ts.map +1 -0
  22. package/lib/FolderWatcher.spec.js +308 -0
  23. package/lib/FolderWatcher.spec.js.map +1 -0
  24. package/lib/GPT3Tokenizer.d.ts +9 -0
  25. package/lib/GPT3Tokenizer.spec.d.ts +2 -0
  26. package/lib/GPT3Tokenizer.spec.d.ts.map +1 -0
  27. package/lib/GPT3Tokenizer.spec.js +45 -0
  28. package/lib/GPT3Tokenizer.spec.js.map +1 -0
  29. package/lib/ItemSelector.d.ts +41 -0
  30. package/lib/ItemSelector.d.ts.map +1 -0
  31. package/lib/ItemSelector.js +179 -0
  32. package/lib/ItemSelector.js.map +1 -0
  33. package/lib/ItemSelector.spec.d.ts +2 -0
  34. package/lib/ItemSelector.spec.d.ts.map +1 -0
  35. package/lib/ItemSelector.spec.js +204 -0
  36. package/lib/ItemSelector.spec.js.map +1 -0
  37. package/lib/LocalDocument.d.ts +54 -0
  38. package/lib/LocalDocument.d.ts.map +1 -1
  39. package/lib/LocalDocument.js +116 -0
  40. package/lib/LocalDocument.js.map +1 -0
  41. package/lib/LocalDocument.spec.d.ts +2 -0
  42. package/lib/LocalDocument.spec.d.ts.map +1 -0
  43. package/lib/LocalDocument.spec.js +214 -0
  44. package/lib/LocalDocument.spec.js.map +1 -0
  45. package/lib/LocalDocumentIndex.d.ts +152 -0
  46. package/lib/LocalDocumentIndex.d.ts.map +1 -1
  47. package/lib/LocalDocumentIndex.js +420 -0
  48. package/lib/LocalDocumentIndex.js.map +1 -0
  49. package/lib/LocalDocumentIndex.spec.d.ts +2 -0
  50. package/lib/LocalDocumentIndex.spec.d.ts.map +1 -0
  51. package/lib/LocalDocumentIndex.spec.js +494 -0
  52. package/lib/LocalDocumentIndex.spec.js.map +1 -0
  53. package/lib/LocalDocumentResult.d.ts +66 -0
  54. package/lib/LocalDocumentResult.d.ts.map +1 -1
  55. package/lib/LocalDocumentResult.js +376 -0
  56. package/lib/LocalDocumentResult.js.map +1 -0
  57. package/lib/LocalDocumentResult.spec.d.ts +2 -0
  58. package/lib/LocalDocumentResult.spec.d.ts.map +1 -0
  59. package/lib/LocalDocumentResult.spec.js +373 -0
  60. package/lib/LocalDocumentResult.spec.js.map +1 -0
  61. package/lib/LocalEmbeddings.d.ts +59 -0
  62. package/lib/LocalEmbeddings.d.ts.map +1 -0
  63. package/lib/LocalEmbeddings.js +101 -0
  64. package/lib/LocalEmbeddings.js.map +1 -0
  65. package/lib/LocalEmbeddings.spec.d.ts +2 -0
  66. package/lib/LocalEmbeddings.spec.d.ts.map +1 -0
  67. package/lib/LocalEmbeddings.spec.js +155 -0
  68. package/lib/LocalEmbeddings.spec.js.map +1 -0
  69. package/lib/LocalIndex.d.ts +159 -0
  70. package/lib/LocalIndex.d.ts.map +1 -1
  71. package/lib/LocalIndex.js +519 -0
  72. package/lib/LocalIndex.js.map +1 -0
  73. package/lib/LocalIndex.spec.d.ts +2 -0
  74. package/lib/LocalIndex.spec.js +611 -9
  75. package/lib/LocalIndex.spec.js.map +1 -1
  76. package/lib/OpenAIEmbeddings.d.ts +124 -0
  77. package/lib/OpenAIEmbeddings.d.ts.map +1 -0
  78. package/lib/OpenAIEmbeddings.js +166 -0
  79. package/lib/OpenAIEmbeddings.js.map +1 -0
  80. package/lib/OpenAIEmbeddings.spec.d.ts +2 -0
  81. package/lib/OpenAIEmbeddings.spec.d.ts.map +1 -0
  82. package/lib/OpenAIEmbeddings.spec.js +298 -0
  83. package/lib/OpenAIEmbeddings.spec.js.map +1 -0
  84. package/lib/TextSplitter.d.ts +21 -0
  85. package/lib/TextSplitter.d.ts.map +1 -1
  86. package/lib/TextSplitter.js +500 -0
  87. package/lib/TextSplitter.js.map +1 -0
  88. package/lib/TextSplitter.spec.d.ts +2 -0
  89. package/lib/TextSplitter.spec.d.ts.map +1 -0
  90. package/lib/TextSplitter.spec.js +337 -0
  91. package/lib/TextSplitter.spec.js.map +1 -0
  92. package/lib/TransformersEmbeddings.d.ts +121 -0
  93. package/lib/TransformersEmbeddings.d.ts.map +1 -0
  94. package/lib/TransformersEmbeddings.js +176 -0
  95. package/lib/TransformersEmbeddings.js.map +1 -0
  96. package/lib/TransformersEmbeddings.spec.d.ts +2 -0
  97. package/lib/TransformersEmbeddings.spec.d.ts.map +1 -0
  98. package/lib/TransformersEmbeddings.spec.js +198 -0
  99. package/lib/TransformersEmbeddings.spec.js.map +1 -0
  100. package/lib/TransformersTokenizer.d.ts +33 -0
  101. package/lib/TransformersTokenizer.d.ts.map +1 -0
  102. package/lib/TransformersTokenizer.js +44 -0
  103. package/lib/TransformersTokenizer.js.map +1 -0
  104. package/lib/TransformersTokenizer.spec.d.ts +2 -0
  105. package/lib/TransformersTokenizer.spec.d.ts.map +1 -0
  106. package/lib/TransformersTokenizer.spec.js +112 -0
  107. package/lib/TransformersTokenizer.spec.js.map +1 -0
  108. package/lib/WebFetcher.d.ts +14 -0
  109. package/lib/WebFetcher.d.ts.map +1 -0
  110. package/lib/WebFetcher.js +238 -0
  111. package/lib/WebFetcher.js.map +1 -0
  112. package/lib/WebFetcher.spec.d.ts +2 -0
  113. package/lib/WebFetcher.spec.d.ts.map +1 -0
  114. package/lib/WebFetcher.spec.js +263 -0
  115. package/lib/WebFetcher.spec.js.map +1 -0
  116. package/lib/browser.d.ts +30 -0
  117. package/lib/browser.d.ts.map +1 -0
  118. package/lib/browser.js +52 -0
  119. package/lib/browser.js.map +1 -0
  120. package/lib/codecs/IndexCodec.d.ts +37 -0
  121. package/lib/codecs/IndexCodec.d.ts.map +1 -0
  122. package/lib/codecs/IndexCodec.js +3 -0
  123. package/lib/codecs/IndexCodec.js.map +1 -0
  124. package/lib/codecs/JsonCodec.d.ts +19 -0
  125. package/lib/codecs/JsonCodec.d.ts.map +1 -0
  126. package/lib/codecs/JsonCodec.js +35 -0
  127. package/lib/codecs/JsonCodec.js.map +1 -0
  128. package/lib/codecs/JsonCodec.spec.d.ts +2 -0
  129. package/lib/codecs/JsonCodec.spec.d.ts.map +1 -0
  130. package/lib/codecs/JsonCodec.spec.js +66 -0
  131. package/lib/codecs/JsonCodec.spec.js.map +1 -0
  132. package/lib/codecs/LocalIndex.protobuf.spec.d.ts +2 -0
  133. package/lib/codecs/LocalIndex.protobuf.spec.d.ts.map +1 -0
  134. package/lib/codecs/LocalIndex.protobuf.spec.js +108 -0
  135. package/lib/codecs/LocalIndex.protobuf.spec.js.map +1 -0
  136. package/lib/codecs/ProtobufCodec.d.ts +20 -0
  137. package/lib/codecs/ProtobufCodec.d.ts.map +1 -0
  138. package/lib/codecs/ProtobufCodec.js +225 -0
  139. package/lib/codecs/ProtobufCodec.js.map +1 -0
  140. package/lib/codecs/ProtobufCodec.spec.d.ts +2 -0
  141. package/lib/codecs/ProtobufCodec.spec.d.ts.map +1 -0
  142. package/lib/codecs/ProtobufCodec.spec.js +155 -0
  143. package/lib/codecs/ProtobufCodec.spec.js.map +1 -0
  144. package/lib/codecs/index.d.ts +5 -0
  145. package/lib/codecs/index.d.ts.map +1 -0
  146. package/lib/codecs/index.js +21 -0
  147. package/lib/codecs/index.js.map +1 -0
  148. package/lib/codecs/migrateIndex.d.ts +24 -0
  149. package/lib/codecs/migrateIndex.d.ts.map +1 -0
  150. package/lib/codecs/migrateIndex.js +119 -0
  151. package/lib/codecs/migrateIndex.js.map +1 -0
  152. package/lib/codecs/migrateIndex.spec.d.ts +2 -0
  153. package/lib/codecs/migrateIndex.spec.d.ts.map +1 -0
  154. package/lib/codecs/migrateIndex.spec.js +151 -0
  155. package/lib/codecs/migrateIndex.spec.js.map +1 -0
  156. package/lib/codecs/schemas/index.proto +34 -0
  157. package/lib/index.d.ts +20 -0
  158. package/lib/index.d.ts.map +1 -1
  159. package/lib/index.js +36 -0
  160. package/lib/index.js.map +1 -0
  161. package/lib/internals/Colorize.d.ts +14 -0
  162. package/lib/internals/Colorize.d.ts.map +1 -0
  163. package/lib/internals/Colorize.js +69 -0
  164. package/lib/internals/Colorize.js.map +1 -0
  165. package/lib/internals/index.d.ts +3 -0
  166. package/lib/internals/index.d.ts.map +1 -0
  167. package/lib/internals/index.js +19 -0
  168. package/lib/internals/index.js.map +1 -0
  169. package/lib/internals/types.d.ts +43 -0
  170. package/lib/internals/types.d.ts.map +1 -0
  171. package/lib/internals/types.js +3 -0
  172. package/lib/internals/types.js.map +1 -0
  173. package/lib/server/IndexManager.d.ts +78 -0
  174. package/lib/server/IndexManager.d.ts.map +1 -0
  175. package/lib/server/IndexManager.js +259 -0
  176. package/lib/server/IndexManager.js.map +1 -0
  177. package/lib/server/VectraServer.d.ts +40 -0
  178. package/lib/server/VectraServer.d.ts.map +1 -0
  179. package/lib/server/VectraServer.js +151 -0
  180. package/lib/server/VectraServer.js.map +1 -0
  181. package/lib/server/VectraServer.spec.d.ts +2 -0
  182. package/lib/server/VectraServer.spec.d.ts.map +1 -0
  183. package/lib/server/VectraServer.spec.js +322 -0
  184. package/lib/server/VectraServer.spec.js.map +1 -0
  185. package/lib/server/handlers/documentHandlers.d.ts +15 -0
  186. package/lib/server/handlers/documentHandlers.d.ts.map +1 -0
  187. package/lib/server/handlers/documentHandlers.js +95 -0
  188. package/lib/server/handlers/documentHandlers.js.map +1 -0
  189. package/lib/server/handlers/helpers.d.ts +23 -0
  190. package/lib/server/handlers/helpers.d.ts.map +1 -0
  191. package/lib/server/handlers/helpers.js +138 -0
  192. package/lib/server/handlers/helpers.js.map +1 -0
  193. package/lib/server/handlers/index.d.ts +8 -0
  194. package/lib/server/handlers/index.d.ts.map +1 -0
  195. package/lib/server/handlers/index.js +22 -0
  196. package/lib/server/handlers/index.js.map +1 -0
  197. package/lib/server/handlers/indexHandlers.d.ts +14 -0
  198. package/lib/server/handlers/indexHandlers.d.ts.map +1 -0
  199. package/lib/server/handlers/indexHandlers.js +85 -0
  200. package/lib/server/handlers/indexHandlers.js.map +1 -0
  201. package/lib/server/handlers/itemHandlers.d.ts +34 -0
  202. package/lib/server/handlers/itemHandlers.d.ts.map +1 -0
  203. package/lib/server/handlers/itemHandlers.js +166 -0
  204. package/lib/server/handlers/itemHandlers.js.map +1 -0
  205. package/lib/server/handlers/lifecycleHandlers.d.ts +11 -0
  206. package/lib/server/handlers/lifecycleHandlers.d.ts.map +1 -0
  207. package/lib/server/handlers/lifecycleHandlers.js +31 -0
  208. package/lib/server/handlers/lifecycleHandlers.js.map +1 -0
  209. package/lib/server/handlers/queryHandlers.d.ts +27 -0
  210. package/lib/server/handlers/queryHandlers.d.ts.map +1 -0
  211. package/lib/server/handlers/queryHandlers.js +135 -0
  212. package/lib/server/handlers/queryHandlers.js.map +1 -0
  213. package/lib/server/handlers/statsHandlers.d.ts +17 -0
  214. package/lib/server/handlers/statsHandlers.d.ts.map +1 -0
  215. package/lib/server/handlers/statsHandlers.js +81 -0
  216. package/lib/server/handlers/statsHandlers.js.map +1 -0
  217. package/lib/server/index.d.ts +4 -0
  218. package/lib/server/index.d.ts.map +1 -0
  219. package/lib/server/index.js +23 -0
  220. package/lib/server/index.js.map +1 -0
  221. package/lib/storage/FileStorage.d.ts +92 -0
  222. package/lib/storage/FileStorage.d.ts.map +1 -0
  223. package/lib/storage/FileStorage.js +3 -0
  224. package/lib/storage/FileStorage.js.map +1 -0
  225. package/lib/storage/FileStorageUtilities.d.ts +36 -0
  226. package/lib/storage/FileStorageUtilities.d.ts.map +1 -0
  227. package/lib/storage/FileStorageUtilities.js +91 -0
  228. package/lib/storage/FileStorageUtilities.js.map +1 -0
  229. package/lib/storage/FileStorageUtilities.spec.d.ts +2 -0
  230. package/lib/storage/FileStorageUtilities.spec.d.ts.map +1 -0
  231. package/lib/storage/FileStorageUtilities.spec.js +98 -0
  232. package/lib/storage/FileStorageUtilities.spec.js.map +1 -0
  233. package/lib/storage/FileType.d.ts +29 -0
  234. package/lib/storage/FileType.d.ts.map +1 -0
  235. package/lib/storage/FileType.js +38 -0
  236. package/lib/storage/FileType.js.map +1 -0
  237. package/lib/storage/IndexedDBStorage.d.ts +47 -0
  238. package/lib/storage/IndexedDBStorage.d.ts.map +1 -0
  239. package/lib/storage/IndexedDBStorage.js +347 -0
  240. package/lib/storage/IndexedDBStorage.js.map +1 -0
  241. package/lib/storage/LocalFileStorage.browser.d.ts +19 -0
  242. package/lib/storage/LocalFileStorage.browser.d.ts.map +1 -0
  243. package/lib/storage/LocalFileStorage.browser.js +43 -0
  244. package/lib/storage/LocalFileStorage.browser.js.map +1 -0
  245. package/lib/storage/LocalFileStorage.d.ts +23 -0
  246. package/lib/storage/LocalFileStorage.d.ts.map +1 -0
  247. package/lib/storage/LocalFileStorage.js +152 -0
  248. package/lib/storage/LocalFileStorage.js.map +1 -0
  249. package/lib/storage/LocalFileStorage.spec.d.ts +2 -0
  250. package/lib/storage/LocalFileStorage.spec.d.ts.map +1 -0
  251. package/lib/storage/LocalFileStorage.spec.js +249 -0
  252. package/lib/storage/LocalFileStorage.spec.js.map +1 -0
  253. package/lib/storage/VirtualFileStorage.d.ts +18 -0
  254. package/lib/storage/VirtualFileStorage.d.ts.map +1 -0
  255. package/lib/storage/VirtualFileStorage.js +178 -0
  256. package/lib/storage/VirtualFileStorage.js.map +1 -0
  257. package/lib/storage/VirtualFileStorage.spec.d.ts +2 -0
  258. package/lib/storage/VirtualFileStorage.spec.d.ts.map +1 -0
  259. package/lib/storage/VirtualFileStorage.spec.js +302 -0
  260. package/lib/storage/VirtualFileStorage.spec.js.map +1 -0
  261. package/lib/storage/index.d.ts +6 -0
  262. package/lib/storage/index.d.ts.map +1 -0
  263. package/lib/storage/index.js +22 -0
  264. package/lib/storage/index.js.map +1 -0
  265. package/lib/templates/templates/csharp/README.md +48 -0
  266. package/lib/templates/templates/csharp/VectraClient.cs +234 -0
  267. package/lib/templates/templates/go/README.md +71 -0
  268. package/lib/templates/templates/go/vectra_client.go +322 -0
  269. package/lib/templates/templates/java/README.md +81 -0
  270. package/lib/templates/templates/java/VectraClient.java +232 -0
  271. package/lib/templates/templates/python/README.md +37 -0
  272. package/lib/templates/templates/python/vectra_client.py +279 -0
  273. package/lib/templates/templates/rust/Cargo.toml +14 -0
  274. package/lib/templates/templates/rust/README.md +39 -0
  275. package/lib/templates/templates/rust/build.rs +4 -0
  276. package/lib/templates/templates/rust/lib.rs +284 -0
  277. package/lib/templates/templates/typescript/README.md +96 -0
  278. package/lib/templates/templates/typescript/VectraClient.ts +374 -0
  279. package/lib/templates/typescript/VectraClient.d.ts +114 -0
  280. package/lib/templates/typescript/VectraClient.d.ts.map +1 -0
  281. package/lib/templates/typescript/VectraClient.js +328 -0
  282. package/lib/templates/typescript/VectraClient.js.map +1 -0
  283. package/lib/types.d.ts +153 -0
  284. package/lib/types.d.ts.map +1 -0
  285. package/lib/types.js +3 -0
  286. package/lib/types.js.map +1 -0
  287. package/lib/utils/index.d.ts +2 -0
  288. package/lib/utils/index.d.ts.map +1 -0
  289. package/lib/utils/index.js +18 -0
  290. package/lib/utils/index.js.map +1 -0
  291. package/lib/utils/pathUtils.d.ts +40 -0
  292. package/lib/utils/pathUtils.d.ts.map +1 -0
  293. package/lib/utils/pathUtils.js +98 -0
  294. package/lib/utils/pathUtils.js.map +1 -0
  295. package/lib/vectra-cli.d.ts +2 -0
  296. package/lib/vectra-cli.d.ts.map +1 -1
  297. package/lib/vectra-cli.generate.spec.d.ts +2 -0
  298. package/lib/vectra-cli.generate.spec.d.ts.map +1 -0
  299. package/lib/vectra-cli.generate.spec.js +112 -0
  300. package/lib/vectra-cli.generate.spec.js.map +1 -0
  301. package/lib/vectra-cli.js +760 -0
  302. package/lib/vectra-cli.js.map +1 -0
  303. package/lib/vectra-cli.spec.d.ts +1 -0
  304. package/lib/vectra-cli.spec.d.ts.map +1 -0
  305. package/lib/vectra-cli.spec.js +2 -0
  306. package/lib/vectra-cli.spec.js.map +1 -0
  307. package/package.json +91 -16
  308. package/proto/vectra_service.proto +276 -0
  309. package/src/BrowserWebFetcher.ts +345 -0
  310. package/src/FileFetcher.spec.ts +234 -0
  311. package/src/FileFetcher.ts +37 -25
  312. package/src/FolderWatcher.spec.ts +288 -0
  313. package/src/FolderWatcher.ts +304 -0
  314. package/src/GPT3Tokenizer.spec.ts +50 -0
  315. package/src/ItemSelector.spec.ts +252 -0
  316. package/src/ItemSelector.ts +163 -150
  317. package/src/LocalDocument.spec.ts +211 -0
  318. package/src/LocalDocument.ts +88 -94
  319. package/src/LocalDocumentIndex.spec.ts +481 -0
  320. package/src/LocalDocumentIndex.ts +39 -40
  321. package/src/LocalDocumentResult.spec.ts +373 -0
  322. package/src/LocalDocumentResult.ts +489 -319
  323. package/src/LocalEmbeddings.spec.ts +138 -0
  324. package/src/LocalEmbeddings.ts +120 -0
  325. package/src/LocalIndex.spec.ts +808 -66
  326. package/src/LocalIndex.ts +479 -429
  327. package/src/OpenAIEmbeddings.spec.ts +354 -0
  328. package/src/OpenAIEmbeddings.ts +26 -27
  329. package/src/TextSplitter.spec.ts +342 -0
  330. package/src/TextSplitter.ts +517 -532
  331. package/src/TransformersEmbeddings.spec.ts +188 -0
  332. package/src/TransformersEmbeddings.ts +232 -0
  333. package/src/TransformersTokenizer.spec.ts +143 -0
  334. package/src/TransformersTokenizer.ts +45 -0
  335. package/src/WebFetcher.spec.ts +288 -0
  336. package/src/WebFetcher.ts +184 -186
  337. package/src/browser.ts +69 -0
  338. package/src/codecs/IndexCodec.ts +40 -0
  339. package/src/codecs/JsonCodec.spec.ts +70 -0
  340. package/src/codecs/JsonCodec.ts +37 -0
  341. package/src/codecs/LocalIndex.protobuf.spec.ts +115 -0
  342. package/src/codecs/ProtobufCodec.spec.ts +166 -0
  343. package/src/codecs/ProtobufCodec.ts +193 -0
  344. package/src/codecs/index.ts +4 -0
  345. package/src/codecs/migrateIndex.spec.ts +176 -0
  346. package/src/codecs/migrateIndex.ts +125 -0
  347. package/src/codecs/schemas/index.proto +34 -0
  348. package/src/index.ts +9 -1
  349. package/src/internals/Colorize.ts +19 -16
  350. package/src/server/IndexManager.ts +243 -0
  351. package/src/server/VectraServer.spec.ts +303 -0
  352. package/src/server/VectraServer.ts +156 -0
  353. package/src/server/handlers/documentHandlers.ts +59 -0
  354. package/src/server/handlers/helpers.ts +93 -0
  355. package/src/server/handlers/index.ts +7 -0
  356. package/src/server/handlers/indexHandlers.ts +44 -0
  357. package/src/server/handlers/itemHandlers.ts +140 -0
  358. package/src/server/handlers/lifecycleHandlers.ts +26 -0
  359. package/src/server/handlers/queryHandlers.ts +96 -0
  360. package/src/server/handlers/statsHandlers.ts +38 -0
  361. package/src/server/index.ts +3 -0
  362. package/src/storage/FileStorage.ts +105 -0
  363. package/src/storage/FileStorageUtilities.spec.ts +106 -0
  364. package/src/storage/FileStorageUtilities.ts +77 -0
  365. package/src/storage/FileType.ts +61 -0
  366. package/src/storage/IndexedDBStorage.ts +365 -0
  367. package/src/storage/LocalFileStorage.browser.ts +52 -0
  368. package/src/storage/LocalFileStorage.spec.ts +292 -0
  369. package/src/storage/LocalFileStorage.ts +98 -0
  370. package/src/storage/VirtualFileStorage.spec.ts +307 -0
  371. package/src/storage/VirtualFileStorage.ts +169 -0
  372. package/src/storage/index.ts +5 -0
  373. package/src/templates/csharp/README.md +48 -0
  374. package/src/templates/csharp/VectraClient.cs +234 -0
  375. package/src/templates/go/README.md +71 -0
  376. package/src/templates/go/vectra_client.go +322 -0
  377. package/src/templates/java/README.md +81 -0
  378. package/src/templates/java/VectraClient.java +232 -0
  379. package/src/templates/python/README.md +37 -0
  380. package/src/templates/python/vectra_client.py +279 -0
  381. package/src/templates/rust/Cargo.toml +14 -0
  382. package/src/templates/rust/README.md +39 -0
  383. package/src/templates/rust/build.rs +4 -0
  384. package/src/templates/rust/lib.rs +284 -0
  385. package/src/templates/typescript/README.md +96 -0
  386. package/src/templates/typescript/VectraClient.ts +374 -0
  387. package/src/types.ts +131 -123
  388. package/src/utils/index.ts +1 -0
  389. package/src/utils/pathUtils.ts +106 -0
  390. package/src/vectra-cli.generate.spec.ts +72 -0
  391. package/src/vectra-cli.spec.ts +0 -0
  392. package/src/vectra-cli.ts +687 -246
package/src/vectra-cli.ts CHANGED
@@ -1,4 +1,6 @@
1
1
  import * as fs from 'fs/promises';
2
+ import * as fsSync from 'fs';
3
+ import * as path from 'path';
2
4
  import yargs from "yargs/yargs";
3
5
  import { hideBin } from "yargs/helpers";
4
6
  import { LocalDocumentIndex } from "./LocalDocumentIndex";
@@ -6,272 +8,711 @@ import { WebFetcher } from './WebFetcher';
6
8
  import { AzureOpenAIEmbeddingsOptions, OSSEmbeddingsOptions, OpenAIEmbeddings, OpenAIEmbeddingsOptions } from './OpenAIEmbeddings';
7
9
  import { Colorize } from './internals';
8
10
  import { FileFetcher } from './FileFetcher';
11
+ import { LocalFileStorage } from './storage/LocalFileStorage';
12
+ import { VirtualFileStorage } from './storage/VirtualFileStorage';
13
+ import { IndexCodec, JsonCodec, ProtobufCodec, detectCodec, migrateIndex, FormatName } from './codecs';
14
+ import { VectraServer } from './server/VectraServer';
15
+ import { FolderWatcher } from './FolderWatcher';
16
+
17
/**
 * Picks the file storage backend for a CLI invocation.
 *
 * @param args Parsed command-line arguments; only `storage` and `storageRoot` are read.
 * @returns A `VirtualFileStorage` when `args.storage` is exactly `'virtual'`;
 * otherwise a `LocalFileStorage` constructed with `args.storageRoot`.
 */
function getStorage(args: any) {
    // Any value other than 'virtual' (including a missing one) selects local storage.
    const useVirtual = args.storage === 'virtual';
    return useVirtual ? new VirtualFileStorage() : new LocalFileStorage(args.storageRoot);
}
24
+
25
/**
 * Maps a serialization format name to a codec instance.
 *
 * @param format Format name; `'protobuf'` and `'json'` are recognized.
 * @returns A new `ProtobufCodec` or `JsonCodec` for a recognized name, or
 * `undefined` for any other value (including an omitted one), leaving the
 * choice of codec to the caller.
 */
function getCodecFromFormat(format?: string): IndexCodec | undefined {
    switch (format) {
        case 'protobuf':
            return new ProtobufCodec();
        case 'json':
            return new JsonCodec();
        default:
            // Unrecognized or missing format: no explicit codec.
            return undefined;
    }
}
9
30
 
10
31
  export async function run() {
11
- // prettier-ignore
12
- const args = await yargs(hideBin(process.argv))
13
- .scriptName('vectra')
14
- .command('create <index>', `create a new local index`, {}, async (args) => {
15
- const folderPath = args.index as string;
16
- const index = new LocalDocumentIndex({ folderPath });
17
- console.log(Colorize.output(`creating index at ${folderPath}`));
18
- await index.createIndex({ version: 1, deleteIfExists: true });
32
+ // prettier-ignore
33
+ const args = await yargs(hideBin(process.argv))
34
+ .scriptName('vectra')
35
+ .option('storage', {
36
+ describe: 'storage backend to use',
37
+ choices: ['local', 'virtual'],
38
+ default: 'local'
39
+ })
40
+ .option('storage-root', {
41
+ describe: 'root folder for local storage (only applies if storage=local)',
42
+ type: 'string'
43
+ })
44
+ .command('create <index>', `create a new local index`, (yargs) => {
45
+ return yargs.option('format', {
46
+ describe: 'serialization format for the index',
47
+ choices: ['json', 'protobuf'] as const,
48
+ default: 'json' as const
49
+ });
50
+ }, async (args) => {
51
+ const folderPath = args.index as string;
52
+ const storage = getStorage(args);
53
+ const codec = getCodecFromFormat(args.format);
54
+ const index = new LocalDocumentIndex({ folderPath, storage, codec });
55
+ const formatLabel = args.format === 'protobuf' ? 'protobuf' : 'json';
56
+ console.log(Colorize.output(`creating ${formatLabel} index at ${folderPath}`));
57
+ await index.createIndex({ version: 1, deleteIfExists: true });
58
+ })
59
+ .command('delete <index>', `delete an existing local index`, {}, async (args) => {
60
+ const folderPath = args.index as string;
61
+ console.log(Colorize.output(`deleting index at ${folderPath}`));
62
+ const storage = getStorage(args);
63
+ const codec = await detectCodec(folderPath, storage).catch(() => undefined);
64
+ const index = new LocalDocumentIndex({ folderPath, storage, codec });
65
+ await index.deleteIndex();
66
+ })
67
+ .command('add <index>', `adds one or more web pages to an index`, (yargs) => {
68
+ return yargs
69
+ .option('keys', {
70
+ alias: 'k',
71
+ describe: 'path of a JSON file containing the model keys to use for generating embeddings',
72
+ type: 'string'
73
+ })
74
+ .option('uri', {
75
+ alias: 'u',
76
+ array: true,
77
+ describe: 'http/https link to a web page to add',
78
+ type: 'string'
79
+ })
80
+ .option('list', {
81
+ alias: 'l',
82
+ describe: 'path to a file containing a list of web pages to add',
83
+ type: 'string'
84
+ })
85
+ .option('cookie', {
86
+ alias: 'c',
87
+ describe: 'optional cookies to add to web fetch requests',
88
+ type: 'string'
89
+ })
90
+ .option('chunk-size', {
91
+ alias: 'cs',
92
+ describe: 'size of the generated chunks in tokens (defaults to 512)',
93
+ type: 'number',
94
+ default: 512
95
+ })
96
+ .check((argv) => {
97
+ if (Array.isArray(argv.uri) && argv.uri.length > 0) {
98
+ return true;
99
+ } else if (typeof argv.list == 'string' && argv.list.trim().length > 0) {
100
+ return true;
101
+ } else {
102
+ throw new Error(`you must specify either one or more "--uri <link>" for the pages to add or a "--list <file path>" for a file containing the list of pages to add.`);
103
+ }
104
+ })
105
+ .demandOption(['keys']);
106
+ }, async (args) => {
107
+ console.log(Colorize.title('Adding Web Pages to Index'));
108
+ // Get embedding options
109
+ const options: OpenAIEmbeddingsOptions | AzureOpenAIEmbeddingsOptions | OSSEmbeddingsOptions = JSON.parse(await fs.readFile(args.keys as string, 'utf-8'));
110
+ if ((options as OpenAIEmbeddingsOptions).apiKey && !(options as OpenAIEmbeddingsOptions).model) {
111
+ (options as OpenAIEmbeddingsOptions).model = 'text-embedding-ada-002';
112
+ (options as OpenAIEmbeddingsOptions).maxTokens = 8000;
113
+ }
114
+ // Create embeddings
115
+ const embeddings = new OpenAIEmbeddings(options);
116
+ // Initialize index
117
+ const folderPath = args.index as string;
118
+ const storage = getStorage(args);
119
+ const codec = await detectCodec(folderPath, storage).catch(() => undefined);
120
+ const index = new LocalDocumentIndex({
121
+ folderPath,
122
+ embeddings,
123
+ chunkingConfig: {
124
+ chunkSize: args.chunkSize
125
+ },
126
+ storage,
127
+ codec
128
+ });
129
+ // Get list of url's
130
+ const uris = await getItemList(args.uri as string[], args.list as string, 'web page');
131
+ // Fetch documents
132
+ const fileFetcher = new FileFetcher();
133
+ const webFetcher = args.cookie ? new WebFetcher({ headers: { "cookie": args.cookie } }) : new WebFetcher();
134
+ for (const path of uris) {
135
+ try {
136
+ console.log(Colorize.progress(`fetching ${path}`));
137
+ const fetcher = path.startsWith('http') ? webFetcher : fileFetcher;
138
+ await fetcher.fetch(path, async (uri, text, docType) => {
139
+ console.log(Colorize.replaceLine(Colorize.progress(`indexing ${uri}`)));
140
+ await index.upsertDocument(uri, text, docType);
141
+ console.log(Colorize.replaceLine(Colorize.success(`added ${uri}`)));
142
+ return true;
143
+ });
144
+ } catch (err: unknown) {
145
+ console.log(Colorize.replaceLine(Colorize.error(`Error adding: ${path}\n${(err as Error).message}`)));
146
+ }
147
+ }
148
+ })
149
+ .command('remove <index>', `removes one or more documents from an index`, (yargs) => {
150
+ return yargs
151
+ .option('uri', {
152
+ alias: 'u',
153
+ array: true,
154
+ describe: 'uri of a document to remove',
155
+ type: 'string'
156
+ })
157
+ .option('list', {
158
+ alias: 'l',
159
+ describe: 'path to a file containing a list of documents to remove',
160
+ type: 'string'
161
+ })
162
+ .check((argv) => {
163
+ if (Array.isArray(argv.uri) && argv.uri.length > 0) {
164
+ return true;
165
+ } else if (typeof argv.list == 'string' && argv.list.trim().length > 0) {
166
+ return true;
167
+ } else {
168
+ throw new Error(`you must specify either one or more "--uri <link>" for the pages to add or a "--list <file path>" for a file containing the list of pages to add.`);
169
+ }
170
+ });
171
+ }, async (args) => {
172
+ // Initialize index
173
+ const folderPath = args.index as string;
174
+ const storage = getStorage(args);
175
+ const codec = await detectCodec(folderPath, storage).catch(() => undefined);
176
+ const index = new LocalDocumentIndex({ folderPath, storage, codec });
177
+ // Get list of uri's
178
+ const uris = await getItemList(args.uri as string[], args.list as string, 'document');
179
+ // Remove documents
180
+ for (const uri of uris) {
181
+ console.log(`removing ${uri}`);
182
+ await index.deleteDocument(uri);
183
+ }
184
+ })
185
+ .command('stats <index>', `prints the stats for a local index`, (yargs) => {
186
+ return yargs;
187
+ }, async (args) => {
188
+ const folderPath = args.index as string;
189
+ const storage = getStorage(args);
190
+ // Auto-detect format from files on disk
191
+ const codec = await detectCodec(folderPath, storage);
192
+ const index = new LocalDocumentIndex({ folderPath, storage, codec });
193
+ const stats = await index.getCatalogStats();
194
+ console.log(Colorize.title('Index Stats'));
195
+ console.log(Colorize.output(stats));
196
+ })
197
+ .command('migrate <index>', `migrate an index between serialization formats`, (yargs) => {
198
+ return yargs.option('to', {
199
+ describe: 'target format',
200
+ choices: ['json', 'protobuf'] as const,
201
+ demandOption: true
202
+ });
203
+ }, async (args) => {
204
+ const folderPath = args.index as string;
205
+ const storage = getStorage(args);
206
+ const to = args.to as FormatName;
207
+ console.log(Colorize.output(`migrating index at ${folderPath} to ${to} format`));
208
+ await migrateIndex(folderPath, { to, storage });
209
+ console.log(Colorize.output(`migration complete`));
210
+ })
211
+ .command('query <index> <query>', `queries a local index`, (yargs) => {
212
+ return yargs
213
+ .option('keys', {
214
+ alias: 'k',
215
+ describe: 'path of a JSON file containing the model keys to use for generating embeddings'
216
+ })
217
+ .option('document-count', {
218
+ alias: 'dc',
219
+ describe: 'max number of documents to return (defaults to 10)',
220
+ type: 'number',
221
+ default: 10
222
+ })
223
+ .option('chunk-count', {
224
+ alias: 'cc',
225
+ describe: 'max number of chunks to return (defaults to 50)',
226
+ type: 'number',
227
+ default: 50
228
+ })
229
+ .option('section-count', {
230
+ alias: 'sc',
231
+ describe: 'max number of document sections to render (defaults to 1)',
232
+ type: 'number',
233
+ default: 1
234
+ })
235
+ .option('tokens', {
236
+ alias: 't',
237
+ describe: 'max number of tokens to render for each document section (defaults to 2000)',
238
+ type: 'number',
239
+ default: 2000
240
+ })
241
+ .option('format', {
242
+ alias: 'f',
243
+ describe: `format of the rendered results. Defaults to 'sections'`,
244
+ choices: ['sections', 'stats', 'chunks'],
245
+ default: 'sections'
246
+ })
247
+ .option('overlap', {
248
+ alias: 'o',
249
+ describe: `whether to add overlapping chunks to sections.`,
250
+ type: 'boolean',
251
+ default: true
252
+ })
253
+ .option('bm25', {
254
+ alias: 'b',
255
+ describe: 'Use Okapi-bm25 keyword search alogrithm to perform hybrid search - semantic + keyword. Displayed in blue during search.',
256
+ type: 'boolean',
257
+ default: false
258
+ })
259
+ .demandOption(['keys']);
260
+ }, async (args) => {
261
+ console.log(Colorize.title('Querying Index'));
262
+ // Get embedding options
263
+ const options: OpenAIEmbeddingsOptions | AzureOpenAIEmbeddingsOptions | OSSEmbeddingsOptions = JSON.parse(await fs.readFile(args.keys as string, 'utf-8'));
264
+ if ((options as OpenAIEmbeddingsOptions).apiKey && !(options as OpenAIEmbeddingsOptions).model) {
265
+ (options as OpenAIEmbeddingsOptions).model = 'text-embedding-ada-002';
266
+ (options as OpenAIEmbeddingsOptions).maxTokens = 8000;
267
+ }
268
+ // Create embeddings
269
+ const embeddings = new OpenAIEmbeddings(options);
270
+ // Initialize index
271
+ const folderPath = args.index as string;
272
+ const storage = getStorage(args);
273
+ const codec = await detectCodec(folderPath, storage).catch(() => undefined);
274
+ const index = new LocalDocumentIndex({
275
+ folderPath,
276
+ embeddings,
277
+ storage,
278
+ codec
279
+ });
280
+ // Query index
281
+ const query = args.query as string;
282
+ const results = await index.queryDocuments(query, {
283
+ maxDocuments: args.documentCount,
284
+ maxChunks: args.chunkCount,
285
+ isBm25: args.bm25 as boolean,
286
+ });
287
+ // Render results
288
+ for (const result of results) {
289
+ console.log(Colorize.output(result.uri));
290
+ console.log(Colorize.value('score', result.score));
291
+ console.log(Colorize.value('chunks', result.chunks.length));
292
+ if (args.format == 'sections') {
293
+ const sections = await result.renderSections(args.tokens, args.sectionCount, args.overlap);
294
+ console.log(sections.length);
295
+ for (let i = 0; i < sections.length; i++) {
296
+ const section = sections[i];
297
+ const isBm25 = sections[i].isBm25;
298
+ console.log(isBm25);
299
+ console.log(Colorize.title(args.sectionCount == 1 ? 'Section' : `Section ${i + 1}`));
300
+ console.log(Colorize.value('score', section.score));
301
+ console.log(Colorize.value('tokens', section.tokenCount));
302
+ console.log(Colorize.output(section.text, isBm25));
303
+ }
304
+ } else if (args.format == 'chunks') {
305
+ const text = await result.loadText();
306
+ for (let i = 0; i < result.chunks.length; i++) {
307
+ const chunk = result.chunks[i];
308
+ const startPos = chunk.item.metadata.startPos;
309
+ const endPos = chunk.item.metadata.endPos;
310
+ const isBm25 = Boolean(chunk.item.metadata.isBm25);
311
+ console.log(Colorize.title(`Chunk ${i + 1}`));
312
+ console.log(Colorize.value('score', chunk.score));
313
+ console.log(Colorize.value('startPos', startPos));
314
+ console.log(Colorize.value('endPos', endPos));
315
+ console.log(Colorize.output(text.substring(startPos, endPos + 1), isBm25));
316
+ }
317
+ }
318
+ }
319
+ })
320
+ .command('watch <index>', 'watch folders and automatically sync file changes into the index', (yargs) => {
321
+ return yargs
322
+ .option('keys', {
323
+ alias: 'k',
324
+ describe: 'path of a JSON file containing the model keys to use for generating embeddings',
325
+ type: 'string'
326
+ })
327
+ .option('uri', {
328
+ alias: 'u',
329
+ array: true,
330
+ describe: 'folder or file path to watch',
331
+ type: 'string'
332
+ })
333
+ .option('list', {
334
+ alias: 'l',
335
+ describe: 'path to a file containing a list of folders/files to watch',
336
+ type: 'string'
19
337
  })
20
- .command('delete <index>', `delete an existing local index`, {}, async (args) => {
21
- const folderPath = args.index as string;
22
- console.log(Colorize.output(`deleting index at ${folderPath}`));
23
- const index = new LocalDocumentIndex({ folderPath });
24
- await index.deleteIndex();
338
+ .option('extensions', {
339
+ alias: 'e',
340
+ array: true,
341
+ describe: 'file extensions to include (e.g., .txt .md .html)',
342
+ type: 'string'
25
343
  })
26
- .command('add <index>', `adds one or more web pages to an index`, (yargs) => {
27
- return yargs
28
- .option('keys', {
29
- alias: 'k',
30
- describe: 'path of a JSON file containing the model keys to use for generating embeddings',
31
- type: 'string'
32
- })
33
- .option('uri', {
34
- alias: 'u',
35
- array: true,
36
- describe: 'http/https link to a web page to add',
37
- type: 'string'
38
- })
39
- .option('list', {
40
- alias: 'l',
41
- describe: 'path to a file containing a list of web pages to add',
42
- type: 'string'
43
- })
44
- .option('cookie', {
45
- alias: 'c',
46
- describe: 'optional cookies to add to web fetch requests',
47
- type: 'string'
48
- })
49
- .option('chunk-size', {
50
- alias: 'cs',
51
- describe: 'size of the generated chunks in tokens (defaults to 512)',
52
- type: 'number',
53
- default: 512
54
- })
55
- .check((argv) => {
56
- if (Array.isArray(argv.uri) && argv.uri.length > 0) {
57
- return true;
58
- } else if (typeof argv.list == 'string' && argv.list.trim().length > 0) {
59
- return true;
60
- } else {
61
- throw new Error(`you must specify either one or more "--uri <link>" for the pages to add or a "--list <file path>" for a file containing the list of pages to add.`);
62
- }
63
- })
64
- .demandOption(['keys']);
65
- }, async (args) => {
66
- console.log(Colorize.title('Adding Web Pages to Index'));
344
+ .option('chunk-size', {
345
+ alias: 'cs',
346
+ describe: 'size of the generated chunks in tokens (defaults to 512)',
347
+ type: 'number',
348
+ default: 512
349
+ })
350
+ .option('debounce', {
351
+ describe: 'debounce interval in milliseconds (defaults to 500)',
352
+ type: 'number',
353
+ default: 500
354
+ })
355
+ .check((argv) => {
356
+ if (Array.isArray(argv.uri) && argv.uri.length > 0) {
357
+ return true;
358
+ } else if (typeof argv.list == 'string' && argv.list.trim().length > 0) {
359
+ return true;
360
+ } else {
361
+ throw new Error(`you must specify either one or more "--uri <path>" for the folders/files to watch or a "--list <file path>" for a file containing the paths.`);
362
+ }
363
+ })
364
+ .demandOption(['keys']);
365
+ }, async (args) => {
366
+ console.log(Colorize.title('Vectra Watch Mode'));
367
+
368
+ // Get embedding options
369
+ const options: OpenAIEmbeddingsOptions | AzureOpenAIEmbeddingsOptions | OSSEmbeddingsOptions = JSON.parse(await fs.readFile(args.keys as string, 'utf-8'));
370
+ if ((options as OpenAIEmbeddingsOptions).apiKey && !(options as OpenAIEmbeddingsOptions).model) {
371
+ (options as OpenAIEmbeddingsOptions).model = 'text-embedding-ada-002';
372
+ (options as OpenAIEmbeddingsOptions).maxTokens = 8000;
373
+ }
374
+
375
+ // Create embeddings
376
+ const embeddings = new OpenAIEmbeddings(options);
377
+
378
+ // Initialize index
379
+ const folderPath = args.index as string;
380
+ const storage = getStorage(args);
381
+ const codec = await detectCodec(folderPath, storage).catch(() => undefined);
382
+ const index = new LocalDocumentIndex({
383
+ folderPath,
384
+ embeddings,
385
+ chunkingConfig: {
386
+ chunkSize: args.chunkSize
387
+ },
388
+ storage,
389
+ codec
390
+ });
67
391
 
68
- // Get embedding options
69
- const options: OpenAIEmbeddingsOptions|AzureOpenAIEmbeddingsOptions|OSSEmbeddingsOptions = JSON.parse(await fs.readFile(args.keys as string, 'utf-8'));
70
- if ((options as OpenAIEmbeddingsOptions).apiKey && !(options as OpenAIEmbeddingsOptions).model) {
71
- (options as OpenAIEmbeddingsOptions).model = 'text-embedding-ada-002';
72
- (options as OpenAIEmbeddingsOptions).maxTokens = 8000;
73
- }
392
+ // Get list of paths to watch
393
+ const watchPaths = await getItemList(args.uri as string[], args.list as string, 'path');
74
394
 
75
- // Create embeddings
76
- const embeddings = new OpenAIEmbeddings(options);
395
+ // Create watcher
396
+ const watcher = new FolderWatcher({
397
+ index,
398
+ paths: watchPaths,
399
+ extensions: args.extensions as string[] | undefined,
400
+ debounceMs: args.debounce
401
+ });
77
402
 
78
- // Initialize index
79
- const folderPath = args.index as string;
80
- const index = new LocalDocumentIndex({
81
- folderPath,
82
- embeddings,
83
- chunkingConfig: {
84
- chunkSize: args.chunkSize
85
- }
86
- });
403
+ // Wire up events
404
+ watcher.on('sync', (uri: string, action: string) => {
405
+ if (action === 'deleted') {
406
+ console.log(Colorize.warning(`removed ${uri}`));
407
+ } else {
408
+ console.log(Colorize.success(`${action} ${uri}`));
409
+ }
410
+ });
411
+ watcher.on('error', (err: Error, uri: string) => {
412
+ console.log(Colorize.error(`Error syncing ${uri}: ${err.message}`));
413
+ });
87
414
 
88
- // Get list of url's
89
- const uris = await getItemList(args.uri as string[], args.list as string, 'web page');
415
+ // Start watching
416
+ console.log(Colorize.progress(`performing initial sync...`));
417
+ await watcher.start();
418
+ console.log(Colorize.success(`initial sync complete (${watcher.trackedFileCount} files tracked)`));
419
+ console.log(Colorize.output(`watching for changes... (press Ctrl+C to stop)`));
90
420
 
91
- // Fetch documents
92
- const fileFetcher = new FileFetcher();
93
- const webFetcher = args.cookie ? new WebFetcher({ headers: { "cookie": args.cookie }}) : new WebFetcher();
94
- for (const path of uris) {
95
- try {
96
- console.log(Colorize.progress(`fetching ${path}`));
97
- const fetcher = path.startsWith('http') ? webFetcher : fileFetcher;
98
- await fetcher.fetch(path, async (uri, text, docType) => {
99
- console.log(Colorize.replaceLine(Colorize.progress(`indexing ${uri}`)));
100
- await index.upsertDocument(uri, text, docType);
101
- console.log(Colorize.replaceLine(Colorize.success(`added ${uri}`)));
102
- return true;
103
- });
104
- } catch (err: unknown) {
105
- console.log(Colorize.replaceLine(Colorize.error(`Error adding: ${path}\n${(err as Error).message}`)));
106
- }
107
- }
421
+ // Handle graceful shutdown
422
+ const handleSignal = async () => {
423
+ console.log(Colorize.output('\nStopping watcher...'));
424
+ await watcher.stop();
425
+ process.exit(0);
426
+ };
427
+ process.on('SIGINT', handleSignal);
428
+ process.on('SIGTERM', handleSignal);
429
+ })
430
+ .command('generate', 'generate language bindings for the gRPC service', (yargs) => {
431
+ return yargs
432
+ .option('language', {
433
+ alias: 'l',
434
+ describe: 'target language for the generated bindings',
435
+ choices: ['python', 'csharp', 'rust', 'go', 'java', 'typescript'] as const,
436
+ demandOption: true
108
437
  })
109
- .command('remove <index>', `removes one or more documents from an index`, (yargs) => {
110
- return yargs
111
- .option('uri', {
112
- alias: 'u',
113
- array: true,
114
- describe: 'uri of a document to remove',
115
- type: 'string'
116
- })
117
- .option('list', {
118
- alias: 'l',
119
- describe: 'path to a file containing a list of documents to remove',
120
- type: 'string'
121
- })
122
- .check((argv) => {
123
- if (Array.isArray(argv.uri) && argv.uri.length > 0) {
124
- return true;
125
- } else if (typeof argv.list == 'string' && argv.list.trim().length > 0) {
126
- return true;
127
- } else {
128
- throw new Error(`you must specify either one or more "--uri <link>" for the pages to add or a "--list <file path>" for a file containing the list of pages to add.`);
129
- }
130
- });
131
- }, async (args) => {
132
- // Initialize index
133
- const folderPath = args.index as string;
134
- const index = new LocalDocumentIndex({ folderPath });
438
+ .option('output', {
439
+ alias: 'o',
440
+ describe: 'output directory for the generated files',
441
+ type: 'string',
442
+ demandOption: true
443
+ });
444
+ }, async (args) => {
445
+ const language = args.language as string;
446
+ const outputDir = path.resolve(args.output as string);
447
+
448
+ // Locate the proto file check lib/ first (installed package), then project root
449
+ const protoSearchPaths = [
450
+ path.join(__dirname, '..', 'proto', 'vectra_service.proto'),
451
+ path.join(__dirname, '..', '..', 'proto', 'vectra_service.proto'),
452
+ ];
453
+ let protoSource: string | undefined;
454
+ for (const p of protoSearchPaths) {
455
+ if (fsSync.existsSync(p)) {
456
+ protoSource = p;
457
+ break;
458
+ }
459
+ }
460
+ if (!protoSource) {
461
+ console.error(Colorize.error('Could not locate vectra_service.proto'));
462
+ process.exit(1);
463
+ }
464
+
465
+ // Locate the template directory
466
+ const templateSearchPaths = [
467
+ path.join(__dirname, '..', 'src', 'templates', language),
468
+ path.join(__dirname, 'templates', language),
469
+ ];
470
+ let templateDir: string | undefined;
471
+ for (const p of templateSearchPaths) {
472
+ if (fsSync.existsSync(p)) {
473
+ templateDir = p;
474
+ break;
475
+ }
476
+ }
477
+ if (!templateDir) {
478
+ console.error(Colorize.error(`Could not locate template for language: ${language}`));
479
+ process.exit(1);
480
+ }
481
+
482
+ // Create output directory
483
+ await fs.mkdir(outputDir, { recursive: true });
484
+
485
+ // Copy proto file
486
+ const protoDest = path.join(outputDir, 'vectra_service.proto');
487
+ await fs.copyFile(protoSource, protoDest);
488
+ console.log(Colorize.success(`copied vectra_service.proto`));
489
+
490
+ // Copy all template files
491
+ const templateFiles = await fs.readdir(templateDir);
492
+ for (const file of templateFiles) {
493
+ const src = path.join(templateDir, file);
494
+ const stat = await fs.stat(src);
495
+ if (stat.isFile()) {
496
+ const dest = path.join(outputDir, file);
497
+ await fs.copyFile(src, dest);
498
+ console.log(Colorize.success(`copied ${file}`));
499
+ }
500
+ }
135
501
 
136
- // Get list of uri's
137
- const uris = await getItemList(args.uri as string[], args.list as string, 'document');
502
+ console.log(Colorize.output(`\nGenerated ${language} bindings in ${outputDir}`));
138
503
 
139
- // Remove documents
140
- for (const uri of uris) {
141
- console.log(`removing ${uri}`);
142
- await index.deleteDocument(uri);
143
- }
504
+ // Print next steps
505
+ const nextSteps: Record<string, string> = {
506
+ python: [
507
+ 'Next steps:',
508
+ ' pip install grpcio grpcio-tools',
509
+ ' python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. vectra_service.proto',
510
+ ].join('\n'),
511
+ csharp: [
512
+ 'Next steps:',
513
+ ' dotnet add package Grpc.Net.Client',
514
+ ' dotnet add package Google.Protobuf',
515
+ ' dotnet add package Grpc.Tools',
516
+ ' Add <Protobuf Include="vectra_service.proto" GrpcServices="Client" /> to your .csproj',
517
+ ].join('\n'),
518
+ rust: [
519
+ 'Next steps:',
520
+ ' Ensure protoc is installed (apt install protobuf-compiler / brew install protobuf)',
521
+ ' cargo build (tonic-build generates stubs automatically)',
522
+ ].join('\n'),
523
+ go: [
524
+ 'Next steps:',
525
+ ' go install google.golang.org/protobuf/cmd/protoc-gen-go@latest',
526
+ ' go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest',
527
+ ' protoc --go_out=. --go-grpc_out=. vectra_service.proto',
528
+ ' Update the import path in vectra_client.go to match your module',
529
+ ].join('\n'),
530
+ java: [
531
+ 'Next steps:',
532
+ ' Place vectra_service.proto in src/main/proto/',
533
+ ' Add gRPC dependencies to your build tool (see README.md for Gradle/Maven)',
534
+ ' Build to generate stubs automatically',
535
+ ].join('\n'),
536
+ typescript: [
537
+ 'Next steps:',
538
+ ' npm install @grpc/grpc-js @grpc/proto-loader',
539
+ ' No codegen needed — proto is loaded dynamically at runtime',
540
+ ' import { VectraClient } from \'./VectraClient\';',
541
+ ].join('\n'),
542
+ };
543
+ console.log(Colorize.output(nextSteps[language]));
544
+ })
545
+ .command('serve [index]', 'start the gRPC server to serve indexes', (yargs) => {
546
+ return yargs
547
+ .positional('index', {
548
+ describe: 'path to a single index directory (mutually exclusive with --root)',
549
+ type: 'string'
144
550
  })
145
- .command('stats <index>', `prints the stats for a local index`, {}, async (args) => {
146
- const folderPath = args.index as string;
147
- const index = new LocalDocumentIndex({ folderPath });
148
- const stats = await index.getCatalogStats();
149
- console.log(Colorize.title('Index Stats'));
150
- console.log(Colorize.output(stats));
551
+ .option('root', {
552
+ describe: 'directory containing multiple index subdirectories',
553
+ type: 'string'
151
554
  })
152
- .command('query <index> <query>', `queries a local index`, (yargs) => {
153
- return yargs
154
- .option('keys', {
155
- alias: 'k',
156
- describe: 'path of a JSON file containing the model keys to use for generating embeddings'
157
- })
158
- .option('document-count', {
159
- alias: 'dc',
160
- describe: 'max number of documents to return (defaults to 10)',
161
- type: 'number',
162
- default: 10
163
- })
164
- .option('chunk-count', {
165
- alias: 'cc',
166
- describe: 'max number of chunks to return (defaults to 50)',
167
- type: 'number',
168
- default: 50
169
- })
170
- .option('section-count', {
171
- alias: 'sc',
172
- describe: 'max number of document sections to render (defaults to 1)',
173
- type: 'number',
174
- default: 1
175
- })
176
- .option('tokens', {
177
- alias: 't',
178
- describe: 'max number of tokens to render for each document section (defaults to 2000)',
179
- type: 'number',
180
- default: 2000
181
- })
182
- .option('format', {
183
- alias: 'f',
184
- describe: `format of the rendered results. Defaults to 'sections'`,
185
- choices: ['sections', 'stats', 'chunks'],
186
- default: 'sections'
187
- })
188
- .option('overlap', {
189
- alias: 'o',
190
- describe: `whether to add overlapping chunks to sections.`,
191
- type: 'boolean',
192
- default: true
193
- })
194
- .option('bm25', {
195
- alias: 'b',
196
- describe: 'Use Okapi-bm25 keyword search alogrithm to perform hybrid search - semantic + keyword. Displayed in blue during search.',
197
- type: 'boolean',
198
- default: false
199
- })
200
- .demandOption(['keys']);
201
- }, async (args) => {
202
- console.log(Colorize.title('Querying Index'));
555
+ .option('port', {
556
+ alias: 'p',
557
+ describe: 'port to bind the gRPC server on',
558
+ type: 'number',
559
+ default: 50051
560
+ })
561
+ .option('daemon', {
562
+ describe: 'fork to background as a daemon process',
563
+ type: 'boolean',
564
+ default: false
565
+ })
566
+ .option('pid-file', {
567
+ describe: 'path to PID file (daemon mode only)',
568
+ type: 'string'
569
+ })
570
+ .option('keys', {
571
+ alias: 'k',
572
+ describe: 'path to a JSON file containing the model keys for embeddings',
573
+ type: 'string'
574
+ })
575
+ .check((argv) => {
576
+ if (!argv.index && !argv.root) {
577
+ throw new Error('You must provide either an <index> path or --root <dir>');
578
+ }
579
+ if (argv.index && argv.root) {
580
+ throw new Error('<index> and --root are mutually exclusive');
581
+ }
582
+ return true;
583
+ });
584
+ }, async (args) => {
585
+ // Load embeddings if keys provided
586
+ let embeddings: OpenAIEmbeddings | undefined;
587
+ if (args.keys) {
588
+ const options: OpenAIEmbeddingsOptions | AzureOpenAIEmbeddingsOptions | OSSEmbeddingsOptions = JSON.parse(await fs.readFile(args.keys as string, 'utf-8'));
589
+ if ((options as OpenAIEmbeddingsOptions).apiKey && !(options as OpenAIEmbeddingsOptions).model) {
590
+ (options as OpenAIEmbeddingsOptions).model = 'text-embedding-ada-002';
591
+ (options as OpenAIEmbeddingsOptions).maxTokens = 8000;
592
+ }
593
+ embeddings = new OpenAIEmbeddings(options);
594
+ }
203
595
 
204
- // Get embedding options
205
- const options: OpenAIEmbeddingsOptions|AzureOpenAIEmbeddingsOptions|OSSEmbeddingsOptions = JSON.parse(await fs.readFile(args.keys as string, 'utf-8'));
206
- if ((options as OpenAIEmbeddingsOptions).apiKey && !(options as OpenAIEmbeddingsOptions).model) {
207
- (options as OpenAIEmbeddingsOptions).model = 'text-embedding-ada-002';
208
- (options as OpenAIEmbeddingsOptions).maxTokens = 8000;
209
- }
596
+ const server = new VectraServer({
597
+ port: args.port,
598
+ indexPath: args.index as string | undefined,
599
+ rootDir: args.root as string | undefined,
600
+ embeddings,
601
+ });
210
602
 
211
- // Create embeddings
212
- const embeddings = new OpenAIEmbeddings(options);
603
+ if (args.daemon) {
604
+ // Daemon mode: fork a child process
605
+ const { spawn } = require('child_process');
606
+ const cliArgs = process.argv.slice(2).filter(a => a !== '--daemon');
607
+ const child = spawn(process.execPath, [process.argv[1], ...cliArgs], {
608
+ detached: true,
609
+ stdio: 'ignore',
610
+ });
611
+ child.unref();
213
612
 
214
- // Initialize index
215
- const folderPath = args.index as string;
216
- const index = new LocalDocumentIndex({
217
- folderPath,
218
- embeddings
219
- });
613
+ // Write PID file
614
+ const pidFile = args.pidFile as string || path.join(
615
+ (args.root as string) || path.dirname(args.index as string),
616
+ '.vectra.pid'
617
+ );
618
+ await fs.writeFile(pidFile, String(child.pid));
619
+ console.log(Colorize.output(`Vectra server started as daemon (PID: ${child.pid})`));
620
+ console.log(Colorize.output(`PID file: ${pidFile}`));
621
+ process.exit(0);
622
+ } else {
623
+ // Foreground mode
624
+ const port = await server.start();
625
+ console.log(Colorize.output(`Vectra gRPC server listening on 127.0.0.1:${port}`));
220
626
 
221
- // Query index
222
- const query = args.query as string;
223
- const results = await index.queryDocuments(query, {
224
- maxDocuments: args.documentCount,
225
- maxChunks: args.chunkCount,
226
- isBm25: args.bm25 as boolean,
227
- });
627
+ const loaded = server.indexManager.listIndexes();
628
+ if (loaded.length > 0) {
629
+ console.log(Colorize.output(`Loaded indexes:`));
630
+ for (const idx of loaded) {
631
+ console.log(Colorize.output(` - ${idx.name} (${idx.format}, ${idx.isDocumentIndex ? 'document' : 'item'})`));
632
+ }
633
+ } else {
634
+ console.log(Colorize.output(`No indexes loaded yet. Use CreateIndex RPC or add index directories.`));
635
+ }
228
636
 
229
- // Render results
230
- for (const result of results) {
231
- console.log(Colorize.output(result.uri));
232
- console.log(Colorize.value('score', result.score));
233
- console.log(Colorize.value('chunks', result.chunks.length));
234
- if (args.format == 'sections') {
235
- const sections = await result.renderSections(args.tokens, args.sectionCount, args.overlap);
236
- console.log(sections.length);
237
- for (let i = 0; i < sections.length; i++) {
238
- const section = sections[i];
239
- const isBm25 = sections[i].isBm25;
240
- console.log(isBm25);
241
- console.log(Colorize.title(args.sectionCount == 1 ? 'Section' : `Section ${i + 1}`));
242
- console.log(Colorize.value('score', section.score));
243
- console.log(Colorize.value('tokens', section.tokenCount));
244
- console.log(Colorize.output(section.text, isBm25));
245
- }
246
- } else if (args.format == 'chunks') {
247
- const text = await result.loadText();
248
- for (let i = 0; i < result.chunks.length; i++) {
249
- const chunk = result.chunks[i];
250
- const startPos = chunk.item.metadata.startPos;
251
- const endPos = chunk.item.metadata.endPos;
252
- const isBm25 = Boolean(chunk.item.metadata.isBm25);
253
- console.log(Colorize.title(`Chunk ${i + 1}`));
254
- console.log(Colorize.value('score', chunk.score));
255
- console.log(Colorize.value('startPos', startPos));
256
- console.log(Colorize.value('endPos', endPos));
257
- console.log(Colorize.output(text.substring(startPos, endPos + 1), isBm25));
258
- }
259
- }
260
- }
261
- })
262
- .help()
263
- .demandCommand()
264
- .parseAsync();
265
- }
637
+ // Handle graceful shutdown
638
+ const handleSignal = async () => {
639
+ console.log(Colorize.output('\nShutting down...'));
640
+ await server.shutdown();
641
+ process.exit(0);
642
+ };
643
+ process.on('SIGINT', handleSignal);
644
+ process.on('SIGTERM', handleSignal);
645
+ }
646
+ })
647
+ .command('stop', 'stop a running Vectra daemon', (yargs) => {
648
+ return yargs.option('pid-file', {
649
+ describe: 'path to PID file',
650
+ type: 'string',
651
+ demandOption: true
652
+ });
653
+ }, async (args) => {
654
+ const pidFile = args.pidFile as string;
655
+ if (!fsSync.existsSync(pidFile)) {
656
+ console.log(Colorize.error(`PID file not found: ${pidFile}`));
657
+ process.exit(1);
658
+ }
659
+ const pid = parseInt(await fs.readFile(pidFile, 'utf-8'), 10);
660
+ if (isNaN(pid)) {
661
+ console.log(Colorize.error(`Invalid PID in file: ${pidFile}`));
662
+ process.exit(1);
663
+ }
664
+
665
+ try {
666
+ // Send SIGTERM for graceful shutdown
667
+ process.kill(pid, 'SIGTERM');
668
+ console.log(Colorize.output(`Sent SIGTERM to PID ${pid}`));
669
+
670
+ // Wait up to 10s for process to exit
671
+ const deadline = Date.now() + 10000;
672
+ while (Date.now() < deadline) {
673
+ try {
674
+ process.kill(pid, 0); // check if process exists
675
+ await new Promise(r => setTimeout(r, 500));
676
+ } catch {
677
+ // Process no longer exists
678
+ break;
679
+ }
680
+ }
266
681
 
682
+ // Check if still alive and force kill
683
+ try {
684
+ process.kill(pid, 0);
685
+ process.kill(pid, 'SIGKILL');
686
+ console.log(Colorize.output(`Force-killed PID ${pid}`));
687
+ } catch {
688
+ // Already dead
689
+ }
690
+
691
+ // Remove PID file
692
+ await fs.unlink(pidFile).catch(() => {});
693
+ console.log(Colorize.output('Vectra server stopped'));
694
+ } catch (err: any) {
695
+ if (err.code === 'ESRCH') {
696
+ console.log(Colorize.output(`Process ${pid} not running. Cleaning up PID file.`));
697
+ await fs.unlink(pidFile).catch(() => {});
698
+ } else {
699
+ console.log(Colorize.error(`Failed to stop server: ${err.message}`));
700
+ process.exit(1);
701
+ }
702
+ }
703
+ })
704
+ .help()
705
+ .demandCommand()
706
+ .parseAsync();
707
+ }
267
708
 
268
709
  async function getItemList(items: string[], listFile: string, uriType: string): Promise<string[]> {
269
- if (Array.isArray(items) && items.length > 0) {
270
- return items;
271
- } else if (typeof listFile == 'string' && listFile.trim().length > 0) {
272
- const list = await fs.readFile(listFile, 'utf-8');
273
- return list.split('\n').map((item) => item.trim()).filter((item) => item.length > 0);
274
- } else {
275
- throw new Error(`you must specify either one or more "--uri <${uriType}>" for the items or a "--list <file path>" for a file containing the items.`)
276
- }
710
+ if (Array.isArray(items) && items.length > 0) {
711
+ return items;
712
+ } else if (typeof listFile == 'string' && listFile.trim().length > 0) {
713
+ const list = await fs.readFile(listFile, 'utf-8');
714
+ return list.split('\n').map((item) => item.trim()).filter((item) => item.length > 0);
715
+ } else {
716
+ throw new Error(`you must specify either one or more "--uri <${uriType}>" for the items or a "--list <file path>" for a file containing the items.`);
717
+ }
277
718
  }