vectra 0.12.3 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (375) hide show
  1. package/README.md +92 -100
  2. package/lib/BrowserWebFetcher.d.ts +75 -0
  3. package/lib/BrowserWebFetcher.d.ts.map +1 -0
  4. package/lib/BrowserWebFetcher.js +290 -0
  5. package/lib/BrowserWebFetcher.js.map +1 -0
  6. package/lib/FileFetcher.d.ts.map +1 -1
  7. package/lib/FileFetcher.js +25 -15
  8. package/lib/FileFetcher.js.map +1 -1
  9. package/lib/FileFetcher.spec.d.ts +2 -0
  10. package/lib/FileFetcher.spec.d.ts.map +1 -0
  11. package/lib/FileFetcher.spec.js +244 -0
  12. package/lib/FileFetcher.spec.js.map +1 -0
  13. package/lib/FolderWatcher.d.ts +91 -0
  14. package/lib/FolderWatcher.d.ts.map +1 -0
  15. package/lib/FolderWatcher.js +304 -0
  16. package/lib/FolderWatcher.js.map +1 -0
  17. package/lib/FolderWatcher.spec.d.ts +2 -0
  18. package/lib/FolderWatcher.spec.d.ts.map +1 -0
  19. package/lib/FolderWatcher.spec.js +308 -0
  20. package/lib/FolderWatcher.spec.js.map +1 -0
  21. package/lib/GPT3Tokenizer.spec.d.ts +2 -0
  22. package/lib/GPT3Tokenizer.spec.d.ts.map +1 -0
  23. package/lib/GPT3Tokenizer.spec.js +45 -0
  24. package/lib/GPT3Tokenizer.spec.js.map +1 -0
  25. package/lib/ItemSelector.d.ts.map +1 -1
  26. package/lib/ItemSelector.js +19 -8
  27. package/lib/ItemSelector.js.map +1 -1
  28. package/lib/ItemSelector.spec.d.ts +2 -0
  29. package/lib/ItemSelector.spec.d.ts.map +1 -0
  30. package/lib/ItemSelector.spec.js +204 -0
  31. package/lib/ItemSelector.spec.js.map +1 -0
  32. package/lib/LocalDocument.d.ts +1 -1
  33. package/lib/LocalDocument.d.ts.map +1 -1
  34. package/lib/LocalDocument.js +5 -45
  35. package/lib/LocalDocument.js.map +1 -1
  36. package/lib/LocalDocument.spec.d.ts +2 -0
  37. package/lib/LocalDocument.spec.d.ts.map +1 -0
  38. package/lib/LocalDocument.spec.js +214 -0
  39. package/lib/LocalDocument.spec.js.map +1 -0
  40. package/lib/LocalDocumentIndex.d.ts +20 -0
  41. package/lib/LocalDocumentIndex.d.ts.map +1 -1
  42. package/lib/LocalDocumentIndex.js +16 -52
  43. package/lib/LocalDocumentIndex.js.map +1 -1
  44. package/lib/LocalDocumentIndex.spec.d.ts +2 -0
  45. package/lib/LocalDocumentIndex.spec.d.ts.map +1 -0
  46. package/lib/LocalDocumentIndex.spec.js +494 -0
  47. package/lib/LocalDocumentIndex.spec.js.map +1 -0
  48. package/lib/LocalDocumentResult.d.ts +32 -11
  49. package/lib/LocalDocumentResult.d.ts.map +1 -1
  50. package/lib/LocalDocumentResult.js +305 -257
  51. package/lib/LocalDocumentResult.js.map +1 -1
  52. package/lib/LocalDocumentResult.spec.d.ts +2 -0
  53. package/lib/LocalDocumentResult.spec.d.ts.map +1 -0
  54. package/lib/LocalDocumentResult.spec.js +373 -0
  55. package/lib/LocalDocumentResult.spec.js.map +1 -0
  56. package/lib/LocalEmbeddings.d.ts +59 -0
  57. package/lib/LocalEmbeddings.d.ts.map +1 -0
  58. package/lib/LocalEmbeddings.js +101 -0
  59. package/lib/LocalEmbeddings.js.map +1 -0
  60. package/lib/LocalEmbeddings.spec.d.ts +2 -0
  61. package/lib/LocalEmbeddings.spec.d.ts.map +1 -0
  62. package/lib/LocalEmbeddings.spec.js +155 -0
  63. package/lib/LocalEmbeddings.spec.js.map +1 -0
  64. package/lib/LocalIndex.d.ts +27 -18
  65. package/lib/LocalIndex.d.ts.map +1 -1
  66. package/lib/LocalIndex.js +109 -105
  67. package/lib/LocalIndex.js.map +1 -1
  68. package/lib/LocalIndex.spec.js +434 -43
  69. package/lib/LocalIndex.spec.js.map +1 -1
  70. package/lib/OpenAIEmbeddings.d.ts +4 -6
  71. package/lib/OpenAIEmbeddings.d.ts.map +1 -1
  72. package/lib/OpenAIEmbeddings.js +16 -24
  73. package/lib/OpenAIEmbeddings.js.map +1 -1
  74. package/lib/OpenAIEmbeddings.spec.d.ts +2 -0
  75. package/lib/OpenAIEmbeddings.spec.d.ts.map +1 -0
  76. package/lib/OpenAIEmbeddings.spec.js +298 -0
  77. package/lib/OpenAIEmbeddings.spec.js.map +1 -0
  78. package/lib/TextSplitter.d.ts +2 -0
  79. package/lib/TextSplitter.d.ts.map +1 -1
  80. package/lib/TextSplitter.js +154 -111
  81. package/lib/TextSplitter.js.map +1 -1
  82. package/lib/TextSplitter.spec.js +289 -61
  83. package/lib/TextSplitter.spec.js.map +1 -1
  84. package/lib/TransformersEmbeddings.d.ts +121 -0
  85. package/lib/TransformersEmbeddings.d.ts.map +1 -0
  86. package/lib/TransformersEmbeddings.js +176 -0
  87. package/lib/TransformersEmbeddings.js.map +1 -0
  88. package/lib/TransformersEmbeddings.spec.d.ts +2 -0
  89. package/lib/TransformersEmbeddings.spec.d.ts.map +1 -0
  90. package/lib/TransformersEmbeddings.spec.js +198 -0
  91. package/lib/TransformersEmbeddings.spec.js.map +1 -0
  92. package/lib/TransformersTokenizer.d.ts +33 -0
  93. package/lib/TransformersTokenizer.d.ts.map +1 -0
  94. package/lib/TransformersTokenizer.js +44 -0
  95. package/lib/TransformersTokenizer.js.map +1 -0
  96. package/lib/TransformersTokenizer.spec.d.ts +2 -0
  97. package/lib/TransformersTokenizer.spec.d.ts.map +1 -0
  98. package/lib/TransformersTokenizer.spec.js +112 -0
  99. package/lib/TransformersTokenizer.spec.js.map +1 -0
  100. package/lib/WebFetcher.d.ts +1 -2
  101. package/lib/WebFetcher.d.ts.map +1 -1
  102. package/lib/WebFetcher.js +58 -54
  103. package/lib/WebFetcher.js.map +1 -1
  104. package/lib/WebFetcher.spec.d.ts +2 -0
  105. package/lib/WebFetcher.spec.d.ts.map +1 -0
  106. package/lib/WebFetcher.spec.js +263 -0
  107. package/lib/WebFetcher.spec.js.map +1 -0
  108. package/lib/browser.d.ts +30 -0
  109. package/lib/browser.d.ts.map +1 -0
  110. package/lib/browser.js +52 -0
  111. package/lib/browser.js.map +1 -0
  112. package/lib/codecs/IndexCodec.d.ts +37 -0
  113. package/lib/codecs/IndexCodec.d.ts.map +1 -0
  114. package/lib/codecs/IndexCodec.js +3 -0
  115. package/lib/codecs/IndexCodec.js.map +1 -0
  116. package/lib/codecs/JsonCodec.d.ts +19 -0
  117. package/lib/codecs/JsonCodec.d.ts.map +1 -0
  118. package/lib/codecs/JsonCodec.js +35 -0
  119. package/lib/codecs/JsonCodec.js.map +1 -0
  120. package/lib/codecs/JsonCodec.spec.d.ts +2 -0
  121. package/lib/codecs/JsonCodec.spec.d.ts.map +1 -0
  122. package/lib/codecs/JsonCodec.spec.js +66 -0
  123. package/lib/codecs/JsonCodec.spec.js.map +1 -0
  124. package/lib/codecs/LocalIndex.protobuf.spec.d.ts +2 -0
  125. package/lib/codecs/LocalIndex.protobuf.spec.d.ts.map +1 -0
  126. package/lib/codecs/LocalIndex.protobuf.spec.js +108 -0
  127. package/lib/codecs/LocalIndex.protobuf.spec.js.map +1 -0
  128. package/lib/codecs/ProtobufCodec.d.ts +20 -0
  129. package/lib/codecs/ProtobufCodec.d.ts.map +1 -0
  130. package/lib/codecs/ProtobufCodec.js +225 -0
  131. package/lib/codecs/ProtobufCodec.js.map +1 -0
  132. package/lib/codecs/ProtobufCodec.spec.d.ts +2 -0
  133. package/lib/codecs/ProtobufCodec.spec.d.ts.map +1 -0
  134. package/lib/codecs/ProtobufCodec.spec.js +155 -0
  135. package/lib/codecs/ProtobufCodec.spec.js.map +1 -0
  136. package/lib/codecs/index.d.ts +5 -0
  137. package/lib/codecs/index.d.ts.map +1 -0
  138. package/lib/codecs/index.js +21 -0
  139. package/lib/codecs/index.js.map +1 -0
  140. package/lib/codecs/migrateIndex.d.ts +24 -0
  141. package/lib/codecs/migrateIndex.d.ts.map +1 -0
  142. package/lib/codecs/migrateIndex.js +119 -0
  143. package/lib/codecs/migrateIndex.js.map +1 -0
  144. package/lib/codecs/migrateIndex.spec.d.ts +2 -0
  145. package/lib/codecs/migrateIndex.spec.d.ts.map +1 -0
  146. package/lib/codecs/migrateIndex.spec.js +151 -0
  147. package/lib/codecs/migrateIndex.spec.js.map +1 -0
  148. package/lib/codecs/schemas/index.proto +34 -0
  149. package/lib/index.d.ts +9 -1
  150. package/lib/index.d.ts.map +1 -1
  151. package/lib/index.js +9 -1
  152. package/lib/index.js.map +1 -1
  153. package/lib/internals/Colorize.d.ts.map +1 -1
  154. package/lib/internals/Colorize.js +20 -15
  155. package/lib/internals/Colorize.js.map +1 -1
  156. package/lib/server/IndexManager.d.ts +78 -0
  157. package/lib/server/IndexManager.d.ts.map +1 -0
  158. package/lib/server/IndexManager.js +259 -0
  159. package/lib/server/IndexManager.js.map +1 -0
  160. package/lib/server/VectraServer.d.ts +40 -0
  161. package/lib/server/VectraServer.d.ts.map +1 -0
  162. package/lib/server/VectraServer.js +151 -0
  163. package/lib/server/VectraServer.js.map +1 -0
  164. package/lib/server/VectraServer.spec.d.ts +2 -0
  165. package/lib/server/VectraServer.spec.d.ts.map +1 -0
  166. package/lib/server/VectraServer.spec.js +322 -0
  167. package/lib/server/VectraServer.spec.js.map +1 -0
  168. package/lib/server/handlers/documentHandlers.d.ts +15 -0
  169. package/lib/server/handlers/documentHandlers.d.ts.map +1 -0
  170. package/lib/server/handlers/documentHandlers.js +95 -0
  171. package/lib/server/handlers/documentHandlers.js.map +1 -0
  172. package/lib/server/handlers/helpers.d.ts +23 -0
  173. package/lib/server/handlers/helpers.d.ts.map +1 -0
  174. package/lib/server/handlers/helpers.js +138 -0
  175. package/lib/server/handlers/helpers.js.map +1 -0
  176. package/lib/server/handlers/index.d.ts +8 -0
  177. package/lib/server/handlers/index.d.ts.map +1 -0
  178. package/lib/server/handlers/index.js +22 -0
  179. package/lib/server/handlers/index.js.map +1 -0
  180. package/lib/server/handlers/indexHandlers.d.ts +14 -0
  181. package/lib/server/handlers/indexHandlers.d.ts.map +1 -0
  182. package/lib/server/handlers/indexHandlers.js +85 -0
  183. package/lib/server/handlers/indexHandlers.js.map +1 -0
  184. package/lib/server/handlers/itemHandlers.d.ts +34 -0
  185. package/lib/server/handlers/itemHandlers.d.ts.map +1 -0
  186. package/lib/server/handlers/itemHandlers.js +166 -0
  187. package/lib/server/handlers/itemHandlers.js.map +1 -0
  188. package/lib/server/handlers/lifecycleHandlers.d.ts +11 -0
  189. package/lib/server/handlers/lifecycleHandlers.d.ts.map +1 -0
  190. package/lib/server/handlers/lifecycleHandlers.js +31 -0
  191. package/lib/server/handlers/lifecycleHandlers.js.map +1 -0
  192. package/lib/server/handlers/queryHandlers.d.ts +27 -0
  193. package/lib/server/handlers/queryHandlers.d.ts.map +1 -0
  194. package/lib/server/handlers/queryHandlers.js +135 -0
  195. package/lib/server/handlers/queryHandlers.js.map +1 -0
  196. package/lib/server/handlers/statsHandlers.d.ts +17 -0
  197. package/lib/server/handlers/statsHandlers.d.ts.map +1 -0
  198. package/lib/server/handlers/statsHandlers.js +81 -0
  199. package/lib/server/handlers/statsHandlers.js.map +1 -0
  200. package/lib/server/index.d.ts +4 -0
  201. package/lib/server/index.d.ts.map +1 -0
  202. package/lib/server/index.js +23 -0
  203. package/lib/server/index.js.map +1 -0
  204. package/lib/storage/FileStorage.d.ts +92 -0
  205. package/lib/storage/FileStorage.d.ts.map +1 -0
  206. package/lib/storage/FileStorage.js +3 -0
  207. package/lib/storage/FileStorage.js.map +1 -0
  208. package/lib/storage/FileStorageUtilities.d.ts +36 -0
  209. package/lib/storage/FileStorageUtilities.d.ts.map +1 -0
  210. package/lib/storage/FileStorageUtilities.js +91 -0
  211. package/lib/storage/FileStorageUtilities.js.map +1 -0
  212. package/lib/storage/FileStorageUtilities.spec.d.ts +2 -0
  213. package/lib/storage/FileStorageUtilities.spec.d.ts.map +1 -0
  214. package/lib/storage/FileStorageUtilities.spec.js +98 -0
  215. package/lib/storage/FileStorageUtilities.spec.js.map +1 -0
  216. package/lib/storage/FileType.d.ts +29 -0
  217. package/lib/storage/FileType.d.ts.map +1 -0
  218. package/lib/storage/FileType.js +38 -0
  219. package/lib/storage/FileType.js.map +1 -0
  220. package/lib/storage/IndexedDBStorage.d.ts +47 -0
  221. package/lib/storage/IndexedDBStorage.d.ts.map +1 -0
  222. package/lib/storage/IndexedDBStorage.js +347 -0
  223. package/lib/storage/IndexedDBStorage.js.map +1 -0
  224. package/lib/storage/LocalFileStorage.browser.d.ts +19 -0
  225. package/lib/storage/LocalFileStorage.browser.d.ts.map +1 -0
  226. package/lib/storage/LocalFileStorage.browser.js +43 -0
  227. package/lib/storage/LocalFileStorage.browser.js.map +1 -0
  228. package/lib/storage/LocalFileStorage.d.ts +23 -0
  229. package/lib/storage/LocalFileStorage.d.ts.map +1 -0
  230. package/lib/storage/LocalFileStorage.js +152 -0
  231. package/lib/storage/LocalFileStorage.js.map +1 -0
  232. package/lib/storage/LocalFileStorage.spec.d.ts +2 -0
  233. package/lib/storage/LocalFileStorage.spec.d.ts.map +1 -0
  234. package/lib/storage/LocalFileStorage.spec.js +249 -0
  235. package/lib/storage/LocalFileStorage.spec.js.map +1 -0
  236. package/lib/storage/VirtualFileStorage.d.ts +18 -0
  237. package/lib/storage/VirtualFileStorage.d.ts.map +1 -0
  238. package/lib/storage/VirtualFileStorage.js +178 -0
  239. package/lib/storage/VirtualFileStorage.js.map +1 -0
  240. package/lib/storage/VirtualFileStorage.spec.d.ts +2 -0
  241. package/lib/storage/VirtualFileStorage.spec.d.ts.map +1 -0
  242. package/lib/storage/VirtualFileStorage.spec.js +302 -0
  243. package/lib/storage/VirtualFileStorage.spec.js.map +1 -0
  244. package/lib/storage/index.d.ts +6 -0
  245. package/lib/storage/index.d.ts.map +1 -0
  246. package/lib/storage/index.js +22 -0
  247. package/lib/storage/index.js.map +1 -0
  248. package/lib/templates/templates/csharp/README.md +48 -0
  249. package/lib/templates/templates/csharp/VectraClient.cs +234 -0
  250. package/lib/templates/templates/go/README.md +71 -0
  251. package/lib/templates/templates/go/vectra_client.go +322 -0
  252. package/lib/templates/templates/java/README.md +81 -0
  253. package/lib/templates/templates/java/VectraClient.java +232 -0
  254. package/lib/templates/templates/python/README.md +37 -0
  255. package/lib/templates/templates/python/vectra_client.py +279 -0
  256. package/lib/templates/templates/rust/Cargo.toml +14 -0
  257. package/lib/templates/templates/rust/README.md +39 -0
  258. package/lib/templates/templates/rust/build.rs +4 -0
  259. package/lib/templates/templates/rust/lib.rs +284 -0
  260. package/lib/templates/templates/typescript/README.md +96 -0
  261. package/lib/templates/templates/typescript/VectraClient.ts +374 -0
  262. package/lib/templates/typescript/VectraClient.d.ts +114 -0
  263. package/lib/templates/typescript/VectraClient.d.ts.map +1 -0
  264. package/lib/templates/typescript/VectraClient.js +328 -0
  265. package/lib/templates/typescript/VectraClient.js.map +1 -0
  266. package/lib/types.d.ts +7 -0
  267. package/lib/types.d.ts.map +1 -1
  268. package/lib/utils/index.d.ts +2 -0
  269. package/lib/utils/index.d.ts.map +1 -0
  270. package/lib/utils/index.js +18 -0
  271. package/lib/utils/index.js.map +1 -0
  272. package/lib/utils/pathUtils.d.ts +40 -0
  273. package/lib/utils/pathUtils.d.ts.map +1 -0
  274. package/lib/utils/pathUtils.js +98 -0
  275. package/lib/utils/pathUtils.js.map +1 -0
  276. package/lib/vectra-cli.d.ts.map +1 -1
  277. package/lib/vectra-cli.generate.spec.d.ts +2 -0
  278. package/lib/vectra-cli.generate.spec.d.ts.map +1 -0
  279. package/lib/vectra-cli.generate.spec.js +112 -0
  280. package/lib/vectra-cli.generate.spec.js.map +1 -0
  281. package/lib/vectra-cli.js +446 -9
  282. package/lib/vectra-cli.js.map +1 -1
  283. package/lib/vectra-cli.spec.d.ts +1 -0
  284. package/lib/vectra-cli.spec.d.ts.map +1 -0
  285. package/lib/vectra-cli.spec.js +2 -0
  286. package/lib/vectra-cli.spec.js.map +1 -0
  287. package/package.json +89 -16
  288. package/proto/vectra_service.proto +276 -0
  289. package/src/BrowserWebFetcher.ts +345 -0
  290. package/src/FileFetcher.spec.ts +234 -0
  291. package/src/FileFetcher.ts +37 -25
  292. package/src/FolderWatcher.spec.ts +288 -0
  293. package/src/FolderWatcher.ts +304 -0
  294. package/src/GPT3Tokenizer.spec.ts +50 -0
  295. package/src/ItemSelector.spec.ts +252 -0
  296. package/src/ItemSelector.ts +163 -150
  297. package/src/LocalDocument.spec.ts +211 -0
  298. package/src/LocalDocument.ts +88 -94
  299. package/src/LocalDocumentIndex.spec.ts +481 -0
  300. package/src/LocalDocumentIndex.ts +39 -40
  301. package/src/LocalDocumentResult.spec.ts +373 -0
  302. package/src/LocalDocumentResult.ts +489 -319
  303. package/src/LocalEmbeddings.spec.ts +138 -0
  304. package/src/LocalEmbeddings.ts +120 -0
  305. package/src/LocalIndex.spec.ts +808 -323
  306. package/src/LocalIndex.ts +479 -430
  307. package/src/OpenAIEmbeddings.spec.ts +354 -0
  308. package/src/OpenAIEmbeddings.ts +26 -27
  309. package/src/TextSplitter.spec.ts +320 -65
  310. package/src/TextSplitter.ts +172 -115
  311. package/src/TransformersEmbeddings.spec.ts +188 -0
  312. package/src/TransformersEmbeddings.ts +232 -0
  313. package/src/TransformersTokenizer.spec.ts +143 -0
  314. package/src/TransformersTokenizer.ts +45 -0
  315. package/src/WebFetcher.spec.ts +288 -0
  316. package/src/WebFetcher.ts +184 -186
  317. package/src/browser.ts +69 -0
  318. package/src/codecs/IndexCodec.ts +40 -0
  319. package/src/codecs/JsonCodec.spec.ts +70 -0
  320. package/src/codecs/JsonCodec.ts +37 -0
  321. package/src/codecs/LocalIndex.protobuf.spec.ts +115 -0
  322. package/src/codecs/ProtobufCodec.spec.ts +166 -0
  323. package/src/codecs/ProtobufCodec.ts +193 -0
  324. package/src/codecs/index.ts +4 -0
  325. package/src/codecs/migrateIndex.spec.ts +176 -0
  326. package/src/codecs/migrateIndex.ts +125 -0
  327. package/src/codecs/schemas/index.proto +34 -0
  328. package/src/index.ts +9 -1
  329. package/src/internals/Colorize.ts +19 -16
  330. package/src/server/IndexManager.ts +243 -0
  331. package/src/server/VectraServer.spec.ts +303 -0
  332. package/src/server/VectraServer.ts +156 -0
  333. package/src/server/handlers/documentHandlers.ts +59 -0
  334. package/src/server/handlers/helpers.ts +93 -0
  335. package/src/server/handlers/index.ts +7 -0
  336. package/src/server/handlers/indexHandlers.ts +44 -0
  337. package/src/server/handlers/itemHandlers.ts +140 -0
  338. package/src/server/handlers/lifecycleHandlers.ts +26 -0
  339. package/src/server/handlers/queryHandlers.ts +96 -0
  340. package/src/server/handlers/statsHandlers.ts +38 -0
  341. package/src/server/index.ts +3 -0
  342. package/src/storage/FileStorage.ts +105 -0
  343. package/src/storage/FileStorageUtilities.spec.ts +106 -0
  344. package/src/storage/FileStorageUtilities.ts +77 -0
  345. package/src/storage/FileType.ts +61 -0
  346. package/src/storage/IndexedDBStorage.ts +365 -0
  347. package/src/storage/LocalFileStorage.browser.ts +52 -0
  348. package/src/storage/LocalFileStorage.spec.ts +292 -0
  349. package/src/storage/LocalFileStorage.ts +98 -0
  350. package/src/storage/VirtualFileStorage.spec.ts +307 -0
  351. package/src/storage/VirtualFileStorage.ts +169 -0
  352. package/src/storage/index.ts +5 -0
  353. package/src/templates/csharp/README.md +48 -0
  354. package/src/templates/csharp/VectraClient.cs +234 -0
  355. package/src/templates/go/README.md +71 -0
  356. package/src/templates/go/vectra_client.go +322 -0
  357. package/src/templates/java/README.md +81 -0
  358. package/src/templates/java/VectraClient.java +232 -0
  359. package/src/templates/python/README.md +37 -0
  360. package/src/templates/python/vectra_client.py +279 -0
  361. package/src/templates/rust/Cargo.toml +14 -0
  362. package/src/templates/rust/README.md +39 -0
  363. package/src/templates/rust/build.rs +4 -0
  364. package/src/templates/rust/lib.rs +284 -0
  365. package/src/templates/typescript/README.md +96 -0
  366. package/src/templates/typescript/VectraClient.ts +374 -0
  367. package/src/types.ts +131 -123
  368. package/src/utils/index.ts +1 -0
  369. package/src/utils/pathUtils.ts +106 -0
  370. package/src/vectra-cli.generate.spec.ts +72 -0
  371. package/src/vectra-cli.spec.ts +0 -0
  372. package/src/vectra-cli.ts +687 -246
  373. package/README.draft.md +0 -499
  374. package/README.draft.outline.md +0 -160
  375. package/README.research.md +0 -2159
@@ -0,0 +1,232 @@
1
+ import { EmbeddingsModel, EmbeddingsResponse } from "./types";
2
+ import { TransformersTokenizer } from "./TransformersTokenizer";
3
+ import { FeatureExtractionPipeline, PreTrainedTokenizer } from "@huggingface/transformers";
4
+
5
+
6
/**
 * Model identifier used when the caller does not specify one.
 */
const DEFAULT_MODEL = 'Xenova/all-MiniLM-L6-v2';

/**
 * Type of the `@huggingface/transformers` module namespace.
 * Gives the result of the dynamic `import()` a static type.
 */
type TransformersLibrary = typeof import('@huggingface/transformers');
13
+
14
/**
 * Settings accepted by `TransformersEmbeddings.create()`.
 * @remarks
 * Every field is optional; omitted fields fall back to the documented default.
 */
export interface TransformersEmbeddingsOptions {
    /**
     * Name or path of the embedding model to load.
     * @remarks
     * Commonly used models:
     * - 'Xenova/all-MiniLM-L6-v2' (384 dimensions, fast, good quality)
     * - 'Xenova/bge-small-en-v1.5' (384 dimensions, better quality)
     * - 'Xenova/bge-base-en-v1.5' (768 dimensions, best quality)
     * @default 'Xenova/all-MiniLM-L6-v2'
     */
    model?: string;

    /**
     * Upper bound on the number of tokens sent to the embedding model.
     * @remarks
     * Influences how LocalDocumentIndex batches its requests.
     * Most small models support 512 tokens.
     * @default 512
     */
    maxTokens?: number;

    /**
     * Device on which inference runs.
     * @remarks
     * - 'auto': pick the best available device automatically
     * - 'gpu': GPU (WebGPU in the browser, CUDA in Node.js if available)
     * - 'cpu': CPU (most compatible)
     * - 'wasm': WebAssembly
     * @default 'auto'
     */
    device?: 'auto' | 'gpu' | 'cpu' | 'wasm';

    /**
     * Numeric format of the model weights.
     * @remarks
     * - 'fp32': full precision (best quality, largest size)
     * - 'fp16': half precision (good quality, smaller)
     * - 'q8': 8-bit quantization (good quality, smaller)
     * - 'q4': 4-bit quantization (fastest, smallest, lower quality)
     * @default 'fp32'
     */
    dtype?: 'fp32' | 'fp16' | 'q8' | 'q4';

    /**
     * Whether the returned embeddings are normalized to unit length.
     * @default true
     */
    normalize?: boolean;

    /**
     * How per-token embeddings are pooled into a single vector.
     * @remarks
     * - 'mean': mean pooling (recommended)
     * - 'cls': use the [CLS] token embedding
     * @default 'mean'
     */
    pooling?: 'mean' | 'cls';

    /**
     * Invoked with progress reports while the model is downloaded/loaded.
     */
    progressCallback?: (progress: { status: string; progress?: number; file?: string }) => void;
}
80
+
81
/**
 * Options after defaults have been applied: every setting except the
 * optional `progressCallback` is guaranteed to be present.
 */
type ResolvedTransformersEmbeddingsOptions =
    Required<Omit<TransformersEmbeddingsOptions, 'progressCallback'>> &
    Pick<TransformersEmbeddingsOptions, 'progressCallback'>;

/**
 * An embeddings model using Transformers.js for local, offline inference.
 * @remarks
 * Requires @huggingface/transformers as a peer dependency.
 * The constructor is private; use the static `create()` method to instantiate.
 *
 * @example
 * ```typescript
 * const embeddings = await TransformersEmbeddings.create({
 *     model: 'Xenova/all-MiniLM-L6-v2'
 * });
 *
 * const index = new LocalDocumentIndex({
 *     folderPath: 'my-index',
 *     embeddings: embeddings,
 *     tokenizer: embeddings.getTokenizer()
 * });
 * ```
 */
export class TransformersEmbeddings implements EmbeddingsModel {
    private readonly _extractor: FeatureExtractionPipeline;
    private readonly _tokenizer: PreTrainedTokenizer;
    private readonly _options: ResolvedTransformersEmbeddingsOptions;

    /**
     * Maximum number of tokens per request, taken from the resolved options.
     */
    public readonly maxTokens: number;

    /**
     * Private constructor - use TransformersEmbeddings.create() instead.
     * @param extractor Loaded feature-extraction pipeline.
     * @param tokenizer Tokenizer belonging to that pipeline.
     * @param options Options with defaults already applied.
     */
    private constructor(
        extractor: FeatureExtractionPipeline,
        tokenizer: PreTrainedTokenizer,
        options: ResolvedTransformersEmbeddingsOptions
    ) {
        this._extractor = extractor;
        this._tokenizer = tokenizer;
        this._options = options;
        this.maxTokens = options.maxTokens;
    }

    /**
     * Creates a new TransformersEmbeddings instance.
     * @param options Configuration options; omitted fields use their defaults.
     * @returns Promise resolving to an initialized TransformersEmbeddings instance.
     * @throws Error if @huggingface/transformers cannot be imported. The
     * underlying import failure is attached to the thrown error as `cause`.
     */
    public static async create(options?: TransformersEmbeddingsOptions): Promise<TransformersEmbeddings> {
        // Dynamically import so the package can remain an optional dependency.
        let transformers: TransformersLibrary;

        try {
            transformers = await import('@huggingface/transformers');
        } catch (e: unknown) {
            const error = new Error(
                'TransformersEmbeddings requires @huggingface/transformers. ' +
                'Install it with: npm install @huggingface/transformers'
            );
            // Keep the original failure reachable: the import can also fail for
            // reasons other than the package being absent.
            (error as Error & { cause?: unknown }).cause = e;
            throw error;
        }

        const { pipeline } = transformers;

        // Apply defaults
        const opts: ResolvedTransformersEmbeddingsOptions = {
            model: options?.model ?? DEFAULT_MODEL,
            maxTokens: options?.maxTokens ?? 512,
            device: options?.device ?? 'auto',
            dtype: options?.dtype ?? 'fp32',
            normalize: options?.normalize ?? true,
            pooling: options?.pooling ?? 'mean',
            progressCallback: options?.progressCallback
        };

        // Build pipeline options; the callback key is only set when supplied.
        const pipelineOptions: any = {
            device: opts.device,
            dtype: opts.dtype
        };

        if (opts.progressCallback) {
            pipelineOptions.progress_callback = opts.progressCallback;
        }

        // Load the feature extraction pipeline
        const extractor = await pipeline(
            'feature-extraction',
            opts.model,
            pipelineOptions
        );

        // Reuse the pipeline's own tokenizer so getTokenizer() tokenizes text
        // the same way the embedding model does.
        const tokenizer = extractor.tokenizer;

        return new TransformersEmbeddings(extractor, tokenizer, opts);
    }

    /**
     * Returns a tokenizer that uses the same tokenization as this embedding model.
     * @remarks
     * Use this tokenizer with LocalDocumentIndex to ensure text chunking
     * aligns with the embedding model's token boundaries.
     * A new wrapper object is created on every call.
     * @returns TransformersTokenizer instance.
     */
    public getTokenizer(): TransformersTokenizer {
        return new TransformersTokenizer(this._tokenizer);
    }

    /**
     * Creates embeddings for the given inputs.
     * @param inputs A single text or an array of texts to embed.
     * @returns EmbeddingsResponse with status 'success' and one embedding per
     * input, or status 'error' and a message if the extractor throws. Failures
     * are reported through the response rather than by rejecting.
     */
    public async createEmbeddings(inputs: string | string[]): Promise<EmbeddingsResponse> {
        try {
            const inputArray = Array.isArray(inputs) ? inputs : [inputs];

            // Process all inputs in a single batch
            const output = await this._extractor(inputArray, {
                pooling: this._options.pooling,
                normalize: this._options.normalize
            });

            const [batchSize, embeddingDim] = output.dims;
            const data = output.data as Float32Array;

            // The result is one flat array; cut it into one row per input.
            const embeddings: number[][] = [];
            for (let i = 0; i < batchSize; i++) {
                const start = i * embeddingDim;
                const end = start + embeddingDim;
                embeddings.push(Array.from(data.slice(start, end)));
            }

            return {
                status: 'success',
                output: embeddings,
                model: this._options.model
            };
        } catch (error: unknown) {
            // Anything can be thrown, not only Error instances; avoid
            // reporting "undefined" for non-Error values.
            const detail = error instanceof Error ? error.message : String(error);
            return {
                status: 'error',
                message: `Error generating embeddings: ${detail}`
            };
        }
    }

    /**
     * Returns the model name being used.
     */
    public get model(): string {
        return this._options.model;
    }
}
@@ -0,0 +1,143 @@
1
+ import { strict as assert } from 'node:assert';
2
+ import { describe, it } from 'mocha';
3
+ import { TransformersTokenizer } from './TransformersTokenizer';
4
+
5
describe('TransformersTokenizer', () => {
    type DecodeOptions = { skip_special_tokens?: boolean };
    type DecodeFn = (tokens: number[], options?: DecodeOptions) => string;

    // Builds the `{ input_ids: { data: BigInt64Array } }` shape the mocks return.
    const asEncoding = (ids: number[]) => ({
        input_ids: { data: BigInt64Array.from(ids.map(id => BigInt(id))) }
    });

    // Wraps a call function and a decode function into a callable mock
    // tokenizer and hands it to the class under test.
    const wrap = (call: (text: string) => unknown, decode: DecodeFn): TransformersTokenizer => {
        const callableTokenizer = Object.assign(
            (text: string) => call(text),
            { decode }
        ) as any;
        return new TransformersTokenizer(callableTokenizer);
    };

    it('encodes text to token array using callable tokenizer', () => {
        // Tiny vocabulary; unknown words map to id 100, unknown ids to '[UNK]'.
        const vocab = new Map<string, number>([
            ['hello', 101],
            ['world', 102],
            ['test', 103],
            ['[CLS]', 1],
            ['[SEP]', 2]
        ]);
        const reverseVocab = new Map<number, string>();
        for (const [word, id] of vocab) {
            reverseVocab.set(id, word);
        }

        const tokenizer = wrap(
            text => asEncoding(
                text.toLowerCase().split(/\s+/).filter(w => w).map(w => vocab.get(w) ?? 100)
            ),
            (tokens, options) => tokens
                .filter(t => !options?.skip_special_tokens || (t !== 1 && t !== 2))
                .map(t => reverseVocab.get(t) ?? '[UNK]')
                .join(' ')
        );

        const tokens = tokenizer.encode('hello world');

        assert.ok(Array.isArray(tokens), 'encode should return an array');
        assert.equal(tokens.length, 2, 'should have 2 tokens');
        assert.deepEqual(tokens, [101, 102], 'tokens should match expected values');
    });

    it('handles BigInt64Array conversion correctly', () => {
        const tokenizer = wrap(() => asEncoding([1, 2, 3]), () => 'decoded');

        const tokens = tokenizer.encode('any text');

        assert.deepEqual(tokens, [1, 2, 3], 'should convert BigInt to number');
        for (const t of tokens) {
            assert.equal(typeof t, 'number', 'each token should be a number');
        }
    });

    it('decodes tokens back to text', () => {
        // The mock only yields the clean text when special tokens are skipped.
        const tokenizer = wrap(
            () => asEncoding([]),
            (_tokens, opts) => opts?.skip_special_tokens ? 'hello world' : '[CLS] hello world [SEP]'
        );

        const text = tokenizer.decode([1, 101, 102, 2]);

        assert.equal(text, 'hello world', 'should decode with skip_special_tokens=true');
    });

    it('handles empty input', () => {
        const tokenizer = wrap(() => asEncoding([]), () => '');

        const tokens = tokenizer.encode('');
        assert.deepEqual(tokens, [], 'empty input should return empty array');

        const text = tokenizer.decode([]);
        assert.equal(text, '', 'empty tokens should return empty string');
    });

    it('returns consistent results for same input', () => {
        let callCount = 0;
        const tokenizer = wrap(
            () => {
                callCount++;
                return asEncoding([101, 102]);
            },
            () => 'hello world'
        );

        const tokens1 = tokenizer.encode('hello world');
        const tokens2 = tokenizer.encode('hello world');

        assert.deepEqual(tokens1, tokens2, 'encode should be deterministic');
        assert.equal(callCount, 2, 'should call underlying tokenizer each time');
    });
});
@@ -0,0 +1,45 @@
1
+ import { PreTrainedTokenizer } from "@huggingface/transformers";
2
+ import { Tokenizer } from "./types";
3
+
4
/**
 * Adapts a Transformers.js tokenizer to the `Tokenizer` interface.
 * @remarks
 * Wrapping the embedding model's own tokenizer keeps text splitting and
 * embedding generation consistent with each other.
 *
 * Obtain an instance via TransformersEmbeddings.getTokenizer().
 */
export class TransformersTokenizer implements Tokenizer {
    /**
     * Creates a new TransformersTokenizer.
     * @param _tokenizer The underlying Transformers.js tokenizer.
     * @remarks
     * Typically created via TransformersEmbeddings.getTokenizer().
     */
    public constructor(private readonly _tokenizer: PreTrainedTokenizer) {}

    /**
     * Encodes text into token IDs.
     * @param text The text to encode.
     * @returns Array of token IDs as plain numbers.
     * @remarks
     * NOTE(review): the tokenizer is called with its default options. If those
     * defaults add special tokens, they are part of the returned IDs - confirm
     * this is intended for token counting.
     */
    public encode(text: string): number[] {
        const result = this._tokenizer(text);
        // IDs may sit under input_ids.data, be input_ids itself, or be the
        // call result itself; take the first of these that is present.
        const ids = result.input_ids?.data ?? result.input_ids ?? result;
        // Elements may be BigInt; convert each one to a number.
        return Array.from(ids, (id: any) => Number(id));
    }

    /**
     * Decodes token IDs back into text.
     * @param tokens Array of token IDs.
     * @returns Decoded text string; decoding is requested with
     * `skip_special_tokens: true`.
     */
    public decode(tokens: number[]): string {
        const decodeOptions = { skip_special_tokens: true };
        return this._tokenizer.decode(tokens, decodeOptions);
    }
}