vectra 0.12.2 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (392) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +92 -100
  3. package/bin/vectra.js +3 -0
  4. package/lib/BrowserWebFetcher.d.ts +75 -0
  5. package/lib/BrowserWebFetcher.d.ts.map +1 -0
  6. package/lib/BrowserWebFetcher.js +290 -0
  7. package/lib/BrowserWebFetcher.js.map +1 -0
  8. package/lib/FileFetcher.d.ts +5 -0
  9. package/lib/FileFetcher.d.ts.map +1 -0
  10. package/lib/FileFetcher.js +89 -0
  11. package/lib/FileFetcher.js.map +1 -0
  12. package/lib/FileFetcher.spec.d.ts +2 -0
  13. package/lib/FileFetcher.spec.d.ts.map +1 -0
  14. package/lib/FileFetcher.spec.js +244 -0
  15. package/lib/FileFetcher.spec.js.map +1 -0
  16. package/lib/FolderWatcher.d.ts +91 -0
  17. package/lib/FolderWatcher.d.ts.map +1 -0
  18. package/lib/FolderWatcher.js +304 -0
  19. package/lib/FolderWatcher.js.map +1 -0
  20. package/lib/FolderWatcher.spec.d.ts +2 -0
  21. package/lib/FolderWatcher.spec.d.ts.map +1 -0
  22. package/lib/FolderWatcher.spec.js +308 -0
  23. package/lib/FolderWatcher.spec.js.map +1 -0
  24. package/lib/GPT3Tokenizer.d.ts +9 -0
  25. package/lib/GPT3Tokenizer.spec.d.ts +2 -0
  26. package/lib/GPT3Tokenizer.spec.d.ts.map +1 -0
  27. package/lib/GPT3Tokenizer.spec.js +45 -0
  28. package/lib/GPT3Tokenizer.spec.js.map +1 -0
  29. package/lib/ItemSelector.d.ts +41 -0
  30. package/lib/ItemSelector.d.ts.map +1 -0
  31. package/lib/ItemSelector.js +179 -0
  32. package/lib/ItemSelector.js.map +1 -0
  33. package/lib/ItemSelector.spec.d.ts +2 -0
  34. package/lib/ItemSelector.spec.d.ts.map +1 -0
  35. package/lib/ItemSelector.spec.js +204 -0
  36. package/lib/ItemSelector.spec.js.map +1 -0
  37. package/lib/LocalDocument.d.ts +54 -0
  38. package/lib/LocalDocument.d.ts.map +1 -1
  39. package/lib/LocalDocument.js +116 -0
  40. package/lib/LocalDocument.js.map +1 -0
  41. package/lib/LocalDocument.spec.d.ts +2 -0
  42. package/lib/LocalDocument.spec.d.ts.map +1 -0
  43. package/lib/LocalDocument.spec.js +214 -0
  44. package/lib/LocalDocument.spec.js.map +1 -0
  45. package/lib/LocalDocumentIndex.d.ts +152 -0
  46. package/lib/LocalDocumentIndex.d.ts.map +1 -1
  47. package/lib/LocalDocumentIndex.js +420 -0
  48. package/lib/LocalDocumentIndex.js.map +1 -0
  49. package/lib/LocalDocumentIndex.spec.d.ts +2 -0
  50. package/lib/LocalDocumentIndex.spec.d.ts.map +1 -0
  51. package/lib/LocalDocumentIndex.spec.js +494 -0
  52. package/lib/LocalDocumentIndex.spec.js.map +1 -0
  53. package/lib/LocalDocumentResult.d.ts +66 -0
  54. package/lib/LocalDocumentResult.d.ts.map +1 -1
  55. package/lib/LocalDocumentResult.js +376 -0
  56. package/lib/LocalDocumentResult.js.map +1 -0
  57. package/lib/LocalDocumentResult.spec.d.ts +2 -0
  58. package/lib/LocalDocumentResult.spec.d.ts.map +1 -0
  59. package/lib/LocalDocumentResult.spec.js +373 -0
  60. package/lib/LocalDocumentResult.spec.js.map +1 -0
  61. package/lib/LocalEmbeddings.d.ts +59 -0
  62. package/lib/LocalEmbeddings.d.ts.map +1 -0
  63. package/lib/LocalEmbeddings.js +101 -0
  64. package/lib/LocalEmbeddings.js.map +1 -0
  65. package/lib/LocalEmbeddings.spec.d.ts +2 -0
  66. package/lib/LocalEmbeddings.spec.d.ts.map +1 -0
  67. package/lib/LocalEmbeddings.spec.js +155 -0
  68. package/lib/LocalEmbeddings.spec.js.map +1 -0
  69. package/lib/LocalIndex.d.ts +159 -0
  70. package/lib/LocalIndex.d.ts.map +1 -1
  71. package/lib/LocalIndex.js +519 -0
  72. package/lib/LocalIndex.js.map +1 -0
  73. package/lib/LocalIndex.spec.d.ts +2 -0
  74. package/lib/LocalIndex.spec.js +611 -9
  75. package/lib/LocalIndex.spec.js.map +1 -1
  76. package/lib/OpenAIEmbeddings.d.ts +124 -0
  77. package/lib/OpenAIEmbeddings.d.ts.map +1 -0
  78. package/lib/OpenAIEmbeddings.js +166 -0
  79. package/lib/OpenAIEmbeddings.js.map +1 -0
  80. package/lib/OpenAIEmbeddings.spec.d.ts +2 -0
  81. package/lib/OpenAIEmbeddings.spec.d.ts.map +1 -0
  82. package/lib/OpenAIEmbeddings.spec.js +298 -0
  83. package/lib/OpenAIEmbeddings.spec.js.map +1 -0
  84. package/lib/TextSplitter.d.ts +21 -0
  85. package/lib/TextSplitter.d.ts.map +1 -1
  86. package/lib/TextSplitter.js +500 -0
  87. package/lib/TextSplitter.js.map +1 -0
  88. package/lib/TextSplitter.spec.d.ts +2 -0
  89. package/lib/TextSplitter.spec.d.ts.map +1 -0
  90. package/lib/TextSplitter.spec.js +337 -0
  91. package/lib/TextSplitter.spec.js.map +1 -0
  92. package/lib/TransformersEmbeddings.d.ts +121 -0
  93. package/lib/TransformersEmbeddings.d.ts.map +1 -0
  94. package/lib/TransformersEmbeddings.js +176 -0
  95. package/lib/TransformersEmbeddings.js.map +1 -0
  96. package/lib/TransformersEmbeddings.spec.d.ts +2 -0
  97. package/lib/TransformersEmbeddings.spec.d.ts.map +1 -0
  98. package/lib/TransformersEmbeddings.spec.js +198 -0
  99. package/lib/TransformersEmbeddings.spec.js.map +1 -0
  100. package/lib/TransformersTokenizer.d.ts +33 -0
  101. package/lib/TransformersTokenizer.d.ts.map +1 -0
  102. package/lib/TransformersTokenizer.js +44 -0
  103. package/lib/TransformersTokenizer.js.map +1 -0
  104. package/lib/TransformersTokenizer.spec.d.ts +2 -0
  105. package/lib/TransformersTokenizer.spec.d.ts.map +1 -0
  106. package/lib/TransformersTokenizer.spec.js +112 -0
  107. package/lib/TransformersTokenizer.spec.js.map +1 -0
  108. package/lib/WebFetcher.d.ts +14 -0
  109. package/lib/WebFetcher.d.ts.map +1 -0
  110. package/lib/WebFetcher.js +238 -0
  111. package/lib/WebFetcher.js.map +1 -0
  112. package/lib/WebFetcher.spec.d.ts +2 -0
  113. package/lib/WebFetcher.spec.d.ts.map +1 -0
  114. package/lib/WebFetcher.spec.js +263 -0
  115. package/lib/WebFetcher.spec.js.map +1 -0
  116. package/lib/browser.d.ts +30 -0
  117. package/lib/browser.d.ts.map +1 -0
  118. package/lib/browser.js +52 -0
  119. package/lib/browser.js.map +1 -0
  120. package/lib/codecs/IndexCodec.d.ts +37 -0
  121. package/lib/codecs/IndexCodec.d.ts.map +1 -0
  122. package/lib/codecs/IndexCodec.js +3 -0
  123. package/lib/codecs/IndexCodec.js.map +1 -0
  124. package/lib/codecs/JsonCodec.d.ts +19 -0
  125. package/lib/codecs/JsonCodec.d.ts.map +1 -0
  126. package/lib/codecs/JsonCodec.js +35 -0
  127. package/lib/codecs/JsonCodec.js.map +1 -0
  128. package/lib/codecs/JsonCodec.spec.d.ts +2 -0
  129. package/lib/codecs/JsonCodec.spec.d.ts.map +1 -0
  130. package/lib/codecs/JsonCodec.spec.js +66 -0
  131. package/lib/codecs/JsonCodec.spec.js.map +1 -0
  132. package/lib/codecs/LocalIndex.protobuf.spec.d.ts +2 -0
  133. package/lib/codecs/LocalIndex.protobuf.spec.d.ts.map +1 -0
  134. package/lib/codecs/LocalIndex.protobuf.spec.js +108 -0
  135. package/lib/codecs/LocalIndex.protobuf.spec.js.map +1 -0
  136. package/lib/codecs/ProtobufCodec.d.ts +20 -0
  137. package/lib/codecs/ProtobufCodec.d.ts.map +1 -0
  138. package/lib/codecs/ProtobufCodec.js +225 -0
  139. package/lib/codecs/ProtobufCodec.js.map +1 -0
  140. package/lib/codecs/ProtobufCodec.spec.d.ts +2 -0
  141. package/lib/codecs/ProtobufCodec.spec.d.ts.map +1 -0
  142. package/lib/codecs/ProtobufCodec.spec.js +155 -0
  143. package/lib/codecs/ProtobufCodec.spec.js.map +1 -0
  144. package/lib/codecs/index.d.ts +5 -0
  145. package/lib/codecs/index.d.ts.map +1 -0
  146. package/lib/codecs/index.js +21 -0
  147. package/lib/codecs/index.js.map +1 -0
  148. package/lib/codecs/migrateIndex.d.ts +24 -0
  149. package/lib/codecs/migrateIndex.d.ts.map +1 -0
  150. package/lib/codecs/migrateIndex.js +119 -0
  151. package/lib/codecs/migrateIndex.js.map +1 -0
  152. package/lib/codecs/migrateIndex.spec.d.ts +2 -0
  153. package/lib/codecs/migrateIndex.spec.d.ts.map +1 -0
  154. package/lib/codecs/migrateIndex.spec.js +151 -0
  155. package/lib/codecs/migrateIndex.spec.js.map +1 -0
  156. package/lib/codecs/schemas/index.proto +34 -0
  157. package/lib/index.d.ts +20 -0
  158. package/lib/index.d.ts.map +1 -1
  159. package/lib/index.js +36 -0
  160. package/lib/index.js.map +1 -0
  161. package/lib/internals/Colorize.d.ts +14 -0
  162. package/lib/internals/Colorize.d.ts.map +1 -0
  163. package/lib/internals/Colorize.js +69 -0
  164. package/lib/internals/Colorize.js.map +1 -0
  165. package/lib/internals/index.d.ts +3 -0
  166. package/lib/internals/index.d.ts.map +1 -0
  167. package/lib/internals/index.js +19 -0
  168. package/lib/internals/index.js.map +1 -0
  169. package/lib/internals/types.d.ts +43 -0
  170. package/lib/internals/types.d.ts.map +1 -0
  171. package/lib/internals/types.js +3 -0
  172. package/lib/internals/types.js.map +1 -0
  173. package/lib/server/IndexManager.d.ts +78 -0
  174. package/lib/server/IndexManager.d.ts.map +1 -0
  175. package/lib/server/IndexManager.js +259 -0
  176. package/lib/server/IndexManager.js.map +1 -0
  177. package/lib/server/VectraServer.d.ts +40 -0
  178. package/lib/server/VectraServer.d.ts.map +1 -0
  179. package/lib/server/VectraServer.js +151 -0
  180. package/lib/server/VectraServer.js.map +1 -0
  181. package/lib/server/VectraServer.spec.d.ts +2 -0
  182. package/lib/server/VectraServer.spec.d.ts.map +1 -0
  183. package/lib/server/VectraServer.spec.js +322 -0
  184. package/lib/server/VectraServer.spec.js.map +1 -0
  185. package/lib/server/handlers/documentHandlers.d.ts +15 -0
  186. package/lib/server/handlers/documentHandlers.d.ts.map +1 -0
  187. package/lib/server/handlers/documentHandlers.js +95 -0
  188. package/lib/server/handlers/documentHandlers.js.map +1 -0
  189. package/lib/server/handlers/helpers.d.ts +23 -0
  190. package/lib/server/handlers/helpers.d.ts.map +1 -0
  191. package/lib/server/handlers/helpers.js +138 -0
  192. package/lib/server/handlers/helpers.js.map +1 -0
  193. package/lib/server/handlers/index.d.ts +8 -0
  194. package/lib/server/handlers/index.d.ts.map +1 -0
  195. package/lib/server/handlers/index.js +22 -0
  196. package/lib/server/handlers/index.js.map +1 -0
  197. package/lib/server/handlers/indexHandlers.d.ts +14 -0
  198. package/lib/server/handlers/indexHandlers.d.ts.map +1 -0
  199. package/lib/server/handlers/indexHandlers.js +85 -0
  200. package/lib/server/handlers/indexHandlers.js.map +1 -0
  201. package/lib/server/handlers/itemHandlers.d.ts +34 -0
  202. package/lib/server/handlers/itemHandlers.d.ts.map +1 -0
  203. package/lib/server/handlers/itemHandlers.js +166 -0
  204. package/lib/server/handlers/itemHandlers.js.map +1 -0
  205. package/lib/server/handlers/lifecycleHandlers.d.ts +11 -0
  206. package/lib/server/handlers/lifecycleHandlers.d.ts.map +1 -0
  207. package/lib/server/handlers/lifecycleHandlers.js +31 -0
  208. package/lib/server/handlers/lifecycleHandlers.js.map +1 -0
  209. package/lib/server/handlers/queryHandlers.d.ts +27 -0
  210. package/lib/server/handlers/queryHandlers.d.ts.map +1 -0
  211. package/lib/server/handlers/queryHandlers.js +135 -0
  212. package/lib/server/handlers/queryHandlers.js.map +1 -0
  213. package/lib/server/handlers/statsHandlers.d.ts +17 -0
  214. package/lib/server/handlers/statsHandlers.d.ts.map +1 -0
  215. package/lib/server/handlers/statsHandlers.js +81 -0
  216. package/lib/server/handlers/statsHandlers.js.map +1 -0
  217. package/lib/server/index.d.ts +4 -0
  218. package/lib/server/index.d.ts.map +1 -0
  219. package/lib/server/index.js +23 -0
  220. package/lib/server/index.js.map +1 -0
  221. package/lib/storage/FileStorage.d.ts +92 -0
  222. package/lib/storage/FileStorage.d.ts.map +1 -0
  223. package/lib/storage/FileStorage.js +3 -0
  224. package/lib/storage/FileStorage.js.map +1 -0
  225. package/lib/storage/FileStorageUtilities.d.ts +36 -0
  226. package/lib/storage/FileStorageUtilities.d.ts.map +1 -0
  227. package/lib/storage/FileStorageUtilities.js +91 -0
  228. package/lib/storage/FileStorageUtilities.js.map +1 -0
  229. package/lib/storage/FileStorageUtilities.spec.d.ts +2 -0
  230. package/lib/storage/FileStorageUtilities.spec.d.ts.map +1 -0
  231. package/lib/storage/FileStorageUtilities.spec.js +98 -0
  232. package/lib/storage/FileStorageUtilities.spec.js.map +1 -0
  233. package/lib/storage/FileType.d.ts +29 -0
  234. package/lib/storage/FileType.d.ts.map +1 -0
  235. package/lib/storage/FileType.js +38 -0
  236. package/lib/storage/FileType.js.map +1 -0
  237. package/lib/storage/IndexedDBStorage.d.ts +47 -0
  238. package/lib/storage/IndexedDBStorage.d.ts.map +1 -0
  239. package/lib/storage/IndexedDBStorage.js +347 -0
  240. package/lib/storage/IndexedDBStorage.js.map +1 -0
  241. package/lib/storage/LocalFileStorage.browser.d.ts +19 -0
  242. package/lib/storage/LocalFileStorage.browser.d.ts.map +1 -0
  243. package/lib/storage/LocalFileStorage.browser.js +43 -0
  244. package/lib/storage/LocalFileStorage.browser.js.map +1 -0
  245. package/lib/storage/LocalFileStorage.d.ts +23 -0
  246. package/lib/storage/LocalFileStorage.d.ts.map +1 -0
  247. package/lib/storage/LocalFileStorage.js +152 -0
  248. package/lib/storage/LocalFileStorage.js.map +1 -0
  249. package/lib/storage/LocalFileStorage.spec.d.ts +2 -0
  250. package/lib/storage/LocalFileStorage.spec.d.ts.map +1 -0
  251. package/lib/storage/LocalFileStorage.spec.js +249 -0
  252. package/lib/storage/LocalFileStorage.spec.js.map +1 -0
  253. package/lib/storage/VirtualFileStorage.d.ts +18 -0
  254. package/lib/storage/VirtualFileStorage.d.ts.map +1 -0
  255. package/lib/storage/VirtualFileStorage.js +178 -0
  256. package/lib/storage/VirtualFileStorage.js.map +1 -0
  257. package/lib/storage/VirtualFileStorage.spec.d.ts +2 -0
  258. package/lib/storage/VirtualFileStorage.spec.d.ts.map +1 -0
  259. package/lib/storage/VirtualFileStorage.spec.js +302 -0
  260. package/lib/storage/VirtualFileStorage.spec.js.map +1 -0
  261. package/lib/storage/index.d.ts +6 -0
  262. package/lib/storage/index.d.ts.map +1 -0
  263. package/lib/storage/index.js +22 -0
  264. package/lib/storage/index.js.map +1 -0
  265. package/lib/templates/templates/csharp/README.md +48 -0
  266. package/lib/templates/templates/csharp/VectraClient.cs +234 -0
  267. package/lib/templates/templates/go/README.md +71 -0
  268. package/lib/templates/templates/go/vectra_client.go +322 -0
  269. package/lib/templates/templates/java/README.md +81 -0
  270. package/lib/templates/templates/java/VectraClient.java +232 -0
  271. package/lib/templates/templates/python/README.md +37 -0
  272. package/lib/templates/templates/python/vectra_client.py +279 -0
  273. package/lib/templates/templates/rust/Cargo.toml +14 -0
  274. package/lib/templates/templates/rust/README.md +39 -0
  275. package/lib/templates/templates/rust/build.rs +4 -0
  276. package/lib/templates/templates/rust/lib.rs +284 -0
  277. package/lib/templates/templates/typescript/README.md +96 -0
  278. package/lib/templates/templates/typescript/VectraClient.ts +374 -0
  279. package/lib/templates/typescript/VectraClient.d.ts +114 -0
  280. package/lib/templates/typescript/VectraClient.d.ts.map +1 -0
  281. package/lib/templates/typescript/VectraClient.js +328 -0
  282. package/lib/templates/typescript/VectraClient.js.map +1 -0
  283. package/lib/types.d.ts +153 -0
  284. package/lib/types.d.ts.map +1 -0
  285. package/lib/types.js +3 -0
  286. package/lib/types.js.map +1 -0
  287. package/lib/utils/index.d.ts +2 -0
  288. package/lib/utils/index.d.ts.map +1 -0
  289. package/lib/utils/index.js +18 -0
  290. package/lib/utils/index.js.map +1 -0
  291. package/lib/utils/pathUtils.d.ts +40 -0
  292. package/lib/utils/pathUtils.d.ts.map +1 -0
  293. package/lib/utils/pathUtils.js +98 -0
  294. package/lib/utils/pathUtils.js.map +1 -0
  295. package/lib/vectra-cli.d.ts +2 -0
  296. package/lib/vectra-cli.d.ts.map +1 -1
  297. package/lib/vectra-cli.generate.spec.d.ts +2 -0
  298. package/lib/vectra-cli.generate.spec.d.ts.map +1 -0
  299. package/lib/vectra-cli.generate.spec.js +112 -0
  300. package/lib/vectra-cli.generate.spec.js.map +1 -0
  301. package/lib/vectra-cli.js +760 -0
  302. package/lib/vectra-cli.js.map +1 -0
  303. package/lib/vectra-cli.spec.d.ts +1 -0
  304. package/lib/vectra-cli.spec.d.ts.map +1 -0
  305. package/lib/vectra-cli.spec.js +2 -0
  306. package/lib/vectra-cli.spec.js.map +1 -0
  307. package/package.json +91 -16
  308. package/proto/vectra_service.proto +276 -0
  309. package/src/BrowserWebFetcher.ts +345 -0
  310. package/src/FileFetcher.spec.ts +234 -0
  311. package/src/FileFetcher.ts +37 -25
  312. package/src/FolderWatcher.spec.ts +288 -0
  313. package/src/FolderWatcher.ts +304 -0
  314. package/src/GPT3Tokenizer.spec.ts +50 -0
  315. package/src/ItemSelector.spec.ts +252 -0
  316. package/src/ItemSelector.ts +163 -150
  317. package/src/LocalDocument.spec.ts +211 -0
  318. package/src/LocalDocument.ts +88 -94
  319. package/src/LocalDocumentIndex.spec.ts +481 -0
  320. package/src/LocalDocumentIndex.ts +39 -40
  321. package/src/LocalDocumentResult.spec.ts +373 -0
  322. package/src/LocalDocumentResult.ts +489 -319
  323. package/src/LocalEmbeddings.spec.ts +138 -0
  324. package/src/LocalEmbeddings.ts +120 -0
  325. package/src/LocalIndex.spec.ts +808 -66
  326. package/src/LocalIndex.ts +479 -429
  327. package/src/OpenAIEmbeddings.spec.ts +354 -0
  328. package/src/OpenAIEmbeddings.ts +26 -27
  329. package/src/TextSplitter.spec.ts +342 -0
  330. package/src/TextSplitter.ts +517 -532
  331. package/src/TransformersEmbeddings.spec.ts +188 -0
  332. package/src/TransformersEmbeddings.ts +232 -0
  333. package/src/TransformersTokenizer.spec.ts +143 -0
  334. package/src/TransformersTokenizer.ts +45 -0
  335. package/src/WebFetcher.spec.ts +288 -0
  336. package/src/WebFetcher.ts +184 -186
  337. package/src/browser.ts +69 -0
  338. package/src/codecs/IndexCodec.ts +40 -0
  339. package/src/codecs/JsonCodec.spec.ts +70 -0
  340. package/src/codecs/JsonCodec.ts +37 -0
  341. package/src/codecs/LocalIndex.protobuf.spec.ts +115 -0
  342. package/src/codecs/ProtobufCodec.spec.ts +166 -0
  343. package/src/codecs/ProtobufCodec.ts +193 -0
  344. package/src/codecs/index.ts +4 -0
  345. package/src/codecs/migrateIndex.spec.ts +176 -0
  346. package/src/codecs/migrateIndex.ts +125 -0
  347. package/src/codecs/schemas/index.proto +34 -0
  348. package/src/index.ts +9 -1
  349. package/src/internals/Colorize.ts +19 -16
  350. package/src/server/IndexManager.ts +243 -0
  351. package/src/server/VectraServer.spec.ts +303 -0
  352. package/src/server/VectraServer.ts +156 -0
  353. package/src/server/handlers/documentHandlers.ts +59 -0
  354. package/src/server/handlers/helpers.ts +93 -0
  355. package/src/server/handlers/index.ts +7 -0
  356. package/src/server/handlers/indexHandlers.ts +44 -0
  357. package/src/server/handlers/itemHandlers.ts +140 -0
  358. package/src/server/handlers/lifecycleHandlers.ts +26 -0
  359. package/src/server/handlers/queryHandlers.ts +96 -0
  360. package/src/server/handlers/statsHandlers.ts +38 -0
  361. package/src/server/index.ts +3 -0
  362. package/src/storage/FileStorage.ts +105 -0
  363. package/src/storage/FileStorageUtilities.spec.ts +106 -0
  364. package/src/storage/FileStorageUtilities.ts +77 -0
  365. package/src/storage/FileType.ts +61 -0
  366. package/src/storage/IndexedDBStorage.ts +365 -0
  367. package/src/storage/LocalFileStorage.browser.ts +52 -0
  368. package/src/storage/LocalFileStorage.spec.ts +292 -0
  369. package/src/storage/LocalFileStorage.ts +98 -0
  370. package/src/storage/VirtualFileStorage.spec.ts +307 -0
  371. package/src/storage/VirtualFileStorage.ts +169 -0
  372. package/src/storage/index.ts +5 -0
  373. package/src/templates/csharp/README.md +48 -0
  374. package/src/templates/csharp/VectraClient.cs +234 -0
  375. package/src/templates/go/README.md +71 -0
  376. package/src/templates/go/vectra_client.go +322 -0
  377. package/src/templates/java/README.md +81 -0
  378. package/src/templates/java/VectraClient.java +232 -0
  379. package/src/templates/python/README.md +37 -0
  380. package/src/templates/python/vectra_client.py +279 -0
  381. package/src/templates/rust/Cargo.toml +14 -0
  382. package/src/templates/rust/README.md +39 -0
  383. package/src/templates/rust/build.rs +4 -0
  384. package/src/templates/rust/lib.rs +284 -0
  385. package/src/templates/typescript/README.md +96 -0
  386. package/src/templates/typescript/VectraClient.ts +374 -0
  387. package/src/types.ts +131 -123
  388. package/src/utils/index.ts +1 -0
  389. package/src/utils/pathUtils.ts +106 -0
  390. package/src/vectra-cli.generate.spec.ts +72 -0
  391. package/src/vectra-cli.spec.ts +0 -0
  392. package/src/vectra-cli.ts +687 -246
@@ -0,0 +1,188 @@
1
+ import { strict as assert } from 'node:assert';
2
+ import { describe, it, beforeEach, afterEach } from 'mocha';
3
+ import sinon from 'sinon';
4
+ import { EmbeddingsModel } from './types';
5
+ import * as transformersModule from '@huggingface/transformers';
6
+
/**
 * Unit tests for TransformersEmbeddings.
 *
 * The `pipeline` export of `@huggingface/transformers` is replaced by a sinon stub
 * that resolves to a fake feature-extraction pipeline, so no model is downloaded or run.
 * The fake pipeline returns a fixed 4-dimensional vector [0.1, 0.2, 0.3, 0.4] per input.
 */
describe('TransformersEmbeddings', () => {
    let TransformersEmbeddings: any;
    let mockExtractor: sinon.SinonStub;
    let mockTokenizer: any;
    let sandbox: sinon.SinonSandbox;
    let pipelineStub: sinon.SinonStub;

    beforeEach(async () => {
        sandbox = sinon.createSandbox();

        // Create mock tokenizer
        mockTokenizer = {
            __call__: sandbox.stub().returns({
                input_ids: { data: BigInt64Array.from([BigInt(1), BigInt(2), BigInt(3)]) }
            }),
            decode: sandbox.stub().returns('decoded text')
        };
        // Make it callable: a function that forwards to __call__ and also carries decode()
        const callableTokenizer = Object.assign(
            (...args: any[]) => mockTokenizer.__call__(...args),
            mockTokenizer
        );

        // Create mock extractor (feature extraction pipeline)
        mockExtractor = sandbox.stub().callsFake(async (inputs: string | string[]) => {
            const inputArray = Array.isArray(inputs) ? inputs : [inputs];
            const batchSize = inputArray.length;
            const embeddingDim = 4;

            // Flat [batchSize * embeddingDim] buffer, one fixed vector per input
            const data = new Float32Array(batchSize * embeddingDim);
            for (let i = 0; i < batchSize; i++) {
                data[i * embeddingDim] = 0.1;
                data[i * embeddingDim + 1] = 0.2;
                data[i * embeddingDim + 2] = 0.3;
                data[i * embeddingDim + 3] = 0.4;
            }

            return {
                data: data,
                dims: [batchSize, embeddingDim]
            };
        });

        // Attach tokenizer to the mock extractor so pipeline result has .tokenizer
        (mockExtractor as any).tokenizer = callableTokenizer;

        // Stub the pipeline function from @huggingface/transformers
        pipelineStub = sandbox.stub(transformersModule, 'pipeline' as any).resolves(mockExtractor);

        // Import the class under test. The module itself is cached after the first import;
        // isolation between tests comes from re-stubbing `pipeline` in every beforeEach.
        const mod = await import('./TransformersEmbeddings');
        TransformersEmbeddings = mod.TransformersEmbeddings;
    });

    afterEach(() => {
        sandbox.restore();
    });

    describe('create()', () => {
        it('creates instance with default options', async () => {
            const embeddings = await TransformersEmbeddings.create();

            assert.equal(embeddings.maxTokens, 512, 'default maxTokens should be 512');
            assert.equal(embeddings.model, 'Xenova/all-MiniLM-L6-v2', 'default model should be all-MiniLM-L6-v2');

            // Verify pipeline was called with correct arguments
            assert.ok(pipelineStub.calledOnce, 'pipeline should be called once');
            assert.equal(pipelineStub.firstCall.args[0], 'feature-extraction');
            assert.equal(pipelineStub.firstCall.args[1], 'Xenova/all-MiniLM-L6-v2');
        });

        it('creates instance with custom options', async () => {
            const embeddings = await TransformersEmbeddings.create({
                model: 'Xenova/bge-small-en-v1.5',
                maxTokens: 256,
                device: 'cpu',
                normalize: false,
                pooling: 'cls'
            });

            assert.equal(embeddings.maxTokens, 256);
            assert.equal(embeddings.model, 'Xenova/bge-small-en-v1.5');

            // The custom model and device must actually reach the pipeline factory
            assert.ok(pipelineStub.calledOnce, 'pipeline should be called once');
            assert.equal(pipelineStub.firstCall.args[0], 'feature-extraction');
            assert.equal(pipelineStub.firstCall.args[1], 'Xenova/bge-small-en-v1.5');
            assert.equal(pipelineStub.firstCall.args[2].device, 'cpu', 'device should be forwarded to pipeline');
        });

        it('implements EmbeddingsModel interface', async () => {
            const embeddings: EmbeddingsModel = await TransformersEmbeddings.create();

            assert.equal(typeof embeddings.maxTokens, 'number');
            assert.equal(typeof embeddings.createEmbeddings, 'function');
        });
    });

    describe('createEmbeddings()', () => {
        it('generates embeddings for single string', async () => {
            const embeddings = await TransformersEmbeddings.create();
            const result = await embeddings.createEmbeddings('hello world');

            assert.equal(result.status, 'success');
            assert.ok(result.output, 'output should be defined');
            assert.equal(result.output!.length, 1, 'should have one embedding');
            assert.equal(result.output![0].length, 4, 'embedding should have 4 dimensions');
            // Float32 storage is inexact, so compare with a tolerance
            const expected = [0.1, 0.2, 0.3, 0.4];
            result.output![0].forEach((val: number, i: number) => {
                assert.ok(Math.abs(val - expected[i]) < 0.001, `value ${val} should be close to ${expected[i]}`);
            });
            assert.equal(result.model, 'Xenova/all-MiniLM-L6-v2');
        });

        it('generates embeddings for string array', async () => {
            const embeddings = await TransformersEmbeddings.create();
            const result = await embeddings.createEmbeddings(['hello', 'world']);

            assert.equal(result.status, 'success');
            assert.ok(result.output, 'output should be defined');
            assert.equal(result.output!.length, 2, 'should have two embeddings');

            // Each row of the flat batch buffer must be sliced into its own vector
            const expected = [0.1, 0.2, 0.3, 0.4];
            result.output!.forEach((embedding: number[]) => {
                assert.equal(embedding.length, 4, 'embedding should have 4 dimensions');
                embedding.forEach((val: number, i: number) => {
                    assert.ok(Math.abs(val - expected[i]) < 0.001, `value ${val} should be close to ${expected[i]}`);
                });
            });

            // Inputs are sent to the extractor as one batch
            assert.equal(mockExtractor.callCount, 1);
            assert.deepEqual(mockExtractor.firstCall.args[0], ['hello', 'world']);
        });

        it('passes pooling and normalize options to extractor', async () => {
            const embeddings = await TransformersEmbeddings.create({
                pooling: 'cls',
                normalize: false
            });
            await embeddings.createEmbeddings('test');

            assert.ok(mockExtractor.calledOnce);
            const options = mockExtractor.firstCall.args[1];
            assert.equal(options.pooling, 'cls');
            assert.equal(options.normalize, false);
        });

        it('returns error status on failure', async () => {
            mockExtractor.rejects(new Error('Model inference failed'));

            const embeddings = await TransformersEmbeddings.create();
            const result = await embeddings.createEmbeddings('test');

            assert.equal(result.status, 'error');
            assert.ok(result.message?.includes('Model inference failed'));
        });

        it('handles empty string input', async () => {
            const embeddings = await TransformersEmbeddings.create();
            const result = await embeddings.createEmbeddings('');

            assert.equal(result.status, 'success');
            assert.ok(result.output);
            assert.equal(result.output!.length, 1);
        });

        it('handles empty array input', async () => {
            const embeddings = await TransformersEmbeddings.create();
            const result = await embeddings.createEmbeddings([]);

            assert.equal(result.status, 'success');
            assert.ok(result.output);
            assert.equal(result.output!.length, 0);
        });
    });

    describe('getTokenizer()', () => {
        it('returns a TransformersTokenizer instance', async () => {
            const embeddings = await TransformersEmbeddings.create();
            const tokenizer = embeddings.getTokenizer();

            assert.ok(tokenizer, 'tokenizer should be defined');
            assert.equal(typeof tokenizer.encode, 'function');
            assert.equal(typeof tokenizer.decode, 'function');
        });

        it('returns consistent tokenizer across calls', async () => {
            const embeddings = await TransformersEmbeddings.create();
            const tokenizer1 = embeddings.getTokenizer();
            const tokenizer2 = embeddings.getTokenizer();

            assert.ok(tokenizer1);
            assert.ok(tokenizer2);
            // "Consistent" means both tokenizers produce the same tokens for the same text
            assert.deepEqual(
                tokenizer1.encode('hello world'),
                tokenizer2.encode('hello world'),
                'tokenizers from separate calls should encode identically'
            );
        });
    });
});
@@ -0,0 +1,232 @@
1
+ import { EmbeddingsModel, EmbeddingsResponse } from "./types";
2
+ import { TransformersTokenizer } from "./TransformersTokenizer";
3
+ import { FeatureExtractionPipeline, PreTrainedTokenizer } from "@huggingface/transformers";
4
+
5
+
/** Model identifier used when the caller does not supply `options.model`. */
const DEFAULT_MODEL = 'Xenova/all-MiniLM-L6-v2';

/**
 * Module-namespace type of `@huggingface/transformers`.
 * Lets the result of the dynamic `import()` be typed without a value-level static import.
 */
type TransformersLibrary = typeof import('@huggingface/transformers');
13
+
/**
 * Settings accepted by `TransformersEmbeddings.create()`.
 * Every field is optional; omitted fields fall back to the documented default.
 */
export interface TransformersEmbeddingsOptions {
    /**
     * Name or path of the embedding model to load.
     * @remarks
     * Frequently used choices:
     * - 'Xenova/all-MiniLM-L6-v2' (384 dimensions, fast, good quality)
     * - 'Xenova/bge-small-en-v1.5' (384 dimensions, better quality)
     * - 'Xenova/bge-base-en-v1.5' (768 dimensions, best quality)
     * @default 'Xenova/all-MiniLM-L6-v2'
     */
    model?: string;

    /**
     * Upper bound on the number of tokens sent to the embedding model.
     * @remarks
     * Influences how LocalDocumentIndex batches its requests.
     * 512 tokens is what most small models accept.
     * @default 512
     */
    maxTokens?: number;

    /**
     * Where inference runs.
     * @remarks
     * - 'auto': pick the best device that is available
     * - 'gpu': GPU (WebGPU in browser, CUDA in Node.js if available)
     * - 'cpu': CPU (most compatible)
     * - 'wasm': WebAssembly
     * @default 'auto'
     */
    device?: 'auto' | 'gpu' | 'cpu' | 'wasm';

    /**
     * Numeric precision of the model weights.
     * @remarks
     * - 'fp32': full precision (best quality, largest size)
     * - 'fp16': half precision (good quality, smaller)
     * - 'q8': 8-bit quantization (good quality, smaller)
     * - 'q4': 4-bit quantization (fastest, smallest, lower quality)
     * @default 'fp32'
     */
    dtype?: 'fp32' | 'fp16' | 'q8' | 'q4';

    /**
     * When true, embeddings are scaled to unit length.
     * @default true
     */
    normalize?: boolean;

    /**
     * How per-token embeddings are combined into one vector.
     * @remarks
     * - 'mean': average over tokens (default, recommended)
     * - 'cls': take the [CLS] token embedding
     * @default 'mean'
     */
    pooling?: 'mean' | 'cls';

    /**
     * Invoked with progress updates while the model is downloaded/loaded.
     */
    progressCallback?: (progress: { status: string; progress?: number; file?: string }) => void;
}
80
+
/**
 * An embeddings model using Transformers.js for local, offline inference.
 * @remarks
 * Requires @huggingface/transformers as a peer dependency.
 * Use the static `create()` method to instantiate.
 *
 * @example
 * ```typescript
 * const embeddings = await TransformersEmbeddings.create({
 *   model: 'Xenova/all-MiniLM-L6-v2'
 * });
 *
 * const index = new LocalDocumentIndex({
 *   folderPath: 'my-index',
 *   embeddings: embeddings,
 *   tokenizer: embeddings.getTokenizer()
 * });
 * ```
 */
export class TransformersEmbeddings implements EmbeddingsModel {
    private readonly _extractor: FeatureExtractionPipeline;
    private readonly _tokenizer: PreTrainedTokenizer;
    // Fully resolved options: everything except progressCallback has a concrete value.
    private readonly _options: Required<Omit<TransformersEmbeddingsOptions, 'progressCallback'>> & Pick<TransformersEmbeddingsOptions, 'progressCallback'>;

    /**
     * Maximum number of tokens that can be sent to the embedding model.
     */
    public readonly maxTokens: number;

    /**
     * Private constructor - use TransformersEmbeddings.create() instead.
     * @param extractor Loaded feature-extraction pipeline.
     * @param tokenizer Tokenizer belonging to that pipeline.
     * @param options Options with defaults already applied.
     */
    private constructor(
        extractor: FeatureExtractionPipeline,
        tokenizer: PreTrainedTokenizer,
        options: Required<Omit<TransformersEmbeddingsOptions, 'progressCallback'>> & Pick<TransformersEmbeddingsOptions, 'progressCallback'>
    ) {
        this._extractor = extractor;
        this._tokenizer = tokenizer;
        this._options = options;
        this.maxTokens = options.maxTokens;
    }

    /**
     * Creates a new TransformersEmbeddings instance.
     * @param options Configuration options.
     * @returns Promise resolving to initialized TransformersEmbeddings instance.
     * @throws Error if @huggingface/transformers is not installed.
     */
    public static async create(options?: TransformersEmbeddingsOptions): Promise<TransformersEmbeddings> {
        // Dynamically import to allow optional dependency
        let transformers: TransformersLibrary;

        try {
            transformers = await import('@huggingface/transformers');
        } catch (e) {
            throw new Error(
                'TransformersEmbeddings requires @huggingface/transformers. ' +
                'Install it with: npm install @huggingface/transformers'
            );
        }

        const { pipeline } = transformers;

        // Apply defaults (?? keeps explicit falsy values such as normalize: false)
        const opts = {
            model: options?.model ?? DEFAULT_MODEL,
            maxTokens: options?.maxTokens ?? 512,
            device: options?.device ?? 'auto',
            dtype: options?.dtype ?? 'fp32',
            normalize: options?.normalize ?? true,
            pooling: options?.pooling ?? 'mean',
            progressCallback: options?.progressCallback
        };

        // Build pipeline options; the library expects snake_case for the progress callback
        const pipelineOptions: any = {
            device: opts.device,
            dtype: opts.dtype
        };

        if (opts.progressCallback) {
            pipelineOptions.progress_callback = opts.progressCallback;
        }

        // Load the feature extraction pipeline
        const extractor = await pipeline(
            'feature-extraction',
            opts.model,
            pipelineOptions
        );

        // Reuse the tokenizer the pipeline already loaded, so text splitting and
        // embedding use the same tokenization (no second load happens here)
        const tokenizer = extractor.tokenizer;

        return new TransformersEmbeddings(extractor, tokenizer, opts);
    }

    /**
     * Returns a tokenizer that uses the same tokenization as this embedding model.
     * @remarks
     * Use this tokenizer with LocalDocumentIndex to ensure text chunking
     * aligns with the embedding model's token boundaries.
     * A new wrapper is created on every call; all wrappers share the same underlying tokenizer.
     * @returns TransformersTokenizer instance.
     */
    public getTokenizer(): TransformersTokenizer {
        return new TransformersTokenizer(this._tokenizer);
    }

    /**
     * Creates embeddings for the given inputs.
     * @remarks
     * Never rejects: failures are reported through `status: 'error'` and `message`.
     * An empty input array yields a successful response with no embeddings and
     * does not invoke the model.
     * @param inputs Text inputs to create embeddings for.
     * @returns EmbeddingsResponse with status and generated embeddings.
     */
    public async createEmbeddings(inputs: string | string[]): Promise<EmbeddingsResponse> {
        try {
            const inputArray = Array.isArray(inputs) ? inputs : [inputs];

            // Nothing to embed: answer directly instead of running an empty batch
            if (inputArray.length === 0) {
                return {
                    status: 'success',
                    output: [],
                    model: this._options.model
                };
            }

            // Process all inputs in a single batch
            const output = await this._extractor(inputArray, {
                pooling: this._options.pooling,
                normalize: this._options.normalize
            });

            // With pooling applied the result is read as [batchSize, embeddingDim],
            // stored as one flat buffer
            const [batchSize, embeddingDim] = output.dims;
            const data = output.data as Float32Array;

            // Slice the flat array into individual embeddings
            const embeddings: number[][] = [];
            for (let i = 0; i < batchSize; i++) {
                const start = i * embeddingDim;
                const end = start + embeddingDim;
                embeddings.push(Array.from(data.slice(start, end)));
            }

            return {
                status: 'success',
                output: embeddings,
                model: this._options.model
            };
        } catch (error: unknown) {
            // Thrown values are not guaranteed to be Error instances
            const reason = error instanceof Error ? error.message : String(error);
            return {
                status: 'error',
                message: `Error generating embeddings: ${reason}`
            };
        }
    }

    /**
     * Returns the model name being used.
     */
    public get model(): string {
        return this._options.model;
    }
}
@@ -0,0 +1,143 @@
1
+ import { strict as assert } from 'node:assert';
2
+ import { describe, it } from 'mocha';
3
+ import { TransformersTokenizer } from './TransformersTokenizer';
4
+
5
describe('TransformersTokenizer', () => {
    // Minimal shape of the hand-written tokenizer stand-ins used in this suite.
    interface MockTokenizer {
        __call__: (...args: any[]) => unknown;
        decode: (...args: any[]) => string;
    }

    // TransformersTokenizer invokes its tokenizer as a function and also calls
    // `.decode` on it, so expose the mock as a function object carrying `decode`.
    function toCallable(mock: MockTokenizer): any {
        return Object.assign(
            (...args: any[]) => mock.__call__(...args),
            { decode: mock.decode }
        );
    }

    // Packs plain numbers into the BigInt64Array form the mocks return as `input_ids.data`.
    function bigIds(...values: number[]): BigInt64Array {
        return BigInt64Array.from(values.map(value => BigInt(value)));
    }

    // Word-level mock with a five-entry vocabulary; unknown words encode to 100.
    function createMockTokenizer(): MockTokenizer {
        const vocab = new Map<string, number>([
            ['hello', 101],
            ['world', 102],
            ['test', 103],
            ['[CLS]', 1],
            ['[SEP]', 2]
        ]);
        const reverseVocab = new Map<number, string>(
            Array.from(vocab, ([word, id]): [number, string] => [id, word])
        );

        return {
            // Mimics the callable tokenizer behavior
            __call__: (text: string) => {
                const ids: number[] = [];
                for (const word of text.toLowerCase().split(/\s+/)) {
                    if (word) {
                        ids.push(vocab.get(word) ?? 100);
                    }
                }
                return { input_ids: { data: bigIds(...ids) } };
            },
            decode: (tokens: number[], options?: { skip_special_tokens?: boolean }) => {
                const pieces: string[] = [];
                for (const token of tokens) {
                    const isSpecial = token === 1 || token === 2;
                    if (options?.skip_special_tokens && isSpecial) {
                        continue;
                    }
                    pieces.push(reverseVocab.get(token) ?? '[UNK]');
                }
                return pieces.join(' ');
            }
        };
    }

    it('encodes text to token array using callable tokenizer', () => {
        const tokenizer = new TransformersTokenizer(toCallable(createMockTokenizer()));

        const tokens = tokenizer.encode('hello world');

        assert.ok(Array.isArray(tokens), 'encode should return an array');
        assert.equal(tokens.length, 2, 'should have 2 tokens');
        assert.deepEqual(tokens, [101, 102], 'tokens should match expected values');
    });

    it('handles BigInt64Array conversion correctly', () => {
        const tokenizer = new TransformersTokenizer(toCallable({
            __call__: () => ({ input_ids: { data: bigIds(1, 2, 3) } }),
            decode: () => 'decoded'
        }));

        const tokens = tokenizer.encode('any text');

        assert.deepEqual(tokens, [1, 2, 3], 'should convert BigInt to number');
        for (const token of tokens) {
            assert.equal(typeof token, 'number', 'each token should be a number');
        }
    });

    it('decodes tokens back to text', () => {
        // The mock answers differently depending on whether special tokens are skipped,
        // which lets the assertion observe the option the wrapper passes.
        const tokenizer = new TransformersTokenizer(toCallable({
            __call__: () => ({ input_ids: { data: bigIds() } }),
            decode: (_tokens: number[], opts?: { skip_special_tokens?: boolean }) =>
                opts?.skip_special_tokens ? 'hello world' : '[CLS] hello world [SEP]'
        }));

        const text = tokenizer.decode([1, 101, 102, 2]);

        assert.equal(text, 'hello world', 'should decode with skip_special_tokens=true');
    });

    it('handles empty input', () => {
        const tokenizer = new TransformersTokenizer(toCallable({
            __call__: () => ({ input_ids: { data: bigIds() } }),
            decode: () => ''
        }));

        assert.deepEqual(tokenizer.encode(''), [], 'empty input should return empty array');
        assert.equal(tokenizer.decode([]), '', 'empty tokens should return empty string');
    });

    it('returns consistent results for same input', () => {
        let callCount = 0;
        const tokenizer = new TransformersTokenizer(toCallable({
            __call__: () => {
                callCount += 1;
                return { input_ids: { data: bigIds(101, 102) } };
            },
            decode: () => 'hello world'
        }));

        const first = tokenizer.encode('hello world');
        const second = tokenizer.encode('hello world');

        assert.deepEqual(first, second, 'encode should be deterministic');
        assert.equal(callCount, 2, 'should call underlying tokenizer each time');
    });
});
@@ -0,0 +1,45 @@
1
+ import { PreTrainedTokenizer } from "@huggingface/transformers";
2
+ import { Tokenizer } from "./types";
3
+
4
/**
 * A tokenizer wrapper for Transformers.js models.
 * @remarks
 * This tokenizer uses the same tokenizer as the embedding model,
 * ensuring consistency between text splitting and embedding generation.
 *
 * Obtain an instance via TransformersEmbeddings.getTokenizer().
 */
export class TransformersTokenizer implements Tokenizer {
    private readonly _tokenizer: PreTrainedTokenizer;

    /**
     * Creates a new TransformersTokenizer.
     * @param tokenizer The underlying Transformers.js tokenizer. It is invoked
     * directly as a function by `encode` and through its `decode` method by `decode`.
     * @remarks
     * Typically created via TransformersEmbeddings.getTokenizer().
     */
    public constructor(tokenizer: PreTrainedTokenizer) {
        this._tokenizer = tokenizer;
    }

    /**
     * Encodes text into token IDs.
     * @param text The text to encode.
     * @returns Array of token IDs as plain numbers; BigInt IDs are converted with `Number()`.
     */
    public encode(text: string): number[] {
        const encoded = this._tokenizer(text);
        // Accept, in order of preference: a tensor-like `input_ids.data`
        // (e.g. a BigInt64Array), a bare `input_ids` sequence, or the raw
        // return value itself.
        const ids: Iterable<number | bigint> | ArrayLike<number | bigint> =
            encoded.input_ids?.data ?? encoded.input_ids ?? encoded;
        // Single pass: convert while copying instead of building an intermediate array.
        return Array.from(ids, (id) => Number(id));
    }

    /**
     * Decodes token IDs back into text.
     * @param tokens Array of token IDs.
     * @returns Decoded text string, produced with `skip_special_tokens: true`.
     */
    public decode(tokens: number[]): string {
        return this._tokenizer.decode(tokens, { skip_special_tokens: true });
    }
}