vectra 0.12.2 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (392) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +92 -100
  3. package/bin/vectra.js +3 -0
  4. package/lib/BrowserWebFetcher.d.ts +75 -0
  5. package/lib/BrowserWebFetcher.d.ts.map +1 -0
  6. package/lib/BrowserWebFetcher.js +290 -0
  7. package/lib/BrowserWebFetcher.js.map +1 -0
  8. package/lib/FileFetcher.d.ts +5 -0
  9. package/lib/FileFetcher.d.ts.map +1 -0
  10. package/lib/FileFetcher.js +89 -0
  11. package/lib/FileFetcher.js.map +1 -0
  12. package/lib/FileFetcher.spec.d.ts +2 -0
  13. package/lib/FileFetcher.spec.d.ts.map +1 -0
  14. package/lib/FileFetcher.spec.js +244 -0
  15. package/lib/FileFetcher.spec.js.map +1 -0
  16. package/lib/FolderWatcher.d.ts +91 -0
  17. package/lib/FolderWatcher.d.ts.map +1 -0
  18. package/lib/FolderWatcher.js +304 -0
  19. package/lib/FolderWatcher.js.map +1 -0
  20. package/lib/FolderWatcher.spec.d.ts +2 -0
  21. package/lib/FolderWatcher.spec.d.ts.map +1 -0
  22. package/lib/FolderWatcher.spec.js +308 -0
  23. package/lib/FolderWatcher.spec.js.map +1 -0
  24. package/lib/GPT3Tokenizer.d.ts +9 -0
  25. package/lib/GPT3Tokenizer.spec.d.ts +2 -0
  26. package/lib/GPT3Tokenizer.spec.d.ts.map +1 -0
  27. package/lib/GPT3Tokenizer.spec.js +45 -0
  28. package/lib/GPT3Tokenizer.spec.js.map +1 -0
  29. package/lib/ItemSelector.d.ts +41 -0
  30. package/lib/ItemSelector.d.ts.map +1 -0
  31. package/lib/ItemSelector.js +179 -0
  32. package/lib/ItemSelector.js.map +1 -0
  33. package/lib/ItemSelector.spec.d.ts +2 -0
  34. package/lib/ItemSelector.spec.d.ts.map +1 -0
  35. package/lib/ItemSelector.spec.js +204 -0
  36. package/lib/ItemSelector.spec.js.map +1 -0
  37. package/lib/LocalDocument.d.ts +54 -0
  38. package/lib/LocalDocument.d.ts.map +1 -1
  39. package/lib/LocalDocument.js +116 -0
  40. package/lib/LocalDocument.js.map +1 -0
  41. package/lib/LocalDocument.spec.d.ts +2 -0
  42. package/lib/LocalDocument.spec.d.ts.map +1 -0
  43. package/lib/LocalDocument.spec.js +214 -0
  44. package/lib/LocalDocument.spec.js.map +1 -0
  45. package/lib/LocalDocumentIndex.d.ts +152 -0
  46. package/lib/LocalDocumentIndex.d.ts.map +1 -1
  47. package/lib/LocalDocumentIndex.js +420 -0
  48. package/lib/LocalDocumentIndex.js.map +1 -0
  49. package/lib/LocalDocumentIndex.spec.d.ts +2 -0
  50. package/lib/LocalDocumentIndex.spec.d.ts.map +1 -0
  51. package/lib/LocalDocumentIndex.spec.js +494 -0
  52. package/lib/LocalDocumentIndex.spec.js.map +1 -0
  53. package/lib/LocalDocumentResult.d.ts +66 -0
  54. package/lib/LocalDocumentResult.d.ts.map +1 -1
  55. package/lib/LocalDocumentResult.js +376 -0
  56. package/lib/LocalDocumentResult.js.map +1 -0
  57. package/lib/LocalDocumentResult.spec.d.ts +2 -0
  58. package/lib/LocalDocumentResult.spec.d.ts.map +1 -0
  59. package/lib/LocalDocumentResult.spec.js +373 -0
  60. package/lib/LocalDocumentResult.spec.js.map +1 -0
  61. package/lib/LocalEmbeddings.d.ts +59 -0
  62. package/lib/LocalEmbeddings.d.ts.map +1 -0
  63. package/lib/LocalEmbeddings.js +101 -0
  64. package/lib/LocalEmbeddings.js.map +1 -0
  65. package/lib/LocalEmbeddings.spec.d.ts +2 -0
  66. package/lib/LocalEmbeddings.spec.d.ts.map +1 -0
  67. package/lib/LocalEmbeddings.spec.js +155 -0
  68. package/lib/LocalEmbeddings.spec.js.map +1 -0
  69. package/lib/LocalIndex.d.ts +159 -0
  70. package/lib/LocalIndex.d.ts.map +1 -1
  71. package/lib/LocalIndex.js +519 -0
  72. package/lib/LocalIndex.js.map +1 -0
  73. package/lib/LocalIndex.spec.d.ts +2 -0
  74. package/lib/LocalIndex.spec.js +611 -9
  75. package/lib/LocalIndex.spec.js.map +1 -1
  76. package/lib/OpenAIEmbeddings.d.ts +124 -0
  77. package/lib/OpenAIEmbeddings.d.ts.map +1 -0
  78. package/lib/OpenAIEmbeddings.js +166 -0
  79. package/lib/OpenAIEmbeddings.js.map +1 -0
  80. package/lib/OpenAIEmbeddings.spec.d.ts +2 -0
  81. package/lib/OpenAIEmbeddings.spec.d.ts.map +1 -0
  82. package/lib/OpenAIEmbeddings.spec.js +298 -0
  83. package/lib/OpenAIEmbeddings.spec.js.map +1 -0
  84. package/lib/TextSplitter.d.ts +21 -0
  85. package/lib/TextSplitter.d.ts.map +1 -1
  86. package/lib/TextSplitter.js +500 -0
  87. package/lib/TextSplitter.js.map +1 -0
  88. package/lib/TextSplitter.spec.d.ts +2 -0
  89. package/lib/TextSplitter.spec.d.ts.map +1 -0
  90. package/lib/TextSplitter.spec.js +337 -0
  91. package/lib/TextSplitter.spec.js.map +1 -0
  92. package/lib/TransformersEmbeddings.d.ts +121 -0
  93. package/lib/TransformersEmbeddings.d.ts.map +1 -0
  94. package/lib/TransformersEmbeddings.js +176 -0
  95. package/lib/TransformersEmbeddings.js.map +1 -0
  96. package/lib/TransformersEmbeddings.spec.d.ts +2 -0
  97. package/lib/TransformersEmbeddings.spec.d.ts.map +1 -0
  98. package/lib/TransformersEmbeddings.spec.js +198 -0
  99. package/lib/TransformersEmbeddings.spec.js.map +1 -0
  100. package/lib/TransformersTokenizer.d.ts +33 -0
  101. package/lib/TransformersTokenizer.d.ts.map +1 -0
  102. package/lib/TransformersTokenizer.js +44 -0
  103. package/lib/TransformersTokenizer.js.map +1 -0
  104. package/lib/TransformersTokenizer.spec.d.ts +2 -0
  105. package/lib/TransformersTokenizer.spec.d.ts.map +1 -0
  106. package/lib/TransformersTokenizer.spec.js +112 -0
  107. package/lib/TransformersTokenizer.spec.js.map +1 -0
  108. package/lib/WebFetcher.d.ts +14 -0
  109. package/lib/WebFetcher.d.ts.map +1 -0
  110. package/lib/WebFetcher.js +238 -0
  111. package/lib/WebFetcher.js.map +1 -0
  112. package/lib/WebFetcher.spec.d.ts +2 -0
  113. package/lib/WebFetcher.spec.d.ts.map +1 -0
  114. package/lib/WebFetcher.spec.js +263 -0
  115. package/lib/WebFetcher.spec.js.map +1 -0
  116. package/lib/browser.d.ts +30 -0
  117. package/lib/browser.d.ts.map +1 -0
  118. package/lib/browser.js +52 -0
  119. package/lib/browser.js.map +1 -0
  120. package/lib/codecs/IndexCodec.d.ts +37 -0
  121. package/lib/codecs/IndexCodec.d.ts.map +1 -0
  122. package/lib/codecs/IndexCodec.js +3 -0
  123. package/lib/codecs/IndexCodec.js.map +1 -0
  124. package/lib/codecs/JsonCodec.d.ts +19 -0
  125. package/lib/codecs/JsonCodec.d.ts.map +1 -0
  126. package/lib/codecs/JsonCodec.js +35 -0
  127. package/lib/codecs/JsonCodec.js.map +1 -0
  128. package/lib/codecs/JsonCodec.spec.d.ts +2 -0
  129. package/lib/codecs/JsonCodec.spec.d.ts.map +1 -0
  130. package/lib/codecs/JsonCodec.spec.js +66 -0
  131. package/lib/codecs/JsonCodec.spec.js.map +1 -0
  132. package/lib/codecs/LocalIndex.protobuf.spec.d.ts +2 -0
  133. package/lib/codecs/LocalIndex.protobuf.spec.d.ts.map +1 -0
  134. package/lib/codecs/LocalIndex.protobuf.spec.js +108 -0
  135. package/lib/codecs/LocalIndex.protobuf.spec.js.map +1 -0
  136. package/lib/codecs/ProtobufCodec.d.ts +20 -0
  137. package/lib/codecs/ProtobufCodec.d.ts.map +1 -0
  138. package/lib/codecs/ProtobufCodec.js +225 -0
  139. package/lib/codecs/ProtobufCodec.js.map +1 -0
  140. package/lib/codecs/ProtobufCodec.spec.d.ts +2 -0
  141. package/lib/codecs/ProtobufCodec.spec.d.ts.map +1 -0
  142. package/lib/codecs/ProtobufCodec.spec.js +155 -0
  143. package/lib/codecs/ProtobufCodec.spec.js.map +1 -0
  144. package/lib/codecs/index.d.ts +5 -0
  145. package/lib/codecs/index.d.ts.map +1 -0
  146. package/lib/codecs/index.js +21 -0
  147. package/lib/codecs/index.js.map +1 -0
  148. package/lib/codecs/migrateIndex.d.ts +24 -0
  149. package/lib/codecs/migrateIndex.d.ts.map +1 -0
  150. package/lib/codecs/migrateIndex.js +119 -0
  151. package/lib/codecs/migrateIndex.js.map +1 -0
  152. package/lib/codecs/migrateIndex.spec.d.ts +2 -0
  153. package/lib/codecs/migrateIndex.spec.d.ts.map +1 -0
  154. package/lib/codecs/migrateIndex.spec.js +151 -0
  155. package/lib/codecs/migrateIndex.spec.js.map +1 -0
  156. package/lib/codecs/schemas/index.proto +34 -0
  157. package/lib/index.d.ts +20 -0
  158. package/lib/index.d.ts.map +1 -1
  159. package/lib/index.js +36 -0
  160. package/lib/index.js.map +1 -0
  161. package/lib/internals/Colorize.d.ts +14 -0
  162. package/lib/internals/Colorize.d.ts.map +1 -0
  163. package/lib/internals/Colorize.js +69 -0
  164. package/lib/internals/Colorize.js.map +1 -0
  165. package/lib/internals/index.d.ts +3 -0
  166. package/lib/internals/index.d.ts.map +1 -0
  167. package/lib/internals/index.js +19 -0
  168. package/lib/internals/index.js.map +1 -0
  169. package/lib/internals/types.d.ts +43 -0
  170. package/lib/internals/types.d.ts.map +1 -0
  171. package/lib/internals/types.js +3 -0
  172. package/lib/internals/types.js.map +1 -0
  173. package/lib/server/IndexManager.d.ts +78 -0
  174. package/lib/server/IndexManager.d.ts.map +1 -0
  175. package/lib/server/IndexManager.js +259 -0
  176. package/lib/server/IndexManager.js.map +1 -0
  177. package/lib/server/VectraServer.d.ts +40 -0
  178. package/lib/server/VectraServer.d.ts.map +1 -0
  179. package/lib/server/VectraServer.js +151 -0
  180. package/lib/server/VectraServer.js.map +1 -0
  181. package/lib/server/VectraServer.spec.d.ts +2 -0
  182. package/lib/server/VectraServer.spec.d.ts.map +1 -0
  183. package/lib/server/VectraServer.spec.js +322 -0
  184. package/lib/server/VectraServer.spec.js.map +1 -0
  185. package/lib/server/handlers/documentHandlers.d.ts +15 -0
  186. package/lib/server/handlers/documentHandlers.d.ts.map +1 -0
  187. package/lib/server/handlers/documentHandlers.js +95 -0
  188. package/lib/server/handlers/documentHandlers.js.map +1 -0
  189. package/lib/server/handlers/helpers.d.ts +23 -0
  190. package/lib/server/handlers/helpers.d.ts.map +1 -0
  191. package/lib/server/handlers/helpers.js +138 -0
  192. package/lib/server/handlers/helpers.js.map +1 -0
  193. package/lib/server/handlers/index.d.ts +8 -0
  194. package/lib/server/handlers/index.d.ts.map +1 -0
  195. package/lib/server/handlers/index.js +22 -0
  196. package/lib/server/handlers/index.js.map +1 -0
  197. package/lib/server/handlers/indexHandlers.d.ts +14 -0
  198. package/lib/server/handlers/indexHandlers.d.ts.map +1 -0
  199. package/lib/server/handlers/indexHandlers.js +85 -0
  200. package/lib/server/handlers/indexHandlers.js.map +1 -0
  201. package/lib/server/handlers/itemHandlers.d.ts +34 -0
  202. package/lib/server/handlers/itemHandlers.d.ts.map +1 -0
  203. package/lib/server/handlers/itemHandlers.js +166 -0
  204. package/lib/server/handlers/itemHandlers.js.map +1 -0
  205. package/lib/server/handlers/lifecycleHandlers.d.ts +11 -0
  206. package/lib/server/handlers/lifecycleHandlers.d.ts.map +1 -0
  207. package/lib/server/handlers/lifecycleHandlers.js +31 -0
  208. package/lib/server/handlers/lifecycleHandlers.js.map +1 -0
  209. package/lib/server/handlers/queryHandlers.d.ts +27 -0
  210. package/lib/server/handlers/queryHandlers.d.ts.map +1 -0
  211. package/lib/server/handlers/queryHandlers.js +135 -0
  212. package/lib/server/handlers/queryHandlers.js.map +1 -0
  213. package/lib/server/handlers/statsHandlers.d.ts +17 -0
  214. package/lib/server/handlers/statsHandlers.d.ts.map +1 -0
  215. package/lib/server/handlers/statsHandlers.js +81 -0
  216. package/lib/server/handlers/statsHandlers.js.map +1 -0
  217. package/lib/server/index.d.ts +4 -0
  218. package/lib/server/index.d.ts.map +1 -0
  219. package/lib/server/index.js +23 -0
  220. package/lib/server/index.js.map +1 -0
  221. package/lib/storage/FileStorage.d.ts +92 -0
  222. package/lib/storage/FileStorage.d.ts.map +1 -0
  223. package/lib/storage/FileStorage.js +3 -0
  224. package/lib/storage/FileStorage.js.map +1 -0
  225. package/lib/storage/FileStorageUtilities.d.ts +36 -0
  226. package/lib/storage/FileStorageUtilities.d.ts.map +1 -0
  227. package/lib/storage/FileStorageUtilities.js +91 -0
  228. package/lib/storage/FileStorageUtilities.js.map +1 -0
  229. package/lib/storage/FileStorageUtilities.spec.d.ts +2 -0
  230. package/lib/storage/FileStorageUtilities.spec.d.ts.map +1 -0
  231. package/lib/storage/FileStorageUtilities.spec.js +98 -0
  232. package/lib/storage/FileStorageUtilities.spec.js.map +1 -0
  233. package/lib/storage/FileType.d.ts +29 -0
  234. package/lib/storage/FileType.d.ts.map +1 -0
  235. package/lib/storage/FileType.js +38 -0
  236. package/lib/storage/FileType.js.map +1 -0
  237. package/lib/storage/IndexedDBStorage.d.ts +47 -0
  238. package/lib/storage/IndexedDBStorage.d.ts.map +1 -0
  239. package/lib/storage/IndexedDBStorage.js +347 -0
  240. package/lib/storage/IndexedDBStorage.js.map +1 -0
  241. package/lib/storage/LocalFileStorage.browser.d.ts +19 -0
  242. package/lib/storage/LocalFileStorage.browser.d.ts.map +1 -0
  243. package/lib/storage/LocalFileStorage.browser.js +43 -0
  244. package/lib/storage/LocalFileStorage.browser.js.map +1 -0
  245. package/lib/storage/LocalFileStorage.d.ts +23 -0
  246. package/lib/storage/LocalFileStorage.d.ts.map +1 -0
  247. package/lib/storage/LocalFileStorage.js +152 -0
  248. package/lib/storage/LocalFileStorage.js.map +1 -0
  249. package/lib/storage/LocalFileStorage.spec.d.ts +2 -0
  250. package/lib/storage/LocalFileStorage.spec.d.ts.map +1 -0
  251. package/lib/storage/LocalFileStorage.spec.js +249 -0
  252. package/lib/storage/LocalFileStorage.spec.js.map +1 -0
  253. package/lib/storage/VirtualFileStorage.d.ts +18 -0
  254. package/lib/storage/VirtualFileStorage.d.ts.map +1 -0
  255. package/lib/storage/VirtualFileStorage.js +178 -0
  256. package/lib/storage/VirtualFileStorage.js.map +1 -0
  257. package/lib/storage/VirtualFileStorage.spec.d.ts +2 -0
  258. package/lib/storage/VirtualFileStorage.spec.d.ts.map +1 -0
  259. package/lib/storage/VirtualFileStorage.spec.js +302 -0
  260. package/lib/storage/VirtualFileStorage.spec.js.map +1 -0
  261. package/lib/storage/index.d.ts +6 -0
  262. package/lib/storage/index.d.ts.map +1 -0
  263. package/lib/storage/index.js +22 -0
  264. package/lib/storage/index.js.map +1 -0
  265. package/lib/templates/templates/csharp/README.md +48 -0
  266. package/lib/templates/templates/csharp/VectraClient.cs +234 -0
  267. package/lib/templates/templates/go/README.md +71 -0
  268. package/lib/templates/templates/go/vectra_client.go +322 -0
  269. package/lib/templates/templates/java/README.md +81 -0
  270. package/lib/templates/templates/java/VectraClient.java +232 -0
  271. package/lib/templates/templates/python/README.md +37 -0
  272. package/lib/templates/templates/python/vectra_client.py +279 -0
  273. package/lib/templates/templates/rust/Cargo.toml +14 -0
  274. package/lib/templates/templates/rust/README.md +39 -0
  275. package/lib/templates/templates/rust/build.rs +4 -0
  276. package/lib/templates/templates/rust/lib.rs +284 -0
  277. package/lib/templates/templates/typescript/README.md +96 -0
  278. package/lib/templates/templates/typescript/VectraClient.ts +374 -0
  279. package/lib/templates/typescript/VectraClient.d.ts +114 -0
  280. package/lib/templates/typescript/VectraClient.d.ts.map +1 -0
  281. package/lib/templates/typescript/VectraClient.js +328 -0
  282. package/lib/templates/typescript/VectraClient.js.map +1 -0
  283. package/lib/types.d.ts +153 -0
  284. package/lib/types.d.ts.map +1 -0
  285. package/lib/types.js +3 -0
  286. package/lib/types.js.map +1 -0
  287. package/lib/utils/index.d.ts +2 -0
  288. package/lib/utils/index.d.ts.map +1 -0
  289. package/lib/utils/index.js +18 -0
  290. package/lib/utils/index.js.map +1 -0
  291. package/lib/utils/pathUtils.d.ts +40 -0
  292. package/lib/utils/pathUtils.d.ts.map +1 -0
  293. package/lib/utils/pathUtils.js +98 -0
  294. package/lib/utils/pathUtils.js.map +1 -0
  295. package/lib/vectra-cli.d.ts +2 -0
  296. package/lib/vectra-cli.d.ts.map +1 -1
  297. package/lib/vectra-cli.generate.spec.d.ts +2 -0
  298. package/lib/vectra-cli.generate.spec.d.ts.map +1 -0
  299. package/lib/vectra-cli.generate.spec.js +112 -0
  300. package/lib/vectra-cli.generate.spec.js.map +1 -0
  301. package/lib/vectra-cli.js +760 -0
  302. package/lib/vectra-cli.js.map +1 -0
  303. package/lib/vectra-cli.spec.d.ts +1 -0
  304. package/lib/vectra-cli.spec.d.ts.map +1 -0
  305. package/lib/vectra-cli.spec.js +2 -0
  306. package/lib/vectra-cli.spec.js.map +1 -0
  307. package/package.json +91 -16
  308. package/proto/vectra_service.proto +276 -0
  309. package/src/BrowserWebFetcher.ts +345 -0
  310. package/src/FileFetcher.spec.ts +234 -0
  311. package/src/FileFetcher.ts +37 -25
  312. package/src/FolderWatcher.spec.ts +288 -0
  313. package/src/FolderWatcher.ts +304 -0
  314. package/src/GPT3Tokenizer.spec.ts +50 -0
  315. package/src/ItemSelector.spec.ts +252 -0
  316. package/src/ItemSelector.ts +163 -150
  317. package/src/LocalDocument.spec.ts +211 -0
  318. package/src/LocalDocument.ts +88 -94
  319. package/src/LocalDocumentIndex.spec.ts +481 -0
  320. package/src/LocalDocumentIndex.ts +39 -40
  321. package/src/LocalDocumentResult.spec.ts +373 -0
  322. package/src/LocalDocumentResult.ts +489 -319
  323. package/src/LocalEmbeddings.spec.ts +138 -0
  324. package/src/LocalEmbeddings.ts +120 -0
  325. package/src/LocalIndex.spec.ts +808 -66
  326. package/src/LocalIndex.ts +479 -429
  327. package/src/OpenAIEmbeddings.spec.ts +354 -0
  328. package/src/OpenAIEmbeddings.ts +26 -27
  329. package/src/TextSplitter.spec.ts +342 -0
  330. package/src/TextSplitter.ts +517 -532
  331. package/src/TransformersEmbeddings.spec.ts +188 -0
  332. package/src/TransformersEmbeddings.ts +232 -0
  333. package/src/TransformersTokenizer.spec.ts +143 -0
  334. package/src/TransformersTokenizer.ts +45 -0
  335. package/src/WebFetcher.spec.ts +288 -0
  336. package/src/WebFetcher.ts +184 -186
  337. package/src/browser.ts +69 -0
  338. package/src/codecs/IndexCodec.ts +40 -0
  339. package/src/codecs/JsonCodec.spec.ts +70 -0
  340. package/src/codecs/JsonCodec.ts +37 -0
  341. package/src/codecs/LocalIndex.protobuf.spec.ts +115 -0
  342. package/src/codecs/ProtobufCodec.spec.ts +166 -0
  343. package/src/codecs/ProtobufCodec.ts +193 -0
  344. package/src/codecs/index.ts +4 -0
  345. package/src/codecs/migrateIndex.spec.ts +176 -0
  346. package/src/codecs/migrateIndex.ts +125 -0
  347. package/src/codecs/schemas/index.proto +34 -0
  348. package/src/index.ts +9 -1
  349. package/src/internals/Colorize.ts +19 -16
  350. package/src/server/IndexManager.ts +243 -0
  351. package/src/server/VectraServer.spec.ts +303 -0
  352. package/src/server/VectraServer.ts +156 -0
  353. package/src/server/handlers/documentHandlers.ts +59 -0
  354. package/src/server/handlers/helpers.ts +93 -0
  355. package/src/server/handlers/index.ts +7 -0
  356. package/src/server/handlers/indexHandlers.ts +44 -0
  357. package/src/server/handlers/itemHandlers.ts +140 -0
  358. package/src/server/handlers/lifecycleHandlers.ts +26 -0
  359. package/src/server/handlers/queryHandlers.ts +96 -0
  360. package/src/server/handlers/statsHandlers.ts +38 -0
  361. package/src/server/index.ts +3 -0
  362. package/src/storage/FileStorage.ts +105 -0
  363. package/src/storage/FileStorageUtilities.spec.ts +106 -0
  364. package/src/storage/FileStorageUtilities.ts +77 -0
  365. package/src/storage/FileType.ts +61 -0
  366. package/src/storage/IndexedDBStorage.ts +365 -0
  367. package/src/storage/LocalFileStorage.browser.ts +52 -0
  368. package/src/storage/LocalFileStorage.spec.ts +292 -0
  369. package/src/storage/LocalFileStorage.ts +98 -0
  370. package/src/storage/VirtualFileStorage.spec.ts +307 -0
  371. package/src/storage/VirtualFileStorage.ts +169 -0
  372. package/src/storage/index.ts +5 -0
  373. package/src/templates/csharp/README.md +48 -0
  374. package/src/templates/csharp/VectraClient.cs +234 -0
  375. package/src/templates/go/README.md +71 -0
  376. package/src/templates/go/vectra_client.go +322 -0
  377. package/src/templates/java/README.md +81 -0
  378. package/src/templates/java/VectraClient.java +232 -0
  379. package/src/templates/python/README.md +37 -0
  380. package/src/templates/python/vectra_client.py +279 -0
  381. package/src/templates/rust/Cargo.toml +14 -0
  382. package/src/templates/rust/README.md +39 -0
  383. package/src/templates/rust/build.rs +4 -0
  384. package/src/templates/rust/lib.rs +284 -0
  385. package/src/templates/typescript/README.md +96 -0
  386. package/src/templates/typescript/VectraClient.ts +374 -0
  387. package/src/types.ts +131 -123
  388. package/src/utils/index.ts +1 -0
  389. package/src/utils/pathUtils.ts +106 -0
  390. package/src/vectra-cli.generate.spec.ts +72 -0
  391. package/src/vectra-cli.spec.ts +0 -0
  392. package/src/vectra-cli.ts +687 -246
@@ -0,0 +1,176 @@
1
+ "use strict";
// Emitted module-interop helper: exposes property `k` of module `m` on object `o` under the
// name `k2` (which defaults to `k`). A helper already present on `this` is reused if there is one.
// When Object.create is available the property is installed with Object.defineProperty; the
// source descriptor is kept as-is unless it is missing, is an accessor on a module that is not
// flagged `__esModule`, or is a writable/configurable data property. In those cases an
// enumerable getter returning `m[k]` is used instead, so later changes to `m[k]` stay visible.
// Without Object.create the value is simply copied with a plain assignment.
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
// Emitted module-interop helper: stores `v` as the `default` member of the namespace object `o`.
// Uses an enumerable Object.defineProperty when Object.create is available, otherwise a plain
// assignment. A helper already present on `this` is reused if there is one.
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
// Emitted module-interop helper: turns the result of `require()` into a namespace-style object.
// - A truthy module flagged `__esModule` is returned unchanged.
// - Otherwise a fresh object is built: every own property name of `mod` except "default" is
//   bound onto it through __createBinding, and `mod` itself becomes its `default` member.
// `ownKeys` replaces itself on first use with Object.getOwnPropertyNames, or with a
// for-in/hasOwnProperty fallback when that function is unavailable.
// A helper already present on `this` is reused if there is one.
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
// Emitted helper that drives a generator function as an asynchronous routine.
// `generator` is invoked with `thisArg` and `_arguments` (or an empty list) and the helper returns
// a promise built with constructor `P`, which defaults to the global Promise.
// Each yielded value is adopted into a `P` promise: fulfilment resumes the generator through
// `next`, rejection is injected through `throw`. The returned promise resolves with the
// generator's final value and rejects with anything the generator throws while being stepped.
// A helper already present on `this` is reused if there is one.
35
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
36
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
37
+ return new (P || (P = Promise))(function (resolve, reject) {
38
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
39
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
40
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
41
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
42
+ });
43
+ };
// Module preamble: flags `exports` with `__esModule` and pre-declares the
// `TransformersEmbeddings` export slot, which is assigned after the class definition below.
44
+ Object.defineProperty(exports, "__esModule", { value: true });
45
+ exports.TransformersEmbeddings = void 0;
// Supplies the TransformersTokenizer wrapper returned by getTokenizer().
46
+ const TransformersTokenizer_1 = require("./TransformersTokenizer");
// Model id that TransformersEmbeddings.create() falls back to when `options.model` is null/undefined.
47
+ const DEFAULT_MODEL = 'Xenova/all-MiniLM-L6-v2';
/**
 * An embeddings model using Transformers.js for local, offline inference.
 * @remarks
 * Requires @huggingface/transformers as a peer dependency.
 * Use the static `create()` method to instantiate.
 *
 * @example
 * ```typescript
 * const embeddings = await TransformersEmbeddings.create({
 *     model: 'Xenova/all-MiniLM-L6-v2'
 * });
 *
 * const index = new LocalDocumentIndex({
 *     folderPath: 'my-index',
 *     embeddings: embeddings,
 *     tokenizer: embeddings.getTokenizer()
 * });
 * ```
 */
class TransformersEmbeddings {
    /**
     * Private constructor - use TransformersEmbeddings.create() instead.
     * @param extractor Callable feature-extraction pipeline produced by `create()`.
     * @param tokenizer Tokenizer object taken from that pipeline.
     * @param options Options with every default already applied; `maxTokens` is also
     * exposed directly on the instance.
     */
    constructor(extractor, tokenizer, options) {
        this._extractor = extractor;
        this._tokenizer = tokenizer;
        this._options = options;
        this.maxTokens = options.maxTokens;
    }
    /**
     * Creates a new TransformersEmbeddings instance.
     * @remarks
     * Missing options default to: the `DEFAULT_MODEL` model id, `maxTokens` 512,
     * `device` 'auto', `dtype` 'fp32', `normalize` true and `pooling` 'mean'.
     * @param options Configuration options.
     * @returns Promise resolving to initialized TransformersEmbeddings instance.
     * @throws Error if @huggingface/transformers cannot be loaded. The underlying
     * failure is attached as the error's `cause` so a broken installation can be told
     * apart from a missing one.
     */
    static create(options) {
        return __awaiter(this, void 0, void 0, function* () {
            // Dynamically import to allow optional dependency
            let transformers;
            try {
                transformers = yield Promise.resolve().then(() => __importStar(require('@huggingface/transformers')));
            }
            catch (e) {
                const loadError = new Error('TransformersEmbeddings requires @huggingface/transformers. ' +
                    'Install it with: npm install @huggingface/transformers');
                // Keep the original failure reachable without changing the message callers may match on.
                Object.defineProperty(loadError, 'cause', { value: e, writable: true, configurable: true });
                throw loadError;
            }
            const { pipeline } = transformers;
            // Apply defaults: only null/undefined fall back, so values such as `normalize: false` are kept.
            const given = options !== null && options !== void 0 ? options : {};
            const orDefault = (value, fallback) => (value !== null && value !== void 0 ? value : fallback);
            const opts = {
                model: orDefault(given.model, DEFAULT_MODEL),
                maxTokens: orDefault(given.maxTokens, 512),
                device: orDefault(given.device, 'auto'),
                dtype: orDefault(given.dtype, 'fp32'),
                normalize: orDefault(given.normalize, true),
                pooling: orDefault(given.pooling, 'mean'),
                progressCallback: given.progressCallback
            };
            // Build pipeline options; the progress callback is only forwarded when one was supplied.
            const pipelineOptions = {
                device: opts.device,
                dtype: opts.dtype
            };
            if (opts.progressCallback) {
                pipelineOptions.progress_callback = opts.progressCallback;
            }
            // Load the feature extraction pipeline
            const extractor = yield pipeline('feature-extraction', opts.model, pipelineOptions);
            // Reuse the tokenizer the pipeline already holds (no second load) for use with TextSplitter
            const tokenizer = extractor.tokenizer;
            return new TransformersEmbeddings(extractor, tokenizer, opts);
        });
    }
    /**
     * Returns a tokenizer that uses the same tokenization as this embedding model.
     * @remarks
     * Use this tokenizer with LocalDocumentIndex to ensure text chunking
     * aligns with the embedding model's token boundaries.
     * @returns TransformersTokenizer instance.
     */
    getTokenizer() {
        return new TransformersTokenizer_1.TransformersTokenizer(this._tokenizer);
    }
    /**
     * Creates embeddings for the given inputs.
     * @remarks
     * Never rejects: any failure is reported as `{ status: 'error', message }`.
     * @param inputs Text inputs to create embeddings for (a single string or an array).
     * @returns EmbeddingsResponse with status and generated embeddings.
     */
    createEmbeddings(inputs) {
        return __awaiter(this, void 0, void 0, function* () {
            try {
                const inputArray = Array.isArray(inputs) ? inputs : [inputs];
                // Process all inputs in a single batch
                const output = yield this._extractor(inputArray, {
                    pooling: this._options.pooling,
                    normalize: this._options.normalize
                });
                const dims = output.dims;
                // The slicing below is only valid for a [batch, dim] result; any other rank
                // would silently produce wrong vectors, so report it instead.
                if (dims.length !== 2) {
                    throw new Error(`expected a 2-D [batch, dim] output but received dims ` +
                        `[${Array.from(dims).join(', ')}] (pooling: ${this._options.pooling})`);
                }
                const [batchSize, embeddingDim] = dims;
                const data = output.data;
                // Slice the flat array into individual embeddings
                const embeddings = [];
                for (let i = 0; i < batchSize; i++) {
                    const start = i * embeddingDim;
                    const end = start + embeddingDim;
                    embeddings.push(Array.from(data.slice(start, end)));
                }
                return {
                    status: 'success',
                    output: embeddings,
                    model: this._options.model
                };
            }
            catch (error) {
                // Thrown values are not guaranteed to carry a string `message`; fall back to
                // their string form rather than reporting "undefined".
                const detail = error != null && typeof error.message === 'string'
                    ? error.message
                    : String(error);
                return {
                    status: 'error',
                    message: `Error generating embeddings: ${detail}`
                };
            }
        });
    }
    /**
     * Returns the model name being used.
     */
    get model() {
        return this._options.model;
    }
}
175
+ exports.TransformersEmbeddings = TransformersEmbeddings;
176
+ //# sourceMappingURL=TransformersEmbeddings.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TransformersEmbeddings.js","sourceRoot":"","sources":["../src/TransformersEmbeddings.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AACA,mEAAgE;AAIhE,MAAM,aAAa,GAAG,yBAAyB,CAAC;AA2EhD;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAa,sBAAsB;IAO/B;;OAEG;IACH,YACI,SAAoC,EACpC,SAA8B,EAC9B,OAAoI;QAEpI,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;QACxB,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;IACvC,CAAC;IAED;;;;;OAKG;IACI,MAAM,CAAO,MAAM,CAAC,OAAuC;;;YAC9D,kDAAkD;YAClD,IAAI,YAAiC,CAAC;YAEtC,IAAI,CAAC;gBACD,YAAY,GAAG,wDAAa,2BAA2B,GAAC,CAAC;YAC7D,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACT,MAAM,IAAI,KAAK,CACX,6DAA6D;oBAC7D,wDAAwD,CAC3D,CAAC;YACN,CAAC;YAED,MAAM,EAAE,QAAQ,EAAE,GAAG,YAAY,CAAC;YAElC,iBAAiB;YACjB,MAAM,IAAI,GAAG;gBACT,KAAK,EAAE,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,KAAK,mCAAI,aAAa;gBACtC,SAAS,EAAE,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,SAAS,mCAAI,GAAG;gBACpC,MAAM,EAAE,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,MAAM,mCAAI,MAAM;gBACjC,KAAK,EAAE,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,KAAK,mCAAI,MAAM;gBAC/B,SAAS,EAAE,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,SAAS,mCAAI,IAAI;gBACrC,OAAO,EAAE,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,OAAO,mCAAI,MAAM;gBACnC,gBAAgB,EAAE,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,gBAAgB;aAC9C,CAAC;YAEF,yBAAyB;YACzB,MAAM,eAAe,GAAQ;gBACzB,MAAM,EAAE,IAAI,CAAC,MAAM;gBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;aACpB,CAAC;YAEF,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;gBACxB,eAAe,CAAC,iBAAiB,GAAG,IAAI,CAAC,gBAAgB,CAAC;YAC9D,CAAC;YAED,uCAAuC;YACvC,MAAM,SAAS,GAAG,MAAM,QAAQ,CAC5B,oBAAoB,EACpB,IAAI,CAAC,KAAK,EACV,eAAe,CAClB,CAAC;YAEF,0DAA0D;YAC1D,MAAM,SAAS,GAAG,SAAS,CAAC,SAAS,CAAC;YAEtC,OAAO,IAAI,sBAAsB,CAAC,SAAS,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC;QAClE,CAAC;KAAA;IAED;;;;;;OAMG;IACI,YAAY;QACf,OAAO,IAAI,6CAAqB,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACtD,CAAC;IAED;;;;OAIG;IACU,gBAAgB,CAAC,MAAyB;;YACnD,IAAI,CAAC;gBACD,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;gBAE7D,uCAAuC;gBACvC,MAAM,MAAM
,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,UAAU,EAAE;oBAC7C,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,OAAO;oBAC9B,SAAS,EAAE,IAAI,CAAC,QAAQ,CAAC,SAAS;iBACrC,CAAC,CAAC;gBAEH,MAAM,CAAC,SAAS,EAAE,YAAY,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC;gBAC9C,MAAM,IAAI,GAAG,MAAM,CAAC,IAAoB,CAAC;gBAEzC,kDAAkD;gBAClD,MAAM,UAAU,GAAe,EAAE,CAAC;gBAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;oBACjC,MAAM,KAAK,GAAG,CAAC,GAAG,YAAY,CAAC;oBAC/B,MAAM,GAAG,GAAG,KAAK,GAAG,YAAY,CAAC;oBACjC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC;gBACxD,CAAC;gBAED,OAAO;oBACH,MAAM,EAAE,SAAS;oBACjB,MAAM,EAAE,UAAU;oBAClB,KAAK,EAAE,IAAI,CAAC,QAAQ,CAAC,KAAK;iBAC7B,CAAC;YACN,CAAC;YAAC,OAAO,KAAc,EAAE,CAAC;gBACtB,OAAO;oBACH,MAAM,EAAE,OAAO;oBACf,OAAO,EAAE,gCAAiC,KAAe,CAAC,OAAO,EAAE;iBACtE,CAAC;YACN,CAAC;QACL,CAAC;KAAA;IAED;;OAEG;IACH,IAAW,KAAK;QACZ,OAAO,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;IAC/B,CAAC;CACJ;AApID,wDAoIC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=TransformersEmbeddings.spec.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TransformersEmbeddings.spec.d.ts","sourceRoot":"","sources":["../src/TransformersEmbeddings.spec.ts"],"names":[],"mappings":""}
@@ -0,0 +1,198 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
36
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
37
+ return new (P || (P = Promise))(function (resolve, reject) {
38
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
39
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
40
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
41
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
42
+ });
43
+ };
44
+ var __importDefault = (this && this.__importDefault) || function (mod) {
45
+ return (mod && mod.__esModule) ? mod : { "default": mod };
46
+ };
47
+ Object.defineProperty(exports, "__esModule", { value: true });
48
+ const node_assert_1 = require("node:assert");
49
+ const mocha_1 = require("mocha");
50
+ const sinon_1 = __importDefault(require("sinon"));
51
+ const transformersModule = __importStar(require("@huggingface/transformers"));
52
+ (0, mocha_1.describe)('TransformersEmbeddings', () => {
53
+ let TransformersEmbeddings;
54
+ let mockExtractor;
55
+ let mockTokenizer;
56
+ let sandbox;
57
+ let pipelineStub;
58
+ (0, mocha_1.beforeEach)(() => __awaiter(void 0, void 0, void 0, function* () {
59
+ sandbox = sinon_1.default.createSandbox();
60
+ // Mock tokenizer: __call__ always returns input_ids.data = BigInt64Array [1n, 2n, 3n]; decode always returns 'decoded text'
61
+ mockTokenizer = {
62
+ __call__: sandbox.stub().returns({
63
+ input_ids: { data: BigInt64Array.from([BigInt(1), BigInt(2), BigInt(3)]) }
64
+ }),
65
+ decode: sandbox.stub().returns('decoded text')
66
+ };
67
+ // Build a plain function that forwards its arguments to __call__, then copy the mock's properties (__call__, decode) onto it
68
+ const callableTokenizer = Object.assign((...args) => mockTokenizer.__call__(...args), mockTokenizer);
69
+ // Mock extractor: for N inputs (a lone value counts as 1) resolves to { data: Float32Array of N rows [0.1, 0.2, 0.3, 0.4], dims: [N, 4] }
70
+ mockExtractor = sandbox.stub().callsFake((inputs) => __awaiter(void 0, void 0, void 0, function* () {
71
+ const inputArray = Array.isArray(inputs) ? inputs : [inputs];
72
+ const batchSize = inputArray.length;
73
+ const embeddingDim = 4;
74
+ const data = new Float32Array(batchSize * embeddingDim);
75
+ for (let i = 0; i < batchSize; i++) {
76
+ data[i * embeddingDim] = 0.1;
77
+ data[i * embeddingDim + 1] = 0.2;
78
+ data[i * embeddingDim + 2] = 0.3;
79
+ data[i * embeddingDim + 3] = 0.4;
80
+ }
81
+ return {
82
+ data: data,
83
+ dims: [batchSize, embeddingDim]
84
+ };
85
+ }));
86
+ // Expose the callable tokenizer as the extractor's .tokenizer property, so the value the stubbed pipeline resolves to carries it
87
+ mockExtractor.tokenizer = callableTokenizer;
88
+ // Replace the 'pipeline' export of @huggingface/transformers with a stub that resolves to mockExtractor
89
+ pipelineStub = sandbox.stub(transformersModule, 'pipeline').resolves(mockExtractor);
90
+ // Load the module under test after the stub is installed. NOTE(review): this compiles to require(), which presumably returns the cached module on later runs rather than a fresh copy - confirm
91
+ const mod = yield Promise.resolve().then(() => __importStar(require('./TransformersEmbeddings')));
92
+ TransformersEmbeddings = mod.TransformersEmbeddings;
93
+ }));
94
+ (0, mocha_1.afterEach)(() => {
95
+ sandbox.restore();
96
+ });
97
+ (0, mocha_1.describe)('create()', () => {
98
+ (0, mocha_1.it)('creates instance with default options', () => __awaiter(void 0, void 0, void 0, function* () {
99
+ const embeddings = yield TransformersEmbeddings.create();
100
+ node_assert_1.strict.equal(embeddings.maxTokens, 512, 'default maxTokens should be 512');
101
+ node_assert_1.strict.equal(embeddings.model, 'Xenova/all-MiniLM-L6-v2', 'default model should be all-MiniLM-L6-v2');
102
+ // Verify pipeline was called with correct arguments
103
+ node_assert_1.strict.ok(pipelineStub.calledOnce, 'pipeline should be called once');
104
+ node_assert_1.strict.equal(pipelineStub.firstCall.args[0], 'feature-extraction');
105
+ node_assert_1.strict.equal(pipelineStub.firstCall.args[1], 'Xenova/all-MiniLM-L6-v2');
106
+ }));
107
+ (0, mocha_1.it)('creates instance with custom options', () => __awaiter(void 0, void 0, void 0, function* () {
108
+ const embeddings = yield TransformersEmbeddings.create({
109
+ model: 'Xenova/bge-small-en-v1.5',
110
+ maxTokens: 256,
111
+ device: 'cpu',
112
+ normalize: false,
113
+ pooling: 'cls'
114
+ });
115
+ node_assert_1.strict.equal(embeddings.maxTokens, 256);
116
+ node_assert_1.strict.equal(embeddings.model, 'Xenova/bge-small-en-v1.5');
117
+ }));
118
+ (0, mocha_1.it)('implements EmbeddingsModel interface', () => __awaiter(void 0, void 0, void 0, function* () {
119
+ const embeddings = yield TransformersEmbeddings.create();
120
+ node_assert_1.strict.equal(typeof embeddings.maxTokens, 'number');
121
+ node_assert_1.strict.equal(typeof embeddings.createEmbeddings, 'function');
122
+ }));
123
+ });
124
+ (0, mocha_1.describe)('createEmbeddings()', () => {
125
+ (0, mocha_1.it)('generates embeddings for single string', () => __awaiter(void 0, void 0, void 0, function* () {
126
+ const embeddings = yield TransformersEmbeddings.create();
127
+ const result = yield embeddings.createEmbeddings('hello world');
128
+ node_assert_1.strict.equal(result.status, 'success');
129
+ node_assert_1.strict.ok(result.output, 'output should be defined');
130
+ node_assert_1.strict.equal(result.output.length, 1, 'should have one embedding');
131
+ node_assert_1.strict.equal(result.output[0].length, 4, 'embedding should have 4 dimensions');
132
+ const expected = [0.1, 0.2, 0.3, 0.4];
133
+ result.output[0].forEach((val, i) => {
134
+ node_assert_1.strict.ok(Math.abs(val - expected[i]) < 0.001, `value ${val} should be close to ${expected[i]}`);
135
+ });
136
+ node_assert_1.strict.equal(result.model, 'Xenova/all-MiniLM-L6-v2');
137
+ }));
138
+ (0, mocha_1.it)('generates embeddings for string array', () => __awaiter(void 0, void 0, void 0, function* () {
139
+ const embeddings = yield TransformersEmbeddings.create();
140
+ const result = yield embeddings.createEmbeddings(['hello', 'world']);
141
+ node_assert_1.strict.equal(result.status, 'success');
142
+ node_assert_1.strict.ok(result.output, 'output should be defined');
143
+ node_assert_1.strict.equal(result.output.length, 2, 'should have two embeddings');
144
+ node_assert_1.strict.equal(mockExtractor.callCount, 1);
145
+ node_assert_1.strict.deepEqual(mockExtractor.firstCall.args[0], ['hello', 'world']);
146
+ }));
147
+ (0, mocha_1.it)('passes pooling and normalize options to extractor', () => __awaiter(void 0, void 0, void 0, function* () {
148
+ const embeddings = yield TransformersEmbeddings.create({
149
+ pooling: 'cls',
150
+ normalize: false
151
+ });
152
+ yield embeddings.createEmbeddings('test');
153
+ node_assert_1.strict.ok(mockExtractor.calledOnce);
154
+ const options = mockExtractor.firstCall.args[1];
155
+ node_assert_1.strict.equal(options.pooling, 'cls');
156
+ node_assert_1.strict.equal(options.normalize, false);
157
+ }));
158
+ (0, mocha_1.it)('returns error status on failure', () => __awaiter(void 0, void 0, void 0, function* () {
159
+ var _a;
160
+ mockExtractor.rejects(new Error('Model inference failed'));
161
+ const embeddings = yield TransformersEmbeddings.create();
162
+ const result = yield embeddings.createEmbeddings('test');
163
+ node_assert_1.strict.equal(result.status, 'error');
164
+ node_assert_1.strict.ok((_a = result.message) === null || _a === void 0 ? void 0 : _a.includes('Model inference failed'));
165
+ }));
166
+ (0, mocha_1.it)('handles empty string input', () => __awaiter(void 0, void 0, void 0, function* () {
167
+ const embeddings = yield TransformersEmbeddings.create();
168
+ const result = yield embeddings.createEmbeddings('');
169
+ node_assert_1.strict.equal(result.status, 'success');
170
+ node_assert_1.strict.ok(result.output);
171
+ node_assert_1.strict.equal(result.output.length, 1);
172
+ }));
173
+ (0, mocha_1.it)('handles empty array input', () => __awaiter(void 0, void 0, void 0, function* () {
174
+ const embeddings = yield TransformersEmbeddings.create();
175
+ const result = yield embeddings.createEmbeddings([]);
176
+ node_assert_1.strict.equal(result.status, 'success');
177
+ node_assert_1.strict.ok(result.output);
178
+ node_assert_1.strict.equal(result.output.length, 0);
179
+ }));
180
+ });
181
+ (0, mocha_1.describe)('getTokenizer()', () => {
182
+ (0, mocha_1.it)('returns a TransformersTokenizer instance', () => __awaiter(void 0, void 0, void 0, function* () {
183
+ const embeddings = yield TransformersEmbeddings.create();
184
+ const tokenizer = embeddings.getTokenizer();
185
+ node_assert_1.strict.ok(tokenizer, 'tokenizer should be defined');
186
+ node_assert_1.strict.equal(typeof tokenizer.encode, 'function');
187
+ node_assert_1.strict.equal(typeof tokenizer.decode, 'function');
188
+ }));
189
+ (0, mocha_1.it)('returns consistent tokenizer across calls', () => __awaiter(void 0, void 0, void 0, function* () {
190
+ const embeddings = yield TransformersEmbeddings.create();
191
+ const tokenizer1 = embeddings.getTokenizer(); // first call
192
+ const tokenizer2 = embeddings.getTokenizer(); // second call on the same embeddings instance
193
+ node_assert_1.strict.ok(tokenizer1); // NOTE(review): only truthiness is asserted here
194
+ node_assert_1.strict.ok(tokenizer2); // NOTE(review): tokenizer1 and tokenizer2 are never compared, so the "consistent" in the test title is not actually checked
195
+ }));
196
+ });
197
+ });
198
+ //# sourceMappingURL=TransformersEmbeddings.spec.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TransformersEmbeddings.spec.js","sourceRoot":"","sources":["../src/TransformersEmbeddings.spec.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,6CAA+C;AAC/C,iCAA4D;AAC5D,kDAA0B;AAE1B,8EAAgE;AAEhE,IAAA,gBAAQ,EAAC,wBAAwB,EAAE,GAAG,EAAE;IACpC,IAAI,sBAA2B,CAAC;IAChC,IAAI,aAA8B,CAAC;IACnC,IAAI,aAAkB,CAAC;IACvB,IAAI,OAA2B,CAAC;IAChC,IAAI,YAA6B,CAAC;IAElC,IAAA,kBAAU,EAAC,GAAS,EAAE;QAClB,OAAO,GAAG,eAAK,CAAC,aAAa,EAAE,CAAC;QAEhC,wBAAwB;QACxB,aAAa,GAAG;YACZ,QAAQ,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC;gBAC7B,SAAS,EAAE,EAAE,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE;aAC7E,CAAC;YACF,MAAM,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,cAAc,CAAC;SACjD,CAAC;QACF,mBAAmB;QACnB,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CACnC,CAAC,GAAG,IAAW,EAAE,EAAE,CAAC,aAAa,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC,EACnD,aAAa,CAChB,CAAC;QAEF,sDAAsD;QACtD,aAAa,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,SAAS,CAAC,CAAO,MAAyB,EAAE,EAAE;YACzE,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;YAC7D,MAAM,SAAS,GAAG,UAAU,CAAC,MAAM,CAAC;YACpC,MAAM,YAAY,GAAG,CAAC,CAAC;YAEvB,MAAM,IAAI,GAAG,IAAI,YAAY,CAAC,SAAS,GAAG,YAAY,CAAC,CAAC;YACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;gBACjC,IAAI,CAAC,CAAC,GAAG,YAAY,CAAC,GAAG,GAAG,CAAC;gBAC7B,IAAI,CAAC,CAAC,GAAG,YAAY,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC;gBACjC,IAAI,CAAC,CAAC,GAAG,YAAY,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC;gBACjC,IAAI,CAAC,CAAC,GAAG,YAAY,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC;YACrC,CAAC;YAED,OAAO;gBACH,IAAI,EAAE,IAAI;gBACV,IAAI,EAAE,CAAC,SAAS,EAAE,YAAY,CAAC;aAClC,CAAC;QACN,CAAC,CAAA,CAAC,CAAC;QAEH,2EAA2E;QAC1E,aAAqB,CAAC,SAAS,GAAG,iBAAiB,CAAC;QAErD,4DAA4D;QAC5D,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,kBAAkB,EAAE,UAAiB,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAE3F,qFAAqF;QACrF,MAAM,GAAG,GAAG,wDAAa,0BAA0B,GAAC,CAAC;QACrD,sBAAsB,GAAG,GAAG,CAAC,sBAAsB,CAAC;IACxD,CAAC,CAAA,CAAC,CAAC;IAEH,IAAA,iBAAS,EAAC,GAAG,EAAE;QACX,OAAO,CAAC,OAAO
,EAAE,CAAC;IACtB,CAAC,CAAC,CAAC;IAEH,IAAA,gBAAQ,EAAC,UAAU,EAAE,GAAG,EAAE;QACtB,IAAA,UAAE,EAAC,uCAAuC,EAAE,GAAS,EAAE;YACnD,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YAEzD,oBAAM,CAAC,KAAK,CAAC,UAAU,CAAC,SAAS,EAAE,GAAG,EAAE,iCAAiC,CAAC,CAAC;YAC3E,oBAAM,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,EAAE,yBAAyB,EAAE,0CAA0C,CAAC,CAAC;YAEtG,oDAAoD;YACpD,oBAAM,CAAC,EAAE,CAAC,YAAY,CAAC,UAAU,EAAE,gCAAgC,CAAC,CAAC;YACrE,oBAAM,CAAC,KAAK,CAAC,YAAY,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,oBAAoB,CAAC,CAAC;YACnE,oBAAM,CAAC,KAAK,CAAC,YAAY,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,yBAAyB,CAAC,CAAC;QAC5E,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,sCAAsC,EAAE,GAAS,EAAE;YAClD,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,CAAC;gBACnD,KAAK,EAAE,0BAA0B;gBACjC,SAAS,EAAE,GAAG;gBACd,MAAM,EAAE,KAAK;gBACb,SAAS,EAAE,KAAK;gBAChB,OAAO,EAAE,KAAK;aACjB,CAAC,CAAC;YAEH,oBAAM,CAAC,KAAK,CAAC,UAAU,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;YACxC,oBAAM,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,EAAE,0BAA0B,CAAC,CAAC;QAC/D,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,sCAAsC,EAAE,GAAS,EAAE;YAClD,MAAM,UAAU,GAAoB,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YAE1E,oBAAM,CAAC,KAAK,CAAC,OAAO,UAAU,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;YACpD,oBAAM,CAAC,KAAK,CAAC,OAAO,UAAU,CAAC,gBAAgB,EAAE,UAAU,CAAC,CAAC;QACjE,CAAC,CAAA,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAEH,IAAA,gBAAQ,EAAC,oBAAoB,EAAE,GAAG,EAAE;QAChC,IAAA,UAAE,EAAC,wCAAwC,EAAE,GAAS,EAAE;YACpD,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YACzD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,gBAAgB,CAAC,aAAa,CAAC,CAAC;YAEhE,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;YACvC,oBAAM,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,EAAE,0BAA0B,CAAC,CAAC;YACrD,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAO,CAAC,MAAM,EAAE,CAAC,EAAE,2BAA2B,CAAC,CAAC;YACpE,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAO,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,oCAAoC,CAAC,CAAC;YAChF,MAAM,QAAQ,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;YACtC,MAAM,CAAC,MAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,GAAW,EAAE,CAAS,EAAE,EAAE;gBACjD,oBAAM,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,GAAG,KAAK
,EAAE,SAAS,GAAG,uBAAuB,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACrG,CAAC,CAAC,CAAC;YACH,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,EAAE,yBAAyB,CAAC,CAAC;QAC1D,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,uCAAuC,EAAE,GAAS,EAAE;YACnD,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YACzD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,gBAAgB,CAAC,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC;YAErE,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;YACvC,oBAAM,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,EAAE,0BAA0B,CAAC,CAAC;YACrD,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAO,CAAC,MAAM,EAAE,CAAC,EAAE,4BAA4B,CAAC,CAAC;YAErE,oBAAM,CAAC,KAAK,CAAC,aAAa,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;YACzC,oBAAM,CAAC,SAAS,CAAC,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC;QAC1E,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,mDAAmD,EAAE,GAAS,EAAE;YAC/D,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,CAAC;gBACnD,OAAO,EAAE,KAAK;gBACd,SAAS,EAAE,KAAK;aACnB,CAAC,CAAC;YACH,MAAM,UAAU,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;YAE1C,oBAAM,CAAC,EAAE,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC;YACpC,MAAM,OAAO,GAAG,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAChD,oBAAM,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;YACrC,oBAAM,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAC3C,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,iCAAiC,EAAE,GAAS,EAAE;;YAC7C,aAAa,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC,CAAC;YAE3D,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YACzD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;YAEzD,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YACrC,oBAAM,CAAC,EAAE,CAAC,MAAA,MAAM,CAAC,OAAO,0CAAE,QAAQ,CAAC,wBAAwB,CAAC,CAAC,CAAC;QAClE,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,4BAA4B,EAAE,GAAS,EAAE;YACxC,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YACzD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,gBAAgB,CAAC,EAAE,CAAC,CAAC;YAErD,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;YACvC,oBAAM,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACzB,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC3C,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,
2BAA2B,EAAE,GAAS,EAAE;YACvC,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YACzD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,gBAAgB,CAAC,EAAE,CAAC,CAAC;YAErD,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;YACvC,oBAAM,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACzB,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC3C,CAAC,CAAA,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAEH,IAAA,gBAAQ,EAAC,gBAAgB,EAAE,GAAG,EAAE;QAC5B,IAAA,UAAE,EAAC,0CAA0C,EAAE,GAAS,EAAE;YACtD,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YACzD,MAAM,SAAS,GAAG,UAAU,CAAC,YAAY,EAAE,CAAC;YAE5C,oBAAM,CAAC,EAAE,CAAC,SAAS,EAAE,6BAA6B,CAAC,CAAC;YACpD,oBAAM,CAAC,KAAK,CAAC,OAAO,SAAS,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;YAClD,oBAAM,CAAC,KAAK,CAAC,OAAO,SAAS,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;QACtD,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,2CAA2C,EAAE,GAAS,EAAE;YACvD,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YACzD,MAAM,UAAU,GAAG,UAAU,CAAC,YAAY,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,UAAU,CAAC,YAAY,EAAE,CAAC;YAE7C,oBAAM,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC;YACtB,oBAAM,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC;QAC1B,CAAC,CAAA,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;AACP,CAAC,CAAC,CAAC"}
@@ -0,0 +1,33 @@
1
+ import { PreTrainedTokenizer } from "@huggingface/transformers";
2
+ import { Tokenizer } from "./types";
3
+ /**
4
+ * Adapts a Transformers.js PreTrainedTokenizer to the Tokenizer interface from ./types.
5
+ * @remarks
6
+ * This tokenizer uses the same tokenizer as the embedding model,
7
+ * ensuring consistency between text splitting and embedding generation.
8
+ *
9
+ * Obtain an instance via TransformersEmbeddings.getTokenizer().
10
+ */
11
+ export declare class TransformersTokenizer implements Tokenizer {
12
+ private readonly _tokenizer;
13
+ /**
14
+ * Creates a new TransformersTokenizer around the given tokenizer, which is held in a private readonly field.
15
+ * @param tokenizer The underlying Transformers.js tokenizer.
16
+ * @remarks
17
+ * Typically created via TransformersEmbeddings.getTokenizer().
18
+ */
19
+ constructor(tokenizer: PreTrainedTokenizer);
20
+ /**
21
+ * Encodes text into token IDs.
22
+ * @param text The text to encode.
23
+ * @returns Array of token IDs as plain numbers.
24
+ */
25
+ encode(text: string): number[];
26
+ /**
27
+ * Decodes token IDs back into text.
28
+ * @param tokens Array of token IDs as plain numbers.
29
+ * @returns Decoded text string.
30
+ */
31
+ decode(tokens: number[]): string;
32
+ }
33
+ //# sourceMappingURL=TransformersTokenizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TransformersTokenizer.d.ts","sourceRoot":"","sources":["../src/TransformersTokenizer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,2BAA2B,CAAC;AAChE,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEpC;;;;;;;GAOG;AACH,qBAAa,qBAAsB,YAAW,SAAS;IACnD,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAsB;IAEjD;;;;;OAKG;gBACgB,SAAS,EAAE,mBAAmB;IAIjD;;;;OAIG;IACI,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE;IAOrC;;;;OAIG;IACI,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM;CAG1C"}
@@ -0,0 +1,44 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.TransformersTokenizer = void 0;
4
+ /**
5
+ * A thin wrapper that exposes encode()/decode() on top of a callable Transformers.js tokenizer.
6
+ * @remarks
7
+ * This tokenizer uses the same tokenizer as the embedding model,
8
+ * ensuring consistency between text splitting and embedding generation.
9
+ *
10
+ * Obtain an instance via TransformersEmbeddings.getTokenizer().
11
+ */
12
+ class TransformersTokenizer {
13
+ /**
14
+ * Creates a new TransformersTokenizer and stores the tokenizer on this._tokenizer.
15
+ * @param tokenizer The underlying Transformers.js tokenizer; it is invoked as a function by encode() and its decode() method is used by decode().
16
+ * @remarks
17
+ * Typically created via TransformersEmbeddings.getTokenizer().
18
+ */
19
+ constructor(tokenizer) {
20
+ this._tokenizer = tokenizer;
21
+ }
22
+ /**
23
+ * Encodes text into token IDs by calling the wrapped tokenizer with the text.
24
+ * @param text The text to encode.
25
+ * @returns Array of token IDs, each converted with Number().
26
+ */
27
+ encode(text) {
28
+ var _a, _b, _c;
29
+ const encoded = this._tokenizer(text);
30
+ // Pick the ID container in order of preference: encoded.input_ids.data, then encoded.input_ids, then encoded itself (nullish values fall through)
31
+ const inputIds = (_c = (_b = (_a = encoded.input_ids) === null || _a === void 0 ? void 0 : _a.data) !== null && _b !== void 0 ? _b : encoded.input_ids) !== null && _c !== void 0 ? _c : encoded;
32
+ return Array.from(inputIds).map((id) => Number(id));
33
+ }
34
+ /**
35
+ * Decodes token IDs back into text, always passing skip_special_tokens: true to the wrapped tokenizer.
36
+ * @param tokens Array of token IDs.
37
+ * @returns Whatever the wrapped tokenizer's decode() returns for those IDs.
38
+ */
39
+ decode(tokens) {
40
+ return this._tokenizer.decode(tokens, { skip_special_tokens: true });
41
+ }
42
+ }
43
+ exports.TransformersTokenizer = TransformersTokenizer;
44
+ //# sourceMappingURL=TransformersTokenizer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TransformersTokenizer.js","sourceRoot":"","sources":["../src/TransformersTokenizer.ts"],"names":[],"mappings":";;;AAGA;;;;;;;GAOG;AACH,MAAa,qBAAqB;IAG9B;;;;;OAKG;IACH,YAAmB,SAA8B;QAC7C,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;IAChC,CAAC;IAED;;;;OAIG;IACI,MAAM,CAAC,IAAY;;QACtB,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QACtC,+EAA+E;QAC/E,MAAM,QAAQ,GAAG,MAAA,MAAA,MAAA,OAAO,CAAC,SAAS,0CAAE,IAAI,mCAAI,OAAO,CAAC,SAAS,mCAAI,OAAO,CAAC;QACzE,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAO,EAAE,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAC7D,CAAC;IAED;;;;OAIG;IACI,MAAM,CAAC,MAAgB;QAC1B,OAAO,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,MAAM,EAAE,EAAE,mBAAmB,EAAE,IAAI,EAAE,CAAC,CAAC;IACzE,CAAC;CACJ;AAjCD,sDAiCC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=TransformersTokenizer.spec.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TransformersTokenizer.spec.d.ts","sourceRoot":"","sources":["../src/TransformersTokenizer.spec.ts"],"names":[],"mappings":""}
@@ -0,0 +1,112 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const node_assert_1 = require("node:assert");
4
+ const mocha_1 = require("mocha");
5
+ const TransformersTokenizer_1 = require("./TransformersTokenizer");
6
+ (0, mocha_1.describe)('TransformersTokenizer', () => {
7
+ // Builds a fake tokenizer object with two members: __call__(text) turns words into IDs, decode(tokens, options) turns IDs back into words
8
+ function createMockTokenizer() {
9
+ const vocab = new Map([
10
+ ['hello', 101],
11
+ ['world', 102],
12
+ ['test', 103],
13
+ ['[CLS]', 1],
14
+ ['[SEP]', 2]
15
+ ]);
16
+ const reverseVocab = new Map();
17
+ vocab.forEach((v, k) => reverseVocab.set(v, k));
18
+ return {
19
+ // Lowercases the text, splits it on whitespace, drops empty pieces, and returns the IDs as { input_ids: { data: BigInt64Array } }
20
+ __call__: (text) => {
21
+ const words = text.toLowerCase().split(/\s+/).filter(w => w);
22
+ const ids = words.map(w => { var _a; return (_a = vocab.get(w)) !== null && _a !== void 0 ? _a : 100; }); // words missing from vocab map to ID 100
23
+ return {
24
+ input_ids: {
25
+ data: BigInt64Array.from(ids.map(id => BigInt(id)))
26
+ }
27
+ };
28
+ },
29
+ decode: (tokens, options) => {
30
+ const words = tokens
31
+ .filter(t => !(options === null || options === void 0 ? void 0 : options.skip_special_tokens) || (t !== 1 && t !== 2)) // when skip_special_tokens is truthy, IDs 1 and 2 are dropped
32
+ .map(t => { var _a; return (_a = reverseVocab.get(t)) !== null && _a !== void 0 ? _a : '[UNK]'; }); // IDs missing from reverseVocab become '[UNK]'
33
+ return words.join(' ');
34
+ }
35
+ };
36
+ }
37
+ (0, mocha_1.it)('encodes text to token array using callable tokenizer', () => {
38
+ const mockTokenizer = createMockTokenizer();
39
+ // Make it callable
40
+ const callableTokenizer = Object.assign((text) => mockTokenizer.__call__(text), { decode: mockTokenizer.decode });
41
+ const tokenizer = new TransformersTokenizer_1.TransformersTokenizer(callableTokenizer);
42
+ const tokens = tokenizer.encode('hello world');
43
+ node_assert_1.strict.ok(Array.isArray(tokens), 'encode should return an array');
44
+ node_assert_1.strict.equal(tokens.length, 2, 'should have 2 tokens');
45
+ node_assert_1.strict.deepEqual(tokens, [101, 102], 'tokens should match expected values');
46
+ });
47
+ (0, mocha_1.it)('handles BigInt64Array conversion correctly', () => {
48
+ const mockTokenizer = {
49
+ __call__: () => ({
50
+ input_ids: {
51
+ data: BigInt64Array.from([BigInt(1), BigInt(2), BigInt(3)])
52
+ }
53
+ }),
54
+ decode: () => 'decoded'
55
+ };
56
+ const callableTokenizer = Object.assign(() => mockTokenizer.__call__(), { decode: mockTokenizer.decode });
57
+ const tokenizer = new TransformersTokenizer_1.TransformersTokenizer(callableTokenizer);
58
+ const tokens = tokenizer.encode('any text');
59
+ node_assert_1.strict.deepEqual(tokens, [1, 2, 3], 'should convert BigInt to number');
60
+ tokens.forEach(t => {
61
+ node_assert_1.strict.equal(typeof t, 'number', 'each token should be a number');
62
+ });
63
+ });
64
+ (0, mocha_1.it)('decodes tokens back to text', () => {
65
+ const mockTokenizer = {
66
+ __call__: () => ({ input_ids: { data: BigInt64Array.from([]) } }),
67
+ decode: (tokens, opts) => {
68
+ if (opts === null || opts === void 0 ? void 0 : opts.skip_special_tokens) {
69
+ return 'hello world';
70
+ }
71
+ return '[CLS] hello world [SEP]';
72
+ }
73
+ };
74
+ const callableTokenizer = Object.assign(() => mockTokenizer.__call__(), { decode: mockTokenizer.decode });
75
+ const tokenizer = new TransformersTokenizer_1.TransformersTokenizer(callableTokenizer);
76
+ const text = tokenizer.decode([1, 101, 102, 2]);
77
+ node_assert_1.strict.equal(text, 'hello world', 'should decode with skip_special_tokens=true');
78
+ });
79
+ (0, mocha_1.it)('handles empty input', () => {
80
+ const mockTokenizer = {
81
+ __call__: () => ({
82
+ input_ids: { data: BigInt64Array.from([]) }
83
+ }),
84
+ decode: () => ''
85
+ };
86
+ const callableTokenizer = Object.assign(() => mockTokenizer.__call__(), { decode: mockTokenizer.decode });
87
+ const tokenizer = new TransformersTokenizer_1.TransformersTokenizer(callableTokenizer);
88
+ const tokens = tokenizer.encode('');
89
+ node_assert_1.strict.deepEqual(tokens, [], 'empty input should return empty array');
90
+ const text = tokenizer.decode([]);
91
+ node_assert_1.strict.equal(text, '', 'empty tokens should return empty string');
92
+ });
93
+ (0, mocha_1.it)('returns consistent results for same input', () => {
94
+ let callCount = 0;
95
+ const mockTokenizer = {
96
+ __call__: () => {
97
+ callCount++;
98
+ return {
99
+ input_ids: { data: BigInt64Array.from([BigInt(101), BigInt(102)]) }
100
+ };
101
+ },
102
+ decode: () => 'hello world'
103
+ };
104
+ const callableTokenizer = Object.assign(() => mockTokenizer.__call__(), { decode: mockTokenizer.decode });
105
+ const tokenizer = new TransformersTokenizer_1.TransformersTokenizer(callableTokenizer);
106
+ const tokens1 = tokenizer.encode('hello world');
107
+ const tokens2 = tokenizer.encode('hello world');
108
+ node_assert_1.strict.deepEqual(tokens1, tokens2, 'encode should be deterministic');
109
+ node_assert_1.strict.equal(callCount, 2, 'should call underlying tokenizer each time');
110
+ });
111
+ });
112
+ //# sourceMappingURL=TransformersTokenizer.spec.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TransformersTokenizer.spec.js","sourceRoot":"","sources":["../src/TransformersTokenizer.spec.ts"],"names":[],"mappings":";;AAAA,6CAA+C;AAC/C,iCAAqC;AACrC,mEAAgE;AAEhE,IAAA,gBAAQ,EAAC,uBAAuB,EAAE,GAAG,EAAE;IACnC,+DAA+D;IAC/D,SAAS,mBAAmB;QACxB,MAAM,KAAK,GAAwB,IAAI,GAAG,CAAC;YACvC,CAAC,OAAO,EAAE,GAAG,CAAC;YACd,CAAC,OAAO,EAAE,GAAG,CAAC;YACd,CAAC,MAAM,EAAE,GAAG,CAAC;YACb,CAAC,OAAO,EAAE,CAAC,CAAC;YACZ,CAAC,OAAO,EAAE,CAAC,CAAC;SACf,CAAC,CAAC;QACH,MAAM,YAAY,GAAwB,IAAI,GAAG,EAAE,CAAC;QACpD,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAEhD,OAAO;YACH,yCAAyC;YACzC,QAAQ,EAAE,CAAC,IAAY,EAAE,EAAE;gBACvB,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;gBAC7D,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,WAAC,OAAA,MAAA,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,mCAAI,GAAG,CAAA,EAAA,CAAC,CAAC;gBAChD,OAAO;oBACH,SAAS,EAAE;wBACP,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;qBACtD;iBACJ,CAAC;YACN,CAAC;YACD,MAAM,EAAE,CAAC,MAAgB,EAAE,OAA2C,EAAE,EAAE;gBACtE,MAAM,KAAK,GAAG,MAAM;qBACf,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,mBAAmB,CAAA,IAAI,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;qBAClE,GAAG,CAAC,CAAC,CAAC,EAAE,WAAC,OAAA,MAAA,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,mCAAI,OAAO,CAAA,EAAA,CAAC,CAAC;gBAC9C,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC3B,CAAC;SACJ,CAAC;IACN,CAAC;IAED,IAAA,UAAE,EAAC,sDAAsD,EAAE,GAAG,EAAE;QAC5D,MAAM,aAAa,GAAG,mBAAmB,EAAE,CAAC;QAC5C,mBAAmB;QACnB,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CACnC,CAAC,IAAY,EAAE,EAAE,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,EAC9C,EAAE,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,CAC5B,CAAC;QAET,MAAM,SAAS,GAAG,IAAI,6CAAqB,CAAC,iBAAiB,CAAC,CAAC;QAC/D,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QAE/C,oBAAM,CAAC,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,+BAA+B,CAAC,CAAC;QAClE,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,sBAAsB,CAAC,CAAC;QACv
D,oBAAM,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,qCAAqC,CAAC,CAAC;IAChF,CAAC,CAAC,CAAC;IAEH,IAAA,UAAE,EAAC,4CAA4C,EAAE,GAAG,EAAE;QAClD,MAAM,aAAa,GAAG;YAClB,QAAQ,EAAE,GAAG,EAAE,CAAC,CAAC;gBACb,SAAS,EAAE;oBACP,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;iBAC9D;aACJ,CAAC;YACF,MAAM,EAAE,GAAG,EAAE,CAAC,SAAS;SAC1B,CAAC;QACF,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CACnC,GAAG,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,EAC9B,EAAE,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,CAC5B,CAAC;QAET,MAAM,SAAS,GAAG,IAAI,6CAAqB,CAAC,iBAAiB,CAAC,CAAC;QAC/D,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QAE5C,oBAAM,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,EAAE,iCAAiC,CAAC,CAAC;QACvE,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;YACf,oBAAM,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,QAAQ,EAAE,+BAA+B,CAAC,CAAC;QACtE,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAEH,IAAA,UAAE,EAAC,6BAA6B,EAAE,GAAG,EAAE;QACnC,MAAM,aAAa,GAAG;YAClB,QAAQ,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,SAAS,EAAE,EAAE,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACjE,MAAM,EAAE,CAAC,MAAgB,EAAE,IAAwC,EAAE,EAAE;gBACnE,IAAI,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,mBAAmB,EAAE,CAAC;oBAC5B,OAAO,aAAa,CAAC;gBACzB,CAAC;gBACD,OAAO,yBAAyB,CAAC;YACrC,CAAC;SACJ,CAAC;QACF,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CACnC,GAAG,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,EAC9B,EAAE,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,CAC5B,CAAC;QAET,MAAM,SAAS,GAAG,IAAI,6CAAqB,CAAC,iBAAiB,CAAC,CAAC;QAC/D,MAAM,IAAI,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;QAEhD,oBAAM,CAAC,KAAK,CAAC,IAAI,EAAE,aAAa,EAAE,6CAA6C,CAAC,CAAC;IACrF,CAAC,CAAC,CAAC;IAEH,IAAA,UAAE,EAAC,qBAAqB,EAAE,GAAG,EAAE;QAC3B,MAAM,aAAa,GAAG;YAClB,QAAQ,EAAE,GAAG,EAAE,CAAC,CAAC;gBACb,SAAS,EAAE,EAAE,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE;aAC9C,CAAC;YACF,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE;SACnB,CAAC;QACF,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CACnC,GAAG,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,EAC9B,EAAE,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,CAC5B,CAAC;QAET,MAAM,SAAS,GAAG,IAAI,6CAAqB,CAAC,iB
AAiB,CAAC,CAAC;QAE/D,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACpC,oBAAM,CAAC,SAAS,CAAC,MAAM,EAAE,EAAE,EAAE,uCAAuC,CAAC,CAAC;QAEtE,MAAM,IAAI,GAAG,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAClC,oBAAM,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE,EAAE,yCAAyC,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;IAEH,IAAA,UAAE,EAAC,2CAA2C,EAAE,GAAG,EAAE;QACjD,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,MAAM,aAAa,GAAG;YAClB,QAAQ,EAAE,GAAG,EAAE;gBACX,SAAS,EAAE,CAAC;gBACZ,OAAO;oBACH,SAAS,EAAE,EAAE,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;iBACtE,CAAC;YACN,CAAC;YACD,MAAM,EAAE,GAAG,EAAE,CAAC,aAAa;SAC9B,CAAC;QACF,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CACnC,GAAG,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,EAC9B,EAAE,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,CAC5B,CAAC;QAET,MAAM,SAAS,GAAG,IAAI,6CAAqB,CAAC,iBAAiB,CAAC,CAAC;QAE/D,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QAEhD,oBAAM,CAAC,SAAS,CAAC,OAAO,EAAE,OAAO,EAAE,gCAAgC,CAAC,CAAC;QACrE,oBAAM,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC,EAAE,4CAA4C,CAAC,CAAC;IAC7E,CAAC,CAAC,CAAC;AACP,CAAC,CAAC,CAAC"}
@@ -0,0 +1,14 @@
1
+ import { TextFetcher } from './types';
2
+ export interface WebFetcherConfig { // options object; WebFetcher's constructor accepts a Partial of this shape
3
+ headers?: Record<string, string>; // optional string-to-string map; presumably HTTP request headers - confirm in src/WebFetcher.ts
4
+ requestConfig?: RequestInit; // optional RequestInit; presumably forwarded to the underlying fetch call - confirm
5
+ htmlToMarkdown: boolean; // required flag; presumably enables HTML-to-markdown conversion of fetched pages - confirm
6
+ summarizeHtml: boolean; // required flag; NOTE(review): effect is not visible from this declaration - confirm in src/WebFetcher.ts
7
+ }
8
+ export declare class WebFetcher implements TextFetcher { // TextFetcher implementation declared in ./types terms; bodies live in WebFetcher.js
9
+ private readonly _config; // private configuration field; type is elided in the emitted declaration
10
+ constructor(config?: Partial<WebFetcherConfig>); // config is optional and every WebFetcherConfig field may be omitted
11
+ fetch(uri: string, onDocument: (uri: string, text: string, docType?: string) => Promise<boolean>): Promise<boolean>; // onDocument receives a uri, the text and an optional docType and resolves to a boolean; fetch itself also resolves to a boolean
12
+ private htmlToMarkdown; // private member; signature is elided in the emitted declaration
13
+ }
14
+ //# sourceMappingURL=WebFetcher.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"WebFetcher.d.ts","sourceRoot":"","sources":["../src/WebFetcher.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AA2BtC,MAAM,WAAW,gBAAgB;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,aAAa,CAAC,EAAE,WAAW,CAAC;IAC5B,cAAc,EAAE,OAAO,CAAC;IACxB,aAAa,EAAE,OAAO,CAAC;CACxB;AAED,qBAAa,UAAW,YAAW,WAAW;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAmB;gBAExB,MAAM,CAAC,EAAE,OAAO,CAAC,gBAAgB,CAAC;IAUxC,KAAK,CAChB,GAAG,EAAE,MAAM,EACX,UAAU,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,GAC5E,OAAO,CAAC,OAAO,CAAC;IAsCnB,OAAO,CAAC,cAAc;CAsCvB"}