vectra 0.12.2 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (392)
  1. package/LICENSE +1 -1
  2. package/README.md +92 -100
  3. package/bin/vectra.js +3 -0
  4. package/lib/BrowserWebFetcher.d.ts +75 -0
  5. package/lib/BrowserWebFetcher.d.ts.map +1 -0
  6. package/lib/BrowserWebFetcher.js +290 -0
  7. package/lib/BrowserWebFetcher.js.map +1 -0
  8. package/lib/FileFetcher.d.ts +5 -0
  9. package/lib/FileFetcher.d.ts.map +1 -0
  10. package/lib/FileFetcher.js +89 -0
  11. package/lib/FileFetcher.js.map +1 -0
  12. package/lib/FileFetcher.spec.d.ts +2 -0
  13. package/lib/FileFetcher.spec.d.ts.map +1 -0
  14. package/lib/FileFetcher.spec.js +244 -0
  15. package/lib/FileFetcher.spec.js.map +1 -0
  16. package/lib/FolderWatcher.d.ts +91 -0
  17. package/lib/FolderWatcher.d.ts.map +1 -0
  18. package/lib/FolderWatcher.js +304 -0
  19. package/lib/FolderWatcher.js.map +1 -0
  20. package/lib/FolderWatcher.spec.d.ts +2 -0
  21. package/lib/FolderWatcher.spec.d.ts.map +1 -0
  22. package/lib/FolderWatcher.spec.js +308 -0
  23. package/lib/FolderWatcher.spec.js.map +1 -0
  24. package/lib/GPT3Tokenizer.d.ts +9 -0
  25. package/lib/GPT3Tokenizer.spec.d.ts +2 -0
  26. package/lib/GPT3Tokenizer.spec.d.ts.map +1 -0
  27. package/lib/GPT3Tokenizer.spec.js +45 -0
  28. package/lib/GPT3Tokenizer.spec.js.map +1 -0
  29. package/lib/ItemSelector.d.ts +41 -0
  30. package/lib/ItemSelector.d.ts.map +1 -0
  31. package/lib/ItemSelector.js +179 -0
  32. package/lib/ItemSelector.js.map +1 -0
  33. package/lib/ItemSelector.spec.d.ts +2 -0
  34. package/lib/ItemSelector.spec.d.ts.map +1 -0
  35. package/lib/ItemSelector.spec.js +204 -0
  36. package/lib/ItemSelector.spec.js.map +1 -0
  37. package/lib/LocalDocument.d.ts +54 -0
  38. package/lib/LocalDocument.d.ts.map +1 -1
  39. package/lib/LocalDocument.js +116 -0
  40. package/lib/LocalDocument.js.map +1 -0
  41. package/lib/LocalDocument.spec.d.ts +2 -0
  42. package/lib/LocalDocument.spec.d.ts.map +1 -0
  43. package/lib/LocalDocument.spec.js +214 -0
  44. package/lib/LocalDocument.spec.js.map +1 -0
  45. package/lib/LocalDocumentIndex.d.ts +152 -0
  46. package/lib/LocalDocumentIndex.d.ts.map +1 -1
  47. package/lib/LocalDocumentIndex.js +420 -0
  48. package/lib/LocalDocumentIndex.js.map +1 -0
  49. package/lib/LocalDocumentIndex.spec.d.ts +2 -0
  50. package/lib/LocalDocumentIndex.spec.d.ts.map +1 -0
  51. package/lib/LocalDocumentIndex.spec.js +494 -0
  52. package/lib/LocalDocumentIndex.spec.js.map +1 -0
  53. package/lib/LocalDocumentResult.d.ts +66 -0
  54. package/lib/LocalDocumentResult.d.ts.map +1 -1
  55. package/lib/LocalDocumentResult.js +376 -0
  56. package/lib/LocalDocumentResult.js.map +1 -0
  57. package/lib/LocalDocumentResult.spec.d.ts +2 -0
  58. package/lib/LocalDocumentResult.spec.d.ts.map +1 -0
  59. package/lib/LocalDocumentResult.spec.js +373 -0
  60. package/lib/LocalDocumentResult.spec.js.map +1 -0
  61. package/lib/LocalEmbeddings.d.ts +59 -0
  62. package/lib/LocalEmbeddings.d.ts.map +1 -0
  63. package/lib/LocalEmbeddings.js +101 -0
  64. package/lib/LocalEmbeddings.js.map +1 -0
  65. package/lib/LocalEmbeddings.spec.d.ts +2 -0
  66. package/lib/LocalEmbeddings.spec.d.ts.map +1 -0
  67. package/lib/LocalEmbeddings.spec.js +155 -0
  68. package/lib/LocalEmbeddings.spec.js.map +1 -0
  69. package/lib/LocalIndex.d.ts +159 -0
  70. package/lib/LocalIndex.d.ts.map +1 -1
  71. package/lib/LocalIndex.js +519 -0
  72. package/lib/LocalIndex.js.map +1 -0
  73. package/lib/LocalIndex.spec.d.ts +2 -0
  74. package/lib/LocalIndex.spec.js +611 -9
  75. package/lib/LocalIndex.spec.js.map +1 -1
  76. package/lib/OpenAIEmbeddings.d.ts +124 -0
  77. package/lib/OpenAIEmbeddings.d.ts.map +1 -0
  78. package/lib/OpenAIEmbeddings.js +166 -0
  79. package/lib/OpenAIEmbeddings.js.map +1 -0
  80. package/lib/OpenAIEmbeddings.spec.d.ts +2 -0
  81. package/lib/OpenAIEmbeddings.spec.d.ts.map +1 -0
  82. package/lib/OpenAIEmbeddings.spec.js +298 -0
  83. package/lib/OpenAIEmbeddings.spec.js.map +1 -0
  84. package/lib/TextSplitter.d.ts +21 -0
  85. package/lib/TextSplitter.d.ts.map +1 -1
  86. package/lib/TextSplitter.js +500 -0
  87. package/lib/TextSplitter.js.map +1 -0
  88. package/lib/TextSplitter.spec.d.ts +2 -0
  89. package/lib/TextSplitter.spec.d.ts.map +1 -0
  90. package/lib/TextSplitter.spec.js +337 -0
  91. package/lib/TextSplitter.spec.js.map +1 -0
  92. package/lib/TransformersEmbeddings.d.ts +121 -0
  93. package/lib/TransformersEmbeddings.d.ts.map +1 -0
  94. package/lib/TransformersEmbeddings.js +176 -0
  95. package/lib/TransformersEmbeddings.js.map +1 -0
  96. package/lib/TransformersEmbeddings.spec.d.ts +2 -0
  97. package/lib/TransformersEmbeddings.spec.d.ts.map +1 -0
  98. package/lib/TransformersEmbeddings.spec.js +198 -0
  99. package/lib/TransformersEmbeddings.spec.js.map +1 -0
  100. package/lib/TransformersTokenizer.d.ts +33 -0
  101. package/lib/TransformersTokenizer.d.ts.map +1 -0
  102. package/lib/TransformersTokenizer.js +44 -0
  103. package/lib/TransformersTokenizer.js.map +1 -0
  104. package/lib/TransformersTokenizer.spec.d.ts +2 -0
  105. package/lib/TransformersTokenizer.spec.d.ts.map +1 -0
  106. package/lib/TransformersTokenizer.spec.js +112 -0
  107. package/lib/TransformersTokenizer.spec.js.map +1 -0
  108. package/lib/WebFetcher.d.ts +14 -0
  109. package/lib/WebFetcher.d.ts.map +1 -0
  110. package/lib/WebFetcher.js +238 -0
  111. package/lib/WebFetcher.js.map +1 -0
  112. package/lib/WebFetcher.spec.d.ts +2 -0
  113. package/lib/WebFetcher.spec.d.ts.map +1 -0
  114. package/lib/WebFetcher.spec.js +263 -0
  115. package/lib/WebFetcher.spec.js.map +1 -0
  116. package/lib/browser.d.ts +30 -0
  117. package/lib/browser.d.ts.map +1 -0
  118. package/lib/browser.js +52 -0
  119. package/lib/browser.js.map +1 -0
  120. package/lib/codecs/IndexCodec.d.ts +37 -0
  121. package/lib/codecs/IndexCodec.d.ts.map +1 -0
  122. package/lib/codecs/IndexCodec.js +3 -0
  123. package/lib/codecs/IndexCodec.js.map +1 -0
  124. package/lib/codecs/JsonCodec.d.ts +19 -0
  125. package/lib/codecs/JsonCodec.d.ts.map +1 -0
  126. package/lib/codecs/JsonCodec.js +35 -0
  127. package/lib/codecs/JsonCodec.js.map +1 -0
  128. package/lib/codecs/JsonCodec.spec.d.ts +2 -0
  129. package/lib/codecs/JsonCodec.spec.d.ts.map +1 -0
  130. package/lib/codecs/JsonCodec.spec.js +66 -0
  131. package/lib/codecs/JsonCodec.spec.js.map +1 -0
  132. package/lib/codecs/LocalIndex.protobuf.spec.d.ts +2 -0
  133. package/lib/codecs/LocalIndex.protobuf.spec.d.ts.map +1 -0
  134. package/lib/codecs/LocalIndex.protobuf.spec.js +108 -0
  135. package/lib/codecs/LocalIndex.protobuf.spec.js.map +1 -0
  136. package/lib/codecs/ProtobufCodec.d.ts +20 -0
  137. package/lib/codecs/ProtobufCodec.d.ts.map +1 -0
  138. package/lib/codecs/ProtobufCodec.js +225 -0
  139. package/lib/codecs/ProtobufCodec.js.map +1 -0
  140. package/lib/codecs/ProtobufCodec.spec.d.ts +2 -0
  141. package/lib/codecs/ProtobufCodec.spec.d.ts.map +1 -0
  142. package/lib/codecs/ProtobufCodec.spec.js +155 -0
  143. package/lib/codecs/ProtobufCodec.spec.js.map +1 -0
  144. package/lib/codecs/index.d.ts +5 -0
  145. package/lib/codecs/index.d.ts.map +1 -0
  146. package/lib/codecs/index.js +21 -0
  147. package/lib/codecs/index.js.map +1 -0
  148. package/lib/codecs/migrateIndex.d.ts +24 -0
  149. package/lib/codecs/migrateIndex.d.ts.map +1 -0
  150. package/lib/codecs/migrateIndex.js +119 -0
  151. package/lib/codecs/migrateIndex.js.map +1 -0
  152. package/lib/codecs/migrateIndex.spec.d.ts +2 -0
  153. package/lib/codecs/migrateIndex.spec.d.ts.map +1 -0
  154. package/lib/codecs/migrateIndex.spec.js +151 -0
  155. package/lib/codecs/migrateIndex.spec.js.map +1 -0
  156. package/lib/codecs/schemas/index.proto +34 -0
  157. package/lib/index.d.ts +20 -0
  158. package/lib/index.d.ts.map +1 -1
  159. package/lib/index.js +36 -0
  160. package/lib/index.js.map +1 -0
  161. package/lib/internals/Colorize.d.ts +14 -0
  162. package/lib/internals/Colorize.d.ts.map +1 -0
  163. package/lib/internals/Colorize.js +69 -0
  164. package/lib/internals/Colorize.js.map +1 -0
  165. package/lib/internals/index.d.ts +3 -0
  166. package/lib/internals/index.d.ts.map +1 -0
  167. package/lib/internals/index.js +19 -0
  168. package/lib/internals/index.js.map +1 -0
  169. package/lib/internals/types.d.ts +43 -0
  170. package/lib/internals/types.d.ts.map +1 -0
  171. package/lib/internals/types.js +3 -0
  172. package/lib/internals/types.js.map +1 -0
  173. package/lib/server/IndexManager.d.ts +78 -0
  174. package/lib/server/IndexManager.d.ts.map +1 -0
  175. package/lib/server/IndexManager.js +259 -0
  176. package/lib/server/IndexManager.js.map +1 -0
  177. package/lib/server/VectraServer.d.ts +40 -0
  178. package/lib/server/VectraServer.d.ts.map +1 -0
  179. package/lib/server/VectraServer.js +151 -0
  180. package/lib/server/VectraServer.js.map +1 -0
  181. package/lib/server/VectraServer.spec.d.ts +2 -0
  182. package/lib/server/VectraServer.spec.d.ts.map +1 -0
  183. package/lib/server/VectraServer.spec.js +322 -0
  184. package/lib/server/VectraServer.spec.js.map +1 -0
  185. package/lib/server/handlers/documentHandlers.d.ts +15 -0
  186. package/lib/server/handlers/documentHandlers.d.ts.map +1 -0
  187. package/lib/server/handlers/documentHandlers.js +95 -0
  188. package/lib/server/handlers/documentHandlers.js.map +1 -0
  189. package/lib/server/handlers/helpers.d.ts +23 -0
  190. package/lib/server/handlers/helpers.d.ts.map +1 -0
  191. package/lib/server/handlers/helpers.js +138 -0
  192. package/lib/server/handlers/helpers.js.map +1 -0
  193. package/lib/server/handlers/index.d.ts +8 -0
  194. package/lib/server/handlers/index.d.ts.map +1 -0
  195. package/lib/server/handlers/index.js +22 -0
  196. package/lib/server/handlers/index.js.map +1 -0
  197. package/lib/server/handlers/indexHandlers.d.ts +14 -0
  198. package/lib/server/handlers/indexHandlers.d.ts.map +1 -0
  199. package/lib/server/handlers/indexHandlers.js +85 -0
  200. package/lib/server/handlers/indexHandlers.js.map +1 -0
  201. package/lib/server/handlers/itemHandlers.d.ts +34 -0
  202. package/lib/server/handlers/itemHandlers.d.ts.map +1 -0
  203. package/lib/server/handlers/itemHandlers.js +166 -0
  204. package/lib/server/handlers/itemHandlers.js.map +1 -0
  205. package/lib/server/handlers/lifecycleHandlers.d.ts +11 -0
  206. package/lib/server/handlers/lifecycleHandlers.d.ts.map +1 -0
  207. package/lib/server/handlers/lifecycleHandlers.js +31 -0
  208. package/lib/server/handlers/lifecycleHandlers.js.map +1 -0
  209. package/lib/server/handlers/queryHandlers.d.ts +27 -0
  210. package/lib/server/handlers/queryHandlers.d.ts.map +1 -0
  211. package/lib/server/handlers/queryHandlers.js +135 -0
  212. package/lib/server/handlers/queryHandlers.js.map +1 -0
  213. package/lib/server/handlers/statsHandlers.d.ts +17 -0
  214. package/lib/server/handlers/statsHandlers.d.ts.map +1 -0
  215. package/lib/server/handlers/statsHandlers.js +81 -0
  216. package/lib/server/handlers/statsHandlers.js.map +1 -0
  217. package/lib/server/index.d.ts +4 -0
  218. package/lib/server/index.d.ts.map +1 -0
  219. package/lib/server/index.js +23 -0
  220. package/lib/server/index.js.map +1 -0
  221. package/lib/storage/FileStorage.d.ts +92 -0
  222. package/lib/storage/FileStorage.d.ts.map +1 -0
  223. package/lib/storage/FileStorage.js +3 -0
  224. package/lib/storage/FileStorage.js.map +1 -0
  225. package/lib/storage/FileStorageUtilities.d.ts +36 -0
  226. package/lib/storage/FileStorageUtilities.d.ts.map +1 -0
  227. package/lib/storage/FileStorageUtilities.js +91 -0
  228. package/lib/storage/FileStorageUtilities.js.map +1 -0
  229. package/lib/storage/FileStorageUtilities.spec.d.ts +2 -0
  230. package/lib/storage/FileStorageUtilities.spec.d.ts.map +1 -0
  231. package/lib/storage/FileStorageUtilities.spec.js +98 -0
  232. package/lib/storage/FileStorageUtilities.spec.js.map +1 -0
  233. package/lib/storage/FileType.d.ts +29 -0
  234. package/lib/storage/FileType.d.ts.map +1 -0
  235. package/lib/storage/FileType.js +38 -0
  236. package/lib/storage/FileType.js.map +1 -0
  237. package/lib/storage/IndexedDBStorage.d.ts +47 -0
  238. package/lib/storage/IndexedDBStorage.d.ts.map +1 -0
  239. package/lib/storage/IndexedDBStorage.js +347 -0
  240. package/lib/storage/IndexedDBStorage.js.map +1 -0
  241. package/lib/storage/LocalFileStorage.browser.d.ts +19 -0
  242. package/lib/storage/LocalFileStorage.browser.d.ts.map +1 -0
  243. package/lib/storage/LocalFileStorage.browser.js +43 -0
  244. package/lib/storage/LocalFileStorage.browser.js.map +1 -0
  245. package/lib/storage/LocalFileStorage.d.ts +23 -0
  246. package/lib/storage/LocalFileStorage.d.ts.map +1 -0
  247. package/lib/storage/LocalFileStorage.js +152 -0
  248. package/lib/storage/LocalFileStorage.js.map +1 -0
  249. package/lib/storage/LocalFileStorage.spec.d.ts +2 -0
  250. package/lib/storage/LocalFileStorage.spec.d.ts.map +1 -0
  251. package/lib/storage/LocalFileStorage.spec.js +249 -0
  252. package/lib/storage/LocalFileStorage.spec.js.map +1 -0
  253. package/lib/storage/VirtualFileStorage.d.ts +18 -0
  254. package/lib/storage/VirtualFileStorage.d.ts.map +1 -0
  255. package/lib/storage/VirtualFileStorage.js +178 -0
  256. package/lib/storage/VirtualFileStorage.js.map +1 -0
  257. package/lib/storage/VirtualFileStorage.spec.d.ts +2 -0
  258. package/lib/storage/VirtualFileStorage.spec.d.ts.map +1 -0
  259. package/lib/storage/VirtualFileStorage.spec.js +302 -0
  260. package/lib/storage/VirtualFileStorage.spec.js.map +1 -0
  261. package/lib/storage/index.d.ts +6 -0
  262. package/lib/storage/index.d.ts.map +1 -0
  263. package/lib/storage/index.js +22 -0
  264. package/lib/storage/index.js.map +1 -0
  265. package/lib/templates/templates/csharp/README.md +48 -0
  266. package/lib/templates/templates/csharp/VectraClient.cs +234 -0
  267. package/lib/templates/templates/go/README.md +71 -0
  268. package/lib/templates/templates/go/vectra_client.go +322 -0
  269. package/lib/templates/templates/java/README.md +81 -0
  270. package/lib/templates/templates/java/VectraClient.java +232 -0
  271. package/lib/templates/templates/python/README.md +37 -0
  272. package/lib/templates/templates/python/vectra_client.py +279 -0
  273. package/lib/templates/templates/rust/Cargo.toml +14 -0
  274. package/lib/templates/templates/rust/README.md +39 -0
  275. package/lib/templates/templates/rust/build.rs +4 -0
  276. package/lib/templates/templates/rust/lib.rs +284 -0
  277. package/lib/templates/templates/typescript/README.md +96 -0
  278. package/lib/templates/templates/typescript/VectraClient.ts +374 -0
  279. package/lib/templates/typescript/VectraClient.d.ts +114 -0
  280. package/lib/templates/typescript/VectraClient.d.ts.map +1 -0
  281. package/lib/templates/typescript/VectraClient.js +328 -0
  282. package/lib/templates/typescript/VectraClient.js.map +1 -0
  283. package/lib/types.d.ts +153 -0
  284. package/lib/types.d.ts.map +1 -0
  285. package/lib/types.js +3 -0
  286. package/lib/types.js.map +1 -0
  287. package/lib/utils/index.d.ts +2 -0
  288. package/lib/utils/index.d.ts.map +1 -0
  289. package/lib/utils/index.js +18 -0
  290. package/lib/utils/index.js.map +1 -0
  291. package/lib/utils/pathUtils.d.ts +40 -0
  292. package/lib/utils/pathUtils.d.ts.map +1 -0
  293. package/lib/utils/pathUtils.js +98 -0
  294. package/lib/utils/pathUtils.js.map +1 -0
  295. package/lib/vectra-cli.d.ts +2 -0
  296. package/lib/vectra-cli.d.ts.map +1 -1
  297. package/lib/vectra-cli.generate.spec.d.ts +2 -0
  298. package/lib/vectra-cli.generate.spec.d.ts.map +1 -0
  299. package/lib/vectra-cli.generate.spec.js +112 -0
  300. package/lib/vectra-cli.generate.spec.js.map +1 -0
  301. package/lib/vectra-cli.js +760 -0
  302. package/lib/vectra-cli.js.map +1 -0
  303. package/lib/vectra-cli.spec.d.ts +1 -0
  304. package/lib/vectra-cli.spec.d.ts.map +1 -0
  305. package/lib/vectra-cli.spec.js +2 -0
  306. package/lib/vectra-cli.spec.js.map +1 -0
  307. package/package.json +91 -16
  308. package/proto/vectra_service.proto +276 -0
  309. package/src/BrowserWebFetcher.ts +345 -0
  310. package/src/FileFetcher.spec.ts +234 -0
  311. package/src/FileFetcher.ts +37 -25
  312. package/src/FolderWatcher.spec.ts +288 -0
  313. package/src/FolderWatcher.ts +304 -0
  314. package/src/GPT3Tokenizer.spec.ts +50 -0
  315. package/src/ItemSelector.spec.ts +252 -0
  316. package/src/ItemSelector.ts +163 -150
  317. package/src/LocalDocument.spec.ts +211 -0
  318. package/src/LocalDocument.ts +88 -94
  319. package/src/LocalDocumentIndex.spec.ts +481 -0
  320. package/src/LocalDocumentIndex.ts +39 -40
  321. package/src/LocalDocumentResult.spec.ts +373 -0
  322. package/src/LocalDocumentResult.ts +489 -319
  323. package/src/LocalEmbeddings.spec.ts +138 -0
  324. package/src/LocalEmbeddings.ts +120 -0
  325. package/src/LocalIndex.spec.ts +808 -66
  326. package/src/LocalIndex.ts +479 -429
  327. package/src/OpenAIEmbeddings.spec.ts +354 -0
  328. package/src/OpenAIEmbeddings.ts +26 -27
  329. package/src/TextSplitter.spec.ts +342 -0
  330. package/src/TextSplitter.ts +517 -532
  331. package/src/TransformersEmbeddings.spec.ts +188 -0
  332. package/src/TransformersEmbeddings.ts +232 -0
  333. package/src/TransformersTokenizer.spec.ts +143 -0
  334. package/src/TransformersTokenizer.ts +45 -0
  335. package/src/WebFetcher.spec.ts +288 -0
  336. package/src/WebFetcher.ts +184 -186
  337. package/src/browser.ts +69 -0
  338. package/src/codecs/IndexCodec.ts +40 -0
  339. package/src/codecs/JsonCodec.spec.ts +70 -0
  340. package/src/codecs/JsonCodec.ts +37 -0
  341. package/src/codecs/LocalIndex.protobuf.spec.ts +115 -0
  342. package/src/codecs/ProtobufCodec.spec.ts +166 -0
  343. package/src/codecs/ProtobufCodec.ts +193 -0
  344. package/src/codecs/index.ts +4 -0
  345. package/src/codecs/migrateIndex.spec.ts +176 -0
  346. package/src/codecs/migrateIndex.ts +125 -0
  347. package/src/codecs/schemas/index.proto +34 -0
  348. package/src/index.ts +9 -1
  349. package/src/internals/Colorize.ts +19 -16
  350. package/src/server/IndexManager.ts +243 -0
  351. package/src/server/VectraServer.spec.ts +303 -0
  352. package/src/server/VectraServer.ts +156 -0
  353. package/src/server/handlers/documentHandlers.ts +59 -0
  354. package/src/server/handlers/helpers.ts +93 -0
  355. package/src/server/handlers/index.ts +7 -0
  356. package/src/server/handlers/indexHandlers.ts +44 -0
  357. package/src/server/handlers/itemHandlers.ts +140 -0
  358. package/src/server/handlers/lifecycleHandlers.ts +26 -0
  359. package/src/server/handlers/queryHandlers.ts +96 -0
  360. package/src/server/handlers/statsHandlers.ts +38 -0
  361. package/src/server/index.ts +3 -0
  362. package/src/storage/FileStorage.ts +105 -0
  363. package/src/storage/FileStorageUtilities.spec.ts +106 -0
  364. package/src/storage/FileStorageUtilities.ts +77 -0
  365. package/src/storage/FileType.ts +61 -0
  366. package/src/storage/IndexedDBStorage.ts +365 -0
  367. package/src/storage/LocalFileStorage.browser.ts +52 -0
  368. package/src/storage/LocalFileStorage.spec.ts +292 -0
  369. package/src/storage/LocalFileStorage.ts +98 -0
  370. package/src/storage/VirtualFileStorage.spec.ts +307 -0
  371. package/src/storage/VirtualFileStorage.ts +169 -0
  372. package/src/storage/index.ts +5 -0
  373. package/src/templates/csharp/README.md +48 -0
  374. package/src/templates/csharp/VectraClient.cs +234 -0
  375. package/src/templates/go/README.md +71 -0
  376. package/src/templates/go/vectra_client.go +322 -0
  377. package/src/templates/java/README.md +81 -0
  378. package/src/templates/java/VectraClient.java +232 -0
  379. package/src/templates/python/README.md +37 -0
  380. package/src/templates/python/vectra_client.py +279 -0
  381. package/src/templates/rust/Cargo.toml +14 -0
  382. package/src/templates/rust/README.md +39 -0
  383. package/src/templates/rust/build.rs +4 -0
  384. package/src/templates/rust/lib.rs +284 -0
  385. package/src/templates/typescript/README.md +96 -0
  386. package/src/templates/typescript/VectraClient.ts +374 -0
  387. package/src/types.ts +131 -123
  388. package/src/utils/index.ts +1 -0
  389. package/src/utils/pathUtils.ts +106 -0
  390. package/src/vectra-cli.generate.spec.ts +72 -0
  391. package/src/vectra-cli.spec.ts +0 -0
  392. package/src/vectra-cli.ts +687 -246
@@ -0,0 +1,288 @@
1
+ import { strict as assert } from 'assert';
2
+ import * as sinon from 'sinon';
3
+ import * as os from 'os';
4
+ import * as path from 'path';
5
+ import fs from 'node:fs';
6
+ import { FolderWatcher } from './FolderWatcher';
7
+ import { LocalDocumentIndex } from './LocalDocumentIndex';
8
+ import { EmbeddingsModel, EmbeddingsResponse } from './types';
9
+ import { LocalFileStorage } from './storage/LocalFileStorage';
10
+
11
/**
 * Test double for an embeddings model.
 *
 * Every input text is mapped to the same 384-element unit vector
 * (1 in the first slot, 0 elsewhere), so results are deterministic.
 */
class StubEmbeddings implements EmbeddingsModel {
    public readonly maxTokens = 8000;

    /**
     * Produces one unit vector per input text.
     * @param inputs A single text or a list of texts.
     * @returns A successful response whose `output` holds one vector per input.
     */
    public async createEmbeddings(inputs: string | string[]): Promise<EmbeddingsResponse> {
        // A bare string counts as a single input.
        const count = Array.isArray(inputs) ? inputs.length : 1;
        const output: number[][] = [];
        for (let i = 0; i < count; i++) {
            const unit = new Array(384).fill(0);
            unit[0] = 1;
            output.push(unit);
        }
        return { status: 'success', output };
    }
}
24
+
25
/**
 * Integration tests for FolderWatcher.
 *
 * Every test gets a fresh temporary directory holding an `index` folder
 * (backing a LocalDocumentIndex that uses stub embeddings) and a `watch`
 * folder whose files the watcher is expected to sync.
 */
describe('FolderWatcher', () => {
    let tmpDir: string;
    let indexDir: string;
    let watchDir: string;
    let index: LocalDocumentIndex;
    let sandbox: sinon.SinonSandbox;

    /**
     * Starts `watcher`, runs `body`, and always stops the watcher afterwards.
     *
     * Everything that happens after `start()` runs inside the try/finally, so
     * a failing assertion or a throwing `sync()` cannot leave a running
     * watcher behind for the following tests.
     */
    async function withStartedWatcher(watcher: FolderWatcher, body: () => Promise<void>): Promise<void> {
        await watcher.start();
        try {
            await body();
        } finally {
            await watcher.stop();
        }
    }

    beforeEach(async () => {
        sandbox = sinon.createSandbox();
        tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'vectra-watch-'));
        indexDir = path.join(tmpDir, 'index');
        watchDir = path.join(tmpDir, 'watch');
        await fs.promises.mkdir(indexDir, { recursive: true });
        await fs.promises.mkdir(watchDir, { recursive: true });

        index = new LocalDocumentIndex({
            folderPath: indexDir,
            embeddings: new StubEmbeddings(),
            storage: new LocalFileStorage(),
        });
        await index.createIndex({ version: 1, deleteIfExists: true });
    });

    afterEach(async () => {
        sandbox.restore();
        await fs.promises.rm(tmpDir, { recursive: true, force: true });
    });

    it('should perform initial sync of existing files', async () => {
        // Files exist before the watcher starts, so only the initial sync can pick them up.
        await fs.promises.writeFile(path.join(watchDir, 'file1.txt'), 'hello world');
        await fs.promises.writeFile(path.join(watchDir, 'file2.txt'), 'goodbye world');

        const watcher = new FolderWatcher({ index, paths: [watchDir] });
        const synced: string[] = [];
        watcher.on('sync', (uri: string) => synced.push(uri));

        await withStartedWatcher(watcher, async () => {
            assert.equal(watcher.trackedFileCount, 2);
            assert.equal(synced.length, 2);
            // Verify documents exist in index
            const id1 = await index.getDocumentId(path.join(watchDir, 'file1.txt'));
            const id2 = await index.getDocumentId(path.join(watchDir, 'file2.txt'));
            assert.ok(id1, 'file1.txt should be indexed');
            assert.ok(id2, 'file2.txt should be indexed');
        });
    });

    it('should emit ready event after initial sync', async () => {
        const watcher = new FolderWatcher({ index, paths: [watchDir] });
        let ready = false;
        watcher.on('ready', () => { ready = true; });

        await withStartedWatcher(watcher, async () => {
            assert.equal(ready, true);
        });
    });

    it('should filter by extensions', async () => {
        await fs.promises.writeFile(path.join(watchDir, 'include.txt'), 'included');
        await fs.promises.writeFile(path.join(watchDir, 'exclude.js'), 'excluded');
        await fs.promises.writeFile(path.join(watchDir, 'include.md'), 'also included');

        const watcher = new FolderWatcher({
            index,
            paths: [watchDir],
            extensions: ['.txt', '.md']
        });

        await withStartedWatcher(watcher, async () => {
            assert.equal(watcher.trackedFileCount, 2);
            const idTxt = await index.getDocumentId(path.join(watchDir, 'include.txt'));
            const idMd = await index.getDocumentId(path.join(watchDir, 'include.md'));
            const idJs = await index.getDocumentId(path.join(watchDir, 'exclude.js'));
            assert.ok(idTxt, 'include.txt should be indexed');
            assert.ok(idMd, 'include.md should be indexed');
            assert.equal(idJs, undefined, 'exclude.js should not be indexed');
        });
    });

    it('should handle extensions without leading dot', async () => {
        await fs.promises.writeFile(path.join(watchDir, 'test.txt'), 'hello');

        const watcher = new FolderWatcher({
            index,
            paths: [watchDir],
            extensions: ['txt']
        });

        await withStartedWatcher(watcher, async () => {
            assert.equal(watcher.trackedFileCount, 1);
        });
    });

    it('should recurse into subdirectories', async () => {
        const subDir = path.join(watchDir, 'sub');
        await fs.promises.mkdir(subDir, { recursive: true });
        await fs.promises.writeFile(path.join(watchDir, 'root.txt'), 'root');
        await fs.promises.writeFile(path.join(subDir, 'nested.txt'), 'nested');

        const watcher = new FolderWatcher({ index, paths: [watchDir] });

        await withStartedWatcher(watcher, async () => {
            assert.equal(watcher.trackedFileCount, 2);
            const idRoot = await index.getDocumentId(path.join(watchDir, 'root.txt'));
            const idNested = await index.getDocumentId(path.join(subDir, 'nested.txt'));
            assert.ok(idRoot, 'root.txt should be indexed');
            assert.ok(idNested, 'nested.txt should be indexed');
        });
    });

    it('should watch individual files', async () => {
        const singleFile = path.join(tmpDir, 'single.txt');
        await fs.promises.writeFile(singleFile, 'single file');

        const watcher = new FolderWatcher({ index, paths: [singleFile] });

        await withStartedWatcher(watcher, async () => {
            assert.equal(watcher.trackedFileCount, 1);
            const id = await index.getDocumentId(singleFile);
            assert.ok(id, 'single.txt should be indexed');
        });
    });

    it('should watch multiple paths', async () => {
        const dir2 = path.join(tmpDir, 'watch2');
        await fs.promises.mkdir(dir2, { recursive: true });
        await fs.promises.writeFile(path.join(watchDir, 'a.txt'), 'a');
        await fs.promises.writeFile(path.join(dir2, 'b.txt'), 'b');

        const watcher = new FolderWatcher({ index, paths: [watchDir, dir2] });

        await withStartedWatcher(watcher, async () => {
            assert.equal(watcher.trackedFileCount, 2);
        });
    });

    it('should handle sync() detecting deleted files', async () => {
        const filePath = path.join(watchDir, 'ephemeral.txt');
        await fs.promises.writeFile(filePath, 'temporary');

        const watcher = new FolderWatcher({ index, paths: [watchDir] });
        await withStartedWatcher(watcher, async () => {
            assert.equal(watcher.trackedFileCount, 1);

            // Delete the file
            await fs.promises.unlink(filePath);

            // Manual sync should detect deletion
            const synced: Array<{ uri: string; action: string }> = [];
            watcher.on('sync', (uri: string, action: string) => synced.push({ uri, action }));
            await watcher.sync();

            assert.equal(watcher.trackedFileCount, 0);
            const deleted = synced.find(s => s.action === 'deleted');
            assert.ok(deleted, 'should have emitted a delete event');
        });
    });

    it('should handle sync() detecting updated files', async () => {
        const filePath = path.join(watchDir, 'mutable.txt');
        await fs.promises.writeFile(filePath, 'version 1');

        const watcher = new FolderWatcher({ index, paths: [watchDir] });
        await withStartedWatcher(watcher, async () => {
            assert.equal(watcher.trackedFileCount, 1);

            // Rewrite the file and push its mtime forward explicitly. Relying on a
            // short sleep is flaky on filesystems with coarse (1-2 s) timestamps.
            await fs.promises.writeFile(filePath, 'version 2');
            const bumped = new Date(Date.now() + 5000);
            await fs.promises.utimes(filePath, bumped, bumped);

            // Manual sync should detect update
            const synced: Array<{ uri: string; action: string }> = [];
            watcher.on('sync', (uri: string, action: string) => synced.push({ uri, action }));
            await watcher.sync();

            assert.equal(watcher.trackedFileCount, 1);
            const updated = synced.find(s => s.action === 'updated');
            assert.ok(updated, 'should have emitted an update event');
        });
    });

    it('should not throw if path does not exist', async () => {
        const watcher = new FolderWatcher({
            index,
            paths: [path.join(tmpDir, 'nonexistent')]
        });

        await withStartedWatcher(watcher, async () => {
            assert.equal(watcher.trackedFileCount, 0);
        });
    });

    it('should throw if started twice', async () => {
        const watcher = new FolderWatcher({ index, paths: [watchDir] });

        await withStartedWatcher(watcher, async () => {
            await assert.rejects(() => watcher.start(), /already running/);
        });
    });

    it('should report isRunning correctly', async () => {
        const watcher = new FolderWatcher({ index, paths: [watchDir] });
        assert.equal(watcher.isRunning, false);

        await withStartedWatcher(watcher, async () => {
            assert.equal(watcher.isRunning, true);
        });

        assert.equal(watcher.isRunning, false);
    });

    it('should emit error events for sync failures', async () => {
        await fs.promises.writeFile(path.join(watchDir, 'bad.txt'), 'content');

        // Create watcher with no embeddings to force error
        const badIndex = new LocalDocumentIndex({
            folderPath: indexDir,
            // no embeddings — will throw on upsertDocument
            storage: new LocalFileStorage(),
        });

        const watcher = new FolderWatcher({ index: badIndex, paths: [watchDir] });
        const errors: Array<{ err: Error; uri: string }> = [];
        watcher.on('error', (err: Error, uri: string) => errors.push({ err, uri }));

        await withStartedWatcher(watcher, async () => {
            assert.equal(errors.length, 1);
            assert.ok(errors[0].err.message.includes('Embeddings model not configured'));
        });
    });
});
@@ -0,0 +1,304 @@
1
+ import { EventEmitter } from 'events';
2
+ import fs from 'node:fs';
3
+ import * as path from 'path';
4
+ import { LocalDocumentIndex } from './LocalDocumentIndex';
5
+
6
/**
 * Options accepted by the FolderWatcher constructor.
 */
export interface FolderWatcherConfig {
    /**
     * Document index that watched files are synced into.
     */
    index: LocalDocumentIndex;

    /**
     * Folders and/or individual files to watch.
     */
    paths: string[];

    /**
     * Optional. Extensions of the files to include (e.g., ['.txt', '.md', '.html']).
     * @remarks
     * When omitted, every file is included.
     */
    extensions?: string[];

    /**
     * Optional. How long to wait, in milliseconds, before reacting to a file change event.
     * @remarks
     * Defaults to 500ms. Rapid successive changes to the same file are collapsed into one sync.
     */
    debounceMs?: number;
}
34
+
35
/**
 * Argument lists of the events a FolderWatcher emits, keyed by event name.
 *
 * - `sync` — a file was synced. Args: `(uri: string, action: 'added' | 'updated' | 'deleted')`
 * - `error` — a sync operation failed. Args: `(error: Error, uri: string)`
 * - `ready` — the initial sync has completed. No args.
 */
export interface FolderWatcherEvents {
    sync: [uri: string, action: 'added' | 'updated' | 'deleted'];
    error: [error: Error, uri: string];
    ready: [];
}
47
+
48
/**
 * Bookkeeping record for a file that has been synced into the index.
 */
interface TrackedFile {
    /** URI the document was stored under in the index. */
    uri: string;
    /** Modification time (in milliseconds) recorded when the file was last synced. */
    mtimeMs: number;
}
52
+
53
/**
 * Watches folders for file changes and automatically syncs them into a LocalDocumentIndex.
 *
 * @remarks
 * Uses Node.js `fs.watch` for filesystem monitoring with per-file debouncing.
 * Performs an initial full sync on start, then watches for incremental changes.
 *
 * Failures are reported through the `error` event. As with any EventEmitter, emitting
 * `error` while no listener is attached throws, so callers should register an `error`
 * listener before calling `start()`.
 */
export class FolderWatcher extends EventEmitter {
    private readonly _index: LocalDocumentIndex;
    private readonly _paths: string[];
    private readonly _extensions?: Set<string>;
    private readonly _debounceMs: number;
    // Files currently synced into the index, keyed by absolute file path.
    private readonly _tracked: Map<string, TrackedFile> = new Map();
    // Debounce timers for files with a change event waiting to be processed.
    private readonly _pending: Map<string, NodeJS.Timeout> = new Map();
    private readonly _watchers: fs.FSWatcher[] = [];
    private _running: boolean = false;

    /**
     * Creates a new FolderWatcher instance.
     * @param config Configuration for the watcher.
     * @remarks
     * Watch paths are resolved to absolute paths. Extensions are matched
     * case-insensitively and may be given with or without the leading dot.
     */
    public constructor(config: FolderWatcherConfig) {
        super();
        this._index = config.index;
        this._paths = config.paths.map(p => path.resolve(p));
        this._extensions = config.extensions
            ? new Set(config.extensions.map(e => e.startsWith('.') ? e.toLowerCase() : `.${e.toLowerCase()}`))
            : undefined;
        this._debounceMs = config.debounceMs ?? 500;
    }

    /**
     * Returns true if the watcher is currently running.
     */
    public get isRunning(): boolean {
        return this._running;
    }

    /**
     * Returns the number of tracked files.
     */
    public get trackedFileCount(): number {
        return this._tracked.size;
    }

    /**
     * Starts the watcher: performs an initial sync and then watches for changes.
     * @remarks
     * Emits `ready` once the initial sync has completed. Watch paths that do not
     * exist are skipped. If `stop()` is called while starting, no further
     * filesystem watchers are attached.
     * @throws Error if the watcher is already running, or if the initial sync throws.
     */
    public async start(): Promise<void> {
        if (this._running) {
            throw new Error('FolderWatcher is already running');
        }
        this._running = true;

        // Initial sync
        try {
            await this._initialSync();
        } catch (err: unknown) {
            // A failed initial scan must not leave the watcher stuck in the running
            // state with nothing being watched; allow start() to be retried.
            this._running = false;
            throw err;
        }
        this.emit('ready');

        // Set up watchers
        for (const watchPath of this._paths) {
            // stop() may have been called while an earlier step was awaited; attaching
            // watchers now would leak handles that stop() has no further chance to close.
            if (!this._running) {
                return;
            }
            try {
                const stat = await fs.promises.stat(watchPath);
                if (!this._running) {
                    return;
                }
                if (stat.isDirectory()) {
                    this._watchDirectory(watchPath);
                } else if (stat.isFile()) {
                    this._watchFile(watchPath);
                }
            } catch {
                // Path doesn't exist — skip
            }
        }
    }

    /**
     * Stops the watcher and cleans up all resources.
     * @remarks
     * Closes every filesystem watcher and cancels debounced syncs that have not fired yet.
     * Safe to call when the watcher is not running.
     */
    public async stop(): Promise<void> {
        this._running = false;

        // Close all watchers
        for (const watcher of this._watchers) {
            watcher.close();
        }
        this._watchers.length = 0;

        // Clear pending debounced operations
        for (const timeout of this._pending.values()) {
            clearTimeout(timeout);
        }
        this._pending.clear();
    }

    /**
     * Performs a full sync: scans all watched paths and upserts/deletes as needed.
     * @returns Number of files synced (added + updated + deleted).
     */
    public async sync(): Promise<number> {
        let count = 0;

        // Collect current files on disk
        const currentFiles = new Map<string, number>();
        for (const watchPath of this._paths) {
            await this._collectFiles(watchPath, currentFiles);
        }

        // Upsert new or changed files. Any mtime difference counts as a change: a file
        // replaced by an older copy (e.g. restored from a backup) has a smaller mtime
        // but different content.
        for (const [filePath, mtimeMs] of currentFiles) {
            const tracked = this._tracked.get(filePath);
            if (!tracked || tracked.mtimeMs !== mtimeMs) {
                const ok = await this._syncFile(filePath, mtimeMs);
                if (ok) count++;
            }
        }

        // Delete files that no longer exist. Iterate over a snapshot because
        // _deleteFile removes entries from the tracked map.
        for (const [filePath, tracked] of [...this._tracked]) {
            if (!currentFiles.has(filePath)) {
                const ok = await this._deleteFile(tracked.uri);
                if (ok) count++;
            }
        }

        return count;
    }

    // --- Private methods ---

    /**
     * Runs the full sync performed when the watcher starts.
     */
    private async _initialSync(): Promise<void> {
        await this.sync();
    }

    /**
     * Returns true if the file passes the configured extension filter.
     * @param filePath Path of the file to test.
     */
    private _shouldInclude(filePath: string): boolean {
        if (!this._extensions) return true;
        const ext = path.extname(filePath).toLowerCase();
        return this._extensions.has(ext);
    }

    /**
     * Recursively collects the included files found at or below a path.
     * @param dirOrFile File or directory to scan. Paths that cannot be stat'ed are ignored.
     * @param out Map receiving `file path -> mtimeMs` for every included file.
     */
    private async _collectFiles(dirOrFile: string, out: Map<string, number>): Promise<void> {
        let stat: fs.Stats;
        try {
            stat = await fs.promises.stat(dirOrFile);
        } catch {
            return;
        }

        if (stat.isFile()) {
            if (this._shouldInclude(dirOrFile)) {
                out.set(dirOrFile, stat.mtimeMs);
            }
        } else if (stat.isDirectory()) {
            const entries = await fs.promises.readdir(dirOrFile);
            for (const entry of entries) {
                await this._collectFiles(path.join(dirOrFile, entry), out);
            }
        }
    }

    /**
     * Reads a file and upserts it into the index, using the file path as the document URI.
     * @param filePath Path of the file to sync.
     * @param mtimeMs Modification time to record for the file.
     * @returns True on success; false if the sync failed (an `error` event is emitted).
     */
    private async _syncFile(filePath: string, mtimeMs: number): Promise<boolean> {
        const wasTracked = this._tracked.has(filePath);
        const action = wasTracked ? 'updated' : 'added';
        try {
            const text = await fs.promises.readFile(filePath, 'utf-8');
            const ext = path.extname(filePath);
            // Document type is the lowercase extension without the dot, if there is one
            const docType = ext ? ext.slice(1).toLowerCase() : undefined;
            await this._index.upsertDocument(filePath, text, docType);
            this._tracked.set(filePath, { uri: filePath, mtimeMs });
            this.emit('sync', filePath, action);
            return true;
        } catch (err: unknown) {
            this.emit('error', err instanceof Error ? err : new Error(String(err)), filePath);
            return false;
        }
    }

    /**
     * Deletes a document from the index and stops tracking the matching file.
     * @param uri URI of the document to delete.
     * @returns True on success; false if the delete failed (an `error` event is emitted).
     */
    private async _deleteFile(uri: string): Promise<boolean> {
        try {
            await this._index.deleteDocument(uri);
            // Find and remove from tracked by URI
            for (const [filePath, tracked] of this._tracked) {
                if (tracked.uri === uri) {
                    this._tracked.delete(filePath);
                    break;
                }
            }
            this.emit('sync', uri, 'deleted');
            return true;
        } catch (err: unknown) {
            this.emit('error', err instanceof Error ? err : new Error(String(err)), uri);
            return false;
        }
    }

    /**
     * Attaches a recursive `fs.watch` watcher to a directory.
     * @param dirPath Directory to watch.
     */
    private _watchDirectory(dirPath: string): void {
        try {
            const watcher = fs.watch(dirPath, { recursive: true }, (_eventType, filename) => {
                if (!this._running || !filename) return;
                const fullPath = path.join(dirPath, filename);
                if (this._shouldInclude(fullPath)) {
                    this._debouncedSync(fullPath);
                }
            });
            watcher.on('error', (err) => {
                this.emit('error', err, dirPath);
            });
            this._watchers.push(watcher);
        } catch (err: unknown) {
            this.emit('error', err instanceof Error ? err : new Error(String(err)), dirPath);
        }
    }

    /**
     * Attaches an `fs.watch` watcher to a single file.
     * @param filePath File to watch.
     */
    private _watchFile(filePath: string): void {
        try {
            const watcher = fs.watch(filePath, () => {
                if (!this._running) return;
                this._debouncedSync(filePath);
            });
            watcher.on('error', (err) => {
                this.emit('error', err, filePath);
            });
            this._watchers.push(watcher);
        } catch (err: unknown) {
            this.emit('error', err instanceof Error ? err : new Error(String(err)), filePath);
        }
    }

    /**
     * Schedules a sync of a changed file after the debounce interval.
     * @param filePath Path reported by the filesystem watcher.
     */
    private _debouncedSync(filePath: string): void {
        // Cancel any pending sync for this file
        const existing = this._pending.get(filePath);
        if (existing) {
            clearTimeout(existing);
        }

        const timeout = setTimeout(async () => {
            this._pending.delete(filePath);
            if (!this._running) return;
            await this._handleChange(filePath);
        }, this._debounceMs);

        this._pending.set(filePath, timeout);
    }

    /**
     * Applies a single debounced change: syncs the file if it exists, or removes it
     * from the index if it is gone.
     * @param filePath Path reported by the filesystem watcher.
     */
    private async _handleChange(filePath: string): Promise<void> {
        let stat: fs.Stats;
        try {
            stat = await fs.promises.stat(filePath);
        } catch (err: unknown) {
            // Only a missing path means the file was deleted. Any other failure
            // (e.g. a permission error) must not remove the document from the index.
            const code = (err as NodeJS.ErrnoException | undefined)?.code;
            if (code === 'ENOENT' || code === 'ENOTDIR') {
                if (this._tracked.has(filePath)) {
                    await this._deleteFile(filePath);
                }
            } else {
                this.emit('error', err instanceof Error ? err : new Error(String(err)), filePath);
            }
            return;
        }

        // stop() may have been called while the stat was in flight
        if (!this._running) return;

        if (stat.isFile()) {
            // Kept outside the try/catch above so that an exception escaping
            // _syncFile can never be mistaken for a deletion.
            await this._syncFile(filePath, stat.mtimeMs);
        }
    }
}
@@ -0,0 +1,50 @@
1
+ import { strict as assert } from 'node:assert';
2
+ import { describe, it } from 'mocha';
3
+ import { GPT3Tokenizer } from '../src/GPT3Tokenizer';
4
+
5
// Unit tests for GPT3Tokenizer: encode/decode round-tripping and token shape.
describe('GPT3Tokenizer', () => {
    const tokenizer = new GPT3Tokenizer();

    it('encodes empty string to [] and decodes [] to empty string', () => {
        assert.deepEqual(tokenizer.encode(''), [], 'encode("") should return []');
        assert.equal(tokenizer.decode([]), '', 'decode([]) should return empty string');
    });

    it('round-trips various strings including unicode and punctuation', () => {
        const samples = [
            'Hello, world!',
            'Café 😊 こんにちは 𠜎𠜱𠝹𠱓',
            'Newlines\nand\ttabs with multiple spaces.',
            '--- *** ===='
        ];

        samples.forEach((sample) => {
            const encoded = tokenizer.encode(sample);
            const roundTripped = tokenizer.decode(encoded);
            assert.equal(roundTripped, sample, `decode(encode(s)) should equal s for: ${JSON.stringify(sample)}`);

            // Token array shape: an array containing only non-negative integers
            assert.ok(Array.isArray(encoded), 'encode should return an array');
            encoded.forEach((token) => {
                assert.equal(typeof token, 'number', 'each token should be a number');
                assert.ok(Number.isInteger(token), 'each token should be an integer');
                assert.ok(token >= 0, 'each token should be non-negative');
            });

            // Encoding the same input a second time must give the same tokens
            const encodedAgain = tokenizer.encode(sample);
            assert.deepEqual(encodedAgain, encoded, 'encode should be deterministic for the same input');
        });
    });

    it('produces non-empty tokens for typical non-empty input', () => {
        const input = 'This is a simple test.';
        const encoded = tokenizer.encode(input);
        assert.ok(encoded.length > 0, 'expected some tokens for non-empty input');
        assert.equal(tokenizer.decode(encoded), input, 'decoded text should match original input');
    });
});