node-llama-cpp 3.0.0-beta.4 → 3.0.0-beta.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (749)
  1. package/README.md +14 -11
  2. package/bins/_linux-x64-cuda.moved.txt +1 -0
  3. package/bins/_win-x64-cuda.moved.txt +1 -0
  4. package/bins/linux-arm64/_nlcBuildMetadata.json +1 -0
  5. package/bins/linux-arm64/libggml.so +0 -0
  6. package/bins/linux-arm64/libllama.so +0 -0
  7. package/bins/linux-arm64/llama-addon.node +0 -0
  8. package/bins/linux-armv7l/_nlcBuildMetadata.json +1 -0
  9. package/bins/linux-armv7l/libggml.so +0 -0
  10. package/bins/linux-armv7l/libllama.so +0 -0
  11. package/bins/linux-armv7l/llama-addon.node +0 -0
  12. package/bins/linux-x64/_nlcBuildMetadata.json +1 -0
  13. package/bins/linux-x64/libggml.so +0 -0
  14. package/bins/linux-x64/libllama.so +0 -0
  15. package/bins/linux-x64/llama-addon.node +0 -0
  16. package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -0
  17. package/bins/linux-x64-vulkan/libggml.so +0 -0
  18. package/bins/linux-x64-vulkan/libllama.so +0 -0
  19. package/bins/linux-x64-vulkan/llama-addon.node +0 -0
  20. package/bins/linux-x64-vulkan/vulkan-shaders-gen +0 -0
  21. package/bins/mac-arm64-metal/_nlcBuildMetadata.json +1 -0
  22. package/bins/mac-arm64-metal/ggml-common.h +1833 -0
  23. package/bins/mac-arm64-metal/ggml-metal.metal +6168 -0
  24. package/bins/mac-arm64-metal/libggml.dylib +0 -0
  25. package/bins/mac-arm64-metal/libllama.dylib +0 -0
  26. package/bins/mac-arm64-metal/llama-addon.node +0 -0
  27. package/bins/mac-x64/_nlcBuildMetadata.json +1 -0
  28. package/bins/mac-x64/libggml.dylib +0 -0
  29. package/bins/mac-x64/libllama.dylib +0 -0
  30. package/bins/mac-x64/llama-addon.node +0 -0
  31. package/bins/win-arm64/_nlcBuildMetadata.json +1 -0
  32. package/bins/win-arm64/ggml.dll +0 -0
  33. package/bins/win-arm64/llama-addon.exp +0 -0
  34. package/bins/win-arm64/llama-addon.lib +0 -0
  35. package/bins/win-arm64/llama-addon.node +0 -0
  36. package/bins/win-arm64/llama.dll +0 -0
  37. package/bins/win-x64/_nlcBuildMetadata.json +1 -0
  38. package/bins/win-x64/ggml.dll +0 -0
  39. package/bins/win-x64/llama-addon.exp +0 -0
  40. package/bins/win-x64/llama-addon.lib +0 -0
  41. package/bins/win-x64/llama-addon.node +0 -0
  42. package/bins/win-x64/llama.dll +0 -0
  43. package/bins/win-x64-vulkan/_nlcBuildMetadata.json +1 -0
  44. package/bins/win-x64-vulkan/ggml.dll +0 -0
  45. package/bins/win-x64-vulkan/llama-addon.exp +0 -0
  46. package/bins/win-x64-vulkan/llama-addon.lib +0 -0
  47. package/bins/win-x64-vulkan/llama-addon.node +0 -0
  48. package/bins/win-x64-vulkan/llama.dll +0 -0
  49. package/bins/win-x64-vulkan/vulkan-shaders-gen.exe +0 -0
  50. package/dist/ChatWrapper.d.ts +9 -39
  51. package/dist/ChatWrapper.js +129 -72
  52. package/dist/ChatWrapper.js.map +1 -1
  53. package/dist/apiDocsIndex.d.ts +1 -0
  54. package/dist/apiDocsIndex.js +7 -0
  55. package/dist/apiDocsIndex.js.map +1 -0
  56. package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +66 -9
  57. package/dist/bindings/AddonTypes.js +2 -0
  58. package/dist/bindings/AddonTypes.js.map +1 -0
  59. package/dist/bindings/Llama.d.ts +47 -0
  60. package/dist/bindings/Llama.js +356 -0
  61. package/dist/bindings/Llama.js.map +1 -0
  62. package/dist/bindings/consts.d.ts +2 -0
  63. package/dist/bindings/consts.js +11 -0
  64. package/dist/bindings/consts.js.map +1 -0
  65. package/dist/bindings/getLlama.d.ts +152 -0
  66. package/dist/bindings/getLlama.js +403 -0
  67. package/dist/bindings/getLlama.js.map +1 -0
  68. package/dist/bindings/types.d.ts +57 -0
  69. package/dist/bindings/types.js +77 -0
  70. package/dist/bindings/types.js.map +1 -0
  71. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  72. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  73. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  74. package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
  75. package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
  76. package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
  77. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  78. package/dist/bindings/utils/asyncEvery.js +15 -0
  79. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  80. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  81. package/dist/bindings/utils/asyncSome.js +27 -0
  82. package/dist/bindings/utils/asyncSome.js.map +1 -0
  83. package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
  84. package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
  85. package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
  86. package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
  87. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
  88. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
  89. package/dist/bindings/utils/cloneLlamaCppRepo.js +166 -0
  90. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
  91. package/dist/bindings/utils/compileLLamaCpp.d.ts +21 -0
  92. package/dist/bindings/utils/compileLLamaCpp.js +288 -0
  93. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
  94. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
  95. package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
  96. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  97. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  98. package/dist/bindings/utils/detectGlibc.js +46 -0
  99. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  100. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +10 -0
  101. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  102. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  103. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
  104. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +93 -0
  105. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
  106. package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
  107. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
  108. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
  109. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
  110. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
  111. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
  112. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +13 -0
  113. package/dist/bindings/utils/getGpuTypesToUseForOption.js +39 -0
  114. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  115. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  116. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  117. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  118. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  119. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  120. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  121. package/dist/bindings/utils/getPlatform.d.ts +2 -0
  122. package/dist/bindings/utils/getPlatform.js +15 -0
  123. package/dist/bindings/utils/getPlatform.js.map +1 -0
  124. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  125. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  126. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  127. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  128. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  129. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  130. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  131. package/dist/bindings/utils/hasFileInPath.js +34 -0
  132. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  133. package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
  134. package/dist/bindings/utils/lastBuildInfo.js +17 -0
  135. package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
  136. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
  137. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +22 -0
  138. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
  139. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  140. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  141. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  142. package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
  143. package/dist/bindings/utils/resolveCustomCmakeOptions.js +43 -0
  144. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
  145. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  146. package/dist/bindings/utils/testBindingBinary.js +100 -0
  147. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  148. package/dist/bindings/utils/testCmakeBinary.d.ts +6 -0
  149. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  150. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  151. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  152. package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
  153. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  154. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -9
  155. package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
  156. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  157. package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
  158. package/dist/chatWrappers/FalconChatWrapper.js +38 -21
  159. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  160. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
  161. package/dist/chatWrappers/FunctionaryChatWrapper.js +326 -118
  162. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  163. package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
  164. package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
  165. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  166. package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
  167. package/dist/chatWrappers/GeneralChatWrapper.js +45 -22
  168. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  169. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
  170. package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +38 -20
  171. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  172. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
  173. package/dist/chatWrappers/Llama3ChatWrapper.js +173 -0
  174. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  175. package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +31 -0
  176. package/dist/chatWrappers/Llama3_1ChatWrapper.js +223 -0
  177. package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
  178. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +76 -0
  179. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +371 -0
  180. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  181. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +54 -0
  182. package/dist/chatWrappers/generic/TemplateChatWrapper.js +200 -0
  183. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  184. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
  185. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  186. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  187. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +57 -0
  188. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +119 -0
  189. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  190. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  191. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +210 -0
  192. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  193. package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
  194. package/dist/chatWrappers/utils/jsonDumps.js +18 -0
  195. package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
  196. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +71 -0
  197. package/dist/chatWrappers/utils/resolveChatWrapper.js +289 -0
  198. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  199. package/dist/cli/cli.js +21 -7
  200. package/dist/cli/cli.js.map +1 -1
  201. package/dist/cli/commands/BuildCommand.d.ts +11 -4
  202. package/dist/cli/commands/BuildCommand.js +114 -41
  203. package/dist/cli/commands/BuildCommand.js.map +1 -1
  204. package/dist/cli/commands/ChatCommand.d.ts +19 -7
  205. package/dist/cli/commands/ChatCommand.js +306 -150
  206. package/dist/cli/commands/ChatCommand.js.map +1 -1
  207. package/dist/cli/commands/ClearCommand.d.ts +1 -1
  208. package/dist/cli/commands/ClearCommand.js +11 -12
  209. package/dist/cli/commands/ClearCommand.js.map +1 -1
  210. package/dist/cli/commands/CompleteCommand.d.ts +30 -0
  211. package/dist/cli/commands/CompleteCommand.js +374 -0
  212. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  213. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  214. package/dist/cli/commands/DebugCommand.js +54 -0
  215. package/dist/cli/commands/DebugCommand.js.map +1 -0
  216. package/dist/cli/commands/DownloadCommand.d.ts +7 -4
  217. package/dist/cli/commands/DownloadCommand.js +121 -70
  218. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  219. package/dist/cli/commands/InfillCommand.d.ts +32 -0
  220. package/dist/cli/commands/InfillCommand.js +410 -0
  221. package/dist/cli/commands/InfillCommand.js.map +1 -0
  222. package/dist/cli/commands/InitCommand.d.ts +11 -0
  223. package/dist/cli/commands/InitCommand.js +195 -0
  224. package/dist/cli/commands/InitCommand.js.map +1 -0
  225. package/dist/cli/commands/OnPostInstallCommand.js +9 -10
  226. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  227. package/dist/cli/commands/PullCommand.d.ts +12 -0
  228. package/dist/cli/commands/PullCommand.js +117 -0
  229. package/dist/cli/commands/PullCommand.js.map +1 -0
  230. package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
  231. package/dist/cli/commands/inspect/InspectCommand.js +19 -0
  232. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  233. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
  234. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +136 -0
  235. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  236. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  237. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +180 -0
  238. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  239. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +18 -0
  240. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +626 -0
  241. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  242. package/dist/cli/projectTemplates.d.ts +7 -0
  243. package/dist/cli/projectTemplates.js +10 -0
  244. package/dist/cli/projectTemplates.js.map +1 -0
  245. package/dist/cli/recommendedModels.d.ts +2 -0
  246. package/dist/cli/recommendedModels.js +376 -0
  247. package/dist/cli/recommendedModels.js.map +1 -0
  248. package/dist/cli/startCreateCli.d.ts +2 -0
  249. package/dist/cli/startCreateCli.js +26 -0
  250. package/dist/cli/startCreateCli.js.map +1 -0
  251. package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
  252. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  253. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  254. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  255. package/dist/cli/utils/ConsoleTable.js +86 -0
  256. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  257. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  258. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  259. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  260. package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
  261. package/dist/cli/utils/consolePromptQuestion.js +82 -0
  262. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  263. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  264. package/dist/cli/utils/getReadablePath.js +14 -0
  265. package/dist/cli/utils/getReadablePath.js.map +1 -0
  266. package/dist/cli/utils/interactivelyAskForModel.d.ts +8 -0
  267. package/dist/cli/utils/interactivelyAskForModel.js +461 -0
  268. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
  269. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  270. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  271. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  272. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  273. package/dist/cli/utils/printCommonInfoLines.js +79 -0
  274. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  275. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  276. package/dist/cli/utils/printInfoLine.js +54 -0
  277. package/dist/cli/utils/printInfoLine.js.map +1 -0
  278. package/dist/cli/utils/projectTemplates.d.ts +19 -0
  279. package/dist/cli/utils/projectTemplates.js +47 -0
  280. package/dist/cli/utils/projectTemplates.js.map +1 -0
  281. package/dist/cli/utils/resolveCommandGgufPath.d.ts +5 -0
  282. package/dist/cli/utils/resolveCommandGgufPath.js +72 -0
  283. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  284. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  285. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  286. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  287. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  288. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  289. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  290. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  291. package/dist/cli/utils/splitAnsiToLines.js +32 -0
  292. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  293. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
  294. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
  295. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
  296. package/dist/commands.d.ts +1 -0
  297. package/dist/commands.js +3 -0
  298. package/dist/commands.js.map +1 -1
  299. package/dist/config.d.ts +38 -5
  300. package/dist/config.js +61 -16
  301. package/dist/config.js.map +1 -1
  302. package/dist/consts.d.ts +4 -0
  303. package/dist/consts.js +11 -0
  304. package/dist/consts.js.map +1 -0
  305. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +270 -0
  306. package/dist/evaluator/LlamaChat/LlamaChat.js +1544 -0
  307. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
  308. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
  309. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
  310. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
  311. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
  312. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
  313. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
  314. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
  315. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
  316. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
  317. package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +42 -16
  318. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  319. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +310 -0
  320. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +425 -0
  321. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  322. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +39 -0
  323. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
  324. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
  325. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
  326. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
  327. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  328. package/dist/evaluator/LlamaCompletion.d.ts +154 -0
  329. package/dist/evaluator/LlamaCompletion.js +424 -0
  330. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  331. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +42 -22
  332. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +338 -81
  333. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
  334. package/dist/evaluator/LlamaContext/types.d.ts +175 -0
  335. package/dist/evaluator/LlamaContext/types.js.map +1 -0
  336. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  337. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  338. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  339. package/dist/{llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js → evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js} +4 -4
  340. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  341. package/dist/evaluator/LlamaEmbeddingContext.d.ts +51 -0
  342. package/dist/evaluator/LlamaEmbeddingContext.js +73 -0
  343. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
  344. package/dist/evaluator/LlamaGrammar.d.ts +34 -0
  345. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +15 -12
  346. package/dist/evaluator/LlamaGrammar.js.map +1 -0
  347. package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
  348. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
  349. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
  350. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +3 -3
  351. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
  352. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +242 -0
  353. package/dist/evaluator/LlamaModel/LlamaModel.js +765 -0
  354. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
  355. package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
  356. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
  357. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
  358. package/dist/evaluator/TokenBias.d.ts +22 -0
  359. package/dist/evaluator/TokenBias.js +33 -0
  360. package/dist/evaluator/TokenBias.js.map +1 -0
  361. package/dist/evaluator/TokenMeter.d.ts +54 -0
  362. package/dist/evaluator/TokenMeter.js +86 -0
  363. package/dist/evaluator/TokenMeter.js.map +1 -0
  364. package/dist/gguf/consts.d.ts +4 -0
  365. package/dist/gguf/consts.js +12 -0
  366. package/dist/gguf/consts.js.map +1 -0
  367. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  368. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  369. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  370. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  371. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  372. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  373. package/dist/gguf/fileReaders/GgufFileReader.d.ts +37 -0
  374. package/dist/gguf/fileReaders/GgufFileReader.js +109 -0
  375. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  376. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +18 -0
  377. package/dist/gguf/fileReaders/GgufFsFileReader.js +62 -0
  378. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  379. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +23 -0
  380. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +79 -0
  381. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  382. package/dist/gguf/insights/GgufInsights.d.ts +50 -0
  383. package/dist/gguf/insights/GgufInsights.js +401 -0
  384. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  385. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +90 -0
  386. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +144 -0
  387. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  388. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +19 -0
  389. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +78 -0
  390. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  391. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +15 -0
  392. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +183 -0
  393. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  394. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  395. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  396. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  397. package/dist/gguf/parser/GgufV2Parser.d.ts +20 -0
  398. package/dist/gguf/parser/GgufV2Parser.js +156 -0
  399. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  400. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  401. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  402. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  403. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  404. package/dist/gguf/parser/parseGguf.js +61 -0
  405. package/dist/gguf/parser/parseGguf.js.map +1 -0
  406. package/dist/gguf/readGgufFileInfo.d.ts +33 -0
  407. package/dist/gguf/readGgufFileInfo.js +66 -0
  408. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  409. package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
  410. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  411. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  412. package/dist/gguf/types/GgufMetadataTypes.d.ts +356 -0
  413. package/dist/gguf/types/GgufMetadataTypes.js +99 -0
  414. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  415. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  416. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  417. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  418. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  419. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  420. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  421. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +6 -0
  422. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +74 -0
  423. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  424. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  425. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  426. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  427. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  428. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  429. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  430. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  431. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  432. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  433. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  434. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
  435. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  436. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  437. package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
  438. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  439. package/dist/index.d.ts +43 -18
  440. package/dist/index.js +38 -15
  441. package/dist/index.js.map +1 -1
  442. package/dist/state.d.ts +4 -0
  443. package/dist/state.js +14 -0
  444. package/dist/state.js.map +1 -1
  445. package/dist/types.d.ts +130 -5
  446. package/dist/types.js.map +1 -1
  447. package/dist/utils/DeepPartialObject.d.ts +3 -0
  448. package/dist/utils/DeepPartialObject.js +2 -0
  449. package/dist/utils/DeepPartialObject.js.map +1 -0
  450. package/dist/utils/DisposeGuard.d.ts +13 -0
  451. package/dist/utils/DisposeGuard.js +120 -0
  452. package/dist/utils/DisposeGuard.js.map +1 -0
  453. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  454. package/dist/utils/InsufficientMemoryError.js +6 -0
  455. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  456. package/dist/utils/LlamaText.d.ts +70 -26
  457. package/dist/utils/LlamaText.js +472 -157
  458. package/dist/utils/LlamaText.js.map +1 -1
  459. package/dist/utils/LruCache.d.ts +12 -0
  460. package/dist/utils/LruCache.js +44 -0
  461. package/dist/utils/LruCache.js.map +1 -0
  462. package/dist/utils/ReplHistory.js +5 -1
  463. package/dist/utils/ReplHistory.js.map +1 -1
  464. package/dist/utils/StopGenerationDetector.d.ts +27 -8
  465. package/dist/utils/StopGenerationDetector.js +108 -22
  466. package/dist/utils/StopGenerationDetector.js.map +1 -1
  467. package/dist/utils/TokenStreamRegulator.d.ts +10 -4
  468. package/dist/utils/TokenStreamRegulator.js +102 -10
  469. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  470. package/dist/utils/UnsupportedError.d.ts +2 -0
  471. package/dist/utils/UnsupportedError.js +7 -0
  472. package/dist/utils/UnsupportedError.js.map +1 -0
  473. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
  474. package/dist/utils/clearTempFolder.js.map +1 -1
  475. package/dist/utils/cmake.js +38 -20
  476. package/dist/utils/cmake.js.map +1 -1
  477. package/dist/utils/createModelDownloader.d.ts +111 -0
  478. package/dist/utils/createModelDownloader.js +273 -0
  479. package/dist/utils/createModelDownloader.js.map +1 -0
  480. package/dist/utils/findBestOption.d.ts +4 -0
  481. package/dist/utils/findBestOption.js +15 -0
  482. package/dist/utils/findBestOption.js.map +1 -0
  483. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
  484. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
  485. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  486. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  487. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
  488. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
  489. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
  490. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
  491. package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
  492. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  493. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  494. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  495. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  496. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  497. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  498. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  499. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  500. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  501. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  502. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
  503. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
  504. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  505. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  506. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
  507. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
  508. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
  509. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  510. package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
  511. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  512. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  513. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  514. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
  515. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
  516. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  517. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
  518. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
  519. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
  520. package/dist/utils/gbnfJson/types.d.ts +3 -0
  521. package/dist/utils/gbnfJson/types.js.map +1 -1
  522. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
  523. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
  524. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
  525. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
  526. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
  527. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  528. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +2 -2
  529. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  530. package/dist/utils/getBuildDefaults.d.ts +1 -2
  531. package/dist/utils/getBuildDefaults.js +2 -3
  532. package/dist/utils/getBuildDefaults.js.map +1 -1
  533. package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
  534. package/dist/utils/getConsoleLogPrefix.js +10 -0
  535. package/dist/utils/getConsoleLogPrefix.js.map +1 -0
  536. package/dist/utils/getGrammarsFolder.d.ts +2 -1
  537. package/dist/utils/getGrammarsFolder.js +8 -7
  538. package/dist/utils/getGrammarsFolder.js.map +1 -1
  539. package/dist/utils/getModuleVersion.d.ts +1 -0
  540. package/dist/utils/getModuleVersion.js +13 -0
  541. package/dist/utils/getModuleVersion.js.map +1 -0
  542. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  543. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  544. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  545. package/dist/utils/getReadableContextSize.d.ts +1 -0
  546. package/dist/utils/getReadableContextSize.js +7 -0
  547. package/dist/utils/getReadableContextSize.js.map +1 -0
  548. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
  549. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  550. package/dist/utils/gitReleaseBundles.js +73 -5
  551. package/dist/utils/gitReleaseBundles.js.map +1 -1
  552. package/dist/utils/hashString.d.ts +1 -0
  553. package/dist/utils/hashString.js +8 -0
  554. package/dist/utils/hashString.js.map +1 -0
  555. package/dist/utils/isLockfileActive.d.ts +4 -0
  556. package/dist/utils/isLockfileActive.js +12 -0
  557. package/dist/utils/isLockfileActive.js.map +1 -0
  558. package/dist/utils/isToken.d.ts +2 -0
  559. package/dist/utils/isToken.js +4 -0
  560. package/dist/utils/isToken.js.map +1 -0
  561. package/dist/utils/isUrl.d.ts +1 -0
  562. package/dist/utils/isUrl.js +15 -0
  563. package/dist/utils/isUrl.js.map +1 -0
  564. package/dist/utils/mergeUnionTypes.d.ts +10 -0
  565. package/dist/utils/mergeUnionTypes.js +2 -0
  566. package/dist/utils/mergeUnionTypes.js.map +1 -0
  567. package/dist/utils/parseModelFileName.d.ts +1 -0
  568. package/dist/utils/parseModelFileName.js +6 -1
  569. package/dist/utils/parseModelFileName.js.map +1 -1
  570. package/dist/utils/parseTextTemplate.d.ts +66 -0
  571. package/dist/utils/parseTextTemplate.js +116 -0
  572. package/dist/utils/parseTextTemplate.js.map +1 -0
  573. package/dist/utils/prettyPrintObject.d.ts +10 -0
  574. package/dist/utils/prettyPrintObject.js +84 -0
  575. package/dist/utils/prettyPrintObject.js.map +1 -0
  576. package/dist/utils/pushAll.d.ts +6 -0
  577. package/dist/utils/pushAll.js +11 -0
  578. package/dist/utils/pushAll.js.map +1 -0
  579. package/dist/utils/removeNullFields.d.ts +2 -1
  580. package/dist/utils/removeNullFields.js +8 -0
  581. package/dist/utils/removeNullFields.js.map +1 -1
  582. package/dist/utils/resolveGithubRelease.d.ts +2 -0
  583. package/dist/utils/resolveGithubRelease.js +36 -0
  584. package/dist/utils/resolveGithubRelease.js.map +1 -0
  585. package/dist/utils/resolveLastTokens.d.ts +2 -0
  586. package/dist/utils/resolveLastTokens.js +12 -0
  587. package/dist/utils/resolveLastTokens.js.map +1 -0
  588. package/dist/utils/runtime.d.ts +4 -0
  589. package/dist/utils/runtime.js +8 -0
  590. package/dist/utils/runtime.js.map +1 -0
  591. package/dist/utils/safeEventCallback.d.ts +6 -0
  592. package/dist/utils/safeEventCallback.js +29 -0
  593. package/dist/utils/safeEventCallback.js.map +1 -0
  594. package/dist/utils/spawnCommand.d.ts +11 -1
  595. package/dist/utils/spawnCommand.js +56 -6
  596. package/dist/utils/spawnCommand.js.map +1 -1
  597. package/dist/utils/tokenizeInput.d.ts +3 -0
  598. package/dist/utils/tokenizeInput.js +12 -0
  599. package/dist/utils/tokenizeInput.js.map +1 -0
  600. package/dist/utils/transformPromisable.d.ts +40 -0
  601. package/dist/utils/transformPromisable.js +64 -0
  602. package/dist/utils/transformPromisable.js.map +1 -0
  603. package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
  604. package/dist/utils/truncateTextAndRoundToWords.js +30 -0
  605. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
  606. package/dist/utils/utilTypes.d.ts +3 -0
  607. package/dist/utils/utilTypes.js +2 -0
  608. package/dist/utils/utilTypes.js.map +1 -0
  609. package/dist/utils/waitForLockfileRelease.d.ts +5 -0
  610. package/dist/utils/waitForLockfileRelease.js +20 -0
  611. package/dist/utils/waitForLockfileRelease.js.map +1 -0
  612. package/dist/utils/withLockfile.d.ts +7 -0
  613. package/dist/utils/withLockfile.js +44 -0
  614. package/dist/utils/withLockfile.js.map +1 -0
  615. package/dist/utils/withOra.d.ts +2 -0
  616. package/dist/utils/withOra.js +22 -6
  617. package/dist/utils/withOra.js.map +1 -1
  618. package/dist/utils/withProgressLog.d.ts +23 -0
  619. package/dist/utils/withProgressLog.js +211 -0
  620. package/dist/utils/withProgressLog.js.map +1 -0
  621. package/dist/utils/withStatusLogs.d.ts +2 -1
  622. package/dist/utils/withStatusLogs.js +12 -9
  623. package/dist/utils/withStatusLogs.js.map +1 -1
  624. package/dist/utils/wrapAbortSignal.d.ts +2 -0
  625. package/dist/utils/wrapAbortSignal.js +9 -0
  626. package/dist/utils/wrapAbortSignal.js.map +1 -0
  627. package/llama/.clang-format +1 -2
  628. package/llama/CMakeLists.txt +126 -5
  629. package/llama/addon/AddonContext.cpp +772 -0
  630. package/llama/addon/AddonContext.h +53 -0
  631. package/llama/addon/AddonGrammar.cpp +44 -0
  632. package/llama/addon/AddonGrammar.h +18 -0
  633. package/llama/addon/AddonGrammarEvaluationState.cpp +28 -0
  634. package/llama/addon/AddonGrammarEvaluationState.h +15 -0
  635. package/llama/addon/AddonModel.cpp +681 -0
  636. package/llama/addon/AddonModel.h +61 -0
  637. package/llama/addon/AddonModelData.cpp +25 -0
  638. package/llama/addon/AddonModelData.h +15 -0
  639. package/llama/addon/AddonModelLora.cpp +107 -0
  640. package/llama/addon/AddonModelLora.h +28 -0
  641. package/llama/addon/addon.cpp +216 -0
  642. package/llama/addon/addonGlobals.cpp +22 -0
  643. package/llama/addon/addonGlobals.h +12 -0
  644. package/llama/addon/globals/addonLog.cpp +135 -0
  645. package/llama/addon/globals/addonLog.h +21 -0
  646. package/llama/addon/globals/addonProgress.cpp +15 -0
  647. package/llama/addon/globals/addonProgress.h +15 -0
  648. package/llama/addon/globals/getGpuInfo.cpp +108 -0
  649. package/llama/addon/globals/getGpuInfo.h +6 -0
  650. package/llama/binariesGithubRelease.json +1 -1
  651. package/llama/gitRelease.bundle +0 -0
  652. package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
  653. package/llama/gpuInfo/cuda-gpu-info.h +10 -0
  654. package/llama/gpuInfo/metal-gpu-info.h +8 -0
  655. package/llama/gpuInfo/metal-gpu-info.mm +30 -0
  656. package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
  657. package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
  658. package/llama/grammars/README.md +297 -6
  659. package/llama/grammars/json.gbnf +4 -4
  660. package/llama/grammars/json_arr.gbnf +4 -4
  661. package/llama/llama.cpp.info.json +4 -0
  662. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  663. package/package.json +85 -54
  664. package/templates/packed/electron-typescript-react.json +1 -0
  665. package/templates/packed/node-typescript.json +1 -0
  666. package/dist/AbortError.d.ts +0 -2
  667. package/dist/AbortError.js +0 -7
  668. package/dist/AbortError.js.map +0 -1
  669. package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
  670. package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
  671. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  672. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
  673. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  674. package/dist/llamaEvaluator/LlamaBins.d.ts +0 -18
  675. package/dist/llamaEvaluator/LlamaBins.js +0 -5
  676. package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
  677. package/dist/llamaEvaluator/LlamaChat/LlamaChat.d.ts +0 -175
  678. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js +0 -704
  679. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +0 -1
  680. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -21
  681. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -120
  682. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
  683. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +0 -1
  684. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.d.ts +0 -146
  685. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js +0 -211
  686. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +0 -1
  687. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +0 -1
  688. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
  689. package/dist/llamaEvaluator/LlamaContext/types.d.ts +0 -82
  690. package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
  691. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  692. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  693. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  694. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  695. package/dist/llamaEvaluator/LlamaEmbeddingContext.d.ts +0 -35
  696. package/dist/llamaEvaluator/LlamaEmbeddingContext.js +0 -73
  697. package/dist/llamaEvaluator/LlamaEmbeddingContext.js.map +0 -1
  698. package/dist/llamaEvaluator/LlamaGrammar.d.ts +0 -28
  699. package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
  700. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
  701. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
  702. package/dist/llamaEvaluator/LlamaModel.d.ts +0 -119
  703. package/dist/llamaEvaluator/LlamaModel.js +0 -322
  704. package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
  705. package/dist/utils/binariesGithubRelease.js.map +0 -1
  706. package/dist/utils/clearLlamaBuild.d.ts +0 -1
  707. package/dist/utils/clearLlamaBuild.js +0 -12
  708. package/dist/utils/clearLlamaBuild.js.map +0 -1
  709. package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
  710. package/dist/utils/cloneLlamaCppRepo.js +0 -102
  711. package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
  712. package/dist/utils/compileLLamaCpp.d.ts +0 -8
  713. package/dist/utils/compileLLamaCpp.js +0 -132
  714. package/dist/utils/compileLLamaCpp.js.map +0 -1
  715. package/dist/utils/getBin.js +0 -78
  716. package/dist/utils/getBin.js.map +0 -1
  717. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
  718. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
  719. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
  720. package/dist/utils/getReleaseInfo.d.ts +0 -7
  721. package/dist/utils/getReleaseInfo.js +0 -30
  722. package/dist/utils/getReleaseInfo.js.map +0 -1
  723. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  724. package/dist/utils/parseModelTypeDescription.js +0 -9
  725. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  726. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  727. package/dist/utils/resolveChatWrapper.js +0 -16
  728. package/dist/utils/resolveChatWrapper.js.map +0 -1
  729. package/dist/utils/usedBinFlag.d.ts +0 -6
  730. package/dist/utils/usedBinFlag.js +0 -15
  731. package/dist/utils/usedBinFlag.js.map +0 -1
  732. package/llama/addon.cpp +0 -814
  733. package/llama/usedBin.json +0 -3
  734. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  735. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  736. package/llamaBins/linux-x64/llama-addon.node +0 -0
  737. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  738. package/llamaBins/mac-x64/llama-addon.node +0 -0
  739. package/llamaBins/win-x64/llama-addon.exp +0 -0
  740. package/llamaBins/win-x64/llama-addon.lib +0 -0
  741. package/llamaBins/win-x64/llama-addon.node +0 -0
  742. /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
  743. /package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +0 -0
  744. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
  745. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  746. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  747. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  748. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
  749. /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
@@ -1,20 +1,28 @@
1
- import { DisposeAggregator, EventRelay, withLock, DisposedError } from "lifecycle-utils";
1
+ import { AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
2
2
  import { removeNullFields } from "../../utils/removeNullFields.js";
3
- import { AddonContext } from "../LlamaBins.js";
4
3
  import { compareTokens } from "../../utils/compareTokens.js";
5
- import { resolveBatchItemsPrioritizingStrategy } from "./utils/resolveBatchItemsPrioritizingStrategy.js";
4
+ import { DisposeGuard } from "../../utils/DisposeGuard.js";
5
+ import { TokenMeter } from "../TokenMeter.js";
6
+ import { resolveBatchItemsPrioritizationStrategy } from "./utils/resolveBatchItemsPrioritizationStrategy.js";
7
+ const defaultLoraScale = 1;
6
8
  export class LlamaContext {
9
+ /** @internal */ _llama;
7
10
  /** @internal */ _ctx;
8
11
  /** @internal */ _onReclaimUnusedSequenceId = new EventRelay();
12
+ /** @internal */ _backendContextDisposeGuard;
9
13
  /** @internal */ _model;
10
14
  /** @internal */ _contextSize;
11
15
  /** @internal */ _batchSize;
16
+ /** @internal */ _flashAttention;
12
17
  /** @internal */ _totalSequences;
13
18
  /** @internal */ _unusedSequenceIds = [];
14
19
  /** @internal */ _batchingOptions;
15
20
  /** @internal */ _queuedDecodeSequenceIds = new Set();
16
21
  /** @internal */ _queuedDecodes = [];
17
- /** @internal */ _disposeAggregator = new DisposeAggregator();
22
+ /** @internal */ _disposeAggregator = new AsyncDisposeAggregator();
23
+ /** @internal */ _modelPreventDisposalHandle;
24
+ /** @internal */ _loraAdapters = new Set();
25
+ /** @internal */ _gcRegistry;
18
26
  /** @internal */ _nextGeneratedSequenceId = 0;
19
27
  /** @internal */ _dispatchDecodeScheduled = false;
20
28
  /** @internal */ _batchDispatchPending = false;
@@ -22,44 +30,62 @@ export class LlamaContext {
22
30
  /** @internal */ _allocatedContextSize;
23
31
  /** @internal */ _disposed = false;
24
32
  onDispose = new EventRelay();
25
- /**
26
- * @param options
27
- */
28
- constructor({ model, sequences = 1, seed = null, contextSize = model.trainContextSize, batchSize = contextSize, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy = "maximumParallelism" } = {}, _embedding, _noSeed }) {
29
- if (model.disposed)
33
+ constructor({ _model }, { sequences, seed = null, contextSize, batchSize, flashAttention = _model.defaultContextFlashAttention, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemPrioritizationStrategy: batchingItemsPrioritizationStrategy = "maximumParallelism" } = {}, _embeddings, _noSeed }) {
34
+ if (_model.disposed)
30
35
  throw new DisposedError();
31
- this._model = model;
36
+ this._llama = _model._llama;
37
+ this._model = _model;
38
+ this._backendContextDisposeGuard = new DisposeGuard([this._model._backendModelDisposeGuard]);
39
+ this._modelPreventDisposalHandle = this._model._backendModelDisposeGuard.createPreventDisposalHandle();
32
40
  this._totalSequences = Math.max(1, Math.floor(sequences));
33
41
  this._contextSize = Math.max(2, contextSize);
34
42
  this._batchSize = Math.max(batchSize, this._totalSequences);
35
- this._ctx = new AddonContext(this._model._model, removeNullFields({
43
+ this._flashAttention = flashAttention;
44
+ this._ctx = new this._llama._bindings.AddonContext(this._model._model, removeNullFields({
36
45
  seed: seed != null ? Math.max(-1, Math.floor(seed)) : undefined,
37
- contextSize: contextSize * this._totalSequences,
46
+ contextSize: this._contextSize * this._totalSequences, // each sequence needs its own <contextSize> of cells
38
47
  batchSize: this._batchSize,
48
+ sequences: this._totalSequences,
49
+ flashAttention: this._flashAttention,
39
50
  threads: Math.max(0, Math.floor(threads)),
40
- embedding: _embedding,
51
+ embeddings: _embeddings,
41
52
  noSeed: _noSeed
42
53
  }));
43
54
  this._batchingOptions = {
44
55
  dispatchSchedule: batchingDispatchSchedule,
45
- itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy
56
+ itemPrioritizationStrategy: batchingItemsPrioritizationStrategy
46
57
  };
58
+ this._gcRegistry = new FinalizationRegistry(this._model._removeLoraUsage);
59
+ this._gcRegistry.register(this, this._loraAdapters);
47
60
  this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
61
+ this._disposeAggregator.add(() => {
62
+ this._disposed = true;
63
+ });
64
+ this._disposeAggregator.add(() => this._gcRegistry.unregister(this));
48
65
  this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
49
66
  this._disposeAggregator.add(this.onDispose.dispatchEvent);
67
+ this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
50
68
  this._disposeAggregator.add(() => {
51
- this._ctx.dispose();
69
+ if (this._loraAdapters.size > 0) {
70
+ const loraAdapters = new Set(this._loraAdapters);
71
+ this._loraAdapters.clear();
72
+ return this._model._removeLoraUsage(loraAdapters);
73
+ }
74
+ });
75
+ this._disposeAggregator.add(async () => {
76
+ await this._backendContextDisposeGuard.acquireDisposeLock();
77
+ await this._ctx.dispose();
78
+ this._modelPreventDisposalHandle.dispose();
52
79
  });
53
- this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
54
80
  }
55
- dispose() {
81
+ async dispose() {
56
82
  if (this._disposed)
57
83
  return;
58
84
  this._disposed = true;
59
- this._disposeAggregator.dispose();
85
+ await this._disposeAggregator.dispose();
60
86
  }
61
87
  /** @hidden */
62
- [Symbol.dispose]() {
88
+ [Symbol.asyncDispose]() {
63
89
  return this.dispose();
64
90
  }
65
91
  get disposed() {
@@ -74,6 +100,17 @@ export class LlamaContext {
74
100
  get batchSize() {
75
101
  return this._batchSize;
76
102
  }
103
+ get flashAttention() {
104
+ return this._flashAttention;
105
+ }
106
+ /**
107
+ * The actual size of the state in the memory in bytes.
108
+ * This value is provided by `llama.cpp` and doesn't include all the memory overhead of the context.
109
+ */
110
+ get stateSize() {
111
+ this._ensureNotDisposed();
112
+ return this._ctx.getStateSize();
113
+ }
77
114
  getAllocatedContextSize() {
78
115
  this._ensureNotDisposed();
79
116
  if (this._allocatedContextSize == null)
@@ -89,9 +126,9 @@ export class LlamaContext {
89
126
  /**
90
127
  * Before calling this method, make sure to call `sequencesLeft` to check if there are any sequences left.
91
128
  * When there are no sequences left, this method will throw an error.
92
- * @param [options]
93
129
  */
94
- getSequence({ contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} } = {}) {
130
+ getSequence(options = {}) {
131
+ const { contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {}, _tokenMeter } = options;
95
132
  this._ensureNotDisposed();
96
133
  const nextSequenceId = this._popSequenceId();
97
134
  if (nextSequenceId == null)
@@ -99,6 +136,7 @@ export class LlamaContext {
99
136
  return LlamaContextSequence._create({
100
137
  sequenceId: nextSequenceId,
101
138
  context: this,
139
+ tokenMeter: _tokenMeter,
102
140
  contextShift: {
103
141
  size: contextShiftSize,
104
142
  strategy: contextShiftStrategy
@@ -115,17 +153,18 @@ export class LlamaContext {
115
153
  this._currentDispatchBatchHandle = {};
116
154
  this._dispatchDecodeScheduled = false;
117
155
  this._batchDispatchPending = false;
118
- let prioritizeStrategy;
119
- try {
120
- this._ensureNotDisposed();
121
- prioritizeStrategy = resolveBatchItemsPrioritizingStrategy(this._batchingOptions.itemsPrioritizingStrategy);
122
- }
123
- catch (err) {
124
- this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
125
- return;
126
- }
127
- let shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
128
- while (shouldHaveAnotherBatch) {
156
+ let shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
157
+ const resolvePrioritizationStrategy = () => {
158
+ try {
159
+ this._ensureNotDisposed();
160
+ return resolveBatchItemsPrioritizationStrategy(this._batchingOptions.itemPrioritizationStrategy);
161
+ }
162
+ catch (err) {
163
+ this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
164
+ }
165
+ return null;
166
+ };
167
+ const getOrderedQueuedDecodes = (prioritizationStrategy) => {
129
168
  const batchItemToQueuedDecodeMap = new Map();
130
169
  const batchItemsList = [];
131
170
  for (const queuedDecode of this._queuedDecodes) {
@@ -138,42 +177,65 @@ export class LlamaContext {
138
177
  }
139
178
  let prioritizedItems;
140
179
  try {
141
- prioritizedItems = prioritizeStrategy({
180
+ prioritizedItems = prioritizationStrategy({
142
181
  items: batchItemsList,
143
182
  size: this._batchSize
144
183
  });
145
184
  }
146
185
  catch (err) {
147
186
  this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
148
- return;
187
+ return null;
149
188
  }
150
- let batchTokenSlotsLeft = this._batchSize;
151
- const afterDecodeActions = [];
152
- const queuedDecodesToDelete = new Set();
153
- const currentQueuedDecodeItems = new Set();
154
- const currentBatchItems = [];
155
- let currentBatchSize = 0;
156
- for (const prioritizedItem of prioritizedItems) {
189
+ return prioritizedItems.map((prioritizedItem) => {
157
190
  const queuedDecode = batchItemToQueuedDecodeMap.get(prioritizedItem.item);
158
191
  if (queuedDecode == null)
159
192
  throw new Error("Received invalid batch item. Make sure you keep the original object reference " +
160
193
  "of the batch item on `item` on `PrioritizedBatchItem` in your custom prioritization strategy");
161
- const processAmount = Math.min(queuedDecode.tokens.length, prioritizedItem.processAmount, batchTokenSlotsLeft);
162
- if (processAmount <= 0)
194
+ return {
195
+ queuedDecode,
196
+ processAmount: prioritizedItem.processAmount
197
+ };
198
+ });
199
+ };
200
+ const fitQueuedDecodesToABatch = (queuedDecodes, batchSize) => {
201
+ const currentBatchItems = [];
202
+ let currentBatchSize = 0;
203
+ let batchTokenSlotsLeft = batchSize;
204
+ for (const { queuedDecode, processAmount } of queuedDecodes) {
205
+ const resolvedProcessAmount = Math.min(processAmount <= 0 ? 1 : processAmount, queuedDecode.tokens.length, batchTokenSlotsLeft);
206
+ if (resolvedProcessAmount <= 0) {
207
+ if (batchTokenSlotsLeft === 0)
208
+ break;
163
209
  continue;
164
- batchTokenSlotsLeft -= processAmount;
210
+ }
211
+ batchTokenSlotsLeft -= resolvedProcessAmount;
212
+ currentBatchSize += resolvedProcessAmount;
165
213
  currentBatchItems.push({
166
214
  queuedDecode,
167
- processAmount
215
+ processAmount: resolvedProcessAmount
168
216
  });
169
- currentBatchSize += processAmount;
170
217
  }
218
+ return {
219
+ currentBatchItems,
220
+ currentBatchSize
221
+ };
222
+ };
223
+ const decodeTokenBatchItems = async (batchItems, currentBatchSize) => {
224
+ const afterDecodeActions = [];
225
+ const queuedDecodesToDelete = new Set();
226
+ const currentQueuedDecodeItems = new Set();
171
227
  if (currentBatchSize !== 0)
172
228
  this._ctx.initBatch(currentBatchSize);
173
- for (const { queuedDecode, processAmount } of currentBatchItems) {
229
+ for (const { queuedDecode, processAmount } of batchItems) {
174
230
  let batchLogitIndex;
175
231
  try {
176
- batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(queuedDecode.tokens.slice(0, processAmount)), queuedDecode.generateLogitAtTheEnd && processAmount === queuedDecode.tokens.length);
232
+ const shouldGenerateLogitAtTheEnd = queuedDecode.generateLogitAtTheEnd &&
233
+ processAmount === queuedDecode.tokens.length;
234
+ const tokensToProcess = queuedDecode.tokens.slice(0, processAmount);
235
+ const numberOfOutputTokens = shouldGenerateLogitAtTheEnd ? 1 : 0;
236
+ TokenMeter.useTokens(queuedDecode.tokenMeter, Math.max(0, tokensToProcess.length - numberOfOutputTokens), "input");
237
+ TokenMeter.useTokens(queuedDecode.tokenMeter, numberOfOutputTokens, "output");
238
+ batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(tokensToProcess), shouldGenerateLogitAtTheEnd);
177
239
  }
178
240
  catch (err) {
179
241
  this._dispatchErrorForQueuedDecodesAndDequeue(new Set([queuedDecode]), err);
@@ -192,8 +254,6 @@ export class LlamaContext {
192
254
  queuedDecode.tokens = queuedDecode.tokens.slice(processAmount);
193
255
  queuedDecode.firstTokenSequenceIndex += processAmount;
194
256
  }
195
- if (batchTokenSlotsLeft === 0)
196
- break;
197
257
  }
198
258
  for (let i = 0; i < this._queuedDecodes.length; i++) {
199
259
  const queuedDecode = this._queuedDecodes[i];
@@ -203,7 +263,6 @@ export class LlamaContext {
203
263
  i--;
204
264
  }
205
265
  }
206
- shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
207
266
  try {
208
267
  if (currentBatchSize !== 0)
209
268
  await this._ctx.decodeBatch();
@@ -224,14 +283,45 @@ export class LlamaContext {
224
283
  }
225
284
  accept(undefined);
226
285
  }
286
+ };
287
+ const prioritizationStrategy = resolvePrioritizationStrategy();
288
+ if (prioritizationStrategy == null)
289
+ return; // all queued items are rejected and dequeued when we get here
290
+ while (shouldHaveAnotherLoop) {
291
+ const orderedQueuedDecodes = getOrderedQueuedDecodes(prioritizationStrategy);
292
+ if (orderedQueuedDecodes == null)
293
+ return; // all queued items are rejected and dequeued when we get here
294
+ const { currentBatchItems, currentBatchSize } = fitQueuedDecodesToABatch(orderedQueuedDecodes, this._batchSize);
295
+ let preventDisposalHandle;
296
+ try {
297
+ preventDisposalHandle = this._backendContextDisposeGuard.createPreventDisposalHandle();
298
+ }
299
+ catch (err) {
300
+ this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
301
+ return;
302
+ }
303
+ try {
304
+ await decodeTokenBatchItems(currentBatchItems, currentBatchSize);
305
+ shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
306
+ }
307
+ finally {
308
+ preventDisposalHandle.dispose();
309
+ }
227
310
  }
228
311
  });
229
312
  }
230
- printTimings() {
313
+ /**
314
+ * Print the timings of token evaluation since that last print for this context.
315
+ * > **Note:** it prints on the `LlamaLogLevel.info` level, so if you set the level of your `Llama` instance higher than that,
316
+ * it won't print anything.
317
+ */
318
+ async printTimings() {
319
+ this._ensureNotDisposed();
231
320
  this._ctx.printTimings();
321
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for the logs to finish printing
232
322
  }
233
323
  /** @internal */
234
- async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5 }, onDone) {
324
+ async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5, tokenMeter }, onDone) {
235
325
  return await new Promise((accept, reject) => {
236
326
  this._queuedDecodes.push({
237
327
  sequenceId,
@@ -239,6 +329,7 @@ export class LlamaContext {
239
329
  firstTokenSequenceIndex,
240
330
  generateLogitAtTheEnd,
241
331
  evaluationPriority,
332
+ tokenMeter,
242
333
  response: [accept, reject],
243
334
  onDone
244
335
  });
@@ -251,6 +342,8 @@ export class LlamaContext {
251
342
  if (this._disposed)
252
343
  return;
253
344
  void withLock(this, "context", async () => {
345
+ if (this._disposed)
346
+ return;
254
347
  this._ctx.disposeSequence(sequenceId);
255
348
  this._unusedSequenceIds.push(sequenceId);
256
349
  this._onReclaimUnusedSequenceId.dispatchEvent();
@@ -261,6 +354,10 @@ export class LlamaContext {
261
354
  this._ctx.acceptGrammarEvaluationStateToken(grammarEvaluationState._state, token);
262
355
  }
263
356
  /** @internal */
357
+ _canBeNextTokenForGrammarEvaluationState(grammarEvaluationState, token) {
358
+ return this._ctx.canBeNextTokenForGrammarEvaluationState(grammarEvaluationState._state, token);
359
+ }
360
+ /** @internal */
264
361
  _popSequenceId() {
265
362
  if (this._unusedSequenceIds.length > 0)
266
363
  return this._unusedSequenceIds.shift();
@@ -310,20 +407,115 @@ export class LlamaContext {
310
407
  if (this._disposed)
311
408
  throw new DisposedError();
312
409
  }
410
+ /** @internal */
411
+ async _setLora({ filePath, scale }) {
412
+ const lora = await this._model._getOrLoadLora(filePath);
413
+ this._ctx.setLora(lora, scale ?? defaultLoraScale);
414
+ if (!this._loraAdapters.has(lora)) {
415
+ this._loraAdapters.add(lora);
416
+ lora.usages++;
417
+ }
418
+ }
419
+ /** @internal */
420
+ static async _create(options, { _model }) {
421
+ const sequences = options.sequences ?? getDefaultContextSequences();
422
+ const flashAttention = _model.flashAttentionSupported
423
+ ? Boolean(options.flashAttention ?? _model.defaultContextFlashAttention)
424
+ : false;
425
+ const loraOptions = typeof options.lora === "string"
426
+ ? { adapters: [{ filePath: options.lora }] }
427
+ : options.lora;
428
+ const contextSize = await _model.fileInsights.configurationResolver.resolveContextContextSize(options.contextSize, {
429
+ batchSize: options.batchSize,
430
+ sequences: sequences,
431
+ modelGpuLayers: _model.gpuLayers,
432
+ modelTrainContextSize: _model.trainContextSize,
433
+ flashAttention,
434
+ getVramState: () => _model._llama._vramOrchestrator.getMemoryState(),
435
+ llamaGpu: _model._llama.gpu,
436
+ ignoreMemorySafetyChecks: options.ignoreMemorySafetyChecks,
437
+ isEmbeddingContext: options._embeddings
438
+ });
439
+ const batchSize = options.batchSize ?? getDefaultContextBatchSize({ contextSize, sequences });
440
+ const vramRequiredEstimate = _model.fileInsights.estimateContextResourceRequirements({
441
+ contextSize,
442
+ sequences,
443
+ isEmbeddingContext: options._embeddings,
444
+ modelGpuLayers: _model.gpuLayers,
445
+ batchSize,
446
+ flashAttention
447
+ }).gpuVram;
448
+ const context = new LlamaContext({ _model }, { ...options, contextSize, batchSize, sequences, flashAttention });
449
+ const { createSignal } = options;
450
+ const contextCreationMemoryReservation = options.ignoreMemorySafetyChecks
451
+ ? null
452
+ : _model._llama._vramOrchestrator.reserveMemory(vramRequiredEstimate);
453
+ try {
454
+ const contextLoaded = await context._ctx.init();
455
+ if (createSignal?.aborted) {
456
+ if (contextLoaded)
457
+ await context._ctx.dispose();
458
+ throw createSignal.reason;
459
+ }
460
+ else if (!contextLoaded)
461
+ throw new Error("Failed to create context");
462
+ contextCreationMemoryReservation?.dispose?.();
463
+ if (loraOptions != null && loraOptions.adapters.length > 0) {
464
+ let loadedAdapters = 0;
465
+ for (const adapter of loraOptions.adapters) {
466
+ try {
467
+ await context._setLora({
468
+ filePath: adapter.filePath,
469
+ scale: adapter.scale
470
+ });
471
+ loadedAdapters++;
472
+ try {
473
+ loraOptions.onLoadProgress?.(loadedAdapters / loraOptions.adapters.length);
474
+ }
475
+ catch (err) {
476
+ console.error(err);
477
+ }
478
+ }
479
+ catch (err) {
480
+ await context.dispose();
481
+ throw err;
482
+ }
483
+ if (createSignal?.aborted) {
484
+ await context.dispose();
485
+ throw createSignal.reason;
486
+ }
487
+ }
488
+ }
489
+ else if (loraOptions?.onLoadProgress != null) {
490
+ try {
491
+ loraOptions.onLoadProgress(1);
492
+ }
493
+ catch (err) {
494
+ console.error(err);
495
+ }
496
+ }
497
+ return context;
498
+ }
499
+ finally {
500
+ contextCreationMemoryReservation?.dispose?.();
501
+ }
502
+ }
313
503
  }
314
504
  export class LlamaContextSequence {
315
505
  /** @internal */ _sequenceId;
316
506
  /** @internal */ _gcRegistry;
317
507
  /** @internal */ _context;
318
508
  /** @internal */ _contextShift;
509
+ /** @internal */ _tokenMeter;
319
510
  /** @internal */ _disposeAggregator = new DisposeAggregator();
320
511
  /** @internal */ _contextTokens = [];
321
512
  /** @internal */ _nextTokenIndex = 0;
322
513
  /** @internal */ _disposed = false;
323
514
  onDispose = new EventRelay();
324
- constructor({ sequenceId, context, contextShift }) {
515
+ constructor({ sequenceId, context, tokenMeter, contextShift }) {
325
516
  this._sequenceId = sequenceId;
326
517
  this._context = context;
518
+ this._tokenMeter = tokenMeter ?? new TokenMeter();
327
519
  this._contextShift = contextShift;
328
520
  this._gcRegistry = new FinalizationRegistry(this._context._reclaimUnusedSequenceId);
329
521
  this._gcRegistry.register(this, sequenceId);
@@ -360,6 +552,9 @@ export class LlamaContextSequence {
360
552
  get contextTokens() {
361
553
  return this._contextTokens.slice();
362
554
  }
555
+ get tokenMeter() {
556
+ return this._tokenMeter;
557
+ }
363
558
  get isLoadedToMemory() {
364
559
  return !this._disposed;
365
560
  }
@@ -385,7 +580,7 @@ export class LlamaContextSequence {
385
580
  }
386
581
  /**
387
582
  * Erase context tokens in the provided ranges to free up space for new tokens to be generated.
388
- * the start and end of each range are exclusive.
583
+ * The start of each range is inclusive, and the end of each range is exclusive.
389
584
  * For example, the range `{start: 0, end: 1}` will remove the token at the `0` index only.
390
585
  */
391
586
  async eraseContextTokenRanges(ranges) {
@@ -394,6 +589,8 @@ export class LlamaContextSequence {
394
589
  this._ensureNotDisposed();
395
590
  if (ranges.length === 0)
396
591
  return;
592
+ // if the deletion fails, we'll have to dispose the sequence and fill it up again
593
+ let deletionSuccessful = true;
397
594
  const resolvedRanges = ranges
398
595
  .map(({ start, end }) => {
399
596
  if (start === end)
@@ -423,34 +620,41 @@ export class LlamaContextSequence {
423
620
  let lastDeleteRangeEndPos = null;
424
621
  for (const range of resolvedRanges) {
425
622
  this._contextTokens.splice(range.start - removedTokens, range.end - range.start);
426
- this._context._ctx.removeTokenCellsFromSequence(this._sequenceId, range.start, range.end);
427
- if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== range.start)
623
+ if (deletionSuccessful)
624
+ deletionSuccessful &&= this._context._ctx.removeTokenCellsFromSequence(this._sequenceId, range.start, range.end);
625
+ if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== range.start)
428
626
  this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, range.start, -removedTokens);
429
627
  removedTokens += range.end - range.start;
430
628
  lastDeleteRangeEndPos = range.end;
431
629
  }
432
- if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
630
+ if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
433
631
  this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, this._nextTokenIndex, -removedTokens);
434
632
  this._nextTokenIndex -= removedTokens;
633
+ if (deletionSuccessful)
634
+ return;
635
+ const newSequenceTokens = this._contextTokens.slice();
636
+ this._nextTokenIndex = 0;
637
+ this._context._ctx.disposeSequence(this._sequenceId);
638
+ await this.evaluateWithoutGeneratingNewTokens(newSequenceTokens);
435
639
  });
436
640
  }
437
- /**
438
- * @param tokens
439
- * @param [options]
440
- */
441
- evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEosToken = false } = {}) {
641
+ evaluate(tokens, options = {}) {
642
+ const { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEogToken = false, _noSampling = false } = options;
442
643
  return this._evaluate(tokens, {
443
644
  temperature,
645
+ minP,
444
646
  topK,
445
647
  topP,
446
648
  grammarEvaluationState,
447
649
  repeatPenalty,
650
+ tokenBias,
448
651
  evaluationPriority,
449
652
  contextShiftOptions: {
450
653
  size: contextShiftSize,
451
654
  strategy: contextShiftStrategy
452
655
  },
453
- yieldEosToken
656
+ yieldEogToken,
657
+ _noSampling
454
658
  });
455
659
  }
456
660
  /**
@@ -473,24 +677,29 @@ export class LlamaContextSequence {
473
677
  }
474
678
  }
475
679
  /** @internal */
476
- async *_evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEosToken = false }) {
680
+ async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEogToken = false, _noSampling = false }) {
477
681
  this._ensureNotDisposed();
478
682
  let evalTokens = tokens;
479
683
  if (evalTokens.length === 0)
480
684
  return;
481
- // eslint-disable-next-line no-constant-condition
482
685
  while (true) {
483
686
  this._ensureNotDisposed();
484
687
  // Evaluate to get the next token.
485
- const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, contextShiftOptions, (batchLogitIndex) => {
688
+ const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, this._tokenMeter, contextShiftOptions, (batchLogitIndex) => {
689
+ if (_noSampling)
690
+ return null;
486
691
  const repeatPenaltyTokens = repeatPenalty?.punishTokens instanceof Function
487
692
  ? repeatPenalty.punishTokens()
488
693
  : repeatPenalty?.punishTokens;
489
694
  const resolvedGrammarEvaluationState = grammarEvaluationState instanceof Function
490
695
  ? grammarEvaluationState()
491
696
  : grammarEvaluationState;
697
+ if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
698
+ throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
699
+ const { tokenBiasKeys, tokenBiasValues } = getTokenBiasesForAddon(tokenBias, this.model);
492
700
  return this._context._ctx.sampleToken(batchLogitIndex, removeNullFields({
493
701
  temperature,
702
+ minP,
494
703
  topK,
495
704
  topP,
496
705
  repeatPenalty: repeatPenalty?.penalty,
@@ -499,31 +708,36 @@ export class LlamaContextSequence {
499
708
  : undefined,
500
709
  repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
501
710
  repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
711
+ tokenBiasKeys,
712
+ tokenBiasValues,
502
713
  grammarEvaluationState: resolvedGrammarEvaluationState?._state
503
714
  }));
504
715
  });
505
716
  if (nextToken == null)
506
717
  return;
507
718
  // the model finished generating text
508
- if (!yieldEosToken && nextToken === this._context.model.tokens.eos)
719
+ if (!yieldEogToken && this._context.model.isEogToken(nextToken))
509
720
  break;
510
- yield nextToken;
511
- // Create tokens for the next eval.
512
- evalTokens = [nextToken];
721
+ const replacementToken = (yield nextToken);
722
+ // set the tokens for the next evaluation
723
+ if (replacementToken != null)
724
+ evalTokens = [replacementToken];
725
+ else
726
+ evalTokens = [nextToken];
513
727
  }
514
728
  }
515
729
  /** @internal */
516
- async _decodeTokens(tokens, generateLogit, evaluationPriority, contextShiftOptions, onDecodeDone) {
730
+ async _decodeTokens(tokens, generateLogit, evaluationPriority, tokenMeter, contextShiftOptions, onDecodeDone) {
517
731
  this._ensureNotDisposed();
518
732
  const tokensLeftToDecode = tokens.slice();
519
733
  return await withLock(this, "evaluate", async () => {
520
734
  while (tokensLeftToDecode.length > 0) {
521
735
  this._ensureNotDisposed();
522
- let freeSpace = this._context.contextSize - this._nextTokenIndex;
523
- if (freeSpace <= 1) {
736
+ let freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
737
+ if (freeSpace <= 0) {
524
738
  await this._freeUpSpaceForTokens(contextShiftOptions);
525
- freeSpace = this._context.contextSize - this._nextTokenIndex;
526
- if (freeSpace <= 1)
739
+ freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
740
+ if (freeSpace <= 0)
527
741
  throw new Error("Failed to free up space for new tokens");
528
742
  }
529
743
  const tokensToDecode = tokensLeftToDecode.splice(0, freeSpace);
@@ -533,7 +747,8 @@ export class LlamaContextSequence {
533
747
  tokens: tokensToDecode,
534
748
  firstTokenSequenceIndex: this._nextTokenIndex,
535
749
  generateLogitAtTheEnd,
536
- evaluationPriority
750
+ evaluationPriority,
751
+ tokenMeter
537
752
  }, !generateLogitAtTheEnd
538
753
  ? undefined
539
754
  : onDecodeDone);
@@ -553,7 +768,10 @@ export class LlamaContextSequence {
553
768
  : contextShiftOptions.size));
554
769
  this._ensureNotDisposed();
555
770
  if (contextShiftOptions.strategy === "eraseBeginning") {
556
- await this.eraseContextTokenRanges([{ start: 0, end: size }]);
771
+ let eraseStartIndex = 0;
772
+ if (this.model.tokens.bos != null && this._contextTokens[0] === this.model.tokens.bos)
773
+ eraseStartIndex = 1;
774
+ await this.eraseContextTokenRanges([{ start: eraseStartIndex, end: size + eraseStartIndex }]);
557
775
  }
558
776
  else {
559
777
  const ranges = await contextShiftOptions.strategy({
@@ -563,7 +781,7 @@ export class LlamaContextSequence {
563
781
  if (ranges == null)
564
782
  throw new Error("Invalid delete ranges");
565
783
  await this.eraseContextTokenRanges(ranges);
566
- if (this.nextTokenIndex >= this._context.contextSize)
784
+ if (this.nextTokenIndex >= this._context.contextSize - 1)
567
785
  await this.eraseContextTokenRanges([{ start: 0, end: size }]);
568
786
  }
569
787
  }
@@ -576,10 +794,11 @@ export class LlamaContextSequence {
576
794
  * We need this to make it impossible to manually create instances of this class outside the code of this library
577
795
  * @internal
578
796
  */
579
- static _create({ sequenceId, context, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
797
+ static _create({ sequenceId, context, tokenMeter, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
580
798
  return new LlamaContextSequence({
581
799
  sequenceId,
582
800
  context,
801
+ tokenMeter,
583
802
  contextShift: {
584
803
  size: contextShiftSize,
585
804
  strategy: contextShiftStrategy
@@ -587,14 +806,52 @@ export class LlamaContextSequence {
587
806
  });
588
807
  }
589
808
  }
809
+ function getTokenBiasesForAddon(tokenBias, currentModel) {
810
+ if (tokenBias == null)
811
+ return {
812
+ tokenBiasKeys: undefined,
813
+ tokenBiasValues: undefined
814
+ };
815
+ if (tokenBias instanceof Function)
816
+ tokenBias = tokenBias();
817
+ if (tokenBias._model !== currentModel)
818
+ throw new Error("This TokenBias instance was created with a different model than the one used by this context. " +
819
+ "Make sure you use the model instance of the context sequence for the TokenBias you use it with.");
820
+ const tokenBiasKeys = [];
821
+ const tokenBiasValues = [];
822
+ for (const [token, bias] of tokenBias._biases) {
823
+ tokenBiasKeys.push(token);
824
+ tokenBiasValues.push(bias);
825
+ }
826
+ if (tokenBiasKeys.length === 0 || tokenBiasValues.length === 0) {
827
+ return {
828
+ tokenBiasKeys: undefined,
829
+ tokenBiasValues: undefined
830
+ };
831
+ }
832
+ return {
833
+ tokenBiasKeys: Uint32Array.from(tokenBiasKeys),
834
+ tokenBiasValues: Float32Array.from(tokenBiasValues)
835
+ };
836
+ }
590
837
  function disposeContextIfReferenced(contextRef) {
591
838
  const context = contextRef.deref();
592
839
  if (context != null)
593
- context.dispose();
840
+ void context.dispose();
594
841
  }
595
842
  function disposeContextSequenceIfReferenced(contextRef) {
596
843
  const context = contextRef.deref();
597
844
  if (context != null)
598
845
  context.dispose();
599
846
  }
847
+ export function getDefaultContextBatchSize({ contextSize, sequences }) {
848
+ return Math.min(contextSize * sequences, 512);
849
+ }
850
+ export function getDefaultContextSequences() {
851
+ return 1;
852
+ }
853
+ const defaultFallbackContextSize = 4096;
854
+ export function getDefaultModelContextSize({ trainContextSize }) {
855
+ return trainContextSize ?? defaultFallbackContextSize;
856
+ }
600
857
  //# sourceMappingURL=LlamaContext.js.map