node-llama-cpp 3.0.0-beta.3 → 3.0.0-beta.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (690)
  1. package/README.md +14 -11
  2. package/bins/linux-arm64/_nlcBuildMetadata.json +1 -0
  3. package/bins/linux-arm64/llama-addon.node +0 -0
  4. package/bins/linux-armv7l/_nlcBuildMetadata.json +1 -0
  5. package/bins/linux-armv7l/llama-addon.node +0 -0
  6. package/bins/linux-x64/_nlcBuildMetadata.json +1 -0
  7. package/bins/linux-x64/llama-addon.node +0 -0
  8. package/bins/linux-x64-cuda/_nlcBuildMetadata.json +1 -0
  9. package/bins/linux-x64-cuda/llama-addon.node +0 -0
  10. package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -0
  11. package/bins/linux-x64-vulkan/llama-addon.node +0 -0
  12. package/bins/mac-arm64-metal/_nlcBuildMetadata.json +1 -0
  13. package/bins/mac-arm64-metal/default.metallib +0 -0
  14. package/bins/mac-arm64-metal/llama-addon.node +0 -0
  15. package/bins/mac-x64/_nlcBuildMetadata.json +1 -0
  16. package/bins/mac-x64/llama-addon.node +0 -0
  17. package/bins/win-arm64/_nlcBuildMetadata.json +1 -0
  18. package/bins/win-arm64/llama-addon.exp +0 -0
  19. package/bins/win-arm64/llama-addon.lib +0 -0
  20. package/bins/win-arm64/llama-addon.node +0 -0
  21. package/bins/win-x64/_nlcBuildMetadata.json +1 -0
  22. package/bins/win-x64/llama-addon.exp +0 -0
  23. package/bins/win-x64/llama-addon.lib +0 -0
  24. package/bins/win-x64/llama-addon.node +0 -0
  25. package/bins/win-x64-cuda/_nlcBuildMetadata.json +1 -0
  26. package/bins/win-x64-cuda/llama-addon.exp +0 -0
  27. package/bins/win-x64-cuda/llama-addon.lib +0 -0
  28. package/bins/win-x64-cuda/llama-addon.node +0 -0
  29. package/bins/win-x64-vulkan/_nlcBuildMetadata.json +1 -0
  30. package/bins/win-x64-vulkan/llama-addon.exp +0 -0
  31. package/bins/win-x64-vulkan/llama-addon.lib +0 -0
  32. package/bins/win-x64-vulkan/llama-addon.node +0 -0
  33. package/dist/ChatWrapper.d.ts +8 -39
  34. package/dist/ChatWrapper.js +115 -72
  35. package/dist/ChatWrapper.js.map +1 -1
  36. package/dist/apiDocsIndex.d.ts +1 -0
  37. package/dist/apiDocsIndex.js +7 -0
  38. package/dist/apiDocsIndex.js.map +1 -0
  39. package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +54 -8
  40. package/dist/bindings/AddonTypes.js +2 -0
  41. package/dist/bindings/AddonTypes.js.map +1 -0
  42. package/dist/bindings/Llama.d.ts +47 -0
  43. package/dist/bindings/Llama.js +353 -0
  44. package/dist/bindings/Llama.js.map +1 -0
  45. package/dist/bindings/consts.d.ts +2 -0
  46. package/dist/bindings/consts.js +11 -0
  47. package/dist/bindings/consts.js.map +1 -0
  48. package/dist/bindings/getLlama.d.ts +148 -0
  49. package/dist/bindings/getLlama.js +401 -0
  50. package/dist/bindings/getLlama.js.map +1 -0
  51. package/dist/bindings/types.d.ts +56 -0
  52. package/dist/bindings/types.js +77 -0
  53. package/dist/bindings/types.js.map +1 -0
  54. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  55. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  56. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  57. package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
  58. package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
  59. package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
  60. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  61. package/dist/bindings/utils/asyncEvery.js +15 -0
  62. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  63. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  64. package/dist/bindings/utils/asyncSome.js +27 -0
  65. package/dist/bindings/utils/asyncSome.js.map +1 -0
  66. package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
  67. package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
  68. package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
  69. package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
  70. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
  71. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
  72. package/dist/bindings/utils/cloneLlamaCppRepo.js +166 -0
  73. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
  74. package/dist/bindings/utils/compileLLamaCpp.d.ts +17 -0
  75. package/dist/bindings/utils/compileLLamaCpp.js +226 -0
  76. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
  77. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
  78. package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
  79. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  80. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  81. package/dist/bindings/utils/detectGlibc.js +46 -0
  82. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  83. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +10 -0
  84. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  85. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  86. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
  87. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +93 -0
  88. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
  89. package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
  90. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
  91. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
  92. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
  93. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
  94. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
  95. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
  96. package/dist/bindings/utils/getGpuTypesToUseForOption.js +30 -0
  97. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  98. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  99. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  100. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  101. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  102. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  103. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  104. package/dist/bindings/utils/getPlatform.d.ts +2 -0
  105. package/dist/bindings/utils/getPlatform.js +15 -0
  106. package/dist/bindings/utils/getPlatform.js.map +1 -0
  107. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  108. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  109. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  110. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  111. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  112. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  113. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  114. package/dist/bindings/utils/hasFileInPath.js +34 -0
  115. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  116. package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
  117. package/dist/bindings/utils/lastBuildInfo.js +17 -0
  118. package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
  119. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
  120. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +22 -0
  121. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
  122. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  123. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  124. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  125. package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
  126. package/dist/bindings/utils/resolveCustomCmakeOptions.js +47 -0
  127. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
  128. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  129. package/dist/bindings/utils/testBindingBinary.js +100 -0
  130. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  131. package/dist/bindings/utils/testCmakeBinary.d.ts +6 -0
  132. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  133. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  134. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  135. package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
  136. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  137. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -9
  138. package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
  139. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  140. package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
  141. package/dist/chatWrappers/FalconChatWrapper.js +38 -21
  142. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  143. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
  144. package/dist/chatWrappers/FunctionaryChatWrapper.js +323 -118
  145. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  146. package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
  147. package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
  148. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  149. package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
  150. package/dist/chatWrappers/GeneralChatWrapper.js +45 -22
  151. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  152. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
  153. package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +38 -20
  154. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  155. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
  156. package/dist/chatWrappers/Llama3ChatWrapper.js +174 -0
  157. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  158. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +67 -0
  159. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +371 -0
  160. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  161. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +54 -0
  162. package/dist/chatWrappers/generic/TemplateChatWrapper.js +200 -0
  163. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  164. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
  165. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  166. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  167. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +42 -0
  168. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +82 -0
  169. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  170. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  171. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +210 -0
  172. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  173. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +69 -0
  174. package/dist/chatWrappers/utils/resolveChatWrapper.js +243 -0
  175. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  176. package/dist/cli/cli.js +21 -7
  177. package/dist/cli/cli.js.map +1 -1
  178. package/dist/cli/commands/BuildCommand.d.ts +11 -4
  179. package/dist/cli/commands/BuildCommand.js +114 -41
  180. package/dist/cli/commands/BuildCommand.js.map +1 -1
  181. package/dist/cli/commands/ChatCommand.d.ts +18 -6
  182. package/dist/cli/commands/ChatCommand.js +300 -143
  183. package/dist/cli/commands/ChatCommand.js.map +1 -1
  184. package/dist/cli/commands/ClearCommand.d.ts +1 -1
  185. package/dist/cli/commands/ClearCommand.js +11 -12
  186. package/dist/cli/commands/ClearCommand.js.map +1 -1
  187. package/dist/cli/commands/CompleteCommand.d.ts +29 -0
  188. package/dist/cli/commands/CompleteCommand.js +365 -0
  189. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  190. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  191. package/dist/cli/commands/DebugCommand.js +54 -0
  192. package/dist/cli/commands/DebugCommand.js.map +1 -0
  193. package/dist/cli/commands/DownloadCommand.d.ts +7 -4
  194. package/dist/cli/commands/DownloadCommand.js +121 -70
  195. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  196. package/dist/cli/commands/InfillCommand.d.ts +31 -0
  197. package/dist/cli/commands/InfillCommand.js +401 -0
  198. package/dist/cli/commands/InfillCommand.js.map +1 -0
  199. package/dist/cli/commands/InitCommand.d.ts +11 -0
  200. package/dist/cli/commands/InitCommand.js +195 -0
  201. package/dist/cli/commands/InitCommand.js.map +1 -0
  202. package/dist/cli/commands/OnPostInstallCommand.js +9 -10
  203. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  204. package/dist/cli/commands/PullCommand.d.ts +12 -0
  205. package/dist/cli/commands/PullCommand.js +117 -0
  206. package/dist/cli/commands/PullCommand.js.map +1 -0
  207. package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
  208. package/dist/cli/commands/inspect/InspectCommand.js +19 -0
  209. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  210. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
  211. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +136 -0
  212. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  213. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  214. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +179 -0
  215. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  216. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +17 -0
  217. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +613 -0
  218. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  219. package/dist/cli/projectTemplates.d.ts +7 -0
  220. package/dist/cli/projectTemplates.js +10 -0
  221. package/dist/cli/projectTemplates.js.map +1 -0
  222. package/dist/cli/recommendedModels.d.ts +2 -0
  223. package/dist/cli/recommendedModels.js +363 -0
  224. package/dist/cli/recommendedModels.js.map +1 -0
  225. package/dist/cli/startCreateCli.d.ts +2 -0
  226. package/dist/cli/startCreateCli.js +26 -0
  227. package/dist/cli/startCreateCli.js.map +1 -0
  228. package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
  229. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  230. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  231. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  232. package/dist/cli/utils/ConsoleTable.js +86 -0
  233. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  234. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  235. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  236. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  237. package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
  238. package/dist/cli/utils/consolePromptQuestion.js +82 -0
  239. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  240. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  241. package/dist/cli/utils/getReadablePath.js +14 -0
  242. package/dist/cli/utils/getReadablePath.js.map +1 -0
  243. package/dist/cli/utils/interactivelyAskForModel.d.ts +7 -0
  244. package/dist/cli/utils/interactivelyAskForModel.js +451 -0
  245. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
  246. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  247. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  248. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  249. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  250. package/dist/cli/utils/printCommonInfoLines.js +75 -0
  251. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  252. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  253. package/dist/cli/utils/printInfoLine.js +54 -0
  254. package/dist/cli/utils/printInfoLine.js.map +1 -0
  255. package/dist/cli/utils/projectTemplates.d.ts +19 -0
  256. package/dist/cli/utils/projectTemplates.js +47 -0
  257. package/dist/cli/utils/projectTemplates.js.map +1 -0
  258. package/dist/cli/utils/resolveCommandGgufPath.d.ts +4 -0
  259. package/dist/cli/utils/resolveCommandGgufPath.js +71 -0
  260. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  261. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  262. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  263. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  264. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  265. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  266. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  267. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  268. package/dist/cli/utils/splitAnsiToLines.js +32 -0
  269. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  270. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
  271. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
  272. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
  273. package/dist/commands.d.ts +1 -0
  274. package/dist/commands.js +3 -0
  275. package/dist/commands.js.map +1 -1
  276. package/dist/config.d.ts +38 -5
  277. package/dist/config.js +61 -16
  278. package/dist/config.js.map +1 -1
  279. package/dist/consts.d.ts +3 -0
  280. package/dist/consts.js +10 -0
  281. package/dist/consts.js.map +1 -0
  282. package/dist/{llamaEvaluator → evaluator}/LlamaChat/LlamaChat.d.ts +112 -39
  283. package/dist/evaluator/LlamaChat/LlamaChat.js +1512 -0
  284. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
  285. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
  286. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
  287. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
  288. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
  289. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
  290. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
  291. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
  292. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
  293. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
  294. package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +42 -16
  295. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  296. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +288 -0
  297. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +419 -0
  298. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  299. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +39 -0
  300. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
  301. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
  302. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
  303. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
  304. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  305. package/dist/evaluator/LlamaCompletion.d.ts +143 -0
  306. package/dist/evaluator/LlamaCompletion.js +418 -0
  307. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  308. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +41 -21
  309. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +270 -81
  310. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
  311. package/dist/evaluator/LlamaContext/types.d.ts +140 -0
  312. package/dist/evaluator/LlamaContext/types.js.map +1 -0
  313. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  314. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  315. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  316. package/dist/{llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js → evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js} +4 -4
  317. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  318. package/dist/evaluator/LlamaEmbeddingContext.d.ts +51 -0
  319. package/dist/evaluator/LlamaEmbeddingContext.js +73 -0
  320. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
  321. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.d.ts +10 -7
  322. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +14 -11
  323. package/dist/evaluator/LlamaGrammar.js.map +1 -0
  324. package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
  325. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
  326. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
  327. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +3 -3
  328. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
  329. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +236 -0
  330. package/dist/evaluator/LlamaModel/LlamaModel.js +679 -0
  331. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
  332. package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
  333. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
  334. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
  335. package/dist/evaluator/TokenBias.d.ts +22 -0
  336. package/dist/evaluator/TokenBias.js +33 -0
  337. package/dist/evaluator/TokenBias.js.map +1 -0
  338. package/dist/evaluator/TokenMeter.d.ts +54 -0
  339. package/dist/evaluator/TokenMeter.js +86 -0
  340. package/dist/evaluator/TokenMeter.js.map +1 -0
  341. package/dist/gguf/consts.d.ts +3 -0
  342. package/dist/gguf/consts.js +8 -0
  343. package/dist/gguf/consts.js.map +1 -0
  344. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  345. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  346. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  347. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  348. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  349. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  350. package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
  351. package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
  352. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  353. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
  354. package/dist/gguf/fileReaders/GgufFsFileReader.js +46 -0
  355. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  356. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
  357. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
  358. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  359. package/dist/gguf/insights/GgufInsights.d.ts +48 -0
  360. package/dist/gguf/insights/GgufInsights.js +381 -0
  361. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  362. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +87 -0
  363. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +141 -0
  364. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  365. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +18 -0
  366. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +76 -0
  367. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  368. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +14 -0
  369. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +177 -0
  370. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  371. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  372. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  373. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  374. package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
  375. package/dist/gguf/parser/GgufV2Parser.js +115 -0
  376. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  377. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  378. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  379. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  380. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  381. package/dist/gguf/parser/parseGguf.js +63 -0
  382. package/dist/gguf/parser/parseGguf.js.map +1 -0
  383. package/dist/gguf/readGgufFileInfo.d.ts +33 -0
  384. package/dist/gguf/readGgufFileInfo.js +66 -0
  385. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  386. package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
  387. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  388. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  389. package/dist/gguf/types/GgufMetadataTypes.d.ts +335 -0
  390. package/dist/gguf/types/GgufMetadataTypes.js +86 -0
  391. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  392. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  393. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  394. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  395. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  396. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  397. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  398. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
  399. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
  400. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  401. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  402. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  403. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  404. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  405. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  406. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  407. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  408. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  409. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  410. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  411. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
  412. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  413. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  414. package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
  415. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  416. package/dist/index.d.ts +41 -18
  417. package/dist/index.js +36 -15
  418. package/dist/index.js.map +1 -1
  419. package/dist/state.d.ts +4 -0
  420. package/dist/state.js +14 -0
  421. package/dist/state.js.map +1 -1
  422. package/dist/types.d.ts +116 -5
  423. package/dist/types.js.map +1 -1
  424. package/dist/utils/DisposeGuard.d.ts +13 -0
  425. package/dist/utils/DisposeGuard.js +120 -0
  426. package/dist/utils/DisposeGuard.js.map +1 -0
  427. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  428. package/dist/utils/InsufficientMemoryError.js +6 -0
  429. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  430. package/dist/utils/LlamaText.d.ts +70 -26
  431. package/dist/utils/LlamaText.js +469 -157
  432. package/dist/utils/LlamaText.js.map +1 -1
  433. package/dist/utils/LruCache.d.ts +12 -0
  434. package/dist/utils/LruCache.js +44 -0
  435. package/dist/utils/LruCache.js.map +1 -0
  436. package/dist/utils/ReplHistory.js.map +1 -1
  437. package/dist/utils/StopGenerationDetector.d.ts +25 -9
  438. package/dist/utils/StopGenerationDetector.js +93 -22
  439. package/dist/utils/StopGenerationDetector.js.map +1 -1
  440. package/dist/utils/TokenStreamRegulator.d.ts +9 -4
  441. package/dist/utils/TokenStreamRegulator.js +81 -8
  442. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  443. package/dist/utils/UnsupportedError.d.ts +2 -0
  444. package/dist/utils/UnsupportedError.js +7 -0
  445. package/dist/utils/UnsupportedError.js.map +1 -0
  446. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
  447. package/dist/utils/clearTempFolder.js.map +1 -1
  448. package/dist/utils/cmake.js +38 -20
  449. package/dist/utils/cmake.js.map +1 -1
  450. package/dist/utils/createModelDownloader.d.ts +108 -0
  451. package/dist/utils/createModelDownloader.js +231 -0
  452. package/dist/utils/createModelDownloader.js.map +1 -0
  453. package/dist/utils/findBestOption.d.ts +4 -0
  454. package/dist/utils/findBestOption.js +15 -0
  455. package/dist/utils/findBestOption.js.map +1 -0
  456. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
  457. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
  458. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  459. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  460. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
  461. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
  462. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
  463. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
  464. package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
  465. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  466. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  467. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  468. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  469. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  470. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  471. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  472. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  473. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  474. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  475. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
  476. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
  477. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  478. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  479. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
  480. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
  481. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
  482. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  483. package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
  484. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  485. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  486. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  487. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
  488. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
  489. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  490. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
  491. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
  492. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
  493. package/dist/utils/gbnfJson/types.d.ts +3 -0
  494. package/dist/utils/gbnfJson/types.js.map +1 -1
  495. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
  496. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
  497. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
  498. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
  499. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
  500. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  501. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +2 -2
  502. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  503. package/dist/utils/getBuildDefaults.d.ts +1 -2
  504. package/dist/utils/getBuildDefaults.js +2 -3
  505. package/dist/utils/getBuildDefaults.js.map +1 -1
  506. package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
  507. package/dist/utils/getConsoleLogPrefix.js +10 -0
  508. package/dist/utils/getConsoleLogPrefix.js.map +1 -0
  509. package/dist/utils/getGrammarsFolder.d.ts +2 -1
  510. package/dist/utils/getGrammarsFolder.js +8 -7
  511. package/dist/utils/getGrammarsFolder.js.map +1 -1
  512. package/dist/utils/getModuleVersion.d.ts +1 -0
  513. package/dist/utils/getModuleVersion.js +13 -0
  514. package/dist/utils/getModuleVersion.js.map +1 -0
  515. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  516. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  517. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  518. package/dist/utils/getReadableContextSize.d.ts +1 -0
  519. package/dist/utils/getReadableContextSize.js +7 -0
  520. package/dist/utils/getReadableContextSize.js.map +1 -0
  521. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
  522. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  523. package/dist/utils/gitReleaseBundles.js +73 -5
  524. package/dist/utils/gitReleaseBundles.js.map +1 -1
  525. package/dist/utils/hashString.d.ts +1 -0
  526. package/dist/utils/hashString.js +8 -0
  527. package/dist/utils/hashString.js.map +1 -0
  528. package/dist/utils/isLockfileActive.d.ts +4 -0
  529. package/dist/utils/isLockfileActive.js +12 -0
  530. package/dist/utils/isLockfileActive.js.map +1 -0
  531. package/dist/utils/isToken.d.ts +2 -0
  532. package/dist/utils/isToken.js +4 -0
  533. package/dist/utils/isToken.js.map +1 -0
  534. package/dist/utils/isUrl.d.ts +1 -0
  535. package/dist/utils/isUrl.js +15 -0
  536. package/dist/utils/isUrl.js.map +1 -0
  537. package/dist/utils/mergeUnionTypes.d.ts +10 -0
  538. package/dist/utils/mergeUnionTypes.js +2 -0
  539. package/dist/utils/mergeUnionTypes.js.map +1 -0
  540. package/dist/utils/parseModelFileName.d.ts +1 -0
  541. package/dist/utils/parseModelFileName.js +6 -1
  542. package/dist/utils/parseModelFileName.js.map +1 -1
  543. package/dist/utils/parseTextTemplate.d.ts +66 -0
  544. package/dist/utils/parseTextTemplate.js +116 -0
  545. package/dist/utils/parseTextTemplate.js.map +1 -0
  546. package/dist/utils/prettyPrintObject.d.ts +10 -0
  547. package/dist/utils/prettyPrintObject.js +84 -0
  548. package/dist/utils/prettyPrintObject.js.map +1 -0
  549. package/dist/utils/removeNullFields.d.ts +2 -1
  550. package/dist/utils/removeNullFields.js +8 -0
  551. package/dist/utils/removeNullFields.js.map +1 -1
  552. package/dist/utils/resolveGithubRelease.d.ts +2 -0
  553. package/dist/utils/resolveGithubRelease.js +36 -0
  554. package/dist/utils/resolveGithubRelease.js.map +1 -0
  555. package/dist/utils/runtime.d.ts +4 -0
  556. package/dist/utils/runtime.js +8 -0
  557. package/dist/utils/runtime.js.map +1 -0
  558. package/dist/utils/safeEventCallback.d.ts +6 -0
  559. package/dist/utils/safeEventCallback.js +29 -0
  560. package/dist/utils/safeEventCallback.js.map +1 -0
  561. package/dist/utils/spawnCommand.d.ts +11 -1
  562. package/dist/utils/spawnCommand.js +56 -6
  563. package/dist/utils/spawnCommand.js.map +1 -1
  564. package/dist/utils/tokenizeInput.d.ts +3 -0
  565. package/dist/utils/tokenizeInput.js +12 -0
  566. package/dist/utils/tokenizeInput.js.map +1 -0
  567. package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
  568. package/dist/utils/truncateTextAndRoundToWords.js +30 -0
  569. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
  570. package/dist/utils/utilTypes.d.ts +3 -0
  571. package/dist/utils/utilTypes.js +2 -0
  572. package/dist/utils/utilTypes.js.map +1 -0
  573. package/dist/utils/waitForLockfileRelease.d.ts +5 -0
  574. package/dist/utils/waitForLockfileRelease.js +20 -0
  575. package/dist/utils/waitForLockfileRelease.js.map +1 -0
  576. package/dist/utils/withLockfile.d.ts +7 -0
  577. package/dist/utils/withLockfile.js +44 -0
  578. package/dist/utils/withLockfile.js.map +1 -0
  579. package/dist/utils/withOra.d.ts +2 -0
  580. package/dist/utils/withOra.js +22 -6
  581. package/dist/utils/withOra.js.map +1 -1
  582. package/dist/utils/withProgressLog.d.ts +23 -0
  583. package/dist/utils/withProgressLog.js +211 -0
  584. package/dist/utils/withProgressLog.js.map +1 -0
  585. package/dist/utils/withStatusLogs.d.ts +2 -1
  586. package/dist/utils/withStatusLogs.js +12 -9
  587. package/dist/utils/withStatusLogs.js.map +1 -1
  588. package/dist/utils/wrapAbortSignal.d.ts +2 -0
  589. package/dist/utils/wrapAbortSignal.js +9 -0
  590. package/dist/utils/wrapAbortSignal.js.map +1 -0
  591. package/llama/.clang-format +1 -2
  592. package/llama/CMakeLists.txt +115 -4
  593. package/llama/addon.cpp +1300 -97
  594. package/llama/binariesGithubRelease.json +1 -1
  595. package/llama/gitRelease.bundle +0 -0
  596. package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
  597. package/llama/gpuInfo/cuda-gpu-info.h +10 -0
  598. package/llama/gpuInfo/metal-gpu-info.h +8 -0
  599. package/llama/gpuInfo/metal-gpu-info.mm +30 -0
  600. package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
  601. package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
  602. package/llama/grammars/README.md +58 -5
  603. package/llama/grammars/json.gbnf +4 -4
  604. package/llama/grammars/json_arr.gbnf +4 -4
  605. package/llama/llama.cpp.info.json +4 -0
  606. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  607. package/package.json +78 -53
  608. package/templates/packed/electron-typescript-react.json +1 -0
  609. package/templates/packed/node-typescript.json +1 -0
  610. package/dist/AbortError.d.ts +0 -2
  611. package/dist/AbortError.js +0 -7
  612. package/dist/AbortError.js.map +0 -1
  613. package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
  614. package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
  615. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  616. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
  617. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  618. package/dist/llamaEvaluator/LlamaBins.d.ts +0 -18
  619. package/dist/llamaEvaluator/LlamaBins.js +0 -5
  620. package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
  621. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js +0 -704
  622. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +0 -1
  623. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -21
  624. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -120
  625. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
  626. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +0 -1
  627. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.d.ts +0 -146
  628. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js +0 -211
  629. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +0 -1
  630. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +0 -1
  631. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
  632. package/dist/llamaEvaluator/LlamaContext/types.d.ts +0 -82
  633. package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
  634. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  635. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  636. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  637. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  638. package/dist/llamaEvaluator/LlamaEmbeddingContext.d.ts +0 -35
  639. package/dist/llamaEvaluator/LlamaEmbeddingContext.js +0 -73
  640. package/dist/llamaEvaluator/LlamaEmbeddingContext.js.map +0 -1
  641. package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
  642. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
  643. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
  644. package/dist/llamaEvaluator/LlamaModel.d.ts +0 -119
  645. package/dist/llamaEvaluator/LlamaModel.js +0 -322
  646. package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
  647. package/dist/utils/binariesGithubRelease.js.map +0 -1
  648. package/dist/utils/clearLlamaBuild.d.ts +0 -1
  649. package/dist/utils/clearLlamaBuild.js +0 -12
  650. package/dist/utils/clearLlamaBuild.js.map +0 -1
  651. package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
  652. package/dist/utils/cloneLlamaCppRepo.js +0 -102
  653. package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
  654. package/dist/utils/compileLLamaCpp.d.ts +0 -8
  655. package/dist/utils/compileLLamaCpp.js +0 -132
  656. package/dist/utils/compileLLamaCpp.js.map +0 -1
  657. package/dist/utils/getBin.js +0 -78
  658. package/dist/utils/getBin.js.map +0 -1
  659. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
  660. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
  661. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
  662. package/dist/utils/getReleaseInfo.d.ts +0 -7
  663. package/dist/utils/getReleaseInfo.js +0 -30
  664. package/dist/utils/getReleaseInfo.js.map +0 -1
  665. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  666. package/dist/utils/parseModelTypeDescription.js +0 -9
  667. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  668. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  669. package/dist/utils/resolveChatWrapper.js +0 -16
  670. package/dist/utils/resolveChatWrapper.js.map +0 -1
  671. package/dist/utils/usedBinFlag.d.ts +0 -6
  672. package/dist/utils/usedBinFlag.js +0 -15
  673. package/dist/utils/usedBinFlag.js.map +0 -1
  674. package/llama/usedBin.json +0 -3
  675. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  676. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  677. package/llamaBins/linux-x64/llama-addon.node +0 -0
  678. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  679. package/llamaBins/mac-x64/llama-addon.node +0 -0
  680. package/llamaBins/win-x64/llama-addon.exp +0 -0
  681. package/llamaBins/win-x64/llama-addon.lib +0 -0
  682. package/llamaBins/win-x64/llama-addon.node +0 -0
  683. /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
  684. /package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +0 -0
  685. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
  686. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  687. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  688. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  689. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
  690. /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
@@ -3,38 +3,59 @@ import process from "process";
3
3
  import path from "path";
4
4
  import chalk from "chalk";
5
5
  import fs from "fs-extra";
6
- import { chatCommandHistoryFilePath, defaultChatSystemPrompt } from "../../config.js";
7
- import { LlamaChatWrapper } from "../../chatWrappers/LlamaChatWrapper.js";
8
- import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
9
- import { ChatMLChatWrapper } from "../../chatWrappers/ChatMLChatWrapper.js";
10
- import { resolveChatWrapperBasedOnModel } from "../../chatWrappers/resolveChatWrapperBasedOnModel.js";
11
- import { FalconChatWrapper } from "../../chatWrappers/FalconChatWrapper.js";
6
+ import { chatCommandHistoryFilePath, defaultChatSystemPrompt, documentationPageUrls } from "../../config.js";
12
7
  import { getIsInDocumentationMode } from "../../state.js";
13
8
  import { ReplHistory } from "../../utils/ReplHistory.js";
14
- import withStatusLogs from "../../utils/withStatusLogs.js";
15
- import { AlpacaChatWrapper } from "../../chatWrappers/AlpacaChatWrapper.js";
16
- import { FunctionaryChatWrapper } from "../../chatWrappers/FunctionaryChatWrapper.js";
17
- import { defineChatSessionFunction } from "../../llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js";
18
- const modelWrappers = ["auto", "general", "llamaChat", "alpacaChat", "functionary", "chatML", "falconChat"];
9
+ import { defineChatSessionFunction } from "../../evaluator/LlamaChatSession/utils/defineChatSessionFunction.js";
10
+ import { getLlama } from "../../bindings/getLlama.js";
11
+ import { LlamaGrammar } from "../../evaluator/LlamaGrammar.js";
12
+ import { LlamaChatSession } from "../../evaluator/LlamaChatSession/LlamaChatSession.js";
13
+ import { LlamaJsonSchemaGrammar } from "../../evaluator/LlamaJsonSchemaGrammar.js";
14
+ import { LlamaLogLevel, LlamaLogLevelGreaterThan, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption } from "../../bindings/types.js";
15
+ import withOra from "../../utils/withOra.js";
16
+ import { TokenMeter } from "../../evaluator/TokenMeter.js";
17
+ import { printInfoLine } from "../utils/printInfoLine.js";
18
+ import { resolveChatWrapper, specializedChatWrapperTypeNames } from "../../chatWrappers/utils/resolveChatWrapper.js";
19
+ import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
20
+ import { printCommonInfoLines } from "../utils/printCommonInfoLines.js";
21
+ import { resolveCommandGgufPath } from "../utils/resolveCommandGgufPath.js";
22
+ import { withProgressLog } from "../../utils/withProgressLog.js";
23
+ import { resolveHeaderFlag } from "../utils/resolveHeaderFlag.js";
24
+ import { withCliCommandDescriptionDocsUrl } from "../utils/withCliCommandDescriptionDocsUrl.js";
19
25
  export const ChatCommand = {
20
- command: "chat",
21
- describe: "Chat with a Llama model",
26
+ command: "chat [modelPath]",
27
+ describe: withCliCommandDescriptionDocsUrl("Chat with a Llama model", documentationPageUrls.CLI.Chat),
22
28
  builder(yargs) {
23
29
  const isInDocumentationMode = getIsInDocumentationMode();
24
30
  return yargs
25
- .option("model", {
26
- alias: "m",
31
+ .option("modelPath", {
32
+ alias: ["m", "model", "path", "url"],
33
+ type: "string",
34
+ description: "Llama model file to use for the chat. Can be a path to a local file or a URL of a model file to download"
35
+ })
36
+ .option("header", {
37
+ alias: ["H"],
27
38
  type: "string",
28
- demandOption: true,
29
- description: "Llama model file to use for the chat",
30
- group: "Required:"
39
+ array: true,
40
+ description: "Headers to use when downloading a model from a URL, in the format `key: value`. You can pass this option multiple times to add multiple headers."
41
+ })
42
+ .option("gpu", {
43
+ type: "string",
44
+ // yargs types don't support passing `false` as a choice, although it is supported by yargs
45
+ choices: nodeLlamaCppGpuOptions,
46
+ coerce: (value) => {
47
+ if (value == null || value == "")
48
+ return undefined;
49
+ return parseNodeLlamaCppGpuOption(value);
50
+ },
51
+ defaultDescription: "Uses the latest local build, and fallbacks to \"auto\"",
52
+ description: "Compute layer implementation type to use for llama.cpp. If omitted, uses the latest local build, and fallbacks to \"auto\""
31
53
  })
32
54
  .option("systemInfo", {
33
55
  alias: "i",
34
56
  type: "boolean",
35
57
  default: false,
36
- description: "Print llama.cpp system info",
37
- group: "Optional:"
58
+ description: "Print llama.cpp system info"
38
59
  })
39
60
  .option("systemPrompt", {
40
61
  alias: "s",
@@ -42,211 +63,344 @@ export const ChatCommand = {
42
63
  default: defaultChatSystemPrompt,
43
64
  defaultDescription: " ",
44
65
  description: "System prompt to use against the model" +
45
- (isInDocumentationMode ? "" : (". [default value: " + defaultChatSystemPrompt.split("\n").join(" ") + "]")),
46
- group: "Optional:"
66
+ (isInDocumentationMode ? "" : (". [default value: " + defaultChatSystemPrompt.split("\n").join(" ") + "]"))
67
+ })
68
+ .option("systemPromptFile", {
69
+ type: "string",
70
+ description: "Path to a file to load text from and use as as the model system prompt"
47
71
  })
48
72
  .option("prompt", {
49
73
  type: "string",
50
- description: "First prompt to automatically send to the model when starting the chat",
51
- group: "Optional:"
74
+ description: "First prompt to automatically send to the model when starting the chat"
75
+ })
76
+ .option("promptFile", {
77
+ type: "string",
78
+ description: "Path to a file to load text from and use as a first prompt to automatically send to the model when starting the chat"
52
79
  })
53
80
  .option("wrapper", {
54
81
  alias: "w",
55
82
  type: "string",
56
83
  default: "auto",
57
- choices: modelWrappers,
58
- description: "Chat wrapper to use. Use `auto` to automatically select a wrapper based on the model's BOS token",
59
- group: "Optional:"
84
+ choices: ["auto", ...specializedChatWrapperTypeNames],
85
+ description: "Chat wrapper to use. Use `auto` to automatically select a wrapper based on the model's BOS token"
86
+ })
87
+ .option("noJinja", {
88
+ type: "boolean",
89
+ default: false,
90
+ description: "Don't use a Jinja wrapper, even if it's the best option for the model"
60
91
  })
61
92
  .option("contextSize", {
62
93
  alias: "c",
63
94
  type: "number",
64
- default: 1024 * 4,
65
- description: "Context size to use for the model",
66
- group: "Optional:"
95
+ description: "Context size to use for the model context",
96
+ default: -1,
97
+ defaultDescription: "Automatically determined based on the available VRAM"
98
+ })
99
+ .option("batchSize", {
100
+ alias: "b",
101
+ type: "number",
102
+ description: "Batch size to use for the model context. The default value is the context size"
103
+ })
104
+ .option("noTrimWhitespace", {
105
+ type: "boolean",
106
+ alias: ["noTrim"],
107
+ default: false,
108
+ description: "Don't trim whitespaces from the model response"
67
109
  })
68
110
  .option("grammar", {
69
111
  alias: "g",
70
112
  type: "string",
71
113
  default: "text",
72
114
  choices: ["text", "json", "list", "arithmetic", "japanese", "chess"],
73
- description: "Restrict the model response to a specific grammar, like JSON for example",
74
- group: "Optional:"
115
+ description: "Restrict the model response to a specific grammar, like JSON for example"
75
116
  })
76
117
  .option("jsonSchemaGrammarFile", {
77
118
  alias: ["jsgf"],
78
119
  type: "string",
79
- description: "File path to a JSON schema file, to restrict the model response to only generate output that conforms to the JSON schema",
80
- group: "Optional:"
120
+ description: "File path to a JSON schema file, to restrict the model response to only generate output that conforms to the JSON schema"
81
121
  })
82
122
  .option("threads", {
83
123
  type: "number",
84
124
  default: 6,
85
- description: "Number of threads to use for the evaluation of tokens",
86
- group: "Optional:"
125
+ description: "Number of threads to use for the evaluation of tokens"
87
126
  })
88
127
  .option("temperature", {
89
128
  alias: "t",
90
129
  type: "number",
91
130
  default: 0,
92
- description: "Temperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The suggested temperature is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run. Set to `0` to disable.",
93
- group: "Optional:"
131
+ description: "Temperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The suggested temperature is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run. Set to `0` to disable."
132
+ })
133
+ .option("minP", {
134
+ alias: "mp",
135
+ type: "number",
136
+ default: 0,
137
+ description: "From the next token candidates, discard the percentage of tokens with the lowest probability. For example, if set to `0.05`, 5% of the lowest probability tokens will be discarded. This is useful for generating more high-quality results when using a high temperature. Set to a value between `0` and `1` to enable. Only relevant when `temperature` is set to a value greater than `0`."
94
138
  })
95
139
  .option("topK", {
96
140
  alias: "k",
97
141
  type: "number",
98
142
  default: 40,
99
- description: "Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation. An integer number between `1` and the size of the vocabulary. Set to `0` to disable (which uses the full vocabulary). Only relevant when `temperature` is set to a value greater than 0.",
100
- group: "Optional:"
143
+ description: "Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation. An integer number between `1` and the size of the vocabulary. Set to `0` to disable (which uses the full vocabulary). Only relevant when `temperature` is set to a value greater than 0."
101
144
  })
102
145
  .option("topP", {
103
146
  alias: "p",
104
147
  type: "number",
105
148
  default: 0.95,
106
- description: "Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P, and samples the next token only from this set. A float number between `0` and `1`. Set to `1` to disable. Only relevant when `temperature` is set to a value greater than `0`.",
107
- group: "Optional:"
149
+ description: "Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P, and samples the next token only from this set. A float number between `0` and `1`. Set to `1` to disable. Only relevant when `temperature` is set to a value greater than `0`."
108
150
  })
109
151
  .option("gpuLayers", {
110
152
  alias: "gl",
111
153
  type: "number",
112
154
  description: "number of layers to store in VRAM",
113
- group: "Optional:"
155
+ default: -1,
156
+ defaultDescription: "Automatically determined based on the available VRAM"
114
157
  })
115
158
  .option("repeatPenalty", {
116
159
  alias: "rp",
117
160
  type: "number",
118
161
  default: 1.1,
119
- description: "Prevent the model from repeating the same token too much. Set to `1` to disable.",
120
- group: "Optional:"
162
+ description: "Prevent the model from repeating the same token too much. Set to `1` to disable."
121
163
  })
122
164
  .option("lastTokensRepeatPenalty", {
123
165
  alias: "rpn",
124
166
  type: "number",
125
167
  default: 64,
126
- description: "Number of recent tokens generated by the model to apply penalties to repetition of",
127
- group: "Optional:"
168
+ description: "Number of recent tokens generated by the model to apply penalties to repetition of"
128
169
  })
129
170
  .option("penalizeRepeatingNewLine", {
130
171
  alias: "rpnl",
131
172
  type: "boolean",
132
173
  default: true,
133
- description: "Penalize new line tokens. set \"--no-penalizeRepeatingNewLine\" or \"--no-rpnl\" to disable",
134
- group: "Optional:"
174
+ description: "Penalize new line tokens. set `--no-penalizeRepeatingNewLine` or `--no-rpnl` to disable"
135
175
  })
136
176
  .option("repeatFrequencyPenalty", {
137
177
  alias: "rfp",
138
178
  type: "number",
139
- description: "For n time a token is in the `punishTokens` array, lower its probability by `n * repeatFrequencyPenalty`. Set to a value between `0` and `1` to enable.",
140
- group: "Optional:"
179
+ description: "For n time a token is in the `punishTokens` array, lower its probability by `n * repeatFrequencyPenalty`. Set to a value between `0` and `1` to enable."
141
180
  })
142
181
  .option("repeatPresencePenalty", {
143
182
  alias: "rpp",
144
183
  type: "number",
145
- description: "Lower the probability of all the tokens in the `punishTokens` array by `repeatPresencePenalty`. Set to a value between `0` and `1` to enable.",
146
- group: "Optional:"
184
+ description: "Lower the probability of all the tokens in the `punishTokens` array by `repeatPresencePenalty`. Set to a value between `0` and `1` to enable."
147
185
  })
148
186
  .option("maxTokens", {
149
187
  alias: "mt",
150
188
  type: "number",
151
189
  default: 0,
152
- description: "Maximum number of tokens to generate in responses. Set to `0` to disable. Set to `-1` to set to the context size",
153
- group: "Optional:"
190
+ description: "Maximum number of tokens to generate in responses. Set to `0` to disable. Set to `-1` to set to the context size"
154
191
  })
155
192
  .option("noHistory", {
156
193
  alias: "nh",
157
194
  type: "boolean",
158
195
  default: false,
159
- description: "Don't load or save chat history",
160
- group: "Optional:"
196
+ description: "Don't load or save chat history"
161
197
  })
162
198
  .option("environmentFunctions", {
163
199
  alias: "ef",
164
200
  type: "boolean",
165
201
  default: false,
166
- description: "Provide access to environment functions like `getDate` and `getTime`",
167
- group: "Optional:"
202
+ description: "Provide access to environment functions like `getDate` and `getTime`"
203
+ })
204
+ .option("debug", {
205
+ alias: "d",
206
+ type: "boolean",
207
+ default: false,
208
+ description: "Print llama.cpp info and debug logs"
209
+ })
210
+ .option("meter", {
211
+ type: "boolean",
212
+ default: false,
213
+ description: "Print how many tokens were used as input and output for each response"
214
+ })
215
+ .option("printTimings", {
216
+ alias: "pt",
217
+ type: "boolean",
218
+ default: false,
219
+ description: "Print llama.cpp timings after each response"
168
220
  });
169
221
  },
170
- async handler({ model, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar, jsonSchemaGrammarFile, threads, temperature, topK, topP, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions }) {
222
+ async handler({ modelPath, header, gpu, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize, batchSize, noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, debug, meter, printTimings }) {
171
223
  try {
172
224
  await RunChat({
173
- model, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar, jsonSchemaGrammarFile, threads, temperature, topK,
174
- topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty,
175
- repeatPresencePenalty, maxTokens, noHistory, environmentFunctions
225
+ modelPath, header, gpu, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize,
226
+ batchSize, noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, gpuLayers,
227
+ lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens,
228
+ noHistory, environmentFunctions, debug, meter, printTimings
176
229
  });
177
230
  }
178
231
  catch (err) {
232
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
179
233
  console.error(err);
180
234
  process.exit(1);
181
235
  }
182
236
  }
183
237
  };
184
- async function RunChat({ model: modelArg, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar: grammarArg, jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions }) {
185
- const { LlamaChatSession } = await import("../../llamaEvaluator/LlamaChatSession/LlamaChatSession.js");
186
- const { LlamaModel } = await import("../../llamaEvaluator/LlamaModel.js");
187
- const { LlamaContext } = await import("../../llamaEvaluator/LlamaContext/LlamaContext.js");
188
- const { LlamaGrammar } = await import("../../llamaEvaluator/LlamaGrammar.js");
189
- const { LlamaJsonSchemaGrammar } = await import("../../llamaEvaluator/LlamaJsonSchemaGrammar.js");
238
+ async function RunChat({ modelPath: modelArg, header: headerArg, gpu, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize, batchSize, noTrimWhitespace, grammar: grammarArg, jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, minP, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, debug, meter, printTimings }) {
239
+ if (contextSize === -1)
240
+ contextSize = undefined;
241
+ if (gpuLayers === -1)
242
+ gpuLayers = undefined;
243
+ const headers = resolveHeaderFlag(headerArg);
244
+ const trimWhitespace = !noTrimWhitespace;
245
+ if (debug)
246
+ console.info(`${chalk.yellow("Log level:")} debug`);
247
+ const llamaLogLevel = debug
248
+ ? LlamaLogLevel.debug
249
+ : LlamaLogLevel.warn;
250
+ const llama = gpu == null
251
+ ? await getLlama("lastBuild", {
252
+ logLevel: llamaLogLevel
253
+ })
254
+ : await getLlama({
255
+ gpu,
256
+ logLevel: llamaLogLevel
257
+ });
258
+ const logBatchSize = batchSize != null;
259
+ const resolvedModelPath = await resolveCommandGgufPath(modelArg, llama, headers);
190
260
  if (systemInfo)
191
- console.log(LlamaModel.systemInfo);
261
+ console.log(llama.systemInfo);
262
+ if (systemPromptFile != null && systemPromptFile !== "") {
263
+ if (systemPrompt != null && systemPrompt !== "" && systemPrompt !== defaultChatSystemPrompt)
264
+ console.warn(chalk.yellow("Both `systemPrompt` and `systemPromptFile` were specified. `systemPromptFile` will be used."));
265
+ systemPrompt = await fs.readFile(path.resolve(process.cwd(), systemPromptFile), "utf8");
266
+ }
267
+ if (promptFile != null && promptFile !== "") {
268
+ if (prompt != null && prompt !== "")
269
+ console.warn(chalk.yellow("Both `prompt` and `promptFile` were specified. `promptFile` will be used."));
270
+ prompt = await fs.readFile(path.resolve(process.cwd(), promptFile), "utf8");
271
+ }
272
+ if (batchSize != null && contextSize != null && batchSize > contextSize) {
273
+ console.warn(chalk.yellow("Batch size is greater than the context size. Batch size will be set to the context size."));
274
+ batchSize = contextSize;
275
+ }
192
276
  let initialPrompt = prompt ?? null;
193
- const model = await withStatusLogs({
194
- loading: chalk.blue("Loading model"),
195
- success: chalk.blue("Model loaded"),
196
- fail: chalk.blue("Failed to load model")
197
- }, async () => new LlamaModel({
198
- modelPath: path.resolve(process.cwd(), modelArg),
199
- gpuLayers: gpuLayers != null ? gpuLayers : undefined
200
- }));
201
- const context = await withStatusLogs({
277
+ const model = await withProgressLog({
278
+ loadingText: chalk.blue.bold("Loading model"),
279
+ successText: chalk.blue("Model loaded"),
280
+ failText: chalk.blue("Failed to load model"),
281
+ liveUpdates: !debug,
282
+ noProgress: debug,
283
+ liveCtrlCSendsAbortSignal: true
284
+ }, async (progressUpdater) => {
285
+ try {
286
+ return await llama.loadModel({
287
+ modelPath: resolvedModelPath,
288
+ gpuLayers: gpuLayers != null
289
+ ? gpuLayers
290
+ : contextSize != null
291
+ ? { fitContext: { contextSize } }
292
+ : undefined,
293
+ ignoreMemorySafetyChecks: gpuLayers != null,
294
+ onLoadProgress(loadProgress) {
295
+ progressUpdater.setProgress(loadProgress);
296
+ },
297
+ loadSignal: progressUpdater.abortSignal
298
+ });
299
+ }
300
+ catch (err) {
301
+ if (err === progressUpdater.abortSignal?.reason)
302
+ process.exit(0);
303
+ throw err;
304
+ }
305
+ finally {
306
+ if (llama.logLevel === LlamaLogLevel.debug) {
307
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
308
+ console.info();
309
+ }
310
+ }
311
+ });
312
+ const context = await withOra({
202
313
  loading: chalk.blue("Creating context"),
203
314
  success: chalk.blue("Context created"),
204
- fail: chalk.blue("Failed to create context")
205
- }, async () => new LlamaContext({
206
- model,
207
- contextSize,
208
- threads
209
- }));
315
+ fail: chalk.blue("Failed to create context"),
316
+ useStatusLogs: debug
317
+ }, async () => {
318
+ try {
319
+ return await model.createContext({
320
+ contextSize: contextSize != null ? contextSize : undefined,
321
+ batchSize: batchSize != null ? batchSize : undefined,
322
+ threads,
323
+ ignoreMemorySafetyChecks: gpuLayers != null || contextSize != null
324
+ });
325
+ }
326
+ finally {
327
+ if (llama.logLevel === LlamaLogLevel.debug) {
328
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
329
+ console.info();
330
+ }
331
+ }
332
+ });
210
333
  const grammar = jsonSchemaGrammarFilePath != null
211
- ? new LlamaJsonSchemaGrammar(await fs.readJson(path.resolve(process.cwd(), jsonSchemaGrammarFilePath)))
334
+ ? new LlamaJsonSchemaGrammar(llama, await fs.readJson(path.resolve(process.cwd(), jsonSchemaGrammarFilePath)))
212
335
  : grammarArg !== "text"
213
- ? await LlamaGrammar.getFor(grammarArg)
336
+ ? await LlamaGrammar.getFor(llama, grammarArg)
214
337
  : undefined;
215
- const bos = model.tokens.bosString; // bos = beginning of sequence
216
- const eos = model.tokens.bosString; // eos = end of sequence
217
- const chatWrapper = getChatWrapper(wrapper, {
218
- bosString: bos,
338
+ const chatWrapper = resolveChatWrapper({
339
+ type: wrapper,
340
+ bosString: model.tokens.bosString,
219
341
  filename: model.filename,
220
- typeDescription: model.typeDescription
221
- });
342
+ fileInfo: model.fileInfo,
343
+ tokenizer: model.tokenizer,
344
+ noJinja
345
+ }) ?? new GeneralChatWrapper();
346
+ const contextSequence = context.getSequence();
222
347
  const session = new LlamaChatSession({
223
- contextSequence: context.getSequence(),
348
+ contextSequence,
224
349
  systemPrompt,
225
350
  chatWrapper: chatWrapper
226
351
  });
352
+ let lastTokenMeterState = contextSequence.tokenMeter.getState();
353
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
227
354
  if (grammarArg != "text" && jsonSchemaGrammarFilePath != null)
228
355
  console.warn(chalk.yellow("Both `grammar` and `jsonSchemaGrammarFile` were specified. `jsonSchemaGrammarFile` will be used."));
229
- console.info(`${chalk.yellow("Context size:")} ${context.contextSize}`);
230
- console.info(`${chalk.yellow("Train context size:")} ${model.trainContextSize}`);
231
- console.info(`${chalk.yellow("Model type:")} ${model.typeDescription}`);
232
- console.info(`${chalk.yellow("BOS:")} ${bos}`);
233
- console.info(`${chalk.yellow("EOS:")} ${eos}`);
234
- console.info(`${chalk.yellow("Chat wrapper:")} ${chatWrapper.wrapperName}`);
235
- console.info(`${chalk.yellow("Repeat penalty:")} ${repeatPenalty} (apply to last ${lastTokensRepeatPenalty} tokens)`);
236
- if (repeatFrequencyPenalty != null)
237
- console.info(`${chalk.yellow("Repeat frequency penalty:")} ${repeatFrequencyPenalty}`);
238
- if (repeatPresencePenalty != null)
239
- console.info(`${chalk.yellow("Repeat presence penalty:")} ${repeatPresencePenalty}`);
240
- if (!penalizeRepeatingNewLine)
241
- console.info(`${chalk.yellow("Penalize repeating new line:")} disabled`);
242
- if (jsonSchemaGrammarFilePath != null)
243
- console.info(`${chalk.yellow("JSON schema grammar file:")} ${path.relative(process.cwd(), path.resolve(process.cwd(), jsonSchemaGrammarFilePath))}`);
244
- else if (grammarArg !== "text")
245
- console.info(`${chalk.yellow("Grammar:")} ${grammarArg}`);
246
356
  if (environmentFunctions && grammar != null) {
247
357
  console.warn(chalk.yellow("Environment functions are disabled since a grammar is already specified"));
248
358
  environmentFunctions = false;
249
359
  }
360
+ const padTitle = "Context".length + 1;
361
+ await printCommonInfoLines({
362
+ context,
363
+ minTitleLength: padTitle,
364
+ printBos: true,
365
+ printEos: true,
366
+ logBatchSize,
367
+ tokenMeterEnabled: meter
368
+ });
369
+ printInfoLine({
370
+ title: "Chat",
371
+ padTitle: padTitle,
372
+ info: [{
373
+ title: "Wrapper",
374
+ value: chatWrapper.wrapperName
375
+ }, {
376
+ title: "Repeat penalty",
377
+ value: `${repeatPenalty} (apply to last ${lastTokensRepeatPenalty} tokens)`
378
+ }, {
379
+ show: repeatFrequencyPenalty != null,
380
+ title: "Repeat frequency penalty",
381
+ value: String(repeatFrequencyPenalty)
382
+ }, {
383
+ show: repeatPresencePenalty != null,
384
+ title: "Repeat presence penalty",
385
+ value: String(repeatPresencePenalty)
386
+ }, {
387
+ show: !penalizeRepeatingNewLine,
388
+ title: "Penalize repeating new line",
389
+ value: "disabled"
390
+ }, {
391
+ show: jsonSchemaGrammarFilePath != null,
392
+ title: "JSON schema grammar file",
393
+ value: () => path.relative(process.cwd(), path.resolve(process.cwd(), jsonSchemaGrammarFilePath ?? ""))
394
+ }, {
395
+ show: jsonSchemaGrammarFilePath == null && grammarArg !== "text",
396
+ title: "Grammar",
397
+ value: grammarArg
398
+ }, {
399
+ show: environmentFunctions,
400
+ title: "Environment functions",
401
+ value: "enabled"
402
+ }]
403
+ });
250
404
  // this is for ora to not interfere with readline
251
405
  await new Promise(resolve => setTimeout(resolve, 1));
252
406
  const replHistory = await ReplHistory.load(chatCommandHistoryFilePath, !noHistory);
@@ -260,8 +414,11 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, prompt, wrap
260
414
  rl.close();
261
415
  return res;
262
416
  }
417
+ void session.preloadPrompt("");
263
418
  // eslint-disable-next-line no-constant-condition
264
419
  while (true) {
420
+ let hadNoWhitespaceTextInThisIteration = false;
421
+ let nextPrintLeftovers = "";
265
422
  const input = initialPrompt != null
266
423
  ? initialPrompt
267
424
  : await getPrompt();
@@ -277,8 +434,9 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, prompt, wrap
277
434
  const [startColor, endColor] = chalk.blue("MIDDLE").split("MIDDLE");
278
435
  process.stdout.write(startColor);
279
436
  await session.prompt(input, {
280
- grammar: grammar,
437
+ grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
281
438
  temperature,
439
+ minP,
282
440
  topK,
283
441
  topP,
284
442
  repeatPenalty: {
@@ -294,14 +452,42 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, prompt, wrap
294
452
  ? undefined
295
453
  : maxTokens,
296
454
  onToken(chunk) {
297
- process.stdout.write(model.detokenize(chunk));
455
+ let text = nextPrintLeftovers + model.detokenize(chunk);
456
+ nextPrintLeftovers = "";
457
+ if (trimWhitespace) {
458
+ if (!hadNoWhitespaceTextInThisIteration) {
459
+ text = text.trimStart();
460
+ if (text.length > 0)
461
+ hadNoWhitespaceTextInThisIteration = true;
462
+ }
463
+ const textWithTrimmedEnd = text.trimEnd();
464
+ if (textWithTrimmedEnd.length < text.length) {
465
+ nextPrintLeftovers = text.slice(textWithTrimmedEnd.length);
466
+ text = textWithTrimmedEnd;
467
+ }
468
+ }
469
+ process.stdout.write(text);
298
470
  },
299
471
  functions: (grammar == null && environmentFunctions)
300
472
  ? defaultEnvironmentFunctions
301
- : undefined
473
+ : undefined,
474
+ trimWhitespaceSuffix: trimWhitespace
302
475
  });
303
476
  process.stdout.write(endColor);
304
477
  console.log();
478
+ if (printTimings) {
479
+ if (LlamaLogLevelGreaterThan(llama.logLevel, LlamaLogLevel.info))
480
+ llama.logLevel = LlamaLogLevel.info;
481
+ await context.printTimings();
482
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
483
+ llama.logLevel = llamaLogLevel;
484
+ }
485
+ if (meter) {
486
+ const newTokenMeterState = contextSequence.tokenMeter.getState();
487
+ const tokenMeterDiff = TokenMeter.diff(newTokenMeterState, lastTokenMeterState);
488
+ lastTokenMeterState = newTokenMeterState;
489
+ console.info(`${chalk.dim("Input tokens:")} ${String(tokenMeterDiff.usedInputTokens).padEnd(5, " ")} ${chalk.dim("Output tokens:")} ${tokenMeterDiff.usedOutputTokens}`);
490
+ }
305
491
  }
306
492
  }
307
493
  const defaultEnvironmentFunctions = {
@@ -318,33 +504,4 @@ const defaultEnvironmentFunctions = {
318
504
  }
319
505
  })
320
506
  };
321
- function getChatWrapper(wrapper, { bosString, filename, typeDescription }) {
322
- switch (wrapper) {
323
- case "general":
324
- return new GeneralChatWrapper();
325
- case "llamaChat":
326
- return new LlamaChatWrapper();
327
- case "alpacaChat":
328
- return new AlpacaChatWrapper();
329
- case "functionary":
330
- return new FunctionaryChatWrapper();
331
- case "chatML":
332
- return new ChatMLChatWrapper();
333
- case "falconChat":
334
- return new FalconChatWrapper();
335
- default:
336
- }
337
- if (wrapper === "auto") {
338
- const chatWrapper = resolveChatWrapperBasedOnModel({
339
- bosString,
340
- filename,
341
- typeDescription
342
- });
343
- if (chatWrapper != null)
344
- return new chatWrapper();
345
- return new GeneralChatWrapper();
346
- }
347
- void (wrapper);
348
- throw new Error("Unknown wrapper: " + wrapper);
349
- }
350
507
  //# sourceMappingURL=ChatCommand.js.map