node-llama-cpp 3.0.0-beta.9 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (684)
  1. package/README.md +42 -27
  2. package/bins/_linux-arm64.moved.txt +1 -0
  3. package/bins/_linux-armv7l.moved.txt +1 -0
  4. package/bins/_linux-x64-cuda.moved.txt +1 -0
  5. package/bins/_linux-x64-vulkan.moved.txt +1 -0
  6. package/bins/_linux-x64.moved.txt +1 -0
  7. package/bins/_mac-arm64-metal.moved.txt +1 -0
  8. package/bins/_mac-x64.moved.txt +1 -0
  9. package/bins/_win-arm64.moved.txt +1 -0
  10. package/bins/_win-x64-cuda.moved.txt +1 -0
  11. package/bins/_win-x64-vulkan.moved.txt +1 -0
  12. package/bins/_win-x64.moved.txt +1 -0
  13. package/dist/ChatWrapper.d.ts +19 -39
  14. package/dist/ChatWrapper.js +129 -72
  15. package/dist/ChatWrapper.js.map +1 -1
  16. package/dist/apiDocsIndex.d.ts +1 -0
  17. package/dist/apiDocsIndex.js +7 -0
  18. package/dist/apiDocsIndex.js.map +1 -0
  19. package/dist/bindings/AddonTypes.d.ts +88 -20
  20. package/dist/bindings/Llama.d.ts +43 -3
  21. package/dist/bindings/Llama.js +193 -23
  22. package/dist/bindings/Llama.js.map +1 -1
  23. package/dist/bindings/consts.d.ts +2 -0
  24. package/dist/bindings/consts.js +13 -0
  25. package/dist/bindings/consts.js.map +1 -0
  26. package/dist/bindings/getLlama.d.ts +123 -18
  27. package/dist/bindings/getLlama.js +264 -75
  28. package/dist/bindings/getLlama.js.map +1 -1
  29. package/dist/bindings/types.d.ts +29 -5
  30. package/dist/bindings/types.js +51 -2
  31. package/dist/bindings/types.js.map +1 -1
  32. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  33. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  34. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  35. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  36. package/dist/bindings/utils/asyncEvery.js +15 -0
  37. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  38. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  39. package/dist/bindings/utils/asyncSome.js +27 -0
  40. package/dist/bindings/utils/asyncSome.js.map +1 -0
  41. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -1
  42. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
  43. package/dist/bindings/utils/cloneLlamaCppRepo.js +39 -28
  44. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  45. package/dist/bindings/utils/compileLLamaCpp.d.ts +11 -3
  46. package/dist/bindings/utils/compileLLamaCpp.js +250 -81
  47. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  48. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
  49. package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
  50. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  51. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  52. package/dist/bindings/utils/detectGlibc.js +46 -0
  53. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  54. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
  55. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  56. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  57. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +14 -6
  58. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
  59. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -1
  60. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
  61. package/dist/bindings/utils/getGpuTypesToUseForOption.js +39 -0
  62. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  63. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  64. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  65. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  66. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  67. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  68. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  69. package/dist/bindings/utils/getPlatform.js.map +1 -1
  70. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  71. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  72. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  73. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  74. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  75. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  76. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  77. package/dist/bindings/utils/hasFileInPath.js +34 -0
  78. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  79. package/dist/bindings/utils/lastBuildInfo.js.map +1 -1
  80. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +1 -1
  81. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +3 -9
  82. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -1
  83. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  84. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  85. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  86. package/dist/bindings/utils/resolveCustomCmakeOptions.js +26 -26
  87. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
  88. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  89. package/dist/bindings/utils/testBindingBinary.js +100 -0
  90. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  91. package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
  92. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  93. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  94. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  95. package/dist/chatWrappers/AlpacaChatWrapper.js +10 -2
  96. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  97. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -14
  98. package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
  99. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  100. package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
  101. package/dist/chatWrappers/FalconChatWrapper.js +39 -21
  102. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  103. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
  104. package/dist/chatWrappers/FunctionaryChatWrapper.js +514 -118
  105. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  106. package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
  107. package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
  108. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  109. package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
  110. package/dist/chatWrappers/GeneralChatWrapper.js +46 -22
  111. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  112. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
  113. package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +37 -20
  114. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  115. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
  116. package/dist/chatWrappers/Llama3ChatWrapper.js +173 -0
  117. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  118. package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +35 -0
  119. package/dist/chatWrappers/Llama3_1ChatWrapper.js +277 -0
  120. package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
  121. package/dist/chatWrappers/MistralChatWrapper.d.ts +15 -0
  122. package/dist/chatWrappers/MistralChatWrapper.js +169 -0
  123. package/dist/chatWrappers/MistralChatWrapper.js.map +1 -0
  124. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +100 -0
  125. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +409 -0
  126. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  127. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +60 -0
  128. package/dist/chatWrappers/generic/TemplateChatWrapper.js +204 -0
  129. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  130. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
  131. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  132. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  133. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +57 -0
  134. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +119 -0
  135. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  136. package/dist/chatWrappers/utils/chunkChatItems.d.ts +10 -0
  137. package/dist/chatWrappers/utils/chunkChatItems.js +44 -0
  138. package/dist/chatWrappers/utils/chunkChatItems.js.map +1 -0
  139. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  140. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +221 -0
  141. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  142. package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
  143. package/dist/chatWrappers/utils/jsonDumps.js +18 -0
  144. package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
  145. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +95 -0
  146. package/dist/chatWrappers/utils/resolveChatWrapper.js +335 -0
  147. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  148. package/dist/cli/cli.js +19 -11
  149. package/dist/cli/cli.js.map +1 -1
  150. package/dist/cli/commands/ChatCommand.d.ts +16 -7
  151. package/dist/cli/commands/ChatCommand.js +321 -190
  152. package/dist/cli/commands/ChatCommand.js.map +1 -1
  153. package/dist/cli/commands/CompleteCommand.d.ts +31 -0
  154. package/dist/cli/commands/CompleteCommand.js +402 -0
  155. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  156. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  157. package/dist/cli/commands/DebugCommand.js +54 -0
  158. package/dist/cli/commands/DebugCommand.js.map +1 -0
  159. package/dist/cli/commands/InfillCommand.d.ts +33 -0
  160. package/dist/cli/commands/InfillCommand.js +438 -0
  161. package/dist/cli/commands/InfillCommand.js.map +1 -0
  162. package/dist/cli/commands/InitCommand.d.ts +11 -0
  163. package/dist/cli/commands/InitCommand.js +195 -0
  164. package/dist/cli/commands/InitCommand.js.map +1 -0
  165. package/dist/cli/commands/OnPostInstallCommand.js +6 -2
  166. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  167. package/dist/cli/commands/PullCommand.d.ts +13 -0
  168. package/dist/cli/commands/PullCommand.js +158 -0
  169. package/dist/cli/commands/PullCommand.js.map +1 -0
  170. package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
  171. package/dist/cli/commands/inspect/InspectCommand.js +21 -0
  172. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  173. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +12 -0
  174. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +225 -0
  175. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -0
  176. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
  177. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +149 -0
  178. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  179. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  180. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +202 -0
  181. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  182. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +18 -0
  183. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +629 -0
  184. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  185. package/dist/cli/commands/source/SourceCommand.d.ts +4 -0
  186. package/dist/cli/commands/source/SourceCommand.js +19 -0
  187. package/dist/cli/commands/source/SourceCommand.js.map +1 -0
  188. package/dist/cli/commands/source/commands/BuildCommand.d.ts +16 -0
  189. package/dist/cli/commands/source/commands/BuildCommand.js +148 -0
  190. package/dist/cli/commands/source/commands/BuildCommand.js.map +1 -0
  191. package/dist/cli/commands/{ClearCommand.d.ts → source/commands/ClearCommand.d.ts} +1 -1
  192. package/dist/cli/commands/{ClearCommand.js → source/commands/ClearCommand.js} +11 -10
  193. package/dist/cli/commands/source/commands/ClearCommand.js.map +1 -0
  194. package/dist/cli/commands/{DownloadCommand.d.ts → source/commands/DownloadCommand.d.ts} +5 -4
  195. package/dist/cli/commands/source/commands/DownloadCommand.js +217 -0
  196. package/dist/cli/commands/source/commands/DownloadCommand.js.map +1 -0
  197. package/dist/cli/projectTemplates.d.ts +7 -0
  198. package/dist/cli/projectTemplates.js +10 -0
  199. package/dist/cli/projectTemplates.js.map +1 -0
  200. package/dist/cli/recommendedModels.d.ts +2 -0
  201. package/dist/cli/recommendedModels.js +585 -0
  202. package/dist/cli/recommendedModels.js.map +1 -0
  203. package/dist/cli/startCreateCli.d.ts +2 -0
  204. package/dist/cli/startCreateCli.js +26 -0
  205. package/dist/cli/startCreateCli.js.map +1 -0
  206. package/dist/cli/utils/ConsoleInteraction.d.ts +22 -0
  207. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  208. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  209. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  210. package/dist/cli/utils/ConsoleTable.js +86 -0
  211. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  212. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  213. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  214. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  215. package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
  216. package/dist/cli/utils/consolePromptQuestion.js +82 -0
  217. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  218. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  219. package/dist/cli/utils/getReadablePath.js +14 -0
  220. package/dist/cli/utils/getReadablePath.js.map +1 -0
  221. package/dist/cli/utils/interactivelyAskForModel.d.ts +8 -0
  222. package/dist/cli/utils/interactivelyAskForModel.js +450 -0
  223. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
  224. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  225. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  226. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  227. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  228. package/dist/cli/utils/printCommonInfoLines.js +82 -0
  229. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  230. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  231. package/dist/cli/utils/printInfoLine.js +54 -0
  232. package/dist/cli/utils/printInfoLine.js.map +1 -0
  233. package/dist/cli/utils/projectTemplates.d.ts +19 -0
  234. package/dist/cli/utils/projectTemplates.js +47 -0
  235. package/dist/cli/utils/projectTemplates.js.map +1 -0
  236. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.d.ts +6 -0
  237. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js +14 -0
  238. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js.map +1 -0
  239. package/dist/cli/utils/resolveCommandGgufPath.d.ts +5 -0
  240. package/dist/cli/utils/resolveCommandGgufPath.js +72 -0
  241. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  242. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  243. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  244. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  245. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  246. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  247. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  248. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  249. package/dist/cli/utils/splitAnsiToLines.js +32 -0
  250. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  251. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
  252. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
  253. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
  254. package/dist/commands.d.ts +4 -3
  255. package/dist/commands.js +6 -3
  256. package/dist/commands.js.map +1 -1
  257. package/dist/config.d.ts +35 -4
  258. package/dist/config.js +58 -17
  259. package/dist/config.js.map +1 -1
  260. package/dist/consts.d.ts +4 -0
  261. package/dist/consts.js +11 -0
  262. package/dist/consts.js.map +1 -0
  263. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +151 -41
  264. package/dist/evaluator/LlamaChat/LlamaChat.js +1289 -437
  265. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  266. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
  267. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
  268. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
  269. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
  270. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
  271. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
  272. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
  273. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
  274. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
  275. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +27 -17
  276. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
  277. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +187 -13
  278. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +280 -53
  279. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  280. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +40 -0
  281. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
  282. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
  283. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +10 -2
  284. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
  285. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
  286. package/dist/evaluator/LlamaCompletion.d.ts +168 -0
  287. package/dist/evaluator/LlamaCompletion.js +470 -0
  288. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  289. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +62 -21
  290. package/dist/evaluator/LlamaContext/LlamaContext.js +501 -120
  291. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  292. package/dist/evaluator/LlamaContext/LlamaSampler.d.ts +1 -0
  293. package/dist/evaluator/LlamaContext/LlamaSampler.js +31 -0
  294. package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -0
  295. package/dist/evaluator/LlamaContext/types.d.ts +177 -16
  296. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  297. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  298. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  299. package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +5 -5
  300. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  301. package/dist/evaluator/LlamaEmbedding.d.ts +21 -0
  302. package/dist/evaluator/LlamaEmbedding.js +53 -0
  303. package/dist/evaluator/LlamaEmbedding.js.map +1 -0
  304. package/dist/evaluator/LlamaEmbeddingContext.d.ts +29 -19
  305. package/dist/evaluator/LlamaEmbeddingContext.js +36 -43
  306. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  307. package/dist/evaluator/LlamaGrammar.d.ts +16 -13
  308. package/dist/evaluator/LlamaGrammar.js +17 -10
  309. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  310. package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +7 -3
  311. package/dist/evaluator/LlamaGrammarEvaluationState.js +8 -4
  312. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
  313. package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +3 -0
  314. package/dist/evaluator/LlamaJsonSchemaGrammar.js +5 -3
  315. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
  316. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +255 -0
  317. package/dist/evaluator/LlamaModel/LlamaModel.js +780 -0
  318. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
  319. package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
  320. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
  321. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
  322. package/dist/evaluator/TokenBias.d.ts +34 -0
  323. package/dist/evaluator/TokenBias.js +65 -0
  324. package/dist/evaluator/TokenBias.js.map +1 -0
  325. package/dist/evaluator/TokenMeter.d.ts +45 -0
  326. package/dist/evaluator/TokenMeter.js +74 -0
  327. package/dist/evaluator/TokenMeter.js.map +1 -0
  328. package/dist/gguf/consts.d.ts +4 -0
  329. package/dist/gguf/consts.js +12 -0
  330. package/dist/gguf/consts.js.map +1 -0
  331. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  332. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  333. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  334. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  335. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  336. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  337. package/dist/gguf/fileReaders/GgufFileReader.d.ts +36 -0
  338. package/dist/gguf/fileReaders/GgufFileReader.js +109 -0
  339. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  340. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +16 -0
  341. package/dist/gguf/fileReaders/GgufFsFileReader.js +62 -0
  342. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  343. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +25 -0
  344. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +92 -0
  345. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  346. package/dist/gguf/insights/GgufInsights.d.ts +50 -0
  347. package/dist/gguf/insights/GgufInsights.js +401 -0
  348. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  349. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +146 -0
  350. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +226 -0
  351. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  352. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +19 -0
  353. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +78 -0
  354. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  355. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +15 -0
  356. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +183 -0
  357. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  358. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  359. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  360. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  361. package/dist/gguf/parser/GgufV2Parser.d.ts +20 -0
  362. package/dist/gguf/parser/GgufV2Parser.js +156 -0
  363. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  364. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  365. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  366. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  367. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  368. package/dist/gguf/parser/parseGguf.js +61 -0
  369. package/dist/gguf/parser/parseGguf.js.map +1 -0
  370. package/dist/gguf/readGgufFileInfo.d.ts +45 -0
  371. package/dist/gguf/readGgufFileInfo.js +71 -0
  372. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  373. package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
  374. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  375. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  376. package/dist/gguf/types/GgufMetadataTypes.d.ts +372 -0
  377. package/dist/gguf/types/GgufMetadataTypes.js +114 -0
  378. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  379. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  380. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  381. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  382. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  383. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  384. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  385. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +6 -0
  386. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +76 -0
  387. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  388. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  389. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  390. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  391. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  392. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  393. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  394. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  395. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  396. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  397. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  398. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
  399. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  400. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  401. package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
  402. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  403. package/dist/index.d.ts +39 -14
  404. package/dist/index.js +29 -8
  405. package/dist/index.js.map +1 -1
  406. package/dist/state.d.ts +2 -0
  407. package/dist/state.js +7 -0
  408. package/dist/state.js.map +1 -1
  409. package/dist/tsconfig.tsbuildinfo +1 -0
  410. package/dist/types.d.ts +131 -5
  411. package/dist/types.js.map +1 -1
  412. package/dist/utils/DisposeGuard.d.ts +13 -0
  413. package/dist/utils/DisposeGuard.js +120 -0
  414. package/dist/utils/DisposeGuard.js.map +1 -0
  415. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  416. package/dist/utils/InsufficientMemoryError.js +6 -0
  417. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  418. package/dist/utils/LlamaText.d.ts +73 -26
  419. package/dist/utils/LlamaText.js +475 -157
  420. package/dist/utils/LlamaText.js.map +1 -1
  421. package/dist/utils/LruCache.d.ts +12 -0
  422. package/dist/utils/LruCache.js +44 -0
  423. package/dist/utils/LruCache.js.map +1 -0
  424. package/dist/utils/OverridesObject.d.ts +7 -0
  425. package/dist/utils/OverridesObject.js +2 -0
  426. package/dist/utils/OverridesObject.js.map +1 -0
  427. package/dist/utils/ReplHistory.js +5 -1
  428. package/dist/utils/ReplHistory.js.map +1 -1
  429. package/dist/utils/StopGenerationDetector.d.ts +27 -8
  430. package/dist/utils/StopGenerationDetector.js +108 -22
  431. package/dist/utils/StopGenerationDetector.js.map +1 -1
  432. package/dist/utils/ThreadsSplitter.d.ts +32 -0
  433. package/dist/utils/ThreadsSplitter.js +177 -0
  434. package/dist/utils/ThreadsSplitter.js.map +1 -0
  435. package/dist/utils/TokenStreamRegulator.d.ts +10 -4
  436. package/dist/utils/TokenStreamRegulator.js +102 -10
  437. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  438. package/dist/utils/UnsupportedError.d.ts +2 -0
  439. package/dist/utils/UnsupportedError.js +7 -0
  440. package/dist/utils/UnsupportedError.js.map +1 -0
  441. package/dist/utils/appendUserMessageToChatHistory.d.ts +4 -0
  442. package/dist/utils/appendUserMessageToChatHistory.js +4 -0
  443. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
  444. package/dist/utils/clearTempFolder.js.map +1 -1
  445. package/dist/utils/cmake.js +23 -10
  446. package/dist/utils/cmake.js.map +1 -1
  447. package/dist/utils/compareTokens.d.ts +1 -1
  448. package/dist/utils/compareTokens.js.map +1 -1
  449. package/dist/utils/createModelDownloader.d.ts +199 -0
  450. package/dist/utils/createModelDownloader.js +405 -0
  451. package/dist/utils/createModelDownloader.js.map +1 -0
  452. package/dist/utils/findBestOption.d.ts +4 -0
  453. package/dist/utils/findBestOption.js +15 -0
  454. package/dist/utils/findBestOption.js.map +1 -0
  455. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
  456. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
  457. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  458. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  459. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
  460. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
  461. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
  462. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
  463. package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
  464. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  465. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  466. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  467. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  468. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  469. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  470. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  471. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  472. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  473. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  474. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
  475. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
  476. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  477. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  478. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
  479. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
  480. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
  481. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  482. package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
  483. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  484. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  485. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  486. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
  487. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
  488. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  489. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
  490. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
  491. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
  492. package/dist/utils/gbnfJson/types.d.ts +3 -0
  493. package/dist/utils/gbnfJson/types.js.map +1 -1
  494. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
  495. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
  496. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
  497. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
  498. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
  499. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  500. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +3 -3
  501. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  502. package/dist/utils/getBuildDefaults.d.ts +1 -2
  503. package/dist/utils/getBuildDefaults.js +2 -3
  504. package/dist/utils/getBuildDefaults.js.map +1 -1
  505. package/dist/utils/getConsoleLogPrefix.d.ts +1 -1
  506. package/dist/utils/getConsoleLogPrefix.js +5 -4
  507. package/dist/utils/getConsoleLogPrefix.js.map +1 -1
  508. package/dist/utils/getGrammarsFolder.js +1 -1
  509. package/dist/utils/getGrammarsFolder.js.map +1 -1
  510. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  511. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  512. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  513. package/dist/utils/getReadableContextSize.d.ts +1 -0
  514. package/dist/utils/getReadableContextSize.js +7 -0
  515. package/dist/utils/getReadableContextSize.js.map +1 -0
  516. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
  517. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  518. package/dist/utils/gitReleaseBundles.js +68 -1
  519. package/dist/utils/gitReleaseBundles.js.map +1 -1
  520. package/dist/utils/isToken.d.ts +2 -0
  521. package/dist/utils/isToken.js +4 -0
  522. package/dist/utils/isToken.js.map +1 -0
  523. package/dist/utils/isUrl.d.ts +1 -0
  524. package/dist/utils/isUrl.js +15 -0
  525. package/dist/utils/isUrl.js.map +1 -0
  526. package/dist/utils/mergeUnionTypes.d.ts +10 -0
  527. package/dist/utils/mergeUnionTypes.js +2 -0
  528. package/dist/utils/mergeUnionTypes.js.map +1 -0
  529. package/dist/utils/modelFileAccesTokens.d.ts +4 -0
  530. package/dist/utils/modelFileAccesTokens.js +40 -0
  531. package/dist/utils/modelFileAccesTokens.js.map +1 -0
  532. package/dist/utils/parseModelFileName.d.ts +1 -0
  533. package/dist/utils/parseModelFileName.js +6 -1
  534. package/dist/utils/parseModelFileName.js.map +1 -1
  535. package/dist/utils/parseTextTemplate.d.ts +66 -0
  536. package/dist/utils/parseTextTemplate.js +116 -0
  537. package/dist/utils/parseTextTemplate.js.map +1 -0
  538. package/dist/utils/prettyPrintObject.d.ts +10 -1
  539. package/dist/utils/prettyPrintObject.js +61 -15
  540. package/dist/utils/prettyPrintObject.js.map +1 -1
  541. package/dist/utils/pushAll.d.ts +6 -0
  542. package/dist/utils/pushAll.js +11 -0
  543. package/dist/utils/pushAll.js.map +1 -0
  544. package/dist/utils/removeNullFields.d.ts +2 -2
  545. package/dist/utils/removeNullFields.js.map +1 -1
  546. package/dist/utils/resolveGithubRelease.d.ts +2 -2
  547. package/dist/utils/resolveGithubRelease.js.map +1 -1
  548. package/dist/utils/resolveLastTokens.d.ts +2 -0
  549. package/dist/utils/resolveLastTokens.js +12 -0
  550. package/dist/utils/resolveLastTokens.js.map +1 -0
  551. package/dist/utils/runtime.d.ts +4 -0
  552. package/dist/utils/runtime.js +8 -0
  553. package/dist/utils/runtime.js.map +1 -0
  554. package/dist/utils/safeEventCallback.d.ts +6 -0
  555. package/dist/utils/safeEventCallback.js +29 -0
  556. package/dist/utils/safeEventCallback.js.map +1 -0
  557. package/dist/utils/spawnCommand.d.ts +11 -2
  558. package/dist/utils/spawnCommand.js +55 -7
  559. package/dist/utils/spawnCommand.js.map +1 -1
  560. package/dist/utils/tokenizeInput.d.ts +1 -1
  561. package/dist/utils/tokenizeInput.js +6 -3
  562. package/dist/utils/tokenizeInput.js.map +1 -1
  563. package/dist/utils/transformPromisable.d.ts +40 -0
  564. package/dist/utils/transformPromisable.js +64 -0
  565. package/dist/utils/transformPromisable.js.map +1 -0
  566. package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
  567. package/dist/utils/truncateTextAndRoundToWords.js +32 -0
  568. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
  569. package/dist/utils/utilTypes.d.ts +3 -0
  570. package/dist/utils/utilTypes.js +2 -0
  571. package/dist/utils/utilTypes.js.map +1 -0
  572. package/dist/utils/waitForLockfileRelease.js.map +1 -1
  573. package/dist/utils/withLockfile.js.map +1 -1
  574. package/dist/utils/withOra.d.ts +2 -0
  575. package/dist/utils/withOra.js +16 -6
  576. package/dist/utils/withOra.js.map +1 -1
  577. package/dist/utils/withProgressLog.d.ts +22 -0
  578. package/dist/utils/withProgressLog.js +211 -0
  579. package/dist/utils/withProgressLog.js.map +1 -0
  580. package/dist/utils/withStatusLogs.js +1 -1
  581. package/dist/utils/withStatusLogs.js.map +1 -1
  582. package/dist/utils/wrapAbortSignal.d.ts +1 -0
  583. package/dist/utils/wrapAbortSignal.js +9 -0
  584. package/dist/utils/wrapAbortSignal.js.map +1 -0
  585. package/llama/CMakeLists.txt +134 -5
  586. package/llama/addon/AddonContext.cpp +629 -0
  587. package/llama/addon/AddonContext.h +52 -0
  588. package/llama/addon/AddonGrammar.cpp +39 -0
  589. package/llama/addon/AddonGrammar.h +19 -0
  590. package/llama/addon/AddonGrammarEvaluationState.cpp +25 -0
  591. package/llama/addon/AddonGrammarEvaluationState.h +17 -0
  592. package/llama/addon/AddonModel.cpp +672 -0
  593. package/llama/addon/AddonModel.h +61 -0
  594. package/llama/addon/AddonModelData.cpp +25 -0
  595. package/llama/addon/AddonModelData.h +15 -0
  596. package/llama/addon/AddonModelLora.cpp +105 -0
  597. package/llama/addon/AddonModelLora.h +28 -0
  598. package/llama/addon/AddonSampler.cpp +513 -0
  599. package/llama/addon/AddonSampler.h +65 -0
  600. package/llama/addon/RingBuffer.h +109 -0
  601. package/llama/addon/addon.cpp +223 -0
  602. package/llama/addon/addonGlobals.cpp +22 -0
  603. package/llama/addon/addonGlobals.h +12 -0
  604. package/llama/addon/globals/addonLog.cpp +136 -0
  605. package/llama/addon/globals/addonLog.h +21 -0
  606. package/llama/addon/globals/addonProgress.cpp +15 -0
  607. package/llama/addon/globals/addonProgress.h +15 -0
  608. package/llama/addon/globals/getGpuInfo.cpp +108 -0
  609. package/llama/addon/globals/getGpuInfo.h +6 -0
  610. package/llama/binariesGithubRelease.json +1 -1
  611. package/llama/gitRelease.bundle +0 -0
  612. package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
  613. package/llama/gpuInfo/cuda-gpu-info.h +10 -0
  614. package/llama/gpuInfo/metal-gpu-info.h +8 -0
  615. package/llama/gpuInfo/metal-gpu-info.mm +30 -0
  616. package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
  617. package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
  618. package/llama/grammars/README.md +297 -6
  619. package/llama/grammars/json.gbnf +4 -4
  620. package/llama/grammars/json_arr.gbnf +4 -4
  621. package/llama/llama.cpp.info.json +1 -1
  622. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  623. package/package.json +109 -59
  624. package/templates/packed/electron-typescript-react.json +1 -0
  625. package/templates/packed/node-typescript.json +1 -0
  626. package/dist/AbortError.d.ts +0 -2
  627. package/dist/AbortError.js +0 -7
  628. package/dist/AbortError.js.map +0 -1
  629. package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
  630. package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
  631. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  632. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
  633. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  634. package/dist/cli/commands/BuildCommand.d.ts +0 -11
  635. package/dist/cli/commands/BuildCommand.js +0 -106
  636. package/dist/cli/commands/BuildCommand.js.map +0 -1
  637. package/dist/cli/commands/ClearCommand.js.map +0 -1
  638. package/dist/cli/commands/DownloadCommand.js +0 -169
  639. package/dist/cli/commands/DownloadCommand.js.map +0 -1
  640. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -22
  641. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -121
  642. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
  643. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  644. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  645. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  646. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  647. package/dist/evaluator/LlamaModel.d.ts +0 -120
  648. package/dist/evaluator/LlamaModel.js +0 -320
  649. package/dist/evaluator/LlamaModel.js.map +0 -1
  650. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
  651. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
  652. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
  653. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  654. package/dist/utils/parseModelTypeDescription.js +0 -9
  655. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  656. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  657. package/dist/utils/resolveChatWrapper.js +0 -16
  658. package/dist/utils/resolveChatWrapper.js.map +0 -1
  659. package/llama/addon.cpp +0 -950
  660. package/llamaBins/linux-arm64/.buildMetadata.json +0 -1
  661. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  662. package/llamaBins/linux-armv7l/.buildMetadata.json +0 -1
  663. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  664. package/llamaBins/linux-x64/.buildMetadata.json +0 -1
  665. package/llamaBins/linux-x64/llama-addon.node +0 -0
  666. package/llamaBins/linux-x64-cuda/.buildMetadata.json +0 -1
  667. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  668. package/llamaBins/mac-arm64-metal/.buildMetadata.json +0 -1
  669. package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -6119
  670. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  671. package/llamaBins/mac-x64/.buildMetadata.json +0 -1
  672. package/llamaBins/mac-x64/llama-addon.node +0 -0
  673. package/llamaBins/win-x64/.buildMetadata.json +0 -1
  674. package/llamaBins/win-x64/llama-addon.exp +0 -0
  675. package/llamaBins/win-x64/llama-addon.lib +0 -0
  676. package/llamaBins/win-x64/llama-addon.node +0 -0
  677. package/llamaBins/win-x64-cuda/.buildMetadata.json +0 -1
  678. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  679. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  680. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  681. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  682. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  683. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  684. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
@@ -3,231 +3,272 @@ import process from "process";
3
3
  import path from "path";
4
4
  import chalk from "chalk";
5
5
  import fs from "fs-extra";
6
- import { chatCommandHistoryFilePath, defaultChatSystemPrompt } from "../../config.js";
7
- import { LlamaChatWrapper } from "../../chatWrappers/LlamaChatWrapper.js";
8
- import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
9
- import { ChatMLChatWrapper } from "../../chatWrappers/ChatMLChatWrapper.js";
10
- import { resolveChatWrapperBasedOnModel } from "../../chatWrappers/resolveChatWrapperBasedOnModel.js";
11
- import { FalconChatWrapper } from "../../chatWrappers/FalconChatWrapper.js";
6
+ import { chatCommandHistoryFilePath, defaultChatSystemPrompt, documentationPageUrls } from "../../config.js";
12
7
  import { getIsInDocumentationMode } from "../../state.js";
13
8
  import { ReplHistory } from "../../utils/ReplHistory.js";
14
- import withStatusLogs from "../../utils/withStatusLogs.js";
15
- import { AlpacaChatWrapper } from "../../chatWrappers/AlpacaChatWrapper.js";
16
- import { FunctionaryChatWrapper } from "../../chatWrappers/FunctionaryChatWrapper.js";
17
9
  import { defineChatSessionFunction } from "../../evaluator/LlamaChatSession/utils/defineChatSessionFunction.js";
18
10
  import { getLlama } from "../../bindings/getLlama.js";
19
11
  import { LlamaGrammar } from "../../evaluator/LlamaGrammar.js";
20
12
  import { LlamaChatSession } from "../../evaluator/LlamaChatSession/LlamaChatSession.js";
21
- import { LlamaModel } from "../../evaluator/LlamaModel.js";
22
- import { LlamaContext } from "../../evaluator/LlamaContext/LlamaContext.js";
23
13
  import { LlamaJsonSchemaGrammar } from "../../evaluator/LlamaJsonSchemaGrammar.js";
24
- import { LlamaLogLevel } from "../../bindings/types.js";
25
- const modelWrappers = ["auto", "general", "llamaChat", "alpacaChat", "functionary", "chatML", "falconChat"];
14
+ import { LlamaLogLevel, LlamaLogLevelGreaterThan, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption } from "../../bindings/types.js";
15
+ import withOra from "../../utils/withOra.js";
16
+ import { TokenMeter } from "../../evaluator/TokenMeter.js";
17
+ import { printInfoLine } from "../utils/printInfoLine.js";
18
+ import { resolveChatWrapper, specializedChatWrapperTypeNames } from "../../chatWrappers/utils/resolveChatWrapper.js";
19
+ import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
20
+ import { printCommonInfoLines } from "../utils/printCommonInfoLines.js";
21
+ import { resolveCommandGgufPath } from "../utils/resolveCommandGgufPath.js";
22
+ import { withProgressLog } from "../../utils/withProgressLog.js";
23
+ import { resolveHeaderFlag } from "../utils/resolveHeaderFlag.js";
24
+ import { withCliCommandDescriptionDocsUrl } from "../utils/withCliCommandDescriptionDocsUrl.js";
25
+ import { ConsoleInteraction, ConsoleInteractionKey } from "../utils/ConsoleInteraction.js";
26
26
  export const ChatCommand = {
27
- command: "chat",
28
- describe: "Chat with a Llama model",
27
+ command: "chat [modelPath]",
28
+ describe: withCliCommandDescriptionDocsUrl("Chat with a model", documentationPageUrls.CLI.Chat),
29
29
  builder(yargs) {
30
30
  const isInDocumentationMode = getIsInDocumentationMode();
31
31
  return yargs
32
- .option("model", {
33
- alias: "m",
32
+ .option("modelPath", {
33
+ alias: ["m", "model", "path", "url"],
34
+ type: "string",
35
+ description: "Model file to use for the chat. Can be a path to a local file or a URL of a model file to download. Leave empty to choose from a list of recommended models"
36
+ })
37
+ .option("header", {
38
+ alias: ["H"],
34
39
  type: "string",
35
- demandOption: true,
36
- description: "Llama model file to use for the chat",
37
- group: "Required:"
40
+ array: true,
41
+ description: "Headers to use when downloading a model from a URL, in the format `key: value`. You can pass this option multiple times to add multiple headers."
42
+ })
43
+ .option("gpu", {
44
+ type: "string",
45
+ // yargs types don't support passing `false` as a choice, although it is supported by yargs
46
+ choices: nodeLlamaCppGpuOptions,
47
+ coerce: (value) => {
48
+ if (value == null || value == "")
49
+ return undefined;
50
+ return parseNodeLlamaCppGpuOption(value);
51
+ },
52
+ defaultDescription: "Uses the latest local build, and fallbacks to \"auto\"",
53
+ description: "Compute layer implementation type to use for llama.cpp. If omitted, uses the latest local build, and fallbacks to \"auto\""
38
54
  })
39
55
  .option("systemInfo", {
40
56
  alias: "i",
41
57
  type: "boolean",
42
58
  default: false,
43
- description: "Print llama.cpp system info",
44
- group: "Optional:"
59
+ description: "Print llama.cpp system info"
45
60
  })
46
61
  .option("systemPrompt", {
47
62
  alias: "s",
48
63
  type: "string",
49
- default: defaultChatSystemPrompt,
50
- defaultDescription: " ",
51
64
  description: "System prompt to use against the model" +
52
- (isInDocumentationMode ? "" : (". [default value: " + defaultChatSystemPrompt.split("\n").join(" ") + "]")),
53
- group: "Optional:"
65
+ (isInDocumentationMode ? "" : (". [the default value is determined by the chat wrapper, but is usually: " + defaultChatSystemPrompt.split("\n").join(" ") + "]"))
54
66
  })
55
67
  .option("systemPromptFile", {
56
68
  type: "string",
57
- description: "Path to a file to load text from and use as as the model system prompt",
58
- group: "Optional:"
69
+ description: "Path to a file to load text from and use as as the model system prompt"
59
70
  })
60
71
  .option("prompt", {
61
72
  type: "string",
62
- description: "First prompt to automatically send to the model when starting the chat",
63
- group: "Optional:"
73
+ description: "First prompt to automatically send to the model when starting the chat"
64
74
  })
65
75
  .option("promptFile", {
66
76
  type: "string",
67
- description: "Path to a file to load text from and use as a first prompt to automatically send to the model when starting the chat",
68
- group: "Optional:"
77
+ description: "Path to a file to load text from and use as a first prompt to automatically send to the model when starting the chat"
69
78
  })
70
79
  .option("wrapper", {
71
80
  alias: "w",
72
81
  type: "string",
73
82
  default: "auto",
74
- choices: modelWrappers,
75
- description: "Chat wrapper to use. Use `auto` to automatically select a wrapper based on the model's BOS token",
76
- group: "Optional:"
83
+ choices: ["auto", ...specializedChatWrapperTypeNames],
84
+ description: "Chat wrapper to use. Use `auto` to automatically select a wrapper based on the model's BOS token"
85
+ })
86
+ .option("noJinja", {
87
+ type: "boolean",
88
+ default: false,
89
+ description: "Don't use a Jinja wrapper, even if it's the best option for the model"
77
90
  })
78
91
  .option("contextSize", {
79
92
  alias: "c",
80
93
  type: "number",
81
- default: 1024 * 4,
82
94
  description: "Context size to use for the model context",
83
- group: "Optional:"
95
+ default: -1,
96
+ defaultDescription: "Automatically determined based on the available VRAM"
84
97
  })
85
98
  .option("batchSize", {
86
99
  alias: "b",
87
100
  type: "number",
88
- description: "Batch size to use for the model context. The default value is the context size",
89
- group: "Optional:"
101
+ description: "Batch size to use for the model context. The default value is the context size"
102
+ })
103
+ .option("flashAttention", {
104
+ alias: "fa",
105
+ type: "boolean",
106
+ default: false,
107
+ description: "Enable flash attention"
108
+ })
109
+ .option("noTrimWhitespace", {
110
+ type: "boolean",
111
+ alias: ["noTrim"],
112
+ default: false,
113
+ description: "Don't trim whitespaces from the model response"
90
114
  })
91
115
  .option("grammar", {
92
116
  alias: "g",
93
117
  type: "string",
94
118
  default: "text",
95
119
  choices: ["text", "json", "list", "arithmetic", "japanese", "chess"],
96
- description: "Restrict the model response to a specific grammar, like JSON for example",
97
- group: "Optional:"
120
+ description: "Restrict the model response to a specific grammar, like JSON for example"
98
121
  })
99
122
  .option("jsonSchemaGrammarFile", {
100
123
  alias: ["jsgf"],
101
124
  type: "string",
102
- description: "File path to a JSON schema file, to restrict the model response to only generate output that conforms to the JSON schema",
103
- group: "Optional:"
125
+ description: "File path to a JSON schema file, to restrict the model response to only generate output that conforms to the JSON schema"
104
126
  })
105
127
  .option("threads", {
106
128
  type: "number",
107
- default: 6,
108
- description: "Number of threads to use for the evaluation of tokens",
109
- group: "Optional:"
129
+ defaultDescription: "Number of cores that are useful for math on the current machine",
130
+ description: "Number of threads to use for the evaluation of tokens"
110
131
  })
111
132
  .option("temperature", {
112
133
  alias: "t",
113
134
  type: "number",
114
135
  default: 0,
115
- description: "Temperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The suggested temperature is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run. Set to `0` to disable.",
116
- group: "Optional:"
136
+ description: "Temperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The suggested temperature is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run. Set to `0` to disable."
137
+ })
138
+ .option("minP", {
139
+ alias: "mp",
140
+ type: "number",
141
+ default: 0,
142
+ description: "From the next token candidates, discard the percentage of tokens with the lowest probability. For example, if set to `0.05`, 5% of the lowest probability tokens will be discarded. This is useful for generating more high-quality results when using a high temperature. Set to a value between `0` and `1` to enable. Only relevant when `temperature` is set to a value greater than `0`."
117
143
  })
118
144
  .option("topK", {
119
145
  alias: "k",
120
146
  type: "number",
121
147
  default: 40,
122
- description: "Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation. An integer number between `1` and the size of the vocabulary. Set to `0` to disable (which uses the full vocabulary). Only relevant when `temperature` is set to a value greater than 0.",
123
- group: "Optional:"
148
+ description: "Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation. An integer number between `1` and the size of the vocabulary. Set to `0` to disable (which uses the full vocabulary). Only relevant when `temperature` is set to a value greater than 0."
124
149
  })
125
150
  .option("topP", {
126
151
  alias: "p",
127
152
  type: "number",
128
153
  default: 0.95,
129
- description: "Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P, and samples the next token only from this set. A float number between `0` and `1`. Set to `1` to disable. Only relevant when `temperature` is set to a value greater than `0`.",
130
- group: "Optional:"
154
+ description: "Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P, and samples the next token only from this set. A float number between `0` and `1`. Set to `1` to disable. Only relevant when `temperature` is set to a value greater than `0`."
155
+ })
156
+ .option("seed", {
157
+ type: "number",
158
+ description: "Used to control the randomness of the generated text. Only relevant when using `temperature`.",
159
+ defaultDescription: "The current epoch time"
131
160
  })
132
161
  .option("gpuLayers", {
133
162
  alias: "gl",
134
163
  type: "number",
135
164
  description: "number of layers to store in VRAM",
136
- group: "Optional:"
165
+ default: -1,
166
+ defaultDescription: "Automatically determined based on the available VRAM"
137
167
  })
138
168
  .option("repeatPenalty", {
139
169
  alias: "rp",
140
170
  type: "number",
141
171
  default: 1.1,
142
- description: "Prevent the model from repeating the same token too much. Set to `1` to disable.",
143
- group: "Optional:"
172
+ description: "Prevent the model from repeating the same token too much. Set to `1` to disable."
144
173
  })
145
174
  .option("lastTokensRepeatPenalty", {
146
175
  alias: "rpn",
147
176
  type: "number",
148
177
  default: 64,
149
- description: "Number of recent tokens generated by the model to apply penalties to repetition of",
150
- group: "Optional:"
178
+ description: "Number of recent tokens generated by the model to apply penalties to repetition of"
151
179
  })
152
180
  .option("penalizeRepeatingNewLine", {
153
181
  alias: "rpnl",
154
182
  type: "boolean",
155
183
  default: true,
156
- description: "Penalize new line tokens. set \"--no-penalizeRepeatingNewLine\" or \"--no-rpnl\" to disable",
157
- group: "Optional:"
184
+ description: "Penalize new line tokens. set `--no-penalizeRepeatingNewLine` or `--no-rpnl` to disable"
158
185
  })
159
186
  .option("repeatFrequencyPenalty", {
160
187
  alias: "rfp",
161
188
  type: "number",
162
- description: "For n time a token is in the `punishTokens` array, lower its probability by `n * repeatFrequencyPenalty`. Set to a value between `0` and `1` to enable.",
163
- group: "Optional:"
189
+ description: "For n time a token is in the `punishTokens` array, lower its probability by `n * repeatFrequencyPenalty`. Set to a value between `0` and `1` to enable."
164
190
  })
165
191
  .option("repeatPresencePenalty", {
166
192
  alias: "rpp",
167
193
  type: "number",
168
- description: "Lower the probability of all the tokens in the `punishTokens` array by `repeatPresencePenalty`. Set to a value between `0` and `1` to enable.",
169
- group: "Optional:"
194
+ description: "Lower the probability of all the tokens in the `punishTokens` array by `repeatPresencePenalty`. Set to a value between `0` and `1` to enable."
170
195
  })
171
196
  .option("maxTokens", {
172
197
  alias: "mt",
173
198
  type: "number",
174
199
  default: 0,
175
- description: "Maximum number of tokens to generate in responses. Set to `0` to disable. Set to `-1` to set to the context size",
176
- group: "Optional:"
200
+ description: "Maximum number of tokens to generate in responses. Set to `0` to disable. Set to `-1` to set to the context size"
177
201
  })
178
202
  .option("noHistory", {
179
203
  alias: "nh",
180
204
  type: "boolean",
181
205
  default: false,
182
- description: "Don't load or save chat history",
183
- group: "Optional:"
206
+ description: "Don't load or save chat history"
184
207
  })
185
208
  .option("environmentFunctions", {
186
209
  alias: "ef",
187
210
  type: "boolean",
188
211
  default: false,
189
- description: "Provide access to environment functions like `getDate` and `getTime`",
190
- group: "Optional:"
212
+ description: "Provide access to environment functions like `getDate` and `getTime`"
213
+ })
214
+ .option("debug", {
215
+ alias: "d",
216
+ type: "boolean",
217
+ default: false,
218
+ description: "Print llama.cpp info and debug logs"
191
219
  })
192
- .option("noInfoLog", {
193
- alias: "nl",
220
+ .option("meter", {
194
221
  type: "boolean",
195
222
  default: false,
196
- description: "Disable llama.cpp info logs",
197
- group: "Optional:"
223
+ description: "Print how many tokens were used as input and output for each response"
198
224
  })
199
225
  .option("printTimings", {
200
226
  alias: "pt",
201
227
  type: "boolean",
202
228
  default: false,
203
- description: "Print llama.cpp timings after each response",
204
- group: "Optional:"
229
+ description: "Print llama.cpp timings after each response"
205
230
  });
206
231
  },
207
- async handler({ model, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, contextSize, batchSize, grammar, jsonSchemaGrammarFile, threads, temperature, topK, topP, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, noInfoLog, printTimings }) {
232
+ async handler({ modelPath, header, gpu, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize, batchSize, flashAttention, noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, seed, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, debug, meter, printTimings }) {
208
233
  try {
209
234
  await RunChat({
210
- model, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, contextSize, batchSize,
211
- grammar, jsonSchemaGrammarFile, threads, temperature, topK, topP, gpuLayers, lastTokensRepeatPenalty,
212
- repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens,
213
- noHistory, environmentFunctions, noInfoLog, printTimings
235
+ modelPath, header, gpu, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize,
236
+ batchSize, flashAttention, noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, seed,
237
+ gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty,
238
+ maxTokens, noHistory, environmentFunctions, debug, meter, printTimings
214
239
  });
215
240
  }
216
241
  catch (err) {
242
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
217
243
  console.error(err);
218
244
  process.exit(1);
219
245
  }
220
246
  }
221
247
  };
222
- async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, contextSize, batchSize, grammar: grammarArg, jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, noInfoLog, printTimings }) {
223
- if (noInfoLog)
224
- console.info(`${chalk.yellow("Log level:")} warn`);
225
- const llama = await getLlama("lastBuild", {
226
- logLevel: noInfoLog
227
- ? LlamaLogLevel.warn
228
- : LlamaLogLevel.debug
229
- });
248
+ async function RunChat({ modelPath: modelArg, header: headerArg, gpu, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize, batchSize, flashAttention, noTrimWhitespace, grammar: grammarArg, jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, debug, meter, printTimings }) {
249
+ if (contextSize === -1)
250
+ contextSize = undefined;
251
+ if (gpuLayers === -1)
252
+ gpuLayers = undefined;
253
+ const headers = resolveHeaderFlag(headerArg);
254
+ const trimWhitespace = !noTrimWhitespace;
255
+ if (debug)
256
+ console.info(`${chalk.yellow("Log level:")} debug`);
257
+ const llamaLogLevel = debug
258
+ ? LlamaLogLevel.debug
259
+ : LlamaLogLevel.warn;
260
+ const llama = gpu == null
261
+ ? await getLlama("lastBuild", {
262
+ logLevel: llamaLogLevel
263
+ })
264
+ : await getLlama({
265
+ gpu,
266
+ logLevel: llamaLogLevel
267
+ });
230
268
  const logBatchSize = batchSize != null;
269
+ const resolvedModelPath = await resolveCommandGgufPath(modelArg, llama, headers, {
270
+ flashAttention
271
+ });
231
272
  if (systemInfo)
232
273
  console.log(llama.systemInfo);
233
274
  if (systemPromptFile != null && systemPromptFile !== "") {
@@ -240,75 +281,140 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPrompt
240
281
  console.warn(chalk.yellow("Both `prompt` and `promptFile` were specified. `promptFile` will be used."));
241
282
  prompt = await fs.readFile(path.resolve(process.cwd(), promptFile), "utf8");
242
283
  }
243
- if (batchSize == null)
244
- batchSize = contextSize;
245
- else if (batchSize > contextSize) {
284
+ if (batchSize != null && contextSize != null && batchSize > contextSize) {
246
285
  console.warn(chalk.yellow("Batch size is greater than the context size. Batch size will be set to the context size."));
247
286
  batchSize = contextSize;
248
287
  }
249
288
  let initialPrompt = prompt ?? null;
250
- const model = await withStatusLogs({
251
- loading: chalk.blue("Loading model"),
252
- success: chalk.blue("Model loaded"),
253
- fail: chalk.blue("Failed to load model")
254
- }, async () => new LlamaModel({
255
- llama,
256
- modelPath: path.resolve(process.cwd(), modelArg),
257
- gpuLayers: gpuLayers != null ? gpuLayers : undefined
258
- }));
259
- const context = await withStatusLogs({
289
+ const model = await withProgressLog({
290
+ loadingText: chalk.blue.bold("Loading model"),
291
+ successText: chalk.blue("Model loaded"),
292
+ failText: chalk.blue("Failed to load model"),
293
+ liveUpdates: !debug,
294
+ noProgress: debug,
295
+ liveCtrlCSendsAbortSignal: true
296
+ }, async (progressUpdater) => {
297
+ try {
298
+ return await llama.loadModel({
299
+ modelPath: resolvedModelPath,
300
+ gpuLayers: gpuLayers != null
301
+ ? gpuLayers
302
+ : contextSize != null
303
+ ? { fitContext: { contextSize } }
304
+ : undefined,
305
+ defaultContextFlashAttention: flashAttention,
306
+ ignoreMemorySafetyChecks: gpuLayers != null,
307
+ onLoadProgress(loadProgress) {
308
+ progressUpdater.setProgress(loadProgress);
309
+ },
310
+ loadSignal: progressUpdater.abortSignal
311
+ });
312
+ }
313
+ catch (err) {
314
+ if (err === progressUpdater.abortSignal?.reason)
315
+ process.exit(0);
316
+ throw err;
317
+ }
318
+ finally {
319
+ if (llama.logLevel === LlamaLogLevel.debug) {
320
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
321
+ console.info();
322
+ }
323
+ }
324
+ });
325
+ const context = await withOra({
260
326
  loading: chalk.blue("Creating context"),
261
327
  success: chalk.blue("Context created"),
262
- fail: chalk.blue("Failed to create context")
263
- }, async () => new LlamaContext({
264
- model,
265
- contextSize,
266
- batchSize,
267
- threads
268
- }));
328
+ fail: chalk.blue("Failed to create context"),
329
+ useStatusLogs: debug
330
+ }, async () => {
331
+ try {
332
+ return await model.createContext({
333
+ contextSize: contextSize != null ? contextSize : undefined,
334
+ batchSize: batchSize != null ? batchSize : undefined,
335
+ threads: threads === null ? undefined : threads,
336
+ ignoreMemorySafetyChecks: gpuLayers != null || contextSize != null,
337
+ performanceTracking: printTimings
338
+ });
339
+ }
340
+ finally {
341
+ if (llama.logLevel === LlamaLogLevel.debug) {
342
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
343
+ console.info();
344
+ }
345
+ }
346
+ });
269
347
  const grammar = jsonSchemaGrammarFilePath != null
270
348
  ? new LlamaJsonSchemaGrammar(llama, await fs.readJson(path.resolve(process.cwd(), jsonSchemaGrammarFilePath)))
271
349
  : grammarArg !== "text"
272
350
  ? await LlamaGrammar.getFor(llama, grammarArg)
273
351
  : undefined;
274
- const bos = model.tokens.bosString; // bos = beginning of sequence
275
- const eos = model.tokens.bosString; // eos = end of sequence
276
- const chatWrapper = getChatWrapper(wrapper, {
277
- bosString: bos,
352
+ const chatWrapper = resolveChatWrapper({
353
+ type: wrapper,
354
+ bosString: model.tokens.bosString,
278
355
  filename: model.filename,
279
- typeDescription: model.typeDescription
280
- });
356
+ fileInfo: model.fileInfo,
357
+ tokenizer: model.tokenizer,
358
+ noJinja
359
+ }) ?? new GeneralChatWrapper();
360
+ const contextSequence = context.getSequence();
281
361
  const session = new LlamaChatSession({
282
- contextSequence: context.getSequence(),
362
+ contextSequence,
283
363
  systemPrompt,
284
364
  chatWrapper: chatWrapper
285
365
  });
366
+ let lastTokenMeterState = contextSequence.tokenMeter.getState();
286
367
  await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
287
368
  if (grammarArg != "text" && jsonSchemaGrammarFilePath != null)
288
369
  console.warn(chalk.yellow("Both `grammar` and `jsonSchemaGrammarFile` were specified. `jsonSchemaGrammarFile` will be used."));
289
- console.info(`${chalk.yellow("Context size:")} ${context.contextSize}`);
290
- if (logBatchSize)
291
- console.info(`${chalk.yellow("Batch size:")} ${context.batchSize}`);
292
- console.info(`${chalk.yellow("Train context size:")} ${model.trainContextSize}`);
293
- console.info(`${chalk.yellow("Model type:")} ${model.typeDescription}`);
294
- console.info(`${chalk.yellow("BOS:")} ${bos}`);
295
- console.info(`${chalk.yellow("EOS:")} ${eos}`);
296
- console.info(`${chalk.yellow("Chat wrapper:")} ${chatWrapper.wrapperName}`);
297
- console.info(`${chalk.yellow("Repeat penalty:")} ${repeatPenalty} (apply to last ${lastTokensRepeatPenalty} tokens)`);
298
- if (repeatFrequencyPenalty != null)
299
- console.info(`${chalk.yellow("Repeat frequency penalty:")} ${repeatFrequencyPenalty}`);
300
- if (repeatPresencePenalty != null)
301
- console.info(`${chalk.yellow("Repeat presence penalty:")} ${repeatPresencePenalty}`);
302
- if (!penalizeRepeatingNewLine)
303
- console.info(`${chalk.yellow("Penalize repeating new line:")} disabled`);
304
- if (jsonSchemaGrammarFilePath != null)
305
- console.info(`${chalk.yellow("JSON schema grammar file:")} ${path.relative(process.cwd(), path.resolve(process.cwd(), jsonSchemaGrammarFilePath))}`);
306
- else if (grammarArg !== "text")
307
- console.info(`${chalk.yellow("Grammar:")} ${grammarArg}`);
308
370
  if (environmentFunctions && grammar != null) {
309
371
  console.warn(chalk.yellow("Environment functions are disabled since a grammar is already specified"));
310
372
  environmentFunctions = false;
311
373
  }
374
+ const padTitle = "Context".length + 1;
375
+ await printCommonInfoLines({
376
+ context,
377
+ minTitleLength: padTitle,
378
+ printBos: true,
379
+ printEos: true,
380
+ logBatchSize,
381
+ tokenMeterEnabled: meter
382
+ });
383
+ printInfoLine({
384
+ title: "Chat",
385
+ padTitle: padTitle,
386
+ info: [{
387
+ title: "Wrapper",
388
+ value: chatWrapper.wrapperName
389
+ }, {
390
+ title: "Repeat penalty",
391
+ value: `${repeatPenalty} (apply to last ${lastTokensRepeatPenalty} tokens)`
392
+ }, {
393
+ show: repeatFrequencyPenalty != null,
394
+ title: "Repeat frequency penalty",
395
+ value: String(repeatFrequencyPenalty)
396
+ }, {
397
+ show: repeatPresencePenalty != null,
398
+ title: "Repeat presence penalty",
399
+ value: String(repeatPresencePenalty)
400
+ }, {
401
+ show: !penalizeRepeatingNewLine,
402
+ title: "Penalize repeating new line",
403
+ value: "disabled"
404
+ }, {
405
+ show: jsonSchemaGrammarFilePath != null,
406
+ title: "JSON schema grammar file",
407
+ value: () => path.relative(process.cwd(), path.resolve(process.cwd(), jsonSchemaGrammarFilePath ?? ""))
408
+ }, {
409
+ show: jsonSchemaGrammarFilePath == null && grammarArg !== "text",
410
+ title: "Grammar",
411
+ value: grammarArg
412
+ }, {
413
+ show: environmentFunctions,
414
+ title: "Environment functions",
415
+ value: "enabled"
416
+ }]
417
+ });
312
418
  // this is for ora to not interfere with readline
313
419
  await new Promise(resolve => setTimeout(resolve, 1));
314
420
  const replHistory = await ReplHistory.load(chatCommandHistoryFilePath, !noHistory);
@@ -322,8 +428,13 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPrompt
322
428
  rl.close();
323
429
  return res;
324
430
  }
431
+ if (!printTimings && !meter)
432
+ void session.preloadPrompt("")
433
+ .catch(() => void 0); // don't throw an error if preloading fails because a real prompt is sent early
325
434
  // eslint-disable-next-line no-constant-condition
326
435
  while (true) {
436
+ let hadNoWhitespaceTextInThisIteration = false;
437
+ let nextPrintLeftovers = "";
327
438
  const input = initialPrompt != null
328
439
  ? initialPrompt
329
440
  : await getPrompt();
@@ -337,35 +448,84 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPrompt
337
448
  break;
338
449
  process.stdout.write(chalk.yellow("AI: "));
339
450
  const [startColor, endColor] = chalk.blue("MIDDLE").split("MIDDLE");
340
- process.stdout.write(startColor);
341
- await session.prompt(input, {
342
- grammar: grammar,
343
- temperature,
344
- topK,
345
- topP,
346
- repeatPenalty: {
347
- penalty: repeatPenalty,
348
- frequencyPenalty: repeatFrequencyPenalty != null ? repeatFrequencyPenalty : undefined,
349
- presencePenalty: repeatPresencePenalty != null ? repeatPresencePenalty : undefined,
350
- penalizeNewLine: penalizeRepeatingNewLine,
351
- lastTokens: lastTokensRepeatPenalty
352
- },
353
- maxTokens: maxTokens === -1
354
- ? context.contextSize
355
- : maxTokens <= 0
356
- ? undefined
357
- : maxTokens,
358
- onToken(chunk) {
359
- process.stdout.write(model.detokenize(chunk));
360
- },
361
- functions: (grammar == null && environmentFunctions)
362
- ? defaultEnvironmentFunctions
363
- : undefined
451
+ const abortController = new AbortController();
452
+ const consoleInteraction = new ConsoleInteraction();
453
+ consoleInteraction.onKey(ConsoleInteractionKey.ctrlC, async () => {
454
+ abortController.abort();
455
+ consoleInteraction.stop();
364
456
  });
365
- process.stdout.write(endColor);
366
- console.log();
367
- if (printTimings)
457
+ try {
458
+ process.stdout.write(startColor);
459
+ consoleInteraction.start();
460
+ await session.prompt(input, {
461
+ grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
462
+ temperature,
463
+ minP,
464
+ topK,
465
+ topP,
466
+ seed: seed ?? undefined,
467
+ signal: abortController.signal,
468
+ stopOnAbortSignal: true,
469
+ repeatPenalty: {
470
+ penalty: repeatPenalty,
471
+ frequencyPenalty: repeatFrequencyPenalty != null ? repeatFrequencyPenalty : undefined,
472
+ presencePenalty: repeatPresencePenalty != null ? repeatPresencePenalty : undefined,
473
+ penalizeNewLine: penalizeRepeatingNewLine,
474
+ lastTokens: lastTokensRepeatPenalty
475
+ },
476
+ maxTokens: maxTokens === -1
477
+ ? context.contextSize
478
+ : maxTokens <= 0
479
+ ? undefined
480
+ : maxTokens,
481
+ onTextChunk(chunk) {
482
+ let text = nextPrintLeftovers + chunk;
483
+ nextPrintLeftovers = "";
484
+ if (trimWhitespace) {
485
+ if (!hadNoWhitespaceTextInThisIteration) {
486
+ text = text.trimStart();
487
+ if (text.length > 0)
488
+ hadNoWhitespaceTextInThisIteration = true;
489
+ }
490
+ const textWithTrimmedEnd = text.trimEnd();
491
+ if (textWithTrimmedEnd.length < text.length) {
492
+ nextPrintLeftovers = text.slice(textWithTrimmedEnd.length);
493
+ text = textWithTrimmedEnd;
494
+ }
495
+ }
496
+ process.stdout.write(text);
497
+ },
498
+ functions: (grammar == null && environmentFunctions)
499
+ ? defaultEnvironmentFunctions
500
+ : undefined,
501
+ trimWhitespaceSuffix: trimWhitespace
502
+ });
503
+ }
504
+ catch (err) {
505
+ if (!(abortController.signal.aborted && err === abortController.signal.reason))
506
+ throw err;
507
+ }
508
+ finally {
509
+ consoleInteraction.stop();
510
+ if (abortController.signal.aborted)
511
+ process.stdout.write(endColor + chalk.yellow("[generation aborted by user]"));
512
+ else
513
+ process.stdout.write(endColor);
514
+ console.log();
515
+ }
516
+ if (printTimings) {
517
+ if (LlamaLogLevelGreaterThan(llama.logLevel, LlamaLogLevel.info))
518
+ llama.logLevel = LlamaLogLevel.info;
368
519
  await context.printTimings();
520
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
521
+ llama.logLevel = llamaLogLevel;
522
+ }
523
+ if (meter) {
524
+ const newTokenMeterState = contextSequence.tokenMeter.getState();
525
+ const tokenMeterDiff = TokenMeter.diff(newTokenMeterState, lastTokenMeterState);
526
+ lastTokenMeterState = newTokenMeterState;
527
+ console.info(`${chalk.dim("Input tokens:")} ${String(tokenMeterDiff.usedInputTokens).padEnd(5, " ")} ${chalk.dim("Output tokens:")} ${tokenMeterDiff.usedOutputTokens}`);
528
+ }
369
529
  }
370
530
  }
371
531
  const defaultEnvironmentFunctions = {
@@ -382,33 +542,4 @@ const defaultEnvironmentFunctions = {
382
542
  }
383
543
  })
384
544
  };
385
- function getChatWrapper(wrapper, { bosString, filename, typeDescription }) {
386
- switch (wrapper) {
387
- case "general":
388
- return new GeneralChatWrapper();
389
- case "llamaChat":
390
- return new LlamaChatWrapper();
391
- case "alpacaChat":
392
- return new AlpacaChatWrapper();
393
- case "functionary":
394
- return new FunctionaryChatWrapper();
395
- case "chatML":
396
- return new ChatMLChatWrapper();
397
- case "falconChat":
398
- return new FalconChatWrapper();
399
- default:
400
- }
401
- if (wrapper === "auto") {
402
- const chatWrapper = resolveChatWrapperBasedOnModel({
403
- bosString,
404
- filename,
405
- typeDescription
406
- });
407
- if (chatWrapper != null)
408
- return new chatWrapper();
409
- return new GeneralChatWrapper();
410
- }
411
- void (wrapper);
412
- throw new Error("Unknown wrapper: " + wrapper);
413
- }
414
545
  //# sourceMappingURL=ChatCommand.js.map