node-llama-cpp 3.0.0-beta.13 → 3.0.0-beta.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (351) hide show
  1. package/README.md +1 -1
  2. package/dist/ChatWrapper.js +4 -0
  3. package/dist/ChatWrapper.js.map +1 -1
  4. package/dist/bindings/AddonTypes.d.ts +35 -6
  5. package/dist/bindings/Llama.d.ts +12 -0
  6. package/dist/bindings/Llama.js +100 -7
  7. package/dist/bindings/Llama.js.map +1 -1
  8. package/dist/bindings/getLlama.d.ts +19 -1
  9. package/dist/bindings/getLlama.js +16 -6
  10. package/dist/bindings/getLlama.js.map +1 -1
  11. package/dist/bindings/types.d.ts +18 -0
  12. package/dist/bindings/types.js +31 -2
  13. package/dist/bindings/types.js.map +1 -1
  14. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  15. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  16. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  17. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
  18. package/dist/bindings/utils/cloneLlamaCppRepo.js +4 -3
  19. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  20. package/dist/bindings/utils/compileLLamaCpp.d.ts +4 -1
  21. package/dist/bindings/utils/compileLLamaCpp.js +133 -97
  22. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  23. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +3 -0
  24. package/dist/bindings/utils/detectAvailableComputeLayers.js +155 -13
  25. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -1
  26. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  27. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  28. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  29. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +1 -0
  30. package/dist/bindings/utils/logDistroInstallInstruction.js +16 -6
  31. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -1
  32. package/dist/bindings/utils/resolveCustomCmakeOptions.js +2 -2
  33. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
  34. package/dist/bindings/utils/testBindingBinary.js +2 -2
  35. package/dist/bindings/utils/testBindingBinary.js.map +1 -1
  36. package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
  37. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  38. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  39. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  40. package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
  41. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  42. package/dist/chatWrappers/ChatMLChatWrapper.js +12 -10
  43. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  44. package/dist/chatWrappers/FalconChatWrapper.d.ts +2 -1
  45. package/dist/chatWrappers/FalconChatWrapper.js +28 -11
  46. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  47. package/dist/chatWrappers/FunctionaryChatWrapper.js +59 -45
  48. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  49. package/dist/chatWrappers/GemmaChatWrapper.js +9 -7
  50. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
  51. package/dist/chatWrappers/GeneralChatWrapper.d.ts +2 -1
  52. package/dist/chatWrappers/GeneralChatWrapper.js +35 -12
  53. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  54. package/dist/chatWrappers/LlamaChatWrapper.d.ts +7 -0
  55. package/dist/chatWrappers/LlamaChatWrapper.js +26 -8
  56. package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -1
  57. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +73 -0
  58. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +355 -0
  59. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  60. package/dist/{TemplateChatWrapper.d.ts → chatWrappers/generic/TemplateChatWrapper.d.ts} +16 -18
  61. package/dist/{TemplateChatWrapper.js → chatWrappers/generic/TemplateChatWrapper.js} +31 -69
  62. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  63. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +33 -0
  64. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  65. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  66. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  67. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +206 -0
  68. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  69. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +67 -0
  70. package/dist/chatWrappers/utils/resolveChatWrapper.js +206 -0
  71. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  72. package/dist/cli/cli.js +1 -1
  73. package/dist/cli/cli.js.map +1 -1
  74. package/dist/cli/commands/ChatCommand.d.ts +7 -4
  75. package/dist/cli/commands/ChatCommand.js +177 -70
  76. package/dist/cli/commands/ChatCommand.js.map +1 -1
  77. package/dist/cli/commands/ClearCommand.d.ts +1 -1
  78. package/dist/cli/commands/ClearCommand.js +5 -5
  79. package/dist/cli/commands/ClearCommand.js.map +1 -1
  80. package/dist/cli/commands/CompleteCommand.d.ts +3 -2
  81. package/dist/cli/commands/CompleteCommand.js +115 -51
  82. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  83. package/dist/cli/commands/InfillCommand.d.ts +3 -2
  84. package/dist/cli/commands/InfillCommand.js +115 -51
  85. package/dist/cli/commands/InfillCommand.js.map +1 -1
  86. package/dist/cli/commands/OnPostInstallCommand.js +2 -0
  87. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  88. package/dist/cli/commands/{InspectCommand.d.ts → inspect/InspectCommand.d.ts} +1 -4
  89. package/dist/cli/commands/inspect/InspectCommand.js +17 -0
  90. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  91. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +10 -0
  92. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +108 -0
  93. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  94. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  95. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +98 -0
  96. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  97. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +14 -0
  98. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +577 -0
  99. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  100. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  101. package/dist/cli/utils/ConsoleTable.js +86 -0
  102. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  103. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  104. package/dist/cli/utils/printCommonInfoLines.js +70 -0
  105. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  106. package/dist/cli/utils/printInfoLine.d.ts +10 -0
  107. package/dist/cli/utils/printInfoLine.js +45 -0
  108. package/dist/cli/utils/printInfoLine.js.map +1 -0
  109. package/dist/cli/utils/resolveCommandGgufPath.d.ts +1 -0
  110. package/dist/cli/utils/resolveCommandGgufPath.js +6 -0
  111. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  112. package/dist/config.d.ts +3 -1
  113. package/dist/config.js +7 -1
  114. package/dist/config.js.map +1 -1
  115. package/dist/evaluator/LlamaChat/LlamaChat.js +13 -5
  116. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  117. package/dist/evaluator/LlamaCompletion.js +5 -3
  118. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  119. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +43 -9
  120. package/dist/evaluator/LlamaContext/LlamaContext.js +251 -60
  121. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  122. package/dist/evaluator/LlamaContext/types.d.ts +68 -10
  123. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  124. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  125. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  126. package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +4 -4
  127. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  128. package/dist/evaluator/LlamaEmbeddingContext.d.ts +29 -7
  129. package/dist/evaluator/LlamaEmbeddingContext.js +31 -22
  130. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  131. package/dist/evaluator/LlamaGrammar.js +1 -0
  132. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  133. package/dist/evaluator/LlamaModel.d.ts +78 -20
  134. package/dist/evaluator/LlamaModel.js +385 -21
  135. package/dist/evaluator/LlamaModel.js.map +1 -1
  136. package/dist/evaluator/TokenMeter.d.ts +54 -0
  137. package/dist/evaluator/TokenMeter.js +86 -0
  138. package/dist/evaluator/TokenMeter.js.map +1 -0
  139. package/dist/gguf/GgufInsights.d.ts +40 -0
  140. package/dist/gguf/GgufInsights.js +350 -0
  141. package/dist/gguf/GgufInsights.js.map +1 -0
  142. package/dist/gguf/consts.d.ts +3 -0
  143. package/dist/gguf/consts.js +8 -0
  144. package/dist/gguf/consts.js.map +1 -0
  145. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  146. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  147. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  148. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  149. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  150. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  151. package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
  152. package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
  153. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  154. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
  155. package/dist/gguf/fileReaders/GgufFsFileReader.js +45 -0
  156. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  157. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
  158. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
  159. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  160. package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
  161. package/dist/gguf/parser/GgufV2Parser.js +115 -0
  162. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  163. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  164. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  165. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  166. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  167. package/dist/gguf/parser/parseGguf.js +58 -0
  168. package/dist/gguf/parser/parseGguf.js.map +1 -0
  169. package/dist/gguf/readGgufFileInfo.d.ts +30 -0
  170. package/dist/gguf/readGgufFileInfo.js +37 -0
  171. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  172. package/dist/gguf/types/GgufFileInfoTypes.d.ts +52 -0
  173. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  174. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  175. package/dist/gguf/types/GgufMetadataTypes.d.ts +330 -0
  176. package/dist/gguf/types/GgufMetadataTypes.js +86 -0
  177. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  178. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  179. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  180. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  181. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  182. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  183. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  184. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
  185. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
  186. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  187. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  188. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  189. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  190. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  191. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  192. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  193. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  194. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  195. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  196. package/dist/index.d.ts +13 -7
  197. package/dist/index.js +11 -6
  198. package/dist/index.js.map +1 -1
  199. package/dist/state.d.ts +2 -0
  200. package/dist/state.js +7 -0
  201. package/dist/state.js.map +1 -1
  202. package/dist/types.d.ts +1 -1
  203. package/dist/utils/DisposeGuard.d.ts +13 -0
  204. package/dist/utils/DisposeGuard.js +120 -0
  205. package/dist/utils/DisposeGuard.js.map +1 -0
  206. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  207. package/dist/utils/InsufficientMemoryError.js +6 -0
  208. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  209. package/dist/utils/LlamaText.d.ts +25 -10
  210. package/dist/utils/LlamaText.js +205 -23
  211. package/dist/utils/LlamaText.js.map +1 -1
  212. package/dist/utils/StopGenerationDetector.js +3 -1
  213. package/dist/utils/StopGenerationDetector.js.map +1 -1
  214. package/dist/utils/cmake.js +1 -1
  215. package/dist/utils/cmake.js.map +1 -1
  216. package/dist/utils/findBestOption.d.ts +4 -0
  217. package/dist/utils/findBestOption.js +15 -0
  218. package/dist/utils/findBestOption.js.map +1 -0
  219. package/dist/utils/getConsoleLogPrefix.js +3 -2
  220. package/dist/utils/getConsoleLogPrefix.js.map +1 -1
  221. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +3 -3
  222. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -1
  223. package/dist/utils/gitReleaseBundles.js +68 -1
  224. package/dist/utils/gitReleaseBundles.js.map +1 -1
  225. package/dist/utils/mergeUnionTypes.d.ts +4 -0
  226. package/dist/utils/parseModelFileName.d.ts +1 -0
  227. package/dist/utils/parseModelFileName.js +6 -1
  228. package/dist/utils/parseModelFileName.js.map +1 -1
  229. package/dist/utils/prettyPrintObject.d.ts +10 -1
  230. package/dist/utils/prettyPrintObject.js +57 -13
  231. package/dist/utils/prettyPrintObject.js.map +1 -1
  232. package/dist/utils/removeNullFields.d.ts +2 -2
  233. package/dist/utils/removeNullFields.js.map +1 -1
  234. package/dist/utils/spawnCommand.d.ts +11 -1
  235. package/dist/utils/spawnCommand.js +55 -7
  236. package/dist/utils/spawnCommand.js.map +1 -1
  237. package/dist/utils/tokenizeInput.d.ts +1 -1
  238. package/dist/utils/tokenizeInput.js +3 -3
  239. package/dist/utils/tokenizeInput.js.map +1 -1
  240. package/dist/utils/withOra.d.ts +1 -0
  241. package/dist/utils/withOra.js +2 -2
  242. package/dist/utils/withOra.js.map +1 -1
  243. package/llama/CMakeLists.txt +5 -5
  244. package/llama/addon.cpp +793 -88
  245. package/llama/binariesGithubRelease.json +1 -1
  246. package/llama/gitRelease.bundle +0 -0
  247. package/llama/gpuInfo/cuda-gpu-info.cu +21 -0
  248. package/llama/gpuInfo/cuda-gpu-info.h +3 -0
  249. package/llama/gpuInfo/metal-gpu-info.h +4 -1
  250. package/llama/gpuInfo/metal-gpu-info.mm +14 -1
  251. package/llama/gpuInfo/vulkan-gpu-info.cpp +20 -2
  252. package/llama/gpuInfo/vulkan-gpu-info.h +2 -0
  253. package/llama/grammars/json.gbnf +1 -1
  254. package/llama/grammars/json_arr.gbnf +1 -1
  255. package/llama/llama.cpp.info.json +1 -1
  256. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  257. package/llamaBins/linux-arm64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  258. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  259. package/llamaBins/linux-armv7l/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  260. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  261. package/llamaBins/linux-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  262. package/llamaBins/linux-x64/llama-addon.node +0 -0
  263. package/llamaBins/linux-x64-cuda/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  264. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  265. package/llamaBins/linux-x64-vulkan/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  266. package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
  267. package/llamaBins/mac-arm64-metal/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  268. package/llamaBins/mac-arm64-metal/default.metallib +0 -0
  269. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  270. package/llamaBins/mac-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  271. package/llamaBins/mac-x64/llama-addon.node +0 -0
  272. package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -0
  273. package/llamaBins/win-arm64/llama-addon.exp +0 -0
  274. package/llamaBins/win-arm64/llama-addon.lib +0 -0
  275. package/llamaBins/win-arm64/llama-addon.node +0 -0
  276. package/llamaBins/win-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  277. package/llamaBins/win-x64/llama-addon.exp +0 -0
  278. package/llamaBins/win-x64/llama-addon.lib +0 -0
  279. package/llamaBins/win-x64/llama-addon.node +0 -0
  280. package/llamaBins/win-x64-cuda/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  281. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  282. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  283. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  284. package/llamaBins/win-x64-vulkan/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  285. package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
  286. package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
  287. package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
  288. package/package.json +16 -11
  289. package/dist/TemplateChatWrapper.js.map +0 -1
  290. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +0 -33
  291. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +0 -49
  292. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +0 -1
  293. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  294. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -63
  295. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  296. package/dist/cli/commands/InspectCommand.js +0 -113
  297. package/dist/cli/commands/InspectCommand.js.map +0 -1
  298. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  299. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  300. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  301. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  302. package/dist/gguf/GGUFInsights.d.ts +0 -28
  303. package/dist/gguf/GGUFInsights.js +0 -58
  304. package/dist/gguf/GGUFInsights.js.map +0 -1
  305. package/dist/gguf/GGUFMetadata.d.ts +0 -19
  306. package/dist/gguf/GGUFMetadata.js +0 -38
  307. package/dist/gguf/GGUFMetadata.js.map +0 -1
  308. package/dist/gguf/errors/InvalidGGUFMagicError.d.ts +0 -3
  309. package/dist/gguf/errors/InvalidGGUFMagicError.js +0 -6
  310. package/dist/gguf/errors/InvalidGGUFMagicError.js.map +0 -1
  311. package/dist/gguf/errors/MetadataNotParsedYetError.d.ts +0 -3
  312. package/dist/gguf/errors/MetadataNotParsedYetError.js +0 -6
  313. package/dist/gguf/errors/MetadataNotParsedYetError.js.map +0 -1
  314. package/dist/gguf/errors/MissingNodeLlamaError.d.ts +0 -3
  315. package/dist/gguf/errors/MissingNodeLlamaError.js +0 -6
  316. package/dist/gguf/errors/MissingNodeLlamaError.js.map +0 -1
  317. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.d.ts +0 -5
  318. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js +0 -12
  319. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js.map +0 -1
  320. package/dist/gguf/errors/UnsupportedMetadataTypeError.d.ts +0 -4
  321. package/dist/gguf/errors/UnsupportedMetadataTypeError.js +0 -8
  322. package/dist/gguf/errors/UnsupportedMetadataTypeError.js.map +0 -1
  323. package/dist/gguf/ggufParser/GGUFParser.d.ts +0 -18
  324. package/dist/gguf/ggufParser/GGUFParser.js +0 -123
  325. package/dist/gguf/ggufParser/GGUFParser.js.map +0 -1
  326. package/dist/gguf/ggufParser/GGUFTypes.d.ts +0 -257
  327. package/dist/gguf/ggufParser/GGUFTypes.js +0 -2
  328. package/dist/gguf/ggufParser/GGUFTypes.js.map +0 -1
  329. package/dist/gguf/ggufParser/checkArchitecture.d.ts +0 -14
  330. package/dist/gguf/ggufParser/checkArchitecture.js +0 -74
  331. package/dist/gguf/ggufParser/checkArchitecture.js.map +0 -1
  332. package/dist/gguf/ggufParser/stream/GGUFBaseStream.d.ts +0 -38
  333. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js +0 -83
  334. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js.map +0 -1
  335. package/dist/gguf/ggufParser/stream/GGUFFetchStream.d.ts +0 -14
  336. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js +0 -35
  337. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js.map +0 -1
  338. package/dist/gguf/ggufParser/stream/GGUFReadStream.d.ts +0 -15
  339. package/dist/gguf/ggufParser/stream/GGUFReadStream.js +0 -40
  340. package/dist/gguf/ggufParser/stream/GGUFReadStream.js.map +0 -1
  341. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  342. package/dist/utils/parseModelTypeDescription.js +0 -9
  343. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  344. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  345. package/dist/utils/resolveChatWrapper.js +0 -16
  346. package/dist/utils/resolveChatWrapper.js.map +0 -1
  347. package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -7731
  348. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  349. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  350. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  351. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
@@ -0,0 +1,206 @@
1
+ import { parseModelFileName } from "../../utils/parseModelFileName.js";
2
+ import { LlamaChatWrapper } from "../LlamaChatWrapper.js";
3
+ import { ChatMLChatWrapper } from "../ChatMLChatWrapper.js";
4
+ import { GeneralChatWrapper } from "../GeneralChatWrapper.js";
5
+ import { FalconChatWrapper } from "../FalconChatWrapper.js";
6
+ import { FunctionaryChatWrapper } from "../FunctionaryChatWrapper.js";
7
+ import { AlpacaChatWrapper } from "../AlpacaChatWrapper.js";
8
+ import { GemmaChatWrapper } from "../GemmaChatWrapper.js";
9
+ import { JinjaTemplateChatWrapper } from "../generic/JinjaTemplateChatWrapper.js";
10
+ import { TemplateChatWrapper } from "../generic/TemplateChatWrapper.js";
11
+ import { getConsoleLogPrefix } from "../../utils/getConsoleLogPrefix.js";
12
+ import { isJinjaTemplateEquivalentToSpecializedChatWrapper } from "./isJinjaTemplateEquivalentToSpecializedChatWrapper.js";
13
/**
 * Names of the chat wrapper types that map to a dedicated chat wrapper class.
 * The array is frozen, so it cannot be modified at runtime.
 */
export const specializedChatWrapperTypeNames = Object.freeze([
    "general",
    "llamaChat",
    "alpacaChat",
    "functionary",
    "chatML",
    "falconChat",
    "gemma"
]);

/**
 * Names of the generic, template-driven chat wrapper types.
 * The array is frozen, so it cannot be modified at runtime.
 */
export const templateChatWrapperTypeNames = Object.freeze(["template", "jinjaTemplate"]);

/**
 * Every type name accepted for resolution: `"auto"` first, followed by the specialized
 * type names and then the template type names, in their declared order.
 */
export const resolvableChatWrapperTypeNames = Object.freeze(
    ["auto"].concat(specializedChatWrapperTypeNames, templateChatWrapperTypeNames)
);
24
/** Lookup from a chat wrapper type name to the class that implements it. */
const chatWrappers = {
    general: GeneralChatWrapper,
    llamaChat: LlamaChatWrapper,
    alpacaChat: AlpacaChatWrapper,
    functionary: FunctionaryChatWrapper,
    chatML: ChatMLChatWrapper,
    falconChat: FalconChatWrapper,
    gemma: GemmaChatWrapper,
    template: TemplateChatWrapper,
    jinjaTemplate: JinjaTemplateChatWrapper
};

/**
 * Reverse lookup of `chatWrappers`: from a chat wrapper class to its type name.
 * Used to find the `customWrapperSettings` entry that belongs to a given class.
 */
const chatWrapperToConfigType = new Map();
for (const [configType, Wrapper] of Object.entries(chatWrappers))
    chatWrapperToConfigType.set(Wrapper, configType);
37
/**
 * Resolve to a chat wrapper instance based on the provided information.
 * The more information provided, the better the resolution will be (except for `type`).
 *
 * It's recommended to not set `type` to a specific chat wrapper in order for the resolution to be more flexible, but it is useful for when
 * you need to provide the ability to force a specific chat wrapper type.
 * Note that when setting `type` to a generic chat wrapper type (such as `"template"` or `"jinjaTemplate"`), the `customWrapperSettings`
 * must contain the necessary settings for that chat wrapper to be created.
 *
 * When loading a Jinja chat template from either `fileInfo` or `customWrapperSettings.jinjaTemplate.template`,
 * if the chat template format is invalid, it falls back to resolving other chat wrappers,
 * unless `fallbackToOtherWrappersOnJinjaError` is set to `false` (in which case, it will throw an error).
 *
 * Resolution is attempted in this order, returning on the first match:
 * 1. The explicitly requested `type` (when it is not `"auto"`).
 * 2. The Jinja chat template (skipped when `noJinja` is set): a specialized chat wrapper that is found to be
 *    equivalent to the template is preferred over a Jinja template chat wrapper.
 * 3. Known markers inside the Jinja chat template text.
 * 4. The parsed model file name.
 * 5. The architecture stated in the file metadata.
 * 6. The BOS string.
 *
 * @param {object} options
 * @param {string} [options.type] - a chat wrapper type name, or `"auto"` (the default) to detect it
 * @param {string | null} [options.bosString] - checked as a prefix of known prompt beginnings
 * @param {string} [options.filename] - model file name, parsed using `parseModelFileName`
 * @param {object} [options.fileInfo] - file info; `metadata.tokenizer.chat_template` and `metadata.general.architecture` are read from it
 * @param {unknown} [options.tokenizer] - passed as-is to the Jinja template equivalence check
 * @param {object} [options.customWrapperSettings] - settings for the chat wrappers, keyed by chat wrapper type name
 * @param {boolean} [options.warningLogs] - whether to log warnings and fallback errors to the console
 * @param {boolean} [options.fallbackToOtherWrappersOnJinjaError] - whether to keep resolving when creating a Jinja template chat wrapper fails
 * @param {boolean} [options.noJinja] - when resolving automatically, don't create a chat wrapper from the Jinja chat template
 * @returns a chat wrapper instance, or `null` when no chat wrapper could be resolved
 */
export function resolveChatWrapper({ type = "auto", bosString, filename, fileInfo, tokenizer, customWrapperSettings, warningLogs = true, fallbackToOtherWrappersOnJinjaError = true, noJinja = false }) {
    // Instantiate a specialized chat wrapper; the user's custom settings for it take precedence over `defaultSettings`
    function createSpecializedChatWrapper(specializedChatWrapper, defaultSettings = {}) {
        const chatWrapperConfigType = chatWrapperToConfigType.get(specializedChatWrapper);
        const chatWrapperSettings = customWrapperSettings?.[chatWrapperConfigType];
        return new specializedChatWrapper({
            ...(defaultSettings ?? {}),
            ...(chatWrapperSettings ?? {})
        });
    }

    // Log a failure to create a Jinja template chat wrapper, honoring `warningLogs`
    function logJinjaFallbackError(err) {
        if (warningLogs)
            console.error(getConsoleLogPrefix() + "Error creating Jinja template chat wrapper. Falling back to resolve other chat wrappers. Error:", err);
    }

    if (type !== "auto" && type != null) {
        if (isTemplateChatWrapperType(type)) {
            const Wrapper = chatWrappers[type];

            if (isClassReference(Wrapper, TemplateChatWrapper)) {
                const wrapperSettings = customWrapperSettings?.template;
                const hasRequiredSettings = wrapperSettings?.template != null &&
                    wrapperSettings?.historyTemplate != null &&
                    wrapperSettings?.modelRoleName != null &&
                    wrapperSettings?.userRoleName != null;

                if (hasRequiredSettings)
                    return new TemplateChatWrapper(wrapperSettings);

                if (warningLogs)
                    console.warn(getConsoleLogPrefix() + "Template chat wrapper settings must have a template, historyTemplate, modelRoleName, and userRoleName. Falling back to resolve other chat wrapper types.");
            }
            else if (isClassReference(Wrapper, JinjaTemplateChatWrapper)) {
                const jinjaTemplate = customWrapperSettings?.jinjaTemplate?.template ?? fileInfo?.metadata?.tokenizer?.chat_template;

                if (jinjaTemplate == null) {
                    if (warningLogs)
                        console.warn(getConsoleLogPrefix() + "Jinja template chat wrapper received no template. Falling back to resolve other chat wrapper types.");
                }
                else {
                    try {
                        return new JinjaTemplateChatWrapper({
                            ...(customWrapperSettings?.jinjaTemplate ?? {}),
                            template: jinjaTemplate
                        });
                    }
                    catch (err) {
                        if (!fallbackToOtherWrappersOnJinjaError)
                            throw err;

                        logJinjaFallbackError(err);
                    }
                }
            }
        }
        else if (Object.hasOwn(chatWrappers, type)) {
            const Wrapper = chatWrappers[type];
            const wrapperSettings = customWrapperSettings?.[type];
            return new Wrapper(wrapperSettings);
        }
    }

    const modelJinjaTemplate = customWrapperSettings?.jinjaTemplate?.template ?? fileInfo?.metadata?.tokenizer?.chat_template;
    const hasModelJinjaTemplate = modelJinjaTemplate != null && modelJinjaTemplate.trim() !== "";

    if (!noJinja && hasModelJinjaTemplate) {
        const jinjaTemplateChatWrapperOptions = {
            ...(customWrapperSettings?.jinjaTemplate ?? {}),
            template: modelJinjaTemplate
        };

        // prefer a specialized chat wrapper when one is found to be equivalent to the Jinja template
        for (const specializedChatWrapperTypeName of specializedChatWrapperTypeNames) {
            const Wrapper = chatWrappers[specializedChatWrapperTypeName];
            const wrapperSettings = customWrapperSettings?.[specializedChatWrapperTypeName];
            const providedConfigurations = Wrapper._getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate?.() ?? [];

            // always test at least the plain settings, without mutating the array returned by the wrapper class
            const testOptionConfigurations = providedConfigurations.length === 0
                ? [{}]
                : providedConfigurations;

            for (const testConfiguration of testOptionConfigurations) {
                const testChatWrapperSettings = {
                    ...(wrapperSettings ?? {}),
                    ...(testConfiguration ?? {})
                };
                const chatWrapper = new Wrapper(testChatWrapperSettings);

                // return a fresh instance rather than the one that was handed to the equivalence check
                if (isJinjaTemplateEquivalentToSpecializedChatWrapper(jinjaTemplateChatWrapperOptions, chatWrapper, tokenizer))
                    return new Wrapper(testChatWrapperSettings);
            }
        }

        try {
            return new JinjaTemplateChatWrapper(jinjaTemplateChatWrapperOptions);
        }
        catch (err) {
            if (!fallbackToOtherWrappersOnJinjaError)
                throw err;

            logJinjaFallbackError(err);
        }
    }

    // try to find a pattern in the Jinja template to resolve to a specialized chat wrapper,
    // with a logic similar to `llama.cpp`'s `llama_chat_apply_template_internal` function
    if (hasModelJinjaTemplate) {
        if (modelJinjaTemplate.includes("<|im_start|>"))
            return createSpecializedChatWrapper(ChatMLChatWrapper);

        if (modelJinjaTemplate.includes("[INST]"))
            return createSpecializedChatWrapper(LlamaChatWrapper, {
                addSpaceBeforeEos: modelJinjaTemplate.includes("' ' + eos_token")
            });

        if (modelJinjaTemplate.includes("<start_of_turn>"))
            return createSpecializedChatWrapper(GemmaChatWrapper);
    }

    if (filename != null) {
        const { name, subType, fileType, otherInfo } = parseModelFileName(filename);

        if (fileType?.toLowerCase() === "gguf") {
            const lowercaseName = name?.toLowerCase();
            const lowercaseSubType = subType?.toLowerCase();
            const splitLowercaseSubType = (lowercaseSubType?.split("-") ?? []).concat(otherInfo.map(info => info.toLowerCase()));
            const firstSplitLowercaseSubType = splitLowercaseSubType[0];

            // the order of these checks matters: more specific names are matched before generic sub types
            if (lowercaseName === "llama") {
                if (splitLowercaseSubType.includes("chat"))
                    return createSpecializedChatWrapper(LlamaChatWrapper);

                return createSpecializedChatWrapper(GeneralChatWrapper);
            }

            if (lowercaseName === "yarn" && firstSplitLowercaseSubType === "llama")
                return createSpecializedChatWrapper(LlamaChatWrapper);

            if (lowercaseName === "orca")
                return createSpecializedChatWrapper(ChatMLChatWrapper);

            if (lowercaseName === "phind" && lowercaseSubType === "codellama")
                return createSpecializedChatWrapper(LlamaChatWrapper);

            if (lowercaseName === "mistral")
                return createSpecializedChatWrapper(GeneralChatWrapper);

            if (firstSplitLowercaseSubType === "llama")
                return createSpecializedChatWrapper(LlamaChatWrapper);

            if (lowercaseSubType === "alpaca")
                return createSpecializedChatWrapper(AlpacaChatWrapper);

            if (lowercaseName === "functionary")
                return createSpecializedChatWrapper(FunctionaryChatWrapper);

            if (lowercaseName === "dolphin" && splitLowercaseSubType.includes("mistral"))
                return createSpecializedChatWrapper(ChatMLChatWrapper);

            if (lowercaseName === "gemma")
                return createSpecializedChatWrapper(GemmaChatWrapper);

            if (splitLowercaseSubType.includes("chatml"))
                return createSpecializedChatWrapper(ChatMLChatWrapper);
        }
    }

    if (fileInfo != null) {
        // `metadata` is read defensively here, the same way it is read for the chat template above
        const arch = fileInfo.metadata?.general?.architecture;

        if (arch === "llama")
            return createSpecializedChatWrapper(LlamaChatWrapper);

        if (arch === "falcon")
            return createSpecializedChatWrapper(FalconChatWrapper);
    }

    if (bosString === "" || bosString == null)
        return null;

    // a BOS string that is a prefix of a known prompt beginning hints at the matching chat format
    if ("<s>[INST] <<SYS>>\n".startsWith(bosString))
        return createSpecializedChatWrapper(LlamaChatWrapper);

    if ("<|im_start|>system\n".startsWith(bosString))
        return createSpecializedChatWrapper(ChatMLChatWrapper);

    return null;
}
196
/**
 * Check whether the given value is one of the names listed in `specializedChatWrapperTypeNames`.
 * @param {unknown} type - the value to check
 * @returns {boolean} `true` when `type` is listed in `specializedChatWrapperTypeNames`
 */
export function isSpecializedChatWrapperType(type) {
    const isListed = specializedChatWrapperTypeNames.includes(type);

    return isListed;
}
199
/**
 * Check whether the given value is one of the names listed in `templateChatWrapperTypeNames`.
 * @param {unknown} type - the value to check
 * @returns {boolean} `true` when `type` is listed in `templateChatWrapperTypeNames`
 */
export function isTemplateChatWrapperType(type) {
    const isListed = templateChatWrapperTypeNames.includes(type);

    return isListed;
}
202
/**
 * Strict identity check between a value and a class reference.
 *
 * This is needed because TypeScript guards don't work automatically with class references.
 * @param {unknown} value - the value to compare
 * @param {Function} classReference - the class to compare against
 * @returns {boolean} `true` only when `value` is the very same reference as `classReference`
 */
function isClassReference(value, classReference) {
    const isSameReference = value === classReference;

    return isSameReference;
}
206
+ //# sourceMappingURL=resolveChatWrapper.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"resolveChatWrapper.js","sourceRoot":"","sources":["../../../src/chatWrappers/utils/resolveChatWrapper.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,kBAAkB,EAAC,MAAM,mCAAmC,CAAC;AACrE,OAAO,EAAC,gBAAgB,EAAC,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAC,iBAAiB,EAAC,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAC,kBAAkB,EAAC,MAAM,0BAA0B,CAAC;AAC5D,OAAO,EAAC,iBAAiB,EAAC,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAC,sBAAsB,EAAC,MAAM,8BAA8B,CAAC;AACpE,OAAO,EAAC,iBAAiB,EAAC,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAC,gBAAgB,EAAC,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAC,wBAAwB,EAAkC,MAAM,wCAAwC,CAAC;AACjH,OAAO,EAAC,mBAAmB,EAAC,MAAM,mCAAmC,CAAC;AACtE,OAAO,EAAC,mBAAmB,EAAC,MAAM,oCAAoC,CAAC;AAEvE,OAAO,EAAC,iDAAiD,EAAC,MAAM,wDAAwD,CAAC;AAIzH,MAAM,CAAC,MAAM,+BAA+B,GAAG,MAAM,CAAC,MAAM,CAAC;IACzD,SAAS,EAAE,WAAW,EAAE,YAAY,EAAE,aAAa,EAAE,QAAQ,EAAE,YAAY,EAAE,OAAO;CAC9E,CAAC,CAAC;AAGZ,MAAM,CAAC,MAAM,4BAA4B,GAAG,MAAM,CAAC,MAAM,CAAC;IACtD,UAAU,EAAE,eAAe;CACrB,CAAC,CAAC;AAGZ,MAAM,CAAC,MAAM,8BAA8B,GAAG,MAAM,CAAC,MAAM,CAAC;IACxD,MAAM;IACN,GAAG,+BAA+B;IAClC,GAAG,4BAA4B;CACzB,CAAC,CAAC;AAGZ,MAAM,YAAY,GAAG;IACjB,SAAS,EAAE,kBAAkB;IAC7B,WAAW,EAAE,gBAAgB;IAC7B,YAAY,EAAE,iBAAiB;IAC/B,aAAa,EAAE,sBAAsB;IACrC,QAAQ,EAAE,iBAAiB;IAC3B,YAAY,EAAE,iBAAiB;IAC/B,OAAO,EAAE,gBAAgB;IACzB,UAAU,EAAE,mBAAmB;IAC/B,eAAe,EAAE,wBAAwB;CACiD,CAAC;AAC/F,MAAM,uBAAuB,GAAG,IAAI,GAAG,CACnC,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC;KACvB,GAAG,CAAC,CAAC,CAAC,UAAU,EAAE,OAAO,CAAC,EAAE,EAAE,CAAC,CAC5B,CAAC,OAAO,EAAE,UAAuC,CAAC,CACrD,CAAC,CACT,CAAC;AA2BF;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,kBAAkB,CAAC,EAC/B,IAAI,GAAG,MAAM,EACb,SAAS,EACT,QAAQ,EACR,QAAQ,EACR,SAAS,EACT,qBAAqB,EACrB,WAAW,GAAG,IAAI,EAClB,mCAAmC,GAAG,IAAI,EAC1C,OAAO,GAAG,KAAK,EACS;IACxB,SAAS,4BAA4B,CACjC,sBAAyB,EACzB,kBAA+C,EAAE;QAEjD,MAAM,qBAAqB,GAAG,uBAAuB,CAAC,GAAG,CAAC,sBAAsB,CAAmC,CAAC;QACpH,MAAM,mBAAmB,GAAG,qBAAqB,EAAE,CAAC,qBAAqB,CAAC,CAAC;QAE3E,OAAO,IAAK,sBAA8B,CAAC;YACvC,GAAG,CAAC,eAAe,IAAI,EAAE,CAAC;YAC1B,GAAG,CAAC,mBAAmB,IAAI,EAAE,CAAC;SACjC,CAAC,CAAC;IACP,CAAC;IAED,IAAI,IAAI,KAAK,MAAM,IAAI,IAAI,IAAI,IAAI,EAAE;QACj
C,IAAI,yBAAyB,CAAC,IAAI,CAAC,EAAE;YACjC,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;YAEnC,IAAI,gBAAgB,CAAC,OAAO,EAAE,mBAAmB,CAAC,EAAE;gBAChD,MAAM,eAAe,GAAG,qBAAqB,EAAE,QAAQ,CAAC;gBACxD,IAAI,eAAe,IAAI,IAAI,IAAI,eAAe,EAAE,QAAQ,IAAI,IAAI,IAAI,eAAe,EAAE,eAAe,IAAI,IAAI;oBACxG,eAAe,EAAE,aAAa,IAAI,IAAI,IAAI,eAAe,EAAE,YAAY,IAAI,IAAI,EACjF;oBACE,IAAI,WAAW;wBACX,OAAO,CAAC,IAAI,CAAC,mBAAmB,EAAE,GAAG,0JAA0J,CAAC,CAAC;iBACxM;;oBACG,OAAO,IAAI,mBAAmB,CAAC,eAAe,CAAC,CAAC;aACvD;iBAAM,IAAI,gBAAgB,CAAC,OAAO,EAAE,wBAAwB,CAAC,EAAE;gBAC5D,MAAM,aAAa,GAAG,qBAAqB,EAAE,aAAa,EAAE,QAAQ,IAAI,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,aAAa,CAAC;gBAErH,IAAI,aAAa,IAAI,IAAI,EAAE;oBACvB,IAAI,WAAW;wBACX,OAAO,CAAC,IAAI,CAAC,mBAAmB,EAAE,GAAG,qGAAqG,CAAC,CAAC;iBACnJ;qBAAM;oBACH,IAAI;wBACA,OAAO,IAAI,wBAAwB,CAAC;4BAChC,GAAG,CAAC,qBAAqB,EAAE,aAAa,IAAI,EAAE,CAAC;4BAC/C,QAAQ,EAAE,aAAa;yBAC1B,CAAC,CAAC;qBACN;oBAAC,OAAO,GAAG,EAAE;wBACV,IAAI,CAAC,mCAAmC;4BACpC,MAAM,GAAG,CAAC;6BACT,IAAI,WAAW;4BAChB,OAAO,CAAC,KAAK,CAAC,mBAAmB,EAAE,GAAG,iGAAiG,EAAE,GAAG,CAAC,CAAC;qBACrJ;iBACJ;aACJ;;gBACG,KAAK,CAAC,OAAuB,CAAC,CAAC;SACtC;aAAM,IAAI,MAAM,CAAC,MAAM,CAAC,YAAY,EAAE,IAAI,CAAC,EAAE;YAC1C,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;YACnC,MAAM,eAAe,GACjB,qBAAqB,EAAE,CAAC,IAAI,CAAC,CAAC;YAElC,OAAO,IAAK,OAAe,CAAC,eAAe,CAAC,CAAC;SAChD;KACJ;IAED,MAAM,kBAAkB,GAAG,qBAAqB,EAAE,aAAa,EAAE,QAAQ,IAAI,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,aAAa,CAAC;IAE1H,IAAI,CAAC,OAAO,IAAI,kBAAkB,IAAI,IAAI,IAAI,kBAAkB,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAC5E,MAAM,+BAA+B,GAAoC;YACrE,GAAG,CAAC,qBAAqB,EAAE,aAAa,IAAI,EAAE,CAAC;YAC/C,QAAQ,EAAE,kBAAkB;SAC/B,CAAC;QAEF,KAAK,MAAM,8BAA8B,IAAI,+BAA+B,EAAE;YAC1E,MAAM,OAAO,GAAG,YAAY,CAAC,8BAA8B,CAAC,CAAC;YAC7D,MAAM,eAAe,GAAG,qBAAqB,EAAE,CAAC,8BAA8B,CAAC,CAAC;YAEhF,MAAM,wBAAwB,GAAG,OAAO,CAAC,yDAAyD,EAAE,EAAE,IAAI,EAAE,CAAC;YAC7G,IAAI,wBAAwB,CAAC,MAAM,KAAK,CAAC;gBACrC,wBAAwB,CAAC,IAAI,CAAC,EAAS,CAAC,CAAC;YAE7C,KAAK,MAAM,iBAAiB,IAAI,wBAAwB,EAAE;gBACtD,MAAM,uBAAuB,GAAG;oBAC5B,GAAG,CAAC,eAAe,IAAI,EAAE,CAAC;oBAC1B,GAAG,CAAC,iBAAiB,IAAI,EAAE,CAAC;iBAC/B,C
AAC;gBACF,MAAM,WAAW,GAAG,IAAK,OAAe,CAAC,uBAAuB,CAAC,CAAC;gBAElE,IAAI,iDAAiD,CAAC,+BAA+B,EAAE,WAAW,EAAE,SAAS,CAAC;oBAC1G,OAAO,IAAK,OAAe,CAAC,uBAAuB,CAAC,CAAC;aAC5D;SACJ;QAED,IAAI,CAAC,mCAAmC;YACpC,OAAO,IAAI,wBAAwB,CAAC,+BAA+B,CAAC,CAAC;QAEzE,IAAI;YACA,OAAO,IAAI,wBAAwB,CAAC,+BAA+B,CAAC,CAAC;SACxE;QAAC,OAAO,GAAG,EAAE;YACV,OAAO,CAAC,KAAK,CAAC,mBAAmB,EAAE,GAAG,iGAAiG,EAAE,GAAG,CAAC,CAAC;SACjJ;KACJ;IAED,wFAAwF;IACxF,sFAAsF;IACtF,IAAI,kBAAkB,IAAI,IAAI,IAAI,kBAAkB,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAChE,IAAI,kBAAkB,CAAC,QAAQ,CAAC,cAAc,CAAC;YAC3C,OAAO,4BAA4B,CAAC,iBAAiB,CAAC,CAAC;aACtD,IAAI,kBAAkB,CAAC,QAAQ,CAAC,QAAQ,CAAC;YAC1C,OAAO,4BAA4B,CAAC,gBAAgB,EAAE;gBAClD,iBAAiB,EAAE,kBAAkB,CAAC,QAAQ,CAAC,iBAAiB,CAAC;aACpE,CAAC,CAAC;aACF,IAAI,kBAAkB,CAAC,QAAQ,CAAC,iBAAiB,CAAC;YACnD,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;KAC7D;IAED,IAAI,QAAQ,IAAI,IAAI,EAAE;QAClB,MAAM,EAAC,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAC,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QAE1E,IAAI,QAAQ,EAAE,WAAW,EAAE,KAAK,MAAM,EAAE;YACpC,MAAM,aAAa,GAAG,IAAI,EAAE,WAAW,EAAE,CAAC;YAC1C,MAAM,gBAAgB,GAAG,OAAO,EAAE,WAAW,EAAE,CAAC;YAChD,MAAM,qBAAqB,GAAG,CAAC,gBAAgB,EAAE,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CACrE,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAC5C,CAAC;YACF,MAAM,0BAA0B,GAAG,qBAAqB,CAAC,CAAC,CAAC,CAAC;YAE5D,IAAI,aAAa,KAAK,OAAO,EAAE;gBAC3B,IAAI,qBAAqB,CAAC,QAAQ,CAAC,MAAM,CAAC;oBACtC,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;gBAE1D,OAAO,4BAA4B,CAAC,kBAAkB,CAAC,CAAC;aAC3D;iBAAM,IAAI,aAAa,KAAK,MAAM,IAAI,0BAA0B,KAAK,OAAO;gBACzE,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;iBACrD,IAAI,aAAa,KAAK,MAAM;gBAC7B,OAAO,4BAA4B,CAAC,iBAAiB,CAAC,CAAC;iBACtD,IAAI,aAAa,KAAK,OAAO,IAAI,gBAAgB,KAAK,WAAW;gBAClE,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;iBACrD,IAAI,aAAa,KAAK,SAAS;gBAChC,OAAO,4BAA4B,CAAC,kBAAkB,CAAC,CAAC;iBACvD,IAAI,0BAA0B,KAAK,OAAO;gBAC3C,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;iBACrD,IAAI,gBAAgB,KAAK,QAAQ;gBAClC,OAAO,4BAA4B,CAAC,iBAAiB,CAAC,CAAC;iBACtD,IAAI,aAAa,KAAK,aAAa;gBACpC,OAAO,4BAA4B,CAAC,sBAAsB,CAAC,CAAC;iBAC3D,IAAI,aAAa,KAAK,SAAS,IA
AI,qBAAqB,CAAC,QAAQ,CAAC,SAAS,CAAC;gBAC7E,OAAO,4BAA4B,CAAC,iBAAiB,CAAC,CAAC;iBACtD,IAAI,aAAa,KAAK,OAAO;gBAC9B,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;iBACrD,IAAI,qBAAqB,CAAC,QAAQ,CAAC,QAAQ,CAAC;gBAC7C,OAAO,4BAA4B,CAAC,iBAAiB,CAAC,CAAC;SAC9D;KACJ;IAED,IAAI,QAAQ,IAAI,IAAI,EAAE;QAClB,MAAM,IAAI,GAAG,QAAQ,CAAC,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;QAErD,IAAI,IAAI,KAAK,OAAO;YAChB,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;aACrD,IAAI,IAAI,KAAK,QAAQ;YACtB,OAAO,4BAA4B,CAAC,iBAAiB,CAAC,CAAC;KAC9D;IAED,IAAI,SAAS,KAAK,EAAE,IAAI,SAAS,IAAI,IAAI;QACrC,OAAO,IAAI,CAAC;IAEhB,IAAI,qBAAqB,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE;QAC7C,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;KACzD;SAAM,IAAI,sBAAsB,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE;QACrD,OAAO,4BAA4B,CAAC,iBAAiB,CAAC,CAAC;KAC1D;IAED,OAAO,IAAI,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,4BAA4B,CAAC,IAAY;IACrD,OAAO,+BAA+B,CAAC,QAAQ,CAAC,IAAW,CAAC,CAAC;AACjE,CAAC;AAED,MAAM,UAAU,yBAAyB,CAAC,IAAY;IAClD,OAAO,4BAA4B,CAAC,QAAQ,CAAC,IAAW,CAAC,CAAC;AAC9D,CAAC;AAED,0FAA0F;AAC1F,SAAS,gBAAgB,CAAI,KAAU,EAAE,cAAiB;IACtD,OAAO,KAAK,KAAK,cAAc,CAAC;AACpC,CAAC"}
package/dist/cli/cli.js CHANGED
@@ -14,7 +14,7 @@ import { ClearCommand } from "./commands/ClearCommand.js";
14
14
  import { ChatCommand } from "./commands/ChatCommand.js";
15
15
  import { CompleteCommand } from "./commands/CompleteCommand.js";
16
16
  import { InfillCommand } from "./commands/InfillCommand.js";
17
- import { InspectCommand } from "./commands/InspectCommand.js";
17
+ import { InspectCommand } from "./commands/inspect/InspectCommand.js";
18
18
  import { DebugCommand } from "./commands/DebugCommand.js";
19
19
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
20
20
  const packageJson = fs.readJSONSync(path.join(__dirname, "..", "..", "package.json"));
@@ -1 +1 @@
1
- {"version":3,"file":"cli.js","sourceRoot":"","sources":["../../src/cli/cli.ts"],"names":[],"mappings":";AAEA,OAAO,EAAC,aAAa,EAAC,MAAM,KAAK,CAAC;AAClC,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,sDAAsD;AACtD,OAAO,EAAC,OAAO,EAAC,MAAM,eAAe,CAAC;AACtC,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,UAAU,EAAC,MAAM,cAAc,CAAC;AACxC,OAAO,EAAC,mBAAmB,EAAC,MAAM,aAAa,CAAC;AAChD,OAAO,EAAC,eAAe,EAAC,MAAM,+BAA+B,CAAC;AAC9D,OAAO,EAAC,YAAY,EAAC,MAAM,4BAA4B,CAAC;AACxD,OAAO,EAAC,oBAAoB,EAAC,MAAM,oCAAoC,CAAC;AACxE,OAAO,EAAC,YAAY,EAAC,MAAM,4BAA4B,CAAC;AACxD,OAAO,EAAC,WAAW,EAAC,MAAM,2BAA2B,CAAC;AACtD,OAAO,EAAC,eAAe,EAAC,MAAM,+BAA+B,CAAC;AAC9D,OAAO,EAAC,aAAa,EAAC,MAAM,6BAA6B,CAAC;AAC1D,OAAO,EAAC,cAAc,EAAC,MAAM,8BAA8B,CAAC;AAC5D,OAAO,EAAC,YAAY,EAAC,MAAM,4BAA4B,CAAC;AAExD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE/D,MAAM,WAAW,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,cAAc,CAAC,CAAC,CAAC;AAEtF,mBAAmB,CAAC,IAAI,CAAC,CAAC;AAE1B,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;AAE1C,IAAI;KACC,UAAU,CAAC,UAAU,CAAC;KACtB,KAAK,CAAC,+BAA+B,CAAC;KACtC,OAAO,CAAC,eAAe,CAAC;KACxB,OAAO,CAAC,YAAY,CAAC;KACrB,OAAO,CAAC,YAAY,CAAC;KACrB,OAAO,CAAC,WAAW,CAAC;KACpB,OAAO,CAAC,eAAe,CAAC;KACxB,OAAO,CAAC,aAAa,CAAC;KACtB,OAAO,CAAC,cAAc,CAAC;KACvB,OAAO,CAAC,oBAAoB,CAAC;KAC7B,OAAO,CAAC,YAAY,CAAC;KACrB,iBAAiB,EAAE;KACnB,aAAa,CAAC,CAAC,CAAC;KAChB,MAAM,EAAE;KACR,cAAc,EAAE;KAChB,KAAK,CAAC,GAAG,EAAE,SAAS,CAAC;KACrB,IAAI,CAAC,GAAG,CAAC;KACT,KAAK,CAAC,GAAG,EAAE,MAAM,CAAC;KAClB,OAAO,CAAC,WAAW,CAAC,OAAO,CAAC;KAC5B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC;KACzC,KAAK,EAAE,CAAC"}
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../../src/cli/cli.ts"],"names":[],"mappings":";AAEA,OAAO,EAAC,aAAa,EAAC,MAAM,KAAK,CAAC;AAClC,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,sDAAsD;AACtD,OAAO,EAAC,OAAO,EAAC,MAAM,eAAe,CAAC;AACtC,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,UAAU,EAAC,MAAM,cAAc,CAAC;AACxC,OAAO,EAAC,mBAAmB,EAAC,MAAM,aAAa,CAAC;AAChD,OAAO,EAAC,eAAe,EAAC,MAAM,+BAA+B,CAAC;AAC9D,OAAO,EAAC,YAAY,EAAC,MAAM,4BAA4B,CAAC;AACxD,OAAO,EAAC,oBAAoB,EAAC,MAAM,oCAAoC,CAAC;AACxE,OAAO,EAAC,YAAY,EAAC,MAAM,4BAA4B,CAAC;AACxD,OAAO,EAAC,WAAW,EAAC,MAAM,2BAA2B,CAAC;AACtD,OAAO,EAAC,eAAe,EAAC,MAAM,+BAA+B,CAAC;AAC9D,OAAO,EAAC,aAAa,EAAC,MAAM,6BAA6B,CAAC;AAC1D,OAAO,EAAC,cAAc,EAAC,MAAM,sCAAsC,CAAC;AACpE,OAAO,EAAC,YAAY,EAAC,MAAM,4BAA4B,CAAC;AAExD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE/D,MAAM,WAAW,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,cAAc,CAAC,CAAC,CAAC;AAEtF,mBAAmB,CAAC,IAAI,CAAC,CAAC;AAE1B,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;AAE1C,IAAI;KACC,UAAU,CAAC,UAAU,CAAC;KACtB,KAAK,CAAC,+BAA+B,CAAC;KACtC,OAAO,CAAC,eAAe,CAAC;KACxB,OAAO,CAAC,YAAY,CAAC;KACrB,OAAO,CAAC,YAAY,CAAC;KACrB,OAAO,CAAC,WAAW,CAAC;KACpB,OAAO,CAAC,eAAe,CAAC;KACxB,OAAO,CAAC,aAAa,CAAC;KACtB,OAAO,CAAC,cAAc,CAAC;KACvB,OAAO,CAAC,oBAAoB,CAAC;KAC7B,OAAO,CAAC,YAAY,CAAC;KACrB,iBAAiB,EAAE;KACnB,aAAa,CAAC,CAAC,CAAC;KAChB,MAAM,EAAE;KACR,cAAc,EAAE;KAChB,KAAK,CAAC,GAAG,EAAE,SAAS,CAAC;KACrB,IAAI,CAAC,GAAG,CAAC;KACT,KAAK,CAAC,GAAG,EAAE,MAAM,CAAC;KAClB,OAAO,CAAC,WAAW,CAAC,OAAO,CAAC;KAC5B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC;KACzC,KAAK,EAAE,CAAC"}
@@ -1,6 +1,6 @@
1
1
  import { CommandModule } from "yargs";
2
2
  import { LlamaGrammar } from "../../evaluator/LlamaGrammar.js";
3
- import { ChatWrapperTypeName } from "../../bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js";
3
+ import { SpecializedChatWrapperTypeName } from "../../chatWrappers/utils/resolveChatWrapper.js";
4
4
  type ChatCommand = {
5
5
  model: string;
6
6
  systemInfo: boolean;
@@ -8,9 +8,11 @@ type ChatCommand = {
8
8
  systemPromptFile?: string;
9
9
  prompt?: string;
10
10
  promptFile?: string;
11
- wrapper: ChatWrapperTypeName;
12
- contextSize: number;
11
+ wrapper: SpecializedChatWrapperTypeName | "auto";
12
+ noJinja?: boolean;
13
+ contextSize?: number;
13
14
  batchSize?: number;
15
+ noTrimWhitespace: boolean;
14
16
  grammar: "text" | Parameters<typeof LlamaGrammar.getFor>[1];
15
17
  jsonSchemaGrammarFile?: string;
16
18
  threads: number;
@@ -27,7 +29,8 @@ type ChatCommand = {
27
29
  maxTokens: number;
28
30
  noHistory: boolean;
29
31
  environmentFunctions: boolean;
30
- noInfoLog: boolean;
32
+ debug: boolean;
33
+ meter: boolean;
31
34
  printTimings: boolean;
32
35
  };
33
36
  export declare const ChatCommand: CommandModule<object, ChatCommand>;
@@ -6,24 +6,27 @@ import fs from "fs-extra";
6
6
  import { chatCommandHistoryFilePath, defaultChatSystemPrompt } from "../../config.js";
7
7
  import { getIsInDocumentationMode } from "../../state.js";
8
8
  import { ReplHistory } from "../../utils/ReplHistory.js";
9
- import withStatusLogs from "../../utils/withStatusLogs.js";
10
9
  import { defineChatSessionFunction } from "../../evaluator/LlamaChatSession/utils/defineChatSessionFunction.js";
11
10
  import { getLlama } from "../../bindings/getLlama.js";
12
11
  import { LlamaGrammar } from "../../evaluator/LlamaGrammar.js";
13
12
  import { LlamaChatSession } from "../../evaluator/LlamaChatSession/LlamaChatSession.js";
14
- import { LlamaModel } from "../../evaluator/LlamaModel.js";
15
- import { LlamaContext } from "../../evaluator/LlamaContext/LlamaContext.js";
16
13
  import { LlamaJsonSchemaGrammar } from "../../evaluator/LlamaJsonSchemaGrammar.js";
17
- import { LlamaLogLevel } from "../../bindings/types.js";
18
- import { resolveChatWrapperBasedOnWrapperTypeName, chatWrapperTypeNames } from "../../bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js";
14
+ import { LlamaLogLevel, LlamaLogLevelGreaterThan } from "../../bindings/types.js";
15
+ import withOra from "../../utils/withOra.js";
16
+ import { TokenMeter } from "../../evaluator/TokenMeter.js";
17
+ import { printInfoLine } from "../utils/printInfoLine.js";
18
+ import { resolveChatWrapper, specializedChatWrapperTypeNames } from "../../chatWrappers/utils/resolveChatWrapper.js";
19
+ import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
20
+ import { printCommonInfoLines } from "../utils/printCommonInfoLines.js";
21
+ import { resolveCommandGgufPath } from "../utils/resolveCommandGgufPath.js";
19
22
  export const ChatCommand = {
20
- command: "chat",
23
+ command: "chat [modelPath]",
21
24
  describe: "Chat with a Llama model",
22
25
  builder(yargs) {
23
26
  const isInDocumentationMode = getIsInDocumentationMode();
24
27
  return yargs
25
28
  .option("model", {
26
- alias: "m",
29
+ alias: ["m", "modelPath"],
27
30
  type: "string",
28
31
  demandOption: true,
29
32
  description: "Llama model file to use for the chat",
@@ -64,15 +67,22 @@ export const ChatCommand = {
64
67
  alias: "w",
65
68
  type: "string",
66
69
  default: "auto",
67
- choices: chatWrapperTypeNames,
70
+ choices: ["auto", ...specializedChatWrapperTypeNames],
68
71
  description: "Chat wrapper to use. Use `auto` to automatically select a wrapper based on the model's BOS token",
69
72
  group: "Optional:"
73
+ })
74
+ .option("noJinja", {
75
+ type: "boolean",
76
+ default: false,
77
+ description: "Don't use a Jinja wrapper, even if it's the best option for the model",
78
+ group: "Optional:"
70
79
  })
71
80
  .option("contextSize", {
72
81
  alias: "c",
73
82
  type: "number",
74
- default: 1024 * 4,
75
83
  description: "Context size to use for the model context",
84
+ default: -1,
85
+ defaultDescription: "Automatically determined based on the available VRAM",
76
86
  group: "Optional:"
77
87
  })
78
88
  .option("batchSize", {
@@ -80,6 +90,13 @@ export const ChatCommand = {
80
90
  type: "number",
81
91
  description: "Batch size to use for the model context. The default value is the context size",
82
92
  group: "Optional:"
93
+ })
94
+ .option("noTrimWhitespace", {
95
+ type: "boolean",
96
+ alias: ["noTrim"],
97
+ default: false,
98
+ description: "Don't trim whitespaces from the model response",
99
+ group: "Optional:"
83
100
  })
84
101
  .option("grammar", {
85
102
  alias: "g",
@@ -133,6 +150,8 @@ export const ChatCommand = {
133
150
  alias: "gl",
134
151
  type: "number",
135
152
  description: "number of layers to store in VRAM",
153
+ default: -1,
154
+ defaultDescription: "Automatically determined based on the available VRAM",
136
155
  group: "Optional:"
137
156
  })
138
157
  .option("repeatPenalty", {
@@ -189,11 +208,17 @@ export const ChatCommand = {
189
208
  description: "Provide access to environment functions like `getDate` and `getTime`",
190
209
  group: "Optional:"
191
210
  })
192
- .option("noInfoLog", {
193
- alias: "nl",
211
+ .option("debug", {
212
+ alias: "d",
194
213
  type: "boolean",
195
214
  default: false,
196
- description: "Disable llama.cpp info logs",
215
+ description: "Print llama.cpp info and debug logs",
216
+ group: "Optional:"
217
+ })
218
+ .option("meter", {
219
+ type: "boolean",
220
+ default: false,
221
+ description: "Log how many tokens were used as input and output for each response",
197
222
  group: "Optional:"
198
223
  })
199
224
  .option("printTimings", {
@@ -204,28 +229,36 @@ export const ChatCommand = {
204
229
  group: "Optional:"
205
230
  });
206
231
  },
207
- async handler({ model, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, contextSize, batchSize, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, noInfoLog, printTimings }) {
232
+ async handler({ model, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize, batchSize, noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, debug, meter, printTimings }) {
208
233
  try {
209
234
  await RunChat({
210
- model, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, contextSize, batchSize,
211
- grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, gpuLayers, lastTokensRepeatPenalty,
212
- repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens,
213
- noHistory, environmentFunctions, noInfoLog, printTimings
235
+ model, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize, batchSize,
236
+ noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, gpuLayers,
237
+ lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens,
238
+ noHistory, environmentFunctions, debug, meter, printTimings
214
239
  });
215
240
  }
216
241
  catch (err) {
242
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
217
243
  console.error(err);
218
244
  process.exit(1);
219
245
  }
220
246
  }
221
247
  };
222
- async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, contextSize, batchSize, grammar: grammarArg, jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, minP, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, noInfoLog, printTimings }) {
223
- if (noInfoLog)
224
- console.info(`${chalk.yellow("Log level:")} warn`);
248
+ async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize, batchSize, noTrimWhitespace, grammar: grammarArg, jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, minP, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, debug, meter, printTimings }) {
249
+ if (contextSize === -1)
250
+ contextSize = undefined;
251
+ if (gpuLayers === -1)
252
+ gpuLayers = undefined;
253
+ const trimWhitespace = !noTrimWhitespace;
254
+ if (debug)
255
+ console.info(`${chalk.yellow("Log level:")} debug`);
256
+ const resolvedModelPath = await resolveCommandGgufPath(modelArg);
257
+ const llamaLogLevel = debug
258
+ ? LlamaLogLevel.debug
259
+ : LlamaLogLevel.warn;
225
260
  const llama = await getLlama("lastBuild", {
226
- logLevel: noInfoLog
227
- ? LlamaLogLevel.warn
228
- : LlamaLogLevel.debug
261
+ logLevel: llamaLogLevel
229
262
  });
230
263
  const logBatchSize = batchSize != null;
231
264
  if (systemInfo)
@@ -240,75 +273,121 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPrompt
240
273
  console.warn(chalk.yellow("Both `prompt` and `promptFile` were specified. `promptFile` will be used."));
241
274
  prompt = await fs.readFile(path.resolve(process.cwd(), promptFile), "utf8");
242
275
  }
243
- if (batchSize == null)
244
- batchSize = contextSize;
245
- else if (batchSize > contextSize) {
276
+ if (batchSize != null && contextSize != null && batchSize > contextSize) {
246
277
  console.warn(chalk.yellow("Batch size is greater than the context size. Batch size will be set to the context size."));
247
278
  batchSize = contextSize;
248
279
  }
249
280
  let initialPrompt = prompt ?? null;
250
- const model = await withStatusLogs({
281
+ const model = await withOra({
251
282
  loading: chalk.blue("Loading model"),
252
283
  success: chalk.blue("Model loaded"),
253
- fail: chalk.blue("Failed to load model")
254
- }, async () => new LlamaModel({
255
- llama,
256
- modelPath: path.resolve(process.cwd(), modelArg),
257
- gpuLayers: gpuLayers != null ? gpuLayers : undefined
258
- }));
259
- const context = await withStatusLogs({
284
+ fail: chalk.blue("Failed to load model"),
285
+ useStatusLogs: debug
286
+ }, async () => {
287
+ try {
288
+ return await llama.loadModel({
289
+ modelPath: resolvedModelPath,
290
+ gpuLayers: gpuLayers != null ? gpuLayers : undefined
291
+ });
292
+ }
293
+ finally {
294
+ if (llama.logLevel === LlamaLogLevel.debug) {
295
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
296
+ console.info();
297
+ }
298
+ }
299
+ });
300
+ const context = await withOra({
260
301
  loading: chalk.blue("Creating context"),
261
302
  success: chalk.blue("Context created"),
262
- fail: chalk.blue("Failed to create context")
263
- }, async () => new LlamaContext({
264
- model,
265
- contextSize,
266
- batchSize,
267
- threads
268
- }));
303
+ fail: chalk.blue("Failed to create context"),
304
+ useStatusLogs: debug
305
+ }, async () => {
306
+ try {
307
+ return await model.createContext({
308
+ contextSize: contextSize != null ? contextSize : undefined,
309
+ batchSize: batchSize != null ? batchSize : undefined,
310
+ threads
311
+ });
312
+ }
313
+ finally {
314
+ if (llama.logLevel === LlamaLogLevel.debug) {
315
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
316
+ console.info();
317
+ }
318
+ }
319
+ });
269
320
  const grammar = jsonSchemaGrammarFilePath != null
270
321
  ? new LlamaJsonSchemaGrammar(llama, await fs.readJson(path.resolve(process.cwd(), jsonSchemaGrammarFilePath)))
271
322
  : grammarArg !== "text"
272
323
  ? await LlamaGrammar.getFor(llama, grammarArg)
273
324
  : undefined;
274
- const bos = model.tokens.bosString; // bos = beginning of sequence
275
- const eos = model.tokens.bosString; // eos = end of sequence
276
- const chatWrapper = resolveChatWrapperBasedOnWrapperTypeName(wrapper, {
277
- bosString: bos,
325
+ const chatWrapper = resolveChatWrapper({
326
+ type: wrapper,
327
+ bosString: model.tokens.bosString,
278
328
  filename: model.filename,
279
- typeDescription: model.typeDescription
280
- });
329
+ fileInfo: model.fileInfo,
330
+ tokenizer: model.tokenize,
331
+ noJinja
332
+ }) ?? new GeneralChatWrapper();
333
+ const contextSequence = context.getSequence();
281
334
  const session = new LlamaChatSession({
282
- contextSequence: context.getSequence(),
335
+ contextSequence,
283
336
  systemPrompt,
284
337
  chatWrapper: chatWrapper
285
338
  });
339
+ let lastTokenMeterState = contextSequence.tokenMeter.getState();
286
340
  await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
287
341
  if (grammarArg != "text" && jsonSchemaGrammarFilePath != null)
288
342
  console.warn(chalk.yellow("Both `grammar` and `jsonSchemaGrammarFile` were specified. `jsonSchemaGrammarFile` will be used."));
289
- console.info(`${chalk.yellow("Context size:")} ${context.contextSize}`);
290
- if (logBatchSize)
291
- console.info(`${chalk.yellow("Batch size:")} ${context.batchSize}`);
292
- console.info(`${chalk.yellow("Train context size:")} ${model.trainContextSize}`);
293
- console.info(`${chalk.yellow("Model type:")} ${model.typeDescription}`);
294
- console.info(`${chalk.yellow("BOS:")} ${bos}`);
295
- console.info(`${chalk.yellow("EOS:")} ${eos}`);
296
- console.info(`${chalk.yellow("Chat wrapper:")} ${chatWrapper.wrapperName}`);
297
- console.info(`${chalk.yellow("Repeat penalty:")} ${repeatPenalty} (apply to last ${lastTokensRepeatPenalty} tokens)`);
298
- if (repeatFrequencyPenalty != null)
299
- console.info(`${chalk.yellow("Repeat frequency penalty:")} ${repeatFrequencyPenalty}`);
300
- if (repeatPresencePenalty != null)
301
- console.info(`${chalk.yellow("Repeat presence penalty:")} ${repeatPresencePenalty}`);
302
- if (!penalizeRepeatingNewLine)
303
- console.info(`${chalk.yellow("Penalize repeating new line:")} disabled`);
304
- if (jsonSchemaGrammarFilePath != null)
305
- console.info(`${chalk.yellow("JSON schema grammar file:")} ${path.relative(process.cwd(), path.resolve(process.cwd(), jsonSchemaGrammarFilePath))}`);
306
- else if (grammarArg !== "text")
307
- console.info(`${chalk.yellow("Grammar:")} ${grammarArg}`);
308
343
  if (environmentFunctions && grammar != null) {
309
344
  console.warn(chalk.yellow("Environment functions are disabled since a grammar is already specified"));
310
345
  environmentFunctions = false;
311
346
  }
347
+ const padTitle = "Context".length + 1;
348
+ printCommonInfoLines({
349
+ context,
350
+ minTitleLength: padTitle,
351
+ printBos: true,
352
+ printEos: true,
353
+ logBatchSize,
354
+ tokenMeterEnabled: meter
355
+ });
356
+ printInfoLine({
357
+ title: "Chat",
358
+ padTitle: padTitle,
359
+ info: [{
360
+ title: "Wrapper",
361
+ value: chatWrapper.wrapperName
362
+ }, {
363
+ title: "Repeat penalty",
364
+ value: `${repeatPenalty} (apply to last ${lastTokensRepeatPenalty} tokens)`
365
+ }, {
366
+ show: repeatFrequencyPenalty != null,
367
+ title: "Repeat frequency penalty",
368
+ value: String(repeatFrequencyPenalty)
369
+ }, {
370
+ show: repeatPresencePenalty != null,
371
+ title: "Repeat presence penalty",
372
+ value: String(repeatPresencePenalty)
373
+ }, {
374
+ show: !penalizeRepeatingNewLine,
375
+ title: "Penalize repeating new line",
376
+ value: "disabled"
377
+ }, {
378
+ show: jsonSchemaGrammarFilePath != null,
379
+ title: "JSON schema grammar file",
380
+ value: () => path.relative(process.cwd(), path.resolve(process.cwd(), jsonSchemaGrammarFilePath ?? ""))
381
+ }, {
382
+ show: jsonSchemaGrammarFilePath == null && grammarArg !== "text",
383
+ title: "Grammar",
384
+ value: grammarArg
385
+ }, {
386
+ show: environmentFunctions,
387
+ title: "Environment functions",
388
+ value: "enabled"
389
+ }]
390
+ });
312
391
  // this is for ora to not interfere with readline
313
392
  await new Promise(resolve => setTimeout(resolve, 1));
314
393
  const replHistory = await ReplHistory.load(chatCommandHistoryFilePath, !noHistory);
@@ -324,6 +403,8 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPrompt
324
403
  }
325
404
  // eslint-disable-next-line no-constant-condition
326
405
  while (true) {
406
+ let hadNoWhitespaceTextInThisIteration = false;
407
+ let nextPrintLeftovers = "";
327
408
  const input = initialPrompt != null
328
409
  ? initialPrompt
329
410
  : await getPrompt();
@@ -357,16 +438,42 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPrompt
357
438
  ? undefined
358
439
  : maxTokens,
359
440
  onToken(chunk) {
360
- process.stdout.write(model.detokenize(chunk));
441
+ let text = nextPrintLeftovers + model.detokenize(chunk);
442
+ nextPrintLeftovers = "";
443
+ if (trimWhitespace) {
444
+ if (!hadNoWhitespaceTextInThisIteration) {
445
+ text = text.trimStart();
446
+ if (text.length > 0)
447
+ hadNoWhitespaceTextInThisIteration = true;
448
+ }
449
+ const textWithTrimmedEnd = text.trimEnd();
450
+ if (textWithTrimmedEnd.length < text.length) {
451
+ nextPrintLeftovers = text.slice(textWithTrimmedEnd.length);
452
+ text = textWithTrimmedEnd;
453
+ }
454
+ }
455
+ process.stdout.write(text);
361
456
  },
362
457
  functions: (grammar == null && environmentFunctions)
363
458
  ? defaultEnvironmentFunctions
364
- : undefined
459
+ : undefined,
460
+ trimWhitespaceSuffix: trimWhitespace
365
461
  });
366
462
  process.stdout.write(endColor);
367
463
  console.log();
368
- if (printTimings)
464
+ if (printTimings) {
465
+ if (LlamaLogLevelGreaterThan(llama.logLevel, LlamaLogLevel.info))
466
+ llama.logLevel = LlamaLogLevel.info;
369
467
  await context.printTimings();
468
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
469
+ llama.logLevel = llamaLogLevel;
470
+ }
471
+ if (meter) {
472
+ const newTokenMeterState = contextSequence.tokenMeter.getState();
473
+ const tokenMeterDiff = TokenMeter.diff(newTokenMeterState, lastTokenMeterState);
474
+ lastTokenMeterState = newTokenMeterState;
475
+ console.info(`${chalk.dim("Input tokens:")} ${String(tokenMeterDiff.usedInputTokens).padEnd(5, " ")} ${chalk.dim("Output tokens:")} ${tokenMeterDiff.usedOutputTokens}`);
476
+ }
370
477
  }
371
478
  }
372
479
  const defaultEnvironmentFunctions = {