node-llama-cpp 3.0.0-beta.14 → 3.0.0-beta.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (383) hide show
  1. package/README.md +1 -1
  2. package/dist/ChatWrapper.js +4 -0
  3. package/dist/ChatWrapper.js.map +1 -1
  4. package/dist/bindings/AddonTypes.d.ts +23 -0
  5. package/dist/bindings/Llama.d.ts +11 -0
  6. package/dist/bindings/Llama.js +56 -4
  7. package/dist/bindings/Llama.js.map +1 -1
  8. package/dist/bindings/getLlama.d.ts +20 -2
  9. package/dist/bindings/getLlama.js +15 -5
  10. package/dist/bindings/getLlama.js.map +1 -1
  11. package/dist/bindings/types.d.ts +15 -0
  12. package/dist/bindings/types.js +27 -2
  13. package/dist/bindings/types.js.map +1 -1
  14. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  15. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  16. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  17. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
  18. package/dist/bindings/utils/cloneLlamaCppRepo.js +26 -25
  19. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  20. package/dist/bindings/utils/compileLLamaCpp.js +2 -2
  21. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  22. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  23. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  24. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  25. package/dist/bindings/utils/resolveCustomCmakeOptions.js +2 -2
  26. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
  27. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  28. package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
  29. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  30. package/dist/chatWrappers/ChatMLChatWrapper.js +12 -10
  31. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  32. package/dist/chatWrappers/FalconChatWrapper.d.ts +2 -1
  33. package/dist/chatWrappers/FalconChatWrapper.js +28 -11
  34. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  35. package/dist/chatWrappers/FunctionaryChatWrapper.js +59 -45
  36. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  37. package/dist/chatWrappers/GemmaChatWrapper.js +9 -7
  38. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
  39. package/dist/chatWrappers/GeneralChatWrapper.d.ts +2 -1
  40. package/dist/chatWrappers/GeneralChatWrapper.js +35 -12
  41. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  42. package/dist/chatWrappers/LlamaChatWrapper.d.ts +7 -0
  43. package/dist/chatWrappers/LlamaChatWrapper.js +26 -8
  44. package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -1
  45. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +73 -0
  46. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +355 -0
  47. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  48. package/dist/{TemplateChatWrapper.d.ts → chatWrappers/generic/TemplateChatWrapper.d.ts} +6 -9
  49. package/dist/{TemplateChatWrapper.js → chatWrappers/generic/TemplateChatWrapper.js} +31 -69
  50. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  51. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +33 -0
  52. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  53. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  54. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  55. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +206 -0
  56. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  57. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +67 -0
  58. package/dist/chatWrappers/utils/resolveChatWrapper.js +208 -0
  59. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  60. package/dist/cli/cli.js +1 -1
  61. package/dist/cli/cli.js.map +1 -1
  62. package/dist/cli/commands/BuildCommand.js +1 -1
  63. package/dist/cli/commands/BuildCommand.js.map +1 -1
  64. package/dist/cli/commands/ChatCommand.d.ts +9 -5
  65. package/dist/cli/commands/ChatCommand.js +203 -118
  66. package/dist/cli/commands/ChatCommand.js.map +1 -1
  67. package/dist/cli/commands/ClearCommand.d.ts +1 -1
  68. package/dist/cli/commands/ClearCommand.js +5 -5
  69. package/dist/cli/commands/ClearCommand.js.map +1 -1
  70. package/dist/cli/commands/CompleteCommand.d.ts +5 -3
  71. package/dist/cli/commands/CompleteCommand.js +136 -85
  72. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  73. package/dist/cli/commands/DebugCommand.js +4 -4
  74. package/dist/cli/commands/DownloadCommand.js +3 -4
  75. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  76. package/dist/cli/commands/InfillCommand.d.ts +5 -3
  77. package/dist/cli/commands/InfillCommand.js +138 -89
  78. package/dist/cli/commands/InfillCommand.js.map +1 -1
  79. package/dist/cli/commands/{InspectCommand.d.ts → inspect/InspectCommand.d.ts} +1 -4
  80. package/dist/cli/commands/inspect/InspectCommand.js +17 -0
  81. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  82. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +11 -0
  83. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +121 -0
  84. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  85. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  86. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +136 -0
  87. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  88. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +15 -0
  89. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +579 -0
  90. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  91. package/dist/cli/recommendedModels.d.ts +2 -0
  92. package/dist/cli/recommendedModels.js +281 -0
  93. package/dist/cli/recommendedModels.js.map +1 -0
  94. package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
  95. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  96. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  97. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  98. package/dist/cli/utils/ConsoleTable.js +86 -0
  99. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  100. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  101. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  102. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  103. package/dist/cli/utils/consolePromptQuestion.d.ts +5 -0
  104. package/dist/cli/utils/consolePromptQuestion.js +80 -0
  105. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  106. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  107. package/dist/cli/utils/getReadablePath.js +14 -0
  108. package/dist/cli/utils/getReadablePath.js.map +1 -0
  109. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  110. package/dist/cli/utils/printCommonInfoLines.js +70 -0
  111. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  112. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  113. package/dist/cli/utils/printInfoLine.js +54 -0
  114. package/dist/cli/utils/printInfoLine.js.map +1 -0
  115. package/dist/cli/utils/resolveCommandGgufPath.d.ts +2 -0
  116. package/dist/cli/utils/resolveCommandGgufPath.js +494 -0
  117. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  118. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  119. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  120. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  121. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  122. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  123. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  124. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  125. package/dist/cli/utils/splitAnsiToLines.js +17 -0
  126. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  127. package/dist/config.d.ts +5 -0
  128. package/dist/config.js +11 -2
  129. package/dist/config.js.map +1 -1
  130. package/dist/consts.d.ts +2 -0
  131. package/dist/consts.js +8 -0
  132. package/dist/consts.js.map +1 -1
  133. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +8 -1
  134. package/dist/evaluator/LlamaChat/LlamaChat.js +15 -6
  135. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  136. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +9 -2
  137. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +5 -3
  138. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  139. package/dist/evaluator/LlamaCompletion.d.ts +9 -2
  140. package/dist/evaluator/LlamaCompletion.js +11 -6
  141. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  142. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +30 -3
  143. package/dist/evaluator/LlamaContext/LlamaContext.js +227 -102
  144. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  145. package/dist/evaluator/LlamaContext/types.d.ts +57 -6
  146. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  147. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  148. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  149. package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +4 -4
  150. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  151. package/dist/evaluator/LlamaEmbeddingContext.d.ts +23 -2
  152. package/dist/evaluator/LlamaEmbeddingContext.js +4 -5
  153. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  154. package/dist/evaluator/LlamaGrammar.d.ts +3 -2
  155. package/dist/evaluator/LlamaGrammar.js +3 -2
  156. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  157. package/dist/evaluator/LlamaModel.d.ts +56 -6
  158. package/dist/evaluator/LlamaModel.js +99 -7
  159. package/dist/evaluator/LlamaModel.js.map +1 -1
  160. package/dist/evaluator/TokenBias.d.ts +22 -0
  161. package/dist/evaluator/TokenBias.js +33 -0
  162. package/dist/evaluator/TokenBias.js.map +1 -0
  163. package/dist/evaluator/TokenMeter.d.ts +54 -0
  164. package/dist/evaluator/TokenMeter.js +86 -0
  165. package/dist/evaluator/TokenMeter.js.map +1 -0
  166. package/dist/gguf/consts.d.ts +3 -0
  167. package/dist/gguf/consts.js +8 -0
  168. package/dist/gguf/consts.js.map +1 -0
  169. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  170. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  171. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  172. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  173. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  174. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  175. package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
  176. package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
  177. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  178. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
  179. package/dist/gguf/fileReaders/GgufFsFileReader.js +45 -0
  180. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  181. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
  182. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
  183. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  184. package/dist/gguf/insights/GgufInsights.d.ts +42 -0
  185. package/dist/gguf/insights/GgufInsights.js +361 -0
  186. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  187. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +87 -0
  188. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +136 -0
  189. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  190. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +18 -0
  191. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +52 -0
  192. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  193. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +14 -0
  194. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +177 -0
  195. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  196. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  197. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  198. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  199. package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
  200. package/dist/gguf/parser/GgufV2Parser.js +115 -0
  201. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  202. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  203. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  204. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  205. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  206. package/dist/gguf/parser/parseGguf.js +58 -0
  207. package/dist/gguf/parser/parseGguf.js.map +1 -0
  208. package/dist/gguf/readGgufFileInfo.d.ts +30 -0
  209. package/dist/gguf/readGgufFileInfo.js +38 -0
  210. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  211. package/dist/gguf/types/GgufFileInfoTypes.d.ts +52 -0
  212. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  213. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  214. package/dist/gguf/types/GgufMetadataTypes.d.ts +330 -0
  215. package/dist/gguf/types/GgufMetadataTypes.js +86 -0
  216. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  217. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  218. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  219. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  220. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  221. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  222. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  223. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
  224. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
  225. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  226. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  227. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  228. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  229. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  230. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  231. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  232. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  233. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  234. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  235. package/dist/index.d.ts +14 -7
  236. package/dist/index.js +12 -6
  237. package/dist/index.js.map +1 -1
  238. package/dist/types.d.ts +1 -1
  239. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  240. package/dist/utils/InsufficientMemoryError.js +6 -0
  241. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  242. package/dist/utils/LlamaText.d.ts +25 -10
  243. package/dist/utils/LlamaText.js +205 -23
  244. package/dist/utils/LlamaText.js.map +1 -1
  245. package/dist/utils/StopGenerationDetector.js +3 -1
  246. package/dist/utils/StopGenerationDetector.js.map +1 -1
  247. package/dist/utils/findBestOption.d.ts +4 -0
  248. package/dist/utils/findBestOption.js +15 -0
  249. package/dist/utils/findBestOption.js.map +1 -0
  250. package/dist/utils/getConsoleLogPrefix.js +1 -1
  251. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +3 -3
  252. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -1
  253. package/dist/utils/getReadableContextSize.d.ts +1 -0
  254. package/dist/utils/getReadableContextSize.js +7 -0
  255. package/dist/utils/getReadableContextSize.js.map +1 -0
  256. package/dist/utils/gitReleaseBundles.js +68 -1
  257. package/dist/utils/gitReleaseBundles.js.map +1 -1
  258. package/dist/utils/isToken.d.ts +2 -0
  259. package/dist/utils/isToken.js +4 -0
  260. package/dist/utils/isToken.js.map +1 -0
  261. package/dist/utils/isUrl.d.ts +1 -0
  262. package/dist/utils/isUrl.js +15 -0
  263. package/dist/utils/isUrl.js.map +1 -0
  264. package/dist/utils/mergeUnionTypes.d.ts +4 -0
  265. package/dist/utils/parseModelFileName.d.ts +1 -0
  266. package/dist/utils/parseModelFileName.js +6 -1
  267. package/dist/utils/parseModelFileName.js.map +1 -1
  268. package/dist/utils/prettyPrintObject.d.ts +10 -1
  269. package/dist/utils/prettyPrintObject.js +57 -13
  270. package/dist/utils/prettyPrintObject.js.map +1 -1
  271. package/dist/utils/spawnCommand.js.map +1 -1
  272. package/dist/utils/tokenizeInput.d.ts +1 -1
  273. package/dist/utils/tokenizeInput.js +6 -3
  274. package/dist/utils/tokenizeInput.js.map +1 -1
  275. package/dist/utils/withOra.d.ts +2 -0
  276. package/dist/utils/withOra.js +14 -8
  277. package/dist/utils/withOra.js.map +1 -1
  278. package/dist/utils/withProgressLog.d.ts +23 -0
  279. package/dist/utils/withProgressLog.js +211 -0
  280. package/dist/utils/withProgressLog.js.map +1 -0
  281. package/dist/utils/withStatusLogs.js +1 -1
  282. package/dist/utils/withStatusLogs.js.map +1 -1
  283. package/llama/CMakeLists.txt +5 -5
  284. package/llama/addon.cpp +159 -9
  285. package/llama/binariesGithubRelease.json +1 -1
  286. package/llama/gitRelease.bundle +0 -0
  287. package/llama/gpuInfo/cuda-gpu-info.cu +21 -0
  288. package/llama/gpuInfo/cuda-gpu-info.h +3 -0
  289. package/llama/gpuInfo/metal-gpu-info.h +4 -1
  290. package/llama/gpuInfo/metal-gpu-info.mm +14 -1
  291. package/llama/gpuInfo/vulkan-gpu-info.cpp +20 -2
  292. package/llama/gpuInfo/vulkan-gpu-info.h +2 -0
  293. package/llama/grammars/README.md +10 -0
  294. package/llama/llama.cpp.info.json +1 -1
  295. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  296. package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -1
  297. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  298. package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -1
  299. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  300. package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -1
  301. package/llamaBins/linux-x64/llama-addon.node +0 -0
  302. package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -1
  303. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  304. package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -1
  305. package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
  306. package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -1
  307. package/llamaBins/mac-arm64-metal/default.metallib +0 -0
  308. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  309. package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -1
  310. package/llamaBins/mac-x64/llama-addon.node +0 -0
  311. package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -0
  312. package/llamaBins/win-arm64/llama-addon.exp +0 -0
  313. package/llamaBins/win-arm64/llama-addon.lib +0 -0
  314. package/llamaBins/win-arm64/llama-addon.node +0 -0
  315. package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -1
  316. package/llamaBins/win-x64/llama-addon.node +0 -0
  317. package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -1
  318. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  319. package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -1
  320. package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
  321. package/package.json +15 -12
  322. package/dist/TemplateChatWrapper.js.map +0 -1
  323. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +0 -33
  324. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +0 -49
  325. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +0 -1
  326. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  327. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -63
  328. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  329. package/dist/cli/commands/InspectCommand.js +0 -113
  330. package/dist/cli/commands/InspectCommand.js.map +0 -1
  331. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  332. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  333. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  334. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  335. package/dist/gguf/GGUFInsights.d.ts +0 -28
  336. package/dist/gguf/GGUFInsights.js +0 -58
  337. package/dist/gguf/GGUFInsights.js.map +0 -1
  338. package/dist/gguf/GGUFMetadata.d.ts +0 -19
  339. package/dist/gguf/GGUFMetadata.js +0 -38
  340. package/dist/gguf/GGUFMetadata.js.map +0 -1
  341. package/dist/gguf/errors/InvalidGGUFMagicError.d.ts +0 -3
  342. package/dist/gguf/errors/InvalidGGUFMagicError.js +0 -6
  343. package/dist/gguf/errors/InvalidGGUFMagicError.js.map +0 -1
  344. package/dist/gguf/errors/MetadataNotParsedYetError.d.ts +0 -3
  345. package/dist/gguf/errors/MetadataNotParsedYetError.js +0 -6
  346. package/dist/gguf/errors/MetadataNotParsedYetError.js.map +0 -1
  347. package/dist/gguf/errors/MissingNodeLlamaError.d.ts +0 -3
  348. package/dist/gguf/errors/MissingNodeLlamaError.js +0 -6
  349. package/dist/gguf/errors/MissingNodeLlamaError.js.map +0 -1
  350. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.d.ts +0 -5
  351. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js +0 -11
  352. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js.map +0 -1
  353. package/dist/gguf/errors/UnsupportedMetadataTypeError.d.ts +0 -4
  354. package/dist/gguf/errors/UnsupportedMetadataTypeError.js +0 -8
  355. package/dist/gguf/errors/UnsupportedMetadataTypeError.js.map +0 -1
  356. package/dist/gguf/ggufParser/GGUFParser.d.ts +0 -18
  357. package/dist/gguf/ggufParser/GGUFParser.js +0 -123
  358. package/dist/gguf/ggufParser/GGUFParser.js.map +0 -1
  359. package/dist/gguf/ggufParser/GGUFTypes.d.ts +0 -257
  360. package/dist/gguf/ggufParser/GGUFTypes.js +0 -2
  361. package/dist/gguf/ggufParser/GGUFTypes.js.map +0 -1
  362. package/dist/gguf/ggufParser/checkArchitecture.d.ts +0 -14
  363. package/dist/gguf/ggufParser/checkArchitecture.js +0 -74
  364. package/dist/gguf/ggufParser/checkArchitecture.js.map +0 -1
  365. package/dist/gguf/ggufParser/stream/GGUFBaseStream.d.ts +0 -38
  366. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js +0 -83
  367. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js.map +0 -1
  368. package/dist/gguf/ggufParser/stream/GGUFFetchStream.d.ts +0 -14
  369. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js +0 -35
  370. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js.map +0 -1
  371. package/dist/gguf/ggufParser/stream/GGUFReadStream.d.ts +0 -15
  372. package/dist/gguf/ggufParser/stream/GGUFReadStream.js +0 -40
  373. package/dist/gguf/ggufParser/stream/GGUFReadStream.js.map +0 -1
  374. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  375. package/dist/utils/parseModelTypeDescription.js +0 -9
  376. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  377. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  378. package/dist/utils/resolveChatWrapper.js +0 -16
  379. package/dist/utils/resolveChatWrapper.js.map +0 -1
  380. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  381. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  382. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  383. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
package/llama/addon.cpp CHANGED
@@ -3,13 +3,14 @@
3
3
  #include <algorithm>
4
4
  #include <sstream>
5
5
  #include <vector>
6
+ #include <unordered_map>
6
7
 
7
8
  #include "common.h"
8
9
  #include "common/grammar-parser.h"
9
10
  #include "llama.h"
10
11
  #include "napi.h"
11
12
 
12
- #ifdef GPU_INFO_USE_CUBLAS
13
+ #ifdef GPU_INFO_USE_CUDA
13
14
  # include "gpuInfo/cuda-gpu-info.h"
14
15
  #endif
15
16
  #ifdef GPU_INFO_USE_VULKAN
@@ -121,7 +122,7 @@ std::string addon_model_token_to_piece(const struct llama_model* model, llama_to
121
122
  return std::string(result.data(), result.size());
122
123
  }
123
124
 
124
- #ifdef GPU_INFO_USE_CUBLAS
125
+ #ifdef GPU_INFO_USE_CUDA
125
126
  void logCudaError(const char* message) {
126
127
  addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
127
128
  }
@@ -136,7 +137,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
136
137
  uint64_t total = 0;
137
138
  uint64_t used = 0;
138
139
 
139
- #ifdef GPU_INFO_USE_CUBLAS
140
+ #ifdef GPU_INFO_USE_CUDA
140
141
  size_t cudaDeviceTotal = 0;
141
142
  size_t cudaDeviceUsed = 0;
142
143
  bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
@@ -161,7 +162,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
161
162
  #ifdef GPU_INFO_USE_METAL
162
163
  uint64_t metalDeviceTotal = 0;
163
164
  uint64_t metalDeviceUsed = 0;
164
- get_metal_gpu_info(&metalDeviceTotal, &metalDeviceUsed);
165
+ getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed);
165
166
 
166
167
  total += metalDeviceTotal;
167
168
  used += metalDeviceUsed;
@@ -174,8 +175,34 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
174
175
  return result;
175
176
  }
176
177
 
178
+ Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
179
+ std::vector<std::string> deviceNames;
180
+
181
+ #ifdef GPU_INFO_USE_CUDA
182
+ gpuInfoGetCudaDeviceNames(&deviceNames, logCudaError);
183
+ #endif
184
+
185
+ #ifdef GPU_INFO_USE_VULKAN
186
+ gpuInfoGetVulkanDeviceNames(&deviceNames, logVulkanWarning);
187
+ #endif
188
+
189
+ #ifdef GPU_INFO_USE_METAL
190
+ getMetalGpuDeviceNames(&deviceNames);
191
+ #endif
192
+
193
+ Napi::Object result = Napi::Object::New(info.Env());
194
+
195
+ Napi::Array deviceNamesNapiArray = Napi::Array::New(info.Env(), deviceNames.size());
196
+ for (size_t i = 0; i < deviceNames.size(); ++i) {
197
+ deviceNamesNapiArray[i] = Napi::String::New(info.Env(), deviceNames[i]);
198
+ }
199
+ result.Set("deviceNames", deviceNamesNapiArray);
200
+
201
+ return result;
202
+ }
203
+
177
204
  Napi::Value getGpuType(const Napi::CallbackInfo& info) {
178
- #ifdef GPU_INFO_USE_CUBLAS
205
+ #ifdef GPU_INFO_USE_CUDA
179
206
  return Napi::String::New(info.Env(), "cuda");
180
207
  #endif
181
208
 
@@ -507,6 +534,16 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
507
534
 
508
535
  return Napi::Number::From(info.Env(), int32_t(tokenType));
509
536
  }
537
+ Napi::Value GetVocabularyType(const Napi::CallbackInfo& info) {
538
+ if (disposed) {
539
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
540
+ return info.Env().Undefined();
541
+ }
542
+
543
+ auto vocabularyType = llama_vocab_type(model);
544
+
545
+ return Napi::Number::From(info.Env(), int32_t(vocabularyType));
546
+ }
510
547
  Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info) {
511
548
  const int addBos = llama_add_bos_token(model);
512
549
 
@@ -515,6 +552,10 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
515
552
  return Napi::Boolean::New(info.Env(), shouldPrependBos);
516
553
  }
517
554
 
555
+ Napi::Value GetModelSize(const Napi::CallbackInfo& info) {
556
+ return Napi::Number::From(info.Env(), llama_model_size(model));
557
+ }
558
+
518
559
  static void init(Napi::Object exports) {
519
560
  exports.Set(
520
561
  "AddonModel",
@@ -540,7 +581,9 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
540
581
  InstanceMethod("eotToken", &AddonModel::EotToken),
541
582
  InstanceMethod("getTokenString", &AddonModel::GetTokenString),
542
583
  InstanceMethod("getTokenType", &AddonModel::GetTokenType),
584
+ InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
543
585
  InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
586
+ InstanceMethod("getModelSize", &AddonModel::GetModelSize),
544
587
  InstanceMethod("dispose", &AddonModel::Dispose),
545
588
  }
546
589
  )
@@ -822,6 +865,10 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
822
865
  context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
823
866
  }
824
867
 
868
+ if (options.Has("sequences")) {
869
+ context_params.n_seq_max = options.Get("sequences").As<Napi::Number>().Uint32Value();
870
+ }
871
+
825
872
  if (options.Has("embeddings")) {
826
873
  context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
827
874
  }
@@ -1039,6 +1086,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
1039
1086
  return result;
1040
1087
  }
1041
1088
 
1089
+ Napi::Value GetStateSize(const Napi::CallbackInfo& info) {
1090
+ if (disposed) {
1091
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1092
+ return info.Env().Undefined();
1093
+ }
1094
+
1095
+ return Napi::Number::From(info.Env(), llama_state_get_size(ctx));
1096
+ }
1097
+
1042
1098
  Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
1043
1099
  llama_print_timings(ctx);
1044
1100
  llama_reset_timings(ctx);
@@ -1063,6 +1119,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
1063
1119
  InstanceMethod("sampleToken", &AddonContext::SampleToken),
1064
1120
  InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
1065
1121
  InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
1122
+ InstanceMethod("getStateSize", &AddonContext::GetStateSize),
1066
1123
  InstanceMethod("printTimings", &AddonContext::PrintTimings),
1067
1124
  InstanceMethod("dispose", &AddonContext::Dispose),
1068
1125
  }
@@ -1163,7 +1220,7 @@ class AddonContextLoadContextWorker : public Napi::AsyncWorker {
1163
1220
  }
1164
1221
  void OnOK() {
1165
1222
  if (context->contextLoaded) {
1166
- uint64_t contextMemorySize = llama_get_state_size(context->ctx);
1223
+ uint64_t contextMemorySize = llama_state_get_size(context->ctx);
1167
1224
  adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
1168
1225
  context->loadedContextMemorySize = contextMemorySize;
1169
1226
  }
@@ -1278,6 +1335,8 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
1278
1335
  float repeat_penalty_presence_penalty = 0.00f; // 0.0 = disabled
1279
1336
  float repeat_penalty_frequency_penalty = 0.00f; // 0.0 = disabled
1280
1337
  std::vector<llama_token> repeat_penalty_tokens;
1338
+ std::unordered_map<llama_token, float> tokenBiases;
1339
+ bool useTokenBiases = false;
1281
1340
  bool use_repeat_penalty = false;
1282
1341
 
1283
1342
  AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
@@ -1322,6 +1381,19 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
1322
1381
  use_repeat_penalty = true;
1323
1382
  }
1324
1383
 
1384
+ if (options.Has("tokenBiasKeys") && options.Has("tokenBiasValues")) {
1385
+ Napi::Uint32Array tokenBiasKeys = options.Get("tokenBiasKeys").As<Napi::Uint32Array>();
1386
+ Napi::Float32Array tokenBiasValues = options.Get("tokenBiasValues").As<Napi::Float32Array>();
1387
+
1388
+ if (tokenBiasKeys.ElementLength() == tokenBiasValues.ElementLength()) {
1389
+ for (size_t i = 0; i < tokenBiasKeys.ElementLength(); i++) {
1390
+ tokenBiases[static_cast<llama_token>(tokenBiasKeys[i])] = tokenBiasValues[i];
1391
+ }
1392
+
1393
+ useTokenBiases = true;
1394
+ }
1395
+ }
1396
+
1325
1397
  if (options.Has("repeatPenaltyPresencePenalty")) {
1326
1398
  repeat_penalty_presence_penalty = options.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue();
1327
1399
  }
@@ -1370,18 +1442,33 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
1370
1442
  // Select the best prediction.
1371
1443
  auto logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
1372
1444
  auto n_vocab = llama_n_vocab(ctx->model->model);
1445
+ auto eos_token = llama_token_eos(ctx->model->model);
1373
1446
 
1374
1447
  std::vector<llama_token_data> candidates;
1375
1448
  candidates.reserve(n_vocab);
1376
1449
 
1377
1450
  for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
1378
- candidates.emplace_back(llama_token_data { token_id, logits[token_id], 0.0f });
1451
+ auto logit = logits[token_id];
1452
+
1453
+ if (useTokenBiases) {
1454
+ bool hasTokenBias = tokenBiases.find(token_id) != tokenBiases.end();
1455
+ if (hasTokenBias) {
1456
+ auto logitBias = tokenBiases.at(token_id);
1457
+ if (logitBias == -INFINITY || logitBias < -INFINITY) {
1458
+ if (token_id != eos_token) {
1459
+ logit = -INFINITY;
1460
+ }
1461
+ } else {
1462
+ logit += logitBias;
1463
+ }
1464
+ }
1465
+ }
1466
+
1467
+ candidates.emplace_back(llama_token_data { token_id, logit, 0.0f });
1379
1468
  }
1380
1469
 
1381
1470
  llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
1382
1471
 
1383
- auto eos_token = llama_token_eos(ctx->model->model);
1384
-
1385
1472
  if (use_repeat_penalty && !repeat_penalty_tokens.empty()) {
1386
1473
  llama_sample_repetition_penalties(
1387
1474
  ctx->ctx,
@@ -1396,6 +1483,13 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
1396
1483
 
1397
1484
  if (use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
1398
1485
  llama_sample_grammar(ctx->ctx, &candidates_p, (grammar_evaluation_state)->grammar);
1486
+
1487
+ if ((candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) && useTokenBiases) {
1488
+ // logit biases caused grammar sampling to fail, so sampling again without logit biases
1489
+ useTokenBiases = false;
1490
+ SampleToken();
1491
+ return;
1492
+ }
1399
1493
  }
1400
1494
 
1401
1495
  if (temperature <= 0) {
@@ -1444,6 +1538,55 @@ Napi::Value systemInfo(const Napi::CallbackInfo& info) {
1444
1538
  return Napi::String::From(info.Env(), llama_print_system_info());
1445
1539
  }
1446
1540
 
1541
+ Napi::Value addonGetSupportsGpuOffloading(const Napi::CallbackInfo& info) {
1542
+ return Napi::Boolean::New(info.Env(), llama_supports_gpu_offload());
1543
+ }
1544
+
1545
+ Napi::Value addonGetSupportsMmap(const Napi::CallbackInfo& info) {
1546
+ return Napi::Boolean::New(info.Env(), llama_supports_mmap());
1547
+ }
1548
+
1549
+ Napi::Value addonGetSupportsMlock(const Napi::CallbackInfo& info) {
1550
+ return Napi::Boolean::New(info.Env(), llama_supports_mlock());
1551
+ }
1552
+
1553
+ Napi::Value addonGetBlockSizeForGgmlType(const Napi::CallbackInfo& info) {
1554
+ const int ggmlType = info[0].As<Napi::Number>().Int32Value();
1555
+
1556
+ if (ggmlType < 0 || ggmlType > GGML_TYPE_COUNT) {
1557
+ return info.Env().Undefined();
1558
+ }
1559
+
1560
+ const auto blockSize = ggml_blck_size(static_cast<ggml_type>(ggmlType));
1561
+
1562
+ return Napi::Number::New(info.Env(), blockSize);
1563
+ }
1564
+
1565
+ Napi::Value addonGetTypeSizeForGgmlType(const Napi::CallbackInfo& info) {
1566
+ const int ggmlType = info[0].As<Napi::Number>().Int32Value();
1567
+
1568
+ if (ggmlType < 0 || ggmlType > GGML_TYPE_COUNT) {
1569
+ return info.Env().Undefined();
1570
+ }
1571
+
1572
+ const auto typeSize = ggml_type_size(static_cast<ggml_type>(ggmlType));
1573
+
1574
+ return Napi::Number::New(info.Env(), typeSize);
1575
+ }
1576
+
1577
+ Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
1578
+ Napi::Object consts = Napi::Object::New(info.Env());
1579
+ consts.Set("ggmlMaxDims", Napi::Number::New(info.Env(), GGML_MAX_DIMS));
1580
+ consts.Set("ggmlTypeF16Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F16)));
1581
+ consts.Set("ggmlTypeF32Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F32)));
1582
+ consts.Set("ggmlTensorOverhead", Napi::Number::New(info.Env(), ggml_tensor_overhead()));
1583
+ consts.Set("llamaMaxRngState", Napi::Number::New(info.Env(), LLAMA_MAX_RNG_STATE));
1584
+ consts.Set("llamaPosSize", Napi::Number::New(info.Env(), sizeof(llama_pos)));
1585
+ consts.Set("llamaSeqIdSize", Napi::Number::New(info.Env(), sizeof(llama_seq_id)));
1586
+
1587
+ return consts;
1588
+ }
1589
+
1447
1590
  int addonGetGgmlLogLevelNumber(ggml_log_level level) {
1448
1591
  switch (level) {
1449
1592
  case GGML_LOG_LEVEL_ERROR: return 2;
@@ -1693,9 +1836,16 @@ static void addonFreeLlamaBackend(Napi::Env env, int* data) {
1693
1836
  Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
1694
1837
  exports.DefineProperties({
1695
1838
  Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
1839
+ Napi::PropertyDescriptor::Function("getSupportsGpuOffloading", addonGetSupportsGpuOffloading),
1840
+ Napi::PropertyDescriptor::Function("getSupportsMmap", addonGetSupportsMmap),
1841
+ Napi::PropertyDescriptor::Function("getSupportsMlock", addonGetSupportsMlock),
1842
+ Napi::PropertyDescriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
1843
+ Napi::PropertyDescriptor::Function("getTypeSizeForGgmlType", addonGetTypeSizeForGgmlType),
1844
+ Napi::PropertyDescriptor::Function("getConsts", addonGetConsts),
1696
1845
  Napi::PropertyDescriptor::Function("setLogger", setLogger),
1697
1846
  Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
1698
1847
  Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
1848
+ Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
1699
1849
  Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
1700
1850
  Napi::PropertyDescriptor::Function("init", addonInit),
1701
1851
  Napi::PropertyDescriptor::Function("dispose", addonDispose),
@@ -1,3 +1,3 @@
1
1
  {
2
- "release": "b2440"
2
+ "release": "b2665"
3
3
  }
Binary file
@@ -1,4 +1,6 @@
1
1
  #include <stddef.h>
2
+ #include <vector>
3
+ #include <string>
2
4
 
3
5
  #if defined(GPU_INFO_USE_HIPBLAS)
4
6
  #include <hip/hip_runtime.h>
@@ -97,3 +99,22 @@ bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaEr
97
99
 
98
100
  return true;
99
101
  }
102
+
103
+ void gpuInfoGetCudaDeviceNames(std::vector<std::string> * deviceNames, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
104
+ int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback);
105
+
106
+ if (deviceCount < 0) {
107
+ return;
108
+ }
109
+
110
+ for (int i = 0; i < deviceCount; i++) {
111
+ cudaDeviceProp prop;
112
+ auto getDevicePropertiesResult = cudaGetDeviceProperties(&prop, i);
113
+
114
+ if (getDevicePropertiesResult != cudaSuccess) {
115
+ errorLogCallback(cudaGetErrorString(getDevicePropertiesResult));
116
+ } else {
117
+ (*deviceNames).push_back(std::string(prop.name));
118
+ }
119
+ }
120
+ }
@@ -1,7 +1,10 @@
1
1
  #pragma once
2
2
 
3
3
  #include <stddef.h>
4
+ #include <vector>
5
+ #include <string>
4
6
 
5
7
  typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message);
6
8
 
7
9
  bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback);
10
+ void gpuInfoGetCudaDeviceNames(std::vector<std::string> * deviceNames, gpuInfoCudaErrorLogCallback_t errorLogCallback);
@@ -1,5 +1,8 @@
1
1
  #pragma once
2
2
 
3
3
  #include <stdint.h>
4
+ #include <string>
5
+ #include <vector>
4
6
 
5
- void get_metal_gpu_info(uint64_t * total, uint64_t * used);
7
+ void getMetalGpuInfo(uint64_t * total, uint64_t * used);
8
+ void getMetalGpuDeviceNames(std::vector<std::string> * deviceNames);
@@ -1,7 +1,9 @@
1
1
  #include <stdint.h>
2
+ #include <vector>
3
+ #include <string>
2
4
  #import <Metal/Metal.h>
3
5
 
4
- void get_metal_gpu_info(uint64_t * total, uint64_t * used) {
6
+ void getMetalGpuInfo(uint64_t * total, uint64_t * used) {
5
7
  id<MTLDevice> device = MTLCreateSystemDefaultDevice();
6
8
 
7
9
  if (device) {
@@ -15,3 +17,14 @@ void get_metal_gpu_info(uint64_t * total, uint64_t * used) {
15
17
  [device release];
16
18
  device = nil;
17
19
  }
20
+
21
// Appends the name of every device returned by MTLCopyAllDevices() to `deviceNames`.
void getMetalGpuDeviceNames(std::vector<std::string> * deviceNames) {
    NSArray<id<MTLDevice>> *devices = MTLCopyAllDevices();

    for (id<MTLDevice> device in devices) {
        // read the UTF-8 bytes straight from the device name (no intermediate NSString copy needed);
        // a null pointer must never be handed to std::string, so such a device is skipped
        const char *deviceName = device.name.UTF8String;

        if (deviceName != nullptr) {
            deviceNames->push_back(std::string(deviceName));
        }
    }

    // the array is released manually here, matching the explicit release calls used elsewhere in this file
    [devices release];
    devices = nil;
}
@@ -1,10 +1,11 @@
1
1
  #include <stddef.h>
2
+ #include <vector>
2
3
 
3
4
  #include <vulkan/vulkan.hpp>
4
5
 
5
6
  typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
6
7
 
7
- bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
8
+ static bool enumerateVulkanDevices(size_t* total, size_t* used, bool addDeviceNames, std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
8
9
  vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2);
9
10
  vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {});
10
11
  vk::Instance instance = vk::createInstance(createInfo);
@@ -41,8 +42,14 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkan
41
42
 
42
43
  for (uint32_t i = 0; i < memProps.memoryHeapCount; ++i) {
43
44
  if (memProps.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
44
- totalMem += memProps.memoryHeaps[i].size;
45
+ const auto size = memProps.memoryHeaps[i].size;
46
+ totalMem += size;
45
47
  usedMem += memoryBudgetProperties.heapUsage[i];
48
+
49
+ if (size > 0 && addDeviceNames) {
50
+ (*deviceNames).push_back(std::string(deviceProps.deviceName.data()));
51
+ }
52
+
46
53
  break;
47
54
  }
48
55
  }
@@ -63,3 +70,14 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkan
63
70
  *used = usedMem;
64
71
  return true;
65
72
  }
73
+
74
+ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
75
+ return enumerateVulkanDevices(total, used, false, nullptr, warningLogCallback);
76
+ }
77
+
78
+ bool gpuInfoGetVulkanDeviceNames(std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
79
+ size_t vulkanDeviceTotal = 0;
80
+ size_t vulkanDeviceUsed = 0;
81
+
82
+ return enumerateVulkanDevices(&vulkanDeviceTotal, &vulkanDeviceUsed, true, deviceNames, warningLogCallback);
83
+ }
@@ -1,7 +1,9 @@
1
1
#pragma once

#include <stddef.h>
#include <string>
#include <vector>

// Callback type used by the functions below to hand warning messages to the caller.
typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);

// Writes memory figures gathered from the Vulkan devices into `total` and `used`.
bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback);

// Appends Vulkan device names to `deviceNames`.
// <string> is included above because this declaration names std::string directly.
bool gpuInfoGetVulkanDeviceNames(std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback);
@@ -89,3 +89,13 @@ This guide provides a brief overview. Check out the GBNF files in this directory
89
89
  ```
90
90
  ./main -m <model> --grammar-file grammars/some-grammar.gbnf -p 'Some prompt'
91
91
  ```
92
+
93
+ ## Troubleshooting
94
+
95
+ Grammars currently have performance gotchas (see https://github.com/ggerganov/llama.cpp/issues/4218).
96
+
97
+ ### Efficient optional repetitions
98
+
99
+ A common pattern is to allow repetitions of a pattern `x` up to N times.
100
+
101
+ While semantically correct, the syntax `x? x? x? ... x?` (with N repetitions) will result in extremely slow inference. Instead, you can write `(x (x (x ... (x)?...)?)?)?` (with N-deep nesting).
@@ -1,4 +1,4 @@
1
1
  {
2
- "tag": "b2440",
2
+ "tag": "b2665",
3
3
  "llamaCppGithubRepo": "ggerganov/llama.cpp"
4
4
  }
@@ -0,0 +1,41 @@
1
# Toolchain settings for building Windows ARM64 binaries with MSVC.
set(CMAKE_SYSTEM_NAME Windows)
set(CMAKE_SYSTEM_PROCESSOR ARM64)

# Candidate Visual Studio roots: the locations reported by the environment first,
# followed by the conventional hard-coded install directories.
set(PROGRAMFILES "$ENV{ProgramFiles}")
set(PROGRAMFILES_X86 "$ENV{ProgramFiles\(x86\)}")

set(VS_INSTALL_PATHS
    "${PROGRAMFILES_X86}/Microsoft Visual Studio"
    "${PROGRAMFILES}/Microsoft Visual Studio"
    "C:/Program Files (x86)/Microsoft Visual Studio"
    "C:/Program Files/Microsoft Visual Studio"
)

# Search each root for an x64-hosted, ARM64-targeting cl.exe and keep the first match.
# Within a root, the shallower layout is tried before the deeper one.
foreach(VS_ROOT IN LISTS VS_INSTALL_PATHS)
    # stop as soon as a compiler is known (either preset or found under an earlier root)
    if(CL_EXE_PATH)
        break()
    endif()

    foreach(CL_EXE_GLOB IN ITEMS
        "${VS_ROOT}/*/VC/Tools/MSVC/*/bin/Hostx64/arm64/cl.exe"
        "${VS_ROOT}/**/*/VC/Tools/MSVC/*/bin/Hostx64/arm64/cl.exe"
    )
        file(GLOB_RECURSE FOUND_CL_EXE "${CL_EXE_GLOB}")
        if(FOUND_CL_EXE)
            list(GET FOUND_CL_EXE 0 CL_EXE_PATH)
            break()
        endif()
    endforeach()
endforeach()

# Use the located cl.exe for both C and C++, or abort the configuration when none was found.
if(CL_EXE_PATH)
    set(CMAKE_C_COMPILER "${CL_EXE_PATH}")
    set(CMAKE_CXX_COMPILER "${CL_EXE_PATH}")
else()
    message(FATAL_ERROR "cl.exe not found for ARM architecture.")
endif()
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"arm64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2440"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"arm64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"armv7l","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2440"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"armv7l","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2440"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":"cuda","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2440"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":"cuda","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":"vulkan","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2440"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":"vulkan","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","platformInfo":{"name":"macOS","version":"21.6.0"},"arch":"arm64","gpu":"metal","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2440"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","platformInfo":{"name":"macOS","version":"21.6.0"},"arch":"arm64","gpu":"metal","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","platformInfo":{"name":"macOS","version":"21.6.0"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2440"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","platformInfo":{"name":"macOS","version":"21.6.0"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
Binary file
@@ -0,0 +1 @@
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"arm64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2440"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
Binary file
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":"cuda","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2440"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":"cuda","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":"vulkan","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2440"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":"vulkan","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}