node-llama-cpp 3.0.0-beta.1 → 3.0.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (330)
  1. package/README.md +2 -0
  2. package/dist/ChatWrapper.d.ts +49 -0
  3. package/dist/ChatWrapper.js +120 -0
  4. package/dist/ChatWrapper.js.map +1 -0
  5. package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +11 -4
  6. package/dist/bindings/AddonTypes.js +2 -0
  7. package/dist/bindings/AddonTypes.js.map +1 -0
  8. package/dist/bindings/Llama.d.ts +23 -0
  9. package/dist/bindings/Llama.js +225 -0
  10. package/dist/bindings/Llama.js.map +1 -0
  11. package/dist/bindings/getLlama.d.ts +86 -0
  12. package/dist/bindings/getLlama.js +225 -0
  13. package/dist/bindings/getLlama.js.map +1 -0
  14. package/dist/bindings/types.d.ts +33 -0
  15. package/dist/bindings/types.js +30 -0
  16. package/dist/bindings/types.js.map +1 -0
  17. package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
  18. package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
  19. package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
  20. package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
  21. package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
  22. package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
  23. package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
  24. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
  25. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
  26. package/dist/bindings/utils/cloneLlamaCppRepo.js +155 -0
  27. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
  28. package/dist/bindings/utils/compileLLamaCpp.d.ts +12 -0
  29. package/dist/bindings/utils/compileLLamaCpp.js +157 -0
  30. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
  31. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
  32. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +85 -0
  33. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
  34. package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
  35. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
  36. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
  37. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
  38. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
  39. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
  40. package/dist/bindings/utils/getPlatform.d.ts +2 -0
  41. package/dist/bindings/utils/getPlatform.js +15 -0
  42. package/dist/bindings/utils/getPlatform.js.map +1 -0
  43. package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
  44. package/dist/bindings/utils/lastBuildInfo.js +17 -0
  45. package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
  46. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
  47. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +28 -0
  48. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
  49. package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
  50. package/dist/bindings/utils/resolveCustomCmakeOptions.js +43 -0
  51. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
  52. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +12 -0
  53. package/dist/chatWrappers/AlpacaChatWrapper.js +21 -0
  54. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -0
  55. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +18 -0
  56. package/dist/chatWrappers/ChatMLChatWrapper.js +83 -0
  57. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -0
  58. package/dist/chatWrappers/EmptyChatWrapper.d.ts +4 -0
  59. package/dist/chatWrappers/EmptyChatWrapper.js +5 -0
  60. package/dist/chatWrappers/EmptyChatWrapper.js.map +1 -0
  61. package/dist/chatWrappers/FalconChatWrapper.d.ts +21 -0
  62. package/dist/chatWrappers/FalconChatWrapper.js +104 -0
  63. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -0
  64. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +41 -0
  65. package/dist/chatWrappers/FunctionaryChatWrapper.js +200 -0
  66. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -0
  67. package/dist/chatWrappers/GeneralChatWrapper.d.ts +21 -0
  68. package/dist/chatWrappers/GeneralChatWrapper.js +112 -0
  69. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -0
  70. package/dist/chatWrappers/LlamaChatWrapper.d.ts +13 -0
  71. package/dist/chatWrappers/LlamaChatWrapper.js +78 -0
  72. package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -0
  73. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +5 -5
  74. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +28 -17
  75. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -1
  76. package/dist/cli/cli.js +4 -0
  77. package/dist/cli/cli.js.map +1 -1
  78. package/dist/cli/commands/BuildCommand.d.ts +2 -1
  79. package/dist/cli/commands/BuildCommand.js +50 -10
  80. package/dist/cli/commands/BuildCommand.js.map +1 -1
  81. package/dist/cli/commands/ChatCommand.d.ts +10 -3
  82. package/dist/cli/commands/ChatCommand.js +152 -42
  83. package/dist/cli/commands/ChatCommand.js.map +1 -1
  84. package/dist/cli/commands/ClearCommand.js +4 -6
  85. package/dist/cli/commands/ClearCommand.js.map +1 -1
  86. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  87. package/dist/cli/commands/DebugCommand.js +59 -0
  88. package/dist/cli/commands/DebugCommand.js.map +1 -0
  89. package/dist/cli/commands/DownloadCommand.d.ts +2 -1
  90. package/dist/cli/commands/DownloadCommand.js +47 -40
  91. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  92. package/dist/cli/commands/OnPostInstallCommand.js +7 -10
  93. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  94. package/dist/config.d.ts +10 -3
  95. package/dist/config.js +18 -7
  96. package/dist/config.js.map +1 -1
  97. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +185 -0
  98. package/dist/evaluator/LlamaChat/LlamaChat.js +705 -0
  99. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
  100. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +22 -0
  101. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +121 -0
  102. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -0
  103. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +16 -0
  104. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +135 -0
  105. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  106. package/dist/{llamaEvaluator → evaluator/LlamaChatSession}/LlamaChatSession.d.ts +59 -25
  107. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +219 -0
  108. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  109. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +7 -0
  110. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
  111. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  112. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +20 -23
  113. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +71 -105
  114. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
  115. package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.d.ts +6 -14
  116. package/dist/evaluator/LlamaContext/types.js.map +1 -0
  117. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +1 -0
  118. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +1 -0
  119. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +1 -0
  120. package/dist/evaluator/LlamaEmbeddingContext.d.ts +37 -0
  121. package/dist/evaluator/LlamaEmbeddingContext.js +78 -0
  122. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
  123. package/dist/evaluator/LlamaGrammar.d.ts +30 -0
  124. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +14 -18
  125. package/dist/evaluator/LlamaGrammar.js.map +1 -0
  126. package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
  127. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
  128. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
  129. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +4 -2
  130. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
  131. package/dist/{llamaEvaluator → evaluator}/LlamaModel.d.ts +14 -5
  132. package/dist/{llamaEvaluator → evaluator}/LlamaModel.js +18 -9
  133. package/dist/evaluator/LlamaModel.js.map +1 -0
  134. package/dist/index.d.ts +27 -16
  135. package/dist/index.js +26 -14
  136. package/dist/index.js.map +1 -1
  137. package/dist/state.d.ts +2 -0
  138. package/dist/state.js +7 -0
  139. package/dist/state.js.map +1 -1
  140. package/dist/types.d.ts +41 -3
  141. package/dist/types.js +5 -1
  142. package/dist/types.js.map +1 -1
  143. package/dist/utils/LlamaText.d.ts +42 -0
  144. package/dist/utils/LlamaText.js +207 -0
  145. package/dist/utils/LlamaText.js.map +1 -0
  146. package/dist/utils/StopGenerationDetector.d.ts +28 -0
  147. package/dist/utils/StopGenerationDetector.js +205 -0
  148. package/dist/utils/StopGenerationDetector.js.map +1 -0
  149. package/dist/utils/TokenStreamRegulator.d.ts +30 -0
  150. package/dist/utils/TokenStreamRegulator.js +96 -0
  151. package/dist/utils/TokenStreamRegulator.js.map +1 -0
  152. package/dist/utils/appendUserMessageToChatHistory.d.ts +2 -0
  153. package/dist/utils/appendUserMessageToChatHistory.js +18 -0
  154. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -0
  155. package/dist/utils/cmake.js +16 -11
  156. package/dist/utils/cmake.js.map +1 -1
  157. package/dist/utils/compareTokens.d.ts +2 -0
  158. package/dist/utils/compareTokens.js +4 -0
  159. package/dist/utils/compareTokens.js.map +1 -0
  160. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +18 -0
  161. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +61 -0
  162. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -0
  163. package/dist/utils/gbnfJson/GbnfGrammarGenerator.d.ts +1 -0
  164. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js +17 -0
  165. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  166. package/dist/utils/gbnfJson/GbnfTerminal.d.ts +1 -1
  167. package/dist/utils/gbnfJson/GbnfTerminal.js.map +1 -1
  168. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.d.ts +6 -0
  169. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js +21 -0
  170. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -0
  171. package/dist/utils/gbnfJson/types.d.ts +1 -1
  172. package/dist/utils/gbnfJson/types.js.map +1 -1
  173. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.d.ts +1 -0
  174. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  175. package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
  176. package/dist/utils/getConsoleLogPrefix.js +9 -0
  177. package/dist/utils/getConsoleLogPrefix.js.map +1 -0
  178. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +1 -15
  179. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +1 -1
  180. package/dist/utils/getGrammarsFolder.d.ts +2 -1
  181. package/dist/utils/getGrammarsFolder.js +8 -7
  182. package/dist/utils/getGrammarsFolder.js.map +1 -1
  183. package/dist/utils/getModuleVersion.d.ts +1 -0
  184. package/dist/utils/getModuleVersion.js +13 -0
  185. package/dist/utils/getModuleVersion.js.map +1 -0
  186. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.d.ts +2 -0
  187. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +49 -0
  188. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -0
  189. package/dist/utils/gitReleaseBundles.js +6 -5
  190. package/dist/utils/gitReleaseBundles.js.map +1 -1
  191. package/dist/utils/hashString.d.ts +1 -0
  192. package/dist/utils/hashString.js +8 -0
  193. package/dist/utils/hashString.js.map +1 -0
  194. package/dist/utils/isLockfileActive.d.ts +4 -0
  195. package/dist/utils/isLockfileActive.js +12 -0
  196. package/dist/utils/isLockfileActive.js.map +1 -0
  197. package/dist/utils/parseModelTypeDescription.d.ts +1 -1
  198. package/dist/utils/prettyPrintObject.d.ts +1 -0
  199. package/dist/utils/prettyPrintObject.js +40 -0
  200. package/dist/utils/prettyPrintObject.js.map +1 -0
  201. package/dist/utils/removeNullFields.d.ts +1 -0
  202. package/dist/utils/removeNullFields.js +8 -0
  203. package/dist/utils/removeNullFields.js.map +1 -1
  204. package/dist/utils/resolveChatWrapper.d.ts +4 -0
  205. package/dist/utils/resolveChatWrapper.js +16 -0
  206. package/dist/utils/resolveChatWrapper.js.map +1 -0
  207. package/dist/utils/resolveGithubRelease.d.ts +2 -0
  208. package/dist/utils/resolveGithubRelease.js +36 -0
  209. package/dist/utils/resolveGithubRelease.js.map +1 -0
  210. package/dist/utils/spawnCommand.d.ts +1 -1
  211. package/dist/utils/spawnCommand.js +4 -2
  212. package/dist/utils/spawnCommand.js.map +1 -1
  213. package/dist/utils/tokenizeInput.d.ts +3 -0
  214. package/dist/utils/tokenizeInput.js +9 -0
  215. package/dist/utils/tokenizeInput.js.map +1 -0
  216. package/dist/utils/truncateTextAndRoundToWords.d.ts +8 -0
  217. package/dist/utils/truncateTextAndRoundToWords.js +27 -0
  218. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -0
  219. package/dist/utils/waitForLockfileRelease.d.ts +5 -0
  220. package/dist/utils/waitForLockfileRelease.js +20 -0
  221. package/dist/utils/waitForLockfileRelease.js.map +1 -0
  222. package/dist/utils/withLockfile.d.ts +7 -0
  223. package/dist/utils/withLockfile.js +44 -0
  224. package/dist/utils/withLockfile.js.map +1 -0
  225. package/dist/utils/withOra.js +11 -1
  226. package/dist/utils/withOra.js.map +1 -1
  227. package/dist/utils/withStatusLogs.d.ts +2 -1
  228. package/dist/utils/withStatusLogs.js +11 -8
  229. package/dist/utils/withStatusLogs.js.map +1 -1
  230. package/llama/.clang-format +1 -2
  231. package/llama/CMakeLists.txt +87 -2
  232. package/llama/addon.cpp +256 -22
  233. package/llama/binariesGithubRelease.json +1 -1
  234. package/llama/gitRelease.bundle +0 -0
  235. package/llama/gpuInfo/cuda-gpu-info.cu +99 -0
  236. package/llama/gpuInfo/cuda-gpu-info.h +7 -0
  237. package/llama/gpuInfo/metal-gpu-info.h +5 -0
  238. package/llama/gpuInfo/metal-gpu-info.mm +17 -0
  239. package/llama/llama.cpp.info.json +4 -0
  240. package/llamaBins/linux-arm64/.buildMetadata.json +1 -0
  241. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  242. package/llamaBins/linux-armv7l/.buildMetadata.json +1 -0
  243. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  244. package/llamaBins/linux-x64/.buildMetadata.json +1 -0
  245. package/llamaBins/linux-x64/llama-addon.node +0 -0
  246. package/llamaBins/linux-x64-cuda/.buildMetadata.json +1 -0
  247. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  248. package/llamaBins/mac-arm64-metal/.buildMetadata.json +1 -0
  249. package/llamaBins/mac-arm64-metal/ggml-metal.metal +6119 -0
  250. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  251. package/llamaBins/mac-x64/.buildMetadata.json +1 -0
  252. package/llamaBins/mac-x64/llama-addon.node +0 -0
  253. package/llamaBins/win-x64/.buildMetadata.json +1 -0
  254. package/llamaBins/win-x64/llama-addon.exp +0 -0
  255. package/llamaBins/win-x64/llama-addon.node +0 -0
  256. package/llamaBins/win-x64-cuda/.buildMetadata.json +1 -0
  257. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  258. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  259. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  260. package/package.json +37 -15
  261. package/dist/ChatPromptWrapper.d.ts +0 -11
  262. package/dist/ChatPromptWrapper.js +0 -20
  263. package/dist/ChatPromptWrapper.js.map +0 -1
  264. package/dist/chatWrappers/ChatMLChatPromptWrapper.d.ts +0 -12
  265. package/dist/chatWrappers/ChatMLChatPromptWrapper.js +0 -22
  266. package/dist/chatWrappers/ChatMLChatPromptWrapper.js.map +0 -1
  267. package/dist/chatWrappers/EmptyChatPromptWrapper.d.ts +0 -4
  268. package/dist/chatWrappers/EmptyChatPromptWrapper.js +0 -5
  269. package/dist/chatWrappers/EmptyChatPromptWrapper.js.map +0 -1
  270. package/dist/chatWrappers/FalconChatPromptWrapper.d.ts +0 -19
  271. package/dist/chatWrappers/FalconChatPromptWrapper.js +0 -33
  272. package/dist/chatWrappers/FalconChatPromptWrapper.js.map +0 -1
  273. package/dist/chatWrappers/GeneralChatPromptWrapper.d.ts +0 -19
  274. package/dist/chatWrappers/GeneralChatPromptWrapper.js +0 -38
  275. package/dist/chatWrappers/GeneralChatPromptWrapper.js.map +0 -1
  276. package/dist/chatWrappers/LlamaChatPromptWrapper.d.ts +0 -12
  277. package/dist/chatWrappers/LlamaChatPromptWrapper.js +0 -23
  278. package/dist/chatWrappers/LlamaChatPromptWrapper.js.map +0 -1
  279. package/dist/chatWrappers/generateContextTextFromConversationHistory.d.ts +0 -15
  280. package/dist/chatWrappers/generateContextTextFromConversationHistory.js +0 -39
  281. package/dist/chatWrappers/generateContextTextFromConversationHistory.js.map +0 -1
  282. package/dist/llamaEvaluator/LlamaBins.d.ts +0 -19
  283. package/dist/llamaEvaluator/LlamaBins.js +0 -5
  284. package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
  285. package/dist/llamaEvaluator/LlamaChatSession.js +0 -290
  286. package/dist/llamaEvaluator/LlamaChatSession.js.map +0 -1
  287. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
  288. package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
  289. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  290. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  291. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  292. package/dist/llamaEvaluator/LlamaGrammar.d.ts +0 -32
  293. package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
  294. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
  295. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
  296. package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
  297. package/dist/utils/binariesGithubRelease.js.map +0 -1
  298. package/dist/utils/clearLlamaBuild.d.ts +0 -1
  299. package/dist/utils/clearLlamaBuild.js +0 -12
  300. package/dist/utils/clearLlamaBuild.js.map +0 -1
  301. package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
  302. package/dist/utils/cloneLlamaCppRepo.js +0 -102
  303. package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
  304. package/dist/utils/compileLLamaCpp.d.ts +0 -8
  305. package/dist/utils/compileLLamaCpp.js +0 -127
  306. package/dist/utils/compileLLamaCpp.js.map +0 -1
  307. package/dist/utils/getBin.js +0 -78
  308. package/dist/utils/getBin.js.map +0 -1
  309. package/dist/utils/getReleaseInfo.d.ts +0 -7
  310. package/dist/utils/getReleaseInfo.js +0 -30
  311. package/dist/utils/getReleaseInfo.js.map +0 -1
  312. package/dist/utils/getTextCompletion.d.ts +0 -3
  313. package/dist/utils/getTextCompletion.js +0 -12
  314. package/dist/utils/getTextCompletion.js.map +0 -1
  315. package/dist/utils/usedBinFlag.d.ts +0 -6
  316. package/dist/utils/usedBinFlag.js +0 -15
  317. package/dist/utils/usedBinFlag.js.map +0 -1
  318. package/llama/usedBin.json +0 -3
  319. package/llamaBins/mac-arm64/ggml-metal.metal +0 -2929
  320. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  321. package/llamaBins/mac-x64/ggml-metal.metal +0 -2929
  322. /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
  323. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
  324. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.d.ts +0 -0
  325. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js +0 -0
  326. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.d.ts +0 -0
  327. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js +0 -0
  328. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -0
  329. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js +0 -0
  330. /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
package/llama/addon.cpp CHANGED
@@ -9,21 +9,84 @@
9
9
  #include "llama.h"
10
10
  #include "napi.h"
11
11
 
12
- std::string addon_model_token_to_piece(const struct llama_model * model, llama_token token) {
12
+ #ifdef GPU_INFO_USE_CUBLAS
13
+ # include "gpuInfo/cuda-gpu-info.h"
14
+ #endif
15
+ #ifdef GPU_INFO_USE_METAL
16
+ # include "gpuInfo/metal-gpu-info.h"
17
+ #endif
18
+
19
+
20
+ struct addon_logger_log {
21
+ public:
22
+ const int logLevelNumber;
23
+ const std::stringstream* stringStream;
24
+ };
25
+
26
+ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);
27
+
28
+ using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
29
+ void addonCallJsLogCallback(
30
+ Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
31
+ );
32
+ using AddonThreadSafeLogCallbackFunction =
33
+ Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
34
+
35
+ AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
36
+ bool addonJsLoggerCallbackSet = false;
37
+ int addonLoggerLogLevel = 5;
38
+
39
+ std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
13
40
  std::vector<char> result(8, 0);
14
41
  const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size());
15
42
  if (n_tokens < 0) {
16
43
  result.resize(-n_tokens);
17
44
  int check = llama_token_to_piece(model, token, result.data(), result.size());
18
45
  GGML_ASSERT(check == -n_tokens);
19
- }
20
- else {
46
+ } else {
21
47
  result.resize(n_tokens);
22
48
  }
23
49
 
24
50
  return std::string(result.data(), result.size());
25
51
  }
26
52
 
53
+ #ifdef GPU_INFO_USE_CUBLAS
54
+ void lodCudaError(const char* message) {
55
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
56
+ }
57
+ #endif
58
+
59
+ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
60
+ uint64_t total = 0;
61
+ uint64_t used = 0;
62
+
63
+ #ifdef GPU_INFO_USE_CUBLAS
64
+ size_t cudaDeviceTotal = 0;
65
+ size_t cudaDeviceUsed = 0;
66
+ bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, lodCudaError);
67
+
68
+ if (cudeGetInfoSuccess) {
69
+ total += cudaDeviceTotal;
70
+ used += cudaDeviceUsed;
71
+ }
72
+ #endif
73
+
74
+ #ifdef GPU_INFO_USE_METAL
75
+ uint64_t metalDeviceTotal = 0;
76
+ uint64_t metalDeviceUsed = 0;
77
+ get_metal_gpu_info(&metalDeviceTotal, &metalDeviceUsed);
78
+
79
+ total += metalDeviceTotal;
80
+ used += metalDeviceUsed;
81
+ #endif
82
+
83
+ Napi::Object result = Napi::Object::New(info.Env());
84
+ result.Set("total", Napi::Number::From(info.Env(), total));
85
+ result.Set("used", Napi::Number::From(info.Env(), used));
86
+
87
+ return result;
88
+ }
89
+
27
90
  class AddonModel : public Napi::ObjectWrap<AddonModel> {
28
91
  public:
29
92
  llama_model_params model_params;
@@ -95,8 +158,9 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
95
158
  }
96
159
 
97
160
  std::string text = info[0].As<Napi::String>().Utf8Value();
161
+ bool specialTokens = info[1].As<Napi::Boolean>().Value();
98
162
 
99
- std::vector<llama_token> tokens = llama_tokenize(model, text, true, true);
163
+ std::vector<llama_token> tokens = llama_tokenize(model, text, false, specialTokens);
100
164
 
101
165
  Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
102
166
  for (size_t i = 0; i < tokens.size(); ++i) {
@@ -162,7 +226,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
162
226
  Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
163
227
  return info.Env().Undefined();
164
228
  }
165
-
229
+
166
230
  char model_desc[128];
167
231
  int actual_length = llama_model_desc(model, model_desc, sizeof(model_desc));
168
232
 
@@ -265,7 +329,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
265
329
  InstanceMethod("suffixToken", &AddonModel::SuffixToken),
266
330
  InstanceMethod("eotToken", &AddonModel::EotToken),
267
331
  InstanceMethod("getTokenString", &AddonModel::GetTokenString),
268
- InstanceMethod("dispose", &AddonModel::Dispose)
332
+ InstanceMethod("dispose", &AddonModel::Dispose),
269
333
  }
270
334
  )
271
335
  );
@@ -352,29 +416,23 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
352
416
  context_params.seed = -1;
353
417
  context_params.n_ctx = 4096;
354
418
  context_params.n_threads = 6;
355
- context_params.n_threads_batch == -1 ? context_params.n_threads : context_params.n_threads_batch;
419
+ context_params.n_threads_batch = context_params.n_threads;
356
420
 
357
421
  if (info.Length() > 1 && info[1].IsObject()) {
358
422
  Napi::Object options = info[1].As<Napi::Object>();
359
423
 
360
- if (options.Has("seed")) {
361
- context_params.seed = options.Get("seed").As<Napi::Number>().Int32Value();
424
+ if (options.Has("noSeed")) {
425
+ context_params.seed = time(NULL);
426
+ } else if (options.Has("seed")) {
427
+ context_params.seed = options.Get("seed").As<Napi::Number>().Uint32Value();
362
428
  }
363
429
 
364
430
  if (options.Has("contextSize")) {
365
- context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().Int32Value();
431
+ context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().Uint32Value();
366
432
  }
367
433
 
368
434
  if (options.Has("batchSize")) {
369
- context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Int32Value();
370
- }
371
-
372
- if (options.Has("f16Kv")) {
373
- context_params.f16_kv = options.Get("f16Kv").As<Napi::Boolean>().Value();
374
- }
375
-
376
- if (options.Has("logitsAll")) {
377
- context_params.logits_all = options.Get("logitsAll").As<Napi::Boolean>().Value();
435
+ context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
378
436
  }
379
437
 
380
438
  if (options.Has("embedding")) {
@@ -382,8 +440,11 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
382
440
  }
383
441
 
384
442
  if (options.Has("threads")) {
385
- context_params.n_threads = options.Get("threads").As<Napi::Number>().Int32Value();
386
- context_params.n_threads_batch == -1 ? context_params.n_threads : context_params.n_threads_batch;
443
+ const auto n_threads = options.Get("threads").As<Napi::Number>().Uint32Value();
444
+ const auto resolved_n_threads = n_threads == 0 ? std::thread::hardware_concurrency() : n_threads;
445
+
446
+ context_params.n_threads = resolved_n_threads;
447
+ context_params.n_threads_batch = resolved_n_threads;
387
448
  }
388
449
  }
389
450
 
@@ -533,6 +594,41 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
533
594
  Napi::Value DecodeBatch(const Napi::CallbackInfo& info);
534
595
  Napi::Value SampleToken(const Napi::CallbackInfo& info);
535
596
 
597
+ Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
598
+ AddonGrammarEvaluationState* grammar_evaluation_state =
599
+ Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
600
+ llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
601
+
602
+ if ((grammar_evaluation_state)->grammar != nullptr) {
603
+ llama_grammar_accept_token(ctx, (grammar_evaluation_state)->grammar, tokenId);
604
+ }
605
+
606
+ return info.Env().Undefined();
607
+ }
608
+
609
+ Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
610
+ if (disposed) {
611
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
612
+ return info.Env().Undefined();
613
+ }
614
+
615
+ const int n_embd = llama_n_embd(model->model);
616
+ const auto* embeddings = llama_get_embeddings(ctx);
617
+
618
+ Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
619
+ for (size_t i = 0; i < n_embd; ++i) {
620
+ result[i] = embeddings[i];
621
+ }
622
+
623
+ return result;
624
+ }
625
+
626
+ Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
627
+ llama_print_timings(ctx);
628
+ llama_reset_timings(ctx);
629
+ return info.Env().Undefined();
630
+ }
631
+
536
632
  static void init(Napi::Object exports) {
537
633
  exports.Set(
538
634
  "AddonContext",
@@ -548,7 +644,10 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
548
644
  InstanceMethod("shiftSequenceTokenCells", &AddonContext::ShiftSequenceTokenCells),
549
645
  InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
550
646
  InstanceMethod("sampleToken", &AddonContext::SampleToken),
551
- InstanceMethod("dispose", &AddonContext::Dispose)
647
+ InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
648
+ InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
649
+ InstanceMethod("printTimings", &AddonContext::PrintTimings),
650
+ InstanceMethod("dispose", &AddonContext::Dispose),
552
651
  }
553
652
  )
554
653
  );
@@ -610,6 +709,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
610
709
  bool use_grammar = false;
611
710
  llama_token result;
612
711
  float temperature = 0.0f;
712
+ float min_p = 0;
613
713
  int32_t top_k = 40;
614
714
  float top_p = 0.95f;
615
715
  float repeat_penalty = 1.10f; // 1.0 = disabled
@@ -633,6 +733,10 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
633
733
  temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
634
734
  }
635
735
 
736
+ if (options.Has("minP")) {
737
+ min_p = options.Get("minP").As<Napi::Number>().FloatValue();
738
+ }
739
+
636
740
  if (options.Has("topK")) {
637
741
  top_k = options.Get("topK").As<Napi::Number>().Int32Value();
638
742
  }
@@ -734,6 +838,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
734
838
  llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
735
839
  llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
736
840
  llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
841
+ llama_sample_min_p(ctx->ctx, &candidates_p, min_p, min_keep);
737
842
  llama_sample_temp(ctx->ctx, &candidates_p, temperature);
738
843
  new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
739
844
  }
@@ -764,15 +869,144 @@ Napi::Value systemInfo(const Napi::CallbackInfo& info) {
764
869
  return Napi::String::From(info.Env(), llama_print_system_info());
765
870
  }
766
871
 
872
+ int addonGetGgmlLogLevelNumber(ggml_log_level level) {
873
+ switch (level) {
874
+ case GGML_LOG_LEVEL_ERROR: return 2;
875
+ case GGML_LOG_LEVEL_WARN: return 3;
876
+ case GGML_LOG_LEVEL_INFO: return 4;
877
+ case GGML_LOG_LEVEL_DEBUG: return 5;
878
+ }
879
+
880
+ return 1;
881
+ }
882
+
883
+ void addonCallJsLogCallback(
884
+ Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
885
+ ) {
886
+ bool called = false;
887
+
888
+ if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
889
+ try {
890
+ callback.Call({
891
+ Napi::Number::New(env, data->logLevelNumber),
892
+ Napi::String::New(env, data->stringStream->str()),
893
+ });
894
+ called = true;
895
+ } catch (const Napi::Error& e) {
896
+ called = false;
897
+ }
898
+ }
899
+
900
+ if (!called && data != nullptr) {
901
+ if (data->logLevelNumber == 2) {
902
+ fputs(data->stringStream->str().c_str(), stderr);
903
+ fflush(stderr);
904
+ } else {
905
+ fputs(data->stringStream->str().c_str(), stdout);
906
+ fflush(stdout);
907
+ }
908
+ }
909
+
910
+ if (data != nullptr) {
911
+ delete data->stringStream;
912
+ delete data;
913
+ }
914
+ }
915
+
916
+ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data) {
917
+ int logLevelNumber = addonGetGgmlLogLevelNumber(level);
918
+
919
+ if (logLevelNumber > addonLoggerLogLevel) {
920
+ return;
921
+ }
922
+
923
+ if (addonJsLoggerCallbackSet) {
924
+ std::stringstream* stringStream = new std::stringstream();
925
+ if (text != nullptr) {
926
+ *stringStream << text;
927
+ }
928
+
929
+ addon_logger_log* data = new addon_logger_log {
930
+ logLevelNumber,
931
+ stringStream,
932
+ };
933
+
934
+ auto status = addonThreadSafeLoggerCallback.NonBlockingCall(data);
935
+
936
+ if (status == napi_ok) {
937
+ return;
938
+ }
939
+ }
940
+
941
+ if (level == 2) {
942
+ fputs(text, stderr);
943
+ fflush(stderr);
944
+ } else {
945
+ fputs(text, stdout);
946
+ fflush(stdout);
947
+ }
948
+ }
949
+
950
+ Napi::Value setLogger(const Napi::CallbackInfo& info) {
951
+ if (info.Length() < 1 || !info[0].IsFunction()) {
952
+ if (addonJsLoggerCallbackSet) {
953
+ addonJsLoggerCallbackSet = false;
954
+ addonThreadSafeLoggerCallback.Release();
955
+ }
956
+
957
+ return info.Env().Undefined();
958
+ }
959
+
960
+ auto addonLoggerJSCallback = info[0].As<Napi::Function>();
961
+ AddonThreadSafeLogCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
962
+ addonThreadSafeLoggerCallback = AddonThreadSafeLogCallbackFunction::New(
963
+ info.Env(),
964
+ addonLoggerJSCallback,
965
+ "loggerCallback",
966
+ 0,
967
+ 1,
968
+ context,
969
+ [](Napi::Env, void*, AddonThreadSafeLogCallbackFunctionContext* ctx) {
970
+ addonJsLoggerCallbackSet = false;
971
+
972
+ delete ctx;
973
+ }
974
+ );
975
+ addonJsLoggerCallbackSet = true;
976
+
977
+ // prevent blocking the main node process from exiting due to active resources
978
+ addonThreadSafeLoggerCallback.Unref(info.Env());
979
+
980
+ return info.Env().Undefined();
981
+ }
982
+
983
+ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
984
+ if (info.Length() < 1 || !info[0].IsNumber()) {
985
+ addonLoggerLogLevel = 5;
986
+
987
+ return info.Env().Undefined();
988
+ }
989
+
990
+ addonLoggerLogLevel = info[0].As<Napi::Number>().Int32Value();
991
+
992
+ return info.Env().Undefined();
993
+ }
994
+
767
995
  Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
768
996
  llama_backend_init(false);
769
997
  exports.DefineProperties({
770
998
  Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
999
+ Napi::PropertyDescriptor::Function("setLogger", setLogger),
1000
+ Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
1001
+ Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
771
1002
  });
772
1003
  AddonModel::init(exports);
773
1004
  AddonGrammar::init(exports);
774
1005
  AddonGrammarEvaluationState::init(exports);
775
1006
  AddonContext::init(exports);
1007
+
1008
+ llama_log_set(addonLlamaCppLogCallback, nullptr);
1009
+
776
1010
  return exports;
777
1011
  }
778
1012
 
@@ -1,3 +1,3 @@
1
1
  {
2
- "release": "b1567"
2
+ "release": "b2127"
3
3
  }
Binary file
@@ -0,0 +1,99 @@
1
+ #include <stddef.h>
2
+
3
+ #if defined(GPU_INFO_USE_HIPBLAS)
4
+ #include <hip/hip_runtime.h>
5
+ #include <hipblas/hipblas.h>
6
+ #define cudaGetDevice hipGetDevice
7
+ #define cudaGetDeviceCount hipGetDeviceCount
8
+ #define cudaGetErrorString hipGetErrorString
9
+ #define cudaMemGetInfo hipMemGetInfo
10
+ #define cudaSetDevice hipSetDevice
11
+ #define cudaSuccess hipSuccess
12
+ #else
13
+ #include <cuda_runtime.h>
14
+ #include <cuda.h>
15
+ #endif
16
+
17
+
18
+ typedef void (*gpuInfoErrorLogCallback_t)(const char* message);
19
+
20
+ bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCallback) {
21
+ int current_device;
22
+ auto getDeviceResult = cudaGetDevice(&current_device);
23
+
24
+ if (getDeviceResult != cudaSuccess) {
25
+ errorLogCallback(cudaGetErrorString(getDeviceResult));
26
+ return false;
27
+ }
28
+
29
+ if (device == current_device) {
30
+ return true;
31
+ }
32
+
33
+ const auto setDeviceResult = cudaSetDevice(device);
34
+
35
+ if (setDeviceResult != cudaSuccess) {
36
+ errorLogCallback(cudaGetErrorString(setDeviceResult));
37
+ return false;
38
+ }
39
+
40
+ return true;
41
+ }
42
+
43
+ bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
44
+ gpuInfoSetCudaDevice(device, errorLogCallback);
45
+
46
+ size_t freeMem;
47
+ size_t totalMem;
48
+ auto getMemInfoResult = cudaMemGetInfo(&freeMem, &totalMem);
49
+
50
+ if (getMemInfoResult != cudaSuccess) {
51
+ errorLogCallback(cudaGetErrorString(getMemInfoResult));
52
+ return false;
53
+ }
54
+
55
+ *total = totalMem;
56
+ *used = totalMem - freeMem;
57
+
58
+ return true;
59
+ }
60
+
61
+ int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
62
+ int deviceCount;
63
+ auto getDeviceCountResult = cudaGetDeviceCount(&deviceCount);
64
+
65
+ if (getDeviceCountResult != cudaSuccess) {
66
+ errorLogCallback(cudaGetErrorString(getDeviceCountResult));
67
+ return -1;
68
+ }
69
+
70
+ return deviceCount;
71
+ }
72
+
73
+ bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
74
+ int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback);
75
+
76
+ if (deviceCount < 0) {
77
+ return false;
78
+ }
79
+
80
+ size_t usedMem = 0;
81
+ size_t totalMem = 0;
82
+
83
+ for (int i = 0; i < deviceCount; i++) {
84
+ size_t deviceUsedMem;
85
+ size_t deviceTotalMem;
86
+
87
+ if (!gpuInfoGetCudaDeviceInfo(i, &deviceTotalMem, &deviceUsedMem, errorLogCallback)) {
88
+ return false;
89
+ }
90
+
91
+ usedMem += deviceUsedMem;
92
+ totalMem += deviceTotalMem;
93
+ }
94
+
95
+ *total = totalMem;
96
+ *used = usedMem;
97
+
98
+ return true;
99
+ }
@@ -0,0 +1,7 @@
1
+ #pragma once
2
+
3
+ #include <stddef.h>
4
+
5
+ typedef void (*gpuInfoErrorLogCallback_t)(const char* message);
6
+
7
+ bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback);
@@ -0,0 +1,5 @@
1
+ #pragma once
2
+
3
+ #include <stdint.h>
4
+
5
+ void get_metal_gpu_info(uint64_t * total, uint64_t * used);
@@ -0,0 +1,17 @@
1
+ #include <stdint.h>
2
+ #import <Metal/Metal.h>
3
+
4
+ void get_metal_gpu_info(uint64_t * total, uint64_t * used) {
5
+ id<MTLDevice> device = MTLCreateSystemDefaultDevice();
6
+
7
+ if (device) {
8
+ *total = device.recommendedMaxWorkingSetSize;
9
+ *used = device.currentAllocatedSize;
10
+ } else {
11
+ *total = 0;
12
+ *used = 0;
13
+ }
14
+
15
+ [device release];
16
+ device = nil;
17
+ }
@@ -0,0 +1,4 @@
1
+ {
2
+ "tag": "b2127",
3
+ "llamaCppGithubRepo": "ggerganov/llama.cpp"
4
+ }
@@ -0,0 +1 @@
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"arm64","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2127"}}}
@@ -0,0 +1 @@
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"armv7l","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2127"}}}
@@ -0,0 +1 @@
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2127"}}}
@@ -0,0 +1 @@
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":true},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2127"}}}
@@ -0,0 +1 @@
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","arch":"arm64","computeLayers":{"metal":true,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2127"}}}