node-llama-cpp 3.0.0-beta.1 → 3.0.0-beta.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (354)
  1. package/README.md +2 -0
  2. package/dist/ChatWrapper.d.ts +49 -0
  3. package/dist/ChatWrapper.js +120 -0
  4. package/dist/ChatWrapper.js.map +1 -0
  5. package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +14 -4
  6. package/dist/bindings/AddonTypes.js +2 -0
  7. package/dist/bindings/AddonTypes.js.map +1 -0
  8. package/dist/bindings/Llama.d.ts +23 -0
  9. package/dist/bindings/Llama.js +225 -0
  10. package/dist/bindings/Llama.js.map +1 -0
  11. package/dist/bindings/getLlama.d.ts +103 -0
  12. package/dist/bindings/getLlama.js +228 -0
  13. package/dist/bindings/getLlama.js.map +1 -0
  14. package/dist/bindings/types.d.ts +33 -0
  15. package/dist/bindings/types.js +30 -0
  16. package/dist/bindings/types.js.map +1 -0
  17. package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
  18. package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
  19. package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
  20. package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
  21. package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
  22. package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
  23. package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
  24. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
  25. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
  26. package/dist/bindings/utils/cloneLlamaCppRepo.js +155 -0
  27. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
  28. package/dist/bindings/utils/compileLLamaCpp.d.ts +12 -0
  29. package/dist/bindings/utils/compileLLamaCpp.js +157 -0
  30. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
  31. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
  32. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +85 -0
  33. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
  34. package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
  35. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
  36. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
  37. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
  38. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
  39. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
  40. package/dist/bindings/utils/getPlatform.d.ts +2 -0
  41. package/dist/bindings/utils/getPlatform.js +15 -0
  42. package/dist/bindings/utils/getPlatform.js.map +1 -0
  43. package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
  44. package/dist/bindings/utils/lastBuildInfo.js +17 -0
  45. package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
  46. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
  47. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +28 -0
  48. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
  49. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +26 -0
  50. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +43 -0
  51. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +1 -0
  52. package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
  53. package/dist/bindings/utils/resolveCustomCmakeOptions.js +43 -0
  54. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
  55. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +12 -0
  56. package/dist/chatWrappers/AlpacaChatWrapper.js +21 -0
  57. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -0
  58. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +18 -0
  59. package/dist/chatWrappers/ChatMLChatWrapper.js +83 -0
  60. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -0
  61. package/dist/chatWrappers/EmptyChatWrapper.d.ts +4 -0
  62. package/dist/chatWrappers/EmptyChatWrapper.js +5 -0
  63. package/dist/chatWrappers/EmptyChatWrapper.js.map +1 -0
  64. package/dist/chatWrappers/FalconChatWrapper.d.ts +21 -0
  65. package/dist/chatWrappers/FalconChatWrapper.js +104 -0
  66. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -0
  67. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +41 -0
  68. package/dist/chatWrappers/FunctionaryChatWrapper.js +200 -0
  69. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -0
  70. package/dist/chatWrappers/GeneralChatWrapper.d.ts +21 -0
  71. package/dist/chatWrappers/GeneralChatWrapper.js +112 -0
  72. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -0
  73. package/dist/chatWrappers/LlamaChatWrapper.d.ts +13 -0
  74. package/dist/chatWrappers/LlamaChatWrapper.js +78 -0
  75. package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -0
  76. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +5 -5
  77. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +28 -17
  78. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -1
  79. package/dist/cli/cli.js +8 -0
  80. package/dist/cli/cli.js.map +1 -1
  81. package/dist/cli/commands/BuildCommand.d.ts +2 -1
  82. package/dist/cli/commands/BuildCommand.js +50 -10
  83. package/dist/cli/commands/BuildCommand.js.map +1 -1
  84. package/dist/cli/commands/ChatCommand.d.ts +11 -4
  85. package/dist/cli/commands/ChatCommand.js +138 -64
  86. package/dist/cli/commands/ChatCommand.js.map +1 -1
  87. package/dist/cli/commands/ClearCommand.js +4 -6
  88. package/dist/cli/commands/ClearCommand.js.map +1 -1
  89. package/dist/cli/commands/CompleteCommand.d.ts +25 -0
  90. package/dist/cli/commands/CompleteCommand.js +278 -0
  91. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  92. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  93. package/dist/cli/commands/DebugCommand.js +59 -0
  94. package/dist/cli/commands/DebugCommand.js.map +1 -0
  95. package/dist/cli/commands/DownloadCommand.d.ts +2 -1
  96. package/dist/cli/commands/DownloadCommand.js +47 -40
  97. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  98. package/dist/cli/commands/InfillCommand.d.ts +27 -0
  99. package/dist/cli/commands/InfillCommand.js +316 -0
  100. package/dist/cli/commands/InfillCommand.js.map +1 -0
  101. package/dist/cli/commands/OnPostInstallCommand.js +7 -10
  102. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  103. package/dist/config.d.ts +10 -3
  104. package/dist/config.js +18 -7
  105. package/dist/config.js.map +1 -1
  106. package/dist/consts.d.ts +1 -0
  107. package/dist/consts.js +2 -0
  108. package/dist/consts.js.map +1 -0
  109. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +154 -0
  110. package/dist/evaluator/LlamaChat/LlamaChat.js +684 -0
  111. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
  112. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +22 -0
  113. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +121 -0
  114. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -0
  115. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +16 -0
  116. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +135 -0
  117. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  118. package/dist/{llamaEvaluator → evaluator/LlamaChatSession}/LlamaChatSession.d.ts +59 -25
  119. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +219 -0
  120. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  121. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +7 -0
  122. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
  123. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  124. package/dist/evaluator/LlamaCompletion.d.ts +148 -0
  125. package/dist/evaluator/LlamaCompletion.js +402 -0
  126. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  127. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +20 -23
  128. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +77 -107
  129. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
  130. package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.d.ts +6 -14
  131. package/dist/evaluator/LlamaContext/types.js.map +1 -0
  132. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +1 -0
  133. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +1 -0
  134. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +1 -0
  135. package/dist/evaluator/LlamaEmbeddingContext.d.ts +37 -0
  136. package/dist/evaluator/LlamaEmbeddingContext.js +78 -0
  137. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
  138. package/dist/evaluator/LlamaGrammar.d.ts +30 -0
  139. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +14 -18
  140. package/dist/evaluator/LlamaGrammar.js.map +1 -0
  141. package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
  142. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
  143. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
  144. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +4 -2
  145. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
  146. package/dist/{llamaEvaluator → evaluator}/LlamaModel.d.ts +24 -6
  147. package/dist/{llamaEvaluator → evaluator}/LlamaModel.js +51 -12
  148. package/dist/evaluator/LlamaModel.js.map +1 -0
  149. package/dist/index.d.ts +30 -17
  150. package/dist/index.js +29 -15
  151. package/dist/index.js.map +1 -1
  152. package/dist/state.d.ts +2 -0
  153. package/dist/state.js +7 -0
  154. package/dist/state.js.map +1 -1
  155. package/dist/types.d.ts +72 -3
  156. package/dist/types.js +5 -1
  157. package/dist/types.js.map +1 -1
  158. package/dist/utils/LlamaText.d.ts +42 -0
  159. package/dist/utils/LlamaText.js +207 -0
  160. package/dist/utils/LlamaText.js.map +1 -0
  161. package/dist/utils/StopGenerationDetector.d.ts +28 -0
  162. package/dist/utils/StopGenerationDetector.js +205 -0
  163. package/dist/utils/StopGenerationDetector.js.map +1 -0
  164. package/dist/utils/TokenStreamRegulator.d.ts +30 -0
  165. package/dist/utils/TokenStreamRegulator.js +96 -0
  166. package/dist/utils/TokenStreamRegulator.js.map +1 -0
  167. package/dist/utils/UnsupportedError.d.ts +2 -0
  168. package/dist/utils/UnsupportedError.js +7 -0
  169. package/dist/utils/UnsupportedError.js.map +1 -0
  170. package/dist/utils/appendUserMessageToChatHistory.d.ts +2 -0
  171. package/dist/utils/appendUserMessageToChatHistory.js +18 -0
  172. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -0
  173. package/dist/utils/cmake.js +16 -11
  174. package/dist/utils/cmake.js.map +1 -1
  175. package/dist/utils/compareTokens.d.ts +2 -0
  176. package/dist/utils/compareTokens.js +4 -0
  177. package/dist/utils/compareTokens.js.map +1 -0
  178. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +18 -0
  179. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +61 -0
  180. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -0
  181. package/dist/utils/gbnfJson/GbnfGrammarGenerator.d.ts +1 -0
  182. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js +17 -0
  183. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  184. package/dist/utils/gbnfJson/GbnfTerminal.d.ts +1 -1
  185. package/dist/utils/gbnfJson/GbnfTerminal.js.map +1 -1
  186. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.d.ts +6 -0
  187. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js +21 -0
  188. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -0
  189. package/dist/utils/gbnfJson/types.d.ts +1 -1
  190. package/dist/utils/gbnfJson/types.js.map +1 -1
  191. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.d.ts +1 -0
  192. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  193. package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
  194. package/dist/utils/getConsoleLogPrefix.js +9 -0
  195. package/dist/utils/getConsoleLogPrefix.js.map +1 -0
  196. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +1 -15
  197. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +1 -1
  198. package/dist/utils/getGrammarsFolder.d.ts +2 -1
  199. package/dist/utils/getGrammarsFolder.js +8 -7
  200. package/dist/utils/getGrammarsFolder.js.map +1 -1
  201. package/dist/utils/getModuleVersion.d.ts +1 -0
  202. package/dist/utils/getModuleVersion.js +13 -0
  203. package/dist/utils/getModuleVersion.js.map +1 -0
  204. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  205. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  206. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  207. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.d.ts +2 -0
  208. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +49 -0
  209. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -0
  210. package/dist/utils/gitReleaseBundles.js +6 -5
  211. package/dist/utils/gitReleaseBundles.js.map +1 -1
  212. package/dist/utils/hashString.d.ts +1 -0
  213. package/dist/utils/hashString.js +8 -0
  214. package/dist/utils/hashString.js.map +1 -0
  215. package/dist/utils/isLockfileActive.d.ts +4 -0
  216. package/dist/utils/isLockfileActive.js +12 -0
  217. package/dist/utils/isLockfileActive.js.map +1 -0
  218. package/dist/utils/parseModelTypeDescription.d.ts +1 -1
  219. package/dist/utils/prettyPrintObject.d.ts +1 -0
  220. package/dist/utils/prettyPrintObject.js +40 -0
  221. package/dist/utils/prettyPrintObject.js.map +1 -0
  222. package/dist/utils/removeNullFields.d.ts +1 -0
  223. package/dist/utils/removeNullFields.js +8 -0
  224. package/dist/utils/removeNullFields.js.map +1 -1
  225. package/dist/utils/resolveChatWrapper.d.ts +4 -0
  226. package/dist/utils/resolveChatWrapper.js +16 -0
  227. package/dist/utils/resolveChatWrapper.js.map +1 -0
  228. package/dist/utils/resolveGithubRelease.d.ts +2 -0
  229. package/dist/utils/resolveGithubRelease.js +36 -0
  230. package/dist/utils/resolveGithubRelease.js.map +1 -0
  231. package/dist/utils/spawnCommand.d.ts +1 -1
  232. package/dist/utils/spawnCommand.js +4 -2
  233. package/dist/utils/spawnCommand.js.map +1 -1
  234. package/dist/utils/tokenizeInput.d.ts +3 -0
  235. package/dist/utils/tokenizeInput.js +9 -0
  236. package/dist/utils/tokenizeInput.js.map +1 -0
  237. package/dist/utils/truncateTextAndRoundToWords.d.ts +8 -0
  238. package/dist/utils/truncateTextAndRoundToWords.js +27 -0
  239. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -0
  240. package/dist/utils/waitForLockfileRelease.d.ts +5 -0
  241. package/dist/utils/waitForLockfileRelease.js +20 -0
  242. package/dist/utils/waitForLockfileRelease.js.map +1 -0
  243. package/dist/utils/withLockfile.d.ts +7 -0
  244. package/dist/utils/withLockfile.js +44 -0
  245. package/dist/utils/withLockfile.js.map +1 -0
  246. package/dist/utils/withOra.js +11 -1
  247. package/dist/utils/withOra.js.map +1 -1
  248. package/dist/utils/withStatusLogs.d.ts +2 -1
  249. package/dist/utils/withStatusLogs.js +11 -8
  250. package/dist/utils/withStatusLogs.js.map +1 -1
  251. package/llama/.clang-format +1 -2
  252. package/llama/CMakeLists.txt +87 -2
  253. package/llama/addon.cpp +319 -31
  254. package/llama/binariesGithubRelease.json +1 -1
  255. package/llama/gitRelease.bundle +0 -0
  256. package/llama/gpuInfo/cuda-gpu-info.cu +99 -0
  257. package/llama/gpuInfo/cuda-gpu-info.h +7 -0
  258. package/llama/gpuInfo/metal-gpu-info.h +5 -0
  259. package/llama/gpuInfo/metal-gpu-info.mm +17 -0
  260. package/llama/llama.cpp.info.json +4 -0
  261. package/llamaBins/linux-arm64/.buildMetadata.json +1 -0
  262. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  263. package/llamaBins/linux-armv7l/.buildMetadata.json +1 -0
  264. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  265. package/llamaBins/linux-x64/.buildMetadata.json +1 -0
  266. package/llamaBins/linux-x64/llama-addon.node +0 -0
  267. package/llamaBins/linux-x64-cuda/.buildMetadata.json +1 -0
  268. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  269. package/llamaBins/mac-arm64-metal/.buildMetadata.json +1 -0
  270. package/llamaBins/mac-arm64-metal/ggml-metal.metal +6491 -0
  271. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  272. package/llamaBins/mac-x64/.buildMetadata.json +1 -0
  273. package/llamaBins/mac-x64/llama-addon.node +0 -0
  274. package/llamaBins/win-x64/.buildMetadata.json +1 -0
  275. package/llamaBins/win-x64/llama-addon.exp +0 -0
  276. package/llamaBins/win-x64/llama-addon.node +0 -0
  277. package/llamaBins/win-x64-cuda/.buildMetadata.json +1 -0
  278. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  279. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  280. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  281. package/package.json +37 -15
  282. package/dist/AbortError.d.ts +0 -2
  283. package/dist/AbortError.js +0 -7
  284. package/dist/AbortError.js.map +0 -1
  285. package/dist/ChatPromptWrapper.d.ts +0 -11
  286. package/dist/ChatPromptWrapper.js +0 -20
  287. package/dist/ChatPromptWrapper.js.map +0 -1
  288. package/dist/chatWrappers/ChatMLChatPromptWrapper.d.ts +0 -12
  289. package/dist/chatWrappers/ChatMLChatPromptWrapper.js +0 -22
  290. package/dist/chatWrappers/ChatMLChatPromptWrapper.js.map +0 -1
  291. package/dist/chatWrappers/EmptyChatPromptWrapper.d.ts +0 -4
  292. package/dist/chatWrappers/EmptyChatPromptWrapper.js +0 -5
  293. package/dist/chatWrappers/EmptyChatPromptWrapper.js.map +0 -1
  294. package/dist/chatWrappers/FalconChatPromptWrapper.d.ts +0 -19
  295. package/dist/chatWrappers/FalconChatPromptWrapper.js +0 -33
  296. package/dist/chatWrappers/FalconChatPromptWrapper.js.map +0 -1
  297. package/dist/chatWrappers/GeneralChatPromptWrapper.d.ts +0 -19
  298. package/dist/chatWrappers/GeneralChatPromptWrapper.js +0 -38
  299. package/dist/chatWrappers/GeneralChatPromptWrapper.js.map +0 -1
  300. package/dist/chatWrappers/LlamaChatPromptWrapper.d.ts +0 -12
  301. package/dist/chatWrappers/LlamaChatPromptWrapper.js +0 -23
  302. package/dist/chatWrappers/LlamaChatPromptWrapper.js.map +0 -1
  303. package/dist/chatWrappers/generateContextTextFromConversationHistory.d.ts +0 -15
  304. package/dist/chatWrappers/generateContextTextFromConversationHistory.js +0 -39
  305. package/dist/chatWrappers/generateContextTextFromConversationHistory.js.map +0 -1
  306. package/dist/llamaEvaluator/LlamaBins.d.ts +0 -19
  307. package/dist/llamaEvaluator/LlamaBins.js +0 -5
  308. package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
  309. package/dist/llamaEvaluator/LlamaChatSession.js +0 -290
  310. package/dist/llamaEvaluator/LlamaChatSession.js.map +0 -1
  311. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
  312. package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
  313. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  314. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  315. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  316. package/dist/llamaEvaluator/LlamaGrammar.d.ts +0 -32
  317. package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
  318. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
  319. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
  320. package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
  321. package/dist/utils/binariesGithubRelease.js.map +0 -1
  322. package/dist/utils/clearLlamaBuild.d.ts +0 -1
  323. package/dist/utils/clearLlamaBuild.js +0 -12
  324. package/dist/utils/clearLlamaBuild.js.map +0 -1
  325. package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
  326. package/dist/utils/cloneLlamaCppRepo.js +0 -102
  327. package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
  328. package/dist/utils/compileLLamaCpp.d.ts +0 -8
  329. package/dist/utils/compileLLamaCpp.js +0 -127
  330. package/dist/utils/compileLLamaCpp.js.map +0 -1
  331. package/dist/utils/getBin.js +0 -78
  332. package/dist/utils/getBin.js.map +0 -1
  333. package/dist/utils/getReleaseInfo.d.ts +0 -7
  334. package/dist/utils/getReleaseInfo.js +0 -30
  335. package/dist/utils/getReleaseInfo.js.map +0 -1
  336. package/dist/utils/getTextCompletion.d.ts +0 -3
  337. package/dist/utils/getTextCompletion.js +0 -12
  338. package/dist/utils/getTextCompletion.js.map +0 -1
  339. package/dist/utils/usedBinFlag.d.ts +0 -6
  340. package/dist/utils/usedBinFlag.js +0 -15
  341. package/dist/utils/usedBinFlag.js.map +0 -1
  342. package/llama/usedBin.json +0 -3
  343. package/llamaBins/mac-arm64/ggml-metal.metal +0 -2929
  344. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  345. package/llamaBins/mac-x64/ggml-metal.metal +0 -2929
  346. /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
  347. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
  348. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.d.ts +0 -0
  349. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js +0 -0
  350. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.d.ts +0 -0
  351. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js +0 -0
  352. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -0
  353. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js +0 -0
  354. /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
package/llama/addon.cpp CHANGED
@@ -9,21 +9,104 @@
9
9
  #include "llama.h"
10
10
  #include "napi.h"
11
11
 
12
- std::string addon_model_token_to_piece(const struct llama_model * model, llama_token token) {
12
+ #ifdef GPU_INFO_USE_CUBLAS
13
+ # include "gpuInfo/cuda-gpu-info.h"
14
+ #endif
15
+ #ifdef GPU_INFO_USE_METAL
16
+ # include "gpuInfo/metal-gpu-info.h"
17
+ #endif
18
+
19
+
20
+ struct addon_logger_log {
21
+ public:
22
+ const int logLevelNumber;
23
+ const std::stringstream* stringStream;
24
+ };
25
+
26
+ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);
27
+
28
+ using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
29
+ void addonCallJsLogCallback(
30
+ Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
31
+ );
32
+ using AddonThreadSafeLogCallbackFunction =
33
+ Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
34
+
35
+ AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
36
+ bool addonJsLoggerCallbackSet = false;
37
+ int addonLoggerLogLevel = 5;
38
+
39
+ std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
13
40
  std::vector<char> result(8, 0);
14
41
  const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size());
15
42
  if (n_tokens < 0) {
16
43
  result.resize(-n_tokens);
17
44
  int check = llama_token_to_piece(model, token, result.data(), result.size());
18
45
  GGML_ASSERT(check == -n_tokens);
19
- }
20
- else {
46
+ } else {
21
47
  result.resize(n_tokens);
22
48
  }
23
49
 
24
50
  return std::string(result.data(), result.size());
25
51
  }
26
52
 
53
+ #ifdef GPU_INFO_USE_CUBLAS
54
+ void lodCudaError(const char* message) {
55
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
56
+ }
57
+ #endif
58
+
59
+ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
60
+ uint64_t total = 0;
61
+ uint64_t used = 0;
62
+
63
+ #ifdef GPU_INFO_USE_CUBLAS
64
+ size_t cudaDeviceTotal = 0;
65
+ size_t cudaDeviceUsed = 0;
66
+ bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, lodCudaError);
67
+
68
+ if (cudeGetInfoSuccess) {
69
+ total += cudaDeviceTotal;
70
+ used += cudaDeviceUsed;
71
+ }
72
+ #endif
73
+
74
+ #ifdef GPU_INFO_USE_METAL
75
+ uint64_t metalDeviceTotal = 0;
76
+ uint64_t metalDeviceUsed = 0;
77
+ get_metal_gpu_info(&metalDeviceTotal, &metalDeviceUsed);
78
+
79
+ total += metalDeviceTotal;
80
+ used += metalDeviceUsed;
81
+ #endif
82
+
83
+ Napi::Object result = Napi::Object::New(info.Env());
84
+ result.Set("total", Napi::Number::From(info.Env(), total));
85
+ result.Set("used", Napi::Number::From(info.Env(), used));
86
+
87
+ return result;
88
+ }
89
+
90
+ static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
91
+ auto tokenType = llama_token_get_type(model, token);
92
+
93
+ if (tokenType == LLAMA_TOKEN_TYPE_UNDEFINED || tokenType == LLAMA_TOKEN_TYPE_UNKNOWN) {
94
+ return Napi::Number::From(info.Env(), -1);
95
+ }
96
+
97
+ return Napi::Number::From(info.Env(), token);
98
+ }
99
+
100
+ static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
101
+ auto tokenType = llama_token_get_type(model, token);
102
+
103
+ if (tokenType != LLAMA_TOKEN_TYPE_CONTROL) {
104
+ return Napi::Number::From(info.Env(), -1);
105
+ }
106
+
107
+ return Napi::Number::From(info.Env(), token);
108
+ }
109
+
27
110
  class AddonModel : public Napi::ObjectWrap<AddonModel> {
28
111
  public:
29
112
  llama_model_params model_params;
@@ -56,7 +139,6 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
56
139
  }
57
140
  }
58
141
 
59
- llama_backend_init(false);
60
142
  model = llama_load_model_from_file(modelPath.c_str(), model_params);
61
143
 
62
144
  if (model == NULL) {
@@ -95,8 +177,9 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
95
177
  }
96
178
 
97
179
  std::string text = info[0].As<Napi::String>().Utf8Value();
180
+ bool specialTokens = info[1].As<Napi::Boolean>().Value();
98
181
 
99
- std::vector<llama_token> tokens = llama_tokenize(model, text, true, true);
182
+ std::vector<llama_token> tokens = llama_tokenize(model, text, false, specialTokens);
100
183
 
101
184
  Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
102
185
  for (size_t i = 0; i < tokens.size(); ++i) {
@@ -139,6 +222,15 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
139
222
  return Napi::Number::From(info.Env(), llama_n_ctx_train(model));
140
223
  }
141
224
 
225
+ Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
226
+ if (disposed) {
227
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
228
+ return info.Env().Undefined();
229
+ }
230
+
231
+ return Napi::Number::From(info.Env(), llama_n_embd(model));
232
+ }
233
+
142
234
  Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
143
235
  if (disposed) {
144
236
  Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
@@ -162,7 +254,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
162
254
  Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
163
255
  return info.Env().Undefined();
164
256
  }
165
-
257
+
166
258
  char model_desc[128];
167
259
  int actual_length = llama_model_desc(model, model_desc, sizeof(model_desc));
168
260
 
@@ -175,7 +267,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
175
267
  return info.Env().Undefined();
176
268
  }
177
269
 
178
- return Napi::Number::From(info.Env(), llama_token_bos(model));
270
+ return getNapiControlToken(info, model, llama_token_bos(model));
179
271
  }
180
272
  Napi::Value TokenEos(const Napi::CallbackInfo& info) {
181
273
  if (disposed) {
@@ -183,7 +275,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
183
275
  return info.Env().Undefined();
184
276
  }
185
277
 
186
- return Napi::Number::From(info.Env(), llama_token_eos(model));
278
+ return getNapiControlToken(info, model, llama_token_eos(model));
187
279
  }
188
280
  Napi::Value TokenNl(const Napi::CallbackInfo& info) {
189
281
  if (disposed) {
@@ -191,7 +283,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
191
283
  return info.Env().Undefined();
192
284
  }
193
285
 
194
- return Napi::Number::From(info.Env(), llama_token_nl(model));
286
+ return getNapiToken(info, model, llama_token_nl(model));
195
287
  }
196
288
  Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
197
289
  if (disposed) {
@@ -199,7 +291,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
199
291
  return info.Env().Undefined();
200
292
  }
201
293
 
202
- return Napi::Number::From(info.Env(), llama_token_prefix(model));
294
+ return getNapiControlToken(info, model, llama_token_prefix(model));
203
295
  }
204
296
  Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
205
297
  if (disposed) {
@@ -207,7 +299,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
207
299
  return info.Env().Undefined();
208
300
  }
209
301
 
210
- return Napi::Number::From(info.Env(), llama_token_middle(model));
302
+ return getNapiControlToken(info, model, llama_token_middle(model));
211
303
  }
212
304
  Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
213
305
  if (disposed) {
@@ -215,7 +307,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
215
307
  return info.Env().Undefined();
216
308
  }
217
309
 
218
- return Napi::Number::From(info.Env(), llama_token_suffix(model));
310
+ return getNapiControlToken(info, model, llama_token_suffix(model));
219
311
  }
220
312
  Napi::Value EotToken(const Napi::CallbackInfo& info) {
221
313
  if (disposed) {
@@ -223,7 +315,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
223
315
  return info.Env().Undefined();
224
316
  }
225
317
 
226
- return Napi::Number::From(info.Env(), llama_token_eot(model));
318
+ return getNapiControlToken(info, model, llama_token_eot(model));
227
319
  }
228
320
  Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
229
321
  if (disposed) {
@@ -244,6 +336,29 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
244
336
  return Napi::String::New(info.Env(), ss.str());
245
337
  }
246
338
 
339
+ Napi::Value GetTokenType(const Napi::CallbackInfo& info) {
340
+ if (disposed) {
341
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
342
+ return info.Env().Undefined();
343
+ }
344
+
345
+ if (info[0].IsNumber() == false) {
346
+ return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_TYPE_UNDEFINED));
347
+ }
348
+
349
+ int token = info[0].As<Napi::Number>().Int32Value();
350
+ auto tokenType = llama_token_get_type(model, token);
351
+
352
+ return Napi::Number::From(info.Env(), int32_t(tokenType));
353
+ }
354
+ Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info) {
355
+ const int addBos = llama_add_bos_token(model);
356
+
357
+ bool shouldPrependBos = addBos != -1 ? bool(addBos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
358
+
359
+ return Napi::Boolean::New(info.Env(), shouldPrependBos);
360
+ }
361
+
247
362
  static void init(Napi::Object exports) {
248
363
  exports.Set(
249
364
  "AddonModel",
@@ -254,6 +369,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
254
369
  InstanceMethod("tokenize", &AddonModel::Tokenize),
255
370
  InstanceMethod("detokenize", &AddonModel::Detokenize),
256
371
  InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
372
+ InstanceMethod("getEmbeddingVectorSize", &AddonModel::GetEmbeddingVectorSize),
257
373
  InstanceMethod("getTotalSize", &AddonModel::GetTotalSize),
258
374
  InstanceMethod("getTotalParameters", &AddonModel::GetTotalParameters),
259
375
  InstanceMethod("getModelDescription", &AddonModel::GetModelDescription),
@@ -265,7 +381,9 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
265
381
  InstanceMethod("suffixToken", &AddonModel::SuffixToken),
266
382
  InstanceMethod("eotToken", &AddonModel::EotToken),
267
383
  InstanceMethod("getTokenString", &AddonModel::GetTokenString),
268
- InstanceMethod("dispose", &AddonModel::Dispose)
384
+ InstanceMethod("getTokenType", &AddonModel::GetTokenType),
385
+ InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
386
+ InstanceMethod("dispose", &AddonModel::Dispose),
269
387
  }
270
388
  )
271
389
  );
@@ -352,29 +470,23 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
352
470
  context_params.seed = -1;
353
471
  context_params.n_ctx = 4096;
354
472
  context_params.n_threads = 6;
355
- context_params.n_threads_batch == -1 ? context_params.n_threads : context_params.n_threads_batch;
473
+ context_params.n_threads_batch = context_params.n_threads;
356
474
 
357
475
  if (info.Length() > 1 && info[1].IsObject()) {
358
476
  Napi::Object options = info[1].As<Napi::Object>();
359
477
 
360
- if (options.Has("seed")) {
361
- context_params.seed = options.Get("seed").As<Napi::Number>().Int32Value();
478
+ if (options.Has("noSeed")) {
479
+ context_params.seed = time(NULL);
480
+ } else if (options.Has("seed")) {
481
+ context_params.seed = options.Get("seed").As<Napi::Number>().Uint32Value();
362
482
  }
363
483
 
364
484
  if (options.Has("contextSize")) {
365
- context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().Int32Value();
485
+ context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().Uint32Value();
366
486
  }
367
487
 
368
488
  if (options.Has("batchSize")) {
369
- context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Int32Value();
370
- }
371
-
372
- if (options.Has("f16Kv")) {
373
- context_params.f16_kv = options.Get("f16Kv").As<Napi::Boolean>().Value();
374
- }
375
-
376
- if (options.Has("logitsAll")) {
377
- context_params.logits_all = options.Get("logitsAll").As<Napi::Boolean>().Value();
489
+ context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
378
490
  }
379
491
 
380
492
  if (options.Has("embedding")) {
@@ -382,8 +494,11 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
382
494
  }
383
495
 
384
496
  if (options.Has("threads")) {
385
- context_params.n_threads = options.Get("threads").As<Napi::Number>().Int32Value();
386
- context_params.n_threads_batch == -1 ? context_params.n_threads : context_params.n_threads_batch;
497
+ const auto n_threads = options.Get("threads").As<Napi::Number>().Uint32Value();
498
+ const auto resolved_n_threads = n_threads == 0 ? std::thread::hardware_concurrency() : n_threads;
499
+
500
+ context_params.n_threads = resolved_n_threads;
501
+ context_params.n_threads_batch = resolved_n_threads;
387
502
  }
388
503
  }
389
504
 
@@ -533,6 +648,41 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
533
648
  Napi::Value DecodeBatch(const Napi::CallbackInfo& info);
534
649
  Napi::Value SampleToken(const Napi::CallbackInfo& info);
535
650
 
651
+ Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
652
+ AddonGrammarEvaluationState* grammar_evaluation_state =
653
+ Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
654
+ llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
655
+
656
+ if ((grammar_evaluation_state)->grammar != nullptr) {
657
+ llama_grammar_accept_token(ctx, (grammar_evaluation_state)->grammar, tokenId);
658
+ }
659
+
660
+ return info.Env().Undefined();
661
+ }
662
+
663
+ Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
664
+ if (disposed) {
665
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
666
+ return info.Env().Undefined();
667
+ }
668
+
669
+ const int n_embd = llama_n_embd(model->model);
670
+ const auto* embeddings = llama_get_embeddings(ctx);
671
+
672
+ Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
673
+ for (size_t i = 0; i < n_embd; ++i) {
674
+ result[i] = embeddings[i];
675
+ }
676
+
677
+ return result;
678
+ }
679
+
680
+ Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
681
+ llama_print_timings(ctx);
682
+ llama_reset_timings(ctx);
683
+ return info.Env().Undefined();
684
+ }
685
+
536
686
  static void init(Napi::Object exports) {
537
687
  exports.Set(
538
688
  "AddonContext",
@@ -548,7 +698,10 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
548
698
  InstanceMethod("shiftSequenceTokenCells", &AddonContext::ShiftSequenceTokenCells),
549
699
  InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
550
700
  InstanceMethod("sampleToken", &AddonContext::SampleToken),
551
- InstanceMethod("dispose", &AddonContext::Dispose)
701
+ InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
702
+ InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
703
+ InstanceMethod("printTimings", &AddonContext::PrintTimings),
704
+ InstanceMethod("dispose", &AddonContext::Dispose),
552
705
  }
553
706
  )
554
707
  );
@@ -610,6 +763,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
610
763
  bool use_grammar = false;
611
764
  llama_token result;
612
765
  float temperature = 0.0f;
766
+ float min_p = 0;
613
767
  int32_t top_k = 40;
614
768
  float top_p = 0.95f;
615
769
  float repeat_penalty = 1.10f; // 1.0 = disabled
@@ -633,6 +787,10 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
633
787
  temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
634
788
  }
635
789
 
790
+ if (options.Has("minP")) {
791
+ min_p = options.Get("minP").As<Napi::Number>().FloatValue();
792
+ }
793
+
636
794
  if (options.Has("topK")) {
637
795
  top_k = options.Get("topK").As<Napi::Number>().Int32Value();
638
796
  }
@@ -734,6 +892,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
734
892
  llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
735
893
  llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
736
894
  llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
895
+ llama_sample_min_p(ctx->ctx, &candidates_p, min_p, min_keep);
737
896
  llama_sample_temp(ctx->ctx, &candidates_p, temperature);
738
897
  new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
739
898
  }
@@ -764,15 +923,144 @@ Napi::Value systemInfo(const Napi::CallbackInfo& info) {
764
923
  return Napi::String::From(info.Env(), llama_print_system_info());
765
924
  }
766
925
 
926
+ int addonGetGgmlLogLevelNumber(ggml_log_level level) {
927
+ switch (level) {
928
+ case GGML_LOG_LEVEL_ERROR: return 2;
929
+ case GGML_LOG_LEVEL_WARN: return 3;
930
+ case GGML_LOG_LEVEL_INFO: return 4;
931
+ case GGML_LOG_LEVEL_DEBUG: return 5;
932
+ }
933
+
934
+ return 1;
935
+ }
936
+
937
+ void addonCallJsLogCallback(
938
+ Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
939
+ ) {
940
+ bool called = false;
941
+
942
+ if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
943
+ try {
944
+ callback.Call({
945
+ Napi::Number::New(env, data->logLevelNumber),
946
+ Napi::String::New(env, data->stringStream->str()),
947
+ });
948
+ called = true;
949
+ } catch (const Napi::Error& e) {
950
+ called = false;
951
+ }
952
+ }
953
+
954
+ if (!called && data != nullptr) {
955
+ if (data->logLevelNumber == 2) {
956
+ fputs(data->stringStream->str().c_str(), stderr);
957
+ fflush(stderr);
958
+ } else {
959
+ fputs(data->stringStream->str().c_str(), stdout);
960
+ fflush(stdout);
961
+ }
962
+ }
963
+
964
+ if (data != nullptr) {
965
+ delete data->stringStream;
966
+ delete data;
967
+ }
968
+ }
969
+
970
+ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data) {
971
+ int logLevelNumber = addonGetGgmlLogLevelNumber(level);
972
+
973
+ if (logLevelNumber > addonLoggerLogLevel) {
974
+ return;
975
+ }
976
+
977
+ if (addonJsLoggerCallbackSet) {
978
+ std::stringstream* stringStream = new std::stringstream();
979
+ if (text != nullptr) {
980
+ *stringStream << text;
981
+ }
982
+
983
+ addon_logger_log* data = new addon_logger_log {
984
+ logLevelNumber,
985
+ stringStream,
986
+ };
987
+
988
+ auto status = addonThreadSafeLoggerCallback.NonBlockingCall(data);
989
+
990
+ if (status == napi_ok) {
991
+ return;
992
+ }
993
+ }
994
+
995
+ if (level == 2) {
996
+ fputs(text, stderr);
997
+ fflush(stderr);
998
+ } else {
999
+ fputs(text, stdout);
1000
+ fflush(stdout);
1001
+ }
1002
+ }
1003
+
1004
+ Napi::Value setLogger(const Napi::CallbackInfo& info) {
1005
+ if (info.Length() < 1 || !info[0].IsFunction()) {
1006
+ if (addonJsLoggerCallbackSet) {
1007
+ addonJsLoggerCallbackSet = false;
1008
+ addonThreadSafeLoggerCallback.Release();
1009
+ }
1010
+
1011
+ return info.Env().Undefined();
1012
+ }
1013
+
1014
+ auto addonLoggerJSCallback = info[0].As<Napi::Function>();
1015
+ AddonThreadSafeLogCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
1016
+ addonThreadSafeLoggerCallback = AddonThreadSafeLogCallbackFunction::New(
1017
+ info.Env(),
1018
+ addonLoggerJSCallback,
1019
+ "loggerCallback",
1020
+ 0,
1021
+ 1,
1022
+ context,
1023
+ [](Napi::Env, void*, AddonThreadSafeLogCallbackFunctionContext* ctx) {
1024
+ addonJsLoggerCallbackSet = false;
1025
+
1026
+ delete ctx;
1027
+ }
1028
+ );
1029
+ addonJsLoggerCallbackSet = true;
1030
+
1031
+ // prevent blocking the main node process from exiting due to active resources
1032
+ addonThreadSafeLoggerCallback.Unref(info.Env());
1033
+
1034
+ return info.Env().Undefined();
1035
+ }
1036
+
1037
+ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
1038
+ if (info.Length() < 1 || !info[0].IsNumber()) {
1039
+ addonLoggerLogLevel = 5;
1040
+
1041
+ return info.Env().Undefined();
1042
+ }
1043
+
1044
+ addonLoggerLogLevel = info[0].As<Napi::Number>().Int32Value();
1045
+
1046
+ return info.Env().Undefined();
1047
+ }
1048
+
767
1049
  Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
768
- llama_backend_init(false);
1050
+ llama_backend_init();
769
1051
  exports.DefineProperties({
770
1052
  Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
1053
+ Napi::PropertyDescriptor::Function("setLogger", setLogger),
1054
+ Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
1055
+ Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
771
1056
  });
772
1057
  AddonModel::init(exports);
773
1058
  AddonGrammar::init(exports);
774
1059
  AddonGrammarEvaluationState::init(exports);
775
1060
  AddonContext::init(exports);
1061
+
1062
+ llama_log_set(addonLlamaCppLogCallback, nullptr);
1063
+
776
1064
  return exports;
777
1065
  }
778
1066
 
@@ -1,3 +1,3 @@
1
1
  {
2
- "release": "b1567"
2
+ "release": "b2174"
3
3
  }
Binary file
@@ -0,0 +1,99 @@
1
+ #include <stddef.h>
2
+
3
+ #if defined(GPU_INFO_USE_HIPBLAS)
4
+ #include <hip/hip_runtime.h>
5
+ #include <hipblas/hipblas.h>
6
+ #define cudaGetDevice hipGetDevice
7
+ #define cudaGetDeviceCount hipGetDeviceCount
8
+ #define cudaGetErrorString hipGetErrorString
9
+ #define cudaMemGetInfo hipMemGetInfo
10
+ #define cudaSetDevice hipSetDevice
11
+ #define cudaSuccess hipSuccess
12
+ #else
13
+ #include <cuda_runtime.h>
14
+ #include <cuda.h>
15
+ #endif
16
+
17
+
18
+ typedef void (*gpuInfoErrorLogCallback_t)(const char* message);
19
+
20
+ bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCallback) {
21
+ int current_device;
22
+ auto getDeviceResult = cudaGetDevice(&current_device);
23
+
24
+ if (getDeviceResult != cudaSuccess) {
25
+ errorLogCallback(cudaGetErrorString(getDeviceResult));
26
+ return false;
27
+ }
28
+
29
+ if (device == current_device) {
30
+ return true;
31
+ }
32
+
33
+ const auto setDeviceResult = cudaSetDevice(device);
34
+
35
+ if (setDeviceResult != cudaSuccess) {
36
+ errorLogCallback(cudaGetErrorString(setDeviceResult));
37
+ return false;
38
+ }
39
+
40
+ return true;
41
+ }
42
+
43
+ bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
44
+ gpuInfoSetCudaDevice(device, errorLogCallback);
45
+
46
+ size_t freeMem;
47
+ size_t totalMem;
48
+ auto getMemInfoResult = cudaMemGetInfo(&freeMem, &totalMem);
49
+
50
+ if (getMemInfoResult != cudaSuccess) {
51
+ errorLogCallback(cudaGetErrorString(getMemInfoResult));
52
+ return false;
53
+ }
54
+
55
+ *total = totalMem;
56
+ *used = totalMem - freeMem;
57
+
58
+ return true;
59
+ }
60
+
61
+ int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
62
+ int deviceCount;
63
+ auto getDeviceCountResult = cudaGetDeviceCount(&deviceCount);
64
+
65
+ if (getDeviceCountResult != cudaSuccess) {
66
+ errorLogCallback(cudaGetErrorString(getDeviceCountResult));
67
+ return -1;
68
+ }
69
+
70
+ return deviceCount;
71
+ }
72
+
73
+ bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
74
+ int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback);
75
+
76
+ if (deviceCount < 0) {
77
+ return false;
78
+ }
79
+
80
+ size_t usedMem = 0;
81
+ size_t totalMem = 0;
82
+
83
+ for (int i = 0; i < deviceCount; i++) {
84
+ size_t deviceUsedMem;
85
+ size_t deviceTotalMem;
86
+
87
+ if (!gpuInfoGetCudaDeviceInfo(i, &deviceTotalMem, &deviceUsedMem, errorLogCallback)) {
88
+ return false;
89
+ }
90
+
91
+ usedMem += deviceUsedMem;
92
+ totalMem += deviceTotalMem;
93
+ }
94
+
95
+ *total = totalMem;
96
+ *used = usedMem;
97
+
98
+ return true;
99
+ }
@@ -0,0 +1,7 @@
1
+ #pragma once
2
+
3
+ #include <stddef.h>
4
+
5
+ typedef void (*gpuInfoErrorLogCallback_t)(const char* message);
6
+
7
+ bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback);
@@ -0,0 +1,5 @@
1
+ #pragma once
2
+
3
+ #include <stdint.h>
4
+
5
+ void get_metal_gpu_info(uint64_t * total, uint64_t * used);
@@ -0,0 +1,17 @@
1
+ #include <stdint.h>
2
+ #import <Metal/Metal.h>
3
+
4
// Reports memory figures for the system default Metal device.
// - total: receives the device's recommendedMaxWorkingSetSize, or 0 when there is no device.
// - used: receives the device's currentAllocatedSize, or 0 when there is no device.
void get_metal_gpu_info(uint64_t * total, uint64_t * used) {
    id<MTLDevice> gpu = MTLCreateSystemDefaultDevice();
    const bool hasGpu = gpu != nil;

    *total = hasGpu ? gpu.recommendedMaxWorkingSetSize : 0;
    *used = hasGpu ? gpu.currentAllocatedSize : 0;

    // messaging nil is a no-op, so the release is safe on both paths
    [gpu release];
    gpu = nil;
}
@@ -0,0 +1,4 @@
1
+ {
2
+ "tag": "b2174",
3
+ "llamaCppGithubRepo": "ggerganov/llama.cpp"
4
+ }
@@ -0,0 +1 @@
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"arm64","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2174"}}}
@@ -0,0 +1 @@
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"armv7l","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2174"}}}