node-llama-cpp 2.8.6 → 3.0.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346)
  1. package/README.md +2 -2
  2. package/dist/ChatWrapper.d.ts +49 -0
  3. package/dist/ChatWrapper.js +120 -0
  4. package/dist/ChatWrapper.js.map +1 -0
  5. package/dist/bindings/AddonTypes.d.ts +92 -0
  6. package/dist/bindings/AddonTypes.js +2 -0
  7. package/dist/bindings/AddonTypes.js.map +1 -0
  8. package/dist/bindings/Llama.d.ts +23 -0
  9. package/dist/bindings/Llama.js +225 -0
  10. package/dist/bindings/Llama.js.map +1 -0
  11. package/dist/bindings/getLlama.d.ts +86 -0
  12. package/dist/bindings/getLlama.js +225 -0
  13. package/dist/bindings/getLlama.js.map +1 -0
  14. package/dist/bindings/types.d.ts +33 -0
  15. package/dist/bindings/types.js +30 -0
  16. package/dist/bindings/types.js.map +1 -0
  17. package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
  18. package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
  19. package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
  20. package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
  21. package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
  22. package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
  23. package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
  24. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
  25. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
  26. package/dist/bindings/utils/cloneLlamaCppRepo.js +155 -0
  27. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
  28. package/dist/bindings/utils/compileLLamaCpp.d.ts +12 -0
  29. package/dist/bindings/utils/compileLLamaCpp.js +157 -0
  30. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
  31. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
  32. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +85 -0
  33. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
  34. package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
  35. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
  36. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
  37. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
  38. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
  39. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
  40. package/dist/bindings/utils/getPlatform.d.ts +2 -0
  41. package/dist/bindings/utils/getPlatform.js +15 -0
  42. package/dist/bindings/utils/getPlatform.js.map +1 -0
  43. package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
  44. package/dist/bindings/utils/lastBuildInfo.js +17 -0
  45. package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
  46. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
  47. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +28 -0
  48. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
  49. package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
  50. package/dist/bindings/utils/resolveCustomCmakeOptions.js +43 -0
  51. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
  52. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +12 -0
  53. package/dist/chatWrappers/AlpacaChatWrapper.js +21 -0
  54. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -0
  55. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +18 -0
  56. package/dist/chatWrappers/ChatMLChatWrapper.js +83 -0
  57. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -0
  58. package/dist/chatWrappers/EmptyChatWrapper.d.ts +4 -0
  59. package/dist/chatWrappers/EmptyChatWrapper.js +5 -0
  60. package/dist/chatWrappers/EmptyChatWrapper.js.map +1 -0
  61. package/dist/chatWrappers/FalconChatWrapper.d.ts +21 -0
  62. package/dist/chatWrappers/FalconChatWrapper.js +104 -0
  63. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -0
  64. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +41 -0
  65. package/dist/chatWrappers/FunctionaryChatWrapper.js +200 -0
  66. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -0
  67. package/dist/chatWrappers/GeneralChatWrapper.d.ts +21 -0
  68. package/dist/chatWrappers/GeneralChatWrapper.js +112 -0
  69. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -0
  70. package/dist/chatWrappers/LlamaChatWrapper.d.ts +13 -0
  71. package/dist/chatWrappers/LlamaChatWrapper.js +78 -0
  72. package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -0
  73. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +13 -0
  74. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +60 -0
  75. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -0
  76. package/dist/cli/cli.js +5 -1
  77. package/dist/cli/cli.js.map +1 -1
  78. package/dist/cli/commands/BuildCommand.d.ts +2 -1
  79. package/dist/cli/commands/BuildCommand.js +50 -10
  80. package/dist/cli/commands/BuildCommand.js.map +1 -1
  81. package/dist/cli/commands/ChatCommand.d.ts +10 -4
  82. package/dist/cli/commands/ChatCommand.js +169 -57
  83. package/dist/cli/commands/ChatCommand.js.map +1 -1
  84. package/dist/cli/commands/ClearCommand.js +4 -6
  85. package/dist/cli/commands/ClearCommand.js.map +1 -1
  86. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  87. package/dist/cli/commands/DebugCommand.js +59 -0
  88. package/dist/cli/commands/DebugCommand.js.map +1 -0
  89. package/dist/cli/commands/DownloadCommand.d.ts +2 -1
  90. package/dist/cli/commands/DownloadCommand.js +47 -40
  91. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  92. package/dist/cli/commands/OnPostInstallCommand.js +7 -10
  93. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  94. package/dist/config.d.ts +10 -3
  95. package/dist/config.js +18 -7
  96. package/dist/config.js.map +1 -1
  97. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +185 -0
  98. package/dist/evaluator/LlamaChat/LlamaChat.js +705 -0
  99. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
  100. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +22 -0
  101. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +121 -0
  102. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -0
  103. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +16 -0
  104. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +135 -0
  105. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  106. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +157 -0
  107. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +219 -0
  108. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  109. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +7 -0
  110. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
  111. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  112. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +109 -0
  113. package/dist/evaluator/LlamaContext/LlamaContext.js +606 -0
  114. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
  115. package/dist/evaluator/LlamaContext/types.d.ts +82 -0
  116. package/dist/evaluator/LlamaContext/types.js +2 -0
  117. package/dist/evaluator/LlamaContext/types.js.map +1 -0
  118. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.d.ts +5 -0
  119. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js +16 -0
  120. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +1 -0
  121. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.d.ts +5 -0
  122. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js +42 -0
  123. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +1 -0
  124. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +2 -0
  125. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js +13 -0
  126. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +1 -0
  127. package/dist/evaluator/LlamaEmbeddingContext.d.ts +37 -0
  128. package/dist/evaluator/LlamaEmbeddingContext.js +78 -0
  129. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
  130. package/dist/evaluator/LlamaGrammar.d.ts +30 -0
  131. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +15 -19
  132. package/dist/evaluator/LlamaGrammar.js.map +1 -0
  133. package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +15 -0
  134. package/dist/evaluator/LlamaGrammarEvaluationState.js +17 -0
  135. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
  136. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
  137. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +4 -2
  138. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
  139. package/dist/evaluator/LlamaModel.d.ts +120 -0
  140. package/dist/evaluator/LlamaModel.js +320 -0
  141. package/dist/evaluator/LlamaModel.js.map +1 -0
  142. package/dist/index.d.ts +29 -16
  143. package/dist/index.js +28 -15
  144. package/dist/index.js.map +1 -1
  145. package/dist/state.d.ts +2 -0
  146. package/dist/state.js +7 -0
  147. package/dist/state.js.map +1 -1
  148. package/dist/types.d.ts +44 -4
  149. package/dist/types.js +5 -1
  150. package/dist/types.js.map +1 -1
  151. package/dist/utils/LlamaText.d.ts +42 -0
  152. package/dist/utils/LlamaText.js +207 -0
  153. package/dist/utils/LlamaText.js.map +1 -0
  154. package/dist/utils/ReplHistory.js +1 -1
  155. package/dist/utils/ReplHistory.js.map +1 -1
  156. package/dist/utils/StopGenerationDetector.d.ts +28 -0
  157. package/dist/utils/StopGenerationDetector.js +205 -0
  158. package/dist/utils/StopGenerationDetector.js.map +1 -0
  159. package/dist/utils/TokenStreamRegulator.d.ts +30 -0
  160. package/dist/utils/TokenStreamRegulator.js +96 -0
  161. package/dist/utils/TokenStreamRegulator.js.map +1 -0
  162. package/dist/utils/appendUserMessageToChatHistory.d.ts +2 -0
  163. package/dist/utils/appendUserMessageToChatHistory.js +18 -0
  164. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -0
  165. package/dist/utils/cmake.js +16 -11
  166. package/dist/utils/cmake.js.map +1 -1
  167. package/dist/utils/compareTokens.d.ts +2 -0
  168. package/dist/utils/compareTokens.js +4 -0
  169. package/dist/utils/compareTokens.js.map +1 -0
  170. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +18 -0
  171. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +61 -0
  172. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -0
  173. package/dist/utils/gbnfJson/GbnfGrammarGenerator.d.ts +1 -0
  174. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js +17 -0
  175. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  176. package/dist/utils/gbnfJson/GbnfTerminal.d.ts +1 -1
  177. package/dist/utils/gbnfJson/GbnfTerminal.js.map +1 -1
  178. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.d.ts +6 -0
  179. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js +21 -0
  180. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -0
  181. package/dist/utils/gbnfJson/types.d.ts +1 -1
  182. package/dist/utils/gbnfJson/types.js.map +1 -1
  183. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.d.ts +1 -0
  184. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  185. package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
  186. package/dist/utils/getConsoleLogPrefix.js +9 -0
  187. package/dist/utils/getConsoleLogPrefix.js.map +1 -0
  188. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +1 -15
  189. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +1 -1
  190. package/dist/utils/getGrammarsFolder.d.ts +2 -1
  191. package/dist/utils/getGrammarsFolder.js +8 -7
  192. package/dist/utils/getGrammarsFolder.js.map +1 -1
  193. package/dist/utils/getModuleVersion.d.ts +1 -0
  194. package/dist/utils/getModuleVersion.js +13 -0
  195. package/dist/utils/getModuleVersion.js.map +1 -0
  196. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.d.ts +2 -0
  197. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +49 -0
  198. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -0
  199. package/dist/utils/gitReleaseBundles.js +6 -5
  200. package/dist/utils/gitReleaseBundles.js.map +1 -1
  201. package/dist/utils/hashString.d.ts +1 -0
  202. package/dist/utils/hashString.js +8 -0
  203. package/dist/utils/hashString.js.map +1 -0
  204. package/dist/utils/isLockfileActive.d.ts +4 -0
  205. package/dist/utils/isLockfileActive.js +12 -0
  206. package/dist/utils/isLockfileActive.js.map +1 -0
  207. package/dist/utils/parseModelFileName.d.ts +9 -0
  208. package/dist/utils/parseModelFileName.js +68 -0
  209. package/dist/utils/parseModelFileName.js.map +1 -0
  210. package/dist/utils/parseModelTypeDescription.d.ts +6 -0
  211. package/dist/utils/parseModelTypeDescription.js +9 -0
  212. package/dist/utils/parseModelTypeDescription.js.map +1 -0
  213. package/dist/utils/prettyPrintObject.d.ts +1 -0
  214. package/dist/utils/prettyPrintObject.js +40 -0
  215. package/dist/utils/prettyPrintObject.js.map +1 -0
  216. package/dist/utils/removeNullFields.d.ts +1 -0
  217. package/dist/utils/removeNullFields.js +8 -0
  218. package/dist/utils/removeNullFields.js.map +1 -1
  219. package/dist/utils/resolveChatWrapper.d.ts +4 -0
  220. package/dist/utils/resolveChatWrapper.js +16 -0
  221. package/dist/utils/resolveChatWrapper.js.map +1 -0
  222. package/dist/utils/resolveGithubRelease.d.ts +2 -0
  223. package/dist/utils/resolveGithubRelease.js +36 -0
  224. package/dist/utils/resolveGithubRelease.js.map +1 -0
  225. package/dist/utils/spawnCommand.d.ts +1 -1
  226. package/dist/utils/spawnCommand.js +4 -2
  227. package/dist/utils/spawnCommand.js.map +1 -1
  228. package/dist/utils/tokenizeInput.d.ts +3 -0
  229. package/dist/utils/tokenizeInput.js +9 -0
  230. package/dist/utils/tokenizeInput.js.map +1 -0
  231. package/dist/utils/truncateTextAndRoundToWords.d.ts +8 -0
  232. package/dist/utils/truncateTextAndRoundToWords.js +27 -0
  233. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -0
  234. package/dist/utils/waitForLockfileRelease.d.ts +5 -0
  235. package/dist/utils/waitForLockfileRelease.js +20 -0
  236. package/dist/utils/waitForLockfileRelease.js.map +1 -0
  237. package/dist/utils/withLockfile.d.ts +7 -0
  238. package/dist/utils/withLockfile.js +44 -0
  239. package/dist/utils/withLockfile.js.map +1 -0
  240. package/dist/utils/withOra.js +11 -1
  241. package/dist/utils/withOra.js.map +1 -1
  242. package/dist/utils/withStatusLogs.d.ts +2 -1
  243. package/dist/utils/withStatusLogs.js +11 -8
  244. package/dist/utils/withStatusLogs.js.map +1 -1
  245. package/llama/.clang-format +11 -11
  246. package/llama/CMakeLists.txt +87 -2
  247. package/llama/addon.cpp +915 -352
  248. package/llama/binariesGithubRelease.json +1 -1
  249. package/llama/gitRelease.bundle +0 -0
  250. package/llama/gpuInfo/cuda-gpu-info.cu +99 -0
  251. package/llama/gpuInfo/cuda-gpu-info.h +7 -0
  252. package/llama/gpuInfo/metal-gpu-info.h +5 -0
  253. package/llama/gpuInfo/metal-gpu-info.mm +17 -0
  254. package/llama/llama.cpp.info.json +4 -0
  255. package/llamaBins/linux-arm64/.buildMetadata.json +1 -0
  256. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  257. package/llamaBins/linux-armv7l/.buildMetadata.json +1 -0
  258. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  259. package/llamaBins/linux-x64/.buildMetadata.json +1 -0
  260. package/llamaBins/linux-x64/llama-addon.node +0 -0
  261. package/llamaBins/linux-x64-cuda/.buildMetadata.json +1 -0
  262. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  263. package/llamaBins/mac-arm64-metal/.buildMetadata.json +1 -0
  264. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  265. package/llamaBins/mac-x64/.buildMetadata.json +1 -0
  266. package/llamaBins/mac-x64/llama-addon.node +0 -0
  267. package/llamaBins/win-x64/.buildMetadata.json +1 -0
  268. package/llamaBins/win-x64/llama-addon.exp +0 -0
  269. package/llamaBins/win-x64/llama-addon.node +0 -0
  270. package/llamaBins/win-x64-cuda/.buildMetadata.json +1 -0
  271. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  272. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  273. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  274. package/package.json +39 -19
  275. package/dist/ChatPromptWrapper.d.ts +0 -11
  276. package/dist/ChatPromptWrapper.js +0 -20
  277. package/dist/ChatPromptWrapper.js.map +0 -1
  278. package/dist/chatWrappers/ChatMLChatPromptWrapper.d.ts +0 -12
  279. package/dist/chatWrappers/ChatMLChatPromptWrapper.js +0 -22
  280. package/dist/chatWrappers/ChatMLChatPromptWrapper.js.map +0 -1
  281. package/dist/chatWrappers/EmptyChatPromptWrapper.d.ts +0 -4
  282. package/dist/chatWrappers/EmptyChatPromptWrapper.js +0 -5
  283. package/dist/chatWrappers/EmptyChatPromptWrapper.js.map +0 -1
  284. package/dist/chatWrappers/FalconChatPromptWrapper.d.ts +0 -19
  285. package/dist/chatWrappers/FalconChatPromptWrapper.js +0 -33
  286. package/dist/chatWrappers/FalconChatPromptWrapper.js.map +0 -1
  287. package/dist/chatWrappers/GeneralChatPromptWrapper.d.ts +0 -19
  288. package/dist/chatWrappers/GeneralChatPromptWrapper.js +0 -38
  289. package/dist/chatWrappers/GeneralChatPromptWrapper.js.map +0 -1
  290. package/dist/chatWrappers/LlamaChatPromptWrapper.d.ts +0 -12
  291. package/dist/chatWrappers/LlamaChatPromptWrapper.js +0 -23
  292. package/dist/chatWrappers/LlamaChatPromptWrapper.js.map +0 -1
  293. package/dist/chatWrappers/createChatWrapperByBos.d.ts +0 -2
  294. package/dist/chatWrappers/createChatWrapperByBos.js +0 -14
  295. package/dist/chatWrappers/createChatWrapperByBos.js.map +0 -1
  296. package/dist/chatWrappers/generateContextTextFromConversationHistory.d.ts +0 -23
  297. package/dist/chatWrappers/generateContextTextFromConversationHistory.js +0 -47
  298. package/dist/chatWrappers/generateContextTextFromConversationHistory.js.map +0 -1
  299. package/dist/llamaEvaluator/LlamaBins.d.ts +0 -4
  300. package/dist/llamaEvaluator/LlamaBins.js +0 -5
  301. package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
  302. package/dist/llamaEvaluator/LlamaChatSession.d.ts +0 -122
  303. package/dist/llamaEvaluator/LlamaChatSession.js +0 -236
  304. package/dist/llamaEvaluator/LlamaChatSession.js.map +0 -1
  305. package/dist/llamaEvaluator/LlamaContext.d.ts +0 -99
  306. package/dist/llamaEvaluator/LlamaContext.js +0 -143
  307. package/dist/llamaEvaluator/LlamaContext.js.map +0 -1
  308. package/dist/llamaEvaluator/LlamaGrammar.d.ts +0 -32
  309. package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
  310. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.d.ts +0 -14
  311. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js +0 -16
  312. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
  313. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
  314. package/dist/llamaEvaluator/LlamaModel.d.ts +0 -123
  315. package/dist/llamaEvaluator/LlamaModel.js +0 -74
  316. package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
  317. package/dist/utils/binariesGithubRelease.js.map +0 -1
  318. package/dist/utils/clearLlamaBuild.d.ts +0 -1
  319. package/dist/utils/clearLlamaBuild.js +0 -12
  320. package/dist/utils/clearLlamaBuild.js.map +0 -1
  321. package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
  322. package/dist/utils/cloneLlamaCppRepo.js +0 -102
  323. package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
  324. package/dist/utils/compileLLamaCpp.d.ts +0 -8
  325. package/dist/utils/compileLLamaCpp.js +0 -132
  326. package/dist/utils/compileLLamaCpp.js.map +0 -1
  327. package/dist/utils/getBin.d.ts +0 -53
  328. package/dist/utils/getBin.js +0 -78
  329. package/dist/utils/getBin.js.map +0 -1
  330. package/dist/utils/getReleaseInfo.d.ts +0 -7
  331. package/dist/utils/getReleaseInfo.js +0 -30
  332. package/dist/utils/getReleaseInfo.js.map +0 -1
  333. package/dist/utils/getTextCompletion.d.ts +0 -3
  334. package/dist/utils/getTextCompletion.js +0 -12
  335. package/dist/utils/getTextCompletion.js.map +0 -1
  336. package/dist/utils/usedBinFlag.d.ts +0 -6
  337. package/dist/utils/usedBinFlag.js +0 -15
  338. package/dist/utils/usedBinFlag.js.map +0 -1
  339. package/dist/utils/withLock.d.ts +0 -1
  340. package/dist/utils/withLock.js +0 -19
  341. package/dist/utils/withLock.js.map +0 -1
  342. package/llama/usedBin.json +0 -3
  343. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  344. package/llamaBins/mac-x64/ggml-metal.metal +0 -6119
  345. /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
  346. /package/llamaBins/{mac-arm64 → mac-arm64-metal}/ggml-metal.metal +0 -0
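
What the file list shows, in short: the 2.x dist/llamaEvaluator/* classes are replaced by a rewritten dist/evaluator/* tree (LlamaChat, LlamaChatSession, LlamaContext, LlamaEmbeddingContext), and a new dist/bindings/getLlama entry point supersedes the old getBin/usedBinFlag/LlamaBins machinery for locating or building the native addon. As a rough sketch of what this restructuring implies for consumers — the option and method names below are assumptions inferred from these file names (getLlama, LlamaChatSession, LlamaContext), not something this diff itself confirms — 3.0-beta usage looks roughly like:

    // Hedged sketch of the 3.0-beta entry point; names inferred from the
    // dist/bindings/getLlama and dist/evaluator/* files listed above.
    import {getLlama, LlamaChatSession} from "node-llama-cpp";

    const llama = await getLlama(); // resolves a prebuilt binary or builds llama.cpp locally
    const model = await llama.loadModel({modelPath: "path/to/model.gguf"}); // assumed option name
    const context = await model.createContext();
    const session = new LlamaChatSession({contextSequence: context.getSequence()}); // sequences are new in 3.0

    console.log(await session.prompt("Hi there"));

In 2.x the equivalent flow constructed LlamaModel and LlamaContext directly; the getLlama() step is new and corresponds to the bindings/ folder introduced in this release.
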
package/llama/addon.cpp CHANGED
@@ -1,450 +1,1013 @@
1
1
  #include <stddef.h>
2
+
2
3
  #include <algorithm>
3
4
  #include <sstream>
4
5
  #include <vector>
5
6
 
6
7
  #include "common.h"
7
- #include "llama.h"
8
8
  #include "common/grammar-parser.h"
9
+ #include "llama.h"
9
10
  #include "napi.h"
10
11
 
11
- class LLAMAModel : public Napi::ObjectWrap<LLAMAModel> {
12
- public:
13
- llama_model_params model_params;
14
- llama_model* model;
12
+ #ifdef GPU_INFO_USE_CUBLAS
13
+ # include "gpuInfo/cuda-gpu-info.h"
14
+ #endif
15
+ #ifdef GPU_INFO_USE_METAL
16
+ # include "gpuInfo/metal-gpu-info.h"
17
+ #endif
18
+
19
+
20
+ struct addon_logger_log {
21
+ public:
22
+ const int logLevelNumber;
23
+ const std::stringstream* stringStream;
24
+ };
25
+
26
+ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);
27
+
28
+ using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
29
+ void addonCallJsLogCallback(
30
+ Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
31
+ );
32
+ using AddonThreadSafeLogCallbackFunction =
33
+ Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
34
+
35
+ AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
36
+ bool addonJsLoggerCallbackSet = false;
37
+ int addonLoggerLogLevel = 5;
38
+
39
+ std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
40
+ std::vector<char> result(8, 0);
41
+ const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size());
42
+ if (n_tokens < 0) {
43
+ result.resize(-n_tokens);
44
+ int check = llama_token_to_piece(model, token, result.data(), result.size());
45
+ GGML_ASSERT(check == -n_tokens);
46
+ } else {
47
+ result.resize(n_tokens);
48
+ }
49
+
50
+ return std::string(result.data(), result.size());
51
+ }
52
+
53
+ #ifdef GPU_INFO_USE_CUBLAS
54
+ void lodCudaError(const char* message) {
55
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
56
+ }
57
+ #endif
58
+
59
+ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
60
+ uint64_t total = 0;
61
+ uint64_t used = 0;
62
+
63
+ #ifdef GPU_INFO_USE_CUBLAS
64
+ size_t cudaDeviceTotal = 0;
65
+ size_t cudaDeviceUsed = 0;
66
+ bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, lodCudaError);
15
67
 
16
- LLAMAModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<LLAMAModel>(info) {
17
- model_params = llama_model_default_params();
68
+ if (cudeGetInfoSuccess) {
69
+ total += cudaDeviceTotal;
70
+ used += cudaDeviceUsed;
71
+ }
72
+ #endif
73
+
74
+ #ifdef GPU_INFO_USE_METAL
75
+ uint64_t metalDeviceTotal = 0;
76
+ uint64_t metalDeviceUsed = 0;
77
+ get_metal_gpu_info(&metalDeviceTotal, &metalDeviceUsed);
78
+
79
+ total += metalDeviceTotal;
80
+ used += metalDeviceUsed;
81
+ #endif
82
+
83
+ Napi::Object result = Napi::Object::New(info.Env());
84
+ result.Set("total", Napi::Number::From(info.Env(), total));
85
+ result.Set("used", Napi::Number::From(info.Env(), used));
86
+
87
+ return result;
88
+ }
89
+
90
+ class AddonModel : public Napi::ObjectWrap<AddonModel> {
91
+ public:
92
+ llama_model_params model_params;
93
+ llama_model* model;
94
+ bool disposed = false;
18
95
 
19
- // Get the model path
20
- std::string modelPath = info[0].As<Napi::String>().Utf8Value();
96
+ AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
97
+ model_params = llama_model_default_params();
21
98
 
22
- if (info.Length() > 1 && info[1].IsObject()) {
23
- Napi::Object options = info[1].As<Napi::Object>();
99
+ // Get the model path
100
+ std::string modelPath = info[0].As<Napi::String>().Utf8Value();
24
101
 
25
- if (options.Has("gpuLayers")) {
26
- model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
102
+ if (info.Length() > 1 && info[1].IsObject()) {
103
+ Napi::Object options = info[1].As<Napi::Object>();
104
+
105
+ if (options.Has("gpuLayers")) {
106
+ model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
107
+ }
108
+
109
+ if (options.Has("vocabOnly")) {
110
+ model_params.vocab_only = options.Get("vocabOnly").As<Napi::Boolean>().Value();
111
+ }
112
+
113
+ if (options.Has("useMmap")) {
114
+ model_params.use_mmap = options.Get("useMmap").As<Napi::Boolean>().Value();
115
+ }
116
+
117
+ if (options.Has("useMlock")) {
118
+ model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
119
+ }
27
120
  }
28
121
 
29
- if (options.Has("vocabOnly")) {
30
- model_params.vocab_only = options.Get("vocabOnly").As<Napi::Boolean>().Value();
122
+ llama_backend_init(false);
123
+ model = llama_load_model_from_file(modelPath.c_str(), model_params);
124
+
125
+ if (model == NULL) {
126
+ Napi::Error::New(info.Env(), "Failed to load model").ThrowAsJavaScriptException();
127
+ return;
31
128
  }
129
+ }
32
130
 
33
- if (options.Has("useMmap")) {
34
- model_params.use_mmap = options.Get("useMmap").As<Napi::Boolean>().Value();
131
+ ~AddonModel() {
132
+ dispose();
133
+ }
134
+
135
+ void dispose() {
136
+ if (disposed) {
137
+ return;
35
138
  }
36
139
 
37
- if (options.Has("useMlock")) {
38
- model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
140
+ llama_free_model(model);
141
+ disposed = true;
142
+ }
143
+
144
+ Napi::Value Dispose(const Napi::CallbackInfo& info) {
145
+ if (disposed) {
146
+ return info.Env().Undefined();
39
147
  }
148
+
149
+ dispose();
150
+
151
+ return info.Env().Undefined();
40
152
  }
41
153
 
42
- llama_backend_init(false);
43
- model = llama_load_model_from_file(modelPath.c_str(), model_params);
154
+ Napi::Value Tokenize(const Napi::CallbackInfo& info) {
155
+ if (disposed) {
156
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
157
+ return info.Env().Undefined();
158
+ }
159
+
160
+ std::string text = info[0].As<Napi::String>().Utf8Value();
161
+ bool specialTokens = info[1].As<Napi::Boolean>().Value();
44
162
 
45
- if (model == NULL) {
46
- Napi::Error::New(info.Env(), "Failed to load model").ThrowAsJavaScriptException();
47
- return;
163
+ std::vector<llama_token> tokens = llama_tokenize(model, text, false, specialTokens);
164
+
165
+ Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
166
+ for (size_t i = 0; i < tokens.size(); ++i) {
167
+ result[i] = static_cast<uint32_t>(tokens[i]);
168
+ }
169
+
170
+ return result;
48
171
  }
49
- }
172
+ Napi::Value Detokenize(const Napi::CallbackInfo& info) {
173
+ if (disposed) {
174
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
175
+ return info.Env().Undefined();
176
+ }
50
177
 
51
- ~LLAMAModel() {
52
- llama_free_model(model);
53
- }
178
+ Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
54
179
 
55
- static void init(Napi::Object exports) {
56
- exports.Set("LLAMAModel", DefineClass(exports.Env(), "LLAMAModel", {}));
57
- }
58
- };
180
+ // Create a stringstream for accumulating the decoded string.
181
+ std::stringstream ss;
182
+
183
+ // Decode each token and accumulate the result.
184
+ for (size_t i = 0; i < tokens.ElementLength(); i++) {
185
+ const std::string piece = addon_model_token_to_piece(model, (llama_token)tokens[i]);
59
186
 
60
- class LLAMAGrammar : public Napi::ObjectWrap<LLAMAGrammar> {
61
- public:
62
- grammar_parser::parse_state parsed_grammar;
187
+ if (piece.empty()) {
188
+ continue;
189
+ }
63
190
 
64
- LLAMAGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<LLAMAGrammar>(info) {
65
- // Get the model path
66
- std::string grammarCode = info[0].As<Napi::String>().Utf8Value();
67
- bool should_print_grammar = false;
191
+ ss << piece;
192
+ }
68
193
 
69
- if (info.Length() > 1 && info[1].IsObject()) {
70
- Napi::Object options = info[1].As<Napi::Object>();
194
+ return Napi::String::New(info.Env(), ss.str());
195
+ }
71
196
 
72
- if (options.Has("printGrammar")) {
73
- should_print_grammar = options.Get("printGrammar").As<Napi::Boolean>().Value();
197
+ Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info) {
198
+ if (disposed) {
199
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
200
+ return info.Env().Undefined();
74
201
  }
202
+
203
+ return Napi::Number::From(info.Env(), llama_n_ctx_train(model));
75
204
  }
76
205
 
77
- parsed_grammar = grammar_parser::parse(grammarCode.c_str());
78
- // will be empty (default) if there are parse errors
79
- if (parsed_grammar.rules.empty()) {
80
- Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
81
- return;
206
+ Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
207
+ if (disposed) {
208
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
209
+ return info.Env().Undefined();
210
+ }
211
+
212
+ return Napi::Number::From(info.Env(), llama_model_size(model));
82
213
  }
83
214
 
84
- if (should_print_grammar) {
85
- grammar_parser::print_grammar(stderr, parsed_grammar);
215
+ Napi::Value GetTotalParameters(const Napi::CallbackInfo& info) {
216
+ if (disposed) {
217
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
218
+ return info.Env().Undefined();
219
+ }
220
+
221
+ return Napi::Number::From(info.Env(), llama_model_n_params(model));
86
222
  }
87
- }
88
223
 
89
- static void init(Napi::Object exports) {
90
- exports.Set("LLAMAGrammar", DefineClass(exports.Env(), "LLAMAGrammar", {}));
91
- }
224
+ Napi::Value GetModelDescription(const Napi::CallbackInfo& info) {
225
+ if (disposed) {
226
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
227
+ return info.Env().Undefined();
228
+ }
229
+
230
+ char model_desc[128];
231
+ int actual_length = llama_model_desc(model, model_desc, sizeof(model_desc));
232
+
233
+ return Napi::String::New(info.Env(), model_desc, actual_length);
234
+ }
235
+
236
+ Napi::Value TokenBos(const Napi::CallbackInfo& info) {
237
+ if (disposed) {
238
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
239
+ return info.Env().Undefined();
240
+ }
241
+
242
+ return Napi::Number::From(info.Env(), llama_token_bos(model));
243
+ }
244
+ Napi::Value TokenEos(const Napi::CallbackInfo& info) {
245
+ if (disposed) {
246
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
247
+ return info.Env().Undefined();
248
+ }
249
+
250
+ return Napi::Number::From(info.Env(), llama_token_eos(model));
251
+ }
252
+ Napi::Value TokenNl(const Napi::CallbackInfo& info) {
253
+ if (disposed) {
254
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
255
+ return info.Env().Undefined();
256
+ }
257
+
258
+ return Napi::Number::From(info.Env(), llama_token_nl(model));
259
+ }
260
+ Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
261
+ if (disposed) {
262
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
263
+ return info.Env().Undefined();
264
+ }
265
+
266
+ return Napi::Number::From(info.Env(), llama_token_prefix(model));
267
+ }
268
+ Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
269
+ if (disposed) {
270
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
271
+ return info.Env().Undefined();
272
+ }
273
+
274
+ return Napi::Number::From(info.Env(), llama_token_middle(model));
275
+ }
276
+ Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
277
+ if (disposed) {
278
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
279
+ return info.Env().Undefined();
280
+ }
281
+
282
+ return Napi::Number::From(info.Env(), llama_token_suffix(model));
283
+ }
284
+ Napi::Value EotToken(const Napi::CallbackInfo& info) {
285
+ if (disposed) {
286
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
287
+ return info.Env().Undefined();
288
+ }
289
+
290
+ return Napi::Number::From(info.Env(), llama_token_eot(model));
291
+ }
292
+ Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
293
+ if (disposed) {
294
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
295
+ return info.Env().Undefined();
296
+ }
297
+
298
+ int token = info[0].As<Napi::Number>().Int32Value();
299
+ std::stringstream ss;
300
+
301
+ const char* str = llama_token_get_text(model, token);
302
+ if (str == nullptr) {
303
+ return info.Env().Undefined();
304
+ }
305
+
306
+ ss << str;
307
+
308
+ return Napi::String::New(info.Env(), ss.str());
309
+ }
310
+
311
+ static void init(Napi::Object exports) {
312
+ exports.Set(
313
+ "AddonModel",
314
+ DefineClass(
315
+ exports.Env(),
316
+ "AddonModel",
317
+ {
318
+ InstanceMethod("tokenize", &AddonModel::Tokenize),
319
+ InstanceMethod("detokenize", &AddonModel::Detokenize),
320
+ InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
321
+ InstanceMethod("getTotalSize", &AddonModel::GetTotalSize),
322
+ InstanceMethod("getTotalParameters", &AddonModel::GetTotalParameters),
323
+ InstanceMethod("getModelDescription", &AddonModel::GetModelDescription),
324
+ InstanceMethod("tokenBos", &AddonModel::TokenBos),
325
+ InstanceMethod("tokenEos", &AddonModel::TokenEos),
326
+ InstanceMethod("tokenNl", &AddonModel::TokenNl),
327
+ InstanceMethod("prefixToken", &AddonModel::PrefixToken),
328
+ InstanceMethod("middleToken", &AddonModel::MiddleToken),
329
+ InstanceMethod("suffixToken", &AddonModel::SuffixToken),
330
+ InstanceMethod("eotToken", &AddonModel::EotToken),
331
+ InstanceMethod("getTokenString", &AddonModel::GetTokenString),
332
+ InstanceMethod("dispose", &AddonModel::Dispose),
333
+ }
334
+ )
335
+ );
336
+ }
92
337
  };
93
338
 
94
- class LLAMAGrammarEvaluationState : public Napi::ObjectWrap<LLAMAGrammarEvaluationState> {
95
- public:
96
- LLAMAGrammar* grammarDef;
97
- llama_grammar *grammar = nullptr;
339
+ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
340
+ public:
341
+ grammar_parser::parse_state parsed_grammar;
98
342
 
99
- LLAMAGrammarEvaluationState(const Napi::CallbackInfo& info) : Napi::ObjectWrap<LLAMAGrammarEvaluationState>(info) {
100
- grammarDef = Napi::ObjectWrap<LLAMAGrammar>::Unwrap(info[0].As<Napi::Object>());
101
- grammarDef->Ref();
343
+ AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
344
+ // Get the model path
345
+ std::string grammarCode = info[0].As<Napi::String>().Utf8Value();
346
+ bool should_print_grammar = false;
102
347
 
103
- std::vector<const llama_grammar_element *> grammar_rules(grammarDef->parsed_grammar.c_rules());
104
- grammar = llama_grammar_init(
105
- grammar_rules.data(), grammar_rules.size(), grammarDef->parsed_grammar.symbol_ids.at("root")
106
- );
107
- }
348
+ if (info.Length() > 1 && info[1].IsObject()) {
349
+ Napi::Object options = info[1].As<Napi::Object>();
108
350
 
109
- ~LLAMAGrammarEvaluationState() {
110
- grammarDef->Unref();
351
+ if (options.Has("printGrammar")) {
352
+ should_print_grammar = options.Get("printGrammar").As<Napi::Boolean>().Value();
353
+ }
354
+ }
355
+
356
+ parsed_grammar = grammar_parser::parse(grammarCode.c_str());
357
+ // will be empty (default) if there are parse errors
358
+ if (parsed_grammar.rules.empty()) {
359
+ Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
360
+ return;
361
+ }
111
362
 
112
- if (grammar != nullptr) {
113
- llama_grammar_free(grammar);
114
- grammar = nullptr;
363
+ if (should_print_grammar) {
364
+ grammar_parser::print_grammar(stderr, parsed_grammar);
365
+ }
115
366
  }
116
- }
117
367
 
118
- static void init(Napi::Object exports) {
119
- exports.Set("LLAMAGrammarEvaluationState", DefineClass(exports.Env(), "LLAMAGrammarEvaluationState", {}));
120
- }
368
+ static void init(Napi::Object exports) {
369
+ exports.Set("AddonGrammar", DefineClass(exports.Env(), "AddonGrammar", {}));
370
+ }
121
371
  };
122
372
 
123
- class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
124
- public:
125
- LLAMAModel* model;
126
- llama_context_params context_params;
127
- llama_context* ctx;
128
- int n_cur = 0;
129
-
130
- LLAMAContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<LLAMAContext>(info) {
131
- model = Napi::ObjectWrap<LLAMAModel>::Unwrap(info[0].As<Napi::Object>());
132
- model->Ref();
133
-
134
- context_params = llama_context_default_params();
135
- context_params.seed = -1;
136
- context_params.n_ctx = 4096;
137
- context_params.n_threads = 6;
138
- context_params.n_threads_batch == -1 ? context_params.n_threads : context_params.n_threads_batch;
139
-
140
- if (info.Length() > 1 && info[1].IsObject()) {
141
- Napi::Object options = info[1].As<Napi::Object>();
142
-
143
- if (options.Has("seed")) {
144
- context_params.seed = options.Get("seed").As<Napi::Number>().Int32Value();
145
- }
146
-
147
- if (options.Has("contextSize")) {
148
- context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().Int32Value();
149
- }
150
-
151
- if (options.Has("batchSize")) {
152
- context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Int32Value();
153
- }
154
-
155
- if (options.Has("logitsAll")) {
156
- context_params.logits_all = options.Get("logitsAll").As<Napi::Boolean>().Value();
157
- }
158
-
159
- if (options.Has("embedding")) {
160
- context_params.embedding = options.Get("embedding").As<Napi::Boolean>().Value();
161
- }
162
-
163
- if (options.Has("threads")) {
164
- context_params.n_threads = options.Get("threads").As<Napi::Number>().Int32Value();
165
- context_params.n_threads_batch == -1 ? context_params.n_threads : context_params.n_threads_batch;
166
- }
167
- }
373
+ class AddonGrammarEvaluationState : public Napi::ObjectWrap<AddonGrammarEvaluationState> {
374
+ public:
375
+ AddonGrammar* grammarDef;
376
+ llama_grammar* grammar = nullptr;
168
377
 
169
- ctx = llama_new_context_with_model(model->model, context_params);
170
- Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
171
- }
172
- ~LLAMAContext() {
173
- Napi::MemoryManagement::AdjustExternalMemory(Env(), -(int64_t)llama_get_state_size(ctx));
174
- llama_free(ctx);
175
- model->Unref();
176
- }
177
- Napi::Value Encode(const Napi::CallbackInfo& info) {
178
- std::string text = info[0].As<Napi::String>().Utf8Value();
378
+ AddonGrammarEvaluationState(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammarEvaluationState>(info) {
379
+ grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[0].As<Napi::Object>());
380
+ grammarDef->Ref();
179
381
 
180
- std::vector<llama_token> tokens = llama_tokenize(ctx, text, false);
382
+ std::vector<const llama_grammar_element*> grammar_rules(grammarDef->parsed_grammar.c_rules());
383
+ grammar = llama_grammar_init(grammar_rules.data(), grammar_rules.size(), grammarDef->parsed_grammar.symbol_ids.at("root"));
384
+ }
181
385
 
182
- Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
183
- for (size_t i = 0; i < tokens.size(); ++i) { result[i] = static_cast<uint32_t>(tokens[i]); }
386
+ ~AddonGrammarEvaluationState() {
387
+ grammarDef->Unref();
184
388
 
185
- return result;
186
- }
187
- Napi::Value Decode(const Napi::CallbackInfo& info) {
188
- Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
389
+ if (grammar != nullptr) {
390
+ llama_grammar_free(grammar);
391
+ grammar = nullptr;
392
+ }
393
+ }
189
394
 
190
- // Create a stringstream for accumulating the decoded string.
191
- std::stringstream ss;
395
+ static void init(Napi::Object exports) {
396
+ exports.Set("AddonGrammarEvaluationState", DefineClass(exports.Env(), "AddonGrammarEvaluationState", {}));
397
+ }
398
+ };
192
399
 
193
- // Decode each token and accumulate the result.
194
- for (size_t i = 0; i < tokens.ElementLength(); i++) {
195
- const std::string piece = llama_token_to_piece(ctx, (llama_token)tokens[i]);
400
+ class AddonContext : public Napi::ObjectWrap<AddonContext> {
401
+ public:
402
+ AddonModel* model;
403
+ llama_context_params context_params;
404
+ llama_context* ctx;
405
+ llama_batch batch;
406
+ bool has_batch = false;
407
+ int32_t batch_n_tokens = 0;
408
+ int n_cur = 0;
409
+ bool disposed = false;
410
+
411
+ AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
412
+ model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
413
+ model->Ref();
414
+
415
+ context_params = llama_context_default_params();
416
+ context_params.seed = -1;
417
+ context_params.n_ctx = 4096;
418
+ context_params.n_threads = 6;
419
+ context_params.n_threads_batch = context_params.n_threads;
420
+
421
+ if (info.Length() > 1 && info[1].IsObject()) {
422
+ Napi::Object options = info[1].As<Napi::Object>();
423
+
424
+ if (options.Has("noSeed")) {
425
+ context_params.seed = time(NULL);
426
+ } else if (options.Has("seed")) {
427
+ context_params.seed = options.Get("seed").As<Napi::Number>().Uint32Value();
428
+ }
429
+
430
+ if (options.Has("contextSize")) {
431
+ context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().Uint32Value();
432
+ }
433
+
434
+ if (options.Has("batchSize")) {
435
+ context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
436
+ }
437
+
438
+ if (options.Has("embedding")) {
439
+ context_params.embedding = options.Get("embedding").As<Napi::Boolean>().Value();
440
+ }
441
+
442
+ if (options.Has("threads")) {
443
+ const auto n_threads = options.Get("threads").As<Napi::Number>().Uint32Value();
444
+ const auto resolved_n_threads = n_threads == 0 ? std::thread::hardware_concurrency() : n_threads;
445
+
446
+ context_params.n_threads = resolved_n_threads;
447
+ context_params.n_threads_batch = resolved_n_threads;
448
+ }
449
+ }
196
450
 
197
- if (piece.empty()) {
198
- continue;
451
+ ctx = llama_new_context_with_model(model->model, context_params);
452
+ Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
453
+ }
454
+ ~AddonContext() {
455
+ dispose();
199
456
  }
200
457
 
201
- ss << piece;
202
- }
458
+ void dispose() {
459
+ if (disposed) {
460
+ return;
461
+ }
203
462
 
204
- return Napi::String::New(info.Env(), ss.str());
205
- }
206
- Napi::Value TokenBos(const Napi::CallbackInfo& info) {
207
- return Napi::Number::From(info.Env(), llama_token_bos(model->model)); // TODO: move this to the model
208
- }
209
- Napi::Value TokenEos(const Napi::CallbackInfo& info) {
210
- return Napi::Number::From(info.Env(), llama_token_eos(model->model)); // TODO: move this to the model
211
- }
212
- Napi::Value TokenNl(const Napi::CallbackInfo& info) {
213
- return Napi::Number::From(info.Env(), llama_token_nl(model->model)); // TODO: move this to the model
214
- }
215
- Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
216
- return Napi::Number::From(info.Env(), llama_n_ctx(ctx));
217
- }
218
-
219
- Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
220
- llama_print_timings(ctx);
221
- llama_reset_timings(ctx);
222
- return info.Env().Undefined();
223
- }
463
+ Napi::MemoryManagement::AdjustExternalMemory(Env(), -(int64_t)llama_get_state_size(ctx));
464
+ llama_free(ctx);
465
+ model->Unref();
224
466
 
225
- Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
226
- int token = info[0].As<Napi::Number>().Int32Value();
227
- std::stringstream ss;
467
+ disposeBatch();
228
468
 
229
- const char* str = llama_token_get_text(model->model, token); // TODO: move this to the model
230
- if (str == nullptr) {
231
- return info.Env().Undefined();
232
- }
469
+ disposed = true;
470
+ }
471
+ void disposeBatch() {
472
+ if (!has_batch) {
473
+ return;
474
+ }
475
+
476
+ llama_batch_free(batch);
477
+ has_batch = false;
478
+ batch_n_tokens = 0;
479
+ }
480
+ Napi::Value Dispose(const Napi::CallbackInfo& info) {
481
+ if (disposed) {
482
+ return info.Env().Undefined();
483
+ }
233
484
 
234
- ss << str;
235
-
236
- return Napi::String::New(info.Env(), ss.str());
237
- }
238
- Napi::Value Eval(const Napi::CallbackInfo& info);
239
- static void init(Napi::Object exports) {
240
- exports.Set("LLAMAContext",
241
- DefineClass(exports.Env(),
242
- "LLAMAContext",
243
- {
244
- InstanceMethod("encode", &LLAMAContext::Encode),
245
- InstanceMethod("decode", &LLAMAContext::Decode),
246
- InstanceMethod("tokenBos", &LLAMAContext::TokenBos),
247
- InstanceMethod("tokenEos", &LLAMAContext::TokenEos),
248
- InstanceMethod("tokenNl", &LLAMAContext::TokenNl),
249
- InstanceMethod("getContextSize", &LLAMAContext::GetContextSize),
250
- InstanceMethod("getTokenString", &LLAMAContext::GetTokenString),
251
- InstanceMethod("eval", &LLAMAContext::Eval),
252
- InstanceMethod("printTimings", &LLAMAContext::PrintTimings),
253
- }));
254
- }
485
+ dispose();
486
+
487
+ return info.Env().Undefined();
488
+ }
489
+ Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
490
+ if (disposed) {
491
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
492
+ return info.Env().Undefined();
493
+ }
494
+
495
+ return Napi::Number::From(info.Env(), llama_n_ctx(ctx));
496
+ }
497
+ Napi::Value InitBatch(const Napi::CallbackInfo& info) {
498
+ if (disposed) {
499
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
500
+ return info.Env().Undefined();
501
+ }
502
+
503
+ if (has_batch) {
504
+ llama_batch_free(batch);
505
+ }
506
+
507
+ int32_t n_tokens = info[0].As<Napi::Number>().Int32Value();
508
+
509
+ batch = llama_batch_init(n_tokens, 0, 1);
510
+ has_batch = true;
511
+ batch_n_tokens = n_tokens;
512
+
513
+ return info.Env().Undefined();
514
+ }
515
+ Napi::Value DisposeBatch(const Napi::CallbackInfo& info) {
516
+ if (disposed) {
517
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
518
+ return info.Env().Undefined();
519
+ }
520
+
521
+ disposeBatch();
522
+
523
+ return info.Env().Undefined();
524
+ }
525
+ Napi::Value AddToBatch(const Napi::CallbackInfo& info) {
526
+ if (!has_batch) {
527
+ Napi::Error::New(info.Env(), "No batch is initialized").ThrowAsJavaScriptException();
528
+ return info.Env().Undefined();
529
+ }
530
+
531
+ int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
532
+ int32_t firstTokenContextIndex = info[1].As<Napi::Number>().Int32Value();
533
+ Napi::Uint32Array tokens = info[2].As<Napi::Uint32Array>();
534
+ bool generateLogitAtTheEnd = info[3].As<Napi::Boolean>().Value();
535
+
536
+ auto tokensLength = tokens.ElementLength();
537
+ GGML_ASSERT(batch.n_tokens + tokensLength <= batch_n_tokens);
538
+
539
+ for (size_t i = 0; i < tokensLength; i++) {
540
+ llama_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, false);
541
+ }
542
+
543
+ if (generateLogitAtTheEnd) {
544
+ batch.logits[batch.n_tokens - 1] = true;
545
+
546
+ auto logit_index = batch.n_tokens - 1;
547
+
548
+ return Napi::Number::From(info.Env(), logit_index);
549
+ }
550
+
551
+ return info.Env().Undefined();
552
+ }
553
+ Napi::Value DisposeSequence(const Napi::CallbackInfo& info) {
554
+ if (disposed) {
555
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
556
+ return info.Env().Undefined();
557
+ }
558
+
559
+ int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
560
+
561
+ llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
562
+
563
+ return info.Env().Undefined();
564
+ }
565
+ Napi::Value RemoveTokenCellsFromSequence(const Napi::CallbackInfo& info) {
566
+ if (disposed) {
567
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
568
+ return info.Env().Undefined();
569
+ }
570
+
571
+ int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
572
+ int32_t startPos = info[1].As<Napi::Number>().Int32Value();
573
+ int32_t endPos = info[2].As<Napi::Number>().Int32Value();
574
+
575
+ llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
576
+
577
+ return info.Env().Undefined();
578
+ }
579
+ Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
580
+ if (disposed) {
581
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
582
+ return info.Env().Undefined();
583
+ }
584
+
585
+ int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
586
+ int32_t startPos = info[1].As<Napi::Number>().Int32Value();
587
+ int32_t endPos = info[2].As<Napi::Number>().Int32Value();
588
+ int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
589
+
590
+ llama_kv_cache_seq_shift(ctx, sequenceId, startPos, endPos, shiftDelta);
591
+
592
+ return info.Env().Undefined();
593
+ }
594
+ Napi::Value DecodeBatch(const Napi::CallbackInfo& info);
595
+ Napi::Value SampleToken(const Napi::CallbackInfo& info);
596
+
597
+ Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
598
+ AddonGrammarEvaluationState* grammar_evaluation_state =
599
+ Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
600
+ llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
601
+
602
+ if ((grammar_evaluation_state)->grammar != nullptr) {
603
+ llama_grammar_accept_token(ctx, (grammar_evaluation_state)->grammar, tokenId);
604
+ }
605
+
606
+ return info.Env().Undefined();
607
+ }
608
+
609
+ Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
610
+ if (disposed) {
611
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
612
+ return info.Env().Undefined();
613
+ }
614
+
615
+ const int n_embd = llama_n_embd(model->model);
616
+ const auto* embeddings = llama_get_embeddings(ctx);
617
+
618
+ Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
619
+ for (size_t i = 0; i < n_embd; ++i) {
620
+ result[i] = embeddings[i];
621
+ }
622
+
623
+ return result;
624
+ }
625
+
626
+ Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
627
+ llama_print_timings(ctx);
628
+ llama_reset_timings(ctx);
629
+ return info.Env().Undefined();
630
+ }
631
+
632
+ static void init(Napi::Object exports) {
633
+ exports.Set(
634
+ "AddonContext",
635
+ DefineClass(
636
+ exports.Env(),
637
+ "AddonContext",
638
+ {
639
+ InstanceMethod("getContextSize", &AddonContext::GetContextSize),
640
+ InstanceMethod("initBatch", &AddonContext::InitBatch),
641
+ InstanceMethod("addToBatch", &AddonContext::AddToBatch),
642
+ InstanceMethod("disposeSequence", &AddonContext::DisposeSequence),
643
+ InstanceMethod("removeTokenCellsFromSequence", &AddonContext::RemoveTokenCellsFromSequence),
644
+ InstanceMethod("shiftSequenceTokenCells", &AddonContext::ShiftSequenceTokenCells),
645
+ InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
646
+ InstanceMethod("sampleToken", &AddonContext::SampleToken),
647
+ InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
648
+ InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
649
+ InstanceMethod("printTimings", &AddonContext::PrintTimings),
650
+ InstanceMethod("dispose", &AddonContext::Dispose),
651
+ }
652
+ )
653
+ );
654
+ }
255
655
  };
256
656
 
257
657
 
258
- class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
259
- LLAMAContext* ctx;
260
- LLAMAGrammarEvaluationState* grammar_evaluation_state;
261
-        bool use_grammar = false;
-        std::vector<llama_token> tokens;
-        llama_token result;
-        float temperature;
-        int32_t top_k;
-        float top_p;
-        float repeat_penalty = 1.10f; // 1.0 = disabled
-        float repeat_penalty_presence_penalty = 0.00f; // 0.0 = disabled
-        float repeat_penalty_frequency_penalty = 0.00f; // 0.0 = disabled
-        std::vector<llama_token> repeat_penalty_tokens;
-        bool use_repeat_penalty = false;
-
-    public:
-        LLAMAContextEvalWorker(const Napi::CallbackInfo& info, LLAMAContext* ctx) : Napi::AsyncWorker(info.Env(), "LLAMAContextEvalWorker"), ctx(ctx), Napi::Promise::Deferred(info.Env()) {
-            ctx->Ref();
-            Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
-
-            temperature = 0.0f;
-            top_k = 40;
-            top_p = 0.95f;
-
-            if (info.Length() > 1 && info[1].IsObject()) {
-                Napi::Object options = info[1].As<Napi::Object>();
-
-                if (options.Has("temperature")) {
-                    temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
-                }
-
-                if (options.Has("topK")) {
-                    top_k = options.Get("topK").As<Napi::Number>().Int32Value();
-                }
-
-                if (options.Has("topP")) {
-                    top_p = options.Get("topP").As<Napi::Number>().FloatValue();
-                }
-
-                if (options.Has("repeatPenalty")) {
-                    repeat_penalty = options.Get("repeatPenalty").As<Napi::Number>().FloatValue();
-                }
-
-                if (options.Has("repeatPenaltyTokens")) {
-                    Napi::Uint32Array repeat_penalty_tokens_uint32_array = options.Get("repeatPenaltyTokens").As<Napi::Uint32Array>();
-
-                    repeat_penalty_tokens.reserve(repeat_penalty_tokens_uint32_array.ElementLength());
-                    for (size_t i = 0; i < repeat_penalty_tokens_uint32_array.ElementLength(); i++) {
-                        repeat_penalty_tokens.push_back(static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]));
-                    }
-
-                    use_repeat_penalty = true;
-                }
-
-                if (options.Has("repeatPenaltyPresencePenalty")) {
-                    repeat_penalty_presence_penalty = options.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue();
-                }
-
-                if (options.Has("repeatPenaltyFrequencyPenalty")) {
-                    repeat_penalty_frequency_penalty = options.Get("repeatPenaltyFrequencyPenalty").As<Napi::Number>().FloatValue();
-                }
-
-                if (options.Has("grammarEvaluationState")) {
-                    grammar_evaluation_state = Napi::ObjectWrap<LLAMAGrammarEvaluationState>::Unwrap(options.Get("grammarEvaluationState").As<Napi::Object>());
-                    grammar_evaluation_state->Ref();
-                    use_grammar = true;
-                }
-            }
+class AddonContextDecodeBatchWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
+    public:
+        AddonContext* ctx;
 
-            this->tokens.reserve(tokens.ElementLength());
-            for (size_t i = 0; i < tokens.ElementLength(); i++) { this->tokens.push_back(static_cast<llama_token>(tokens[i])); }
-        }
-        ~LLAMAContextEvalWorker() {
-            ctx->Unref();
+        AddonContextDecodeBatchWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
+            : Napi::AsyncWorker(info.Env(), "AddonContextDecodeBatchWorker"),
+              ctx(ctx),
+              Napi::Promise::Deferred(info.Env()) {
+            ctx->Ref();
+        }
+        ~AddonContextDecodeBatchWorker() {
+            ctx->Unref();
+        }
+        using Napi::AsyncWorker::Queue;
+        using Napi::Promise::Deferred::Promise;
+
+    protected:
+        void Execute() {
+            // Perform the evaluation using llama_decode.
+            int r = llama_decode(ctx->ctx, ctx->batch);
+
+            if (r != 0) {
+                if (r == 1) {
+                    SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
+                } else {
+                    SetError("Eval has failed");
+                }
+
+                return;
+            }
+        }
+        void OnOK() {
+            Napi::Env env = Napi::AsyncWorker::Env();
+            Napi::Promise::Deferred::Resolve(env.Undefined());
+        }
+        void OnError(const Napi::Error& err) {
+            Napi::Promise::Deferred::Reject(err.Value());
+        }
+};
 
-            if (use_grammar) {
-                grammar_evaluation_state->Unref();
-                use_grammar = false;
-            }
-        }
-        using Napi::AsyncWorker::Queue;
-        using Napi::Promise::Deferred::Promise;
+Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
+    AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info, this);
+    worker->Queue();
+    return worker->Promise();
+}
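Note: the 2.x `LLAMAContextEvalWorker` removed above handled batching, decoding, and sampling in a single async call; the beta splits this into `AddonContextDecodeBatchWorker` (which decodes the context's shared `llama_batch`) and `AddonContextSampleTokenWorker` further down. Both rely on the same node-addon-api idiom: inherit from `Napi::AsyncWorker` and `Napi::Promise::Deferred`, run `Execute()` off the JS thread, and settle the promise from `OnOK`/`OnError`. A minimal sketch of that idiom, with illustrative names that are not part of the addon:

```cpp
// Minimal sketch of the AsyncWorker + Promise::Deferred idiom used by the
// workers in this file (illustrative names; not part of the addon).
#include <napi.h>

class ExampleWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
    public:
        ExampleWorker(Napi::Env env)
            : Napi::AsyncWorker(env, "ExampleWorker"),
              Napi::Promise::Deferred(env) {}

        // Re-expose the privately inherited members the caller needs.
        using Napi::AsyncWorker::Queue;
        using Napi::Promise::Deferred::Promise;

    protected:
        void Execute() override {
            // Runs on a worker thread: no JS values may be touched here.
            // Calling SetError("message") here rejects the promise instead.
        }
        void OnOK() override {
            // Back on the JS thread: resolve the promise.
            Napi::Promise::Deferred::Resolve(Napi::AsyncWorker::Env().Undefined());
        }
        void OnError(const Napi::Error& err) override {
            Napi::Promise::Deferred::Reject(err.Value());
        }
};

// Typical binding method, as in AddonContext::DecodeBatch:
//     auto* worker = new ExampleWorker(info.Env());
//     worker->Queue();           // AsyncWorker frees itself when done
//     return worker->Promise();  // hand the pending promise back to JS
```

The `Ref()`/`Unref()` pair in the real workers' constructor and destructor pins the wrapped context object for the worker's lifetime, so garbage collection cannot free it while a decode is in flight.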
 
-    protected:
-        void Execute() {
-            llama_batch batch = llama_batch_init(tokens.size(), 0, 1);
+class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
+    public:
+        AddonContext* ctx;
+        AddonGrammarEvaluationState* grammar_evaluation_state;
+        int32_t batchLogitIndex;
+        bool use_grammar = false;
+        llama_token result;
+        float temperature = 0.0f;
+        float min_p = 0;
+        int32_t top_k = 40;
+        float top_p = 0.95f;
+        float repeat_penalty = 1.10f; // 1.0 = disabled
+        float repeat_penalty_presence_penalty = 0.00f; // 0.0 = disabled
+        float repeat_penalty_frequency_penalty = 0.00f; // 0.0 = disabled
+        std::vector<llama_token> repeat_penalty_tokens;
+        bool use_repeat_penalty = false;
+
+        AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
+            : Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
+              ctx(ctx),
+              Napi::Promise::Deferred(info.Env()) {
+            ctx->Ref();
+
+            batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
+
+            if (info.Length() > 1 && info[1].IsObject()) {
+                Napi::Object options = info[1].As<Napi::Object>();
+
+                if (options.Has("temperature")) {
+                    temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
+                }
+
+                if (options.Has("minP")) {
+                    min_p = options.Get("minP").As<Napi::Number>().FloatValue();
+                }
+
+                if (options.Has("topK")) {
+                    top_k = options.Get("topK").As<Napi::Number>().Int32Value();
+                }
+
+                if (options.Has("topP")) {
+                    top_p = options.Get("topP").As<Napi::Number>().FloatValue();
+                }
+
+                if (options.Has("repeatPenalty")) {
+                    repeat_penalty = options.Get("repeatPenalty").As<Napi::Number>().FloatValue();
+                }
+
+                if (options.Has("repeatPenaltyTokens")) {
+                    Napi::Uint32Array repeat_penalty_tokens_uint32_array = options.Get("repeatPenaltyTokens").As<Napi::Uint32Array>();
+
+                    repeat_penalty_tokens.reserve(repeat_penalty_tokens_uint32_array.ElementLength());
+                    for (size_t i = 0; i < repeat_penalty_tokens_uint32_array.ElementLength(); i++) {
+                        repeat_penalty_tokens.push_back(static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]));
+                    }
+
+                    use_repeat_penalty = true;
+                }
+
+                if (options.Has("repeatPenaltyPresencePenalty")) {
+                    repeat_penalty_presence_penalty = options.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue();
+                }
+
+                if (options.Has("repeatPenaltyFrequencyPenalty")) {
+                    repeat_penalty_frequency_penalty = options.Get("repeatPenaltyFrequencyPenalty").As<Napi::Number>().FloatValue();
+                }
+
+                if (options.Has("grammarEvaluationState")) {
+                    grammar_evaluation_state =
+                        Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(options.Get("grammarEvaluationState").As<Napi::Object>());
+                    grammar_evaluation_state->Ref();
+                    use_grammar = true;
+                }
+            }
+        }
+        ~AddonContextSampleTokenWorker() {
+            ctx->Unref();
 
-            for (size_t i = 0; i < tokens.size(); i++) {
-                llama_batch_add(batch, tokens[i], ctx->n_cur, { 0 }, false);
+            if (use_grammar) {
+                grammar_evaluation_state->Unref();
+                use_grammar = false;
+            }
+        }
+        using Napi::AsyncWorker::Queue;
+        using Napi::Promise::Deferred::Promise;
 
-                ctx->n_cur++;
-            }
-            GGML_ASSERT(batch.n_tokens == (int) tokens.size());
+    protected:
+        void Execute() {
+            llama_token new_token_id = 0;
 
-            batch.logits[batch.n_tokens - 1] = true;
+            // Select the best prediction.
+            auto logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
+            auto n_vocab = llama_n_vocab(ctx->model->model);
 
-            // Perform the evaluation using llama_decode.
-            int r = llama_decode(ctx->ctx, batch);
+            std::vector<llama_token_data> candidates;
+            candidates.reserve(n_vocab);
 
-            llama_batch_free(batch);
+            for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
+                candidates.emplace_back(llama_token_data { token_id, logits[token_id], 0.0f });
+            }
 
-            if (r != 0) {
-                if (r == 1) {
-                    SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
-                } else {
-                    SetError("Eval has failed");
-                }
+            llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
 
-                return;
-            }
+            auto eos_token = llama_token_eos(ctx->model->model);
+
+            if (use_repeat_penalty && !repeat_penalty_tokens.empty()) {
+                llama_sample_repetition_penalties(
+                    ctx->ctx,
+                    &candidates_p,
+                    repeat_penalty_tokens.data(),
+                    repeat_penalty_tokens.size(),
+                    repeat_penalty,
+                    repeat_penalty_frequency_penalty,
+                    repeat_penalty_presence_penalty
+                );
+            }
+
+            if (use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
+                llama_sample_grammar(ctx->ctx, &candidates_p, (grammar_evaluation_state)->grammar);
+            }
+
+            if (temperature <= 0) {
+                new_token_id = llama_sample_token_greedy(ctx->ctx, &candidates_p);
+            } else {
+                const int32_t resolved_top_k =
+                    top_k <= 0 ? llama_n_vocab(ctx->model->model) : std::min(top_k, llama_n_vocab(ctx->model->model));
+                const int32_t n_probs = 0; // Number of probabilities to keep - 0 = disabled
+                const float tfs_z = 1.00f; // Tail free sampling - 1.0 = disabled
+                const float typical_p = 1.00f; // Typical probability - 1.0 = disabled
+                const float resolved_top_p = top_p; // Top p sampling - 1.0 = disabled
+
+                // Temperature sampling
+                size_t min_keep = std::max(1, n_probs);
+                llama_sample_top_k(ctx->ctx, &candidates_p, resolved_top_k, min_keep);
+                llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
+                llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
+                llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
+                llama_sample_min_p(ctx->ctx, &candidates_p, min_p, min_keep);
+                llama_sample_temp(ctx->ctx, &candidates_p, temperature);
+                new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
+            }
+
+            if (new_token_id != eos_token && use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
+                llama_grammar_accept_token(ctx->ctx, (grammar_evaluation_state)->grammar, new_token_id);
+            }
+
+            result = new_token_id;
+        }
+        void OnOK() {
+            Napi::Env env = Napi::AsyncWorker::Env();
+            Napi::Number resultValue = Napi::Number::New(env, static_cast<uint32_t>(result));
+            Napi::Promise::Deferred::Resolve(resultValue);
+        }
+        void OnError(const Napi::Error& err) {
+            Napi::Promise::Deferred::Reject(err.Value());
+        }
+};
 
-            llama_token new_token_id = 0;
+Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
+    AddonContextSampleTokenWorker* worker = new AddonContextSampleTokenWorker(info, this);
+    worker->Queue();
+    return worker->Promise();
+}
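`AddonContextSampleTokenWorker::Execute` applies its filters in a fixed order: repetition penalties and grammar masking first, then, when `temperature > 0`, top-k, tail-free, typical, top-p, min-p, and temperature scaling before the random draw; `temperature <= 0` short-circuits to a greedy argmax. A self-contained sketch of that greedy-versus-temperature branch (illustrative, with no llama.cpp dependency; only the top-k filter is shown):

```cpp
// Self-contained sketch of the greedy-vs-temperature branch in Execute() above.
// Only top-k filtering is shown; the worker also applies tail-free, typical,
// top-p, and min-p filters (in that order) before temperature scaling.
#include <algorithm>
#include <cmath>
#include <numeric>
#include <random>
#include <vector>

int sampleToken(const std::vector<float>& logits, float temperature, int top_k) {
    if (temperature <= 0.0f) {
        // Greedy: the highest logit wins deterministically.
        return (int)(std::max_element(logits.begin(), logits.end()) - logits.begin());
    }

    // Mirror resolved_top_k: top_k <= 0 disables the filter.
    const int n = (int)logits.size();
    const int k = top_k <= 0 ? n : std::min(top_k, n);

    // Keep the k highest-logit candidates.
    std::vector<int> ids(n);
    std::iota(ids.begin(), ids.end(), 0);
    std::partial_sort(ids.begin(), ids.begin() + k, ids.end(),
                      [&](int a, int b) { return logits[a] > logits[b]; });
    ids.resize(k);

    // Temperature-scaled softmax over the survivors (max-subtracted for stability).
    const double maxLogit = logits[ids.front()];
    std::vector<double> probs;
    double sum = 0.0;
    for (int id : ids) {
        const double p = std::exp(((double)logits[id] - maxLogit) / temperature);
        probs.push_back(p);
        sum += p;
    }

    // Draw one token proportionally to its probability.
    static std::mt19937 rng{std::random_device{}()};
    double r = std::uniform_real_distribution<double>(0.0, sum)(rng);
    for (size_t i = 0; i < ids.size(); i++) {
        r -= probs[i];
        if (r <= 0.0) return ids[i];
    }
    return ids.back();
}
```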
+
+Napi::Value systemInfo(const Napi::CallbackInfo& info) {
+    return Napi::String::From(info.Env(), llama_print_system_info());
+}
 
-            // Select the best prediction.
-            auto logits = llama_get_logits_ith(ctx->ctx, batch.n_tokens - 1);
-            auto n_vocab = llama_n_vocab(ctx->model->model);
+int addonGetGgmlLogLevelNumber(ggml_log_level level) {
+    switch (level) {
+        case GGML_LOG_LEVEL_ERROR: return 2;
+        case GGML_LOG_LEVEL_WARN: return 3;
+        case GGML_LOG_LEVEL_INFO: return 4;
+        case GGML_LOG_LEVEL_DEBUG: return 5;
+    }
 
-            std::vector<llama_token_data> candidates;
-            candidates.reserve(n_vocab);
+    return 1;
+}
 
-            for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-                candidates.emplace_back(llama_token_data{ token_id, logits[token_id], 0.0f });
+void addonCallJsLogCallback(
+    Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
+) {
+    bool called = false;
+
+    if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
+        try {
+            callback.Call({
+                Napi::Number::New(env, data->logLevelNumber),
+                Napi::String::New(env, data->stringStream->str()),
+            });
+            called = true;
+        } catch (const Napi::Error& e) {
+            called = false;
+        }
+    }
+
+    if (!called && data != nullptr) {
+        if (data->logLevelNumber == 2) {
+            fputs(data->stringStream->str().c_str(), stderr);
+            fflush(stderr);
+        } else {
+            fputs(data->stringStream->str().c_str(), stdout);
+            fflush(stdout);
+        }
     }
 
-            llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
+    if (data != nullptr) {
+        delete data->stringStream;
+        delete data;
+    }
+}
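`addonCallJsLogCallback` is the JS-thread consumer half of a typed thread-safe function: it forwards the level number and buffered text to the JS logger, falls back to stderr/stdout when the callback cannot run (for example during teardown), and frees the heap-allocated message on every path. A reduced sketch of that consumer, using illustrative stand-in types rather than the addon's:

```cpp
// Reduced consumer-side sketch of the thread-safe logging bridge. LogMessage
// and the other names here are illustrative stand-ins (the addon uses
// addon_logger_log and a std::stringstream*); this runs on the JS thread.
#include <napi.h>

#include <cstdio>
#include <string>

struct LogMessage {
    int level;
    std::string text;
};

using LoggerContext = Napi::Reference<Napi::Value>;

void callJsLogger(Napi::Env env, Napi::Function callback, LoggerContext* /*context*/, LogMessage* data) {
    bool delivered = false;

    // env and callback are empty while the thread-safe function is tearing down.
    if (env != nullptr && callback != nullptr) {
        try {
            callback.Call({
                Napi::Number::New(env, data->level),
                Napi::String::New(env, data->text),
            });
            delivered = true;
        } catch (const Napi::Error&) {
            delivered = false;
        }
    }

    if (!delivered) {
        // Same fallback policy as the addon: errors to stderr, the rest to stdout.
        fputs(data->text.c_str(), data->level == 2 ? stderr : stdout);
    }

    delete data; // ownership was transferred by the producing thread
}
```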
 
-            auto eos_token = llama_token_eos(ctx->model->model);
+static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data) {
+    int logLevelNumber = addonGetGgmlLogLevelNumber(level);
 
-            if (use_repeat_penalty && !repeat_penalty_tokens.empty()) {
-                llama_sample_repetition_penalties(
-                    ctx->ctx, &candidates_p, repeat_penalty_tokens.data(), repeat_penalty_tokens.size(), repeat_penalty,
-                    repeat_penalty_frequency_penalty, repeat_penalty_presence_penalty
-                );
+    if (logLevelNumber > addonLoggerLogLevel) {
+        return;
     }
 
-            if (use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
-                llama_sample_grammar(ctx->ctx, &candidates_p, (grammar_evaluation_state)->grammar);
+    if (addonJsLoggerCallbackSet) {
+        std::stringstream* stringStream = new std::stringstream();
+        if (text != nullptr) {
+            *stringStream << text;
+        }
+
+        addon_logger_log* data = new addon_logger_log {
+            logLevelNumber,
+            stringStream,
+        };
+
+        auto status = addonThreadSafeLoggerCallback.NonBlockingCall(data);
+
+        if (status == napi_ok) {
+            return;
+        }
     }
 
-            if (temperature <= 0) {
-                new_token_id = llama_sample_token_greedy(ctx->ctx, &candidates_p);
+    if (level == 2) {
+        fputs(text, stderr);
+        fflush(stderr);
     } else {
-                const int32_t resolved_top_k = top_k <= 0 ? llama_n_vocab(ctx->model->model) : std::min(top_k, llama_n_vocab(ctx->model->model));
-                const int32_t n_probs = 0; // Number of probabilities to keep - 0 = disabled
-                const float tfs_z = 1.00f; // Tail free sampling - 1.0 = disabled
-                const float typical_p = 1.00f; // Typical probability - 1.0 = disabled
-                const float resolved_top_p = top_p; // Top p sampling - 1.0 = disabled
-
-                // Temperature sampling
-                size_t min_keep = std::max(1, n_probs);
-                llama_sample_top_k(ctx->ctx, &candidates_p, resolved_top_k, min_keep);
-                llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
-                llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
-                llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
-                llama_sample_temperature(ctx->ctx, &candidates_p, temperature);
-                new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
+        fputs(text, stdout);
+        fflush(stdout);
     }
+}
+
+Napi::Value setLogger(const Napi::CallbackInfo& info) {
+    if (info.Length() < 1 || !info[0].IsFunction()) {
+        if (addonJsLoggerCallbackSet) {
+            addonJsLoggerCallbackSet = false;
+            addonThreadSafeLoggerCallback.Release();
+        }
 
-            if (new_token_id != eos_token && use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
-                llama_grammar_accept_token(ctx->ctx, (grammar_evaluation_state)->grammar, new_token_id);
+        return info.Env().Undefined();
     }
 
-            result = new_token_id;
-        }
-        void OnOK() {
-            Napi::Env env = Napi::AsyncWorker::Env();
-            Napi::Number resultValue = Napi::Number::New(env, static_cast<uint32_t>(result));
-            Napi::Promise::Deferred::Resolve(resultValue);
-        }
-        void OnError(const Napi::Error& err) { Napi::Promise::Deferred::Reject(err.Value()); }
-};
+    auto addonLoggerJSCallback = info[0].As<Napi::Function>();
+    AddonThreadSafeLogCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
+    addonThreadSafeLoggerCallback = AddonThreadSafeLogCallbackFunction::New(
+        info.Env(),
+        addonLoggerJSCallback,
+        "loggerCallback",
+        0,
+        1,
+        context,
+        [](Napi::Env, void*, AddonThreadSafeLogCallbackFunctionContext* ctx) {
+            addonJsLoggerCallbackSet = false;
+
+            delete ctx;
+        }
+    );
+    addonJsLoggerCallbackSet = true;
+
+    // prevent blocking the main node process from exiting due to active resources
+    addonThreadSafeLoggerCallback.Unref(info.Env());
 
-Napi::Value LLAMAContext::Eval(const Napi::CallbackInfo& info) {
-    LLAMAContextEvalWorker* worker = new LLAMAContextEvalWorker(info, this);
-    worker->Queue();
-    return worker->Promise();
+    return info.Env().Undefined();
 }
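`setLogger` above wires up the producer side: it creates the thread-safe function with an unlimited queue (`0`), one registered thread, a persistent context reference, and a finalizer that clears `addonJsLoggerCallbackSet`, then `Unref`s it so an installed logger does not keep the process alive. Continuing the stand-in types from the consumer sketch earlier, the setup and an any-thread producer might look like this:

```cpp
// Producer-side sketch, continuing the illustrative types from the consumer
// sketch above. The TSFN template binds context, message, and consumer together.
using LoggerTSFN = Napi::TypedThreadSafeFunction<LoggerContext, LogMessage, callJsLogger>;
LoggerTSFN loggerTsfn;

Napi::Value installLogger(const Napi::CallbackInfo& info) {
    loggerTsfn = LoggerTSFN::New(
        info.Env(),
        info[0].As<Napi::Function>(),                     // the JS logger function
        "loggerCallback",                                 // resource name for diagnostics
        0,                                                // 0 = unlimited queue
        1,                                                // one producer thread registered initially
        new LoggerContext(Napi::Persistent(info.This())), // context kept alive with the TSFN
        [](Napi::Env, void*, LoggerContext* ctx) {        // finalizer: runs once the TSFN dies
            delete ctx;
        }
    );

    // As in setLogger: let the process exit even while the logger is installed.
    loggerTsfn.Unref(info.Env());
    return info.Env().Undefined();
}

// Callable from any thread (e.g. from llama.cpp's log callback):
void postLog(int level, const char* text) {
    auto* data = new LogMessage { level, text == nullptr ? "" : text };
    if (loggerTsfn.NonBlockingCall(data) != napi_ok) {
        delete data; // queue full or TSFN released; the addon falls back to stdio here
    }
}
```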
 
-Napi::Value systemInfo(const Napi::CallbackInfo& info) { return Napi::String::From(info.Env(), llama_print_system_info()); }
+Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
+    if (info.Length() < 1 || !info[0].IsNumber()) {
+        addonLoggerLogLevel = 5;
+
+        return info.Env().Undefined();
+    }
+
+    addonLoggerLogLevel = info[0].As<Napi::Number>().Int32Value();
+
+    return info.Env().Undefined();
+}
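For reference, `addonGetGgmlLogLevelNumber` and `setLoggerLogLevel` share one numeric scale: lower means more severe, `1` is the fallback for unrecognized ggml levels, and a message is forwarded only when its number does not exceed `addonLoggerLogLevel`. Summarized as an illustrative enum:

```cpp
// Illustrative recap of the addon's numeric log levels (not a type that exists
// in the addon itself). setLoggerLogLevel(n) keeps messages with level <= n;
// calling it without a number resets the threshold to 5 (everything).
enum AddonLogLevel {
    addonLogLevelUnknown = 1, // fallback for unrecognized ggml levels
    addonLogLevelError = 2,   // also the level that is routed to stderr
    addonLogLevelWarn = 3,
    addonLogLevelInfo = 4,
    addonLogLevelDebug = 5    // the reset value used by setLoggerLogLevel()
};
// e.g. after setLoggerLogLevel(3), only unknown/error/warn messages reach the logger.
```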
 
 Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
-    llama_backend_init(false);
-    exports.DefineProperties({
-        Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
-    });
-    LLAMAModel::init(exports);
-    LLAMAGrammar::init(exports);
-    LLAMAGrammarEvaluationState::init(exports);
-    LLAMAContext::init(exports);
-    return exports;
+    llama_backend_init(false);
+    exports.DefineProperties({
+        Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
+        Napi::PropertyDescriptor::Function("setLogger", setLogger),
+        Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
+        Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
+    });
+    AddonModel::init(exports);
+    AddonGrammar::init(exports);
+    AddonGrammarEvaluationState::init(exports);
+    AddonContext::init(exports);
+
+    llama_log_set(addonLlamaCppLogCallback, nullptr);
+
+    return exports;
 }
 
 NODE_API_MODULE(NODE_GYP_MODULE_NAME, registerCallback)