node-llama-cpp 3.0.0-beta.3 → 3.0.0-beta.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (690)
  1. package/README.md +14 -11
  2. package/bins/linux-arm64/_nlcBuildMetadata.json +1 -0
  3. package/bins/linux-arm64/llama-addon.node +0 -0
  4. package/bins/linux-armv7l/_nlcBuildMetadata.json +1 -0
  5. package/bins/linux-armv7l/llama-addon.node +0 -0
  6. package/bins/linux-x64/_nlcBuildMetadata.json +1 -0
  7. package/bins/linux-x64/llama-addon.node +0 -0
  8. package/bins/linux-x64-cuda/_nlcBuildMetadata.json +1 -0
  9. package/bins/linux-x64-cuda/llama-addon.node +0 -0
  10. package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -0
  11. package/bins/linux-x64-vulkan/llama-addon.node +0 -0
  12. package/bins/mac-arm64-metal/_nlcBuildMetadata.json +1 -0
  13. package/bins/mac-arm64-metal/default.metallib +0 -0
  14. package/bins/mac-arm64-metal/llama-addon.node +0 -0
  15. package/bins/mac-x64/_nlcBuildMetadata.json +1 -0
  16. package/bins/mac-x64/llama-addon.node +0 -0
  17. package/bins/win-arm64/_nlcBuildMetadata.json +1 -0
  18. package/bins/win-arm64/llama-addon.exp +0 -0
  19. package/bins/win-arm64/llama-addon.lib +0 -0
  20. package/bins/win-arm64/llama-addon.node +0 -0
  21. package/bins/win-x64/_nlcBuildMetadata.json +1 -0
  22. package/bins/win-x64/llama-addon.exp +0 -0
  23. package/bins/win-x64/llama-addon.lib +0 -0
  24. package/bins/win-x64/llama-addon.node +0 -0
  25. package/bins/win-x64-cuda/_nlcBuildMetadata.json +1 -0
  26. package/bins/win-x64-cuda/llama-addon.exp +0 -0
  27. package/bins/win-x64-cuda/llama-addon.lib +0 -0
  28. package/bins/win-x64-cuda/llama-addon.node +0 -0
  29. package/bins/win-x64-vulkan/_nlcBuildMetadata.json +1 -0
  30. package/bins/win-x64-vulkan/llama-addon.exp +0 -0
  31. package/bins/win-x64-vulkan/llama-addon.lib +0 -0
  32. package/bins/win-x64-vulkan/llama-addon.node +0 -0
  33. package/dist/ChatWrapper.d.ts +8 -39
  34. package/dist/ChatWrapper.js +115 -72
  35. package/dist/ChatWrapper.js.map +1 -1
  36. package/dist/apiDocsIndex.d.ts +1 -0
  37. package/dist/apiDocsIndex.js +7 -0
  38. package/dist/apiDocsIndex.js.map +1 -0
  39. package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +54 -8
  40. package/dist/bindings/AddonTypes.js +2 -0
  41. package/dist/bindings/AddonTypes.js.map +1 -0
  42. package/dist/bindings/Llama.d.ts +47 -0
  43. package/dist/bindings/Llama.js +353 -0
  44. package/dist/bindings/Llama.js.map +1 -0
  45. package/dist/bindings/consts.d.ts +2 -0
  46. package/dist/bindings/consts.js +11 -0
  47. package/dist/bindings/consts.js.map +1 -0
  48. package/dist/bindings/getLlama.d.ts +148 -0
  49. package/dist/bindings/getLlama.js +401 -0
  50. package/dist/bindings/getLlama.js.map +1 -0
  51. package/dist/bindings/types.d.ts +56 -0
  52. package/dist/bindings/types.js +77 -0
  53. package/dist/bindings/types.js.map +1 -0
  54. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  55. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  56. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  57. package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
  58. package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
  59. package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
  60. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  61. package/dist/bindings/utils/asyncEvery.js +15 -0
  62. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  63. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  64. package/dist/bindings/utils/asyncSome.js +27 -0
  65. package/dist/bindings/utils/asyncSome.js.map +1 -0
  66. package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
  67. package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
  68. package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
  69. package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
  70. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
  71. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
  72. package/dist/bindings/utils/cloneLlamaCppRepo.js +166 -0
  73. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
  74. package/dist/bindings/utils/compileLLamaCpp.d.ts +17 -0
  75. package/dist/bindings/utils/compileLLamaCpp.js +226 -0
  76. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
  77. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
  78. package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
  79. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  80. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  81. package/dist/bindings/utils/detectGlibc.js +46 -0
  82. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  83. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +10 -0
  84. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  85. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  86. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
  87. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +93 -0
  88. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
  89. package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
  90. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
  91. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
  92. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
  93. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
  94. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
  95. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
  96. package/dist/bindings/utils/getGpuTypesToUseForOption.js +30 -0
  97. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  98. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  99. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  100. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  101. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  102. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  103. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  104. package/dist/bindings/utils/getPlatform.d.ts +2 -0
  105. package/dist/bindings/utils/getPlatform.js +15 -0
  106. package/dist/bindings/utils/getPlatform.js.map +1 -0
  107. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  108. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  109. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  110. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  111. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  112. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  113. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  114. package/dist/bindings/utils/hasFileInPath.js +34 -0
  115. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  116. package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
  117. package/dist/bindings/utils/lastBuildInfo.js +17 -0
  118. package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
  119. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
  120. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +22 -0
  121. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
  122. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  123. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  124. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  125. package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
  126. package/dist/bindings/utils/resolveCustomCmakeOptions.js +47 -0
  127. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
  128. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  129. package/dist/bindings/utils/testBindingBinary.js +100 -0
  130. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  131. package/dist/bindings/utils/testCmakeBinary.d.ts +6 -0
  132. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  133. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  134. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  135. package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
  136. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  137. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -9
  138. package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
  139. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  140. package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
  141. package/dist/chatWrappers/FalconChatWrapper.js +38 -21
  142. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  143. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
  144. package/dist/chatWrappers/FunctionaryChatWrapper.js +323 -118
  145. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  146. package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
  147. package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
  148. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  149. package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
  150. package/dist/chatWrappers/GeneralChatWrapper.js +45 -22
  151. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  152. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
  153. package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +38 -20
  154. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  155. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
  156. package/dist/chatWrappers/Llama3ChatWrapper.js +174 -0
  157. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  158. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +67 -0
  159. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +371 -0
  160. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  161. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +54 -0
  162. package/dist/chatWrappers/generic/TemplateChatWrapper.js +200 -0
  163. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  164. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
  165. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  166. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  167. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +42 -0
  168. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +82 -0
  169. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  170. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  171. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +210 -0
  172. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  173. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +69 -0
  174. package/dist/chatWrappers/utils/resolveChatWrapper.js +243 -0
  175. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  176. package/dist/cli/cli.js +21 -7
  177. package/dist/cli/cli.js.map +1 -1
  178. package/dist/cli/commands/BuildCommand.d.ts +11 -4
  179. package/dist/cli/commands/BuildCommand.js +114 -41
  180. package/dist/cli/commands/BuildCommand.js.map +1 -1
  181. package/dist/cli/commands/ChatCommand.d.ts +18 -6
  182. package/dist/cli/commands/ChatCommand.js +299 -143
  183. package/dist/cli/commands/ChatCommand.js.map +1 -1
  184. package/dist/cli/commands/ClearCommand.d.ts +1 -1
  185. package/dist/cli/commands/ClearCommand.js +11 -12
  186. package/dist/cli/commands/ClearCommand.js.map +1 -1
  187. package/dist/cli/commands/CompleteCommand.d.ts +29 -0
  188. package/dist/cli/commands/CompleteCommand.js +365 -0
  189. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  190. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  191. package/dist/cli/commands/DebugCommand.js +54 -0
  192. package/dist/cli/commands/DebugCommand.js.map +1 -0
  193. package/dist/cli/commands/DownloadCommand.d.ts +7 -4
  194. package/dist/cli/commands/DownloadCommand.js +121 -70
  195. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  196. package/dist/cli/commands/InfillCommand.d.ts +31 -0
  197. package/dist/cli/commands/InfillCommand.js +401 -0
  198. package/dist/cli/commands/InfillCommand.js.map +1 -0
  199. package/dist/cli/commands/InitCommand.d.ts +11 -0
  200. package/dist/cli/commands/InitCommand.js +195 -0
  201. package/dist/cli/commands/InitCommand.js.map +1 -0
  202. package/dist/cli/commands/OnPostInstallCommand.js +9 -10
  203. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  204. package/dist/cli/commands/PullCommand.d.ts +12 -0
  205. package/dist/cli/commands/PullCommand.js +117 -0
  206. package/dist/cli/commands/PullCommand.js.map +1 -0
  207. package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
  208. package/dist/cli/commands/inspect/InspectCommand.js +19 -0
  209. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  210. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
  211. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +136 -0
  212. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  213. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  214. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +164 -0
  215. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  216. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +17 -0
  217. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +613 -0
  218. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  219. package/dist/cli/projectTemplates.d.ts +7 -0
  220. package/dist/cli/projectTemplates.js +10 -0
  221. package/dist/cli/projectTemplates.js.map +1 -0
  222. package/dist/cli/recommendedModels.d.ts +2 -0
  223. package/dist/cli/recommendedModels.js +363 -0
  224. package/dist/cli/recommendedModels.js.map +1 -0
  225. package/dist/cli/startCreateCli.d.ts +2 -0
  226. package/dist/cli/startCreateCli.js +26 -0
  227. package/dist/cli/startCreateCli.js.map +1 -0
  228. package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
  229. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  230. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  231. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  232. package/dist/cli/utils/ConsoleTable.js +86 -0
  233. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  234. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  235. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  236. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  237. package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
  238. package/dist/cli/utils/consolePromptQuestion.js +82 -0
  239. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  240. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  241. package/dist/cli/utils/getReadablePath.js +14 -0
  242. package/dist/cli/utils/getReadablePath.js.map +1 -0
  243. package/dist/cli/utils/interactivelyAskForModel.d.ts +7 -0
  244. package/dist/cli/utils/interactivelyAskForModel.js +451 -0
  245. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
  246. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  247. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  248. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  249. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  250. package/dist/cli/utils/printCommonInfoLines.js +75 -0
  251. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  252. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  253. package/dist/cli/utils/printInfoLine.js +54 -0
  254. package/dist/cli/utils/printInfoLine.js.map +1 -0
  255. package/dist/cli/utils/projectTemplates.d.ts +19 -0
  256. package/dist/cli/utils/projectTemplates.js +47 -0
  257. package/dist/cli/utils/projectTemplates.js.map +1 -0
  258. package/dist/cli/utils/resolveCommandGgufPath.d.ts +4 -0
  259. package/dist/cli/utils/resolveCommandGgufPath.js +71 -0
  260. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  261. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  262. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  263. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  264. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  265. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  266. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  267. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  268. package/dist/cli/utils/splitAnsiToLines.js +32 -0
  269. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  270. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
  271. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
  272. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
  273. package/dist/commands.d.ts +1 -0
  274. package/dist/commands.js +3 -0
  275. package/dist/commands.js.map +1 -1
  276. package/dist/config.d.ts +38 -5
  277. package/dist/config.js +61 -16
  278. package/dist/config.js.map +1 -1
  279. package/dist/consts.d.ts +3 -0
  280. package/dist/consts.js +10 -0
  281. package/dist/consts.js.map +1 -0
  282. package/dist/{llamaEvaluator → evaluator}/LlamaChat/LlamaChat.d.ts +112 -39
  283. package/dist/evaluator/LlamaChat/LlamaChat.js +1512 -0
  284. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
  285. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
  286. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
  287. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
  288. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
  289. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
  290. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
  291. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
  292. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
  293. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
  294. package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +42 -16
  295. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  296. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +288 -0
  297. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +419 -0
  298. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  299. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +39 -0
  300. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
  301. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
  302. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
  303. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
  304. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  305. package/dist/evaluator/LlamaCompletion.d.ts +143 -0
  306. package/dist/evaluator/LlamaCompletion.js +418 -0
  307. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  308. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +41 -21
  309. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +270 -81
  310. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
  311. package/dist/evaluator/LlamaContext/types.d.ts +140 -0
  312. package/dist/evaluator/LlamaContext/types.js.map +1 -0
  313. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  314. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  315. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  316. package/dist/{llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js → evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js} +4 -4
  317. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  318. package/dist/evaluator/LlamaEmbeddingContext.d.ts +51 -0
  319. package/dist/evaluator/LlamaEmbeddingContext.js +73 -0
  320. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
  321. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.d.ts +10 -7
  322. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +14 -11
  323. package/dist/evaluator/LlamaGrammar.js.map +1 -0
  324. package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
  325. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
  326. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
  327. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +3 -3
  328. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
  329. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +236 -0
  330. package/dist/evaluator/LlamaModel/LlamaModel.js +679 -0
  331. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
  332. package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
  333. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
  334. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
  335. package/dist/evaluator/TokenBias.d.ts +22 -0
  336. package/dist/evaluator/TokenBias.js +33 -0
  337. package/dist/evaluator/TokenBias.js.map +1 -0
  338. package/dist/evaluator/TokenMeter.d.ts +54 -0
  339. package/dist/evaluator/TokenMeter.js +86 -0
  340. package/dist/evaluator/TokenMeter.js.map +1 -0
  341. package/dist/gguf/consts.d.ts +3 -0
  342. package/dist/gguf/consts.js +8 -0
  343. package/dist/gguf/consts.js.map +1 -0
  344. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  345. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  346. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  347. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  348. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  349. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  350. package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
  351. package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
  352. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  353. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
  354. package/dist/gguf/fileReaders/GgufFsFileReader.js +46 -0
  355. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  356. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
  357. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
  358. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  359. package/dist/gguf/insights/GgufInsights.d.ts +48 -0
  360. package/dist/gguf/insights/GgufInsights.js +381 -0
  361. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  362. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +87 -0
  363. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +141 -0
  364. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  365. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +18 -0
  366. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +76 -0
  367. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  368. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +14 -0
  369. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +177 -0
  370. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  371. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  372. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  373. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  374. package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
  375. package/dist/gguf/parser/GgufV2Parser.js +115 -0
  376. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  377. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  378. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  379. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  380. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  381. package/dist/gguf/parser/parseGguf.js +63 -0
  382. package/dist/gguf/parser/parseGguf.js.map +1 -0
  383. package/dist/gguf/readGgufFileInfo.d.ts +33 -0
  384. package/dist/gguf/readGgufFileInfo.js +66 -0
  385. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  386. package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
  387. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  388. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  389. package/dist/gguf/types/GgufMetadataTypes.d.ts +335 -0
  390. package/dist/gguf/types/GgufMetadataTypes.js +86 -0
  391. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  392. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  393. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  394. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  395. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  396. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  397. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  398. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
  399. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
  400. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  401. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  402. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  403. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  404. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  405. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  406. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  407. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  408. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  409. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  410. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  411. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
  412. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  413. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  414. package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
  415. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  416. package/dist/index.d.ts +41 -18
  417. package/dist/index.js +36 -15
  418. package/dist/index.js.map +1 -1
  419. package/dist/state.d.ts +4 -0
  420. package/dist/state.js +14 -0
  421. package/dist/state.js.map +1 -1
  422. package/dist/types.d.ts +116 -5
  423. package/dist/types.js.map +1 -1
  424. package/dist/utils/DisposeGuard.d.ts +13 -0
  425. package/dist/utils/DisposeGuard.js +120 -0
  426. package/dist/utils/DisposeGuard.js.map +1 -0
  427. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  428. package/dist/utils/InsufficientMemoryError.js +6 -0
  429. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  430. package/dist/utils/LlamaText.d.ts +70 -26
  431. package/dist/utils/LlamaText.js +469 -157
  432. package/dist/utils/LlamaText.js.map +1 -1
  433. package/dist/utils/LruCache.d.ts +12 -0
  434. package/dist/utils/LruCache.js +44 -0
  435. package/dist/utils/LruCache.js.map +1 -0
  436. package/dist/utils/ReplHistory.js.map +1 -1
  437. package/dist/utils/StopGenerationDetector.d.ts +25 -9
  438. package/dist/utils/StopGenerationDetector.js +93 -22
  439. package/dist/utils/StopGenerationDetector.js.map +1 -1
  440. package/dist/utils/TokenStreamRegulator.d.ts +9 -4
  441. package/dist/utils/TokenStreamRegulator.js +81 -8
  442. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  443. package/dist/utils/UnsupportedError.d.ts +2 -0
  444. package/dist/utils/UnsupportedError.js +7 -0
  445. package/dist/utils/UnsupportedError.js.map +1 -0
  446. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
  447. package/dist/utils/clearTempFolder.js.map +1 -1
  448. package/dist/utils/cmake.js +38 -20
  449. package/dist/utils/cmake.js.map +1 -1
  450. package/dist/utils/createModelDownloader.d.ts +108 -0
  451. package/dist/utils/createModelDownloader.js +231 -0
  452. package/dist/utils/createModelDownloader.js.map +1 -0
  453. package/dist/utils/findBestOption.d.ts +4 -0
  454. package/dist/utils/findBestOption.js +15 -0
  455. package/dist/utils/findBestOption.js.map +1 -0
  456. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
  457. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
  458. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  459. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  460. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
  461. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
  462. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
  463. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
  464. package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
  465. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  466. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  467. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  468. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  469. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  470. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  471. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  472. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  473. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  474. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  475. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
  476. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
  477. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  478. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  479. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
  480. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
  481. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
  482. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  483. package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
  484. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  485. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  486. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  487. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
  488. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
  489. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  490. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
  491. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
  492. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
  493. package/dist/utils/gbnfJson/types.d.ts +3 -0
  494. package/dist/utils/gbnfJson/types.js.map +1 -1
  495. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
  496. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
  497. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
  498. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
  499. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
  500. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  501. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +2 -2
  502. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  503. package/dist/utils/getBuildDefaults.d.ts +1 -2
  504. package/dist/utils/getBuildDefaults.js +2 -3
  505. package/dist/utils/getBuildDefaults.js.map +1 -1
  506. package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
  507. package/dist/utils/getConsoleLogPrefix.js +10 -0
  508. package/dist/utils/getConsoleLogPrefix.js.map +1 -0
  509. package/dist/utils/getGrammarsFolder.d.ts +2 -1
  510. package/dist/utils/getGrammarsFolder.js +8 -7
  511. package/dist/utils/getGrammarsFolder.js.map +1 -1
  512. package/dist/utils/getModuleVersion.d.ts +1 -0
  513. package/dist/utils/getModuleVersion.js +13 -0
  514. package/dist/utils/getModuleVersion.js.map +1 -0
  515. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  516. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  517. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  518. package/dist/utils/getReadableContextSize.d.ts +1 -0
  519. package/dist/utils/getReadableContextSize.js +7 -0
  520. package/dist/utils/getReadableContextSize.js.map +1 -0
  521. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
  522. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  523. package/dist/utils/gitReleaseBundles.js +73 -5
  524. package/dist/utils/gitReleaseBundles.js.map +1 -1
  525. package/dist/utils/hashString.d.ts +1 -0
  526. package/dist/utils/hashString.js +8 -0
  527. package/dist/utils/hashString.js.map +1 -0
  528. package/dist/utils/isLockfileActive.d.ts +4 -0
  529. package/dist/utils/isLockfileActive.js +12 -0
  530. package/dist/utils/isLockfileActive.js.map +1 -0
  531. package/dist/utils/isToken.d.ts +2 -0
  532. package/dist/utils/isToken.js +4 -0
  533. package/dist/utils/isToken.js.map +1 -0
  534. package/dist/utils/isUrl.d.ts +1 -0
  535. package/dist/utils/isUrl.js +15 -0
  536. package/dist/utils/isUrl.js.map +1 -0
  537. package/dist/utils/mergeUnionTypes.d.ts +10 -0
  538. package/dist/utils/mergeUnionTypes.js +2 -0
  539. package/dist/utils/mergeUnionTypes.js.map +1 -0
  540. package/dist/utils/parseModelFileName.d.ts +1 -0
  541. package/dist/utils/parseModelFileName.js +6 -1
  542. package/dist/utils/parseModelFileName.js.map +1 -1
  543. package/dist/utils/parseTextTemplate.d.ts +66 -0
  544. package/dist/utils/parseTextTemplate.js +116 -0
  545. package/dist/utils/parseTextTemplate.js.map +1 -0
  546. package/dist/utils/prettyPrintObject.d.ts +10 -0
  547. package/dist/utils/prettyPrintObject.js +84 -0
  548. package/dist/utils/prettyPrintObject.js.map +1 -0
  549. package/dist/utils/removeNullFields.d.ts +2 -1
  550. package/dist/utils/removeNullFields.js +8 -0
  551. package/dist/utils/removeNullFields.js.map +1 -1
  552. package/dist/utils/resolveGithubRelease.d.ts +2 -0
  553. package/dist/utils/resolveGithubRelease.js +36 -0
  554. package/dist/utils/resolveGithubRelease.js.map +1 -0
  555. package/dist/utils/runtime.d.ts +4 -0
  556. package/dist/utils/runtime.js +8 -0
  557. package/dist/utils/runtime.js.map +1 -0
  558. package/dist/utils/safeEventCallback.d.ts +6 -0
  559. package/dist/utils/safeEventCallback.js +29 -0
  560. package/dist/utils/safeEventCallback.js.map +1 -0
  561. package/dist/utils/spawnCommand.d.ts +11 -1
  562. package/dist/utils/spawnCommand.js +56 -6
  563. package/dist/utils/spawnCommand.js.map +1 -1
  564. package/dist/utils/tokenizeInput.d.ts +3 -0
  565. package/dist/utils/tokenizeInput.js +12 -0
  566. package/dist/utils/tokenizeInput.js.map +1 -0
  567. package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
  568. package/dist/utils/truncateTextAndRoundToWords.js +30 -0
  569. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
  570. package/dist/utils/utilTypes.d.ts +3 -0
  571. package/dist/utils/utilTypes.js +2 -0
  572. package/dist/utils/utilTypes.js.map +1 -0
  573. package/dist/utils/waitForLockfileRelease.d.ts +5 -0
  574. package/dist/utils/waitForLockfileRelease.js +20 -0
  575. package/dist/utils/waitForLockfileRelease.js.map +1 -0
  576. package/dist/utils/withLockfile.d.ts +7 -0
  577. package/dist/utils/withLockfile.js +44 -0
  578. package/dist/utils/withLockfile.js.map +1 -0
  579. package/dist/utils/withOra.d.ts +2 -0
  580. package/dist/utils/withOra.js +22 -6
  581. package/dist/utils/withOra.js.map +1 -1
  582. package/dist/utils/withProgressLog.d.ts +23 -0
  583. package/dist/utils/withProgressLog.js +211 -0
  584. package/dist/utils/withProgressLog.js.map +1 -0
  585. package/dist/utils/withStatusLogs.d.ts +2 -1
  586. package/dist/utils/withStatusLogs.js +12 -9
  587. package/dist/utils/withStatusLogs.js.map +1 -1
  588. package/dist/utils/wrapAbortSignal.d.ts +2 -0
  589. package/dist/utils/wrapAbortSignal.js +9 -0
  590. package/dist/utils/wrapAbortSignal.js.map +1 -0
  591. package/llama/.clang-format +1 -2
  592. package/llama/CMakeLists.txt +115 -4
  593. package/llama/addon.cpp +1300 -97
  594. package/llama/binariesGithubRelease.json +1 -1
  595. package/llama/gitRelease.bundle +0 -0
  596. package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
  597. package/llama/gpuInfo/cuda-gpu-info.h +10 -0
  598. package/llama/gpuInfo/metal-gpu-info.h +8 -0
  599. package/llama/gpuInfo/metal-gpu-info.mm +30 -0
  600. package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
  601. package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
  602. package/llama/grammars/README.md +58 -5
  603. package/llama/grammars/json.gbnf +4 -4
  604. package/llama/grammars/json_arr.gbnf +4 -4
  605. package/llama/llama.cpp.info.json +4 -0
  606. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  607. package/package.json +78 -53
  608. package/templates/packed/electron-typescript-react.json +1 -0
  609. package/templates/packed/node-typescript.json +1 -0
  610. package/dist/AbortError.d.ts +0 -2
  611. package/dist/AbortError.js +0 -7
  612. package/dist/AbortError.js.map +0 -1
  613. package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
  614. package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
  615. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  616. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
  617. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  618. package/dist/llamaEvaluator/LlamaBins.d.ts +0 -18
  619. package/dist/llamaEvaluator/LlamaBins.js +0 -5
  620. package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
  621. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js +0 -704
  622. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +0 -1
  623. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -21
  624. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -120
  625. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
  626. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +0 -1
  627. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.d.ts +0 -146
  628. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js +0 -211
  629. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +0 -1
  630. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +0 -1
  631. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
  632. package/dist/llamaEvaluator/LlamaContext/types.d.ts +0 -82
  633. package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
  634. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  635. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  636. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  637. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  638. package/dist/llamaEvaluator/LlamaEmbeddingContext.d.ts +0 -35
  639. package/dist/llamaEvaluator/LlamaEmbeddingContext.js +0 -73
  640. package/dist/llamaEvaluator/LlamaEmbeddingContext.js.map +0 -1
  641. package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
  642. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
  643. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
  644. package/dist/llamaEvaluator/LlamaModel.d.ts +0 -119
  645. package/dist/llamaEvaluator/LlamaModel.js +0 -322
  646. package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
  647. package/dist/utils/binariesGithubRelease.js.map +0 -1
  648. package/dist/utils/clearLlamaBuild.d.ts +0 -1
  649. package/dist/utils/clearLlamaBuild.js +0 -12
  650. package/dist/utils/clearLlamaBuild.js.map +0 -1
  651. package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
  652. package/dist/utils/cloneLlamaCppRepo.js +0 -102
  653. package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
  654. package/dist/utils/compileLLamaCpp.d.ts +0 -8
  655. package/dist/utils/compileLLamaCpp.js +0 -132
  656. package/dist/utils/compileLLamaCpp.js.map +0 -1
  657. package/dist/utils/getBin.js +0 -78
  658. package/dist/utils/getBin.js.map +0 -1
  659. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
  660. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
  661. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
  662. package/dist/utils/getReleaseInfo.d.ts +0 -7
  663. package/dist/utils/getReleaseInfo.js +0 -30
  664. package/dist/utils/getReleaseInfo.js.map +0 -1
  665. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  666. package/dist/utils/parseModelTypeDescription.js +0 -9
  667. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  668. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  669. package/dist/utils/resolveChatWrapper.js +0 -16
  670. package/dist/utils/resolveChatWrapper.js.map +0 -1
  671. package/dist/utils/usedBinFlag.d.ts +0 -6
  672. package/dist/utils/usedBinFlag.js +0 -15
  673. package/dist/utils/usedBinFlag.js.map +0 -1
  674. package/llama/usedBin.json +0 -3
  675. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  676. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  677. package/llamaBins/linux-x64/llama-addon.node +0 -0
  678. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  679. package/llamaBins/mac-x64/llama-addon.node +0 -0
  680. package/llamaBins/win-x64/llama-addon.exp +0 -0
  681. package/llamaBins/win-x64/llama-addon.lib +0 -0
  682. package/llamaBins/win-x64/llama-addon.node +0 -0
  683. /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
  684. /package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +0 -0
  685. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
  686. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  687. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  688. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  689. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
  690. /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
package/llama/addon.cpp CHANGED
@@ -3,42 +3,284 @@
3
3
  #include <algorithm>
4
4
  #include <sstream>
5
5
  #include <vector>
6
+ #include <unordered_map>
6
7
 
7
8
  #include "common.h"
8
9
  #include "common/grammar-parser.h"
9
10
  #include "llama.h"
10
11
  #include "napi.h"
11
12
 
12
- std::string addon_model_token_to_piece(const struct llama_model * model, llama_token token) {
13
+ #ifdef GPU_INFO_USE_CUDA
14
+ # include "gpuInfo/cuda-gpu-info.h"
15
+ #endif
16
+ #ifdef GPU_INFO_USE_VULKAN
17
+ # include "gpuInfo/vulkan-gpu-info.h"
18
+ #endif
19
+ #ifdef GPU_INFO_USE_METAL
20
+ # include "gpuInfo/metal-gpu-info.h"
21
+ #endif
22
+
23
+
24
+ struct addon_logger_log {
25
+ public:
26
+ const int logLevelNumber;
27
+ const std::stringstream* stringStream;
28
+ };
29
+
30
+ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);
31
+
32
+ using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
33
+ void addonCallJsLogCallback(
34
+ Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
35
+ );
36
+ using AddonThreadSafeLogCallbackFunction =
37
+ Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
38
+
39
+
40
+ struct addon_progress_event {
41
+ public:
42
+ const float progress;
43
+ };
44
+
45
+ using AddonThreadSafeProgressCallbackFunctionContext = Napi::Reference<Napi::Value>;
46
+ void addonCallJsProgressCallback(
47
+ Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
48
+ );
49
+ using AddonThreadSafeProgressEventCallbackFunction =
50
+ Napi::TypedThreadSafeFunction<AddonThreadSafeProgressCallbackFunctionContext, addon_progress_event, addonCallJsProgressCallback>;
51
+
52
+
53
+ AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
54
+ bool addonJsLoggerCallbackSet = false;
55
+ int addonLoggerLogLevel = 5;
56
+ bool backendInitialized = false;
57
+ bool backendDisposed = false;
58
+
59
+ void addonCallJsProgressCallback(
60
+ Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
61
+ ) {
62
+ if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
63
+ try {
64
+ callback.Call({Napi::Number::New(env, data->progress)});
65
+ } catch (const Napi::Error& e) {}
66
+ }
67
+
68
+ if (data != nullptr) {
69
+ delete data;
70
+ }
71
+ }
72
+
73
+ static uint64_t calculateBatchMemorySize(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
74
+ uint64_t totalSize = 0;
75
+
76
+ if (embd) {
77
+ totalSize += sizeof(float) * n_tokens_alloc * embd;
78
+ } else {
79
+ totalSize += sizeof(llama_token) * n_tokens_alloc;
80
+ }
81
+
82
+ totalSize += sizeof(llama_pos) * n_tokens_alloc;
83
+ totalSize += sizeof(int32_t) * n_tokens_alloc;
84
+ totalSize += sizeof(llama_seq_id *) * (n_tokens_alloc + 1);
85
+
86
+ totalSize += sizeof(llama_seq_id) * n_seq_max * n_tokens_alloc;
87
+
88
+ totalSize += sizeof(int8_t) * n_tokens_alloc;
89
+
90
+ return totalSize;
91
+ }
92
+
93
+ static void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size) {
94
+ const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
95
+ while (size > 0) {
96
+ int64_t adjustSize = std::min(size, chunkSize);
97
+ Napi::MemoryManagement::AdjustExternalMemory(env, adjustSize);
98
+ size -= adjustSize;
99
+ }
100
+ }
101
+
102
+ static void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
103
+ const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
104
+ while (size > 0) {
105
+ int64_t adjustSize = std::min(size, chunkSize);
106
+ Napi::MemoryManagement::AdjustExternalMemory(env, -adjustSize);
107
+ size -= adjustSize;
108
+ }
109
+ }
110
+
111
+ std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token, bool specialTokens) {
13
112
  std::vector<char> result(8, 0);
14
- const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size());
113
+ const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
15
114
  if (n_tokens < 0) {
16
115
  result.resize(-n_tokens);
17
- int check = llama_token_to_piece(model, token, result.data(), result.size());
116
+ int check = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
18
117
  GGML_ASSERT(check == -n_tokens);
19
- }
20
- else {
118
+ } else {
21
119
  result.resize(n_tokens);
22
120
  }
23
121
 
24
122
  return std::string(result.data(), result.size());
25
123
  }
26
124
 
125
+ #ifdef GPU_INFO_USE_CUDA
126
+ void logCudaError(const char* message) {
127
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
128
+ }
129
+ #endif
130
+ #ifdef GPU_INFO_USE_VULKAN
131
+ void logVulkanWarning(const char* message) {
132
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr);
133
+ }
134
+ #endif
135
+
136
+ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
137
+ uint64_t total = 0;
138
+ uint64_t used = 0;
139
+
140
+ #ifdef GPU_INFO_USE_CUDA
141
+ size_t cudaDeviceTotal = 0;
142
+ size_t cudaDeviceUsed = 0;
143
+ bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
144
+
145
+ if (cudeGetInfoSuccess) {
146
+ total += cudaDeviceTotal;
147
+ used += cudaDeviceUsed;
148
+ }
149
+ #endif
150
+
151
+ #ifdef GPU_INFO_USE_VULKAN
152
+ uint64_t vulkanDeviceTotal = 0;
153
+ uint64_t vulkanDeviceUsed = 0;
154
+ const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, logVulkanWarning);
155
+
156
+ if (vulkanDeviceSupportsMemoryBudgetExtension) {
157
+ total += vulkanDeviceTotal;
158
+ used += vulkanDeviceUsed;
159
+ }
160
+ #endif
161
+
162
+ #ifdef GPU_INFO_USE_METAL
163
+ uint64_t metalDeviceTotal = 0;
164
+ uint64_t metalDeviceUsed = 0;
165
+ getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed);
166
+
167
+ total += metalDeviceTotal;
168
+ used += metalDeviceUsed;
169
+ #endif
170
+
171
+ Napi::Object result = Napi::Object::New(info.Env());
172
+ result.Set("total", Napi::Number::From(info.Env(), total));
173
+ result.Set("used", Napi::Number::From(info.Env(), used));
174
+
175
+ return result;
176
+ }
177
+
178
+ Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
179
+ std::vector<std::string> deviceNames;
180
+
181
+ #ifdef GPU_INFO_USE_CUDA
182
+ gpuInfoGetCudaDeviceNames(&deviceNames, logCudaError);
183
+ #endif
184
+
185
+ #ifdef GPU_INFO_USE_VULKAN
186
+ gpuInfoGetVulkanDeviceNames(&deviceNames, logVulkanWarning);
187
+ #endif
188
+
189
+ #ifdef GPU_INFO_USE_METAL
190
+ getMetalGpuDeviceNames(&deviceNames);
191
+ #endif
192
+
193
+ Napi::Object result = Napi::Object::New(info.Env());
194
+
195
+ Napi::Array deviceNamesNapiArray = Napi::Array::New(info.Env(), deviceNames.size());
196
+ for (size_t i = 0; i < deviceNames.size(); ++i) {
197
+ deviceNamesNapiArray[i] = Napi::String::New(info.Env(), deviceNames[i]);
198
+ }
199
+ result.Set("deviceNames", deviceNamesNapiArray);
200
+
201
+ return result;
202
+ }
203
+
204
+ Napi::Value getGpuType(const Napi::CallbackInfo& info) {
205
+ #ifdef GPU_INFO_USE_CUDA
206
+ return Napi::String::New(info.Env(), "cuda");
207
+ #endif
208
+
209
+ #ifdef GPU_INFO_USE_VULKAN
210
+ return Napi::String::New(info.Env(), "vulkan");
211
+ #endif
212
+
213
+ #ifdef GPU_INFO_USE_METAL
214
+ return Napi::String::New(info.Env(), "metal");
215
+ #endif
216
+
217
+ return info.Env().Undefined();
218
+ }
219
+
220
+ static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
221
+ if (token < 0) {
222
+ return Napi::Number::From(info.Env(), -1);
223
+ }
224
+
225
+ auto tokenAttributes = llama_token_get_attr(model, token);
226
+
227
+ if (tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED || tokenAttributes & LLAMA_TOKEN_ATTR_UNKNOWN) {
228
+ return Napi::Number::From(info.Env(), -1);
229
+ }
230
+
231
+ return Napi::Number::From(info.Env(), token);
232
+ }
233
+
234
+ static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
235
+ if (token < 0) {
236
+ return Napi::Number::From(info.Env(), -1);
237
+ }
238
+
239
+ auto tokenAttributes = llama_token_get_attr(model, token);
240
+
241
+ if (!(tokenAttributes & LLAMA_TOKEN_ATTR_CONTROL) && !(tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED)) {
242
+ return Napi::Number::From(info.Env(), -1);
243
+ }
244
+
245
+ return Napi::Number::From(info.Env(), token);
246
+ }
247
+
248
+ static bool llamaModelParamsProgressCallback(float progress, void * user_data);
249
+
27
250
  class AddonModel : public Napi::ObjectWrap<AddonModel> {
28
251
  public:
29
252
  llama_model_params model_params;
30
253
  llama_model* model;
254
+ uint64_t loadedModelSize = 0;
255
+ Napi::Reference<Napi::Object> addonExportsRef;
256
+ bool hasAddonExportsRef = false;
257
+
258
+ std::string modelPath;
259
+ bool modelLoaded = false;
260
+ bool abortModelLoad = false;
261
+ bool model_load_stopped = false;
262
+ float rawModelLoadPercentage = 0;
263
+ unsigned modelLoadPercentage = 0;
264
+ AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
265
+ bool onLoadProgressEventCallbackSet = false;
266
+ bool hasLoadAbortSignal = false;
267
+
31
268
  bool disposed = false;
32
269
 
33
270
  AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
34
271
  model_params = llama_model_default_params();
35
272
 
36
273
  // Get the model path
37
- std::string modelPath = info[0].As<Napi::String>().Utf8Value();
274
+ modelPath = info[0].As<Napi::String>().Utf8Value();
38
275
 
39
276
  if (info.Length() > 1 && info[1].IsObject()) {
40
277
  Napi::Object options = info[1].As<Napi::Object>();
41
278
 
279
+ if (options.Has("addonExports")) {
280
+ addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
281
+ hasAddonExportsRef = true;
282
+ }
283
+
42
284
  if (options.Has("gpuLayers")) {
43
285
  model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
44
286
  }
@@ -54,14 +296,41 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
54
296
  if (options.Has("useMlock")) {
55
297
  model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
56
298
  }
57
- }
58
299
 
59
- llama_backend_init(false);
60
- model = llama_load_model_from_file(modelPath.c_str(), model_params);
300
+ if (options.Has("checkTensors")) {
301
+ model_params.check_tensors = options.Get("checkTensors").As<Napi::Boolean>().Value();
302
+ }
61
303
 
62
- if (model == NULL) {
63
- Napi::Error::New(info.Env(), "Failed to load model").ThrowAsJavaScriptException();
64
- return;
304
+ if (options.Has("onLoadProgress")) {
305
+ auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
306
+ if (onLoadProgressJSCallback.IsFunction()) {
307
+ AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
308
+ addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
309
+ info.Env(),
310
+ onLoadProgressJSCallback,
311
+ "onLoadProgressCallback",
312
+ 0,
313
+ 1,
314
+ context,
315
+ [](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
316
+ addonModel->onLoadProgressEventCallbackSet = false;
317
+
318
+ delete ctx;
319
+ },
320
+ this
321
+ );
322
+ onLoadProgressEventCallbackSet = true;
323
+ }
324
+ }
325
+
326
+ if (options.Has("hasLoadAbortSignal")) {
327
+ hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
328
+ }
329
+
330
+ if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
331
+ model_params.progress_callback_user_data = &(*this);
332
+ model_params.progress_callback = llamaModelParamsProgressCallback;
333
+ }
65
334
  }
66
335
  }
67
336
 
@@ -74,23 +343,32 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
74
343
  return;
75
344
  }
76
345
 
77
- llama_free_model(model);
78
346
  disposed = true;
79
- }
347
+ if (modelLoaded) {
348
+ modelLoaded = false;
349
+ llama_free_model(model);
80
350
 
81
- Napi::Value Dispose(const Napi::CallbackInfo& info) {
82
- if (disposed) {
83
- return info.Env().Undefined();
351
+ adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
352
+ loadedModelSize = 0;
84
353
  }
85
354
 
86
- dispose();
355
+ if (hasAddonExportsRef) {
356
+ addonExportsRef.Unref();
357
+ hasAddonExportsRef = false;
358
+ }
359
+ }
87
360
 
361
+ Napi::Value Init(const Napi::CallbackInfo& info);
362
+ Napi::Value LoadLora(const Napi::CallbackInfo& info);
363
+ Napi::Value AbortActiveModelLoad(const Napi::CallbackInfo& info) {
364
+ abortModelLoad = true;
88
365
  return info.Env().Undefined();
89
366
  }
367
+ Napi::Value Dispose(const Napi::CallbackInfo& info);
90
368
 
91
369
  Napi::Value Tokenize(const Napi::CallbackInfo& info) {
92
370
  if (disposed) {
93
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
371
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
94
372
  return info.Env().Undefined();
95
373
  }
96
374
 
@@ -108,18 +386,21 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
108
386
  }
109
387
  Napi::Value Detokenize(const Napi::CallbackInfo& info) {
110
388
  if (disposed) {
111
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
389
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
112
390
  return info.Env().Undefined();
113
391
  }
114
392
 
115
393
  Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
394
+ bool decodeSpecialTokens = info.Length() > 0
395
+ ? info[1].As<Napi::Boolean>().Value()
396
+ : false;
116
397
 
117
398
  // Create a stringstream for accumulating the decoded string.
118
399
  std::stringstream ss;
119
400
 
120
401
  // Decode each token and accumulate the result.
121
402
  for (size_t i = 0; i < tokens.ElementLength(); i++) {
122
- const std::string piece = addon_model_token_to_piece(model, (llama_token)tokens[i]);
403
+ const std::string piece = addon_model_token_to_piece(model, (llama_token)tokens[i], decodeSpecialTokens);
123
404
 
124
405
  if (piece.empty()) {
125
406
  continue;
@@ -133,16 +414,25 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
133
414
 
134
415
  Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info) {
135
416
  if (disposed) {
136
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
417
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
137
418
  return info.Env().Undefined();
138
419
  }
139
420
 
140
421
  return Napi::Number::From(info.Env(), llama_n_ctx_train(model));
141
422
  }
142
423
 
424
+ Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
425
+ if (disposed) {
426
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
427
+ return info.Env().Undefined();
428
+ }
429
+
430
+ return Napi::Number::From(info.Env(), llama_n_embd(model));
431
+ }
432
+
143
433
  Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
144
434
  if (disposed) {
145
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
435
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
146
436
  return info.Env().Undefined();
147
437
  }
148
438
 
@@ -151,7 +441,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
151
441
 
152
442
  Napi::Value GetTotalParameters(const Napi::CallbackInfo& info) {
153
443
  if (disposed) {
154
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
444
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
155
445
  return info.Env().Undefined();
156
446
  }
157
447
 
@@ -160,7 +450,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
160
450
 
161
451
  Napi::Value GetModelDescription(const Napi::CallbackInfo& info) {
162
452
  if (disposed) {
163
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
453
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
164
454
  return info.Env().Undefined();
165
455
  }
166
456
 
@@ -172,63 +462,63 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
172
462
 
173
463
  Napi::Value TokenBos(const Napi::CallbackInfo& info) {
174
464
  if (disposed) {
175
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
465
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
176
466
  return info.Env().Undefined();
177
467
  }
178
468
 
179
- return Napi::Number::From(info.Env(), llama_token_bos(model));
469
+ return getNapiControlToken(info, model, llama_token_bos(model));
180
470
  }
181
471
  Napi::Value TokenEos(const Napi::CallbackInfo& info) {
182
472
  if (disposed) {
183
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
473
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
184
474
  return info.Env().Undefined();
185
475
  }
186
476
 
187
- return Napi::Number::From(info.Env(), llama_token_eos(model));
477
+ return getNapiControlToken(info, model, llama_token_eos(model));
188
478
  }
189
479
  Napi::Value TokenNl(const Napi::CallbackInfo& info) {
190
480
  if (disposed) {
191
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
481
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
192
482
  return info.Env().Undefined();
193
483
  }
194
484
 
195
- return Napi::Number::From(info.Env(), llama_token_nl(model));
485
+ return getNapiToken(info, model, llama_token_nl(model));
196
486
  }
197
487
  Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
198
488
  if (disposed) {
199
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
489
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
200
490
  return info.Env().Undefined();
201
491
  }
202
492
 
203
- return Napi::Number::From(info.Env(), llama_token_prefix(model));
493
+ return getNapiControlToken(info, model, llama_token_prefix(model));
204
494
  }
205
495
  Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
206
496
  if (disposed) {
207
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
497
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
208
498
  return info.Env().Undefined();
209
499
  }
210
500
 
211
- return Napi::Number::From(info.Env(), llama_token_middle(model));
501
+ return getNapiControlToken(info, model, llama_token_middle(model));
212
502
  }
213
503
  Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
214
504
  if (disposed) {
215
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
505
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
216
506
  return info.Env().Undefined();
217
507
  }
218
508
 
219
- return Napi::Number::From(info.Env(), llama_token_suffix(model));
509
+ return getNapiControlToken(info, model, llama_token_suffix(model));
220
510
  }
221
511
  Napi::Value EotToken(const Napi::CallbackInfo& info) {
222
512
  if (disposed) {
223
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
513
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
224
514
  return info.Env().Undefined();
225
515
  }
226
516
 
227
- return Napi::Number::From(info.Env(), llama_token_eot(model));
517
+ return getNapiControlToken(info, model, llama_token_eot(model));
228
518
  }
229
519
  Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
230
520
  if (disposed) {
231
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
521
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
232
522
  return info.Env().Undefined();
233
523
  }
234
524
 
@@ -245,6 +535,57 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
245
535
  return Napi::String::New(info.Env(), ss.str());
246
536
  }
247
537
 
538
+ Napi::Value GetTokenAttributes(const Napi::CallbackInfo& info) {
539
+ if (disposed) {
540
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
541
+ return info.Env().Undefined();
542
+ }
543
+
544
+ if (info[0].IsNumber() == false) {
545
+ return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_ATTR_UNDEFINED));
546
+ }
547
+
548
+ int token = info[0].As<Napi::Number>().Int32Value();
549
+ auto tokenAttributes = llama_token_get_attr(model, token);
550
+
551
+ return Napi::Number::From(info.Env(), int32_t(tokenAttributes));
552
+ }
553
+ Napi::Value IsEogToken(const Napi::CallbackInfo& info) {
554
+ if (disposed) {
555
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
556
+ return info.Env().Undefined();
557
+ }
558
+
559
+ if (info[0].IsNumber() == false) {
560
+ return Napi::Boolean::New(info.Env(), false);
561
+ }
562
+
563
+ int token = info[0].As<Napi::Number>().Int32Value();
564
+
565
+ return Napi::Boolean::New(info.Env(), llama_token_is_eog(model, token));
566
+ }
567
+ Napi::Value GetVocabularyType(const Napi::CallbackInfo& info) {
568
+ if (disposed) {
569
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
570
+ return info.Env().Undefined();
571
+ }
572
+
573
+ auto vocabularyType = llama_vocab_type(model);
574
+
575
+ return Napi::Number::From(info.Env(), int32_t(vocabularyType));
576
+ }
577
+ Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info) {
578
+ const int addBos = llama_add_bos_token(model);
579
+
580
+ bool shouldPrependBos = addBos != -1 ? bool(addBos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
581
+
582
+ return Napi::Boolean::New(info.Env(), shouldPrependBos);
583
+ }
584
+
585
+ Napi::Value GetModelSize(const Napi::CallbackInfo& info) {
586
+ return Napi::Number::From(info.Env(), llama_model_size(model));
587
+ }
588
+
248
589
  static void init(Napi::Object exports) {
249
590
  exports.Set(
250
591
  "AddonModel",
@@ -252,9 +593,13 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
252
593
  exports.Env(),
253
594
  "AddonModel",
254
595
  {
596
+ InstanceMethod("init", &AddonModel::Init),
597
+ InstanceMethod("loadLora", &AddonModel::LoadLora),
598
+ InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
255
599
  InstanceMethod("tokenize", &AddonModel::Tokenize),
256
600
  InstanceMethod("detokenize", &AddonModel::Detokenize),
257
601
  InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
602
+ InstanceMethod("getEmbeddingVectorSize", &AddonModel::GetEmbeddingVectorSize),
258
603
  InstanceMethod("getTotalSize", &AddonModel::GetTotalSize),
259
604
  InstanceMethod("getTotalParameters", &AddonModel::GetTotalParameters),
260
605
  InstanceMethod("getModelDescription", &AddonModel::GetModelDescription),
@@ -266,16 +611,260 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
266
611
  InstanceMethod("suffixToken", &AddonModel::SuffixToken),
267
612
  InstanceMethod("eotToken", &AddonModel::EotToken),
268
613
  InstanceMethod("getTokenString", &AddonModel::GetTokenString),
269
- InstanceMethod("dispose", &AddonModel::Dispose)
614
+ InstanceMethod("getTokenAttributes", &AddonModel::GetTokenAttributes),
615
+ InstanceMethod("isEogToken", &AddonModel::IsEogToken),
616
+ InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
617
+ InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
618
+ InstanceMethod("getModelSize", &AddonModel::GetModelSize),
619
+ InstanceMethod("dispose", &AddonModel::Dispose),
270
620
  }
271
621
  )
272
622
  );
273
623
  }
274
624
  };
275
625
 
626
+ static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
627
+ AddonModel* addonModel = (AddonModel *) user_data;
628
+ unsigned percentage = (unsigned) (100 * progress);
629
+
630
+ if (percentage > addonModel->modelLoadPercentage) {
631
+ addonModel->modelLoadPercentage = percentage;
632
+
633
+ // original llama.cpp logs
634
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
635
+ if (percentage >= 100) {
636
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
637
+ }
638
+ }
639
+
640
+ if (progress > addonModel->rawModelLoadPercentage) {
641
+ addonModel->rawModelLoadPercentage = progress;
642
+
643
+ if (addonModel->onLoadProgressEventCallbackSet) {
644
+ addon_progress_event* data = new addon_progress_event {
645
+ progress
646
+ };
647
+
648
+ auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);
649
+
650
+ if (status != napi_ok) {
651
+ delete data;
652
+ }
653
+ }
654
+ }
655
+
656
+ return !(addonModel->abortModelLoad);
657
+ }
658
+
659
+ class AddonModelLoadModelWorker : public Napi::AsyncWorker {
660
+ public:
661
+ AddonModel* model;
662
+
663
+ AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
664
+ : Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
665
+ model(model),
666
+ deferred(Napi::Promise::Deferred::New(env)) {
667
+ model->Ref();
668
+ }
669
+ ~AddonModelLoadModelWorker() {
670
+ model->Unref();
671
+ }
672
+
673
+ Napi::Promise GetPromise() {
674
+ return deferred.Promise();
675
+ }
676
+
677
+ protected:
678
+ Napi::Promise::Deferred deferred;
679
+
680
+ void Execute() {
681
+ try {
682
+ model->model = llama_load_model_from_file(model->modelPath.c_str(), model->model_params);
683
+
684
+ model->modelLoaded = model->model != nullptr && model->model != NULL;
685
+ } catch (const std::exception& e) {
686
+ SetError(e.what());
687
+ } catch(...) {
688
+ SetError("Unknown error when calling \"llama_load_model_from_file\"");
689
+ }
690
+ }
691
+ void OnOK() {
692
+ if (model->modelLoaded) {
693
+ uint64_t modelSize = llama_model_size(model->model);
694
+ adjustNapiExternalMemoryAdd(Env(), modelSize);
695
+ model->loadedModelSize = modelSize;
696
+ }
697
+
698
+ deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
699
+ if (model->onLoadProgressEventCallbackSet) {
700
+ model->addonThreadSafeOnLoadProgressEventCallback.Release();
701
+ }
702
+ }
703
+ void OnError(const Napi::Error& err) {
704
+ deferred.Reject(err.Value());
705
+ }
706
+ };
707
+ class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
708
+ public:
709
+ AddonModel* model;
710
+
711
+ AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
712
+ : Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
713
+ model(model),
714
+ deferred(Napi::Promise::Deferred::New(env)) {
715
+ model->Ref();
716
+ }
717
+ ~AddonModelUnloadModelWorker() {
718
+ model->Unref();
719
+ }
720
+
721
+ Napi::Promise GetPromise() {
722
+ return deferred.Promise();
723
+ }
724
+
725
+ protected:
726
+ Napi::Promise::Deferred deferred;
727
+
728
+ void Execute() {
729
+ try {
730
+ llama_free_model(model->model);
731
+ model->modelLoaded = false;
732
+
733
+ model->dispose();
734
+ } catch (const std::exception& e) {
735
+ SetError(e.what());
736
+ } catch(...) {
737
+ SetError("Unknown error when calling \"llama_free_model\"");
738
+ }
739
+ }
740
+ void OnOK() {
741
+ adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
742
+ model->loadedModelSize = 0;
743
+
744
+ deferred.Resolve(Env().Undefined());
745
+ }
746
+ void OnError(const Napi::Error& err) {
747
+ deferred.Reject(err.Value());
748
+ }
749
+ };
750
+ class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
751
+ public:
752
+ AddonModel* model;
753
+ std::string loraFilePath;
754
+ float loraScale;
755
+ int32_t loraThreads;
756
+ std::string baseModelPath;
757
+
758
+ AddonModelLoadLoraWorker(
759
+ const Napi::Env& env,
760
+ AddonModel* model,
761
+ std::string loraFilePath,
762
+ float loraScale,
763
+ int32_t loraThreads,
764
+ std::string baseModelPath
765
+ )
766
+ : Napi::AsyncWorker(env, "AddonModelLoadLoraWorker"),
767
+ model(model),
768
+ loraFilePath(loraFilePath),
769
+ loraScale(loraScale),
770
+ loraThreads(loraThreads),
771
+ baseModelPath(baseModelPath),
772
+ deferred(Napi::Promise::Deferred::New(env)) {
773
+ model->Ref();
774
+ }
775
+ ~AddonModelLoadLoraWorker() {
776
+ model->Unref();
777
+ }
778
+
779
+ Napi::Promise GetPromise() {
780
+ return deferred.Promise();
781
+ }
782
+
783
+ protected:
784
+ Napi::Promise::Deferred deferred;
785
+
786
+ void Execute() {
787
+ try {
788
+ const auto res = llama_model_apply_lora_from_file(
789
+ model->model,
790
+ loraFilePath.c_str(),
791
+ loraScale,
792
+ baseModelPath.empty() ? NULL : baseModelPath.c_str(),
793
+ loraThreads
794
+ );
795
+
796
+ if (res != 0) {
797
+ SetError(
798
+ std::string(
799
+ std::string("Failed to apply LoRA \"") + loraFilePath + std::string("\"") + (
800
+ baseModelPath.empty()
801
+ ? std::string("")
802
+ : (std::string(" with base model \"") + baseModelPath + std::string("\""))
803
+ )
804
+ )
805
+ );
806
+ }
807
+ } catch (const std::exception& e) {
808
+ SetError(e.what());
809
+ } catch(...) {
810
+ SetError("Unknown error when calling \"llama_model_apply_lora_from_file\"");
811
+ }
812
+ }
813
+ void OnOK() {
814
+ deferred.Resolve(Env().Undefined());
815
+ }
816
+ void OnError(const Napi::Error& err) {
817
+ deferred.Reject(err.Value());
818
+ }
819
+ };
820
+
821
+ Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
822
+ if (disposed) {
823
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
824
+ return info.Env().Undefined();
825
+ }
826
+
827
+ AddonModelLoadModelWorker* worker = new AddonModelLoadModelWorker(this->Env(), this);
828
+ worker->Queue();
829
+ return worker->GetPromise();
830
+ }
831
+ Napi::Value AddonModel::LoadLora(const Napi::CallbackInfo& info) {
832
+ std::string loraFilePath = info[0].As<Napi::String>().Utf8Value();
833
+ float scale = info[1].As<Napi::Number>().FloatValue();
834
+ int32_t threads = info[2].As<Napi::Number>().Int32Value();
835
+ std::string baseModelPath = (info.Length() > 3 && info[3].IsString()) ? info[3].As<Napi::String>().Utf8Value() : std::string("");
836
+
837
+ int32_t resolvedThreads = threads == 0 ? std::thread::hardware_concurrency() : threads;
838
+
839
+ AddonModelLoadLoraWorker* worker = new AddonModelLoadLoraWorker(this->Env(), this, loraFilePath, scale, threads, baseModelPath);
840
+ worker->Queue();
841
+ return worker->GetPromise();
842
+ }
843
+ Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
844
+ if (disposed) {
845
+ return info.Env().Undefined();
846
+ }
847
+
848
+ if (modelLoaded) {
849
+ modelLoaded = false;
850
+
851
+ AddonModelUnloadModelWorker* worker = new AddonModelUnloadModelWorker(this->Env(), this);
852
+ worker->Queue();
853
+ return worker->GetPromise();
854
+ } else {
855
+ dispose();
856
+
857
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
858
+ deferred.Resolve(info.Env().Undefined());
859
+ return deferred.Promise();
860
+ }
861
+ }
862
+
276
863
  class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
277
864
  public:
278
865
  grammar_parser::parse_state parsed_grammar;
866
+ Napi::Reference<Napi::Object> addonExportsRef;
867
+ bool hasAddonExportsRef = false;
279
868
 
280
869
  AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
281
870
  // Get the model path
@@ -285,6 +874,11 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
285
874
  if (info.Length() > 1 && info[1].IsObject()) {
286
875
  Napi::Object options = info[1].As<Napi::Object>();
287
876
 
877
+ if (options.Has("addonExports")) {
878
+ addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
879
+ hasAddonExportsRef = true;
880
+ }
881
+
288
882
  if (options.Has("printGrammar")) {
289
883
  should_print_grammar = options.Get("printGrammar").As<Napi::Boolean>().Value();
290
884
  }
@@ -302,6 +896,13 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
302
896
  }
303
897
  }
304
898
 
899
+ ~AddonGrammar() {
900
+ if (hasAddonExportsRef) {
901
+ addonExportsRef.Unref();
902
+ hasAddonExportsRef = false;
903
+ }
904
+ }
905
+
305
906
  static void init(Napi::Object exports) {
306
907
  exports.Set("AddonGrammar", DefineClass(exports.Env(), "AddonGrammar", {}));
307
908
  }
@@ -340,9 +941,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
340
941
  llama_context_params context_params;
341
942
  llama_context* ctx;
342
943
  llama_batch batch;
944
+ uint64_t batchMemorySize = 0;
343
945
  bool has_batch = false;
344
946
  int32_t batch_n_tokens = 0;
345
947
  int n_cur = 0;
948
+
949
+ uint64_t loadedContextMemorySize = 0;
950
+ bool contextLoaded = false;
951
+
346
952
  bool disposed = false;
347
953
 
348
954
  AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
@@ -370,10 +976,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
370
976
 
371
977
  if (options.Has("batchSize")) {
372
978
  context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
979
+ context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
980
+ }
981
+
982
+ if (options.Has("sequences")) {
983
+ context_params.n_seq_max = options.Get("sequences").As<Napi::Number>().Uint32Value();
373
984
  }
374
985
 
375
- if (options.Has("embedding")) {
376
- context_params.embedding = options.Get("embedding").As<Napi::Boolean>().Value();
986
+ if (options.Has("embeddings")) {
987
+ context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
377
988
  }
378
989
 
379
990
  if (options.Has("threads")) {
@@ -384,9 +995,6 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
384
995
  context_params.n_threads_batch = resolved_n_threads;
385
996
  }
386
997
  }
387
-
388
- ctx = llama_new_context_with_model(model->model, context_params);
389
- Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
390
998
  }
391
999
  ~AddonContext() {
392
1000
  dispose();
@@ -397,13 +1005,18 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
397
1005
  return;
398
1006
  }
399
1007
 
400
- Napi::MemoryManagement::AdjustExternalMemory(Env(), -(int64_t)llama_get_state_size(ctx));
401
- llama_free(ctx);
1008
+ disposed = true;
1009
+ if (contextLoaded) {
1010
+ contextLoaded = false;
1011
+ llama_free(ctx);
1012
+
1013
+ adjustNapiExternalMemorySubtract(Env(), loadedContextMemorySize);
1014
+ loadedContextMemorySize = 0;
1015
+ }
1016
+
402
1017
  model->Unref();
403
1018
 
404
1019
  disposeBatch();
405
-
406
- disposed = true;
407
1020
  }
408
1021
  void disposeBatch() {
409
1022
  if (!has_batch) {
@@ -413,16 +1026,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
413
1026
  llama_batch_free(batch);
414
1027
  has_batch = false;
415
1028
  batch_n_tokens = 0;
1029
+
1030
+ adjustNapiExternalMemorySubtract(Env(), batchMemorySize);
1031
+ batchMemorySize = 0;
416
1032
  }
417
- Napi::Value Dispose(const Napi::CallbackInfo& info) {
418
- if (disposed) {
419
- return info.Env().Undefined();
420
- }
421
1033
 
422
- dispose();
1034
+ Napi::Value Init(const Napi::CallbackInfo& info);
1035
+ Napi::Value Dispose(const Napi::CallbackInfo& info);
423
1036
 
424
- return info.Env().Undefined();
425
- }
426
1037
  Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
427
1038
  if (disposed) {
428
1039
  Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
@@ -447,6 +1058,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
447
1058
  has_batch = true;
448
1059
  batch_n_tokens = n_tokens;
449
1060
 
1061
+ uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_n_embd(model->model), context_params.n_batch);
1062
+ if (newBatchMemorySize > batchMemorySize) {
1063
+ adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
1064
+ batchMemorySize = newBatchMemorySize;
1065
+ } else if (newBatchMemorySize < batchMemorySize) {
1066
+ adjustNapiExternalMemorySubtract(Env(), batchMemorySize - newBatchMemorySize);
1067
+ batchMemorySize = newBatchMemorySize;
1068
+ }
1069
+
450
1070
  return info.Env().Undefined();
451
1071
  }
452
1072
  Napi::Value DisposeBatch(const Napi::CallbackInfo& info) {
@@ -495,7 +1115,12 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
495
1115
 
496
1116
  int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
497
1117
 
498
- llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
1118
+ bool result = llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
1119
+
1120
+ if (!result) {
1121
+ Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
1122
+ return info.Env().Undefined();
1123
+ }
499
1124
 
500
1125
  return info.Env().Undefined();
501
1126
  }
@@ -509,9 +1134,9 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
509
1134
  int32_t startPos = info[1].As<Napi::Number>().Int32Value();
510
1135
  int32_t endPos = info[2].As<Napi::Number>().Int32Value();
511
1136
 
512
- llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
1137
+ bool result = llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
513
1138
 
514
- return info.Env().Undefined();
1139
+ return Napi::Boolean::New(info.Env(), result);
515
1140
  }
516
1141
  Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
517
1142
  if (disposed) {
@@ -524,7 +1149,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
524
1149
  int32_t endPos = info[2].As<Napi::Number>().Int32Value();
525
1150
  int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
526
1151
 
527
- llama_kv_cache_seq_shift(ctx, sequenceId, startPos, endPos, shiftDelta);
1152
+ llama_kv_cache_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);
528
1153
 
529
1154
  return info.Env().Undefined();
530
1155
  }
@@ -532,7 +1157,8 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
532
1157
  Napi::Value SampleToken(const Napi::CallbackInfo& info);
533
1158
 
534
1159
  Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
535
- AddonGrammarEvaluationState* grammar_evaluation_state = Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
1160
+ AddonGrammarEvaluationState* grammar_evaluation_state =
1161
+ Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
536
1162
  llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
537
1163
 
538
1164
  if ((grammar_evaluation_state)->grammar != nullptr) {
@@ -542,14 +1168,53 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
542
1168
  return info.Env().Undefined();
543
1169
  }
544
1170
 
1171
+ Napi::Value CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info) {
1172
+ AddonGrammarEvaluationState* grammar_evaluation_state =
1173
+ Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
1174
+ llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
1175
+
1176
+ if ((grammar_evaluation_state)->grammar != nullptr) {
1177
+ std::vector<llama_token_data> candidates;
1178
+ candidates.reserve(1);
1179
+ candidates.emplace_back(llama_token_data { tokenId, 1, 0.0f });
1180
+
1181
+ llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
1182
+
1183
+ llama_sample_grammar(ctx, &candidates_p, (grammar_evaluation_state)->grammar);
1184
+
1185
+ if (candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) {
1186
+ return Napi::Boolean::New(info.Env(), false);
1187
+ }
1188
+
1189
+ return Napi::Boolean::New(info.Env(), true);
1190
+ }
1191
+
1192
+ return Napi::Boolean::New(info.Env(), false);
1193
+ }
1194
+
545
1195
  Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
546
1196
  if (disposed) {
547
1197
  Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
548
1198
  return info.Env().Undefined();
549
1199
  }
550
1200
 
1201
+ int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
1202
+
1203
+ if (inputTokensLength <= 0) {
1204
+ Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
1205
+ return info.Env().Undefined();
1206
+ }
1207
+
551
1208
  const int n_embd = llama_n_embd(model->model);
552
- const auto * embeddings = llama_get_embeddings(ctx);
1209
+ const auto* embeddings = llama_get_embeddings_seq(ctx, 0);
1210
+ if (embeddings == NULL) {
1211
+ embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
1212
+
1213
+ if (embeddings == NULL) {
1214
+ Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
1215
+ return info.Env().Undefined();
1216
+ }
1217
+ }
553
1218
 
554
1219
  Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
555
1220
  for (size_t i = 0; i < n_embd; ++i) {
@@ -559,6 +1224,21 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
559
1224
  return result;
560
1225
  }
561
1226
 
1227
+ Napi::Value GetStateSize(const Napi::CallbackInfo& info) {
1228
+ if (disposed) {
1229
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1230
+ return info.Env().Undefined();
1231
+ }
1232
+
1233
+ return Napi::Number::From(info.Env(), llama_state_get_size(ctx));
1234
+ }
1235
+
1236
+ Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
1237
+ llama_print_timings(ctx);
1238
+ llama_reset_timings(ctx);
1239
+ return info.Env().Undefined();
1240
+ }
1241
+
562
1242
  static void init(Napi::Object exports) {
563
1243
  exports.Set(
564
1244
  "AddonContext",
@@ -566,6 +1246,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
566
1246
  exports.Env(),
567
1247
  "AddonContext",
568
1248
  {
1249
+ InstanceMethod("init", &AddonContext::Init),
569
1250
  InstanceMethod("getContextSize", &AddonContext::GetContextSize),
570
1251
  InstanceMethod("initBatch", &AddonContext::InitBatch),
571
1252
  InstanceMethod("addToBatch", &AddonContext::AddToBatch),
@@ -575,8 +1256,11 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
575
1256
  InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
576
1257
  InstanceMethod("sampleToken", &AddonContext::SampleToken),
577
1258
  InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
1259
+ InstanceMethod("canBeNextTokenForGrammarEvaluationState", &AddonContext::CanBeNextTokenForGrammarEvaluationState),
578
1260
  InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
579
- InstanceMethod("dispose", &AddonContext::Dispose)
1261
+ InstanceMethod("getStateSize", &AddonContext::GetStateSize),
1262
+ InstanceMethod("printTimings", &AddonContext::PrintTimings),
1263
+ InstanceMethod("dispose", &AddonContext::Dispose),
580
1264
  }
581
1265
  )
582
1266
  );
@@ -584,53 +1268,198 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
584
1268
  };
585
1269
 
586
1270
 
587
- class AddonContextDecodeBatchWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
1271
+ class AddonContextDecodeBatchWorker : public Napi::AsyncWorker {
588
1272
  public:
589
1273
  AddonContext* ctx;
590
1274
 
591
- AddonContextDecodeBatchWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
592
- : Napi::AsyncWorker(info.Env(), "AddonContextDecodeBatchWorker"),
1275
+ AddonContextDecodeBatchWorker(const Napi::Env& env, AddonContext* ctx)
1276
+ : Napi::AsyncWorker(env, "AddonContextDecodeBatchWorker"),
593
1277
  ctx(ctx),
594
- Napi::Promise::Deferred(info.Env()) {
1278
+ deferred(Napi::Promise::Deferred::New(env)) {
595
1279
  ctx->Ref();
596
1280
  }
597
1281
  ~AddonContextDecodeBatchWorker() {
598
1282
  ctx->Unref();
599
1283
  }
600
- using Napi::AsyncWorker::Queue;
601
- using Napi::Promise::Deferred::Promise;
1284
+
1285
+ Napi::Promise GetPromise() {
1286
+ return deferred.Promise();
1287
+ }
602
1288
 
603
1289
  protected:
1290
+ Napi::Promise::Deferred deferred;
1291
+
604
1292
  void Execute() {
605
- // Perform the evaluation using llama_decode.
606
- int r = llama_decode(ctx->ctx, ctx->batch);
1293
+ try {
1294
+ // Perform the evaluation using llama_decode.
1295
+ int r = llama_decode(ctx->ctx, ctx->batch);
1296
+
1297
+ if (r != 0) {
1298
+ if (r == 1) {
1299
+ SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
1300
+ } else {
1301
+ SetError("Eval has failed");
1302
+ }
607
1303
 
608
- if (r != 0) {
609
- if (r == 1) {
610
- SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
611
- } else {
612
- SetError("Eval has failed");
1304
+ return;
613
1305
  }
614
1306
 
615
- return;
1307
+ llama_synchronize(ctx->ctx);
1308
+ } catch (const std::exception& e) {
1309
+ SetError(e.what());
1310
+ } catch(...) {
1311
+ SetError("Unknown error when calling \"llama_decode\"");
616
1312
  }
617
1313
  }
618
1314
  void OnOK() {
619
- Napi::Env env = Napi::AsyncWorker::Env();
620
- Napi::Promise::Deferred::Resolve(env.Undefined());
1315
+ deferred.Resolve(Env().Undefined());
621
1316
  }
622
1317
  void OnError(const Napi::Error& err) {
623
- Napi::Promise::Deferred::Reject(err.Value());
1318
+ deferred.Reject(err.Value());
624
1319
  }
625
1320
  };
626
1321
 
627
1322
  Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
628
- AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info, this);
1323
+ AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
1324
+ worker->Queue();
1325
+ return worker->GetPromise();
1326
+ }
1327
+
1328
+ class AddonContextLoadContextWorker : public Napi::AsyncWorker {
1329
+ public:
1330
+ AddonContext* context;
1331
+
1332
+ AddonContextLoadContextWorker(const Napi::Env& env, AddonContext* context)
1333
+ : Napi::AsyncWorker(env, "AddonContextLoadContextWorker"),
1334
+ context(context),
1335
+ deferred(Napi::Promise::Deferred::New(env)) {
1336
+ context->Ref();
1337
+ }
1338
+ ~AddonContextLoadContextWorker() {
1339
+ context->Unref();
1340
+ }
1341
+
1342
+ Napi::Promise GetPromise() {
1343
+ return deferred.Promise();
1344
+ }
1345
+
1346
+ protected:
1347
+ Napi::Promise::Deferred deferred;
1348
+
1349
+ void Execute() {
1350
+ try {
1351
+ context->ctx = llama_new_context_with_model(context->model->model, context->context_params);
1352
+
1353
+ context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
1354
+ } catch (const std::exception& e) {
1355
+ SetError(e.what());
1356
+ } catch(...) {
1357
+ SetError("Unknown error when calling \"llama_new_context_with_model\"");
1358
+ }
1359
+ }
1360
+ void OnOK() {
1361
+ if (context->contextLoaded) {
1362
+ uint64_t contextMemorySize = llama_state_get_size(context->ctx);
1363
+ adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
1364
+ context->loadedContextMemorySize = contextMemorySize;
1365
+ }
1366
+
1367
+ deferred.Resolve(Napi::Boolean::New(Env(), context->contextLoaded));
1368
+ }
1369
+ void OnError(const Napi::Error& err) {
1370
+ deferred.Reject(err.Value());
1371
+ }
1372
+ };
1373
+ class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
1374
+ public:
1375
+ AddonContext* context;
1376
+
1377
+ AddonContextUnloadContextWorker(const Napi::Env& env, AddonContext* context)
1378
+ : Napi::AsyncWorker(env, "AddonContextUnloadContextWorker"),
1379
+ context(context),
1380
+ deferred(Napi::Promise::Deferred::New(env)) {
1381
+ context->Ref();
1382
+ }
1383
+ ~AddonContextUnloadContextWorker() {
1384
+ context->Unref();
1385
+ }
1386
+
1387
+ Napi::Promise GetPromise() {
1388
+ return deferred.Promise();
1389
+ }
1390
+
1391
+ protected:
1392
+ Napi::Promise::Deferred deferred;
1393
+
1394
+ void Execute() {
1395
+ try {
1396
+ llama_free(context->ctx);
1397
+ context->contextLoaded = false;
1398
+
1399
+ try {
1400
+ if (context->has_batch) {
1401
+ llama_batch_free(context->batch);
1402
+ context->has_batch = false;
1403
+ context->batch_n_tokens = 0;
1404
+ }
1405
+
1406
+ context->dispose();
1407
+ } catch (const std::exception& e) {
1408
+ SetError(e.what());
1409
+ } catch(...) {
1410
+ SetError("Unknown error when calling \"llama_batch_free\"");
1411
+ }
1412
+ } catch (const std::exception& e) {
1413
+ SetError(e.what());
1414
+ } catch(...) {
1415
+ SetError("Unknown error when calling \"llama_free\"");
1416
+ }
1417
+ }
1418
+ void OnOK() {
1419
+ adjustNapiExternalMemorySubtract(Env(), context->loadedContextMemorySize);
1420
+ context->loadedContextMemorySize = 0;
1421
+
1422
+ adjustNapiExternalMemorySubtract(Env(), context->batchMemorySize);
1423
+ context->batchMemorySize = 0;
1424
+
1425
+ deferred.Resolve(Env().Undefined());
1426
+ }
1427
+ void OnError(const Napi::Error& err) {
1428
+ deferred.Reject(err.Value());
1429
+ }
1430
+ };
1431
+
1432
+ Napi::Value AddonContext::Init(const Napi::CallbackInfo& info) {
1433
+ if (disposed) {
1434
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1435
+ return info.Env().Undefined();
1436
+ }
1437
+
1438
+ AddonContextLoadContextWorker* worker = new AddonContextLoadContextWorker(this->Env(), this);
629
1439
  worker->Queue();
630
- return worker->Promise();
1440
+ return worker->GetPromise();
1441
+ }
1442
+ Napi::Value AddonContext::Dispose(const Napi::CallbackInfo& info) {
1443
+ if (disposed) {
1444
+ return info.Env().Undefined();
1445
+ }
1446
+
1447
+ if (contextLoaded) {
1448
+ contextLoaded = false;
1449
+
1450
+ AddonContextUnloadContextWorker* worker = new AddonContextUnloadContextWorker(this->Env(), this);
1451
+ worker->Queue();
1452
+ return worker->GetPromise();
1453
+ } else {
1454
+ dispose();
1455
+
1456
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
1457
+ deferred.Resolve(info.Env().Undefined());
1458
+ return deferred.Promise();
1459
+ }
631
1460
  }
632
1461
 
633
- class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
1462
+ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
634
1463
  public:
635
1464
  AddonContext* ctx;
636
1465
  AddonGrammarEvaluationState* grammar_evaluation_state;
@@ -638,18 +1467,21 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
638
1467
  bool use_grammar = false;
639
1468
  llama_token result;
640
1469
  float temperature = 0.0f;
1470
+ float min_p = 0;
641
1471
  int32_t top_k = 40;
642
1472
  float top_p = 0.95f;
643
1473
  float repeat_penalty = 1.10f; // 1.0 = disabled
644
1474
  float repeat_penalty_presence_penalty = 0.00f; // 0.0 = disabled
645
1475
  float repeat_penalty_frequency_penalty = 0.00f; // 0.0 = disabled
646
1476
  std::vector<llama_token> repeat_penalty_tokens;
1477
+ std::unordered_map<llama_token, float> tokenBiases;
1478
+ bool useTokenBiases = false;
647
1479
  bool use_repeat_penalty = false;
648
1480
 
649
1481
  AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
650
1482
  : Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
651
1483
  ctx(ctx),
652
- Napi::Promise::Deferred(info.Env()) {
1484
+ deferred(Napi::Promise::Deferred::New(info.Env())) {
653
1485
  ctx->Ref();
654
1486
 
655
1487
  batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
@@ -661,6 +1493,10 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
661
1493
  temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
662
1494
  }
663
1495
 
1496
+ if (options.Has("minP")) {
1497
+ min_p = options.Get("minP").As<Napi::Number>().FloatValue();
1498
+ }
1499
+
664
1500
  if (options.Has("topK")) {
665
1501
  top_k = options.Get("topK").As<Napi::Number>().Int32Value();
666
1502
  }
@@ -684,6 +1520,19 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
684
1520
  use_repeat_penalty = true;
685
1521
  }
686
1522
 
1523
+ if (options.Has("tokenBiasKeys") && options.Has("tokenBiasValues")) {
1524
+ Napi::Uint32Array tokenBiasKeys = options.Get("tokenBiasKeys").As<Napi::Uint32Array>();
1525
+ Napi::Float32Array tokenBiasValues = options.Get("tokenBiasValues").As<Napi::Float32Array>();
1526
+
1527
+ if (tokenBiasKeys.ElementLength() == tokenBiasValues.ElementLength()) {
1528
+ for (size_t i = 0; i < tokenBiasKeys.ElementLength(); i++) {
1529
+ tokenBiases[static_cast<llama_token>(tokenBiasKeys[i])] = tokenBiasValues[i];
1530
+ }
1531
+
1532
+ useTokenBiases = true;
1533
+ }
1534
+ }
1535
+
687
1536
  if (options.Has("repeatPenaltyPresencePenalty")) {
688
1537
  repeat_penalty_presence_penalty = options.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue();
689
1538
  }
@@ -708,14 +1557,33 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
708
1557
  use_grammar = false;
709
1558
  }
710
1559
  }
711
- using Napi::AsyncWorker::Queue;
712
- using Napi::Promise::Deferred::Promise;
1560
+
1561
+ Napi::Promise GetPromise() {
1562
+ return deferred.Promise();
1563
+ }
713
1564
 
714
1565
  protected:
1566
+ Napi::Promise::Deferred deferred;
1567
+
715
1568
  void Execute() {
1569
+ try {
1570
+ SampleToken();
1571
+ } catch (const std::exception& e) {
1572
+ SetError(e.what());
1573
+ } catch(...) {
1574
+ SetError("Unknown error when calling \"SampleToken\"");
1575
+ }
1576
+ }
1577
+
1578
+ void SampleToken() {
716
1579
  llama_token new_token_id = 0;
717
1580
 
718
1581
  // Select the best prediction.
1582
+ if (llama_get_logits(ctx->ctx) == nullptr) {
1583
+ SetError("This model does not support token generation");
1584
+ return;
1585
+ }
1586
+
719
1587
  auto logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
720
1588
  auto n_vocab = llama_n_vocab(ctx->model->model);
721
1589
 
@@ -723,13 +1591,27 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
723
1591
  candidates.reserve(n_vocab);
724
1592
 
725
1593
  for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
726
- candidates.emplace_back(llama_token_data { token_id, logits[token_id], 0.0f });
1594
+ auto logit = logits[token_id];
1595
+
1596
+ if (useTokenBiases) {
1597
+ bool hasTokenBias = tokenBiases.find(token_id) != tokenBiases.end();
1598
+ if (hasTokenBias) {
1599
+ auto logitBias = tokenBiases.at(token_id);
1600
+ if (logitBias == -INFINITY || logitBias < -INFINITY) {
1601
+ if (!llama_token_is_eog(ctx->model->model, token_id)) {
1602
+ logit = -INFINITY;
1603
+ }
1604
+ } else {
1605
+ logit += logitBias;
1606
+ }
1607
+ }
1608
+ }
1609
+
1610
+ candidates.emplace_back(llama_token_data { token_id, logit, 0.0f });
727
1611
  }
728
1612
 
729
1613
  llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
730
1614
 
731
- auto eos_token = llama_token_eos(ctx->model->model);
732
-
733
1615
  if (use_repeat_penalty && !repeat_penalty_tokens.empty()) {
734
1616
  llama_sample_repetition_penalties(
735
1617
  ctx->ctx,
@@ -744,6 +1626,13 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
744
1626
 
745
1627
  if (use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
746
1628
  llama_sample_grammar(ctx->ctx, &candidates_p, (grammar_evaluation_state)->grammar);
1629
+
1630
+ if ((candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) && useTokenBiases) {
1631
+ // logit biases caused grammar sampling to fail, so sampling again without logit biases
1632
+ useTokenBiases = false;
1633
+ SampleToken();
1634
+ return;
1635
+ }
747
1636
  }
748
1637
 
749
1638
  if (temperature <= 0) {
@@ -762,45 +1651,359 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
762
1651
  llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
763
1652
  llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
764
1653
  llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
1654
+ llama_sample_min_p(ctx->ctx, &candidates_p, min_p, min_keep);
765
1655
  llama_sample_temp(ctx->ctx, &candidates_p, temperature);
766
1656
  new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
767
1657
  }
768
1658
 
769
- if (new_token_id != eos_token && use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
1659
+ if (!llama_token_is_eog(ctx->model->model, new_token_id) && use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
770
1660
  llama_grammar_accept_token(ctx->ctx, (grammar_evaluation_state)->grammar, new_token_id);
771
1661
  }
772
1662
 
773
1663
  result = new_token_id;
774
1664
  }
775
1665
  void OnOK() {
776
- Napi::Env env = Napi::AsyncWorker::Env();
777
- Napi::Number resultValue = Napi::Number::New(env, static_cast<uint32_t>(result));
778
- Napi::Promise::Deferred::Resolve(resultValue);
1666
+ Napi::Number resultValue = Napi::Number::New(Env(), static_cast<uint32_t>(result));
1667
+ deferred.Resolve(resultValue);
779
1668
  }
780
1669
  void OnError(const Napi::Error& err) {
781
- Napi::Promise::Deferred::Reject(err.Value());
1670
+ deferred.Reject(err.Value());
782
1671
  }
783
1672
  };
784
1673
 
785
1674
  Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
786
1675
  AddonContextSampleTokenWorker* worker = new AddonContextSampleTokenWorker(info, this);
787
1676
  worker->Queue();
788
- return worker->Promise();
1677
+ return worker->GetPromise();
789
1678
  }
790
1679
 
791
1680
  Napi::Value systemInfo(const Napi::CallbackInfo& info) {
792
1681
  return Napi::String::From(info.Env(), llama_print_system_info());
793
1682
  }
794
1683
 
1684
+ Napi::Value addonGetSupportsGpuOffloading(const Napi::CallbackInfo& info) {
1685
+ return Napi::Boolean::New(info.Env(), llama_supports_gpu_offload());
1686
+ }
1687
+
1688
+ Napi::Value addonGetSupportsMmap(const Napi::CallbackInfo& info) {
1689
+ return Napi::Boolean::New(info.Env(), llama_supports_mmap());
1690
+ }
1691
+
1692
+ Napi::Value addonGetSupportsMlock(const Napi::CallbackInfo& info) {
1693
+ return Napi::Boolean::New(info.Env(), llama_supports_mlock());
1694
+ }
1695
+
1696
+ Napi::Value addonGetBlockSizeForGgmlType(const Napi::CallbackInfo& info) {
1697
+ const int ggmlType = info[0].As<Napi::Number>().Int32Value();
1698
+
1699
+ if (ggmlType < 0 || ggmlType > GGML_TYPE_COUNT) {
1700
+ return info.Env().Undefined();
1701
+ }
1702
+
1703
+ const auto blockSize = ggml_blck_size(static_cast<ggml_type>(ggmlType));
1704
+
1705
+ return Napi::Number::New(info.Env(), blockSize);
1706
+ }
1707
+
1708
+ Napi::Value addonGetTypeSizeForGgmlType(const Napi::CallbackInfo& info) {
1709
+ const int ggmlType = info[0].As<Napi::Number>().Int32Value();
1710
+
1711
+ if (ggmlType < 0 || ggmlType > GGML_TYPE_COUNT) {
1712
+ return info.Env().Undefined();
1713
+ }
1714
+
1715
+ const auto typeSize = ggml_type_size(static_cast<ggml_type>(ggmlType));
1716
+
1717
+ return Napi::Number::New(info.Env(), typeSize);
1718
+ }
1719
+
1720
+ Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
1721
+ Napi::Object consts = Napi::Object::New(info.Env());
1722
+ consts.Set("ggmlMaxDims", Napi::Number::New(info.Env(), GGML_MAX_DIMS));
1723
+ consts.Set("ggmlTypeF16Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F16)));
1724
+ consts.Set("ggmlTypeF32Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F32)));
1725
+ consts.Set("ggmlTensorOverhead", Napi::Number::New(info.Env(), ggml_tensor_overhead()));
1726
+ consts.Set("llamaMaxRngState", Napi::Number::New(info.Env(), LLAMA_MAX_RNG_STATE));
1727
+ consts.Set("llamaPosSize", Napi::Number::New(info.Env(), sizeof(llama_pos)));
1728
+ consts.Set("llamaSeqIdSize", Napi::Number::New(info.Env(), sizeof(llama_seq_id)));
1729
+
1730
+ return consts;
1731
+ }
1732
+
1733
+ int addonGetGgmlLogLevelNumber(ggml_log_level level) {
1734
+ switch (level) {
1735
+ case GGML_LOG_LEVEL_ERROR: return 2;
1736
+ case GGML_LOG_LEVEL_WARN: return 3;
1737
+ case GGML_LOG_LEVEL_INFO: return 4;
1738
+ case GGML_LOG_LEVEL_DEBUG: return 5;
1739
+ }
1740
+
1741
+ return 1;
1742
+ }
1743
+
1744
+ void addonCallJsLogCallback(
1745
+ Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
1746
+ ) {
1747
+ bool called = false;
1748
+
1749
+ if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
1750
+ try {
1751
+ callback.Call({
1752
+ Napi::Number::New(env, data->logLevelNumber),
1753
+ Napi::String::New(env, data->stringStream->str()),
1754
+ });
1755
+ called = true;
1756
+ } catch (const Napi::Error& e) {
1757
+ called = false;
1758
+ }
1759
+ }
1760
+
1761
+ if (!called && data != nullptr) {
1762
+ if (data->logLevelNumber == 2) {
1763
+ fputs(data->stringStream->str().c_str(), stderr);
1764
+ fflush(stderr);
1765
+ } else {
1766
+ fputs(data->stringStream->str().c_str(), stdout);
1767
+ fflush(stdout);
1768
+ }
1769
+ }
1770
+
1771
+ if (data != nullptr) {
1772
+ delete data->stringStream;
1773
+ delete data;
1774
+ }
1775
+ }
1776
+
1777
+ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data) {
1778
+ int logLevelNumber = addonGetGgmlLogLevelNumber(level);
1779
+
1780
+ if (logLevelNumber > addonLoggerLogLevel) {
1781
+ return;
1782
+ }
1783
+
1784
+ if (addonJsLoggerCallbackSet) {
1785
+ std::stringstream* stringStream = new std::stringstream();
1786
+ if (text != nullptr) {
1787
+ *stringStream << text;
1788
+ }
1789
+
1790
+ addon_logger_log* data = new addon_logger_log {
1791
+ logLevelNumber,
1792
+ stringStream,
1793
+ };
1794
+
1795
+ auto status = addonThreadSafeLoggerCallback.NonBlockingCall(data);
1796
+
1797
+ if (status == napi_ok) {
1798
+ return;
1799
+ } else {
1800
+ delete stringStream;
1801
+ delete data;
1802
+ }
1803
+ }
1804
+
1805
+ if (text != nullptr) {
1806
+ if (level == 2) {
1807
+ fputs(text, stderr);
1808
+ fflush(stderr);
1809
+ } else {
1810
+ fputs(text, stdout);
1811
+ fflush(stdout);
1812
+ }
1813
+ }
1814
+ }
1815
+
1816
+ Napi::Value setLogger(const Napi::CallbackInfo& info) {
1817
+ if (info.Length() < 1 || !info[0].IsFunction()) {
1818
+ if (addonJsLoggerCallbackSet) {
1819
+ addonJsLoggerCallbackSet = false;
1820
+ addonThreadSafeLoggerCallback.Release();
1821
+ }
1822
+
1823
+ return info.Env().Undefined();
1824
+ }
1825
+
1826
+ auto addonLoggerJSCallback = info[0].As<Napi::Function>();
1827
+ AddonThreadSafeLogCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
1828
+ addonThreadSafeLoggerCallback = AddonThreadSafeLogCallbackFunction::New(
1829
+ info.Env(),
1830
+ addonLoggerJSCallback,
1831
+ "loggerCallback",
1832
+ 0,
1833
+ 1,
1834
+ context,
1835
+ [](Napi::Env, void*, AddonThreadSafeLogCallbackFunctionContext* ctx) {
1836
+ addonJsLoggerCallbackSet = false;
1837
+
1838
+ delete ctx;
1839
+ }
1840
+ );
1841
+ addonJsLoggerCallbackSet = true;
1842
+
1843
+ // prevent blocking the main node process from exiting due to active resources
1844
+ addonThreadSafeLoggerCallback.Unref(info.Env());
1845
+
1846
+ return info.Env().Undefined();
1847
+ }
1848
+
1849
+ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
1850
+ if (info.Length() < 1 || !info[0].IsNumber()) {
1851
+ addonLoggerLogLevel = 5;
1852
+
1853
+ return info.Env().Undefined();
1854
+ }
1855
+
1856
+ addonLoggerLogLevel = info[0].As<Napi::Number>().Int32Value();
1857
+
1858
+ return info.Env().Undefined();
1859
+ }
1860
+
1861
+ class AddonBackendLoadWorker : public Napi::AsyncWorker {
1862
+ public:
1863
+ AddonBackendLoadWorker(const Napi::Env& env)
1864
+ : Napi::AsyncWorker(env, "AddonBackendLoadWorker"),
1865
+ deferred(Napi::Promise::Deferred::New(env)) {
1866
+ }
1867
+ ~AddonBackendLoadWorker() {
1868
+ }
1869
+
1870
+ Napi::Promise GetPromise() {
1871
+ return deferred.Promise();
1872
+ }
1873
+
1874
+ protected:
1875
+ Napi::Promise::Deferred deferred;
1876
+
1877
+ void Execute() {
1878
+ try {
1879
+ llama_backend_init();
1880
+
1881
+ try {
1882
+ if (backendDisposed) {
1883
+ llama_backend_free();
1884
+ } else {
1885
+ backendInitialized = true;
1886
+ }
1887
+ } catch (const std::exception& e) {
1888
+ SetError(e.what());
1889
+ } catch(...) {
1890
+ SetError("Unknown error when calling \"llama_backend_free\"");
1891
+ }
1892
+ } catch (const std::exception& e) {
1893
+ SetError(e.what());
1894
+ } catch(...) {
1895
+ SetError("Unknown error when calling \"llama_backend_init\"");
1896
+ }
1897
+ }
1898
+ void OnOK() {
1899
+ deferred.Resolve(Env().Undefined());
1900
+ }
1901
+ void OnError(const Napi::Error& err) {
1902
+ deferred.Reject(err.Value());
1903
+ }
1904
+ };
1905
+
1906
+
1907
+ class AddonBackendUnloadWorker : public Napi::AsyncWorker {
1908
+ public:
1909
+ AddonBackendUnloadWorker(const Napi::Env& env)
1910
+ : Napi::AsyncWorker(env, "AddonBackendUnloadWorker"),
1911
+ deferred(Napi::Promise::Deferred::New(env)) {
1912
+ }
1913
+ ~AddonBackendUnloadWorker() {
1914
+ }
1915
+
1916
+ Napi::Promise GetPromise() {
1917
+ return deferred.Promise();
1918
+ }
1919
+
1920
+ protected:
1921
+ Napi::Promise::Deferred deferred;
1922
+
1923
+ void Execute() {
1924
+ try {
1925
+ if (backendInitialized) {
1926
+ backendInitialized = false;
1927
+ llama_backend_free();
1928
+ }
1929
+ } catch (const std::exception& e) {
1930
+ SetError(e.what());
1931
+ } catch(...) {
1932
+ SetError("Unknown error when calling \"llama_backend_free\"");
1933
+ }
1934
+ }
1935
+ void OnOK() {
1936
+ deferred.Resolve(Env().Undefined());
1937
+ }
1938
+ void OnError(const Napi::Error& err) {
1939
+ deferred.Reject(err.Value());
1940
+ }
1941
+ };
1942
+
1943
+ Napi::Value addonInit(const Napi::CallbackInfo& info) {
1944
+ if (backendInitialized) {
1945
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
1946
+ deferred.Resolve(info.Env().Undefined());
1947
+ return deferred.Promise();
1948
+ }
1949
+
1950
+ AddonBackendLoadWorker* worker = new AddonBackendLoadWorker(info.Env());
1951
+ worker->Queue();
1952
+ return worker->GetPromise();
1953
+ }
1954
+
1955
+ Napi::Value addonDispose(const Napi::CallbackInfo& info) {
1956
+ if (backendDisposed) {
1957
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
1958
+ deferred.Resolve(info.Env().Undefined());
1959
+ return deferred.Promise();
1960
+ }
1961
+
1962
+ backendDisposed = true;
1963
+
1964
+ AddonBackendUnloadWorker* worker = new AddonBackendUnloadWorker(info.Env());
1965
+ worker->Queue();
1966
+ return worker->GetPromise();
1967
+ }
1968
+
1969
+ static void addonFreeLlamaBackend(Napi::Env env, int* data) {
1970
+ if (backendDisposed) {
1971
+ return;
1972
+ }
1973
+
1974
+ backendDisposed = true;
1975
+ if (backendInitialized) {
1976
+ backendInitialized = false;
1977
+ llama_backend_free();
1978
+ }
1979
+ }
1980
+
795
1981
  Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
796
- llama_backend_init(false);
797
1982
  exports.DefineProperties({
798
1983
  Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
1984
+ Napi::PropertyDescriptor::Function("getSupportsGpuOffloading", addonGetSupportsGpuOffloading),
1985
+ Napi::PropertyDescriptor::Function("getSupportsMmap", addonGetSupportsMmap),
1986
+ Napi::PropertyDescriptor::Function("getSupportsMlock", addonGetSupportsMlock),
1987
+ Napi::PropertyDescriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
1988
+ Napi::PropertyDescriptor::Function("getTypeSizeForGgmlType", addonGetTypeSizeForGgmlType),
1989
+ Napi::PropertyDescriptor::Function("getConsts", addonGetConsts),
1990
+ Napi::PropertyDescriptor::Function("setLogger", setLogger),
1991
+ Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
1992
+ Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
1993
+ Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
1994
+ Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
1995
+ Napi::PropertyDescriptor::Function("init", addonInit),
1996
+ Napi::PropertyDescriptor::Function("dispose", addonDispose),
799
1997
  });
800
1998
  AddonModel::init(exports);
801
1999
  AddonGrammar::init(exports);
802
2000
  AddonGrammarEvaluationState::init(exports);
803
2001
  AddonContext::init(exports);
2002
+
2003
+ llama_log_set(addonLlamaCppLogCallback, nullptr);
2004
+
2005
+ exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));
2006
+
804
2007
  return exports;
805
2008
  }
806
2009