node-llama-cpp 3.0.0-beta.2 → 3.0.0-beta.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (641)
  1. package/README.md +14 -11
  2. package/dist/ChatWrapper.d.ts +2 -15
  3. package/dist/ChatWrapper.js +28 -33
  4. package/dist/ChatWrapper.js.map +1 -1
  5. package/dist/apiDocsOverrides.d.ts +1 -0
  6. package/dist/apiDocsOverrides.js +5 -0
  7. package/dist/apiDocsOverrides.js.map +1 -0
  8. package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +54 -7
  9. package/dist/bindings/AddonTypes.js +2 -0
  10. package/dist/bindings/AddonTypes.js.map +1 -0
  11. package/dist/bindings/Llama.d.ts +47 -0
  12. package/dist/bindings/Llama.js +343 -0
  13. package/dist/bindings/Llama.js.map +1 -0
  14. package/dist/bindings/consts.d.ts +2 -0
  15. package/dist/bindings/consts.js +11 -0
  16. package/dist/bindings/consts.js.map +1 -0
  17. package/dist/bindings/getLlama.d.ts +145 -0
  18. package/dist/bindings/getLlama.js +389 -0
  19. package/dist/bindings/getLlama.js.map +1 -0
  20. package/dist/bindings/types.d.ts +55 -0
  21. package/dist/bindings/types.js +77 -0
  22. package/dist/bindings/types.js.map +1 -0
  23. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  24. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  25. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  26. package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
  27. package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
  28. package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
  29. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  30. package/dist/bindings/utils/asyncEvery.js +15 -0
  31. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  32. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  33. package/dist/bindings/utils/asyncSome.js +27 -0
  34. package/dist/bindings/utils/asyncSome.js.map +1 -0
  35. package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
  36. package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
  37. package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
  38. package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
  39. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
  40. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
  41. package/dist/bindings/utils/cloneLlamaCppRepo.js +166 -0
  42. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
  43. package/dist/bindings/utils/compileLLamaCpp.d.ts +15 -0
  44. package/dist/bindings/utils/compileLLamaCpp.js +221 -0
  45. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
  46. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
  47. package/dist/bindings/utils/detectAvailableComputeLayers.js +304 -0
  48. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  49. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  50. package/dist/bindings/utils/detectGlibc.js +46 -0
  51. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  52. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
  53. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  54. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  55. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
  56. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +93 -0
  57. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
  58. package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
  59. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
  60. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
  61. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
  62. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
  63. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
  64. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +11 -0
  65. package/dist/bindings/utils/getGpuTypesToUseForOption.js +30 -0
  66. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  67. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  68. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  69. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  70. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  71. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  72. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  73. package/dist/bindings/utils/getPlatform.d.ts +2 -0
  74. package/dist/bindings/utils/getPlatform.js +15 -0
  75. package/dist/bindings/utils/getPlatform.js.map +1 -0
  76. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  77. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  78. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  79. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  80. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  81. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  82. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  83. package/dist/bindings/utils/hasFileInPath.js +34 -0
  84. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  85. package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
  86. package/dist/bindings/utils/lastBuildInfo.js +17 -0
  87. package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
  88. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
  89. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +22 -0
  90. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
  91. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  92. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  93. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  94. package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
  95. package/dist/bindings/utils/resolveCustomCmakeOptions.js +45 -0
  96. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
  97. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  98. package/dist/bindings/utils/testBindingBinary.js +98 -0
  99. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  100. package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
  101. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  102. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  103. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  104. package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
  105. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  106. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +5 -0
  107. package/dist/chatWrappers/ChatMLChatWrapper.js +13 -11
  108. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  109. package/dist/chatWrappers/FalconChatWrapper.d.ts +2 -1
  110. package/dist/chatWrappers/FalconChatWrapper.js +28 -11
  111. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  112. package/dist/chatWrappers/FunctionaryChatWrapper.js +86 -73
  113. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  114. package/dist/chatWrappers/{LlamaChatWrapper.d.ts → GemmaChatWrapper.d.ts} +6 -1
  115. package/dist/chatWrappers/GemmaChatWrapper.js +88 -0
  116. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  117. package/dist/chatWrappers/GeneralChatWrapper.d.ts +2 -1
  118. package/dist/chatWrappers/GeneralChatWrapper.js +35 -12
  119. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  120. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +20 -0
  121. package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +29 -11
  122. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  123. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +31 -0
  124. package/dist/chatWrappers/Llama3ChatWrapper.js +129 -0
  125. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  126. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +73 -0
  127. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +359 -0
  128. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  129. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +64 -0
  130. package/dist/chatWrappers/generic/TemplateChatWrapper.js +200 -0
  131. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  132. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +33 -0
  133. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  134. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  135. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +42 -0
  136. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +82 -0
  137. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  138. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  139. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +206 -0
  140. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  141. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +69 -0
  142. package/dist/chatWrappers/utils/resolveChatWrapper.js +214 -0
  143. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  144. package/dist/cli/cli.js +21 -7
  145. package/dist/cli/cli.js.map +1 -1
  146. package/dist/cli/commands/BuildCommand.d.ts +6 -4
  147. package/dist/cli/commands/BuildCommand.js +103 -41
  148. package/dist/cli/commands/BuildCommand.js.map +1 -1
  149. package/dist/cli/commands/ChatCommand.d.ts +18 -6
  150. package/dist/cli/commands/ChatCommand.js +298 -142
  151. package/dist/cli/commands/ChatCommand.js.map +1 -1
  152. package/dist/cli/commands/ClearCommand.d.ts +1 -1
  153. package/dist/cli/commands/ClearCommand.js +11 -12
  154. package/dist/cli/commands/ClearCommand.js.map +1 -1
  155. package/dist/cli/commands/CompleteCommand.d.ts +29 -0
  156. package/dist/cli/commands/CompleteCommand.js +365 -0
  157. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  158. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  159. package/dist/cli/commands/DebugCommand.js +54 -0
  160. package/dist/cli/commands/DebugCommand.js.map +1 -0
  161. package/dist/cli/commands/DownloadCommand.d.ts +6 -4
  162. package/dist/cli/commands/DownloadCommand.js +120 -69
  163. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  164. package/dist/cli/commands/InfillCommand.d.ts +31 -0
  165. package/dist/cli/commands/InfillCommand.js +401 -0
  166. package/dist/cli/commands/InfillCommand.js.map +1 -0
  167. package/dist/cli/commands/InitCommand.d.ts +11 -0
  168. package/dist/cli/commands/InitCommand.js +195 -0
  169. package/dist/cli/commands/InitCommand.js.map +1 -0
  170. package/dist/cli/commands/OnPostInstallCommand.js +9 -10
  171. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  172. package/dist/cli/commands/PullCommand.d.ts +12 -0
  173. package/dist/cli/commands/PullCommand.js +117 -0
  174. package/dist/cli/commands/PullCommand.js.map +1 -0
  175. package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
  176. package/dist/cli/commands/inspect/InspectCommand.js +19 -0
  177. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  178. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
  179. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +136 -0
  180. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  181. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  182. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +138 -0
  183. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  184. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +17 -0
  185. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +613 -0
  186. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  187. package/dist/cli/projectTemplates.d.ts +7 -0
  188. package/dist/cli/projectTemplates.js +10 -0
  189. package/dist/cli/projectTemplates.js.map +1 -0
  190. package/dist/cli/recommendedModels.d.ts +2 -0
  191. package/dist/cli/recommendedModels.js +342 -0
  192. package/dist/cli/recommendedModels.js.map +1 -0
  193. package/dist/cli/startCreateCli.d.ts +2 -0
  194. package/dist/cli/startCreateCli.js +26 -0
  195. package/dist/cli/startCreateCli.js.map +1 -0
  196. package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
  197. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  198. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  199. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  200. package/dist/cli/utils/ConsoleTable.js +86 -0
  201. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  202. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  203. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  204. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  205. package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
  206. package/dist/cli/utils/consolePromptQuestion.js +82 -0
  207. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  208. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  209. package/dist/cli/utils/getReadablePath.js +14 -0
  210. package/dist/cli/utils/getReadablePath.js.map +1 -0
  211. package/dist/cli/utils/interactivelyAskForModel.d.ts +7 -0
  212. package/dist/cli/utils/interactivelyAskForModel.js +451 -0
  213. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
  214. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  215. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  216. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  217. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  218. package/dist/cli/utils/printCommonInfoLines.js +71 -0
  219. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  220. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  221. package/dist/cli/utils/printInfoLine.js +54 -0
  222. package/dist/cli/utils/printInfoLine.js.map +1 -0
  223. package/dist/cli/utils/projectTemplates.d.ts +19 -0
  224. package/dist/cli/utils/projectTemplates.js +47 -0
  225. package/dist/cli/utils/projectTemplates.js.map +1 -0
  226. package/dist/cli/utils/resolveCommandGgufPath.d.ts +4 -0
  227. package/dist/cli/utils/resolveCommandGgufPath.js +71 -0
  228. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  229. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  230. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  231. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  232. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  233. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  234. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  235. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  236. package/dist/cli/utils/splitAnsiToLines.js +32 -0
  237. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  238. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
  239. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
  240. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
  241. package/dist/commands.d.ts +1 -0
  242. package/dist/commands.js +3 -0
  243. package/dist/commands.js.map +1 -1
  244. package/dist/config.d.ts +38 -5
  245. package/dist/config.js +61 -16
  246. package/dist/config.js.map +1 -1
  247. package/dist/consts.d.ts +3 -0
  248. package/dist/consts.js +10 -0
  249. package/dist/consts.js.map +1 -0
  250. package/dist/{llamaEvaluator → evaluator}/LlamaChat/LlamaChat.d.ts +37 -35
  251. package/dist/{llamaEvaluator → evaluator}/LlamaChat/LlamaChat.js +298 -221
  252. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
  253. package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/FunctionCallGrammar.d.ts +2 -1
  254. package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/FunctionCallGrammar.js +5 -3
  255. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -0
  256. package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +18 -0
  257. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  258. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/LlamaChatSession.d.ts +40 -3
  259. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/LlamaChatSession.js +28 -7
  260. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  261. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
  262. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
  263. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  264. package/dist/evaluator/LlamaCompletion.d.ts +155 -0
  265. package/dist/evaluator/LlamaCompletion.js +405 -0
  266. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  267. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +41 -20
  268. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +271 -81
  269. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
  270. package/dist/evaluator/LlamaContext/types.d.ts +140 -0
  271. package/dist/evaluator/LlamaContext/types.js.map +1 -0
  272. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  273. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  274. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  275. package/dist/{llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js → evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js} +4 -4
  276. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  277. package/dist/evaluator/LlamaEmbeddingContext.d.ts +51 -0
  278. package/dist/evaluator/LlamaEmbeddingContext.js +73 -0
  279. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
  280. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.d.ts +8 -5
  281. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +13 -10
  282. package/dist/evaluator/LlamaGrammar.js.map +1 -0
  283. package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
  284. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
  285. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
  286. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +3 -3
  287. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
  288. package/dist/evaluator/LlamaModel.d.ts +230 -0
  289. package/dist/evaluator/LlamaModel.js +597 -0
  290. package/dist/evaluator/LlamaModel.js.map +1 -0
  291. package/dist/evaluator/TokenBias.d.ts +22 -0
  292. package/dist/evaluator/TokenBias.js +33 -0
  293. package/dist/evaluator/TokenBias.js.map +1 -0
  294. package/dist/evaluator/TokenMeter.d.ts +54 -0
  295. package/dist/evaluator/TokenMeter.js +86 -0
  296. package/dist/evaluator/TokenMeter.js.map +1 -0
  297. package/dist/gguf/consts.d.ts +3 -0
  298. package/dist/gguf/consts.js +8 -0
  299. package/dist/gguf/consts.js.map +1 -0
  300. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  301. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  302. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  303. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  304. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  305. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  306. package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
  307. package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
  308. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  309. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
  310. package/dist/gguf/fileReaders/GgufFsFileReader.js +45 -0
  311. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  312. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
  313. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
  314. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  315. package/dist/gguf/insights/GgufInsights.d.ts +42 -0
  316. package/dist/gguf/insights/GgufInsights.js +361 -0
  317. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  318. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +87 -0
  319. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +136 -0
  320. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  321. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +18 -0
  322. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +76 -0
  323. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  324. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +14 -0
  325. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +177 -0
  326. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  327. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  328. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  329. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  330. package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
  331. package/dist/gguf/parser/GgufV2Parser.js +115 -0
  332. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  333. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  334. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  335. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  336. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  337. package/dist/gguf/parser/parseGguf.js +63 -0
  338. package/dist/gguf/parser/parseGguf.js.map +1 -0
  339. package/dist/gguf/readGgufFileInfo.d.ts +33 -0
  340. package/dist/gguf/readGgufFileInfo.js +66 -0
  341. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  342. package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
  343. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  344. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  345. package/dist/gguf/types/GgufMetadataTypes.d.ts +334 -0
  346. package/dist/gguf/types/GgufMetadataTypes.js +86 -0
  347. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  348. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  349. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  350. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  351. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  352. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  353. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  354. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
  355. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
  356. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  357. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  358. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  359. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  360. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  361. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  362. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  363. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  364. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  365. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  366. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  367. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
  368. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  369. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  370. package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
  371. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  372. package/dist/index.d.ts +37 -17
  373. package/dist/index.js +33 -14
  374. package/dist/index.js.map +1 -1
  375. package/dist/state.d.ts +4 -0
  376. package/dist/state.js +14 -0
  377. package/dist/state.js.map +1 -1
  378. package/dist/types.d.ts +53 -2
  379. package/dist/types.js.map +1 -1
  380. package/dist/utils/DisposeGuard.d.ts +13 -0
  381. package/dist/utils/DisposeGuard.js +120 -0
  382. package/dist/utils/DisposeGuard.js.map +1 -0
  383. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  384. package/dist/utils/InsufficientMemoryError.js +6 -0
  385. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  386. package/dist/utils/LlamaText.d.ts +50 -25
  387. package/dist/utils/LlamaText.js +367 -155
  388. package/dist/utils/LlamaText.js.map +1 -1
  389. package/dist/utils/StopGenerationDetector.d.ts +1 -1
  390. package/dist/utils/StopGenerationDetector.js +23 -18
  391. package/dist/utils/StopGenerationDetector.js.map +1 -1
  392. package/dist/utils/TokenStreamRegulator.d.ts +8 -4
  393. package/dist/utils/TokenStreamRegulator.js +78 -8
  394. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  395. package/dist/utils/UnsupportedError.d.ts +2 -0
  396. package/dist/utils/UnsupportedError.js +7 -0
  397. package/dist/utils/UnsupportedError.js.map +1 -0
  398. package/dist/utils/cmake.js +38 -20
  399. package/dist/utils/cmake.js.map +1 -1
  400. package/dist/utils/createModelDownloader.d.ts +102 -0
  401. package/dist/utils/createModelDownloader.js +226 -0
  402. package/dist/utils/createModelDownloader.js.map +1 -0
  403. package/dist/utils/findBestOption.d.ts +4 -0
  404. package/dist/utils/findBestOption.js +15 -0
  405. package/dist/utils/findBestOption.js.map +1 -0
  406. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +18 -8
  407. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  408. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
  409. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
  410. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
  411. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
  412. package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
  413. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  414. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  415. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  416. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  417. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  418. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  419. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  420. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  421. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  422. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  423. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
  424. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
  425. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  426. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  427. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
  428. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
  429. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
  430. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  431. package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
  432. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  433. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  434. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  435. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
  436. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
  437. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  438. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
  439. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
  440. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
  441. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
  442. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
  443. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
  444. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
  445. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
  446. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  447. package/dist/utils/getBuildDefaults.d.ts +1 -2
  448. package/dist/utils/getBuildDefaults.js +2 -3
  449. package/dist/utils/getBuildDefaults.js.map +1 -1
  450. package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
  451. package/dist/utils/getConsoleLogPrefix.js +10 -0
  452. package/dist/utils/getConsoleLogPrefix.js.map +1 -0
  453. package/dist/utils/getGrammarsFolder.d.ts +2 -1
  454. package/dist/utils/getGrammarsFolder.js +8 -7
  455. package/dist/utils/getGrammarsFolder.js.map +1 -1
  456. package/dist/utils/getModuleVersion.d.ts +1 -0
  457. package/dist/utils/getModuleVersion.js +13 -0
  458. package/dist/utils/getModuleVersion.js.map +1 -0
  459. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  460. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  461. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  462. package/dist/utils/getReadableContextSize.d.ts +1 -0
  463. package/dist/utils/getReadableContextSize.js +7 -0
  464. package/dist/utils/getReadableContextSize.js.map +1 -0
  465. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
  466. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  467. package/dist/utils/gitReleaseBundles.js +73 -5
  468. package/dist/utils/gitReleaseBundles.js.map +1 -1
  469. package/dist/utils/hashString.d.ts +1 -0
  470. package/dist/utils/hashString.js +8 -0
  471. package/dist/utils/hashString.js.map +1 -0
  472. package/dist/utils/isLockfileActive.d.ts +4 -0
  473. package/dist/utils/isLockfileActive.js +12 -0
  474. package/dist/utils/isLockfileActive.js.map +1 -0
  475. package/dist/utils/isToken.d.ts +2 -0
  476. package/dist/utils/isToken.js +4 -0
  477. package/dist/utils/isToken.js.map +1 -0
  478. package/dist/utils/isUrl.d.ts +1 -0
  479. package/dist/utils/isUrl.js +15 -0
  480. package/dist/utils/isUrl.js.map +1 -0
  481. package/dist/utils/mergeUnionTypes.d.ts +10 -0
  482. package/dist/utils/mergeUnionTypes.js +2 -0
  483. package/dist/utils/mergeUnionTypes.js.map +1 -0
  484. package/dist/utils/parseModelFileName.d.ts +1 -0
  485. package/dist/utils/parseModelFileName.js +6 -1
  486. package/dist/utils/parseModelFileName.js.map +1 -1
  487. package/dist/utils/parseTextTemplate.d.ts +66 -0
  488. package/dist/utils/parseTextTemplate.js +116 -0
  489. package/dist/utils/parseTextTemplate.js.map +1 -0
  490. package/dist/utils/prettyPrintObject.d.ts +10 -0
  491. package/dist/utils/prettyPrintObject.js +84 -0
  492. package/dist/utils/prettyPrintObject.js.map +1 -0
  493. package/dist/utils/removeNullFields.d.ts +2 -1
  494. package/dist/utils/removeNullFields.js +8 -0
  495. package/dist/utils/removeNullFields.js.map +1 -1
  496. package/dist/utils/resolveGithubRelease.d.ts +2 -0
  497. package/dist/utils/resolveGithubRelease.js +36 -0
  498. package/dist/utils/resolveGithubRelease.js.map +1 -0
  499. package/dist/utils/runtime.d.ts +4 -0
  500. package/dist/utils/runtime.js +8 -0
  501. package/dist/utils/runtime.js.map +1 -0
  502. package/dist/utils/spawnCommand.d.ts +11 -1
  503. package/dist/utils/spawnCommand.js +56 -6
  504. package/dist/utils/spawnCommand.js.map +1 -1
  505. package/dist/utils/tokenizeInput.d.ts +3 -0
  506. package/dist/utils/tokenizeInput.js +12 -0
  507. package/dist/utils/tokenizeInput.js.map +1 -0
  508. package/dist/utils/utilTypes.d.ts +3 -0
  509. package/dist/utils/utilTypes.js +2 -0
  510. package/dist/utils/utilTypes.js.map +1 -0
  511. package/dist/utils/waitForLockfileRelease.d.ts +5 -0
  512. package/dist/utils/waitForLockfileRelease.js +20 -0
  513. package/dist/utils/waitForLockfileRelease.js.map +1 -0
  514. package/dist/utils/withLockfile.d.ts +7 -0
  515. package/dist/utils/withLockfile.js +44 -0
  516. package/dist/utils/withLockfile.js.map +1 -0
  517. package/dist/utils/withOra.d.ts +2 -0
  518. package/dist/utils/withOra.js +22 -6
  519. package/dist/utils/withOra.js.map +1 -1
  520. package/dist/utils/withProgressLog.d.ts +23 -0
  521. package/dist/utils/withProgressLog.js +211 -0
  522. package/dist/utils/withProgressLog.js.map +1 -0
  523. package/dist/utils/withStatusLogs.d.ts +2 -1
  524. package/dist/utils/withStatusLogs.js +12 -9
  525. package/dist/utils/withStatusLogs.js.map +1 -1
  526. package/llama/.clang-format +1 -2
  527. package/llama/CMakeLists.txt +115 -4
  528. package/llama/addon.cpp +1318 -99
  529. package/llama/binariesGithubRelease.json +1 -1
  530. package/llama/gitRelease.bundle +0 -0
  531. package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
  532. package/llama/gpuInfo/cuda-gpu-info.h +10 -0
  533. package/llama/gpuInfo/metal-gpu-info.h +8 -0
  534. package/llama/gpuInfo/metal-gpu-info.mm +30 -0
  535. package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
  536. package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
  537. package/llama/grammars/README.md +11 -1
  538. package/llama/grammars/json.gbnf +1 -1
  539. package/llama/grammars/json_arr.gbnf +1 -1
  540. package/llama/llama.cpp.info.json +4 -0
  541. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  542. package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -0
  543. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  544. package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -0
  545. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  546. package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -0
  547. package/llamaBins/linux-x64/llama-addon.node +0 -0
  548. package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -0
  549. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  550. package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -0
  551. package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
  552. package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -0
  553. package/llamaBins/mac-arm64-metal/default.metallib +0 -0
  554. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  555. package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -0
  556. package/llamaBins/mac-x64/llama-addon.node +0 -0
  557. package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -0
  558. package/llamaBins/win-arm64/llama-addon.exp +0 -0
  559. package/llamaBins/win-arm64/llama-addon.lib +0 -0
  560. package/llamaBins/win-arm64/llama-addon.node +0 -0
  561. package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -0
  562. package/llamaBins/win-x64/llama-addon.exp +0 -0
  563. package/llamaBins/win-x64/llama-addon.lib +0 -0
  564. package/llamaBins/win-x64/llama-addon.node +0 -0
  565. package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -0
  566. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  567. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  568. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  569. package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -0
  570. package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
  571. package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
  572. package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
  573. package/package.json +61 -34
  574. package/templates/packed/electron-typescript-react.json +1 -0
  575. package/templates/packed/node-typescript.json +1 -0
  576. package/templates/packed/node_modules.json +1 -0
  577. package/dist/AbortError.d.ts +0 -2
  578. package/dist/AbortError.js +0 -7
  579. package/dist/AbortError.js.map +0 -1
  580. package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
  581. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  582. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -55
  583. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  584. package/dist/llamaEvaluator/LlamaBins.d.ts +0 -18
  585. package/dist/llamaEvaluator/LlamaBins.js +0 -5
  586. package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
  587. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +0 -1
  588. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
  589. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +0 -1
  590. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +0 -1
  591. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +0 -1
  592. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
  593. package/dist/llamaEvaluator/LlamaContext/types.d.ts +0 -86
  594. package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
  595. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  596. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  597. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  598. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  599. package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
  600. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
  601. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
  602. package/dist/llamaEvaluator/LlamaModel.d.ts +0 -119
  603. package/dist/llamaEvaluator/LlamaModel.js +0 -322
  604. package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
  605. package/dist/utils/binariesGithubRelease.js.map +0 -1
  606. package/dist/utils/clearLlamaBuild.d.ts +0 -1
  607. package/dist/utils/clearLlamaBuild.js +0 -12
  608. package/dist/utils/clearLlamaBuild.js.map +0 -1
  609. package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
  610. package/dist/utils/cloneLlamaCppRepo.js +0 -102
  611. package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
  612. package/dist/utils/compileLLamaCpp.d.ts +0 -8
  613. package/dist/utils/compileLLamaCpp.js +0 -132
  614. package/dist/utils/compileLLamaCpp.js.map +0 -1
  615. package/dist/utils/getBin.js +0 -78
  616. package/dist/utils/getBin.js.map +0 -1
  617. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
  618. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
  619. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
  620. package/dist/utils/getReleaseInfo.d.ts +0 -7
  621. package/dist/utils/getReleaseInfo.js +0 -30
  622. package/dist/utils/getReleaseInfo.js.map +0 -1
  623. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  624. package/dist/utils/parseModelTypeDescription.js +0 -9
  625. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  626. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  627. package/dist/utils/resolveChatWrapper.js +0 -16
  628. package/dist/utils/resolveChatWrapper.js.map +0 -1
  629. package/dist/utils/usedBinFlag.d.ts +0 -6
  630. package/dist/utils/usedBinFlag.js +0 -15
  631. package/dist/utils/usedBinFlag.js.map +0 -1
  632. package/llama/usedBin.json +0 -3
  633. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  634. /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
  635. /package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +0 -0
  636. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
  637. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  638. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  639. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  640. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
  641. /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
package/llama/addon.cpp CHANGED
@@ -3,42 +3,284 @@
3
3
  #include <algorithm>
4
4
  #include <sstream>
5
5
  #include <vector>
6
+ #include <unordered_map>
6
7
 
7
8
  #include "common.h"
8
9
  #include "common/grammar-parser.h"
9
10
  #include "llama.h"
10
11
  #include "napi.h"
11
12
 
12
- std::string addon_model_token_to_piece(const struct llama_model * model, llama_token token) {
13
+ #ifdef GPU_INFO_USE_CUDA
14
+ # include "gpuInfo/cuda-gpu-info.h"
15
+ #endif
16
+ #ifdef GPU_INFO_USE_VULKAN
17
+ # include "gpuInfo/vulkan-gpu-info.h"
18
+ #endif
19
+ #ifdef GPU_INFO_USE_METAL
20
+ # include "gpuInfo/metal-gpu-info.h"
21
+ #endif
22
+
23
+
24
+ struct addon_logger_log {
25
+ public:
26
+ const int logLevelNumber;
27
+ const std::stringstream* stringStream;
28
+ };
29
+
30
+ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);
31
+
32
+ using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
33
+ void addonCallJsLogCallback(
34
+ Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
35
+ );
36
+ using AddonThreadSafeLogCallbackFunction =
37
+ Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
38
+
39
+
40
+ struct addon_progress_event {
41
+ public:
42
+ const float progress;
43
+ };
44
+
45
+ using AddonThreadSafeProgressCallbackFunctionContext = Napi::Reference<Napi::Value>;
46
+ void addonCallJsProgressCallback(
47
+ Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
48
+ );
49
+ using AddonThreadSafeProgressEventCallbackFunction =
50
+ Napi::TypedThreadSafeFunction<AddonThreadSafeProgressCallbackFunctionContext, addon_progress_event, addonCallJsProgressCallback>;
51
+
52
+
53
+ AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
54
+ bool addonJsLoggerCallbackSet = false;
55
+ int addonLoggerLogLevel = 5;
56
+ bool backendInitialized = false;
57
+ bool backendDisposed = false;
58
+
59
+ void addonCallJsProgressCallback(
60
+ Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
61
+ ) {
62
+ if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
63
+ try {
64
+ callback.Call({Napi::Number::New(env, data->progress)});
65
+ } catch (const Napi::Error& e) {}
66
+ }
67
+
68
+ if (data != nullptr) {
69
+ delete data;
70
+ }
71
+ }
72
+
73
+ static uint64_t calculateBatchMemorySize(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
74
+ uint64_t totalSize = 0;
75
+
76
+ if (embd) {
77
+ totalSize += sizeof(float) * n_tokens_alloc * embd;
78
+ } else {
79
+ totalSize += sizeof(llama_token) * n_tokens_alloc;
80
+ }
81
+
82
+ totalSize += sizeof(llama_pos) * n_tokens_alloc;
83
+ totalSize += sizeof(int32_t) * n_tokens_alloc;
84
+ totalSize += sizeof(llama_seq_id *) * (n_tokens_alloc + 1);
85
+
86
+ totalSize += sizeof(llama_seq_id) * n_seq_max * n_tokens_alloc;
87
+
88
+ totalSize += sizeof(int8_t) * n_tokens_alloc;
89
+
90
+ return totalSize;
91
+ }
92
+
93
+ static void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size) {
94
+ const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
95
+ while (size > 0) {
96
+ int64_t adjustSize = std::min(size, chunkSize);
97
+ Napi::MemoryManagement::AdjustExternalMemory(env, adjustSize);
98
+ size -= adjustSize;
99
+ }
100
+ }
101
+
102
+ static void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
103
+ const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
104
+ while (size > 0) {
105
+ int64_t adjustSize = std::min(size, chunkSize);
106
+ Napi::MemoryManagement::AdjustExternalMemory(env, -adjustSize);
107
+ size -= adjustSize;
108
+ }
109
+ }
110
+
111
+ std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token, bool specialTokens) {
13
112
  std::vector<char> result(8, 0);
14
- const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size());
113
+ const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
15
114
  if (n_tokens < 0) {
16
115
  result.resize(-n_tokens);
17
- int check = llama_token_to_piece(model, token, result.data(), result.size());
116
+ int check = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
18
117
  GGML_ASSERT(check == -n_tokens);
19
- }
20
- else {
118
+ } else {
21
119
  result.resize(n_tokens);
22
120
  }
23
121
 
24
122
  return std::string(result.data(), result.size());
25
123
  }
26
124
 
125
+ #ifdef GPU_INFO_USE_CUDA
126
+ void logCudaError(const char* message) {
127
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
128
+ }
129
+ #endif
130
+ #ifdef GPU_INFO_USE_VULKAN
131
+ void logVulkanWarning(const char* message) {
132
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr);
133
+ }
134
+ #endif
135
+
136
+ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
137
+ uint64_t total = 0;
138
+ uint64_t used = 0;
139
+
140
+ #ifdef GPU_INFO_USE_CUDA
141
+ size_t cudaDeviceTotal = 0;
142
+ size_t cudaDeviceUsed = 0;
143
+ bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
144
+
145
+ if (cudeGetInfoSuccess) {
146
+ total += cudaDeviceTotal;
147
+ used += cudaDeviceUsed;
148
+ }
149
+ #endif
150
+
151
+ #ifdef GPU_INFO_USE_VULKAN
152
+ uint64_t vulkanDeviceTotal = 0;
153
+ uint64_t vulkanDeviceUsed = 0;
154
+ const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, logVulkanWarning);
155
+
156
+ if (vulkanDeviceSupportsMemoryBudgetExtension) {
157
+ total += vulkanDeviceTotal;
158
+ used += vulkanDeviceUsed;
159
+ }
160
+ #endif
161
+
162
+ #ifdef GPU_INFO_USE_METAL
163
+ uint64_t metalDeviceTotal = 0;
164
+ uint64_t metalDeviceUsed = 0;
165
+ getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed);
166
+
167
+ total += metalDeviceTotal;
168
+ used += metalDeviceUsed;
169
+ #endif
170
+
171
+ Napi::Object result = Napi::Object::New(info.Env());
172
+ result.Set("total", Napi::Number::From(info.Env(), total));
173
+ result.Set("used", Napi::Number::From(info.Env(), used));
174
+
175
+ return result;
176
+ }
177
+
178
+ Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
179
+ std::vector<std::string> deviceNames;
180
+
181
+ #ifdef GPU_INFO_USE_CUDA
182
+ gpuInfoGetCudaDeviceNames(&deviceNames, logCudaError);
183
+ #endif
184
+
185
+ #ifdef GPU_INFO_USE_VULKAN
186
+ gpuInfoGetVulkanDeviceNames(&deviceNames, logVulkanWarning);
187
+ #endif
188
+
189
+ #ifdef GPU_INFO_USE_METAL
190
+ getMetalGpuDeviceNames(&deviceNames);
191
+ #endif
192
+
193
+ Napi::Object result = Napi::Object::New(info.Env());
194
+
195
+ Napi::Array deviceNamesNapiArray = Napi::Array::New(info.Env(), deviceNames.size());
196
+ for (size_t i = 0; i < deviceNames.size(); ++i) {
197
+ deviceNamesNapiArray[i] = Napi::String::New(info.Env(), deviceNames[i]);
198
+ }
199
+ result.Set("deviceNames", deviceNamesNapiArray);
200
+
201
+ return result;
202
+ }
203
+
204
+ Napi::Value getGpuType(const Napi::CallbackInfo& info) {
205
+ #ifdef GPU_INFO_USE_CUDA
206
+ return Napi::String::New(info.Env(), "cuda");
207
+ #endif
208
+
209
+ #ifdef GPU_INFO_USE_VULKAN
210
+ return Napi::String::New(info.Env(), "vulkan");
211
+ #endif
212
+
213
+ #ifdef GPU_INFO_USE_METAL
214
+ return Napi::String::New(info.Env(), "metal");
215
+ #endif
216
+
217
+ return info.Env().Undefined();
218
+ }
219
+
220
+ static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
221
+ if (token < 0) {
222
+ return Napi::Number::From(info.Env(), -1);
223
+ }
224
+
225
+ auto tokenType = llama_token_get_type(model, token);
226
+
227
+ if (tokenType == LLAMA_TOKEN_TYPE_UNDEFINED || tokenType == LLAMA_TOKEN_TYPE_UNKNOWN) {
228
+ return Napi::Number::From(info.Env(), -1);
229
+ }
230
+
231
+ return Napi::Number::From(info.Env(), token);
232
+ }
233
+
234
+ static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
235
+ if (token < 0) {
236
+ return Napi::Number::From(info.Env(), -1);
237
+ }
238
+
239
+ auto tokenType = llama_token_get_type(model, token);
240
+
241
+ if (tokenType != LLAMA_TOKEN_TYPE_CONTROL && tokenType != LLAMA_TOKEN_TYPE_USER_DEFINED) {
242
+ return Napi::Number::From(info.Env(), -1);
243
+ }
244
+
245
+ return Napi::Number::From(info.Env(), token);
246
+ }
247
+
248
+ static bool llamaModelParamsProgressCallback(float progress, void * user_data);
249
+
27
250
  class AddonModel : public Napi::ObjectWrap<AddonModel> {
28
251
  public:
29
252
  llama_model_params model_params;
30
253
  llama_model* model;
254
+ uint64_t loadedModelSize = 0;
255
+ Napi::Reference<Napi::Object> addonExportsRef;
256
+ bool hasAddonExportsRef = false;
257
+
258
+ std::string modelPath;
259
+ bool modelLoaded = false;
260
+ bool abortModelLoad = false;
261
+ bool model_load_stopped = false;
262
+ float rawModelLoadPercentage = 0;
263
+ unsigned modelLoadPercentage = 0;
264
+ AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
265
+ bool onLoadProgressEventCallbackSet = false;
266
+ bool hasLoadAbortSignal = false;
267
+
31
268
  bool disposed = false;
32
269
 
33
270
  AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
34
271
  model_params = llama_model_default_params();
35
272
 
36
273
  // Get the model path
37
- std::string modelPath = info[0].As<Napi::String>().Utf8Value();
274
+ modelPath = info[0].As<Napi::String>().Utf8Value();
38
275
 
39
276
  if (info.Length() > 1 && info[1].IsObject()) {
40
277
  Napi::Object options = info[1].As<Napi::Object>();
41
278
 
279
+ if (options.Has("addonExports")) {
280
+ addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
281
+ hasAddonExportsRef = true;
282
+ }
283
+
42
284
  if (options.Has("gpuLayers")) {
43
285
  model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
44
286
  }
@@ -54,14 +296,41 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
54
296
  if (options.Has("useMlock")) {
55
297
  model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
56
298
  }
57
- }
58
299
 
59
- llama_backend_init(false);
60
- model = llama_load_model_from_file(modelPath.c_str(), model_params);
300
+ if (options.Has("checkTensors")) {
301
+ model_params.check_tensors = options.Get("checkTensors").As<Napi::Boolean>().Value();
302
+ }
303
+
304
+ if (options.Has("onLoadProgress")) {
305
+ auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
306
+ if (onLoadProgressJSCallback.IsFunction()) {
307
+ AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
308
+ addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
309
+ info.Env(),
310
+ onLoadProgressJSCallback,
311
+ "onLoadProgressCallback",
312
+ 0,
313
+ 1,
314
+ context,
315
+ [](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
316
+ addonModel->onLoadProgressEventCallbackSet = false;
317
+
318
+ delete ctx;
319
+ },
320
+ this
321
+ );
322
+ onLoadProgressEventCallbackSet = true;
323
+ }
324
+ }
61
325
 
62
- if (model == NULL) {
63
- Napi::Error::New(info.Env(), "Failed to load model").ThrowAsJavaScriptException();
64
- return;
326
+ if (options.Has("hasLoadAbortSignal")) {
327
+ hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
328
+ }
329
+
330
+ if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
331
+ model_params.progress_callback_user_data = &(*this);
332
+ model_params.progress_callback = llamaModelParamsProgressCallback;
333
+ }
65
334
  }
66
335
  }
67
336
 
@@ -74,23 +343,32 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
74
343
  return;
75
344
  }
76
345
 
77
- llama_free_model(model);
78
346
  disposed = true;
79
- }
347
+ if (modelLoaded) {
348
+ modelLoaded = false;
349
+ llama_free_model(model);
80
350
 
81
- Napi::Value Dispose(const Napi::CallbackInfo& info) {
82
- if (disposed) {
83
- return info.Env().Undefined();
351
+ adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
352
+ loadedModelSize = 0;
84
353
  }
85
354
 
86
- dispose();
355
+ if (hasAddonExportsRef) {
356
+ addonExportsRef.Unref();
357
+ hasAddonExportsRef = false;
358
+ }
359
+ }
87
360
 
361
+ Napi::Value Init(const Napi::CallbackInfo& info);
362
+ Napi::Value LoadLora(const Napi::CallbackInfo& info);
363
+ Napi::Value AbortActiveModelLoad(const Napi::CallbackInfo& info) {
364
+ abortModelLoad = true;
88
365
  return info.Env().Undefined();
89
366
  }
367
+ Napi::Value Dispose(const Napi::CallbackInfo& info);
90
368
 
91
369
  Napi::Value Tokenize(const Napi::CallbackInfo& info) {
92
370
  if (disposed) {
93
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
371
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
94
372
  return info.Env().Undefined();
95
373
  }
96
374
 
@@ -108,18 +386,21 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
108
386
  }
109
387
  Napi::Value Detokenize(const Napi::CallbackInfo& info) {
110
388
  if (disposed) {
111
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
389
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
112
390
  return info.Env().Undefined();
113
391
  }
114
392
 
115
393
  Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
394
+ bool decodeSpecialTokens = info.Length() > 0
395
+ ? info[1].As<Napi::Boolean>().Value()
396
+ : false;
116
397
 
117
398
  // Create a stringstream for accumulating the decoded string.
118
399
  std::stringstream ss;
119
400
 
120
401
  // Decode each token and accumulate the result.
121
402
  for (size_t i = 0; i < tokens.ElementLength(); i++) {
122
- const std::string piece = addon_model_token_to_piece(model, (llama_token)tokens[i]);
403
+ const std::string piece = addon_model_token_to_piece(model, (llama_token)tokens[i], decodeSpecialTokens);
123
404
 
124
405
  if (piece.empty()) {
125
406
  continue;
@@ -133,16 +414,25 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
133
414
 
134
415
  Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info) {
135
416
  if (disposed) {
136
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
417
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
137
418
  return info.Env().Undefined();
138
419
  }
139
420
 
140
421
  return Napi::Number::From(info.Env(), llama_n_ctx_train(model));
141
422
  }
142
423
 
424
+ Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
425
+ if (disposed) {
426
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
427
+ return info.Env().Undefined();
428
+ }
429
+
430
+ return Napi::Number::From(info.Env(), llama_n_embd(model));
431
+ }
432
+
143
433
  Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
144
434
  if (disposed) {
145
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
435
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
146
436
  return info.Env().Undefined();
147
437
  }
148
438
 
@@ -151,7 +441,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
151
441
 
152
442
  Napi::Value GetTotalParameters(const Napi::CallbackInfo& info) {
153
443
  if (disposed) {
154
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
444
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
155
445
  return info.Env().Undefined();
156
446
  }
157
447
 
@@ -160,7 +450,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
160
450
 
161
451
  Napi::Value GetModelDescription(const Napi::CallbackInfo& info) {
162
452
  if (disposed) {
163
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
453
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
164
454
  return info.Env().Undefined();
165
455
  }
166
456
 
@@ -172,63 +462,63 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
172
462
 
173
463
  Napi::Value TokenBos(const Napi::CallbackInfo& info) {
174
464
  if (disposed) {
175
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
465
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
176
466
  return info.Env().Undefined();
177
467
  }
178
468
 
179
- return Napi::Number::From(info.Env(), llama_token_bos(model));
469
+ return getNapiControlToken(info, model, llama_token_bos(model));
180
470
  }
181
471
  Napi::Value TokenEos(const Napi::CallbackInfo& info) {
182
472
  if (disposed) {
183
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
473
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
184
474
  return info.Env().Undefined();
185
475
  }
186
476
 
187
- return Napi::Number::From(info.Env(), llama_token_eos(model));
477
+ return getNapiControlToken(info, model, llama_token_eos(model));
188
478
  }
189
479
  Napi::Value TokenNl(const Napi::CallbackInfo& info) {
190
480
  if (disposed) {
191
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
481
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
192
482
  return info.Env().Undefined();
193
483
  }
194
484
 
195
- return Napi::Number::From(info.Env(), llama_token_nl(model));
485
+ return getNapiToken(info, model, llama_token_nl(model));
196
486
  }
197
487
  Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
198
488
  if (disposed) {
199
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
489
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
200
490
  return info.Env().Undefined();
201
491
  }
202
492
 
203
- return Napi::Number::From(info.Env(), llama_token_prefix(model));
493
+ return getNapiControlToken(info, model, llama_token_prefix(model));
204
494
  }
205
495
  Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
206
496
  if (disposed) {
207
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
497
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
208
498
  return info.Env().Undefined();
209
499
  }
210
500
 
211
- return Napi::Number::From(info.Env(), llama_token_middle(model));
501
+ return getNapiControlToken(info, model, llama_token_middle(model));
212
502
  }
213
503
  Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
214
504
  if (disposed) {
215
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
505
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
216
506
  return info.Env().Undefined();
217
507
  }
218
508
 
219
- return Napi::Number::From(info.Env(), llama_token_suffix(model));
509
+ return getNapiControlToken(info, model, llama_token_suffix(model));
220
510
  }
221
511
  Napi::Value EotToken(const Napi::CallbackInfo& info) {
222
512
  if (disposed) {
223
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
513
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
224
514
  return info.Env().Undefined();
225
515
  }
226
516
 
227
- return Napi::Number::From(info.Env(), llama_token_eot(model));
517
+ return getNapiControlToken(info, model, llama_token_eot(model));
228
518
  }
229
519
  Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
230
520
  if (disposed) {
231
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
521
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
232
522
  return info.Env().Undefined();
233
523
  }
234
524
 
@@ -245,6 +535,57 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
245
535
  return Napi::String::New(info.Env(), ss.str());
246
536
  }
247
537
 
538
+ Napi::Value GetTokenType(const Napi::CallbackInfo& info) {
539
+ if (disposed) {
540
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
541
+ return info.Env().Undefined();
542
+ }
543
+
544
+ if (info[0].IsNumber() == false) {
545
+ return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_TYPE_UNDEFINED));
546
+ }
547
+
548
+ int token = info[0].As<Napi::Number>().Int32Value();
549
+ auto tokenType = llama_token_get_type(model, token);
550
+
551
+ return Napi::Number::From(info.Env(), int32_t(tokenType));
552
+ }
553
+ Napi::Value IsEogToken(const Napi::CallbackInfo& info) {
554
+ if (disposed) {
555
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
556
+ return info.Env().Undefined();
557
+ }
558
+
559
+ if (info[0].IsNumber() == false) {
560
+ return Napi::Boolean::New(info.Env(), false);
561
+ }
562
+
563
+ int token = info[0].As<Napi::Number>().Int32Value();
564
+
565
+ return Napi::Boolean::New(info.Env(), llama_token_is_eog(model, token));
566
+ }
567
+ Napi::Value GetVocabularyType(const Napi::CallbackInfo& info) {
568
+ if (disposed) {
569
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
570
+ return info.Env().Undefined();
571
+ }
572
+
573
+ auto vocabularyType = llama_vocab_type(model);
574
+
575
+ return Napi::Number::From(info.Env(), int32_t(vocabularyType));
576
+ }
577
+ Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info) {
578
+ const int addBos = llama_add_bos_token(model);
579
+
580
+ bool shouldPrependBos = addBos != -1 ? bool(addBos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
581
+
582
+ return Napi::Boolean::New(info.Env(), shouldPrependBos);
583
+ }
584
+
585
+ Napi::Value GetModelSize(const Napi::CallbackInfo& info) {
586
+ return Napi::Number::From(info.Env(), llama_model_size(model));
587
+ }
588
+
248
589
  static void init(Napi::Object exports) {
249
590
  exports.Set(
250
591
  "AddonModel",
@@ -252,9 +593,13 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
252
593
  exports.Env(),
253
594
  "AddonModel",
254
595
  {
596
+ InstanceMethod("init", &AddonModel::Init),
597
+ InstanceMethod("loadLora", &AddonModel::LoadLora),
598
+ InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
255
599
  InstanceMethod("tokenize", &AddonModel::Tokenize),
256
600
  InstanceMethod("detokenize", &AddonModel::Detokenize),
257
601
  InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
602
+ InstanceMethod("getEmbeddingVectorSize", &AddonModel::GetEmbeddingVectorSize),
258
603
  InstanceMethod("getTotalSize", &AddonModel::GetTotalSize),
259
604
  InstanceMethod("getTotalParameters", &AddonModel::GetTotalParameters),
260
605
  InstanceMethod("getModelDescription", &AddonModel::GetModelDescription),
@@ -266,16 +611,260 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
266
611
  InstanceMethod("suffixToken", &AddonModel::SuffixToken),
267
612
  InstanceMethod("eotToken", &AddonModel::EotToken),
268
613
  InstanceMethod("getTokenString", &AddonModel::GetTokenString),
269
- InstanceMethod("dispose", &AddonModel::Dispose)
614
+ InstanceMethod("getTokenType", &AddonModel::GetTokenType),
615
+ InstanceMethod("isEogToken", &AddonModel::IsEogToken),
616
+ InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
617
+ InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
618
+ InstanceMethod("getModelSize", &AddonModel::GetModelSize),
619
+ InstanceMethod("dispose", &AddonModel::Dispose),
270
620
  }
271
621
  )
272
622
  );
273
623
  }
274
624
  };
275
625
 
626
+ static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
627
+ AddonModel* addonModel = (AddonModel *) user_data;
628
+ unsigned percentage = (unsigned) (100 * progress);
629
+
630
+ if (percentage > addonModel->modelLoadPercentage) {
631
+ addonModel->modelLoadPercentage = percentage;
632
+
633
+ // original llama.cpp logs
634
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
635
+ if (percentage >= 100) {
636
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
637
+ }
638
+ }
639
+
640
+ if (progress > addonModel->rawModelLoadPercentage) {
641
+ addonModel->rawModelLoadPercentage = progress;
642
+
643
+ if (addonModel->onLoadProgressEventCallbackSet) {
644
+ addon_progress_event* data = new addon_progress_event {
645
+ progress
646
+ };
647
+
648
+ auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);
649
+
650
+ if (status != napi_ok) {
651
+ delete data;
652
+ }
653
+ }
654
+ }
655
+
656
+ return !(addonModel->abortModelLoad);
657
+ }
658
+
659
+ class AddonModelLoadModelWorker : public Napi::AsyncWorker {
660
+ public:
661
+ AddonModel* model;
662
+
663
+ AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
664
+ : Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
665
+ model(model),
666
+ deferred(Napi::Promise::Deferred::New(env)) {
667
+ model->Ref();
668
+ }
669
+ ~AddonModelLoadModelWorker() {
670
+ model->Unref();
671
+ }
672
+
673
+ Napi::Promise GetPromise() {
674
+ return deferred.Promise();
675
+ }
676
+
677
+ protected:
678
+ Napi::Promise::Deferred deferred;
679
+
680
+ void Execute() {
681
+ try {
682
+ model->model = llama_load_model_from_file(model->modelPath.c_str(), model->model_params);
683
+
684
+ model->modelLoaded = model->model != nullptr && model->model != NULL;
685
+ } catch (const std::exception& e) {
686
+ SetError(e.what());
687
+ } catch(...) {
688
+ SetError("Unknown error when calling \"llama_load_model_from_file\"");
689
+ }
690
+ }
691
+ void OnOK() {
692
+ if (model->modelLoaded) {
693
+ uint64_t modelSize = llama_model_size(model->model);
694
+ adjustNapiExternalMemoryAdd(Env(), modelSize);
695
+ model->loadedModelSize = modelSize;
696
+ }
697
+
698
+ deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
699
+ if (model->onLoadProgressEventCallbackSet) {
700
+ model->addonThreadSafeOnLoadProgressEventCallback.Release();
701
+ }
702
+ }
703
+ void OnError(const Napi::Error& err) {
704
+ deferred.Reject(err.Value());
705
+ }
706
+ };
707
+ class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
708
+ public:
709
+ AddonModel* model;
710
+
711
+ AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
712
+ : Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
713
+ model(model),
714
+ deferred(Napi::Promise::Deferred::New(env)) {
715
+ model->Ref();
716
+ }
717
+ ~AddonModelUnloadModelWorker() {
718
+ model->Unref();
719
+ }
720
+
721
+ Napi::Promise GetPromise() {
722
+ return deferred.Promise();
723
+ }
724
+
725
+ protected:
726
+ Napi::Promise::Deferred deferred;
727
+
728
+ void Execute() {
729
+ try {
730
+ llama_free_model(model->model);
731
+ model->modelLoaded = false;
732
+
733
+ model->dispose();
734
+ } catch (const std::exception& e) {
735
+ SetError(e.what());
736
+ } catch(...) {
737
+ SetError("Unknown error when calling \"llama_free_model\"");
738
+ }
739
+ }
740
+ void OnOK() {
741
+ adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
742
+ model->loadedModelSize = 0;
743
+
744
+ deferred.Resolve(Env().Undefined());
745
+ }
746
+ void OnError(const Napi::Error& err) {
747
+ deferred.Reject(err.Value());
748
+ }
749
+ };
750
+ class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
751
+ public:
752
+ AddonModel* model;
753
+ std::string loraFilePath;
754
+ float loraScale;
755
+ int32_t loraThreads;
756
+ std::string baseModelPath;
757
+
758
+ AddonModelLoadLoraWorker(
759
+ const Napi::Env& env,
760
+ AddonModel* model,
761
+ std::string loraFilePath,
762
+ float loraScale,
763
+ int32_t loraThreads,
764
+ std::string baseModelPath
765
+ )
766
+ : Napi::AsyncWorker(env, "AddonModelLoadLoraWorker"),
767
+ model(model),
768
+ loraFilePath(loraFilePath),
769
+ loraScale(loraScale),
770
+ loraThreads(loraThreads),
771
+ baseModelPath(baseModelPath),
772
+ deferred(Napi::Promise::Deferred::New(env)) {
773
+ model->Ref();
774
+ }
775
+ ~AddonModelLoadLoraWorker() {
776
+ model->Unref();
777
+ }
778
+
779
+ Napi::Promise GetPromise() {
780
+ return deferred.Promise();
781
+ }
782
+
783
+ protected:
784
+ Napi::Promise::Deferred deferred;
785
+
786
+ void Execute() {
787
+ try {
788
+ const auto res = llama_model_apply_lora_from_file(
789
+ model->model,
790
+ loraFilePath.c_str(),
791
+ loraScale,
792
+ baseModelPath.empty() ? NULL : baseModelPath.c_str(),
793
+ loraThreads
794
+ );
795
+
796
+ if (res != 0) {
797
+ SetError(
798
+ std::string(
799
+ std::string("Failed to apply LoRA \"") + loraFilePath + std::string("\"") + (
800
+ baseModelPath.empty()
801
+ ? std::string("")
802
+ : (std::string(" with base model \"") + baseModelPath + std::string("\""))
803
+ )
804
+ )
805
+ );
806
+ }
807
+ } catch (const std::exception& e) {
808
+ SetError(e.what());
809
+ } catch(...) {
810
+ SetError("Unknown error when calling \"llama_model_apply_lora_from_file\"");
811
+ }
812
+ }
813
+ void OnOK() {
814
+ deferred.Resolve(Env().Undefined());
815
+ }
816
+ void OnError(const Napi::Error& err) {
817
+ deferred.Reject(err.Value());
818
+ }
819
+ };
820
+
821
+ Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
822
+ if (disposed) {
823
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
824
+ return info.Env().Undefined();
825
+ }
826
+
827
+ AddonModelLoadModelWorker* worker = new AddonModelLoadModelWorker(this->Env(), this);
828
+ worker->Queue();
829
+ return worker->GetPromise();
830
+ }
831
+ Napi::Value AddonModel::LoadLora(const Napi::CallbackInfo& info) {
832
+ std::string loraFilePath = info[0].As<Napi::String>().Utf8Value();
833
+ float scale = info[1].As<Napi::Number>().FloatValue();
834
+ int32_t threads = info[2].As<Napi::Number>().Int32Value();
835
+ std::string baseModelPath = (info.Length() > 3 && info[3].IsString()) ? info[3].As<Napi::String>().Utf8Value() : std::string("");
836
+
837
+ int32_t resolvedThreads = threads == 0 ? std::thread::hardware_concurrency() : threads;
838
+
839
+ AddonModelLoadLoraWorker* worker = new AddonModelLoadLoraWorker(this->Env(), this, loraFilePath, scale, threads, baseModelPath);
840
+ worker->Queue();
841
+ return worker->GetPromise();
842
+ }
843
+ Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
844
+ if (disposed) {
845
+ return info.Env().Undefined();
846
+ }
847
+
848
+ if (modelLoaded) {
849
+ modelLoaded = false;
850
+
851
+ AddonModelUnloadModelWorker* worker = new AddonModelUnloadModelWorker(this->Env(), this);
852
+ worker->Queue();
853
+ return worker->GetPromise();
854
+ } else {
855
+ dispose();
856
+
857
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
858
+ deferred.Resolve(info.Env().Undefined());
859
+ return deferred.Promise();
860
+ }
861
+ }
862
+
276
863
  class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
277
864
  public:
278
865
  grammar_parser::parse_state parsed_grammar;
866
+ Napi::Reference<Napi::Object> addonExportsRef;
867
+ bool hasAddonExportsRef = false;
279
868
 
280
869
  AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
281
870
  // Get the model path
@@ -285,6 +874,11 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
285
874
  if (info.Length() > 1 && info[1].IsObject()) {
286
875
  Napi::Object options = info[1].As<Napi::Object>();
287
876
 
877
+ if (options.Has("addonExports")) {
878
+ addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
879
+ hasAddonExportsRef = true;
880
+ }
881
+
288
882
  if (options.Has("printGrammar")) {
289
883
  should_print_grammar = options.Get("printGrammar").As<Napi::Boolean>().Value();
290
884
  }
@@ -302,6 +896,13 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
302
896
  }
303
897
  }
304
898
 
899
+ ~AddonGrammar() {
900
+ if (hasAddonExportsRef) {
901
+ addonExportsRef.Unref();
902
+ hasAddonExportsRef = false;
903
+ }
904
+ }
905
+
305
906
  static void init(Napi::Object exports) {
306
907
  exports.Set("AddonGrammar", DefineClass(exports.Env(), "AddonGrammar", {}));
307
908
  }
@@ -340,9 +941,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
340
941
  llama_context_params context_params;
341
942
  llama_context* ctx;
342
943
  llama_batch batch;
944
+ uint64_t batchMemorySize = 0;
343
945
  bool has_batch = false;
344
946
  int32_t batch_n_tokens = 0;
345
947
  int n_cur = 0;
948
+
949
+ uint64_t loadedContextMemorySize = 0;
950
+ bool contextLoaded = false;
951
+
346
952
  bool disposed = false;
347
953
 
348
954
  AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
@@ -358,7 +964,9 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
358
964
  if (info.Length() > 1 && info[1].IsObject()) {
359
965
  Napi::Object options = info[1].As<Napi::Object>();
360
966
 
361
- if (options.Has("seed")) {
967
+ if (options.Has("noSeed")) {
968
+ context_params.seed = time(NULL);
969
+ } else if (options.Has("seed")) {
362
970
  context_params.seed = options.Get("seed").As<Napi::Number>().Uint32Value();
363
971
  }
364
972
 
@@ -368,14 +976,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
368
976
 
369
977
  if (options.Has("batchSize")) {
370
978
  context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
979
+ context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
371
980
  }
372
981
 
373
- if (options.Has("logitsAll")) {
374
- context_params.logits_all = options.Get("logitsAll").As<Napi::Boolean>().Value();
982
+ if (options.Has("sequences")) {
983
+ context_params.n_seq_max = options.Get("sequences").As<Napi::Number>().Uint32Value();
375
984
  }
376
985
 
377
- if (options.Has("embedding")) {
378
- context_params.embedding = options.Get("embedding").As<Napi::Boolean>().Value();
986
+ if (options.Has("embeddings")) {
987
+ context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
379
988
  }
380
989
 
381
990
  if (options.Has("threads")) {
@@ -386,9 +995,6 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
386
995
  context_params.n_threads_batch = resolved_n_threads;
387
996
  }
388
997
  }
389
-
390
- ctx = llama_new_context_with_model(model->model, context_params);
391
- Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
392
998
  }
393
999
  ~AddonContext() {
394
1000
  dispose();
@@ -399,13 +1005,18 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
399
1005
  return;
400
1006
  }
401
1007
 
402
- Napi::MemoryManagement::AdjustExternalMemory(Env(), -(int64_t)llama_get_state_size(ctx));
403
- llama_free(ctx);
1008
+ disposed = true;
1009
+ if (contextLoaded) {
1010
+ contextLoaded = false;
1011
+ llama_free(ctx);
1012
+
1013
+ adjustNapiExternalMemorySubtract(Env(), loadedContextMemorySize);
1014
+ loadedContextMemorySize = 0;
1015
+ }
1016
+
404
1017
  model->Unref();
405
1018
 
406
1019
  disposeBatch();
407
-
408
- disposed = true;
409
1020
  }
410
1021
  void disposeBatch() {
411
1022
  if (!has_batch) {
@@ -415,16 +1026,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
415
1026
  llama_batch_free(batch);
416
1027
  has_batch = false;
417
1028
  batch_n_tokens = 0;
1029
+
1030
+ adjustNapiExternalMemorySubtract(Env(), batchMemorySize);
1031
+ batchMemorySize = 0;
418
1032
  }
419
- Napi::Value Dispose(const Napi::CallbackInfo& info) {
420
- if (disposed) {
421
- return info.Env().Undefined();
422
- }
423
1033
 
424
- dispose();
1034
+ Napi::Value Init(const Napi::CallbackInfo& info);
1035
+ Napi::Value Dispose(const Napi::CallbackInfo& info);
425
1036
 
426
- return info.Env().Undefined();
427
- }
428
1037
  Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
429
1038
  if (disposed) {
430
1039
  Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
@@ -449,6 +1058,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
449
1058
  has_batch = true;
450
1059
  batch_n_tokens = n_tokens;
451
1060
 
1061
+ uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_n_embd(model->model), context_params.n_batch);
1062
+ if (newBatchMemorySize > batchMemorySize) {
1063
+ adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
1064
+ batchMemorySize = newBatchMemorySize;
1065
+ } else if (newBatchMemorySize < batchMemorySize) {
1066
+ adjustNapiExternalMemorySubtract(Env(), batchMemorySize - newBatchMemorySize);
1067
+ batchMemorySize = newBatchMemorySize;
1068
+ }
1069
+
452
1070
  return info.Env().Undefined();
453
1071
  }
454
1072
  Napi::Value DisposeBatch(const Napi::CallbackInfo& info) {
@@ -497,7 +1115,12 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
497
1115
 
498
1116
  int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
499
1117
 
500
- llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
1118
+ bool result = llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
1119
+
1120
+ if (!result) {
1121
+ Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
1122
+ return info.Env().Undefined();
1123
+ }
501
1124
 
502
1125
  return info.Env().Undefined();
503
1126
  }
@@ -511,9 +1134,9 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
511
1134
  int32_t startPos = info[1].As<Napi::Number>().Int32Value();
512
1135
  int32_t endPos = info[2].As<Napi::Number>().Int32Value();
513
1136
 
514
- llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
1137
+ bool result = llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
515
1138
 
516
- return info.Env().Undefined();
1139
+ return Napi::Boolean::New(info.Env(), result);
517
1140
  }
518
1141
  Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
519
1142
  if (disposed) {
@@ -526,7 +1149,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
526
1149
  int32_t endPos = info[2].As<Napi::Number>().Int32Value();
527
1150
  int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
528
1151
 
529
- llama_kv_cache_seq_shift(ctx, sequenceId, startPos, endPos, shiftDelta);
1152
+ llama_kv_cache_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);
530
1153
 
531
1154
  return info.Env().Undefined();
532
1155
  }
@@ -534,7 +1157,8 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
534
1157
  Napi::Value SampleToken(const Napi::CallbackInfo& info);
535
1158
 
536
1159
  Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
537
- AddonGrammarEvaluationState* grammar_evaluation_state = Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
1160
+ AddonGrammarEvaluationState* grammar_evaluation_state =
1161
+ Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
538
1162
  llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
539
1163
 
540
1164
  if ((grammar_evaluation_state)->grammar != nullptr) {
@@ -544,6 +1168,77 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
544
1168
  return info.Env().Undefined();
545
1169
  }
546
1170
 
1171
+ Napi::Value CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info) {
1172
+ AddonGrammarEvaluationState* grammar_evaluation_state =
1173
+ Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
1174
+ llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
1175
+
1176
+ if ((grammar_evaluation_state)->grammar != nullptr) {
1177
+ std::vector<llama_token_data> candidates;
1178
+ candidates.reserve(1);
1179
+ candidates.emplace_back(llama_token_data { tokenId, 1, 0.0f });
1180
+
1181
+ llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
1182
+
1183
+ llama_sample_grammar(ctx, &candidates_p, (grammar_evaluation_state)->grammar);
1184
+
1185
+ if (candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) {
1186
+ return Napi::Boolean::New(info.Env(), false);
1187
+ }
1188
+
1189
+ return Napi::Boolean::New(info.Env(), true);
1190
+ }
1191
+
1192
+ return Napi::Boolean::New(info.Env(), false);
1193
+ }
1194
+
1195
+ Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
1196
+ if (disposed) {
1197
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1198
+ return info.Env().Undefined();
1199
+ }
1200
+
1201
+ int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
1202
+
1203
+ if (inputTokensLength <= 0) {
1204
+ Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
1205
+ return info.Env().Undefined();
1206
+ }
1207
+
1208
+ const int n_embd = llama_n_embd(model->model);
1209
+ const auto* embeddings = llama_get_embeddings_seq(ctx, 0);
1210
+ if (embeddings == NULL) {
1211
+ embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
1212
+
1213
+ if (embeddings == NULL) {
1214
+ Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
1215
+ return info.Env().Undefined();
1216
+ }
1217
+ }
1218
+
1219
+ Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
1220
+ for (size_t i = 0; i < n_embd; ++i) {
1221
+ result[i] = embeddings[i];
1222
+ }
1223
+
1224
+ return result;
1225
+ }
1226
+
1227
+ Napi::Value GetStateSize(const Napi::CallbackInfo& info) {
1228
+ if (disposed) {
1229
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1230
+ return info.Env().Undefined();
1231
+ }
1232
+
1233
+ return Napi::Number::From(info.Env(), llama_state_get_size(ctx));
1234
+ }
1235
+
1236
+ Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
1237
+ llama_print_timings(ctx);
1238
+ llama_reset_timings(ctx);
1239
+ return info.Env().Undefined();
1240
+ }
1241
+
547
1242
  static void init(Napi::Object exports) {
548
1243
  exports.Set(
549
1244
  "AddonContext",
@@ -551,6 +1246,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
551
1246
  exports.Env(),
552
1247
  "AddonContext",
553
1248
  {
1249
+ InstanceMethod("init", &AddonContext::Init),
554
1250
  InstanceMethod("getContextSize", &AddonContext::GetContextSize),
555
1251
  InstanceMethod("initBatch", &AddonContext::InitBatch),
556
1252
  InstanceMethod("addToBatch", &AddonContext::AddToBatch),
@@ -560,7 +1256,11 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
560
1256
  InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
561
1257
  InstanceMethod("sampleToken", &AddonContext::SampleToken),
562
1258
  InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
563
- InstanceMethod("dispose", &AddonContext::Dispose)
1259
+ InstanceMethod("canBeNextTokenForGrammarEvaluationState", &AddonContext::CanBeNextTokenForGrammarEvaluationState),
1260
+ InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
1261
+ InstanceMethod("getStateSize", &AddonContext::GetStateSize),
1262
+ InstanceMethod("printTimings", &AddonContext::PrintTimings),
1263
+ InstanceMethod("dispose", &AddonContext::Dispose),
564
1264
  }
565
1265
  )
566
1266
  );
@@ -568,53 +1268,198 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
568
1268
  };
569
1269
 
570
1270
 
571
- class AddonContextDecodeBatchWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
1271
+ class AddonContextDecodeBatchWorker : public Napi::AsyncWorker {
572
1272
  public:
573
1273
  AddonContext* ctx;
574
1274
 
575
- AddonContextDecodeBatchWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
576
- : Napi::AsyncWorker(info.Env(), "AddonContextDecodeBatchWorker"),
1275
+ AddonContextDecodeBatchWorker(const Napi::Env& env, AddonContext* ctx)
1276
+ : Napi::AsyncWorker(env, "AddonContextDecodeBatchWorker"),
577
1277
  ctx(ctx),
578
- Napi::Promise::Deferred(info.Env()) {
1278
+ deferred(Napi::Promise::Deferred::New(env)) {
579
1279
  ctx->Ref();
580
1280
  }
581
1281
  ~AddonContextDecodeBatchWorker() {
582
1282
  ctx->Unref();
583
1283
  }
584
- using Napi::AsyncWorker::Queue;
585
- using Napi::Promise::Deferred::Promise;
1284
+
1285
+ Napi::Promise GetPromise() {
1286
+ return deferred.Promise();
1287
+ }
586
1288
 
587
1289
  protected:
1290
+ Napi::Promise::Deferred deferred;
1291
+
588
1292
  void Execute() {
589
- // Perform the evaluation using llama_decode.
590
- int r = llama_decode(ctx->ctx, ctx->batch);
1293
+ try {
1294
+ // Perform the evaluation using llama_decode.
1295
+ int r = llama_decode(ctx->ctx, ctx->batch);
1296
+
1297
+ if (r != 0) {
1298
+ if (r == 1) {
1299
+ SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
1300
+ } else {
1301
+ SetError("Eval has failed");
1302
+ }
591
1303
 
592
- if (r != 0) {
593
- if (r == 1) {
594
- SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
595
- } else {
596
- SetError("Eval has failed");
1304
+ return;
597
1305
  }
598
1306
 
599
- return;
1307
+ llama_synchronize(ctx->ctx);
1308
+ } catch (const std::exception& e) {
1309
+ SetError(e.what());
1310
+ } catch(...) {
1311
+ SetError("Unknown error when calling \"llama_decode\"");
600
1312
  }
601
1313
  }
602
1314
  void OnOK() {
603
- Napi::Env env = Napi::AsyncWorker::Env();
604
- Napi::Promise::Deferred::Resolve(env.Undefined());
1315
+ deferred.Resolve(Env().Undefined());
605
1316
  }
606
1317
  void OnError(const Napi::Error& err) {
607
- Napi::Promise::Deferred::Reject(err.Value());
1318
+ deferred.Reject(err.Value());
608
1319
  }
609
1320
  };
610
1321
 
611
1322
  Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
612
- AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info, this);
1323
+ AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
1324
+ worker->Queue();
1325
+ return worker->GetPromise();
1326
+ }
1327
+
1328
+ class AddonContextLoadContextWorker : public Napi::AsyncWorker {
1329
+ public:
1330
+ AddonContext* context;
1331
+
1332
+ AddonContextLoadContextWorker(const Napi::Env& env, AddonContext* context)
1333
+ : Napi::AsyncWorker(env, "AddonContextLoadContextWorker"),
1334
+ context(context),
1335
+ deferred(Napi::Promise::Deferred::New(env)) {
1336
+ context->Ref();
1337
+ }
1338
+ ~AddonContextLoadContextWorker() {
1339
+ context->Unref();
1340
+ }
1341
+
1342
+ Napi::Promise GetPromise() {
1343
+ return deferred.Promise();
1344
+ }
1345
+
1346
+ protected:
1347
+ Napi::Promise::Deferred deferred;
1348
+
1349
+ void Execute() {
1350
+ try {
1351
+ context->ctx = llama_new_context_with_model(context->model->model, context->context_params);
1352
+
1353
+ context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
1354
+ } catch (const std::exception& e) {
1355
+ SetError(e.what());
1356
+ } catch(...) {
1357
+ SetError("Unknown error when calling \"llama_new_context_with_model\"");
1358
+ }
1359
+ }
1360
+ void OnOK() {
1361
+ if (context->contextLoaded) {
1362
+ uint64_t contextMemorySize = llama_state_get_size(context->ctx);
1363
+ adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
1364
+ context->loadedContextMemorySize = contextMemorySize;
1365
+ }
1366
+
1367
+ deferred.Resolve(Napi::Boolean::New(Env(), context->contextLoaded));
1368
+ }
1369
+ void OnError(const Napi::Error& err) {
1370
+ deferred.Reject(err.Value());
1371
+ }
1372
+ };
1373
+ class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
1374
+ public:
1375
+ AddonContext* context;
1376
+
1377
+ AddonContextUnloadContextWorker(const Napi::Env& env, AddonContext* context)
1378
+ : Napi::AsyncWorker(env, "AddonContextUnloadContextWorker"),
1379
+ context(context),
1380
+ deferred(Napi::Promise::Deferred::New(env)) {
1381
+ context->Ref();
1382
+ }
1383
+ ~AddonContextUnloadContextWorker() {
1384
+ context->Unref();
1385
+ }
1386
+
1387
+ Napi::Promise GetPromise() {
1388
+ return deferred.Promise();
1389
+ }
1390
+
1391
+ protected:
1392
+ Napi::Promise::Deferred deferred;
1393
+
1394
+ void Execute() {
1395
+ try {
1396
+ llama_free(context->ctx);
1397
+ context->contextLoaded = false;
1398
+
1399
+ try {
1400
+ if (context->has_batch) {
1401
+ llama_batch_free(context->batch);
1402
+ context->has_batch = false;
1403
+ context->batch_n_tokens = 0;
1404
+ }
1405
+
1406
+ context->dispose();
1407
+ } catch (const std::exception& e) {
1408
+ SetError(e.what());
1409
+ } catch(...) {
1410
+ SetError("Unknown error when calling \"llama_batch_free\"");
1411
+ }
1412
+ } catch (const std::exception& e) {
1413
+ SetError(e.what());
1414
+ } catch(...) {
1415
+ SetError("Unknown error when calling \"llama_free\"");
1416
+ }
1417
+ }
1418
+ void OnOK() {
1419
+ adjustNapiExternalMemorySubtract(Env(), context->loadedContextMemorySize);
1420
+ context->loadedContextMemorySize = 0;
1421
+
1422
+ adjustNapiExternalMemorySubtract(Env(), context->batchMemorySize);
1423
+ context->batchMemorySize = 0;
1424
+
1425
+ deferred.Resolve(Env().Undefined());
1426
+ }
1427
+ void OnError(const Napi::Error& err) {
1428
+ deferred.Reject(err.Value());
1429
+ }
1430
+ };
1431
+
1432
+ Napi::Value AddonContext::Init(const Napi::CallbackInfo& info) {
1433
+ if (disposed) {
1434
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1435
+ return info.Env().Undefined();
1436
+ }
1437
+
1438
+ AddonContextLoadContextWorker* worker = new AddonContextLoadContextWorker(this->Env(), this);
613
1439
  worker->Queue();
614
- return worker->Promise();
1440
+ return worker->GetPromise();
615
1441
  }
1442
+ Napi::Value AddonContext::Dispose(const Napi::CallbackInfo& info) {
1443
+ if (disposed) {
1444
+ return info.Env().Undefined();
1445
+ }
616
1446
 
617
- class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
1447
+ if (contextLoaded) {
1448
+ contextLoaded = false;
1449
+
1450
+ AddonContextUnloadContextWorker* worker = new AddonContextUnloadContextWorker(this->Env(), this);
1451
+ worker->Queue();
1452
+ return worker->GetPromise();
1453
+ } else {
1454
+ dispose();
1455
+
1456
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
1457
+ deferred.Resolve(info.Env().Undefined());
1458
+ return deferred.Promise();
1459
+ }
1460
+ }
1461
+
1462
+ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
618
1463
  public:
619
1464
  AddonContext* ctx;
620
1465
  AddonGrammarEvaluationState* grammar_evaluation_state;
@@ -622,18 +1467,21 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
622
1467
  bool use_grammar = false;
623
1468
  llama_token result;
624
1469
  float temperature = 0.0f;
1470
+ float min_p = 0;
625
1471
  int32_t top_k = 40;
626
1472
  float top_p = 0.95f;
627
1473
  float repeat_penalty = 1.10f; // 1.0 = disabled
628
1474
  float repeat_penalty_presence_penalty = 0.00f; // 0.0 = disabled
629
1475
  float repeat_penalty_frequency_penalty = 0.00f; // 0.0 = disabled
630
1476
  std::vector<llama_token> repeat_penalty_tokens;
1477
+ std::unordered_map<llama_token, float> tokenBiases;
1478
+ bool useTokenBiases = false;
631
1479
  bool use_repeat_penalty = false;
632
1480
 
633
1481
  AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
634
1482
  : Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
635
1483
  ctx(ctx),
636
- Napi::Promise::Deferred(info.Env()) {
1484
+ deferred(Napi::Promise::Deferred::New(info.Env())) {
637
1485
  ctx->Ref();
638
1486
 
639
1487
  batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
@@ -645,6 +1493,10 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
645
1493
  temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
646
1494
  }
647
1495
 
1496
+ if (options.Has("minP")) {
1497
+ min_p = options.Get("minP").As<Napi::Number>().FloatValue();
1498
+ }
1499
+
648
1500
  if (options.Has("topK")) {
649
1501
  top_k = options.Get("topK").As<Napi::Number>().Int32Value();
650
1502
  }
@@ -668,6 +1520,19 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
668
1520
  use_repeat_penalty = true;
669
1521
  }
670
1522
 
1523
+ if (options.Has("tokenBiasKeys") && options.Has("tokenBiasValues")) {
1524
+ Napi::Uint32Array tokenBiasKeys = options.Get("tokenBiasKeys").As<Napi::Uint32Array>();
1525
+ Napi::Float32Array tokenBiasValues = options.Get("tokenBiasValues").As<Napi::Float32Array>();
1526
+
1527
+ if (tokenBiasKeys.ElementLength() == tokenBiasValues.ElementLength()) {
1528
+ for (size_t i = 0; i < tokenBiasKeys.ElementLength(); i++) {
1529
+ tokenBiases[static_cast<llama_token>(tokenBiasKeys[i])] = tokenBiasValues[i];
1530
+ }
1531
+
1532
+ useTokenBiases = true;
1533
+ }
1534
+ }
1535
+
671
1536
  if (options.Has("repeatPenaltyPresencePenalty")) {
672
1537
  repeat_penalty_presence_penalty = options.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue();
673
1538
  }
@@ -692,14 +1557,33 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
692
1557
  use_grammar = false;
693
1558
  }
694
1559
  }
695
- using Napi::AsyncWorker::Queue;
696
- using Napi::Promise::Deferred::Promise;
1560
+
1561
+ Napi::Promise GetPromise() {
1562
+ return deferred.Promise();
1563
+ }
697
1564
 
698
1565
  protected:
1566
+ Napi::Promise::Deferred deferred;
1567
+
699
1568
  void Execute() {
1569
+ try {
1570
+ SampleToken();
1571
+ } catch (const std::exception& e) {
1572
+ SetError(e.what());
1573
+ } catch(...) {
1574
+ SetError("Unknown error when calling \"SampleToken\"");
1575
+ }
1576
+ }
1577
+
1578
+ void SampleToken() {
700
1579
  llama_token new_token_id = 0;
701
1580
 
702
1581
  // Select the best prediction.
1582
+ if (llama_get_logits(ctx->ctx) == nullptr) {
1583
+ SetError("This model does not support token generation");
1584
+ return;
1585
+ }
1586
+
703
1587
  auto logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
704
1588
  auto n_vocab = llama_n_vocab(ctx->model->model);
705
1589
 
@@ -707,13 +1591,27 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
707
1591
  candidates.reserve(n_vocab);
708
1592
 
709
1593
  for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
710
- candidates.emplace_back(llama_token_data { token_id, logits[token_id], 0.0f });
1594
+ auto logit = logits[token_id];
1595
+
1596
+ if (useTokenBiases) {
1597
+ bool hasTokenBias = tokenBiases.find(token_id) != tokenBiases.end();
1598
+ if (hasTokenBias) {
1599
+ auto logitBias = tokenBiases.at(token_id);
1600
+ if (logitBias == -INFINITY || logitBias < -INFINITY) {
1601
+ if (!llama_token_is_eog(ctx->model->model, token_id)) {
1602
+ logit = -INFINITY;
1603
+ }
1604
+ } else {
1605
+ logit += logitBias;
1606
+ }
1607
+ }
1608
+ }
1609
+
1610
+ candidates.emplace_back(llama_token_data { token_id, logit, 0.0f });
711
1611
  }
712
1612
 
713
1613
  llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
714
1614
 
715
- auto eos_token = llama_token_eos(ctx->model->model);
716
-
717
1615
  if (use_repeat_penalty && !repeat_penalty_tokens.empty()) {
718
1616
  llama_sample_repetition_penalties(
719
1617
  ctx->ctx,
@@ -728,6 +1626,13 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
728
1626
 
729
1627
  if (use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
730
1628
  llama_sample_grammar(ctx->ctx, &candidates_p, (grammar_evaluation_state)->grammar);
1629
+
1630
+ if ((candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) && useTokenBiases) {
1631
+ // logit biases caused grammar sampling to fail, so sampling again without logit biases
1632
+ useTokenBiases = false;
1633
+ SampleToken();
1634
+ return;
1635
+ }
731
1636
  }
732
1637
 
733
1638
  if (temperature <= 0) {
@@ -746,45 +1651,359 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
746
1651
  llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
747
1652
  llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
748
1653
  llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
1654
+ llama_sample_min_p(ctx->ctx, &candidates_p, min_p, min_keep);
749
1655
  llama_sample_temp(ctx->ctx, &candidates_p, temperature);
750
1656
  new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
751
1657
  }
752
1658
 
753
- if (new_token_id != eos_token && use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
1659
+ if (!llama_token_is_eog(ctx->model->model, new_token_id) && use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
754
1660
  llama_grammar_accept_token(ctx->ctx, (grammar_evaluation_state)->grammar, new_token_id);
755
1661
  }
756
1662
 
757
1663
  result = new_token_id;
758
1664
  }
759
1665
  void OnOK() {
760
- Napi::Env env = Napi::AsyncWorker::Env();
761
- Napi::Number resultValue = Napi::Number::New(env, static_cast<uint32_t>(result));
762
- Napi::Promise::Deferred::Resolve(resultValue);
1666
+ Napi::Number resultValue = Napi::Number::New(Env(), static_cast<uint32_t>(result));
1667
+ deferred.Resolve(resultValue);
763
1668
  }
764
1669
  void OnError(const Napi::Error& err) {
765
- Napi::Promise::Deferred::Reject(err.Value());
1670
+ deferred.Reject(err.Value());
766
1671
  }
767
1672
  };
768
1673
 
769
1674
  Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
770
1675
  AddonContextSampleTokenWorker* worker = new AddonContextSampleTokenWorker(info, this);
771
1676
  worker->Queue();
772
- return worker->Promise();
1677
+ return worker->GetPromise();
773
1678
  }
774
1679
 
775
1680
  Napi::Value systemInfo(const Napi::CallbackInfo& info) {
776
1681
  return Napi::String::From(info.Env(), llama_print_system_info());
777
1682
  }
778
1683
 
1684
+ Napi::Value addonGetSupportsGpuOffloading(const Napi::CallbackInfo& info) {
1685
+ return Napi::Boolean::New(info.Env(), llama_supports_gpu_offload());
1686
+ }
1687
+
1688
+ Napi::Value addonGetSupportsMmap(const Napi::CallbackInfo& info) {
1689
+ return Napi::Boolean::New(info.Env(), llama_supports_mmap());
1690
+ }
1691
+
1692
+ Napi::Value addonGetSupportsMlock(const Napi::CallbackInfo& info) {
1693
+ return Napi::Boolean::New(info.Env(), llama_supports_mlock());
1694
+ }
1695
+
1696
+ Napi::Value addonGetBlockSizeForGgmlType(const Napi::CallbackInfo& info) {
1697
+ const int ggmlType = info[0].As<Napi::Number>().Int32Value();
1698
+
1699
+ if (ggmlType < 0 || ggmlType > GGML_TYPE_COUNT) {
1700
+ return info.Env().Undefined();
1701
+ }
1702
+
1703
+ const auto blockSize = ggml_blck_size(static_cast<ggml_type>(ggmlType));
1704
+
1705
+ return Napi::Number::New(info.Env(), blockSize);
1706
+ }
1707
+
1708
+ Napi::Value addonGetTypeSizeForGgmlType(const Napi::CallbackInfo& info) {
1709
+ const int ggmlType = info[0].As<Napi::Number>().Int32Value();
1710
+
1711
+ if (ggmlType < 0 || ggmlType > GGML_TYPE_COUNT) {
1712
+ return info.Env().Undefined();
1713
+ }
1714
+
1715
+ const auto typeSize = ggml_type_size(static_cast<ggml_type>(ggmlType));
1716
+
1717
+ return Napi::Number::New(info.Env(), typeSize);
1718
+ }
1719
+
1720
+ Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
1721
+ Napi::Object consts = Napi::Object::New(info.Env());
1722
+ consts.Set("ggmlMaxDims", Napi::Number::New(info.Env(), GGML_MAX_DIMS));
1723
+ consts.Set("ggmlTypeF16Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F16)));
1724
+ consts.Set("ggmlTypeF32Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F32)));
1725
+ consts.Set("ggmlTensorOverhead", Napi::Number::New(info.Env(), ggml_tensor_overhead()));
1726
+ consts.Set("llamaMaxRngState", Napi::Number::New(info.Env(), LLAMA_MAX_RNG_STATE));
1727
+ consts.Set("llamaPosSize", Napi::Number::New(info.Env(), sizeof(llama_pos)));
1728
+ consts.Set("llamaSeqIdSize", Napi::Number::New(info.Env(), sizeof(llama_seq_id)));
1729
+
1730
+ return consts;
1731
+ }
1732
+
1733
+ int addonGetGgmlLogLevelNumber(ggml_log_level level) {
1734
+ switch (level) {
1735
+ case GGML_LOG_LEVEL_ERROR: return 2;
1736
+ case GGML_LOG_LEVEL_WARN: return 3;
1737
+ case GGML_LOG_LEVEL_INFO: return 4;
1738
+ case GGML_LOG_LEVEL_DEBUG: return 5;
1739
+ }
1740
+
1741
+ return 1;
1742
+ }
1743
+
1744
+ void addonCallJsLogCallback(
1745
+ Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
1746
+ ) {
1747
+ bool called = false;
1748
+
1749
+ if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
1750
+ try {
1751
+ callback.Call({
1752
+ Napi::Number::New(env, data->logLevelNumber),
1753
+ Napi::String::New(env, data->stringStream->str()),
1754
+ });
1755
+ called = true;
1756
+ } catch (const Napi::Error& e) {
1757
+ called = false;
1758
+ }
1759
+ }
1760
+
1761
+ if (!called && data != nullptr) {
1762
+ if (data->logLevelNumber == 2) {
1763
+ fputs(data->stringStream->str().c_str(), stderr);
1764
+ fflush(stderr);
1765
+ } else {
1766
+ fputs(data->stringStream->str().c_str(), stdout);
1767
+ fflush(stdout);
1768
+ }
1769
+ }
1770
+
1771
+ if (data != nullptr) {
1772
+ delete data->stringStream;
1773
+ delete data;
1774
+ }
1775
+ }
1776
+
1777
+ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data) {
1778
+ int logLevelNumber = addonGetGgmlLogLevelNumber(level);
1779
+
1780
+ if (logLevelNumber > addonLoggerLogLevel) {
1781
+ return;
1782
+ }
1783
+
1784
+ if (addonJsLoggerCallbackSet) {
1785
+ std::stringstream* stringStream = new std::stringstream();
1786
+ if (text != nullptr) {
1787
+ *stringStream << text;
1788
+ }
1789
+
1790
+ addon_logger_log* data = new addon_logger_log {
1791
+ logLevelNumber,
1792
+ stringStream,
1793
+ };
1794
+
1795
+ auto status = addonThreadSafeLoggerCallback.NonBlockingCall(data);
1796
+
1797
+ if (status == napi_ok) {
1798
+ return;
1799
+ } else {
1800
+ delete stringStream;
1801
+ delete data;
1802
+ }
1803
+ }
1804
+
1805
+ if (text != nullptr) {
1806
+ if (level == 2) {
1807
+ fputs(text, stderr);
1808
+ fflush(stderr);
1809
+ } else {
1810
+ fputs(text, stdout);
1811
+ fflush(stdout);
1812
+ }
1813
+ }
1814
+ }
1815
+
1816
+ Napi::Value setLogger(const Napi::CallbackInfo& info) {
1817
+ if (info.Length() < 1 || !info[0].IsFunction()) {
1818
+ if (addonJsLoggerCallbackSet) {
1819
+ addonJsLoggerCallbackSet = false;
1820
+ addonThreadSafeLoggerCallback.Release();
1821
+ }
1822
+
1823
+ return info.Env().Undefined();
1824
+ }
1825
+
1826
+ auto addonLoggerJSCallback = info[0].As<Napi::Function>();
1827
+ AddonThreadSafeLogCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
1828
+ addonThreadSafeLoggerCallback = AddonThreadSafeLogCallbackFunction::New(
1829
+ info.Env(),
1830
+ addonLoggerJSCallback,
1831
+ "loggerCallback",
1832
+ 0,
1833
+ 1,
1834
+ context,
1835
+ [](Napi::Env, void*, AddonThreadSafeLogCallbackFunctionContext* ctx) {
1836
+ addonJsLoggerCallbackSet = false;
1837
+
1838
+ delete ctx;
1839
+ }
1840
+ );
1841
+ addonJsLoggerCallbackSet = true;
1842
+
1843
+ // prevent blocking the main node process from exiting due to active resources
1844
+ addonThreadSafeLoggerCallback.Unref(info.Env());
1845
+
1846
+ return info.Env().Undefined();
1847
+ }
1848
+
1849
+ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
1850
+ if (info.Length() < 1 || !info[0].IsNumber()) {
1851
+ addonLoggerLogLevel = 5;
1852
+
1853
+ return info.Env().Undefined();
1854
+ }
1855
+
1856
+ addonLoggerLogLevel = info[0].As<Napi::Number>().Int32Value();
1857
+
1858
+ return info.Env().Undefined();
1859
+ }
1860
+
1861
+ class AddonBackendLoadWorker : public Napi::AsyncWorker {
1862
+ public:
1863
+ AddonBackendLoadWorker(const Napi::Env& env)
1864
+ : Napi::AsyncWorker(env, "AddonBackendLoadWorker"),
1865
+ deferred(Napi::Promise::Deferred::New(env)) {
1866
+ }
1867
+ ~AddonBackendLoadWorker() {
1868
+ }
1869
+
1870
+ Napi::Promise GetPromise() {
1871
+ return deferred.Promise();
1872
+ }
1873
+
1874
+ protected:
1875
+ Napi::Promise::Deferred deferred;
1876
+
1877
+ void Execute() {
1878
+ try {
1879
+ llama_backend_init();
1880
+
1881
+ try {
1882
+ if (backendDisposed) {
1883
+ llama_backend_free();
1884
+ } else {
1885
+ backendInitialized = true;
1886
+ }
1887
+ } catch (const std::exception& e) {
1888
+ SetError(e.what());
1889
+ } catch(...) {
1890
+ SetError("Unknown error when calling \"llama_backend_free\"");
1891
+ }
1892
+ } catch (const std::exception& e) {
1893
+ SetError(e.what());
1894
+ } catch(...) {
1895
+ SetError("Unknown error when calling \"llama_backend_init\"");
1896
+ }
1897
+ }
1898
+ void OnOK() {
1899
+ deferred.Resolve(Env().Undefined());
1900
+ }
1901
+ void OnError(const Napi::Error& err) {
1902
+ deferred.Reject(err.Value());
1903
+ }
1904
+ };
1905
+
1906
+
1907
+ class AddonBackendUnloadWorker : public Napi::AsyncWorker {
1908
+ public:
1909
+ AddonBackendUnloadWorker(const Napi::Env& env)
1910
+ : Napi::AsyncWorker(env, "AddonBackendUnloadWorker"),
1911
+ deferred(Napi::Promise::Deferred::New(env)) {
1912
+ }
1913
+ ~AddonBackendUnloadWorker() {
1914
+ }
1915
+
1916
+ Napi::Promise GetPromise() {
1917
+ return deferred.Promise();
1918
+ }
1919
+
1920
+ protected:
1921
+ Napi::Promise::Deferred deferred;
1922
+
1923
+ void Execute() {
1924
+ try {
1925
+ if (backendInitialized) {
1926
+ backendInitialized = false;
1927
+ llama_backend_free();
1928
+ }
1929
+ } catch (const std::exception& e) {
1930
+ SetError(e.what());
1931
+ } catch(...) {
1932
+ SetError("Unknown error when calling \"llama_backend_free\"");
1933
+ }
1934
+ }
1935
+ void OnOK() {
1936
+ deferred.Resolve(Env().Undefined());
1937
+ }
1938
+ void OnError(const Napi::Error& err) {
1939
+ deferred.Reject(err.Value());
1940
+ }
1941
+ };
1942
+
1943
+ Napi::Value addonInit(const Napi::CallbackInfo& info) {
1944
+ if (backendInitialized) {
1945
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
1946
+ deferred.Resolve(info.Env().Undefined());
1947
+ return deferred.Promise();
1948
+ }
1949
+
1950
+ AddonBackendLoadWorker* worker = new AddonBackendLoadWorker(info.Env());
1951
+ worker->Queue();
1952
+ return worker->GetPromise();
1953
+ }
1954
+
1955
+ Napi::Value addonDispose(const Napi::CallbackInfo& info) {
1956
+ if (backendDisposed) {
1957
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
1958
+ deferred.Resolve(info.Env().Undefined());
1959
+ return deferred.Promise();
1960
+ }
1961
+
1962
+ backendDisposed = true;
1963
+
1964
+ AddonBackendUnloadWorker* worker = new AddonBackendUnloadWorker(info.Env());
1965
+ worker->Queue();
1966
+ return worker->GetPromise();
1967
+ }
1968
+
1969
+ static void addonFreeLlamaBackend(Napi::Env env, int* data) {
1970
+ if (backendDisposed) {
1971
+ return;
1972
+ }
1973
+
1974
+ backendDisposed = true;
1975
+ if (backendInitialized) {
1976
+ backendInitialized = false;
1977
+ llama_backend_free();
1978
+ }
1979
+ }
1980
+
779
1981
  Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
780
- llama_backend_init(false);
781
1982
  exports.DefineProperties({
782
1983
  Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
1984
+ Napi::PropertyDescriptor::Function("getSupportsGpuOffloading", addonGetSupportsGpuOffloading),
1985
+ Napi::PropertyDescriptor::Function("getSupportsMmap", addonGetSupportsMmap),
1986
+ Napi::PropertyDescriptor::Function("getSupportsMlock", addonGetSupportsMlock),
1987
+ Napi::PropertyDescriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
1988
+ Napi::PropertyDescriptor::Function("getTypeSizeForGgmlType", addonGetTypeSizeForGgmlType),
1989
+ Napi::PropertyDescriptor::Function("getConsts", addonGetConsts),
1990
+ Napi::PropertyDescriptor::Function("setLogger", setLogger),
1991
+ Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
1992
+ Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
1993
+ Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
1994
+ Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
1995
+ Napi::PropertyDescriptor::Function("init", addonInit),
1996
+ Napi::PropertyDescriptor::Function("dispose", addonDispose),
783
1997
  });
784
1998
  AddonModel::init(exports);
785
1999
  AddonGrammar::init(exports);
786
2000
  AddonGrammarEvaluationState::init(exports);
787
2001
  AddonContext::init(exports);
2002
+
2003
+ llama_log_set(addonLlamaCppLogCallback, nullptr);
2004
+
2005
+ exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));
2006
+
788
2007
  return exports;
789
2008
  }
790
2009