node-llama-cpp 3.0.0-beta.13 → 3.0.0-beta.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (351)
  1. package/README.md +1 -1
  2. package/dist/ChatWrapper.js +4 -0
  3. package/dist/ChatWrapper.js.map +1 -1
  4. package/dist/bindings/AddonTypes.d.ts +35 -6
  5. package/dist/bindings/Llama.d.ts +12 -0
  6. package/dist/bindings/Llama.js +100 -7
  7. package/dist/bindings/Llama.js.map +1 -1
  8. package/dist/bindings/getLlama.d.ts +19 -1
  9. package/dist/bindings/getLlama.js +16 -6
  10. package/dist/bindings/getLlama.js.map +1 -1
  11. package/dist/bindings/types.d.ts +18 -0
  12. package/dist/bindings/types.js +31 -2
  13. package/dist/bindings/types.js.map +1 -1
  14. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  15. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  16. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  17. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
  18. package/dist/bindings/utils/cloneLlamaCppRepo.js +4 -3
  19. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  20. package/dist/bindings/utils/compileLLamaCpp.d.ts +4 -1
  21. package/dist/bindings/utils/compileLLamaCpp.js +133 -97
  22. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  23. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +3 -0
  24. package/dist/bindings/utils/detectAvailableComputeLayers.js +155 -13
  25. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -1
  26. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  27. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  28. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  29. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +1 -0
  30. package/dist/bindings/utils/logDistroInstallInstruction.js +16 -6
  31. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -1
  32. package/dist/bindings/utils/resolveCustomCmakeOptions.js +2 -2
  33. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
  34. package/dist/bindings/utils/testBindingBinary.js +2 -2
  35. package/dist/bindings/utils/testBindingBinary.js.map +1 -1
  36. package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
  37. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  38. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  39. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  40. package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
  41. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  42. package/dist/chatWrappers/ChatMLChatWrapper.js +12 -10
  43. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  44. package/dist/chatWrappers/FalconChatWrapper.d.ts +2 -1
  45. package/dist/chatWrappers/FalconChatWrapper.js +28 -11
  46. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  47. package/dist/chatWrappers/FunctionaryChatWrapper.js +59 -45
  48. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  49. package/dist/chatWrappers/GemmaChatWrapper.js +9 -7
  50. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
  51. package/dist/chatWrappers/GeneralChatWrapper.d.ts +2 -1
  52. package/dist/chatWrappers/GeneralChatWrapper.js +35 -12
  53. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  54. package/dist/chatWrappers/LlamaChatWrapper.d.ts +7 -0
  55. package/dist/chatWrappers/LlamaChatWrapper.js +26 -8
  56. package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -1
  57. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +73 -0
  58. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +355 -0
  59. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  60. package/dist/{TemplateChatWrapper.d.ts → chatWrappers/generic/TemplateChatWrapper.d.ts} +16 -18
  61. package/dist/{TemplateChatWrapper.js → chatWrappers/generic/TemplateChatWrapper.js} +31 -69
  62. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  63. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +33 -0
  64. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  65. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  66. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  67. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +206 -0
  68. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  69. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +67 -0
  70. package/dist/chatWrappers/utils/resolveChatWrapper.js +206 -0
  71. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  72. package/dist/cli/cli.js +1 -1
  73. package/dist/cli/cli.js.map +1 -1
  74. package/dist/cli/commands/ChatCommand.d.ts +7 -4
  75. package/dist/cli/commands/ChatCommand.js +177 -70
  76. package/dist/cli/commands/ChatCommand.js.map +1 -1
  77. package/dist/cli/commands/ClearCommand.d.ts +1 -1
  78. package/dist/cli/commands/ClearCommand.js +5 -5
  79. package/dist/cli/commands/ClearCommand.js.map +1 -1
  80. package/dist/cli/commands/CompleteCommand.d.ts +3 -2
  81. package/dist/cli/commands/CompleteCommand.js +115 -51
  82. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  83. package/dist/cli/commands/InfillCommand.d.ts +3 -2
  84. package/dist/cli/commands/InfillCommand.js +115 -51
  85. package/dist/cli/commands/InfillCommand.js.map +1 -1
  86. package/dist/cli/commands/OnPostInstallCommand.js +2 -0
  87. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  88. package/dist/cli/commands/{InspectCommand.d.ts → inspect/InspectCommand.d.ts} +1 -4
  89. package/dist/cli/commands/inspect/InspectCommand.js +17 -0
  90. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  91. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +10 -0
  92. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +108 -0
  93. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  94. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  95. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +98 -0
  96. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  97. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +14 -0
  98. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +577 -0
  99. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  100. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  101. package/dist/cli/utils/ConsoleTable.js +86 -0
  102. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  103. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  104. package/dist/cli/utils/printCommonInfoLines.js +70 -0
  105. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  106. package/dist/cli/utils/printInfoLine.d.ts +10 -0
  107. package/dist/cli/utils/printInfoLine.js +45 -0
  108. package/dist/cli/utils/printInfoLine.js.map +1 -0
  109. package/dist/cli/utils/resolveCommandGgufPath.d.ts +1 -0
  110. package/dist/cli/utils/resolveCommandGgufPath.js +6 -0
  111. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  112. package/dist/config.d.ts +3 -1
  113. package/dist/config.js +7 -1
  114. package/dist/config.js.map +1 -1
  115. package/dist/evaluator/LlamaChat/LlamaChat.js +13 -5
  116. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  117. package/dist/evaluator/LlamaCompletion.js +5 -3
  118. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  119. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +43 -9
  120. package/dist/evaluator/LlamaContext/LlamaContext.js +251 -60
  121. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  122. package/dist/evaluator/LlamaContext/types.d.ts +68 -10
  123. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  124. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  125. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  126. package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +4 -4
  127. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  128. package/dist/evaluator/LlamaEmbeddingContext.d.ts +29 -7
  129. package/dist/evaluator/LlamaEmbeddingContext.js +31 -22
  130. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  131. package/dist/evaluator/LlamaGrammar.js +1 -0
  132. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  133. package/dist/evaluator/LlamaModel.d.ts +78 -20
  134. package/dist/evaluator/LlamaModel.js +385 -21
  135. package/dist/evaluator/LlamaModel.js.map +1 -1
  136. package/dist/evaluator/TokenMeter.d.ts +54 -0
  137. package/dist/evaluator/TokenMeter.js +86 -0
  138. package/dist/evaluator/TokenMeter.js.map +1 -0
  139. package/dist/gguf/GgufInsights.d.ts +40 -0
  140. package/dist/gguf/GgufInsights.js +350 -0
  141. package/dist/gguf/GgufInsights.js.map +1 -0
  142. package/dist/gguf/consts.d.ts +3 -0
  143. package/dist/gguf/consts.js +8 -0
  144. package/dist/gguf/consts.js.map +1 -0
  145. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  146. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  147. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  148. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  149. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  150. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  151. package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
  152. package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
  153. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  154. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
  155. package/dist/gguf/fileReaders/GgufFsFileReader.js +45 -0
  156. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  157. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
  158. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
  159. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  160. package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
  161. package/dist/gguf/parser/GgufV2Parser.js +115 -0
  162. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  163. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  164. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  165. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  166. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  167. package/dist/gguf/parser/parseGguf.js +58 -0
  168. package/dist/gguf/parser/parseGguf.js.map +1 -0
  169. package/dist/gguf/readGgufFileInfo.d.ts +30 -0
  170. package/dist/gguf/readGgufFileInfo.js +37 -0
  171. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  172. package/dist/gguf/types/GgufFileInfoTypes.d.ts +52 -0
  173. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  174. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  175. package/dist/gguf/types/GgufMetadataTypes.d.ts +330 -0
  176. package/dist/gguf/types/GgufMetadataTypes.js +86 -0
  177. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  178. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  179. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  180. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  181. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  182. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  183. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  184. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
  185. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
  186. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  187. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  188. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  189. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  190. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  191. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  192. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  193. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  194. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  195. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  196. package/dist/index.d.ts +13 -7
  197. package/dist/index.js +11 -6
  198. package/dist/index.js.map +1 -1
  199. package/dist/state.d.ts +2 -0
  200. package/dist/state.js +7 -0
  201. package/dist/state.js.map +1 -1
  202. package/dist/types.d.ts +1 -1
  203. package/dist/utils/DisposeGuard.d.ts +13 -0
  204. package/dist/utils/DisposeGuard.js +120 -0
  205. package/dist/utils/DisposeGuard.js.map +1 -0
  206. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  207. package/dist/utils/InsufficientMemoryError.js +6 -0
  208. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  209. package/dist/utils/LlamaText.d.ts +25 -10
  210. package/dist/utils/LlamaText.js +205 -23
  211. package/dist/utils/LlamaText.js.map +1 -1
  212. package/dist/utils/StopGenerationDetector.js +3 -1
  213. package/dist/utils/StopGenerationDetector.js.map +1 -1
  214. package/dist/utils/cmake.js +1 -1
  215. package/dist/utils/cmake.js.map +1 -1
  216. package/dist/utils/findBestOption.d.ts +4 -0
  217. package/dist/utils/findBestOption.js +15 -0
  218. package/dist/utils/findBestOption.js.map +1 -0
  219. package/dist/utils/getConsoleLogPrefix.js +3 -2
  220. package/dist/utils/getConsoleLogPrefix.js.map +1 -1
  221. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +3 -3
  222. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -1
  223. package/dist/utils/gitReleaseBundles.js +68 -1
  224. package/dist/utils/gitReleaseBundles.js.map +1 -1
  225. package/dist/utils/mergeUnionTypes.d.ts +4 -0
  226. package/dist/utils/parseModelFileName.d.ts +1 -0
  227. package/dist/utils/parseModelFileName.js +6 -1
  228. package/dist/utils/parseModelFileName.js.map +1 -1
  229. package/dist/utils/prettyPrintObject.d.ts +10 -1
  230. package/dist/utils/prettyPrintObject.js +57 -13
  231. package/dist/utils/prettyPrintObject.js.map +1 -1
  232. package/dist/utils/removeNullFields.d.ts +2 -2
  233. package/dist/utils/removeNullFields.js.map +1 -1
  234. package/dist/utils/spawnCommand.d.ts +11 -1
  235. package/dist/utils/spawnCommand.js +55 -7
  236. package/dist/utils/spawnCommand.js.map +1 -1
  237. package/dist/utils/tokenizeInput.d.ts +1 -1
  238. package/dist/utils/tokenizeInput.js +3 -3
  239. package/dist/utils/tokenizeInput.js.map +1 -1
  240. package/dist/utils/withOra.d.ts +1 -0
  241. package/dist/utils/withOra.js +2 -2
  242. package/dist/utils/withOra.js.map +1 -1
  243. package/llama/CMakeLists.txt +5 -5
  244. package/llama/addon.cpp +793 -88
  245. package/llama/binariesGithubRelease.json +1 -1
  246. package/llama/gitRelease.bundle +0 -0
  247. package/llama/gpuInfo/cuda-gpu-info.cu +21 -0
  248. package/llama/gpuInfo/cuda-gpu-info.h +3 -0
  249. package/llama/gpuInfo/metal-gpu-info.h +4 -1
  250. package/llama/gpuInfo/metal-gpu-info.mm +14 -1
  251. package/llama/gpuInfo/vulkan-gpu-info.cpp +20 -2
  252. package/llama/gpuInfo/vulkan-gpu-info.h +2 -0
  253. package/llama/grammars/json.gbnf +1 -1
  254. package/llama/grammars/json_arr.gbnf +1 -1
  255. package/llama/llama.cpp.info.json +1 -1
  256. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  257. package/llamaBins/linux-arm64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  258. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  259. package/llamaBins/linux-armv7l/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  260. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  261. package/llamaBins/linux-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  262. package/llamaBins/linux-x64/llama-addon.node +0 -0
  263. package/llamaBins/linux-x64-cuda/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  264. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  265. package/llamaBins/linux-x64-vulkan/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  266. package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
  267. package/llamaBins/mac-arm64-metal/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  268. package/llamaBins/mac-arm64-metal/default.metallib +0 -0
  269. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  270. package/llamaBins/mac-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  271. package/llamaBins/mac-x64/llama-addon.node +0 -0
  272. package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -0
  273. package/llamaBins/win-arm64/llama-addon.exp +0 -0
  274. package/llamaBins/win-arm64/llama-addon.lib +0 -0
  275. package/llamaBins/win-arm64/llama-addon.node +0 -0
  276. package/llamaBins/win-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  277. package/llamaBins/win-x64/llama-addon.exp +0 -0
  278. package/llamaBins/win-x64/llama-addon.lib +0 -0
  279. package/llamaBins/win-x64/llama-addon.node +0 -0
  280. package/llamaBins/win-x64-cuda/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  281. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  282. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  283. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  284. package/llamaBins/win-x64-vulkan/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  285. package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
  286. package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
  287. package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
  288. package/package.json +16 -11
  289. package/dist/TemplateChatWrapper.js.map +0 -1
  290. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +0 -33
  291. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +0 -49
  292. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +0 -1
  293. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  294. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -63
  295. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  296. package/dist/cli/commands/InspectCommand.js +0 -113
  297. package/dist/cli/commands/InspectCommand.js.map +0 -1
  298. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  299. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  300. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  301. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  302. package/dist/gguf/GGUFInsights.d.ts +0 -28
  303. package/dist/gguf/GGUFInsights.js +0 -58
  304. package/dist/gguf/GGUFInsights.js.map +0 -1
  305. package/dist/gguf/GGUFMetadata.d.ts +0 -19
  306. package/dist/gguf/GGUFMetadata.js +0 -38
  307. package/dist/gguf/GGUFMetadata.js.map +0 -1
  308. package/dist/gguf/errors/InvalidGGUFMagicError.d.ts +0 -3
  309. package/dist/gguf/errors/InvalidGGUFMagicError.js +0 -6
  310. package/dist/gguf/errors/InvalidGGUFMagicError.js.map +0 -1
  311. package/dist/gguf/errors/MetadataNotParsedYetError.d.ts +0 -3
  312. package/dist/gguf/errors/MetadataNotParsedYetError.js +0 -6
  313. package/dist/gguf/errors/MetadataNotParsedYetError.js.map +0 -1
  314. package/dist/gguf/errors/MissingNodeLlamaError.d.ts +0 -3
  315. package/dist/gguf/errors/MissingNodeLlamaError.js +0 -6
  316. package/dist/gguf/errors/MissingNodeLlamaError.js.map +0 -1
  317. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.d.ts +0 -5
  318. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js +0 -12
  319. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js.map +0 -1
  320. package/dist/gguf/errors/UnsupportedMetadataTypeError.d.ts +0 -4
  321. package/dist/gguf/errors/UnsupportedMetadataTypeError.js +0 -8
  322. package/dist/gguf/errors/UnsupportedMetadataTypeError.js.map +0 -1
  323. package/dist/gguf/ggufParser/GGUFParser.d.ts +0 -18
  324. package/dist/gguf/ggufParser/GGUFParser.js +0 -123
  325. package/dist/gguf/ggufParser/GGUFParser.js.map +0 -1
  326. package/dist/gguf/ggufParser/GGUFTypes.d.ts +0 -257
  327. package/dist/gguf/ggufParser/GGUFTypes.js +0 -2
  328. package/dist/gguf/ggufParser/GGUFTypes.js.map +0 -1
  329. package/dist/gguf/ggufParser/checkArchitecture.d.ts +0 -14
  330. package/dist/gguf/ggufParser/checkArchitecture.js +0 -74
  331. package/dist/gguf/ggufParser/checkArchitecture.js.map +0 -1
  332. package/dist/gguf/ggufParser/stream/GGUFBaseStream.d.ts +0 -38
  333. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js +0 -83
  334. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js.map +0 -1
  335. package/dist/gguf/ggufParser/stream/GGUFFetchStream.d.ts +0 -14
  336. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js +0 -35
  337. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js.map +0 -1
  338. package/dist/gguf/ggufParser/stream/GGUFReadStream.d.ts +0 -15
  339. package/dist/gguf/ggufParser/stream/GGUFReadStream.js +0 -40
  340. package/dist/gguf/ggufParser/stream/GGUFReadStream.js.map +0 -1
  341. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  342. package/dist/utils/parseModelTypeDescription.js +0 -9
  343. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  344. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  345. package/dist/utils/resolveChatWrapper.js +0 -16
  346. package/dist/utils/resolveChatWrapper.js.map +0 -1
  347. package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -7731
  348. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  349. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  350. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  351. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
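Notable in the list above: the old GGUF modules (GGUFParser, GGUFMetadata, GGUFInsights) are replaced by a rewritten gguf/ tree (readGgufFileInfo, GgufInsights, versioned parsers, and file readers for both local files and network fetches). A minimal sketch of the new entry point, assuming readGgufFileInfo is re-exported from the package index as the dist/index.d.ts change suggests; the call shape and return fields shown here are assumptions based on the new file names, not confirmed by this diff:

    import {readGgufFileInfo} from "node-llama-cpp";

    // Parses only the GGUF header, metadata, and tensor info of a model file,
    // without loading the model itself; a URL should also work, backed by the
    // new GgufNetworkFetchFileReader (hypothetical usage sketch)
    const fileInfo = await readGgufFileInfo("./models/model.gguf");

    console.log(fileInfo.version);                         // GGUF container version, e.g. 2 or 3
    console.log(fileInfo.metadata?.general?.architecture); // e.g. "llama"

The per-file diff below covers only package/llama/addon.cpp; it is the largest change in this release (+793 -88).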
package/llama/addon.cpp CHANGED
@@ -9,7 +9,7 @@
9
9
  #include "llama.h"
10
10
  #include "napi.h"
11
11
 
12
- #ifdef GPU_INFO_USE_CUBLAS
12
+ #ifdef GPU_INFO_USE_CUDA
13
13
  # include "gpuInfo/cuda-gpu-info.h"
14
14
  #endif
15
15
  #ifdef GPU_INFO_USE_VULKAN
@@ -35,10 +35,77 @@ void addonCallJsLogCallback(
35
35
  using AddonThreadSafeLogCallbackFunction =
36
36
  Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
37
37
 
38
+
39
+ struct addon_progress_event {
40
+ public:
41
+ const float progress;
42
+ };
43
+
44
+ using AddonThreadSafeProgressCallbackFunctionContext = Napi::Reference<Napi::Value>;
45
+ void addonCallJsProgressCallback(
46
+ Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
47
+ );
48
+ using AddonThreadSafeProgressEventCallbackFunction =
49
+ Napi::TypedThreadSafeFunction<AddonThreadSafeProgressCallbackFunctionContext, addon_progress_event, addonCallJsProgressCallback>;
50
+
51
+
38
52
  AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
39
53
  bool addonJsLoggerCallbackSet = false;
40
54
  int addonLoggerLogLevel = 5;
41
55
  bool backendInitialized = false;
56
+ bool backendDisposed = false;
57
+
58
+ void addonCallJsProgressCallback(
59
+ Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
60
+ ) {
61
+ if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
62
+ try {
63
+ callback.Call({Napi::Number::New(env, data->progress)});
64
+ } catch (const Napi::Error& e) {}
65
+ }
66
+
67
+ if (data != nullptr) {
68
+ delete data;
69
+ }
70
+ }
71
+
72
+ static uint64_t calculateBatchMemorySize(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
73
+ uint64_t totalSize = 0;
74
+
75
+ if (embd) {
76
+ totalSize += sizeof(float) * n_tokens_alloc * embd;
77
+ } else {
78
+ totalSize += sizeof(llama_token) * n_tokens_alloc;
79
+ }
80
+
81
+ totalSize += sizeof(llama_pos) * n_tokens_alloc;
82
+ totalSize += sizeof(int32_t) * n_tokens_alloc;
83
+ totalSize += sizeof(llama_seq_id *) * (n_tokens_alloc + 1);
84
+
85
+ totalSize += sizeof(llama_seq_id) * n_seq_max * n_tokens_alloc;
86
+
87
+ totalSize += sizeof(int8_t) * n_tokens_alloc;
88
+
89
+ return totalSize;
90
+ }
91
+
92
+ static void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size) {
93
+ const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
94
+ while (size > 0) {
95
+ int64_t adjustSize = std::min(size, chunkSize);
96
+ Napi::MemoryManagement::AdjustExternalMemory(env, adjustSize);
97
+ size -= adjustSize;
98
+ }
99
+ }
100
+
101
+ static void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
102
+ const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
103
+ while (size > 0) {
104
+ int64_t adjustSize = std::min(size, chunkSize);
105
+ Napi::MemoryManagement::AdjustExternalMemory(env, -adjustSize);
106
+ size -= adjustSize;
107
+ }
108
+ }
42
109
 
43
110
  std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
44
111
  std::vector<char> result(8, 0);
@@ -54,7 +121,7 @@ std::string addon_model_token_to_piece(const struct llama_model* model, llama_to
54
121
  return std::string(result.data(), result.size());
55
122
  }
56
123
 
57
- #ifdef GPU_INFO_USE_CUBLAS
124
+ #ifdef GPU_INFO_USE_CUDA
58
125
  void logCudaError(const char* message) {
59
126
  addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
60
127
  }
@@ -69,7 +136,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
69
136
  uint64_t total = 0;
70
137
  uint64_t used = 0;
71
138
 
72
- #ifdef GPU_INFO_USE_CUBLAS
139
+ #ifdef GPU_INFO_USE_CUDA
73
140
  size_t cudaDeviceTotal = 0;
74
141
  size_t cudaDeviceUsed = 0;
75
142
  bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
@@ -94,7 +161,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
94
161
  #ifdef GPU_INFO_USE_METAL
95
162
  uint64_t metalDeviceTotal = 0;
96
163
  uint64_t metalDeviceUsed = 0;
97
- get_metal_gpu_info(&metalDeviceTotal, &metalDeviceUsed);
164
+ getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed);
98
165
 
99
166
  total += metalDeviceTotal;
100
167
  used += metalDeviceUsed;
@@ -107,8 +174,34 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
107
174
  return result;
108
175
  }
109
176
 
177
+ Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
178
+ std::vector<std::string> deviceNames;
179
+
180
+ #ifdef GPU_INFO_USE_CUDA
181
+ gpuInfoGetCudaDeviceNames(&deviceNames, logCudaError);
182
+ #endif
183
+
184
+ #ifdef GPU_INFO_USE_VULKAN
185
+ gpuInfoGetVulkanDeviceNames(&deviceNames, logVulkanWarning);
186
+ #endif
187
+
188
+ #ifdef GPU_INFO_USE_METAL
189
+ getMetalGpuDeviceNames(&deviceNames);
190
+ #endif
191
+
192
+ Napi::Object result = Napi::Object::New(info.Env());
193
+
194
+ Napi::Array deviceNamesNapiArray = Napi::Array::New(info.Env(), deviceNames.size());
195
+ for (size_t i = 0; i < deviceNames.size(); ++i) {
196
+ deviceNamesNapiArray[i] = Napi::String::New(info.Env(), deviceNames[i]);
197
+ }
198
+ result.Set("deviceNames", deviceNamesNapiArray);
199
+
200
+ return result;
201
+ }
202
+
110
203
  Napi::Value getGpuType(const Napi::CallbackInfo& info) {
111
- #ifdef GPU_INFO_USE_CUBLAS
204
+ #ifdef GPU_INFO_USE_CUDA
112
205
  return Napi::String::New(info.Env(), "cuda");
113
206
  #endif
114
207
 
@@ -143,21 +236,42 @@ static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_mod
143
236
  return Napi::Number::From(info.Env(), token);
144
237
  }
145
238
 
239
+ static bool llamaModelParamsProgressCallback(float progress, void * user_data);
240
+
146
241
  class AddonModel : public Napi::ObjectWrap<AddonModel> {
147
242
  public:
148
243
  llama_model_params model_params;
149
244
  llama_model* model;
245
+ uint64_t loadedModelSize = 0;
246
+ Napi::Reference<Napi::Object> addonExportsRef;
247
+ bool hasAddonExportsRef = false;
248
+
249
+ std::string modelPath;
250
+ bool modelLoaded = false;
251
+ bool abortModelLoad = false;
252
+ bool model_load_stopped = false;
253
+ float rawModelLoadPercentage = 0;
254
+ unsigned modelLoadPercentage = 0;
255
+ AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
256
+ bool onLoadProgressEventCallbackSet = false;
257
+ bool hasLoadAbortSignal = false;
258
+
150
259
  bool disposed = false;
151
260
 
152
261
  AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
153
262
  model_params = llama_model_default_params();
154
263
 
155
264
  // Get the model path
156
- std::string modelPath = info[0].As<Napi::String>().Utf8Value();
265
+ modelPath = info[0].As<Napi::String>().Utf8Value();
157
266
 
158
267
  if (info.Length() > 1 && info[1].IsObject()) {
159
268
  Napi::Object options = info[1].As<Napi::Object>();
160
269
 
270
+ if (options.Has("addonExports")) {
271
+ addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
272
+ hasAddonExportsRef = true;
273
+ }
274
+
161
275
  if (options.Has("gpuLayers")) {
162
276
  model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
163
277
  }
@@ -173,13 +287,37 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
173
287
  if (options.Has("useMlock")) {
174
288
  model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
175
289
  }
176
- }
177
290
 
178
- model = llama_load_model_from_file(modelPath.c_str(), model_params);
291
+ if (options.Has("onLoadProgress")) {
292
+ auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
293
+ if (onLoadProgressJSCallback.IsFunction()) {
294
+ AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
295
+ addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
296
+ info.Env(),
297
+ onLoadProgressJSCallback,
298
+ "onLoadProgressCallback",
299
+ 0,
300
+ 1,
301
+ context,
302
+ [](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
303
+ addonModel->onLoadProgressEventCallbackSet = false;
304
+
305
+ delete ctx;
306
+ },
307
+ this
308
+ );
309
+ onLoadProgressEventCallbackSet = true;
310
+ }
311
+ }
312
+
313
+ if (options.Has("hasLoadAbortSignal")) {
314
+ hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
315
+ }
179
316
 
180
- if (model == NULL) {
181
- Napi::Error::New(info.Env(), "Failed to load model").ThrowAsJavaScriptException();
182
- return;
317
+ if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
318
+ model_params.progress_callback_user_data = &(*this);
319
+ model_params.progress_callback = llamaModelParamsProgressCallback;
320
+ }
183
321
  }
184
322
  }
185
323
 
@@ -192,23 +330,31 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
192
330
  return;
193
331
  }
194
332
 
195
- llama_free_model(model);
196
333
  disposed = true;
197
- }
334
+ if (modelLoaded) {
335
+ modelLoaded = false;
336
+ llama_free_model(model);
198
337
 
199
- Napi::Value Dispose(const Napi::CallbackInfo& info) {
200
- if (disposed) {
201
- return info.Env().Undefined();
338
+ adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
339
+ loadedModelSize = 0;
202
340
  }
203
341
 
204
- dispose();
342
+ if (hasAddonExportsRef) {
343
+ addonExportsRef.Unref();
344
+ hasAddonExportsRef = false;
345
+ }
346
+ }
205
347
 
348
+ Napi::Value Init(const Napi::CallbackInfo& info);
349
+ Napi::Value AbortActiveModelLoad(const Napi::CallbackInfo& info) {
350
+ abortModelLoad = true;
206
351
  return info.Env().Undefined();
207
352
  }
353
+ Napi::Value Dispose(const Napi::CallbackInfo& info);
208
354
 
209
355
  Napi::Value Tokenize(const Napi::CallbackInfo& info) {
210
356
  if (disposed) {
211
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
357
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
212
358
  return info.Env().Undefined();
213
359
  }
214
360
 
@@ -226,7 +372,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
226
372
  }
227
373
  Napi::Value Detokenize(const Napi::CallbackInfo& info) {
228
374
  if (disposed) {
229
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
375
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
230
376
  return info.Env().Undefined();
231
377
  }
232
378
 
@@ -251,7 +397,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
251
397
 
252
398
  Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info) {
253
399
  if (disposed) {
254
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
400
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
255
401
  return info.Env().Undefined();
256
402
  }
257
403
 
@@ -260,7 +406,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
260
406
 
261
407
  Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
262
408
  if (disposed) {
263
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
409
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
264
410
  return info.Env().Undefined();
265
411
  }
266
412
 
@@ -269,7 +415,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
269
415
 
270
416
  Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
271
417
  if (disposed) {
272
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
418
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
273
419
  return info.Env().Undefined();
274
420
  }
275
421
 
@@ -278,7 +424,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
278
424
 
279
425
  Napi::Value GetTotalParameters(const Napi::CallbackInfo& info) {
280
426
  if (disposed) {
281
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
427
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
282
428
  return info.Env().Undefined();
283
429
  }
284
430
 
@@ -287,7 +433,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
287
433
 
288
434
  Napi::Value GetModelDescription(const Napi::CallbackInfo& info) {
289
435
  if (disposed) {
290
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
436
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
291
437
  return info.Env().Undefined();
292
438
  }
293
439
 
@@ -299,7 +445,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
299
445
 
300
446
  Napi::Value TokenBos(const Napi::CallbackInfo& info) {
301
447
  if (disposed) {
302
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
448
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
303
449
  return info.Env().Undefined();
304
450
  }
305
451
 
@@ -307,7 +453,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
307
453
  }
308
454
  Napi::Value TokenEos(const Napi::CallbackInfo& info) {
309
455
  if (disposed) {
310
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
456
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
311
457
  return info.Env().Undefined();
312
458
  }
313
459
 
@@ -315,7 +461,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
315
461
  }
316
462
  Napi::Value TokenNl(const Napi::CallbackInfo& info) {
317
463
  if (disposed) {
318
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
464
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
319
465
  return info.Env().Undefined();
320
466
  }
321
467
 
@@ -323,7 +469,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
323
469
  }
324
470
  Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
325
471
  if (disposed) {
326
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
472
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
327
473
  return info.Env().Undefined();
328
474
  }
329
475
 
@@ -331,7 +477,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
331
477
  }
332
478
  Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
333
479
  if (disposed) {
334
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
480
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
335
481
  return info.Env().Undefined();
336
482
  }
337
483
 
@@ -339,7 +485,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
339
485
  }
340
486
  Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
341
487
  if (disposed) {
342
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
488
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
343
489
  return info.Env().Undefined();
344
490
  }
345
491
 
@@ -347,7 +493,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
347
493
  }
348
494
  Napi::Value EotToken(const Napi::CallbackInfo& info) {
349
495
  if (disposed) {
350
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
496
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
351
497
  return info.Env().Undefined();
352
498
  }
353
499
 
@@ -355,7 +501,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
355
501
  }
356
502
  Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
357
503
  if (disposed) {
358
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
504
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
359
505
  return info.Env().Undefined();
360
506
  }
361
507
 
@@ -374,7 +520,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
374
520
 
375
521
  Napi::Value GetTokenType(const Napi::CallbackInfo& info) {
376
522
  if (disposed) {
377
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
523
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
378
524
  return info.Env().Undefined();
379
525
  }
380
526
 
@@ -387,6 +533,16 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
387
533
 
388
534
  return Napi::Number::From(info.Env(), int32_t(tokenType));
389
535
  }
536
+ Napi::Value GetVocabularyType(const Napi::CallbackInfo& info) {
537
+ if (disposed) {
538
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
539
+ return info.Env().Undefined();
540
+ }
541
+
542
+ auto vocabularyType = llama_vocab_type(model);
543
+
544
+ return Napi::Number::From(info.Env(), int32_t(vocabularyType));
545
+ }
390
546
  Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info) {
391
547
  const int addBos = llama_add_bos_token(model);
392
548
 
@@ -395,6 +551,10 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
395
551
  return Napi::Boolean::New(info.Env(), shouldPrependBos);
396
552
  }
397
553
 
554
+ Napi::Value GetModelSize(const Napi::CallbackInfo& info) {
555
+ return Napi::Number::From(info.Env(), llama_model_size(model));
556
+ }
557
+
398
558
  static void init(Napi::Object exports) {
399
559
  exports.Set(
400
560
  "AddonModel",
@@ -402,6 +562,8 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
402
562
  exports.Env(),
403
563
  "AddonModel",
404
564
  {
565
+ InstanceMethod("init", &AddonModel::Init),
566
+ InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
405
567
  InstanceMethod("tokenize", &AddonModel::Tokenize),
406
568
  InstanceMethod("detokenize", &AddonModel::Detokenize),
407
569
  InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
@@ -418,7 +580,9 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
418
580
  InstanceMethod("eotToken", &AddonModel::EotToken),
419
581
  InstanceMethod("getTokenString", &AddonModel::GetTokenString),
420
582
  InstanceMethod("getTokenType", &AddonModel::GetTokenType),
583
+ InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
421
584
  InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
585
+ InstanceMethod("getModelSize", &AddonModel::GetModelSize),
422
586
  InstanceMethod("dispose", &AddonModel::Dispose),
423
587
  }
424
588
  )
@@ -426,9 +590,166 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
426
590
  }
427
591
  };
428
592
 
593
+ static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
594
+ AddonModel* addonModel = (AddonModel *) user_data;
595
+ unsigned percentage = (unsigned) (100 * progress);
596
+
597
+ if (percentage > addonModel->modelLoadPercentage) {
598
+ addonModel->modelLoadPercentage = percentage;
599
+
600
+ // original llama.cpp logs
601
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
602
+ if (percentage >= 100) {
603
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
604
+ }
605
+ }
606
+
607
+ if (progress > addonModel->rawModelLoadPercentage) {
608
+ addonModel->rawModelLoadPercentage = progress;
609
+
610
+ if (addonModel->onLoadProgressEventCallbackSet) {
611
+ addon_progress_event* data = new addon_progress_event {
612
+ progress
613
+ };
614
+
615
+ auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);
616
+
617
+ if (status != napi_ok) {
618
+ delete data;
619
+ }
620
+ }
621
+ }
622
+
623
+ return !(addonModel->abortModelLoad);
624
+ }
625
+
626
+ class AddonModelLoadModelWorker : public Napi::AsyncWorker {
627
+ public:
628
+ AddonModel* model;
629
+
630
+ AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
631
+ : Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
632
+ model(model),
633
+ deferred(Napi::Promise::Deferred::New(env)) {
634
+ model->Ref();
635
+ }
636
+ ~AddonModelLoadModelWorker() {
637
+ model->Unref();
638
+ }
639
+
640
+ Napi::Promise GetPromise() {
641
+ return deferred.Promise();
642
+ }
643
+
644
+ protected:
645
+ Napi::Promise::Deferred deferred;
646
+
647
+ void Execute() {
648
+ try {
649
+ model->model = llama_load_model_from_file(model->modelPath.c_str(), model->model_params);
650
+
651
+ model->modelLoaded = model->model != nullptr && model->model != NULL;
652
+ } catch (const std::exception& e) {
653
+ SetError(e.what());
654
+ } catch(...) {
655
+ SetError("Unknown error when calling \"llama_load_model_from_file\"");
656
+ }
657
+ }
658
+ void OnOK() {
659
+ if (model->modelLoaded) {
660
+ uint64_t modelSize = llama_model_size(model->model);
661
+ adjustNapiExternalMemoryAdd(Env(), modelSize);
662
+ model->loadedModelSize = modelSize;
663
+ }
664
+
665
+ deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
666
+ if (model->onLoadProgressEventCallbackSet) {
667
+ model->addonThreadSafeOnLoadProgressEventCallback.Release();
668
+ }
669
+ }
670
+ void OnError(const Napi::Error& err) {
671
+ deferred.Reject(err.Value());
672
+ }
673
+ };
674
+ class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
675
+ public:
676
+ AddonModel* model;
677
+
678
+ AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
679
+ : Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
680
+ model(model),
681
+ deferred(Napi::Promise::Deferred::New(env)) {
682
+ model->Ref();
683
+ }
684
+ ~AddonModelUnloadModelWorker() {
685
+ model->Unref();
686
+ }
687
+
688
+ Napi::Promise GetPromise() {
689
+ return deferred.Promise();
690
+ }
691
+
692
+ protected:
693
+ Napi::Promise::Deferred deferred;
694
+
695
+ void Execute() {
696
+ try {
697
+ llama_free_model(model->model);
698
+ model->modelLoaded = false;
699
+
700
+ model->dispose();
701
+ } catch (const std::exception& e) {
702
+ SetError(e.what());
703
+ } catch(...) {
704
+ SetError("Unknown error when calling \"llama_free_model\"");
705
+ }
706
+ }
707
+ void OnOK() {
708
+ adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
709
+ model->loadedModelSize = 0;
710
+
711
+ deferred.Resolve(Env().Undefined());
712
+ }
713
+ void OnError(const Napi::Error& err) {
714
+ deferred.Reject(err.Value());
715
+ }
716
+ };
717
+
718
+ Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
719
+ if (disposed) {
720
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
721
+ return info.Env().Undefined();
722
+ }
723
+
724
+ AddonModelLoadModelWorker* worker = new AddonModelLoadModelWorker(this->Env(), this);
725
+ worker->Queue();
726
+ return worker->GetPromise();
727
+ }
728
+ Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
729
+ if (disposed) {
730
+ return info.Env().Undefined();
731
+ }
732
+
733
+ if (modelLoaded) {
734
+ modelLoaded = false;
735
+
736
+ AddonModelUnloadModelWorker* worker = new AddonModelUnloadModelWorker(this->Env(), this);
737
+ worker->Queue();
738
+ return worker->GetPromise();
739
+ } else {
740
+ dispose();
741
+
742
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
743
+ deferred.Resolve(info.Env().Undefined());
744
+ return deferred.Promise();
745
+ }
746
+ }
747
+
429
748
  class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
430
749
  public:
431
750
  grammar_parser::parse_state parsed_grammar;
751
+ Napi::Reference<Napi::Object> addonExportsRef;
752
+ bool hasAddonExportsRef = false;
432
753
 
433
754
  AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
434
755
  // Get the model path
@@ -438,6 +759,11 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
438
759
  if (info.Length() > 1 && info[1].IsObject()) {
439
760
  Napi::Object options = info[1].As<Napi::Object>();
440
761
 
762
+ if (options.Has("addonExports")) {
763
+ addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
764
+ hasAddonExportsRef = true;
765
+ }
766
+
441
767
  if (options.Has("printGrammar")) {
442
768
  should_print_grammar = options.Get("printGrammar").As<Napi::Boolean>().Value();
443
769
  }
@@ -455,6 +781,13 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
455
781
  }
456
782
  }
457
783
 
784
+ ~AddonGrammar() {
785
+ if (hasAddonExportsRef) {
786
+ addonExportsRef.Unref();
787
+ hasAddonExportsRef = false;
788
+ }
789
+ }
790
+
458
791
  static void init(Napi::Object exports) {
459
792
  exports.Set("AddonGrammar", DefineClass(exports.Env(), "AddonGrammar", {}));
460
793
  }
@@ -493,9 +826,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
493
826
  llama_context_params context_params;
494
827
  llama_context* ctx;
495
828
  llama_batch batch;
829
+ uint64_t batchMemorySize = 0;
496
830
  bool has_batch = false;
497
831
  int32_t batch_n_tokens = 0;
498
832
  int n_cur = 0;
833
+
834
+ uint64_t loadedContextMemorySize = 0;
835
+ bool contextLoaded = false;
836
+
499
837
  bool disposed = false;
500
838
 
501
839
  AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
@@ -523,10 +861,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
523
861
 
524
862
  if (options.Has("batchSize")) {
525
863
  context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
864
+ context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
526
865
  }
527
866
 
528
- if (options.Has("embedding")) {
529
- context_params.embedding = options.Get("embedding").As<Napi::Boolean>().Value();
867
+ if (options.Has("sequences")) {
868
+ context_params.n_seq_max = options.Get("sequences").As<Napi::Number>().Uint32Value();
869
+ }
870
+
871
+ if (options.Has("embeddings")) {
872
+ context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
530
873
  }
531
874
 
532
875
  if (options.Has("threads")) {
@@ -537,9 +880,6 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
537
880
  context_params.n_threads_batch = resolved_n_threads;
538
881
  }
539
882
  }
540
-
541
- ctx = llama_new_context_with_model(model->model, context_params);
542
- Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
543
883
  }
544
884
  ~AddonContext() {
545
885
  dispose();
@@ -550,13 +890,18 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
550
890
  return;
551
891
  }
552
892
 
553
- Napi::MemoryManagement::AdjustExternalMemory(Env(), -(int64_t)llama_get_state_size(ctx));
554
- llama_free(ctx);
893
+ disposed = true;
894
+ if (contextLoaded) {
895
+ contextLoaded = false;
896
+ llama_free(ctx);
897
+
898
+ adjustNapiExternalMemorySubtract(Env(), loadedContextMemorySize);
899
+ loadedContextMemorySize = 0;
900
+ }
901
+
555
902
  model->Unref();
556
903
 
557
904
  disposeBatch();
558
-
559
- disposed = true;
560
905
  }
561
906
  void disposeBatch() {
562
907
  if (!has_batch) {
@@ -566,16 +911,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
566
911
  llama_batch_free(batch);
567
912
  has_batch = false;
568
913
  batch_n_tokens = 0;
914
+
915
+ adjustNapiExternalMemorySubtract(Env(), batchMemorySize);
916
+ batchMemorySize = 0;
569
917
  }
570
- Napi::Value Dispose(const Napi::CallbackInfo& info) {
571
- if (disposed) {
572
- return info.Env().Undefined();
573
- }
574
918
 
575
- dispose();
919
+ Napi::Value Init(const Napi::CallbackInfo& info);
920
+ Napi::Value Dispose(const Napi::CallbackInfo& info);
576
921
 
577
- return info.Env().Undefined();
578
- }
579
922
  Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
580
923
  if (disposed) {
581
924
  Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
@@ -600,6 +943,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
600
943
  has_batch = true;
601
944
  batch_n_tokens = n_tokens;
602
945
 
946
+ uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_n_embd(model->model), context_params.n_batch);
947
+ if (newBatchMemorySize > batchMemorySize) {
948
+ adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
949
+ batchMemorySize = newBatchMemorySize;
950
+ } else if (newBatchMemorySize < batchMemorySize) {
951
+ adjustNapiExternalMemorySubtract(Env(), batchMemorySize - newBatchMemorySize);
952
+ batchMemorySize = newBatchMemorySize;
953
+ }
954
+
603
955
  return info.Env().Undefined();
604
956
  }
605
957
  Napi::Value DisposeBatch(const Napi::CallbackInfo& info) {
@@ -648,7 +1000,12 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
648
1000
 
649
1001
  int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
650
1002
 
651
- llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
1003
+ bool result = llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
1004
+
1005
+ if (!result) {
1006
+ Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
1007
+ return info.Env().Undefined();
1008
+ }
652
1009
 
653
1010
  return info.Env().Undefined();
654
1011
  }
@@ -662,9 +1019,9 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
662
1019
  int32_t startPos = info[1].As<Napi::Number>().Int32Value();
663
1020
  int32_t endPos = info[2].As<Napi::Number>().Int32Value();
664
1021
 
665
- llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
1022
+ bool result = llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
666
1023
 
667
- return info.Env().Undefined();
1024
+ return Napi::Boolean::New(info.Env(), result);
668
1025
  }
669
1026
  Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
670
1027
  if (disposed) {
@@ -702,8 +1059,23 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
702
1059
  return info.Env().Undefined();
703
1060
  }
704
1061
 
1062
+ int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
1063
+
1064
+ if (inputTokensLength <= 0) {
1065
+ Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
1066
+ return info.Env().Undefined();
1067
+ }
1068
+
705
1069
  const int n_embd = llama_n_embd(model->model);
706
- const auto* embeddings = llama_get_embeddings(ctx);
1070
+ const auto* embeddings = llama_get_embeddings_seq(ctx, 0);
1071
+ if (embeddings == NULL) {
1072
+ embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
1073
+
1074
+ if (embeddings == NULL) {
1075
+ Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
1076
+ return info.Env().Undefined();
1077
+ }
1078
+ }
707
1079
 
708
1080
  Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
709
1081
  for (size_t i = 0; i < n_embd; ++i) {
@@ -713,6 +1085,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
713
1085
  return result;
714
1086
  }
715
1087
 
1088
+ Napi::Value GetStateSize(const Napi::CallbackInfo& info) {
1089
+ if (disposed) {
1090
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1091
+ return info.Env().Undefined();
1092
+ }
1093
+
1094
+ return Napi::Number::From(info.Env(), llama_get_state_size(ctx));
1095
+ }
1096
+
716
1097
  Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
717
1098
  llama_print_timings(ctx);
718
1099
  llama_reset_timings(ctx);
@@ -726,6 +1107,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
  exports.Env(),
  "AddonContext",
  {
+ InstanceMethod("init", &AddonContext::Init),
  InstanceMethod("getContextSize", &AddonContext::GetContextSize),
  InstanceMethod("initBatch", &AddonContext::InitBatch),
  InstanceMethod("addToBatch", &AddonContext::AddToBatch),
@@ -736,6 +1118,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
  InstanceMethod("sampleToken", &AddonContext::SampleToken),
  InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
  InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
+ InstanceMethod("getStateSize", &AddonContext::GetStateSize),
  InstanceMethod("printTimings", &AddonContext::PrintTimings),
  InstanceMethod("dispose", &AddonContext::Dispose),
  }
@@ -745,53 +1128,198 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
  };


- class AddonContextDecodeBatchWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
+ class AddonContextDecodeBatchWorker : public Napi::AsyncWorker {
  public:
  AddonContext* ctx;

- AddonContextDecodeBatchWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
- : Napi::AsyncWorker(info.Env(), "AddonContextDecodeBatchWorker"),
+ AddonContextDecodeBatchWorker(const Napi::Env& env, AddonContext* ctx)
+ : Napi::AsyncWorker(env, "AddonContextDecodeBatchWorker"),
  ctx(ctx),
- Napi::Promise::Deferred(info.Env()) {
+ deferred(Napi::Promise::Deferred::New(env)) {
  ctx->Ref();
  }
  ~AddonContextDecodeBatchWorker() {
  ctx->Unref();
  }
- using Napi::AsyncWorker::Queue;
- using Napi::Promise::Deferred::Promise;
+
+ Napi::Promise GetPromise() {
+ return deferred.Promise();
+ }

  protected:
+ Napi::Promise::Deferred deferred;
+
  void Execute() {
- // Perform the evaluation using llama_decode.
- int r = llama_decode(ctx->ctx, ctx->batch);
-
- if (r != 0) {
- if (r == 1) {
- SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
- } else {
- SetError("Eval has failed");
+ try {
+ // Perform the evaluation using llama_decode.
+ int r = llama_decode(ctx->ctx, ctx->batch);
+
+ if (r != 0) {
+ if (r == 1) {
+ SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
+ } else {
+ SetError("Eval has failed");
+ }
+
+ return;
  }

- return;
+ llama_synchronize(ctx->ctx);
+ } catch (const std::exception& e) {
+ SetError(e.what());
+ } catch(...) {
+ SetError("Unknown error when calling \"llama_decode\"");
  }
  }
  void OnOK() {
- Napi::Env env = Napi::AsyncWorker::Env();
- Napi::Promise::Deferred::Resolve(env.Undefined());
+ deferred.Resolve(Env().Undefined());
  }
  void OnError(const Napi::Error& err) {
- Napi::Promise::Deferred::Reject(err.Value());
+ deferred.Reject(err.Value());
  }
  };

  Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
- AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info, this);
+ AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
  worker->Queue();
- return worker->Promise();
+ return worker->GetPromise();
  }

- class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
+ class AddonContextLoadContextWorker : public Napi::AsyncWorker {
+ public:
+ AddonContext* context;
+
+ AddonContextLoadContextWorker(const Napi::Env& env, AddonContext* context)
+ : Napi::AsyncWorker(env, "AddonContextLoadContextWorker"),
+ context(context),
+ deferred(Napi::Promise::Deferred::New(env)) {
+ context->Ref();
+ }
+ ~AddonContextLoadContextWorker() {
+ context->Unref();
+ }
+
+ Napi::Promise GetPromise() {
+ return deferred.Promise();
+ }
+
+ protected:
+ Napi::Promise::Deferred deferred;
+
+ void Execute() {
+ try {
+ context->ctx = llama_new_context_with_model(context->model->model, context->context_params);
+
+ context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
+ } catch (const std::exception& e) {
+ SetError(e.what());
+ } catch(...) {
+ SetError("Unknown error when calling \"llama_new_context_with_model\"");
+ }
+ }
+ void OnOK() {
+ if (context->contextLoaded) {
+ uint64_t contextMemorySize = llama_get_state_size(context->ctx);
+ adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
+ context->loadedContextMemorySize = contextMemorySize;
+ }
+
+ deferred.Resolve(Napi::Boolean::New(Env(), context->contextLoaded));
+ }
+ void OnError(const Napi::Error& err) {
+ deferred.Reject(err.Value());
+ }
+ };
+ class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
+ public:
+ AddonContext* context;
+
+ AddonContextUnloadContextWorker(const Napi::Env& env, AddonContext* context)
+ : Napi::AsyncWorker(env, "AddonContextUnloadContextWorker"),
+ context(context),
+ deferred(Napi::Promise::Deferred::New(env)) {
+ context->Ref();
+ }
+ ~AddonContextUnloadContextWorker() {
+ context->Unref();
+ }
+
+ Napi::Promise GetPromise() {
+ return deferred.Promise();
+ }
+
+ protected:
+ Napi::Promise::Deferred deferred;
+
+ void Execute() {
+ try {
+ llama_free(context->ctx);
+ context->contextLoaded = false;
+
+ try {
+ if (context->has_batch) {
+ llama_batch_free(context->batch);
+ context->has_batch = false;
+ context->batch_n_tokens = 0;
+ }
+
+ context->dispose();
+ } catch (const std::exception& e) {
+ SetError(e.what());
+ } catch(...) {
+ SetError("Unknown error when calling \"llama_batch_free\"");
+ }
+ } catch (const std::exception& e) {
+ SetError(e.what());
+ } catch(...) {
+ SetError("Unknown error when calling \"llama_free\"");
+ }
+ }
+ void OnOK() {
+ adjustNapiExternalMemorySubtract(Env(), context->loadedContextMemorySize);
+ context->loadedContextMemorySize = 0;
+
+ adjustNapiExternalMemorySubtract(Env(), context->batchMemorySize);
+ context->batchMemorySize = 0;
+
+ deferred.Resolve(Env().Undefined());
+ }
+ void OnError(const Napi::Error& err) {
+ deferred.Reject(err.Value());
+ }
+ };
+
+ Napi::Value AddonContext::Init(const Napi::CallbackInfo& info) {
+ if (disposed) {
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
+ return info.Env().Undefined();
+ }
+
+ AddonContextLoadContextWorker* worker = new AddonContextLoadContextWorker(this->Env(), this);
+ worker->Queue();
+ return worker->GetPromise();
+ }
+ Napi::Value AddonContext::Dispose(const Napi::CallbackInfo& info) {
+ if (disposed) {
+ return info.Env().Undefined();
+ }
+
+ if (contextLoaded) {
+ contextLoaded = false;
+
+ AddonContextUnloadContextWorker* worker = new AddonContextUnloadContextWorker(this->Env(), this);
+ worker->Queue();
+ return worker->GetPromise();
+ } else {
+ dispose();
+
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
+ deferred.Resolve(info.Env().Undefined());
+ return deferred.Promise();
+ }
+ }
+
+ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
  public:
  AddonContext* ctx;
  AddonGrammarEvaluationState* grammar_evaluation_state;
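The dominant refactor in this hunk: each async worker used to inherit from both `Napi::AsyncWorker` and `Napi::Promise::Deferred`, and now holds the deferred as a data member exposed through `GetPromise()`, which removes the awkward multiple inheritance and the `using` declarations it required. A minimal, self-contained sketch of the resulting pattern (the class name is illustrative, not from the package):

```cpp
#include <napi.h>

// Promise-backed AsyncWorker: blocking work runs off the JS thread in
// Execute(), and the owned deferred settles the promise back on the JS
// thread in OnOK()/OnError().
class PromiseWorker : public Napi::AsyncWorker {
    public:
        PromiseWorker(const Napi::Env& env)
            : Napi::AsyncWorker(env, "PromiseWorker"),
              deferred(Napi::Promise::Deferred::New(env)) {}

        Napi::Promise GetPromise() {
            return deferred.Promise();
        }

    protected:
        Napi::Promise::Deferred deferred;

        void Execute() override {
            // Blocking work goes here; never touch JS values on this thread.
            // Report failures with SetError("..."), which routes to OnError().
        }
        void OnOK() override {
            deferred.Resolve(Env().Undefined());
        }
        void OnError(const Napi::Error& err) override {
            deferred.Reject(err.Value());
        }
};

// Typical call site, mirroring DecodeBatch/SampleToken in this file:
//     auto* worker = new PromiseWorker(info.Env());
//     worker->Queue();  // the worker deletes itself after OnOK/OnError
//     return worker->GetPromise();
```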
@@ -811,7 +1339,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
  AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
  : Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
  ctx(ctx),
- Napi::Promise::Deferred(info.Env()) {
+ deferred(Napi::Promise::Deferred::New(info.Env())) {
  ctx->Ref();

  batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
@@ -874,11 +1402,25 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
  use_grammar = false;
  }
  }
- using Napi::AsyncWorker::Queue;
- using Napi::Promise::Deferred::Promise;
+
+ Napi::Promise GetPromise() {
+ return deferred.Promise();
+ }

  protected:
+ Napi::Promise::Deferred deferred;
+
  void Execute() {
+ try {
+ SampleToken();
+ } catch (const std::exception& e) {
+ SetError(e.what());
+ } catch(...) {
+ SetError("Unknown error when calling \"SampleToken\"");
+ }
+ }
+
+ void SampleToken() {
  llama_token new_token_id = 0;

  // Select the best prediction.
@@ -940,25 +1482,73 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
  result = new_token_id;
  }
  void OnOK() {
- Napi::Env env = Napi::AsyncWorker::Env();
- Napi::Number resultValue = Napi::Number::New(env, static_cast<uint32_t>(result));
- Napi::Promise::Deferred::Resolve(resultValue);
+ Napi::Number resultValue = Napi::Number::New(Env(), static_cast<uint32_t>(result));
+ deferred.Resolve(resultValue);
  }
  void OnError(const Napi::Error& err) {
- Napi::Promise::Deferred::Reject(err.Value());
+ deferred.Reject(err.Value());
  }
  };

  Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
  AddonContextSampleTokenWorker* worker = new AddonContextSampleTokenWorker(info, this);
  worker->Queue();
- return worker->Promise();
+ return worker->GetPromise();
  }

  Napi::Value systemInfo(const Napi::CallbackInfo& info) {
  return Napi::String::From(info.Env(), llama_print_system_info());
  }

+ Napi::Value addonGetSupportsGpuOffloading(const Napi::CallbackInfo& info) {
+ return Napi::Boolean::New(info.Env(), llama_supports_gpu_offload());
+ }
+
+ Napi::Value addonGetSupportsMmap(const Napi::CallbackInfo& info) {
+ return Napi::Boolean::New(info.Env(), llama_supports_mmap());
+ }
+
+ Napi::Value addonGetSupportsMlock(const Napi::CallbackInfo& info) {
+ return Napi::Boolean::New(info.Env(), llama_supports_mlock());
+ }
+
+ Napi::Value addonGetBlockSizeForGgmlType(const Napi::CallbackInfo& info) {
+ const int ggmlType = info[0].As<Napi::Number>().Int32Value();
+
+ if (ggmlType < 0 || ggmlType > GGML_TYPE_COUNT) {
+ return info.Env().Undefined();
+ }
+
+ const auto blockSize = ggml_blck_size(static_cast<ggml_type>(ggmlType));
+
+ return Napi::Number::New(info.Env(), blockSize);
+ }
+
+ Napi::Value addonGetTypeSizeForGgmlType(const Napi::CallbackInfo& info) {
+ const int ggmlType = info[0].As<Napi::Number>().Int32Value();
+
+ if (ggmlType < 0 || ggmlType > GGML_TYPE_COUNT) {
+ return info.Env().Undefined();
+ }
+
+ const auto typeSize = ggml_type_size(static_cast<ggml_type>(ggmlType));
+
+ return Napi::Number::New(info.Env(), typeSize);
+ }
+
+ Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
+ Napi::Object consts = Napi::Object::New(info.Env());
+ consts.Set("ggmlMaxDims", Napi::Number::New(info.Env(), GGML_MAX_DIMS));
+ consts.Set("ggmlTypeF16Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F16)));
+ consts.Set("ggmlTypeF32Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F32)));
+ consts.Set("ggmlTensorOverhead", Napi::Number::New(info.Env(), ggml_tensor_overhead()));
+ consts.Set("llamaMaxRngState", Napi::Number::New(info.Env(), LLAMA_MAX_RNG_STATE));
+ consts.Set("llamaPosSize", Napi::Number::New(info.Env(), sizeof(llama_pos)));
+ consts.Set("llamaSeqIdSize", Napi::Number::New(info.Env(), sizeof(llama_seq_id)));
+
+ return consts;
+ }
+
  int addonGetGgmlLogLevelNumber(ggml_log_level level) {
  switch (level) {
  case GGML_LOG_LEVEL_ERROR: return 2;
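`addonGetBlockSizeForGgmlType` and `addonGetTypeSizeForGgmlType` in the hunk above let the JS side size quantized buffers without hardcoding ggml's layout. One note on the guard: valid type ids run from 0 to `GGML_TYPE_COUNT - 1`, so `ggmlType > GGML_TYPE_COUNT` still admits `GGML_TYPE_COUNT` itself; an exclusive bound (`>=`) would be tighter. A small sketch of why a caller wants both values (the helper name is illustrative):

```cpp
#include "ggml.h"

// For a quantized ggml type, elements are stored in fixed-size blocks, so
// the byte size of a row of `ne` elements is:
//     type_size * (ne / block_size)
// (this mirrors ggml_row_size(); ne must be a multiple of the block size).
static size_t rowSizeBytes(ggml_type type, int64_t ne) {
    return ggml_type_size(type) * (ne / ggml_blck_size(type));
}
```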
@@ -1025,6 +1615,9 @@ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, voi

  if (status == napi_ok) {
  return;
+ } else {
+ delete stringStream;
+ delete data;
  }
  }
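The added `else` branch above plugs a leak: when queueing onto the thread-safe function fails, N-API never invokes the JS callback, so the heap-allocated payload is never consumed and must be freed at the call site. The rule in isolation, with a hypothetical payload type standing in for the addon's log data:

```cpp
#include <node_api.h>

struct LogPayload { /* message text, level, ... */ };

void enqueueLog(napi_threadsafe_function tsfn, LogPayload* payload) {
    napi_status status = napi_call_threadsafe_function(tsfn, payload, napi_tsfn_nonblocking);
    if (status != napi_ok) {
        // The callback will never run for this payload, so ownership stays
        // here; free it rather than leaking on a full queue or torn-down env.
        delete payload;
    }
}
```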
@@ -1082,38 +1675,150 @@ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
  return info.Env().Undefined();
  }

+ class AddonBackendLoadWorker : public Napi::AsyncWorker {
+ public:
+ AddonBackendLoadWorker(const Napi::Env& env)
+ : Napi::AsyncWorker(env, "AddonBackendLoadWorker"),
+ deferred(Napi::Promise::Deferred::New(env)) {
+ }
+ ~AddonBackendLoadWorker() {
+ }
+
+ Napi::Promise GetPromise() {
+ return deferred.Promise();
+ }
+
+ protected:
+ Napi::Promise::Deferred deferred;
+
+ void Execute() {
+ try {
+ llama_backend_init();
+
+ try {
+ if (backendDisposed) {
+ llama_backend_free();
+ } else {
+ backendInitialized = true;
+ }
+ } catch (const std::exception& e) {
+ SetError(e.what());
+ } catch(...) {
+ SetError("Unknown error when calling \"llama_backend_free\"");
+ }
+ } catch (const std::exception& e) {
+ SetError(e.what());
+ } catch(...) {
+ SetError("Unknown error when calling \"llama_backend_init\"");
+ }
+ }
+ void OnOK() {
+ deferred.Resolve(Env().Undefined());
+ }
+ void OnError(const Napi::Error& err) {
+ deferred.Reject(err.Value());
+ }
+ };
+
+
+ class AddonBackendUnloadWorker : public Napi::AsyncWorker {
+ public:
+ AddonBackendUnloadWorker(const Napi::Env& env)
+ : Napi::AsyncWorker(env, "AddonBackendUnloadWorker"),
+ deferred(Napi::Promise::Deferred::New(env)) {
+ }
+ ~AddonBackendUnloadWorker() {
+ }
+
+ Napi::Promise GetPromise() {
+ return deferred.Promise();
+ }
+
+ protected:
+ Napi::Promise::Deferred deferred;
+
+ void Execute() {
+ try {
+ if (backendInitialized) {
+ backendInitialized = false;
+ llama_backend_free();
+ }
+ } catch (const std::exception& e) {
+ SetError(e.what());
+ } catch(...) {
+ SetError("Unknown error when calling \"llama_backend_free\"");
+ }
+ }
+ void OnOK() {
+ deferred.Resolve(Env().Undefined());
+ }
+ void OnError(const Napi::Error& err) {
+ deferred.Reject(err.Value());
+ }
+ };
+
  Napi::Value addonInit(const Napi::CallbackInfo& info) {
- if (!backendInitialized) {
- llama_backend_init();
- backendInitialized = true;
+ if (backendInitialized) {
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
+ deferred.Resolve(info.Env().Undefined());
+ return deferred.Promise();
  }

- llama_log_set(addonLlamaCppLogCallback, nullptr);
+ AddonBackendLoadWorker* worker = new AddonBackendLoadWorker(info.Env());
+ worker->Queue();
+ return worker->GetPromise();
+ }

- return info.Env().Undefined();
+ Napi::Value addonDispose(const Napi::CallbackInfo& info) {
+ if (backendDisposed) {
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
+ deferred.Resolve(info.Env().Undefined());
+ return deferred.Promise();
+ }
+
+ backendDisposed = true;
+
+ AddonBackendUnloadWorker* worker = new AddonBackendUnloadWorker(info.Env());
+ worker->Queue();
+ return worker->GetPromise();
  }

  static void addonFreeLlamaBackend(Napi::Env env, int* data) {
+ if (backendDisposed) {
+ return;
+ }
+
+ backendDisposed = true;
  if (backendInitialized) {
- llama_backend_free();
  backendInitialized = false;
+ llama_backend_free();
  }
  }

  Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
  exports.DefineProperties({
  Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
+ Napi::PropertyDescriptor::Function("getSupportsGpuOffloading", addonGetSupportsGpuOffloading),
+ Napi::PropertyDescriptor::Function("getSupportsMmap", addonGetSupportsMmap),
+ Napi::PropertyDescriptor::Function("getSupportsMlock", addonGetSupportsMlock),
+ Napi::PropertyDescriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
+ Napi::PropertyDescriptor::Function("getTypeSizeForGgmlType", addonGetTypeSizeForGgmlType),
+ Napi::PropertyDescriptor::Function("getConsts", addonGetConsts),
  Napi::PropertyDescriptor::Function("setLogger", setLogger),
  Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
  Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
+ Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
  Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
  Napi::PropertyDescriptor::Function("init", addonInit),
+ Napi::PropertyDescriptor::Function("dispose", addonDispose),
  });
  AddonModel::init(exports);
  AddonGrammar::init(exports);
  AddonGrammarEvaluationState::init(exports);
  AddonContext::init(exports);

+ llama_log_set(addonLlamaCppLogCallback, nullptr);
+
  exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));

  return exports;
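`init` and `dispose` now both return promises backed by the workers above, and teardown is guarded twice: `addonDispose` sets `backendDisposed` before queueing the unload, and the module finalizer bails out if disposal already happened. The ordering that makes this idempotent, sketched with the same module-level flags the hunks reference:

```cpp
// Claiming `backendDisposed` before touching llama.cpp means the explicit
// dispose() path and the module finalizer can overlap benignly: whichever
// runs first wins, and llama_backend_free() executes at most once.
static bool backendInitialized = false;
static bool backendDisposed = false;

static void freeBackendOnce() {
    if (backendDisposed) {
        return;                    // the other teardown path already ran
    }
    backendDisposed = true;        // claim teardown first
    if (backendInitialized) {
        backendInitialized = false;
        llama_backend_free();
    }
}
```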