node-llama-cpp 3.0.0-beta.12 → 3.0.0-beta.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235)
  1. package/dist/ChatWrapper.d.ts +1 -0
  2. package/dist/ChatWrapper.js +2 -1
  3. package/dist/ChatWrapper.js.map +1 -1
  4. package/dist/TemplateChatWrapper.d.ts +68 -0
  5. package/dist/TemplateChatWrapper.js +239 -0
  6. package/dist/TemplateChatWrapper.js.map +1 -0
  7. package/dist/bindings/AddonTypes.d.ts +15 -5
  8. package/dist/bindings/Llama.d.ts +9 -3
  9. package/dist/bindings/Llama.js +61 -19
  10. package/dist/bindings/Llama.js.map +1 -1
  11. package/dist/bindings/consts.d.ts +2 -0
  12. package/dist/bindings/consts.js +11 -0
  13. package/dist/bindings/consts.js.map +1 -0
  14. package/dist/bindings/getLlama.d.ts +14 -18
  15. package/dist/bindings/getLlama.js +210 -78
  16. package/dist/bindings/getLlama.js.map +1 -1
  17. package/dist/bindings/types.d.ts +11 -5
  18. package/dist/bindings/types.js +22 -0
  19. package/dist/bindings/types.js.map +1 -1
  20. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  21. package/dist/bindings/utils/asyncEvery.js +15 -0
  22. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  23. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  24. package/dist/bindings/utils/asyncSome.js +27 -0
  25. package/dist/bindings/utils/asyncSome.js.map +1 -0
  26. package/dist/bindings/utils/cloneLlamaCppRepo.js +13 -3
  27. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  28. package/dist/bindings/utils/compileLLamaCpp.d.ts +4 -1
  29. package/dist/bindings/utils/compileLLamaCpp.js +136 -74
  30. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  31. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
  32. package/dist/bindings/utils/detectAvailableComputeLayers.js +300 -0
  33. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  34. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  35. package/dist/bindings/utils/detectGlibc.js +36 -0
  36. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  37. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
  38. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  39. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  40. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +12 -6
  41. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
  42. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +11 -0
  43. package/dist/bindings/utils/getGpuTypesToUseForOption.js +30 -0
  44. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  45. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  46. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  47. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  48. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  49. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  50. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  51. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  52. package/dist/bindings/utils/hasFileInPath.js +34 -0
  53. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  54. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +1 -1
  55. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +3 -9
  56. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -1
  57. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  58. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  59. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  60. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +9 -2
  61. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +10 -4
  62. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +1 -1
  63. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  64. package/dist/bindings/utils/testBindingBinary.js +98 -0
  65. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  66. package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
  67. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  68. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  69. package/dist/chatWrappers/ChatMLChatWrapper.js +1 -1
  70. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  71. package/dist/chatWrappers/GemmaChatWrapper.d.ts +18 -0
  72. package/dist/chatWrappers/GemmaChatWrapper.js +86 -0
  73. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  74. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +3 -0
  75. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -1
  76. package/dist/cli/cli.js +2 -0
  77. package/dist/cli/cli.js.map +1 -1
  78. package/dist/cli/commands/BuildCommand.d.ts +5 -5
  79. package/dist/cli/commands/BuildCommand.js +78 -60
  80. package/dist/cli/commands/BuildCommand.js.map +1 -1
  81. package/dist/cli/commands/ChatCommand.js +31 -14
  82. package/dist/cli/commands/ChatCommand.js.map +1 -1
  83. package/dist/cli/commands/CompleteCommand.js +30 -13
  84. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  85. package/dist/cli/commands/DebugCommand.js +3 -9
  86. package/dist/cli/commands/DebugCommand.js.map +1 -1
  87. package/dist/cli/commands/DownloadCommand.d.ts +5 -5
  88. package/dist/cli/commands/DownloadCommand.js +97 -56
  89. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  90. package/dist/cli/commands/InfillCommand.js +30 -13
  91. package/dist/cli/commands/InfillCommand.js.map +1 -1
  92. package/dist/cli/commands/InspectCommand.d.ts +7 -0
  93. package/dist/cli/commands/InspectCommand.js +113 -0
  94. package/dist/cli/commands/InspectCommand.js.map +1 -0
  95. package/dist/cli/commands/OnPostInstallCommand.js +2 -0
  96. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  97. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  98. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  99. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  100. package/dist/config.d.ts +4 -4
  101. package/dist/config.js +11 -12
  102. package/dist/config.js.map +1 -1
  103. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
  104. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
  105. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
  106. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +5 -8
  107. package/dist/evaluator/LlamaContext/LlamaContext.js +111 -65
  108. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  109. package/dist/evaluator/LlamaContext/types.d.ts +13 -6
  110. package/dist/evaluator/LlamaEmbeddingContext.d.ts +6 -5
  111. package/dist/evaluator/LlamaEmbeddingContext.js +32 -22
  112. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  113. package/dist/evaluator/LlamaGrammar.js +1 -0
  114. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  115. package/dist/evaluator/LlamaModel.d.ts +16 -16
  116. package/dist/evaluator/LlamaModel.js +95 -20
  117. package/dist/evaluator/LlamaModel.js.map +1 -1
  118. package/dist/gguf/GGUFInsights.d.ts +28 -0
  119. package/dist/gguf/GGUFInsights.js +58 -0
  120. package/dist/gguf/GGUFInsights.js.map +1 -0
  121. package/dist/gguf/GGUFMetadata.d.ts +19 -0
  122. package/dist/gguf/GGUFMetadata.js +38 -0
  123. package/dist/gguf/GGUFMetadata.js.map +1 -0
  124. package/dist/gguf/errors/InvalidGGUFMagicError.d.ts +3 -0
  125. package/dist/gguf/errors/InvalidGGUFMagicError.js +6 -0
  126. package/dist/gguf/errors/InvalidGGUFMagicError.js.map +1 -0
  127. package/dist/gguf/errors/MetadataNotParsedYetError.d.ts +3 -0
  128. package/dist/gguf/errors/MetadataNotParsedYetError.js +6 -0
  129. package/dist/gguf/errors/MetadataNotParsedYetError.js.map +1 -0
  130. package/dist/gguf/errors/MissingNodeLlamaError.d.ts +3 -0
  131. package/dist/gguf/errors/MissingNodeLlamaError.js +6 -0
  132. package/dist/gguf/errors/MissingNodeLlamaError.js.map +1 -0
  133. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.d.ts +5 -0
  134. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js +11 -0
  135. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js.map +1 -0
  136. package/dist/gguf/errors/UnsupportedMetadataTypeError.d.ts +4 -0
  137. package/dist/gguf/errors/UnsupportedMetadataTypeError.js +8 -0
  138. package/dist/gguf/errors/UnsupportedMetadataTypeError.js.map +1 -0
  139. package/dist/gguf/ggufParser/GGUFParser.d.ts +18 -0
  140. package/dist/gguf/ggufParser/GGUFParser.js +123 -0
  141. package/dist/gguf/ggufParser/GGUFParser.js.map +1 -0
  142. package/dist/gguf/ggufParser/GGUFTypes.d.ts +257 -0
  143. package/dist/gguf/ggufParser/GGUFTypes.js +2 -0
  144. package/dist/gguf/ggufParser/GGUFTypes.js.map +1 -0
  145. package/dist/gguf/ggufParser/checkArchitecture.d.ts +14 -0
  146. package/dist/gguf/ggufParser/checkArchitecture.js +74 -0
  147. package/dist/gguf/ggufParser/checkArchitecture.js.map +1 -0
  148. package/dist/gguf/ggufParser/stream/GGUFBaseStream.d.ts +38 -0
  149. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js +83 -0
  150. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js.map +1 -0
  151. package/dist/gguf/ggufParser/stream/GGUFFetchStream.d.ts +14 -0
  152. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js +35 -0
  153. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js.map +1 -0
  154. package/dist/gguf/ggufParser/stream/GGUFReadStream.d.ts +15 -0
  155. package/dist/gguf/ggufParser/stream/GGUFReadStream.js +40 -0
  156. package/dist/gguf/ggufParser/stream/GGUFReadStream.js.map +1 -0
  157. package/dist/index.d.ts +3 -1
  158. package/dist/index.js +3 -1
  159. package/dist/index.js.map +1 -1
  160. package/dist/state.d.ts +2 -0
  161. package/dist/state.js +7 -0
  162. package/dist/state.js.map +1 -1
  163. package/dist/utils/DisposeGuard.d.ts +13 -0
  164. package/dist/utils/DisposeGuard.js +120 -0
  165. package/dist/utils/DisposeGuard.js.map +1 -0
  166. package/dist/utils/LlamaText.js +2 -2
  167. package/dist/utils/LlamaText.js.map +1 -1
  168. package/dist/utils/cmake.js +23 -10
  169. package/dist/utils/cmake.js.map +1 -1
  170. package/dist/utils/getBuildDefaults.d.ts +1 -3
  171. package/dist/utils/getBuildDefaults.js +2 -4
  172. package/dist/utils/getBuildDefaults.js.map +1 -1
  173. package/dist/utils/getConsoleLogPrefix.d.ts +1 -1
  174. package/dist/utils/getConsoleLogPrefix.js +5 -4
  175. package/dist/utils/getConsoleLogPrefix.js.map +1 -1
  176. package/dist/utils/mergeUnionTypes.d.ts +6 -0
  177. package/dist/utils/mergeUnionTypes.js +2 -0
  178. package/dist/utils/mergeUnionTypes.js.map +1 -0
  179. package/dist/utils/parseTextTemplate.d.ts +66 -0
  180. package/dist/utils/parseTextTemplate.js +116 -0
  181. package/dist/utils/parseTextTemplate.js.map +1 -0
  182. package/dist/utils/removeNullFields.d.ts +2 -2
  183. package/dist/utils/removeNullFields.js.map +1 -1
  184. package/dist/utils/spawnCommand.d.ts +11 -1
  185. package/dist/utils/spawnCommand.js +55 -7
  186. package/dist/utils/spawnCommand.js.map +1 -1
  187. package/llama/CMakeLists.txt +11 -5
  188. package/llama/addon.cpp +700 -83
  189. package/llama/binariesGithubRelease.json +1 -1
  190. package/llama/gitRelease.bundle +0 -0
  191. package/llama/grammars/json.gbnf +1 -1
  192. package/llama/grammars/json_arr.gbnf +1 -1
  193. package/llama/llama.cpp.info.json +1 -1
  194. package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -0
  195. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  196. package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -0
  197. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  198. package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -0
  199. package/llamaBins/linux-x64/llama-addon.node +0 -0
  200. package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -0
  201. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  202. package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -0
  203. package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
  204. package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -0
  205. package/llamaBins/mac-arm64-metal/default.metallib +0 -0
  206. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  207. package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -0
  208. package/llamaBins/mac-x64/llama-addon.node +0 -0
  209. package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -0
  210. package/llamaBins/win-x64/llama-addon.exp +0 -0
  211. package/llamaBins/win-x64/llama-addon.lib +0 -0
  212. package/llamaBins/win-x64/llama-addon.node +0 -0
  213. package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -0
  214. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  215. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  216. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  217. package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -0
  218. package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
  219. package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
  220. package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
  221. package/package.json +14 -9
  222. package/dist/cli/utils/logEnabledComputeLayers.d.ts +0 -8
  223. package/dist/cli/utils/logEnabledComputeLayers.js +0 -11
  224. package/dist/cli/utils/logEnabledComputeLayers.js.map +0 -1
  225. package/llamaBins/linux-arm64/.buildMetadata.json +0 -1
  226. package/llamaBins/linux-armv7l/.buildMetadata.json +0 -1
  227. package/llamaBins/linux-x64/.buildMetadata.json +0 -1
  228. package/llamaBins/linux-x64-cuda/.buildMetadata.json +0 -1
  229. package/llamaBins/linux-x64-vulkan/.buildMetadata.json +0 -1
  230. package/llamaBins/mac-arm64-metal/.buildMetadata.json +0 -1
  231. package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -7022
  232. package/llamaBins/mac-x64/.buildMetadata.json +0 -1
  233. package/llamaBins/win-x64/.buildMetadata.json +0 -1
  234. package/llamaBins/win-x64-cuda/.buildMetadata.json +0 -1
  235. package/llamaBins/win-x64-vulkan/.buildMetadata.json +0 -1
package/llama/addon.cpp CHANGED
@@ -35,10 +35,77 @@ void addonCallJsLogCallback(
35
35
  using AddonThreadSafeLogCallbackFunction =
36
36
  Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
37
37
 
38
+
39
+ struct addon_progress_event {
40
+ public:
41
+ const float progress;
42
+ };
43
+
44
+ using AddonThreadSafeProgressCallbackFunctionContext = Napi::Reference<Napi::Value>;
45
+ void addonCallJsProgressCallback(
46
+ Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
47
+ );
48
+ using AddonThreadSafeProgressEventCallbackFunction =
49
+ Napi::TypedThreadSafeFunction<AddonThreadSafeProgressCallbackFunctionContext, addon_progress_event, addonCallJsProgressCallback>;
50
+
51
+
38
52
  AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
39
53
  bool addonJsLoggerCallbackSet = false;
40
54
  int addonLoggerLogLevel = 5;
41
55
  bool backendInitialized = false;
56
+ bool backendDisposed = false;
57
+
58
+ void addonCallJsProgressCallback(
59
+ Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
60
+ ) {
61
+ if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
62
+ try {
63
+ callback.Call({Napi::Number::New(env, data->progress)});
64
+ } catch (const Napi::Error& e) {}
65
+ }
66
+
67
+ if (data != nullptr) {
68
+ delete data;
69
+ }
70
+ }
71
+
72
+ static uint64_t calculateBatchMemorySize(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
73
+ uint64_t totalSize = 0;
74
+
75
+ if (embd) {
76
+ totalSize += sizeof(float) * n_tokens_alloc * embd;
77
+ } else {
78
+ totalSize += sizeof(llama_token) * n_tokens_alloc;
79
+ }
80
+
81
+ totalSize += sizeof(llama_pos) * n_tokens_alloc;
82
+ totalSize += sizeof(int32_t) * n_tokens_alloc;
83
+ totalSize += sizeof(llama_seq_id *) * (n_tokens_alloc + 1);
84
+
85
+ totalSize += sizeof(llama_seq_id) * n_seq_max * n_tokens_alloc;
86
+
87
+ totalSize += sizeof(int8_t) * n_tokens_alloc;
88
+
89
+ return totalSize;
90
+ }
91
+
92
+ static void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size) {
93
+ const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
94
+ while (size > 0) {
95
+ int64_t adjustSize = std::min(size, chunkSize);
96
+ Napi::MemoryManagement::AdjustExternalMemory(env, adjustSize);
97
+ size -= adjustSize;
98
+ }
99
+ }
100
+
101
+ static void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
102
+ const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
103
+ while (size > 0) {
104
+ int64_t adjustSize = std::min(size, chunkSize);
105
+ Napi::MemoryManagement::AdjustExternalMemory(env, -adjustSize);
106
+ size -= adjustSize;
107
+ }
108
+ }
42
109
 
43
110
  std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
44
111
  std::vector<char> result(8, 0);
@@ -107,6 +174,22 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
107
174
  return result;
108
175
  }
109
176
 
177
+ Napi::Value getGpuType(const Napi::CallbackInfo& info) {
178
+ #ifdef GPU_INFO_USE_CUBLAS
179
+ return Napi::String::New(info.Env(), "cuda");
180
+ #endif
181
+
182
+ #ifdef GPU_INFO_USE_VULKAN
183
+ return Napi::String::New(info.Env(), "vulkan");
184
+ #endif
185
+
186
+ #ifdef GPU_INFO_USE_METAL
187
+ return Napi::String::New(info.Env(), "metal");
188
+ #endif
189
+
190
+ return info.Env().Undefined();
191
+ }
192
+
110
193
  static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
111
194
  auto tokenType = llama_token_get_type(model, token);
112
195
 
@@ -120,28 +203,49 @@ static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* mod
120
203
  static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
121
204
  auto tokenType = llama_token_get_type(model, token);
122
205
 
123
- if (tokenType != LLAMA_TOKEN_TYPE_CONTROL) {
206
+ if (tokenType != LLAMA_TOKEN_TYPE_CONTROL && tokenType != LLAMA_TOKEN_TYPE_USER_DEFINED) {
124
207
  return Napi::Number::From(info.Env(), -1);
125
208
  }
126
209
 
127
210
  return Napi::Number::From(info.Env(), token);
128
211
  }
129
212
 
213
+ static bool llamaModelParamsProgressCallback(float progress, void * user_data);
214
+
130
215
  class AddonModel : public Napi::ObjectWrap<AddonModel> {
131
216
  public:
132
217
  llama_model_params model_params;
133
218
  llama_model* model;
219
+ uint64_t loadedModelSize = 0;
220
+ Napi::Reference<Napi::Object> addonExportsRef;
221
+ bool hasAddonExportsRef = false;
222
+
223
+ std::string modelPath;
224
+ bool modelLoaded = false;
225
+ bool abortModelLoad = false;
226
+ bool model_load_stopped = false;
227
+ float rawModelLoadPercentage = 0;
228
+ unsigned modelLoadPercentage = 0;
229
+ AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
230
+ bool onLoadProgressEventCallbackSet = false;
231
+ bool hasLoadAbortSignal = false;
232
+
134
233
  bool disposed = false;
135
234
 
136
235
  AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
137
236
  model_params = llama_model_default_params();
138
237
 
139
238
  // Get the model path
140
- std::string modelPath = info[0].As<Napi::String>().Utf8Value();
239
+ modelPath = info[0].As<Napi::String>().Utf8Value();
141
240
 
142
241
  if (info.Length() > 1 && info[1].IsObject()) {
143
242
  Napi::Object options = info[1].As<Napi::Object>();
144
243
 
244
+ if (options.Has("addonExports")) {
245
+ addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
246
+ hasAddonExportsRef = true;
247
+ }
248
+
145
249
  if (options.Has("gpuLayers")) {
146
250
  model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
147
251
  }
@@ -157,13 +261,37 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
157
261
  if (options.Has("useMlock")) {
158
262
  model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
159
263
  }
160
- }
161
264
 
162
- model = llama_load_model_from_file(modelPath.c_str(), model_params);
265
+ if (options.Has("onLoadProgress")) {
266
+ auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
267
+ if (onLoadProgressJSCallback.IsFunction()) {
268
+ AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
269
+ addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
270
+ info.Env(),
271
+ onLoadProgressJSCallback,
272
+ "onLoadProgressCallback",
273
+ 0,
274
+ 1,
275
+ context,
276
+ [](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
277
+ addonModel->onLoadProgressEventCallbackSet = false;
278
+
279
+ delete ctx;
280
+ },
281
+ this
282
+ );
283
+ onLoadProgressEventCallbackSet = true;
284
+ }
285
+ }
163
286
 
164
- if (model == NULL) {
165
- Napi::Error::New(info.Env(), "Failed to load model").ThrowAsJavaScriptException();
166
- return;
287
+ if (options.Has("hasLoadAbortSignal")) {
288
+ hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
289
+ }
290
+
291
+ if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
292
+ model_params.progress_callback_user_data = &(*this);
293
+ model_params.progress_callback = llamaModelParamsProgressCallback;
294
+ }
167
295
  }
168
296
  }
169
297
 
@@ -176,23 +304,31 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
176
304
  return;
177
305
  }
178
306
 
179
- llama_free_model(model);
180
307
  disposed = true;
181
- }
308
+ if (modelLoaded) {
309
+ modelLoaded = false;
310
+ llama_free_model(model);
182
311
 
183
- Napi::Value Dispose(const Napi::CallbackInfo& info) {
184
- if (disposed) {
185
- return info.Env().Undefined();
312
+ adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
313
+ loadedModelSize = 0;
186
314
  }
187
315
 
188
- dispose();
316
+ if (hasAddonExportsRef) {
317
+ addonExportsRef.Unref();
318
+ hasAddonExportsRef = false;
319
+ }
320
+ }
189
321
 
322
+ Napi::Value Init(const Napi::CallbackInfo& info);
323
+ Napi::Value AbortActiveModelLoad(const Napi::CallbackInfo& info) {
324
+ abortModelLoad = true;
190
325
  return info.Env().Undefined();
191
326
  }
327
+ Napi::Value Dispose(const Napi::CallbackInfo& info);
192
328
 
193
329
  Napi::Value Tokenize(const Napi::CallbackInfo& info) {
194
330
  if (disposed) {
195
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
331
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
196
332
  return info.Env().Undefined();
197
333
  }
198
334
 
@@ -210,7 +346,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
210
346
  }
211
347
  Napi::Value Detokenize(const Napi::CallbackInfo& info) {
212
348
  if (disposed) {
213
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
349
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
214
350
  return info.Env().Undefined();
215
351
  }
216
352
 
@@ -235,7 +371,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
235
371
 
236
372
  Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info) {
237
373
  if (disposed) {
238
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
374
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
239
375
  return info.Env().Undefined();
240
376
  }
241
377
 
@@ -244,7 +380,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
244
380
 
245
381
  Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
246
382
  if (disposed) {
247
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
383
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
248
384
  return info.Env().Undefined();
249
385
  }
250
386
 
@@ -253,7 +389,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
253
389
 
254
390
  Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
255
391
  if (disposed) {
256
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
392
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
257
393
  return info.Env().Undefined();
258
394
  }
259
395
 
@@ -262,7 +398,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
262
398
 
263
399
  Napi::Value GetTotalParameters(const Napi::CallbackInfo& info) {
264
400
  if (disposed) {
265
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
401
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
266
402
  return info.Env().Undefined();
267
403
  }
268
404
 
@@ -271,7 +407,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
271
407
 
272
408
  Napi::Value GetModelDescription(const Napi::CallbackInfo& info) {
273
409
  if (disposed) {
274
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
410
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
275
411
  return info.Env().Undefined();
276
412
  }
277
413
 
@@ -283,7 +419,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
283
419
 
284
420
  Napi::Value TokenBos(const Napi::CallbackInfo& info) {
285
421
  if (disposed) {
286
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
422
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
287
423
  return info.Env().Undefined();
288
424
  }
289
425
 
@@ -291,7 +427,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
291
427
  }
292
428
  Napi::Value TokenEos(const Napi::CallbackInfo& info) {
293
429
  if (disposed) {
294
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
430
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
295
431
  return info.Env().Undefined();
296
432
  }
297
433
 
@@ -299,7 +435,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
299
435
  }
300
436
  Napi::Value TokenNl(const Napi::CallbackInfo& info) {
301
437
  if (disposed) {
302
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
438
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
303
439
  return info.Env().Undefined();
304
440
  }
305
441
 
@@ -307,7 +443,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
307
443
  }
308
444
  Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
309
445
  if (disposed) {
310
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
446
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
311
447
  return info.Env().Undefined();
312
448
  }
313
449
 
@@ -315,7 +451,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
315
451
  }
316
452
  Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
317
453
  if (disposed) {
318
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
454
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
319
455
  return info.Env().Undefined();
320
456
  }
321
457
 
@@ -323,7 +459,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
323
459
  }
324
460
  Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
325
461
  if (disposed) {
326
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
462
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
327
463
  return info.Env().Undefined();
328
464
  }
329
465
 
@@ -331,7 +467,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
331
467
  }
332
468
  Napi::Value EotToken(const Napi::CallbackInfo& info) {
333
469
  if (disposed) {
334
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
470
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
335
471
  return info.Env().Undefined();
336
472
  }
337
473
 
@@ -339,7 +475,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
339
475
  }
340
476
  Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
341
477
  if (disposed) {
342
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
478
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
343
479
  return info.Env().Undefined();
344
480
  }
345
481
 
@@ -358,7 +494,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
358
494
 
359
495
  Napi::Value GetTokenType(const Napi::CallbackInfo& info) {
360
496
  if (disposed) {
361
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
497
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
362
498
  return info.Env().Undefined();
363
499
  }
364
500
 
@@ -386,6 +522,8 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
386
522
  exports.Env(),
387
523
  "AddonModel",
388
524
  {
525
+ InstanceMethod("init", &AddonModel::Init),
526
+ InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
389
527
  InstanceMethod("tokenize", &AddonModel::Tokenize),
390
528
  InstanceMethod("detokenize", &AddonModel::Detokenize),
391
529
  InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
@@ -410,9 +548,166 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
410
548
  }
411
549
  };
412
550
 
551
+ static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
552
+ AddonModel* addonModel = (AddonModel *) user_data;
553
+ unsigned percentage = (unsigned) (100 * progress);
554
+
555
+ if (percentage > addonModel->modelLoadPercentage) {
556
+ addonModel->modelLoadPercentage = percentage;
557
+
558
+ // original llama.cpp logs
559
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
560
+ if (percentage >= 100) {
561
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
562
+ }
563
+ }
564
+
565
+ if (progress > addonModel->rawModelLoadPercentage) {
566
+ addonModel->rawModelLoadPercentage = progress;
567
+
568
+ if (addonModel->onLoadProgressEventCallbackSet) {
569
+ addon_progress_event* data = new addon_progress_event {
570
+ progress
571
+ };
572
+
573
+ auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);
574
+
575
+ if (status != napi_ok) {
576
+ delete data;
577
+ }
578
+ }
579
+ }
580
+
581
+ return !(addonModel->abortModelLoad);
582
+ }
583
+
584
+ class AddonModelLoadModelWorker : public Napi::AsyncWorker {
585
+ public:
586
+ AddonModel* model;
587
+
588
+ AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
589
+ : Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
590
+ model(model),
591
+ deferred(Napi::Promise::Deferred::New(env)) {
592
+ model->Ref();
593
+ }
594
+ ~AddonModelLoadModelWorker() {
595
+ model->Unref();
596
+ }
597
+
598
+ Napi::Promise GetPromise() {
599
+ return deferred.Promise();
600
+ }
601
+
602
+ protected:
603
+ Napi::Promise::Deferred deferred;
604
+
605
+ void Execute() {
606
+ try {
607
+ model->model = llama_load_model_from_file(model->modelPath.c_str(), model->model_params);
608
+
609
+ model->modelLoaded = model->model != nullptr && model->model != NULL;
610
+ } catch (const std::exception& e) {
611
+ SetError(e.what());
612
+ } catch(...) {
613
+ SetError("Unknown error when calling \"llama_load_model_from_file\"");
614
+ }
615
+ }
616
+ void OnOK() {
617
+ if (model->modelLoaded) {
618
+ uint64_t modelSize = llama_model_size(model->model);
619
+ adjustNapiExternalMemoryAdd(Env(), modelSize);
620
+ model->loadedModelSize = modelSize;
621
+ }
622
+
623
+ deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
624
+ if (model->onLoadProgressEventCallbackSet) {
625
+ model->addonThreadSafeOnLoadProgressEventCallback.Release();
626
+ }
627
+ }
628
+ void OnError(const Napi::Error& err) {
629
+ deferred.Reject(err.Value());
630
+ }
631
+ };
632
+ class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
633
+ public:
634
+ AddonModel* model;
635
+
636
+ AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
637
+ : Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
638
+ model(model),
639
+ deferred(Napi::Promise::Deferred::New(env)) {
640
+ model->Ref();
641
+ }
642
+ ~AddonModelUnloadModelWorker() {
643
+ model->Unref();
644
+ }
645
+
646
+ Napi::Promise GetPromise() {
647
+ return deferred.Promise();
648
+ }
649
+
650
+ protected:
651
+ Napi::Promise::Deferred deferred;
652
+
653
+ void Execute() {
654
+ try {
655
+ llama_free_model(model->model);
656
+ model->modelLoaded = false;
657
+
658
+ model->dispose();
659
+ } catch (const std::exception& e) {
660
+ SetError(e.what());
661
+ } catch(...) {
662
+ SetError("Unknown error when calling \"llama_free_model\"");
663
+ }
664
+ }
665
+ void OnOK() {
666
+ adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
667
+ model->loadedModelSize = 0;
668
+
669
+ deferred.Resolve(Env().Undefined());
670
+ }
671
+ void OnError(const Napi::Error& err) {
672
+ deferred.Reject(err.Value());
673
+ }
674
+ };
675
+
676
+ Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
677
+ if (disposed) {
678
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
679
+ return info.Env().Undefined();
680
+ }
681
+
682
+ AddonModelLoadModelWorker* worker = new AddonModelLoadModelWorker(this->Env(), this);
683
+ worker->Queue();
684
+ return worker->GetPromise();
685
+ }
686
+ Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
687
+ if (disposed) {
688
+ return info.Env().Undefined();
689
+ }
690
+
691
+ if (modelLoaded) {
692
+ modelLoaded = false;
693
+
694
+ AddonModelUnloadModelWorker* worker = new AddonModelUnloadModelWorker(this->Env(), this);
695
+ worker->Queue();
696
+ return worker->GetPromise();
697
+ } else {
698
+ dispose();
699
+
700
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
701
+ deferred.Resolve(info.Env().Undefined());
702
+ return deferred.Promise();
703
+ }
704
+ }
705
+
413
706
  class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
414
707
  public:
415
708
  grammar_parser::parse_state parsed_grammar;
709
+ Napi::Reference<Napi::Object> addonExportsRef;
710
+ bool hasAddonExportsRef = false;
416
711
 
417
712
  AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
418
713
  // Get the model path
@@ -422,6 +717,11 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
422
717
  if (info.Length() > 1 && info[1].IsObject()) {
423
718
  Napi::Object options = info[1].As<Napi::Object>();
424
719
 
720
+ if (options.Has("addonExports")) {
721
+ addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
722
+ hasAddonExportsRef = true;
723
+ }
724
+
425
725
  if (options.Has("printGrammar")) {
426
726
  should_print_grammar = options.Get("printGrammar").As<Napi::Boolean>().Value();
427
727
  }
@@ -439,6 +739,13 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
439
739
  }
440
740
  }
441
741
 
742
+ ~AddonGrammar() {
743
+ if (hasAddonExportsRef) {
744
+ addonExportsRef.Unref();
745
+ hasAddonExportsRef = false;
746
+ }
747
+ }
748
+
442
749
  static void init(Napi::Object exports) {
443
750
  exports.Set("AddonGrammar", DefineClass(exports.Env(), "AddonGrammar", {}));
444
751
  }
@@ -477,9 +784,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
477
784
  llama_context_params context_params;
478
785
  llama_context* ctx;
479
786
  llama_batch batch;
787
+ uint64_t batchMemorySize = 0;
480
788
  bool has_batch = false;
481
789
  int32_t batch_n_tokens = 0;
482
790
  int n_cur = 0;
791
+
792
+ uint64_t loadedContextMemorySize = 0;
793
+ bool contextLoaded = false;
794
+
483
795
  bool disposed = false;
484
796
 
485
797
  AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
@@ -507,10 +819,11 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
507
819
 
508
820
  if (options.Has("batchSize")) {
509
821
  context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
822
+ context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
510
823
  }
511
824
 
512
- if (options.Has("embedding")) {
513
- context_params.embedding = options.Get("embedding").As<Napi::Boolean>().Value();
825
+ if (options.Has("embeddings")) {
826
+ context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
514
827
  }
515
828
 
516
829
  if (options.Has("threads")) {
@@ -521,9 +834,6 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
521
834
  context_params.n_threads_batch = resolved_n_threads;
522
835
  }
523
836
  }
524
-
525
- ctx = llama_new_context_with_model(model->model, context_params);
526
- Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
527
837
  }
528
838
  ~AddonContext() {
529
839
  dispose();
@@ -534,13 +844,18 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
534
844
  return;
535
845
  }
536
846
 
537
- Napi::MemoryManagement::AdjustExternalMemory(Env(), -(int64_t)llama_get_state_size(ctx));
538
- llama_free(ctx);
847
+ disposed = true;
848
+ if (contextLoaded) {
849
+ contextLoaded = false;
850
+ llama_free(ctx);
851
+
852
+ adjustNapiExternalMemorySubtract(Env(), loadedContextMemorySize);
853
+ loadedContextMemorySize = 0;
854
+ }
855
+
539
856
  model->Unref();
540
857
 
541
858
  disposeBatch();
542
-
543
- disposed = true;
544
859
  }
545
860
  void disposeBatch() {
546
861
  if (!has_batch) {
@@ -550,16 +865,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
550
865
  llama_batch_free(batch);
551
866
  has_batch = false;
552
867
  batch_n_tokens = 0;
868
+
869
+ adjustNapiExternalMemorySubtract(Env(), batchMemorySize);
870
+ batchMemorySize = 0;
553
871
  }
554
- Napi::Value Dispose(const Napi::CallbackInfo& info) {
555
- if (disposed) {
556
- return info.Env().Undefined();
557
- }
558
872
 
559
- dispose();
873
+ Napi::Value Init(const Napi::CallbackInfo& info);
874
+ Napi::Value Dispose(const Napi::CallbackInfo& info);
560
875
 
561
- return info.Env().Undefined();
562
- }
563
876
  Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
564
877
  if (disposed) {
565
878
  Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
@@ -584,6 +897,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
584
897
  has_batch = true;
585
898
  batch_n_tokens = n_tokens;
586
899
 
900
+ uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_n_embd(model->model), context_params.n_batch);
901
+ if (newBatchMemorySize > batchMemorySize) {
902
+ adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
903
+ batchMemorySize = newBatchMemorySize;
904
+ } else if (newBatchMemorySize < batchMemorySize) {
905
+ adjustNapiExternalMemorySubtract(Env(), batchMemorySize - newBatchMemorySize);
906
+ batchMemorySize = newBatchMemorySize;
907
+ }
908
+
587
909
  return info.Env().Undefined();
588
910
  }
589
911
  Napi::Value DisposeBatch(const Napi::CallbackInfo& info) {
@@ -632,7 +954,12 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
632
954
 
633
955
  int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
634
956
 
635
- llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
957
+ bool result = llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
958
+
959
+ if (!result) {
960
+ Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
961
+ return info.Env().Undefined();
962
+ }
636
963
 
637
964
  return info.Env().Undefined();
638
965
  }
@@ -646,9 +973,9 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
646
973
  int32_t startPos = info[1].As<Napi::Number>().Int32Value();
647
974
  int32_t endPos = info[2].As<Napi::Number>().Int32Value();
648
975
 
649
- llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
976
+ bool result = llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
650
977
 
651
- return info.Env().Undefined();
978
+ return Napi::Boolean::New(info.Env(), result);
652
979
  }
653
980
  Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
654
981
  if (disposed) {
@@ -661,7 +988,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
661
988
  int32_t endPos = info[2].As<Napi::Number>().Int32Value();
662
989
  int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
663
990
 
664
- llama_kv_cache_seq_shift(ctx, sequenceId, startPos, endPos, shiftDelta);
991
+ llama_kv_cache_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);
665
992
 
666
993
  return info.Env().Undefined();
667
994
  }
@@ -686,8 +1013,23 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
686
1013
  return info.Env().Undefined();
687
1014
  }
688
1015
 
1016
+ int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
1017
+
1018
+ if (inputTokensLength <= 0) {
1019
+ Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
1020
+ return info.Env().Undefined();
1021
+ }
1022
+
689
1023
  const int n_embd = llama_n_embd(model->model);
690
- const auto* embeddings = llama_get_embeddings(ctx);
1024
+ const auto* embeddings = llama_get_embeddings_seq(ctx, 0);
1025
+ if (embeddings == NULL) {
1026
+ embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
1027
+
1028
+ if (embeddings == NULL) {
1029
+ Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
1030
+ return info.Env().Undefined();
1031
+ }
1032
+ }
691
1033
 
692
1034
  Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
693
1035
  for (size_t i = 0; i < n_embd; ++i) {
@@ -710,6 +1052,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
710
1052
  exports.Env(),
711
1053
  "AddonContext",
712
1054
  {
1055
+ InstanceMethod("init", &AddonContext::Init),
713
1056
  InstanceMethod("getContextSize", &AddonContext::GetContextSize),
714
1057
  InstanceMethod("initBatch", &AddonContext::InitBatch),
715
1058
  InstanceMethod("addToBatch", &AddonContext::AddToBatch),
@@ -729,53 +1072,198 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
729
1072
  };
730
1073
 
731
1074
 
732
- class AddonContextDecodeBatchWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
1075
+ class AddonContextDecodeBatchWorker : public Napi::AsyncWorker {
733
1076
  public:
734
1077
  AddonContext* ctx;
735
1078
 
736
- AddonContextDecodeBatchWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
737
- : Napi::AsyncWorker(info.Env(), "AddonContextDecodeBatchWorker"),
1079
+ AddonContextDecodeBatchWorker(const Napi::Env& env, AddonContext* ctx)
1080
+ : Napi::AsyncWorker(env, "AddonContextDecodeBatchWorker"),
738
1081
  ctx(ctx),
739
- Napi::Promise::Deferred(info.Env()) {
1082
+ deferred(Napi::Promise::Deferred::New(env)) {
740
1083
  ctx->Ref();
741
1084
  }
742
1085
  ~AddonContextDecodeBatchWorker() {
743
1086
  ctx->Unref();
744
1087
  }
745
- using Napi::AsyncWorker::Queue;
746
- using Napi::Promise::Deferred::Promise;
1088
+
1089
+ Napi::Promise GetPromise() {
1090
+ return deferred.Promise();
1091
+ }
747
1092
 
748
1093
  protected:
1094
+ Napi::Promise::Deferred deferred;
1095
+
749
1096
  void Execute() {
750
- // Perform the evaluation using llama_decode.
751
- int r = llama_decode(ctx->ctx, ctx->batch);
752
-
753
- if (r != 0) {
754
- if (r == 1) {
755
- SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
756
- } else {
757
- SetError("Eval has failed");
1097
+ try {
1098
+ // Perform the evaluation using llama_decode.
1099
+ int r = llama_decode(ctx->ctx, ctx->batch);
1100
+
1101
+ if (r != 0) {
1102
+ if (r == 1) {
1103
+ SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
1104
+ } else {
1105
+ SetError("Eval has failed");
1106
+ }
1107
+
1108
+ return;
758
1109
  }
759
1110
 
760
- return;
1111
+ llama_synchronize(ctx->ctx);
1112
+ } catch (const std::exception& e) {
1113
+ SetError(e.what());
1114
+ } catch(...) {
1115
+ SetError("Unknown error when calling \"llama_decode\"");
761
1116
  }
762
1117
  }
763
1118
  void OnOK() {
764
- Napi::Env env = Napi::AsyncWorker::Env();
765
- Napi::Promise::Deferred::Resolve(env.Undefined());
1119
+ deferred.Resolve(Env().Undefined());
766
1120
  }
767
1121
  void OnError(const Napi::Error& err) {
768
- Napi::Promise::Deferred::Reject(err.Value());
1122
+ deferred.Reject(err.Value());
769
1123
  }
770
1124
  };
771
1125
 
772
1126
  Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
773
- AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info, this);
1127
+ AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
1128
+ worker->Queue();
1129
+ return worker->GetPromise();
1130
+ }
1131
+
1132
+ class AddonContextLoadContextWorker : public Napi::AsyncWorker {
1133
+ public:
1134
+ AddonContext* context;
1135
+
1136
+ AddonContextLoadContextWorker(const Napi::Env& env, AddonContext* context)
1137
+ : Napi::AsyncWorker(env, "AddonContextLoadContextWorker"),
1138
+ context(context),
1139
+ deferred(Napi::Promise::Deferred::New(env)) {
1140
+ context->Ref();
1141
+ }
1142
+ ~AddonContextLoadContextWorker() {
1143
+ context->Unref();
1144
+ }
1145
+
1146
+ Napi::Promise GetPromise() {
1147
+ return deferred.Promise();
1148
+ }
1149
+
1150
+ protected:
1151
+ Napi::Promise::Deferred deferred;
1152
+
1153
+ void Execute() {
1154
+ try {
1155
+ context->ctx = llama_new_context_with_model(context->model->model, context->context_params);
1156
+
1157
+ context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
1158
+ } catch (const std::exception& e) {
1159
+ SetError(e.what());
1160
+ } catch(...) {
1161
+ SetError("Unknown error when calling \"llama_new_context_with_model\"");
1162
+ }
1163
+ }
1164
+ void OnOK() {
1165
+ if (context->contextLoaded) {
1166
+ uint64_t contextMemorySize = llama_get_state_size(context->ctx);
1167
+ adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
1168
+ context->loadedContextMemorySize = contextMemorySize;
1169
+ }
1170
+
1171
+ deferred.Resolve(Napi::Boolean::New(Env(), context->contextLoaded));
1172
+ }
1173
+ void OnError(const Napi::Error& err) {
1174
+ deferred.Reject(err.Value());
1175
+ }
1176
+ };
1177
+ class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
1178
+ public:
1179
+ AddonContext* context;
1180
+
1181
+ AddonContextUnloadContextWorker(const Napi::Env& env, AddonContext* context)
1182
+ : Napi::AsyncWorker(env, "AddonContextUnloadContextWorker"),
1183
+ context(context),
1184
+ deferred(Napi::Promise::Deferred::New(env)) {
1185
+ context->Ref();
1186
+ }
1187
+ ~AddonContextUnloadContextWorker() {
1188
+ context->Unref();
1189
+ }
1190
+
1191
+ Napi::Promise GetPromise() {
1192
+ return deferred.Promise();
1193
+ }
1194
+
1195
+ protected:
1196
+ Napi::Promise::Deferred deferred;
1197
+
1198
+ void Execute() {
1199
+ try {
1200
+ llama_free(context->ctx);
1201
+ context->contextLoaded = false;
1202
+
1203
+ try {
1204
+ if (context->has_batch) {
1205
+ llama_batch_free(context->batch);
1206
+ context->has_batch = false;
1207
+ context->batch_n_tokens = 0;
1208
+ }
1209
+
1210
+ context->dispose();
1211
+ } catch (const std::exception& e) {
1212
+ SetError(e.what());
1213
+ } catch(...) {
1214
+ SetError("Unknown error when calling \"llama_batch_free\"");
1215
+ }
1216
+ } catch (const std::exception& e) {
1217
+ SetError(e.what());
1218
+ } catch(...) {
1219
+ SetError("Unknown error when calling \"llama_free\"");
1220
+ }
1221
+ }
1222
+ void OnOK() {
1223
+ adjustNapiExternalMemorySubtract(Env(), context->loadedContextMemorySize);
1224
+ context->loadedContextMemorySize = 0;
1225
+
1226
+ adjustNapiExternalMemorySubtract(Env(), context->batchMemorySize);
1227
+ context->batchMemorySize = 0;
1228
+
1229
+ deferred.Resolve(Env().Undefined());
1230
+ }
1231
+ void OnError(const Napi::Error& err) {
1232
+ deferred.Reject(err.Value());
1233
+ }
1234
+ };
1235
+
1236
+ Napi::Value AddonContext::Init(const Napi::CallbackInfo& info) {
1237
+ if (disposed) {
1238
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1239
+ return info.Env().Undefined();
1240
+ }
1241
+
1242
+ AddonContextLoadContextWorker* worker = new AddonContextLoadContextWorker(this->Env(), this);
774
1243
  worker->Queue();
775
- return worker->Promise();
1244
+ return worker->GetPromise();
1245
+ }
1246
+ Napi::Value AddonContext::Dispose(const Napi::CallbackInfo& info) {
1247
+ if (disposed) {
1248
+ return info.Env().Undefined();
1249
+ }
1250
+
1251
+ if (contextLoaded) {
1252
+ contextLoaded = false;
1253
+
1254
+ AddonContextUnloadContextWorker* worker = new AddonContextUnloadContextWorker(this->Env(), this);
1255
+ worker->Queue();
1256
+ return worker->GetPromise();
1257
+ } else {
1258
+ dispose();
1259
+
1260
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
1261
+ deferred.Resolve(info.Env().Undefined());
1262
+ return deferred.Promise();
1263
+ }
776
1264
  }
777
1265
 
778
- class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
1266
+ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
779
1267
  public:
780
1268
  AddonContext* ctx;
781
1269
  AddonGrammarEvaluationState* grammar_evaluation_state;
@@ -795,7 +1283,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
795
1283
  AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
796
1284
  : Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
797
1285
  ctx(ctx),
798
- Napi::Promise::Deferred(info.Env()) {
1286
+ deferred(Napi::Promise::Deferred::New(info.Env())) {
799
1287
  ctx->Ref();
800
1288
 
801
1289
  batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
@@ -858,11 +1346,25 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
858
1346
  use_grammar = false;
859
1347
  }
860
1348
  }
861
- using Napi::AsyncWorker::Queue;
862
- using Napi::Promise::Deferred::Promise;
1349
+
1350
+ Napi::Promise GetPromise() {
1351
+ return deferred.Promise();
1352
+ }
863
1353
 
864
1354
  protected:
1355
+ Napi::Promise::Deferred deferred;
1356
+
865
1357
  void Execute() {
1358
+ try {
1359
+ SampleToken();
1360
+ } catch (const std::exception& e) {
1361
+ SetError(e.what());
1362
+ } catch(...) {
1363
+ SetError("Unknown error when calling \"SampleToken\"");
1364
+ }
1365
+ }
1366
+
1367
+ void SampleToken() {
866
1368
  llama_token new_token_id = 0;
867
1369
 
868
1370
  // Select the best prediction.
@@ -924,19 +1426,18 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
924
1426
  result = new_token_id;
925
1427
  }
926
1428
  void OnOK() {
927
- Napi::Env env = Napi::AsyncWorker::Env();
928
- Napi::Number resultValue = Napi::Number::New(env, static_cast<uint32_t>(result));
929
- Napi::Promise::Deferred::Resolve(resultValue);
1429
+ Napi::Number resultValue = Napi::Number::New(Env(), static_cast<uint32_t>(result));
1430
+ deferred.Resolve(resultValue);
930
1431
  }
931
1432
  void OnError(const Napi::Error& err) {
932
- Napi::Promise::Deferred::Reject(err.Value());
1433
+ deferred.Reject(err.Value());
933
1434
  }
934
1435
  };
935
1436
 
936
1437
  Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
937
1438
  AddonContextSampleTokenWorker* worker = new AddonContextSampleTokenWorker(info, this);
938
1439
  worker->Queue();
939
- return worker->Promise();
1440
+ return worker->GetPromise();
940
1441
  }
941
1442
 
942
1443
  Napi::Value systemInfo(const Napi::CallbackInfo& info) {
@@ -1009,6 +1510,9 @@ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, voi
1009
1510
 
1010
1511
  if (status == napi_ok) {
1011
1512
  return;
1513
+ } else {
1514
+ delete stringStream;
1515
+ delete data;
1012
1516
  }
1013
1517
  }
1014
1518
 
@@ -1066,22 +1570,135 @@ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
1066
1570
  return info.Env().Undefined();
1067
1571
  }
1068
1572
 
1573
+ class AddonBackendLoadWorker : public Napi::AsyncWorker {
1574
+ public:
1575
+ AddonBackendLoadWorker(const Napi::Env& env)
1576
+ : Napi::AsyncWorker(env, "AddonBackendLoadWorker"),
1577
+ deferred(Napi::Promise::Deferred::New(env)) {
1578
+ }
1579
+ ~AddonBackendLoadWorker() {
1580
+ }
1581
+
1582
+ Napi::Promise GetPromise() {
1583
+ return deferred.Promise();
1584
+ }
1585
+
1586
+ protected:
1587
+ Napi::Promise::Deferred deferred;
1588
+
1589
+ void Execute() {
1590
+ try {
1591
+ llama_backend_init();
1592
+
1593
+ try {
1594
+ if (backendDisposed) {
1595
+ llama_backend_free();
1596
+ } else {
1597
+ backendInitialized = true;
1598
+ }
1599
+ } catch (const std::exception& e) {
1600
+ SetError(e.what());
1601
+ } catch(...) {
1602
+ SetError("Unknown error when calling \"llama_backend_free\"");
1603
+ }
1604
+ } catch (const std::exception& e) {
1605
+ SetError(e.what());
1606
+ } catch(...) {
1607
+ SetError("Unknown error when calling \"llama_backend_init\"");
1608
+ }
1609
+ }
1610
+ void OnOK() {
1611
+ deferred.Resolve(Env().Undefined());
1612
+ }
1613
+ void OnError(const Napi::Error& err) {
1614
+ deferred.Reject(err.Value());
1615
+ }
1616
+ };
1617
+
1618
+
1619
+ class AddonBackendUnloadWorker : public Napi::AsyncWorker {
1620
+ public:
1621
+ AddonBackendUnloadWorker(const Napi::Env& env)
1622
+ : Napi::AsyncWorker(env, "AddonBackendUnloadWorker"),
1623
+ deferred(Napi::Promise::Deferred::New(env)) {
1624
+ }
1625
+ ~AddonBackendUnloadWorker() {
1626
+ }
1627
+
1628
+ Napi::Promise GetPromise() {
1629
+ return deferred.Promise();
1630
+ }
1631
+
1632
+ protected:
1633
+ Napi::Promise::Deferred deferred;
1634
+
1635
+ void Execute() {
1636
+ try {
1637
+ if (backendInitialized) {
1638
+ backendInitialized = false;
1639
+ llama_backend_free();
1640
+ }
1641
+ } catch (const std::exception& e) {
1642
+ SetError(e.what());
1643
+ } catch(...) {
1644
+ SetError("Unknown error when calling \"llama_backend_free\"");
1645
+ }
1646
+ }
1647
+ void OnOK() {
1648
+ deferred.Resolve(Env().Undefined());
1649
+ }
1650
+ void OnError(const Napi::Error& err) {
1651
+ deferred.Reject(err.Value());
1652
+ }
1653
+ };
1654
+
1655
+ Napi::Value addonInit(const Napi::CallbackInfo& info) {
1656
+ if (backendInitialized) {
1657
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
1658
+ deferred.Resolve(info.Env().Undefined());
1659
+ return deferred.Promise();
1660
+ }
1661
+
1662
+ AddonBackendLoadWorker* worker = new AddonBackendLoadWorker(info.Env());
1663
+ worker->Queue();
1664
+ return worker->GetPromise();
1665
+ }
1666
+
1667
+ Napi::Value addonDispose(const Napi::CallbackInfo& info) {
1668
+ if (backendDisposed) {
1669
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
1670
+ deferred.Resolve(info.Env().Undefined());
1671
+ return deferred.Promise();
1672
+ }
1673
+
1674
+ backendDisposed = true;
1675
+
1676
+ AddonBackendUnloadWorker* worker = new AddonBackendUnloadWorker(info.Env());
1677
+ worker->Queue();
1678
+ return worker->GetPromise();
1679
+ }
1680
+
1069
1681
  static void addonFreeLlamaBackend(Napi::Env env, int* data) {
1682
+ if (backendDisposed) {
1683
+ return;
1684
+ }
1685
+
1686
+ backendDisposed = true;
1070
1687
  if (backendInitialized) {
1071
- llama_backend_free();
1072
1688
  backendInitialized = false;
1689
+ llama_backend_free();
1073
1690
  }
1074
1691
  }
1075
1692
 
1076
1693
  Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
1077
- llama_backend_init();
1078
- backendInitialized = true;
1079
-
1080
1694
  exports.DefineProperties({
1081
1695
  Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
1082
1696
  Napi::PropertyDescriptor::Function("setLogger", setLogger),
1083
1697
  Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
1084
1698
  Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
1699
+ Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
1700
+ Napi::PropertyDescriptor::Function("init", addonInit),
1701
+ Napi::PropertyDescriptor::Function("dispose", addonDispose),
1085
1702
  });
1086
1703
  AddonModel::init(exports);
1087
1704
  AddonGrammar::init(exports);