node-llama-cpp 3.0.0-beta.13 → 3.0.0-beta.14

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. package/dist/TemplateChatWrapper.d.ts +10 -9
  2. package/dist/TemplateChatWrapper.js.map +1 -1
  3. package/dist/bindings/AddonTypes.d.ts +14 -6
  4. package/dist/bindings/Llama.d.ts +8 -0
  5. package/dist/bindings/Llama.js +59 -7
  6. package/dist/bindings/Llama.js.map +1 -1
  7. package/dist/bindings/getLlama.js +1 -1
  8. package/dist/bindings/getLlama.js.map +1 -1
  9. package/dist/bindings/types.d.ts +3 -0
  10. package/dist/bindings/types.js +4 -0
  11. package/dist/bindings/types.js.map +1 -1
  12. package/dist/bindings/utils/cloneLlamaCppRepo.js +1 -1
  13. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  14. package/dist/bindings/utils/compileLLamaCpp.d.ts +4 -1
  15. package/dist/bindings/utils/compileLLamaCpp.js +133 -97
  16. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  17. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +3 -0
  18. package/dist/bindings/utils/detectAvailableComputeLayers.js +155 -13
  19. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -1
  20. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +1 -0
  21. package/dist/bindings/utils/logDistroInstallInstruction.js +16 -6
  22. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -1
  23. package/dist/bindings/utils/testBindingBinary.js +2 -2
  24. package/dist/bindings/utils/testBindingBinary.js.map +1 -1
  25. package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
  26. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  27. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  28. package/dist/cli/commands/ChatCommand.js +31 -14
  29. package/dist/cli/commands/ChatCommand.js.map +1 -1
  30. package/dist/cli/commands/CompleteCommand.js +30 -13
  31. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  32. package/dist/cli/commands/InfillCommand.js +30 -13
  33. package/dist/cli/commands/InfillCommand.js.map +1 -1
  34. package/dist/cli/commands/InspectCommand.js +1 -1
  35. package/dist/cli/commands/InspectCommand.js.map +1 -1
  36. package/dist/cli/commands/OnPostInstallCommand.js +2 -0
  37. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  38. package/dist/config.d.ts +1 -1
  39. package/dist/config.js +1 -1
  40. package/dist/config.js.map +1 -1
  41. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +5 -8
  42. package/dist/evaluator/LlamaContext/LlamaContext.js +111 -65
  43. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  44. package/dist/evaluator/LlamaContext/types.d.ts +13 -6
  45. package/dist/evaluator/LlamaEmbeddingContext.d.ts +6 -5
  46. package/dist/evaluator/LlamaEmbeddingContext.js +32 -22
  47. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  48. package/dist/evaluator/LlamaGrammar.js +1 -0
  49. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  50. package/dist/evaluator/LlamaModel.d.ts +16 -16
  51. package/dist/evaluator/LlamaModel.js +95 -20
  52. package/dist/evaluator/LlamaModel.js.map +1 -1
  53. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js +2 -3
  54. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js.map +1 -1
  55. package/dist/gguf/ggufParser/stream/GGUFReadStream.js +1 -1
  56. package/dist/gguf/ggufParser/stream/GGUFReadStream.js.map +1 -1
  57. package/dist/index.d.ts +2 -2
  58. package/dist/index.js.map +1 -1
  59. package/dist/state.d.ts +2 -0
  60. package/dist/state.js +7 -0
  61. package/dist/state.js.map +1 -1
  62. package/dist/utils/DisposeGuard.d.ts +13 -0
  63. package/dist/utils/DisposeGuard.js +120 -0
  64. package/dist/utils/DisposeGuard.js.map +1 -0
  65. package/dist/utils/cmake.js +1 -1
  66. package/dist/utils/cmake.js.map +1 -1
  67. package/dist/utils/getConsoleLogPrefix.js +3 -2
  68. package/dist/utils/getConsoleLogPrefix.js.map +1 -1
  69. package/dist/utils/removeNullFields.d.ts +2 -2
  70. package/dist/utils/removeNullFields.js.map +1 -1
  71. package/dist/utils/spawnCommand.d.ts +11 -1
  72. package/dist/utils/spawnCommand.js +55 -7
  73. package/dist/utils/spawnCommand.js.map +1 -1
  74. package/llama/addon.cpp +676 -83
  75. package/llama/binariesGithubRelease.json +1 -1
  76. package/llama/gitRelease.bundle +0 -0
  77. package/llama/grammars/json.gbnf +1 -1
  78. package/llama/grammars/json_arr.gbnf +1 -1
  79. package/llama/llama.cpp.info.json +1 -1
  80. package/llamaBins/linux-arm64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  81. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  82. package/llamaBins/linux-armv7l/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  83. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  84. package/llamaBins/linux-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  85. package/llamaBins/linux-x64/llama-addon.node +0 -0
  86. package/llamaBins/linux-x64-cuda/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  87. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  88. package/llamaBins/linux-x64-vulkan/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  89. package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
  90. package/llamaBins/mac-arm64-metal/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  91. package/llamaBins/mac-arm64-metal/default.metallib +0 -0
  92. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  93. package/llamaBins/mac-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  94. package/llamaBins/mac-x64/llama-addon.node +0 -0
  95. package/llamaBins/win-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  96. package/llamaBins/win-x64/llama-addon.exp +0 -0
  97. package/llamaBins/win-x64/llama-addon.lib +0 -0
  98. package/llamaBins/win-x64/llama-addon.node +0 -0
  99. package/llamaBins/win-x64-cuda/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  100. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  101. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  102. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  103. package/llamaBins/win-x64-vulkan/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  104. package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
  105. package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
  106. package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
  107. package/package.json +10 -7
  108. package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -7731
package/llama/addon.cpp CHANGED
@@ -35,10 +35,77 @@ void addonCallJsLogCallback(
35
35
  using AddonThreadSafeLogCallbackFunction =
36
36
  Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
37
37
 
38
+
39
+ struct addon_progress_event {
40
+ public:
41
+ const float progress;
42
+ };
43
+
44
+ using AddonThreadSafeProgressCallbackFunctionContext = Napi::Reference<Napi::Value>;
45
+ void addonCallJsProgressCallback(
46
+ Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
47
+ );
48
+ using AddonThreadSafeProgressEventCallbackFunction =
49
+ Napi::TypedThreadSafeFunction<AddonThreadSafeProgressCallbackFunctionContext, addon_progress_event, addonCallJsProgressCallback>;
50
+
51
+
38
52
  AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
39
53
  bool addonJsLoggerCallbackSet = false;
40
54
  int addonLoggerLogLevel = 5;
41
55
  bool backendInitialized = false;
56
+ bool backendDisposed = false;
57
+
58
+ void addonCallJsProgressCallback(
59
+ Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
60
+ ) {
61
+ if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
62
+ try {
63
+ callback.Call({Napi::Number::New(env, data->progress)});
64
+ } catch (const Napi::Error& e) {}
65
+ }
66
+
67
+ if (data != nullptr) {
68
+ delete data;
69
+ }
70
+ }
71
+
72
+ static uint64_t calculateBatchMemorySize(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
73
+ uint64_t totalSize = 0;
74
+
75
+ if (embd) {
76
+ totalSize += sizeof(float) * n_tokens_alloc * embd;
77
+ } else {
78
+ totalSize += sizeof(llama_token) * n_tokens_alloc;
79
+ }
80
+
81
+ totalSize += sizeof(llama_pos) * n_tokens_alloc;
82
+ totalSize += sizeof(int32_t) * n_tokens_alloc;
83
+ totalSize += sizeof(llama_seq_id *) * (n_tokens_alloc + 1);
84
+
85
+ totalSize += sizeof(llama_seq_id) * n_seq_max * n_tokens_alloc;
86
+
87
+ totalSize += sizeof(int8_t) * n_tokens_alloc;
88
+
89
+ return totalSize;
90
+ }
91
+
92
+ static void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size) {
93
+ const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
94
+ while (size > 0) {
95
+ int64_t adjustSize = std::min(size, chunkSize);
96
+ Napi::MemoryManagement::AdjustExternalMemory(env, adjustSize);
97
+ size -= adjustSize;
98
+ }
99
+ }
100
+
101
+ static void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
102
+ const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
103
+ while (size > 0) {
104
+ int64_t adjustSize = std::min(size, chunkSize);
105
+ Napi::MemoryManagement::AdjustExternalMemory(env, -adjustSize);
106
+ size -= adjustSize;
107
+ }
108
+ }
42
109
 
43
110
  std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
44
111
  std::vector<char> result(8, 0);
@@ -143,21 +210,42 @@ static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_mod
143
210
  return Napi::Number::From(info.Env(), token);
144
211
  }
145
212
 
213
+ static bool llamaModelParamsProgressCallback(float progress, void * user_data);
214
+
146
215
  class AddonModel : public Napi::ObjectWrap<AddonModel> {
147
216
  public:
148
217
  llama_model_params model_params;
149
218
  llama_model* model;
219
+ uint64_t loadedModelSize = 0;
220
+ Napi::Reference<Napi::Object> addonExportsRef;
221
+ bool hasAddonExportsRef = false;
222
+
223
+ std::string modelPath;
224
+ bool modelLoaded = false;
225
+ bool abortModelLoad = false;
226
+ bool model_load_stopped = false;
227
+ float rawModelLoadPercentage = 0;
228
+ unsigned modelLoadPercentage = 0;
229
+ AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
230
+ bool onLoadProgressEventCallbackSet = false;
231
+ bool hasLoadAbortSignal = false;
232
+
150
233
  bool disposed = false;
151
234
 
152
235
  AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
153
236
  model_params = llama_model_default_params();
154
237
 
155
238
  // Get the model path
156
- std::string modelPath = info[0].As<Napi::String>().Utf8Value();
239
+ modelPath = info[0].As<Napi::String>().Utf8Value();
157
240
 
158
241
  if (info.Length() > 1 && info[1].IsObject()) {
159
242
  Napi::Object options = info[1].As<Napi::Object>();
160
243
 
244
+ if (options.Has("addonExports")) {
245
+ addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
246
+ hasAddonExportsRef = true;
247
+ }
248
+
161
249
  if (options.Has("gpuLayers")) {
162
250
  model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
163
251
  }
@@ -173,13 +261,37 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
173
261
  if (options.Has("useMlock")) {
174
262
  model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
175
263
  }
176
- }
177
264
 
178
- model = llama_load_model_from_file(modelPath.c_str(), model_params);
265
+ if (options.Has("onLoadProgress")) {
266
+ auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
267
+ if (onLoadProgressJSCallback.IsFunction()) {
268
+ AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
269
+ addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
270
+ info.Env(),
271
+ onLoadProgressJSCallback,
272
+ "onLoadProgressCallback",
273
+ 0,
274
+ 1,
275
+ context,
276
+ [](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
277
+ addonModel->onLoadProgressEventCallbackSet = false;
278
+
279
+ delete ctx;
280
+ },
281
+ this
282
+ );
283
+ onLoadProgressEventCallbackSet = true;
284
+ }
285
+ }
179
286
 
180
- if (model == NULL) {
181
- Napi::Error::New(info.Env(), "Failed to load model").ThrowAsJavaScriptException();
182
- return;
287
+ if (options.Has("hasLoadAbortSignal")) {
288
+ hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
289
+ }
290
+
291
+ if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
292
+ model_params.progress_callback_user_data = &(*this);
293
+ model_params.progress_callback = llamaModelParamsProgressCallback;
294
+ }
183
295
  }
184
296
  }
185
297
 
@@ -192,23 +304,31 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
192
304
  return;
193
305
  }
194
306
 
195
- llama_free_model(model);
196
307
  disposed = true;
197
- }
308
+ if (modelLoaded) {
309
+ modelLoaded = false;
310
+ llama_free_model(model);
198
311
 
199
- Napi::Value Dispose(const Napi::CallbackInfo& info) {
200
- if (disposed) {
201
- return info.Env().Undefined();
312
+ adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
313
+ loadedModelSize = 0;
202
314
  }
203
315
 
204
- dispose();
316
+ if (hasAddonExportsRef) {
317
+ addonExportsRef.Unref();
318
+ hasAddonExportsRef = false;
319
+ }
320
+ }
205
321
 
322
+ Napi::Value Init(const Napi::CallbackInfo& info);
323
+ Napi::Value AbortActiveModelLoad(const Napi::CallbackInfo& info) {
324
+ abortModelLoad = true;
206
325
  return info.Env().Undefined();
207
326
  }
327
+ Napi::Value Dispose(const Napi::CallbackInfo& info);
208
328
 
209
329
  Napi::Value Tokenize(const Napi::CallbackInfo& info) {
210
330
  if (disposed) {
211
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
331
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
212
332
  return info.Env().Undefined();
213
333
  }
214
334
 
@@ -226,7 +346,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
226
346
  }
227
347
  Napi::Value Detokenize(const Napi::CallbackInfo& info) {
228
348
  if (disposed) {
229
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
349
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
230
350
  return info.Env().Undefined();
231
351
  }
232
352
 
@@ -251,7 +371,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
251
371
 
252
372
  Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info) {
253
373
  if (disposed) {
254
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
374
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
255
375
  return info.Env().Undefined();
256
376
  }
257
377
 
@@ -260,7 +380,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
260
380
 
261
381
  Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
262
382
  if (disposed) {
263
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
383
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
264
384
  return info.Env().Undefined();
265
385
  }
266
386
 
@@ -269,7 +389,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
269
389
 
270
390
  Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
271
391
  if (disposed) {
272
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
392
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
273
393
  return info.Env().Undefined();
274
394
  }
275
395
 
@@ -278,7 +398,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
278
398
 
279
399
  Napi::Value GetTotalParameters(const Napi::CallbackInfo& info) {
280
400
  if (disposed) {
281
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
401
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
282
402
  return info.Env().Undefined();
283
403
  }
284
404
 
@@ -287,7 +407,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
287
407
 
288
408
  Napi::Value GetModelDescription(const Napi::CallbackInfo& info) {
289
409
  if (disposed) {
290
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
410
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
291
411
  return info.Env().Undefined();
292
412
  }
293
413
 
@@ -299,7 +419,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
299
419
 
300
420
  Napi::Value TokenBos(const Napi::CallbackInfo& info) {
301
421
  if (disposed) {
302
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
422
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
303
423
  return info.Env().Undefined();
304
424
  }
305
425
 
@@ -307,7 +427,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
307
427
  }
308
428
  Napi::Value TokenEos(const Napi::CallbackInfo& info) {
309
429
  if (disposed) {
310
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
430
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
311
431
  return info.Env().Undefined();
312
432
  }
313
433
 
@@ -315,7 +435,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
315
435
  }
316
436
  Napi::Value TokenNl(const Napi::CallbackInfo& info) {
317
437
  if (disposed) {
318
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
438
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
319
439
  return info.Env().Undefined();
320
440
  }
321
441
 
@@ -323,7 +443,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
323
443
  }
324
444
  Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
325
445
  if (disposed) {
326
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
446
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
327
447
  return info.Env().Undefined();
328
448
  }
329
449
 
@@ -331,7 +451,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
331
451
  }
332
452
  Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
333
453
  if (disposed) {
334
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
454
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
335
455
  return info.Env().Undefined();
336
456
  }
337
457
 
@@ -339,7 +459,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
339
459
  }
340
460
  Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
341
461
  if (disposed) {
342
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
462
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
343
463
  return info.Env().Undefined();
344
464
  }
345
465
 
@@ -347,7 +467,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
347
467
  }
348
468
  Napi::Value EotToken(const Napi::CallbackInfo& info) {
349
469
  if (disposed) {
350
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
470
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
351
471
  return info.Env().Undefined();
352
472
  }
353
473
 
@@ -355,7 +475,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
355
475
  }
356
476
  Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
357
477
  if (disposed) {
358
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
478
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
359
479
  return info.Env().Undefined();
360
480
  }
361
481
 
@@ -374,7 +494,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
374
494
 
375
495
  Napi::Value GetTokenType(const Napi::CallbackInfo& info) {
376
496
  if (disposed) {
377
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
497
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
378
498
  return info.Env().Undefined();
379
499
  }
380
500
 
@@ -402,6 +522,8 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
402
522
  exports.Env(),
403
523
  "AddonModel",
404
524
  {
525
+ InstanceMethod("init", &AddonModel::Init),
526
+ InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
405
527
  InstanceMethod("tokenize", &AddonModel::Tokenize),
406
528
  InstanceMethod("detokenize", &AddonModel::Detokenize),
407
529
  InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
@@ -426,9 +548,166 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
426
548
  }
427
549
  };
428
550
 
551
+ static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
552
+ AddonModel* addonModel = (AddonModel *) user_data;
553
+ unsigned percentage = (unsigned) (100 * progress);
554
+
555
+ if (percentage > addonModel->modelLoadPercentage) {
556
+ addonModel->modelLoadPercentage = percentage;
557
+
558
+ // original llama.cpp logs
559
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
560
+ if (percentage >= 100) {
561
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
562
+ }
563
+ }
564
+
565
+ if (progress > addonModel->rawModelLoadPercentage) {
566
+ addonModel->rawModelLoadPercentage = progress;
567
+
568
+ if (addonModel->onLoadProgressEventCallbackSet) {
569
+ addon_progress_event* data = new addon_progress_event {
570
+ progress
571
+ };
572
+
573
+ auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);
574
+
575
+ if (status != napi_ok) {
576
+ delete data;
577
+ }
578
+ }
579
+ }
580
+
581
+ return !(addonModel->abortModelLoad);
582
+ }
583
+
584
+ class AddonModelLoadModelWorker : public Napi::AsyncWorker {
585
+ public:
586
+ AddonModel* model;
587
+
588
+ AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
589
+ : Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
590
+ model(model),
591
+ deferred(Napi::Promise::Deferred::New(env)) {
592
+ model->Ref();
593
+ }
594
+ ~AddonModelLoadModelWorker() {
595
+ model->Unref();
596
+ }
597
+
598
+ Napi::Promise GetPromise() {
599
+ return deferred.Promise();
600
+ }
601
+
602
+ protected:
603
+ Napi::Promise::Deferred deferred;
604
+
605
+ void Execute() {
606
+ try {
607
+ model->model = llama_load_model_from_file(model->modelPath.c_str(), model->model_params);
608
+
609
+ model->modelLoaded = model->model != nullptr && model->model != NULL;
610
+ } catch (const std::exception& e) {
611
+ SetError(e.what());
612
+ } catch(...) {
613
+ SetError("Unknown error when calling \"llama_load_model_from_file\"");
614
+ }
615
+ }
616
+ void OnOK() {
617
+ if (model->modelLoaded) {
618
+ uint64_t modelSize = llama_model_size(model->model);
619
+ adjustNapiExternalMemoryAdd(Env(), modelSize);
620
+ model->loadedModelSize = modelSize;
621
+ }
622
+
623
+ deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
624
+ if (model->onLoadProgressEventCallbackSet) {
625
+ model->addonThreadSafeOnLoadProgressEventCallback.Release();
626
+ }
627
+ }
628
+ void OnError(const Napi::Error& err) {
629
+ deferred.Reject(err.Value());
630
+ }
631
+ };
632
+ class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
633
+ public:
634
+ AddonModel* model;
635
+
636
+ AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
637
+ : Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
638
+ model(model),
639
+ deferred(Napi::Promise::Deferred::New(env)) {
640
+ model->Ref();
641
+ }
642
+ ~AddonModelUnloadModelWorker() {
643
+ model->Unref();
644
+ }
645
+
646
+ Napi::Promise GetPromise() {
647
+ return deferred.Promise();
648
+ }
649
+
650
+ protected:
651
+ Napi::Promise::Deferred deferred;
652
+
653
+ void Execute() {
654
+ try {
655
+ llama_free_model(model->model);
656
+ model->modelLoaded = false;
657
+
658
+ model->dispose();
659
+ } catch (const std::exception& e) {
660
+ SetError(e.what());
661
+ } catch(...) {
662
+ SetError("Unknown error when calling \"llama_free_model\"");
663
+ }
664
+ }
665
+ void OnOK() {
666
+ adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
667
+ model->loadedModelSize = 0;
668
+
669
+ deferred.Resolve(Env().Undefined());
670
+ }
671
+ void OnError(const Napi::Error& err) {
672
+ deferred.Reject(err.Value());
673
+ }
674
+ };
675
+
676
+ Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
677
+ if (disposed) {
678
+ Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
679
+ return info.Env().Undefined();
680
+ }
681
+
682
+ AddonModelLoadModelWorker* worker = new AddonModelLoadModelWorker(this->Env(), this);
683
+ worker->Queue();
684
+ return worker->GetPromise();
685
+ }
686
+ Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
687
+ if (disposed) {
688
+ return info.Env().Undefined();
689
+ }
690
+
691
+ if (modelLoaded) {
692
+ modelLoaded = false;
693
+
694
+ AddonModelUnloadModelWorker* worker = new AddonModelUnloadModelWorker(this->Env(), this);
695
+ worker->Queue();
696
+ return worker->GetPromise();
697
+ } else {
698
+ dispose();
699
+
700
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
701
+ deferred.Resolve(info.Env().Undefined());
702
+ return deferred.Promise();
703
+ }
704
+ }
705
+
429
706
  class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
430
707
  public:
431
708
  grammar_parser::parse_state parsed_grammar;
709
+ Napi::Reference<Napi::Object> addonExportsRef;
710
+ bool hasAddonExportsRef = false;
432
711
 
433
712
  AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
434
713
  // Get the model path
@@ -438,6 +717,11 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
438
717
  if (info.Length() > 1 && info[1].IsObject()) {
439
718
  Napi::Object options = info[1].As<Napi::Object>();
440
719
 
720
+ if (options.Has("addonExports")) {
721
+ addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
722
+ hasAddonExportsRef = true;
723
+ }
724
+
441
725
  if (options.Has("printGrammar")) {
442
726
  should_print_grammar = options.Get("printGrammar").As<Napi::Boolean>().Value();
443
727
  }
@@ -455,6 +739,13 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
455
739
  }
456
740
  }
457
741
 
742
+ ~AddonGrammar() {
743
+ if (hasAddonExportsRef) {
744
+ addonExportsRef.Unref();
745
+ hasAddonExportsRef = false;
746
+ }
747
+ }
748
+
458
749
  static void init(Napi::Object exports) {
459
750
  exports.Set("AddonGrammar", DefineClass(exports.Env(), "AddonGrammar", {}));
460
751
  }
@@ -493,9 +784,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
493
784
  llama_context_params context_params;
494
785
  llama_context* ctx;
495
786
  llama_batch batch;
787
+ uint64_t batchMemorySize = 0;
496
788
  bool has_batch = false;
497
789
  int32_t batch_n_tokens = 0;
498
790
  int n_cur = 0;
791
+
792
+ uint64_t loadedContextMemorySize = 0;
793
+ bool contextLoaded = false;
794
+
499
795
  bool disposed = false;
500
796
 
501
797
  AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
@@ -523,10 +819,11 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
523
819
 
524
820
  if (options.Has("batchSize")) {
525
821
  context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
822
+ context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
526
823
  }
527
824
 
528
- if (options.Has("embedding")) {
529
- context_params.embedding = options.Get("embedding").As<Napi::Boolean>().Value();
825
+ if (options.Has("embeddings")) {
826
+ context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
530
827
  }
531
828
 
532
829
  if (options.Has("threads")) {
@@ -537,9 +834,6 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
537
834
  context_params.n_threads_batch = resolved_n_threads;
538
835
  }
539
836
  }
540
-
541
- ctx = llama_new_context_with_model(model->model, context_params);
542
- Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
543
837
  }
544
838
  ~AddonContext() {
545
839
  dispose();
@@ -550,13 +844,18 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
550
844
  return;
551
845
  }
552
846
 
553
- Napi::MemoryManagement::AdjustExternalMemory(Env(), -(int64_t)llama_get_state_size(ctx));
554
- llama_free(ctx);
847
+ disposed = true;
848
+ if (contextLoaded) {
849
+ contextLoaded = false;
850
+ llama_free(ctx);
851
+
852
+ adjustNapiExternalMemorySubtract(Env(), loadedContextMemorySize);
853
+ loadedContextMemorySize = 0;
854
+ }
855
+
555
856
  model->Unref();
556
857
 
557
858
  disposeBatch();
558
-
559
- disposed = true;
560
859
  }
561
860
  void disposeBatch() {
562
861
  if (!has_batch) {
@@ -566,16 +865,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
566
865
  llama_batch_free(batch);
567
866
  has_batch = false;
568
867
  batch_n_tokens = 0;
868
+
869
+ adjustNapiExternalMemorySubtract(Env(), batchMemorySize);
870
+ batchMemorySize = 0;
569
871
  }
570
- Napi::Value Dispose(const Napi::CallbackInfo& info) {
571
- if (disposed) {
572
- return info.Env().Undefined();
573
- }
574
872
 
575
- dispose();
873
+ Napi::Value Init(const Napi::CallbackInfo& info);
874
+ Napi::Value Dispose(const Napi::CallbackInfo& info);
576
875
 
577
- return info.Env().Undefined();
578
- }
579
876
  Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
580
877
  if (disposed) {
581
878
  Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
@@ -600,6 +897,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
600
897
  has_batch = true;
601
898
  batch_n_tokens = n_tokens;
602
899
 
900
+ uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_n_embd(model->model), context_params.n_batch);
901
+ if (newBatchMemorySize > batchMemorySize) {
902
+ adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
903
+ batchMemorySize = newBatchMemorySize;
904
+ } else if (newBatchMemorySize < batchMemorySize) {
905
+ adjustNapiExternalMemorySubtract(Env(), batchMemorySize - newBatchMemorySize);
906
+ batchMemorySize = newBatchMemorySize;
907
+ }
908
+
603
909
  return info.Env().Undefined();
604
910
  }
605
911
  Napi::Value DisposeBatch(const Napi::CallbackInfo& info) {
@@ -648,7 +954,12 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
648
954
 
649
955
  int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
650
956
 
651
- llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
957
+ bool result = llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
958
+
959
+ if (!result) {
960
+ Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
961
+ return info.Env().Undefined();
962
+ }
652
963
 
653
964
  return info.Env().Undefined();
654
965
  }
@@ -662,9 +973,9 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
662
973
  int32_t startPos = info[1].As<Napi::Number>().Int32Value();
663
974
  int32_t endPos = info[2].As<Napi::Number>().Int32Value();
664
975
 
665
- llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
976
+ bool result = llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
666
977
 
667
- return info.Env().Undefined();
978
+ return Napi::Boolean::New(info.Env(), result);
668
979
  }
669
980
  Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
670
981
  if (disposed) {
@@ -702,8 +1013,23 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
702
1013
  return info.Env().Undefined();
703
1014
  }
704
1015
 
1016
+ int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
1017
+
1018
+ if (inputTokensLength <= 0) {
1019
+ Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
1020
+ return info.Env().Undefined();
1021
+ }
1022
+
705
1023
  const int n_embd = llama_n_embd(model->model);
706
- const auto* embeddings = llama_get_embeddings(ctx);
1024
+ const auto* embeddings = llama_get_embeddings_seq(ctx, 0);
1025
+ if (embeddings == NULL) {
1026
+ embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
1027
+
1028
+ if (embeddings == NULL) {
1029
+ Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
1030
+ return info.Env().Undefined();
1031
+ }
1032
+ }
707
1033
 
708
1034
  Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
709
1035
  for (size_t i = 0; i < n_embd; ++i) {
@@ -726,6 +1052,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
726
1052
  exports.Env(),
727
1053
  "AddonContext",
728
1054
  {
1055
+ InstanceMethod("init", &AddonContext::Init),
729
1056
  InstanceMethod("getContextSize", &AddonContext::GetContextSize),
730
1057
  InstanceMethod("initBatch", &AddonContext::InitBatch),
731
1058
  InstanceMethod("addToBatch", &AddonContext::AddToBatch),
@@ -745,53 +1072,198 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
745
1072
  };
746
1073
 
747
1074
 
748
- class AddonContextDecodeBatchWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
1075
+ class AddonContextDecodeBatchWorker : public Napi::AsyncWorker {
749
1076
  public:
750
1077
  AddonContext* ctx;
751
1078
 
752
- AddonContextDecodeBatchWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
753
- : Napi::AsyncWorker(info.Env(), "AddonContextDecodeBatchWorker"),
1079
+ AddonContextDecodeBatchWorker(const Napi::Env& env, AddonContext* ctx)
1080
+ : Napi::AsyncWorker(env, "AddonContextDecodeBatchWorker"),
754
1081
  ctx(ctx),
755
- Napi::Promise::Deferred(info.Env()) {
1082
+ deferred(Napi::Promise::Deferred::New(env)) {
756
1083
  ctx->Ref();
757
1084
  }
758
1085
  ~AddonContextDecodeBatchWorker() {
759
1086
  ctx->Unref();
760
1087
  }
761
- using Napi::AsyncWorker::Queue;
762
- using Napi::Promise::Deferred::Promise;
1088
+
1089
+ Napi::Promise GetPromise() {
1090
+ return deferred.Promise();
1091
+ }
763
1092
 
764
1093
  protected:
1094
+ Napi::Promise::Deferred deferred;
1095
+
765
1096
  void Execute() {
766
- // Perform the evaluation using llama_decode.
767
- int r = llama_decode(ctx->ctx, ctx->batch);
768
-
769
- if (r != 0) {
770
- if (r == 1) {
771
- SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
772
- } else {
773
- SetError("Eval has failed");
1097
+ try {
1098
+ // Perform the evaluation using llama_decode.
1099
+ int r = llama_decode(ctx->ctx, ctx->batch);
1100
+
1101
+ if (r != 0) {
1102
+ if (r == 1) {
1103
+ SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
1104
+ } else {
1105
+ SetError("Eval has failed");
1106
+ }
1107
+
1108
+ return;
774
1109
  }
775
1110
 
776
- return;
1111
+ llama_synchronize(ctx->ctx);
1112
+ } catch (const std::exception& e) {
1113
+ SetError(e.what());
1114
+ } catch(...) {
1115
+ SetError("Unknown error when calling \"llama_decode\"");
777
1116
  }
778
1117
  }
779
1118
  void OnOK() {
780
- Napi::Env env = Napi::AsyncWorker::Env();
781
- Napi::Promise::Deferred::Resolve(env.Undefined());
1119
+ deferred.Resolve(Env().Undefined());
782
1120
  }
783
1121
  void OnError(const Napi::Error& err) {
784
- Napi::Promise::Deferred::Reject(err.Value());
1122
+ deferred.Reject(err.Value());
785
1123
  }
786
1124
  };
787
1125
 
788
1126
  Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
789
- AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info, this);
1127
+ AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
790
1128
  worker->Queue();
791
- return worker->Promise();
1129
+ return worker->GetPromise();
792
1130
  }
793
1131
 
794
- class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
1132
+ class AddonContextLoadContextWorker : public Napi::AsyncWorker {
1133
+ public:
1134
+ AddonContext* context;
1135
+
1136
+ AddonContextLoadContextWorker(const Napi::Env& env, AddonContext* context)
1137
+ : Napi::AsyncWorker(env, "AddonContextLoadContextWorker"),
1138
+ context(context),
1139
+ deferred(Napi::Promise::Deferred::New(env)) {
1140
+ context->Ref();
1141
+ }
1142
+ ~AddonContextLoadContextWorker() {
1143
+ context->Unref();
1144
+ }
1145
+
1146
+ Napi::Promise GetPromise() {
1147
+ return deferred.Promise();
1148
+ }
1149
+
1150
+ protected:
1151
+ Napi::Promise::Deferred deferred;
1152
+
1153
+ void Execute() {
1154
+ try {
1155
+ context->ctx = llama_new_context_with_model(context->model->model, context->context_params);
1156
+
1157
+ context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
1158
+ } catch (const std::exception& e) {
1159
+ SetError(e.what());
1160
+ } catch(...) {
1161
+ SetError("Unknown error when calling \"llama_new_context_with_model\"");
1162
+ }
1163
+ }
1164
+ void OnOK() {
1165
+ if (context->contextLoaded) {
1166
+ uint64_t contextMemorySize = llama_get_state_size(context->ctx);
1167
+ adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
1168
+ context->loadedContextMemorySize = contextMemorySize;
1169
+ }
1170
+
1171
+ deferred.Resolve(Napi::Boolean::New(Env(), context->contextLoaded));
1172
+ }
1173
+ void OnError(const Napi::Error& err) {
1174
+ deferred.Reject(err.Value());
1175
+ }
1176
+ };
1177
+ class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
1178
+ public:
1179
+ AddonContext* context;
1180
+
1181
+ AddonContextUnloadContextWorker(const Napi::Env& env, AddonContext* context)
1182
+ : Napi::AsyncWorker(env, "AddonContextUnloadContextWorker"),
1183
+ context(context),
1184
+ deferred(Napi::Promise::Deferred::New(env)) {
1185
+ context->Ref();
1186
+ }
1187
+ ~AddonContextUnloadContextWorker() {
1188
+ context->Unref();
1189
+ }
1190
+
1191
+ Napi::Promise GetPromise() {
1192
+ return deferred.Promise();
1193
+ }
1194
+
1195
+ protected:
1196
+ Napi::Promise::Deferred deferred;
1197
+
1198
+ void Execute() {
1199
+ try {
1200
+ llama_free(context->ctx);
1201
+ context->contextLoaded = false;
1202
+
1203
+ try {
1204
+ if (context->has_batch) {
1205
+ llama_batch_free(context->batch);
1206
+ context->has_batch = false;
1207
+ context->batch_n_tokens = 0;
1208
+ }
1209
+
1210
+ context->dispose();
1211
+ } catch (const std::exception& e) {
1212
+ SetError(e.what());
1213
+ } catch(...) {
1214
+ SetError("Unknown error when calling \"llama_batch_free\"");
1215
+ }
1216
+ } catch (const std::exception& e) {
1217
+ SetError(e.what());
1218
+ } catch(...) {
1219
+ SetError("Unknown error when calling \"llama_free\"");
1220
+ }
1221
+ }
1222
+ void OnOK() {
1223
+ adjustNapiExternalMemorySubtract(Env(), context->loadedContextMemorySize);
1224
+ context->loadedContextMemorySize = 0;
1225
+
1226
+ adjustNapiExternalMemorySubtract(Env(), context->batchMemorySize);
1227
+ context->batchMemorySize = 0;
1228
+
1229
+ deferred.Resolve(Env().Undefined());
1230
+ }
1231
+ void OnError(const Napi::Error& err) {
1232
+ deferred.Reject(err.Value());
1233
+ }
1234
+ };
1235
+
1236
+ Napi::Value AddonContext::Init(const Napi::CallbackInfo& info) {
1237
+ if (disposed) {
1238
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1239
+ return info.Env().Undefined();
1240
+ }
1241
+
1242
+ AddonContextLoadContextWorker* worker = new AddonContextLoadContextWorker(this->Env(), this);
1243
+ worker->Queue();
1244
+ return worker->GetPromise();
1245
+ }
1246
+ Napi::Value AddonContext::Dispose(const Napi::CallbackInfo& info) {
1247
+ if (disposed) {
1248
+ return info.Env().Undefined();
1249
+ }
1250
+
1251
+ if (contextLoaded) {
1252
+ contextLoaded = false;
1253
+
1254
+ AddonContextUnloadContextWorker* worker = new AddonContextUnloadContextWorker(this->Env(), this);
1255
+ worker->Queue();
1256
+ return worker->GetPromise();
1257
+ } else {
1258
+ dispose();
1259
+
1260
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
1261
+ deferred.Resolve(info.Env().Undefined());
1262
+ return deferred.Promise();
1263
+ }
1264
+ }
1265
+
1266
+ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
795
1267
  public:
796
1268
  AddonContext* ctx;
797
1269
  AddonGrammarEvaluationState* grammar_evaluation_state;
@@ -811,7 +1283,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
811
1283
  AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
812
1284
  : Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
813
1285
  ctx(ctx),
814
- Napi::Promise::Deferred(info.Env()) {
1286
+ deferred(Napi::Promise::Deferred::New(info.Env())) {
815
1287
  ctx->Ref();
816
1288
 
817
1289
  batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
@@ -874,11 +1346,25 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
874
1346
  use_grammar = false;
875
1347
  }
876
1348
  }
877
- using Napi::AsyncWorker::Queue;
878
- using Napi::Promise::Deferred::Promise;
1349
+
1350
+ Napi::Promise GetPromise() {
1351
+ return deferred.Promise();
1352
+ }
879
1353
 
880
1354
  protected:
1355
+ Napi::Promise::Deferred deferred;
1356
+
881
1357
  void Execute() {
1358
+ try {
1359
+ SampleToken();
1360
+ } catch (const std::exception& e) {
1361
+ SetError(e.what());
1362
+ } catch(...) {
1363
+ SetError("Unknown error when calling \"SampleToken\"");
1364
+ }
1365
+ }
1366
+
1367
+ void SampleToken() {
882
1368
  llama_token new_token_id = 0;
883
1369
 
884
1370
  // Select the best prediction.
@@ -940,19 +1426,18 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
940
1426
  result = new_token_id;
941
1427
  }
942
1428
  void OnOK() {
943
- Napi::Env env = Napi::AsyncWorker::Env();
944
- Napi::Number resultValue = Napi::Number::New(env, static_cast<uint32_t>(result));
945
- Napi::Promise::Deferred::Resolve(resultValue);
1429
+ Napi::Number resultValue = Napi::Number::New(Env(), static_cast<uint32_t>(result));
1430
+ deferred.Resolve(resultValue);
946
1431
  }
947
1432
  void OnError(const Napi::Error& err) {
948
- Napi::Promise::Deferred::Reject(err.Value());
1433
+ deferred.Reject(err.Value());
949
1434
  }
950
1435
  };
951
1436
 
952
1437
  Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
953
1438
  AddonContextSampleTokenWorker* worker = new AddonContextSampleTokenWorker(info, this);
954
1439
  worker->Queue();
955
- return worker->Promise();
1440
+ return worker->GetPromise();
956
1441
  }
957
1442
 
958
1443
  Napi::Value systemInfo(const Napi::CallbackInfo& info) {
@@ -1025,6 +1510,9 @@ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, voi
1025
1510
 
1026
1511
  if (status == napi_ok) {
1027
1512
  return;
1513
+ } else {
1514
+ delete stringStream;
1515
+ delete data;
1028
1516
  }
1029
1517
  }
1030
1518
 
@@ -1082,21 +1570,123 @@ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
1082
1570
  return info.Env().Undefined();
1083
1571
  }
1084
1572
 
1573
+ class AddonBackendLoadWorker : public Napi::AsyncWorker {
1574
+ public:
1575
+ AddonBackendLoadWorker(const Napi::Env& env)
1576
+ : Napi::AsyncWorker(env, "AddonBackendLoadWorker"),
1577
+ deferred(Napi::Promise::Deferred::New(env)) {
1578
+ }
1579
+ ~AddonBackendLoadWorker() {
1580
+ }
1581
+
1582
+ Napi::Promise GetPromise() {
1583
+ return deferred.Promise();
1584
+ }
1585
+
1586
+ protected:
1587
+ Napi::Promise::Deferred deferred;
1588
+
1589
+ void Execute() {
1590
+ try {
1591
+ llama_backend_init();
1592
+
1593
+ try {
1594
+ if (backendDisposed) {
1595
+ llama_backend_free();
1596
+ } else {
1597
+ backendInitialized = true;
1598
+ }
1599
+ } catch (const std::exception& e) {
1600
+ SetError(e.what());
1601
+ } catch(...) {
1602
+ SetError("Unknown error when calling \"llama_backend_free\"");
1603
+ }
1604
+ } catch (const std::exception& e) {
1605
+ SetError(e.what());
1606
+ } catch(...) {
1607
+ SetError("Unknown error when calling \"llama_backend_init\"");
1608
+ }
1609
+ }
1610
+ void OnOK() {
1611
+ deferred.Resolve(Env().Undefined());
1612
+ }
1613
+ void OnError(const Napi::Error& err) {
1614
+ deferred.Reject(err.Value());
1615
+ }
1616
+ };
1617
+
1618
+
1619
+ class AddonBackendUnloadWorker : public Napi::AsyncWorker {
1620
+ public:
1621
+ AddonBackendUnloadWorker(const Napi::Env& env)
1622
+ : Napi::AsyncWorker(env, "AddonBackendUnloadWorker"),
1623
+ deferred(Napi::Promise::Deferred::New(env)) {
1624
+ }
1625
+ ~AddonBackendUnloadWorker() {
1626
+ }
1627
+
1628
+ Napi::Promise GetPromise() {
1629
+ return deferred.Promise();
1630
+ }
1631
+
1632
+ protected:
1633
+ Napi::Promise::Deferred deferred;
1634
+
1635
+ void Execute() {
1636
+ try {
1637
+ if (backendInitialized) {
1638
+ backendInitialized = false;
1639
+ llama_backend_free();
1640
+ }
1641
+ } catch (const std::exception& e) {
1642
+ SetError(e.what());
1643
+ } catch(...) {
1644
+ SetError("Unknown error when calling \"llama_backend_free\"");
1645
+ }
1646
+ }
1647
+ void OnOK() {
1648
+ deferred.Resolve(Env().Undefined());
1649
+ }
1650
+ void OnError(const Napi::Error& err) {
1651
+ deferred.Reject(err.Value());
1652
+ }
1653
+ };
1654
+
1085
1655
  Napi::Value addonInit(const Napi::CallbackInfo& info) {
1086
- if (!backendInitialized) {
1087
- llama_backend_init();
1088
- backendInitialized = true;
1656
+ if (backendInitialized) {
1657
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
1658
+ deferred.Resolve(info.Env().Undefined());
1659
+ return deferred.Promise();
1089
1660
  }
1090
1661
 
1091
- llama_log_set(addonLlamaCppLogCallback, nullptr);
1662
+ AddonBackendLoadWorker* worker = new AddonBackendLoadWorker(info.Env());
1663
+ worker->Queue();
1664
+ return worker->GetPromise();
1665
+ }
1092
1666
 
1093
- return info.Env().Undefined();
1667
+ Napi::Value addonDispose(const Napi::CallbackInfo& info) {
1668
+ if (backendDisposed) {
1669
+ Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
1670
+ deferred.Resolve(info.Env().Undefined());
1671
+ return deferred.Promise();
1672
+ }
1673
+
1674
+ backendDisposed = true;
1675
+
1676
+ AddonBackendUnloadWorker* worker = new AddonBackendUnloadWorker(info.Env());
1677
+ worker->Queue();
1678
+ return worker->GetPromise();
1094
1679
  }
1095
1680
 
1096
1681
  static void addonFreeLlamaBackend(Napi::Env env, int* data) {
1682
+ if (backendDisposed) {
1683
+ return;
1684
+ }
1685
+
1686
+ backendDisposed = true;
1097
1687
  if (backendInitialized) {
1098
- llama_backend_free();
1099
1688
  backendInitialized = false;
1689
+ llama_backend_free();
1100
1690
  }
1101
1691
  }
1102
1692
 
@@ -1108,12 +1698,15 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
1108
1698
  Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
1109
1699
  Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
1110
1700
  Napi::PropertyDescriptor::Function("init", addonInit),
1701
+ Napi::PropertyDescriptor::Function("dispose", addonDispose),
1111
1702
  });
1112
1703
  AddonModel::init(exports);
1113
1704
  AddonGrammar::init(exports);
1114
1705
  AddonGrammarEvaluationState::init(exports);
1115
1706
  AddonContext::init(exports);
1116
1707
 
1708
+ llama_log_set(addonLlamaCppLogCallback, nullptr);
1709
+
1117
1710
  exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));
1118
1711
 
1119
1712
  return exports;