node-llama-cpp 3.3.2 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199):
  1. package/README.md +3 -2
  2. package/dist/bindings/AddonTypes.d.ts +12 -4
  3. package/dist/bindings/Llama.d.ts +9 -0
  4. package/dist/bindings/Llama.js +52 -28
  5. package/dist/bindings/Llama.js.map +1 -1
  6. package/dist/bindings/getLlama.d.ts +2 -1
  7. package/dist/bindings/getLlama.js +19 -9
  8. package/dist/bindings/getLlama.js.map +1 -1
  9. package/dist/bindings/utils/asyncSome.js +2 -0
  10. package/dist/bindings/utils/asyncSome.js.map +1 -1
  11. package/dist/bindings/utils/compileLLamaCpp.d.ts +1 -1
  12. package/dist/bindings/utils/compileLLamaCpp.js +115 -34
  13. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  14. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +1 -0
  15. package/dist/bindings/utils/detectAvailableComputeLayers.js +4 -4
  16. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -1
  17. package/dist/bindings/utils/detectBuildTools.d.ts +14 -0
  18. package/dist/bindings/utils/detectBuildTools.js +149 -0
  19. package/dist/bindings/utils/detectBuildTools.js.map +1 -0
  20. package/dist/bindings/utils/resolveActualBindingBinaryPath.d.ts +1 -0
  21. package/dist/bindings/utils/resolveActualBindingBinaryPath.js +18 -0
  22. package/dist/bindings/utils/resolveActualBindingBinaryPath.js.map +1 -0
  23. package/dist/bindings/utils/testBindingBinary.d.ts +1 -1
  24. package/dist/bindings/utils/testBindingBinary.js +58 -5
  25. package/dist/bindings/utils/testBindingBinary.js.map +1 -1
  26. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +4 -0
  27. package/dist/chatWrappers/AlpacaChatWrapper.js +4 -0
  28. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  29. package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -0
  30. package/dist/chatWrappers/FalconChatWrapper.js +4 -0
  31. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  32. package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -0
  33. package/dist/chatWrappers/GeneralChatWrapper.js +4 -0
  34. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  35. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +2 -0
  36. package/dist/chatWrappers/utils/resolveChatWrapper.js +8 -27
  37. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
  38. package/dist/cli/commands/ChatCommand.d.ts +4 -0
  39. package/dist/cli/commands/ChatCommand.js +158 -13
  40. package/dist/cli/commands/ChatCommand.js.map +1 -1
  41. package/dist/cli/commands/CompleteCommand.d.ts +4 -0
  42. package/dist/cli/commands/CompleteCommand.js +143 -10
  43. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  44. package/dist/cli/commands/DebugCommand.js +5 -5
  45. package/dist/cli/commands/DebugCommand.js.map +1 -1
  46. package/dist/cli/commands/InfillCommand.d.ts +4 -0
  47. package/dist/cli/commands/InfillCommand.js +142 -10
  48. package/dist/cli/commands/InfillCommand.js.map +1 -1
  49. package/dist/cli/commands/OnPostInstallCommand.js +12 -2
  50. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  51. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +1 -0
  52. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +14 -7
  53. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -1
  54. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +13 -3
  55. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -1
  56. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +20 -10
  57. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -1
  58. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +2 -0
  59. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +234 -77
  60. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
  61. package/dist/cli/recommendedModels.js +11 -1
  62. package/dist/cli/recommendedModels.js.map +1 -1
  63. package/dist/cli/utils/ConsoleTable.d.ts +1 -0
  64. package/dist/cli/utils/ConsoleTable.js +5 -1
  65. package/dist/cli/utils/ConsoleTable.js.map +1 -1
  66. package/dist/cli/utils/interactivelyAskForModel.d.ts +2 -1
  67. package/dist/cli/utils/interactivelyAskForModel.js +16 -13
  68. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -1
  69. package/dist/cli/utils/isRunningUnderRosetta.d.ts +1 -0
  70. package/dist/cli/utils/isRunningUnderRosetta.js +20 -0
  71. package/dist/cli/utils/isRunningUnderRosetta.js.map +1 -0
  72. package/dist/cli/utils/printCommonInfoLines.d.ts +4 -2
  73. package/dist/cli/utils/printCommonInfoLines.js +67 -5
  74. package/dist/cli/utils/printCommonInfoLines.js.map +1 -1
  75. package/dist/cli/utils/resolveCommandGgufPath.d.ts +3 -1
  76. package/dist/cli/utils/resolveCommandGgufPath.js +6 -5
  77. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -1
  78. package/dist/cli/utils/toBytes.d.ts +1 -0
  79. package/dist/cli/utils/toBytes.js +5 -0
  80. package/dist/cli/utils/toBytes.js.map +1 -0
  81. package/dist/config.d.ts +3 -0
  82. package/dist/config.js +3 -0
  83. package/dist/config.js.map +1 -1
  84. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +12 -3
  85. package/dist/evaluator/LlamaChat/LlamaChat.js +21 -7
  86. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  87. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +6 -2
  88. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +3 -0
  89. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  90. package/dist/evaluator/LlamaCompletion.d.ts +3 -0
  91. package/dist/evaluator/LlamaCompletion.js +5 -0
  92. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  93. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +81 -38
  94. package/dist/evaluator/LlamaContext/LlamaContext.js +678 -132
  95. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  96. package/dist/evaluator/LlamaContext/TokenPredictor.d.ts +55 -0
  97. package/dist/evaluator/LlamaContext/TokenPredictor.js +20 -0
  98. package/dist/evaluator/LlamaContext/TokenPredictor.js.map +1 -0
  99. package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.d.ts +56 -0
  100. package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js +266 -0
  101. package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js.map +1 -0
  102. package/dist/evaluator/LlamaContext/tokenPredictors/InputLookupTokenPredictor.d.ts +58 -0
  103. package/dist/evaluator/LlamaContext/tokenPredictors/InputLookupTokenPredictor.js +138 -0
  104. package/dist/evaluator/LlamaContext/tokenPredictors/InputLookupTokenPredictor.js.map +1 -0
  105. package/dist/evaluator/LlamaContext/types.d.ts +198 -5
  106. package/dist/evaluator/LlamaEmbeddingContext.d.ts +3 -0
  107. package/dist/evaluator/LlamaEmbeddingContext.js +3 -0
  108. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  109. package/dist/evaluator/LlamaGrammar.d.ts +7 -1
  110. package/dist/evaluator/LlamaGrammar.js +6 -0
  111. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  112. package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +4 -4
  113. package/dist/evaluator/LlamaGrammarEvaluationState.js +16 -8
  114. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
  115. package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +5 -0
  116. package/dist/evaluator/LlamaJsonSchemaGrammar.js +7 -0
  117. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
  118. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +19 -11
  119. package/dist/evaluator/LlamaModel/LlamaModel.js +23 -29
  120. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
  121. package/dist/evaluator/LlamaRankingContext.d.ts +76 -0
  122. package/dist/evaluator/LlamaRankingContext.js +158 -0
  123. package/dist/evaluator/LlamaRankingContext.js.map +1 -0
  124. package/dist/evaluator/TokenBias.d.ts +3 -0
  125. package/dist/evaluator/TokenBias.js +3 -0
  126. package/dist/evaluator/TokenBias.js.map +1 -1
  127. package/dist/evaluator/utils/chunkDocument.d.ts +86 -0
  128. package/dist/evaluator/utils/chunkDocument.js +212 -0
  129. package/dist/evaluator/utils/chunkDocument.js.map +1 -0
  130. package/dist/gguf/insights/GgufInsights.d.ts +3 -1
  131. package/dist/gguf/insights/GgufInsights.js +114 -8
  132. package/dist/gguf/insights/GgufInsights.js.map +1 -1
  133. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +6 -3
  134. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +11 -7
  135. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -1
  136. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +2 -1
  137. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +13 -7
  138. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -1
  139. package/dist/gguf/parser/GgufV2Parser.js +29 -8
  140. package/dist/gguf/parser/GgufV2Parser.js.map +1 -1
  141. package/dist/gguf/parser/parseGguf.js +11 -11
  142. package/dist/gguf/parser/parseGguf.js.map +1 -1
  143. package/dist/gguf/readGgufFileInfo.js +8 -3
  144. package/dist/gguf/readGgufFileInfo.js.map +1 -1
  145. package/dist/gguf/types/GgufFileInfoTypes.d.ts +1 -0
  146. package/dist/gguf/types/GgufMetadataTypes.d.ts +9 -9
  147. package/dist/gguf/types/GgufMetadataTypes.js +1 -1
  148. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
  149. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +13 -0
  150. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -1
  151. package/dist/index.d.ts +7 -2
  152. package/dist/index.js +6 -1
  153. package/dist/index.js.map +1 -1
  154. package/dist/tsconfig.tsbuildinfo +1 -1
  155. package/dist/utils/LlamaText.d.ts +4 -1
  156. package/dist/utils/LlamaText.js +4 -1
  157. package/dist/utils/LlamaText.js.map +1 -1
  158. package/dist/utils/cmake.js +23 -0
  159. package/dist/utils/cmake.js.map +1 -1
  160. package/dist/utils/pushAll.d.ts +1 -1
  161. package/dist/utils/pushAll.js.map +1 -1
  162. package/dist/utils/tokenizerUtils.js +1 -1
  163. package/dist/utils/utilTypes.d.ts +5 -0
  164. package/llama/CMakeLists.txt +25 -8
  165. package/llama/addon/AddonContext.cpp +196 -22
  166. package/llama/addon/AddonContext.h +1 -0
  167. package/llama/addon/AddonGrammar.cpp +1 -4
  168. package/llama/addon/AddonGrammarEvaluationState.cpp +16 -5
  169. package/llama/addon/AddonModel.cpp +31 -39
  170. package/llama/addon/AddonModel.h +1 -1
  171. package/llama/addon/AddonModelLora.cpp +2 -2
  172. package/llama/addon/AddonModelLora.h +1 -1
  173. package/llama/addon/AddonSampler.cpp +7 -12
  174. package/llama/addon/addon.cpp +26 -7
  175. package/llama/addon/globals/getGpuInfo.cpp +30 -5
  176. package/llama/addon/globals/getGpuInfo.h +6 -1
  177. package/llama/addon/globals/getMemoryInfo.cpp +63 -0
  178. package/llama/addon/globals/getMemoryInfo.h +4 -0
  179. package/llama/binariesGithubRelease.json +1 -1
  180. package/llama/cmake/win32.ensureNinjaPath.cmake +68 -0
  181. package/llama/cmake/win32.ensureNodeLib.cmake +34 -0
  182. package/llama/cmake/win32.llvmApplyGnuModeAdaptations.cmake +12 -0
  183. package/llama/cmake/win32.llvmEnsureCmakeAr.cmake +37 -0
  184. package/llama/cmake/win32.llvmUseGnuModeCompilers.cmake +87 -0
  185. package/llama/cmake/win32.programFilesPaths.cmake +35 -0
  186. package/llama/gitRelease.bundle +0 -0
  187. package/llama/gpuInfo/vulkan-gpu-info.cpp +29 -2
  188. package/llama/gpuInfo/vulkan-gpu-info.h +1 -0
  189. package/llama/llama.cpp.info.json +1 -1
  190. package/llama/profiles/llvm.win32.host-arm64.target-arm64.cmake +14 -0
  191. package/llama/profiles/llvm.win32.host-x64.target-arm64.cmake +14 -0
  192. package/llama/profiles/llvm.win32.host-x64.target-x64.cmake +14 -0
  193. package/llama/toolchains/llvm.win32.host-x64.target-x64.cmake +20 -0
  194. package/llama/toolchains/win32.host-arm64.target-arm64.cmake +21 -0
  195. package/llama/toolchains/win32.host-x64.target-arm64.cmake +14 -34
  196. package/package.json +47 -44
  197. package/templates/README.md +1 -1
  198. package/templates/packed/electron-typescript-react.json +1 -1
  199. package/templates/packed/node-typescript.json +1 -1
@@ -1,5 +1,6 @@
1
1
  #include <thread>
2
2
  #include <algorithm>
3
+ #include <cmath>
3
4
  #include "common/common.h"
4
5
  #include "llama-grammar.h"
5
6
  #include "llama.h"
@@ -104,13 +105,13 @@ class AddonContextLoadContextWorker : public Napi::AsyncWorker {
104
105
 
105
106
  void Execute() {
106
107
  try {
107
- context->ctx = llama_new_context_with_model(context->model->model, context->context_params);
108
+ context->ctx = llama_init_from_model(context->model->model, context->context_params);
108
109
 
109
110
  context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
110
111
  } catch (const std::exception& e) {
111
112
  SetError(e.what());
112
113
  } catch(...) {
113
- SetError("Unknown error when calling \"llama_new_context_with_model\"");
114
+ SetError("Unknown error when calling \"llama_init_from_model\"");
114
115
  }
115
116
  }
116
117
  void OnOK() {
@@ -190,6 +191,14 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
190
191
  public:
191
192
  AddonContext* ctx;
192
193
  AddonSampler* sampler;
194
+ bool arrayResult = false;
195
+ bool returnProbabilities = false;
196
+ bool returnConfidence = false;
197
+ float tokenConfidence = -1;
198
+ bool has_probabilities = false;
199
+ size_t probabilities_size;
200
+ llama_token * probabilities_tokens;
201
+ float * probabilities_probs;
193
202
  int32_t batchLogitIndex;
194
203
  llama_token result;
195
204
  bool no_output = false;
@@ -202,11 +211,19 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
202
211
 
203
212
  batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
204
213
  sampler = Napi::ObjectWrap<AddonSampler>::Unwrap(info[1].As<Napi::Object>());
214
+ arrayResult = info.Length() > 2 && info[2].IsBoolean();
215
+ returnProbabilities = arrayResult ? info[2].As<Napi::Boolean>().Value() : false;
216
+ returnConfidence = arrayResult && info.Length() > 3 && info[3].IsBoolean() ? info[3].As<Napi::Boolean>().Value() : false;
205
217
  sampler->Ref();
206
218
  }
207
219
  ~AddonContextSampleTokenWorker() {
208
220
  ctx->Unref();
209
221
  sampler->Unref();
222
+
223
+ if (has_probabilities) {
224
+ delete[] probabilities_tokens;
225
+ delete[] probabilities_probs;
226
+ }
210
227
  }
211
228
 
212
229
  Napi::Promise GetPromise() {
@@ -235,11 +252,11 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
235
252
  sampler->rebuildChainIfNeeded();
236
253
 
237
254
  const auto * logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
238
- const int n_vocab = llama_n_vocab(ctx->model->model);
255
+ const int n_vocab = llama_vocab_n_tokens(ctx->model->vocab);
239
256
 
240
257
  auto & candidates = sampler->tokenCandidates;
241
258
  for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
242
- candidates[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};;
259
+ candidates[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
243
260
  }
244
261
 
245
262
  llama_token_data_array cur_p = {
@@ -257,18 +274,111 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
257
274
  }
258
275
 
259
276
  auto new_token_id = cur_p.data[cur_p.selected].id;
277
+
278
+ if (returnProbabilities || returnConfidence) {
279
+ if (!cur_p.sorted) {
280
+ std::sort(cur_p.data, cur_p.data + cur_p.size, [](const llama_token_data & a, const llama_token_data & b) {
281
+ return a.logit > b.logit;
282
+ });
283
+ cur_p.sorted = true;
284
+
285
+ for (size_t i = 0; i < cur_p.size; i++) {
286
+ if (cur_p.data[i].id == new_token_id) {
287
+ cur_p.selected = i;
288
+ break;
289
+ }
290
+ }
291
+ }
292
+ }
293
+
294
+ if (returnProbabilities) {
295
+ probabilities_size = cur_p.size;
296
+ probabilities_tokens = new llama_token[probabilities_size];
297
+ probabilities_probs = new float[probabilities_size];
298
+ float maxLogit = cur_p.size > 0 ? cur_p.data[0].logit : -INFINITY;
299
+
300
+ for (size_t i = 0; i < cur_p.size; i++) {
301
+ auto logit = cur_p.data[i].logit;
302
+
303
+ probabilities_tokens[i] = cur_p.data[i].id;
304
+ probabilities_probs[i] = logit;
305
+
306
+ if (logit > maxLogit) {
307
+ maxLogit = logit;
308
+ }
309
+ }
310
+
311
+ if (probabilities_size > 0 && maxLogit != -INFINITY) {
312
+ float sum = 0.0f;
313
+ for (size_t i = 0; i < probabilities_size; i++) {
314
+ float prob = expf(probabilities_probs[i] - maxLogit);
315
+ probabilities_probs[i] = prob;
316
+ sum += prob;
317
+ }
318
+
319
+ for (size_t i = 0; i < probabilities_size; i++) {
320
+ probabilities_probs[i] /= sum;
321
+ }
322
+ }
323
+
324
+ has_probabilities = true;
325
+ }
326
+
327
+ if (returnConfidence) {
328
+ if (has_probabilities && cur_p.selected < probabilities_size) {
329
+ tokenConfidence = probabilities_probs[cur_p.selected];
330
+ } else {
331
+ float maxLogit = cur_p.data[0].logit;
332
+ float sum = 0.0f;
333
+ for (size_t i = 0; i < cur_p.size; i++) {
334
+ auto logit = cur_p.data[i].logit;
335
+
336
+ if (logit > maxLogit) {
337
+ maxLogit = logit;
338
+ }
339
+ }
340
+
341
+ for (size_t i = 0; i < cur_p.size; i++) {
342
+ sum += expf(cur_p.data[i].logit - maxLogit);
343
+ }
344
+
345
+ tokenConfidence = expf(cur_p.data[cur_p.selected].logit - maxLogit) / sum;
346
+ }
347
+ }
348
+
260
349
  sampler->acceptToken(new_token_id);
261
350
  result = new_token_id;
262
351
  }
263
352
  void OnOK() {
353
+ Napi::Number resultToken;
264
354
  if (no_output) {
265
- Napi::Number resultValue = Napi::Number::New(Env(), -1);
266
- deferred.Resolve(resultValue);
355
+ resultToken = Napi::Number::New(Env(), -1);
356
+ } else {
357
+ resultToken = Napi::Number::New(Env(), static_cast<uint32_t>(result));
358
+ }
359
+
360
+ if (!arrayResult) {
361
+ deferred.Resolve(resultToken);
267
362
  return;
268
363
  }
269
364
 
270
- Napi::Number resultValue = Napi::Number::New(Env(), static_cast<uint32_t>(result));
271
- deferred.Resolve(resultValue);
365
+ Napi::Array resultArray = Napi::Array::New(Env(), 2);
366
+ resultArray.Set(Napi::Number::New(Env(), 0), resultToken);
367
+
368
+ if (has_probabilities) {
369
+ Napi::Array probabilities = Napi::Array::New(Env(), probabilities_size * 2);
370
+ for (size_t i = 0; i < probabilities_size; i++) {
371
+ probabilities.Set(i * 2, Napi::Number::New(Env(), probabilities_tokens[i]));
372
+ probabilities.Set(i * 2 + 1, Napi::Number::New(Env(), probabilities_probs[i]));
373
+ }
374
+ resultArray.Set(1, probabilities);
375
+ }
376
+
377
+ if (returnConfidence && tokenConfidence != -1) {
378
+ resultArray.Set(2, Napi::Number::New(Env(), tokenConfidence));
379
+ }
380
+
381
+ deferred.Resolve(resultArray);
272
382
  }
273
383
  void OnError(const Napi::Error& err) {
274
384
  deferred.Reject(err.Value());
@@ -305,6 +415,10 @@ AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<Ad
305
415
  context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
306
416
  }
307
417
 
418
+ if (options.Has("ranking") && options.Get("ranking").As<Napi::Boolean>().Value()) {
419
+ context_params.pooling_type = LLAMA_POOLING_TYPE_RANK;
420
+ }
421
+
308
422
  if (options.Has("flashAttention")) {
309
423
  context_params.flash_attn = options.Get("flashAttention").As<Napi::Boolean>().Value();
310
424
  }
@@ -411,7 +525,7 @@ Napi::Value AddonContext::InitBatch(const Napi::CallbackInfo& info) {
411
525
  has_batch = true;
412
526
  batch_n_tokens = n_tokens;
413
527
 
414
- uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_n_embd(model->model), context_params.n_batch);
528
+ uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_model_n_embd(model->model), context_params.n_batch);
415
529
  if (newBatchMemorySize > batchMemorySize) {
416
530
  adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
417
531
  batchMemorySize = newBatchMemorySize;
@@ -441,24 +555,25 @@ Napi::Value AddonContext::AddToBatch(const Napi::CallbackInfo& info) {
441
555
  int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
442
556
  int32_t firstTokenContextIndex = info[1].As<Napi::Number>().Int32Value();
443
557
  Napi::Uint32Array tokens = info[2].As<Napi::Uint32Array>();
444
- bool generateLogitAtTheEnd = info[3].As<Napi::Boolean>().Value();
558
+ Napi::Uint32Array tokenLogitIndexes = info[3].As<Napi::Uint32Array>();
445
559
 
446
560
  auto tokensLength = tokens.ElementLength();
561
+ auto tokenLogitIndexesLength = tokenLogitIndexes.ElementLength();
447
562
  GGML_ASSERT(batch.n_tokens + tokensLength <= batch_n_tokens);
448
563
 
449
- for (size_t i = 0; i < tokensLength; i++) {
450
- common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, false);
451
- }
452
-
453
- if (generateLogitAtTheEnd) {
454
- batch.logits[batch.n_tokens - 1] = true;
564
+ Napi::Uint32Array resLogitIndexes = Napi::Uint32Array::New(info.Env(), tokenLogitIndexesLength);
455
565
 
456
- auto logit_index = batch.n_tokens - 1;
457
-
458
- return Napi::Number::From(info.Env(), logit_index);
566
+ for (size_t i = 0, l = 0; i < tokensLength; i++) {
567
+ if (l < tokenLogitIndexesLength && l < tokenLogitIndexesLength && tokenLogitIndexes[l] == i) {
568
+ common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, true);
569
+ resLogitIndexes[l] = batch.n_tokens - 1;
570
+ l++;
571
+ } else {
572
+ common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, false);
573
+ }
459
574
  }
460
575
 
461
- return info.Env().Undefined();
576
+ return resLogitIndexes;
462
577
  }
463
578
  Napi::Value AddonContext::DisposeSequence(const Napi::CallbackInfo& info) {
464
579
  if (disposed) {
@@ -530,7 +645,7 @@ Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
530
645
  return info.Env().Undefined();
531
646
  }
532
647
 
533
- const int n_embd = llama_n_embd(model->model);
648
+ const int n_embd = llama_model_n_embd(model->model);
534
649
  const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
535
650
  const auto* embeddings = pooling_type == LLAMA_POOLING_TYPE_NONE ? NULL : llama_get_embeddings_seq(ctx, 0);
536
651
  if (embeddings == NULL) {
@@ -592,11 +707,69 @@ Napi::Value AddonContext::PrintTimings(const Napi::CallbackInfo& info) {
592
707
  return info.Env().Undefined();
593
708
  }
594
709
 
710
+ Napi::Value AddonContext::EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info) {
711
+ constexpr auto vocabSizeMaxDifference = 128; // SPEC_VOCAB_MAX_SIZE_DIFFERENCE
712
+ constexpr auto vocabCheckStartTokenId = 5; // SPEC_VOCAB_CHECK_START_TOKEN_ID
713
+
714
+ const AddonContext * draftContext = Napi::ObjectWrap<AddonContext>::Unwrap(info[0].As<Napi::Object>());
715
+ const auto currentCtx = ctx;
716
+ const auto draftCtx = draftContext->ctx;
717
+ const auto currentModel = model->model;
718
+ const auto draftModel = draftContext->model->model;
719
+ const auto currentVocab = model->vocab;
720
+ const auto draftVocab = draftContext->model->vocab;
721
+
722
+ if (llama_vocab_type(currentVocab) != llama_vocab_type(draftVocab)) {
723
+ Napi::Error::New(info.Env(), "Speculative draft model vocabulary type must match the target model vocabulary type").ThrowAsJavaScriptException();
724
+ return info.Env().Undefined();
725
+ }
726
+
727
+ if (llama_vocab_get_add_bos(currentVocab) != llama_vocab_get_add_bos(draftVocab) ||
728
+ llama_vocab_get_add_eos(currentVocab) != llama_vocab_get_add_eos(draftVocab) ||
729
+ llama_vocab_bos(currentVocab) != llama_vocab_bos(draftVocab) ||
730
+ llama_vocab_eos(currentVocab) != llama_vocab_eos(draftVocab)
731
+ ) {
732
+ Napi::Error::New(info.Env(), "Speculative draft model special tokens must match the target model special tokens").ThrowAsJavaScriptException();
733
+ return info.Env().Undefined();
734
+ }
735
+
736
+ const int currentModelVocabSize = llama_vocab_n_tokens(currentVocab);
737
+ const int draftModelVocabSize = llama_vocab_n_tokens(draftVocab);
738
+
739
+ const int vocabDiff = std::abs(currentModelVocabSize - draftModelVocabSize);
740
+
741
+ if (vocabDiff > vocabSizeMaxDifference) {
742
+ Napi::Error::New(
743
+ info.Env(),
744
+ std::string("Speculative draft model vocabulary must closely match the target model vocabulary size (vocabulary size difference: ") +
745
+ std::to_string(vocabDiff) + std::string(", max allowed: ") + std::to_string(vocabSizeMaxDifference) + std::string(")")
746
+ ).ThrowAsJavaScriptException();
747
+ return info.Env().Undefined();
748
+ }
749
+
750
+ const int minVocabSize = std::min(currentModelVocabSize, draftModelVocabSize);
751
+ for (int i = vocabCheckStartTokenId; i < minVocabSize; ++i) {
752
+ const char * currentTokenText = llama_vocab_get_text(currentVocab, i);
753
+ const char * draftTokenText = llama_vocab_get_text(draftVocab, i);
754
+ if (std::strcmp(currentTokenText, draftTokenText) != 0) {
755
+ Napi::Error::New(
756
+ info.Env(),
757
+ std::string("Speculative draft model vocabulary must match the target model vocabulary, but token ") +
758
+ std::to_string(i) + std::string(" content differs. Target: \"") + std::string(currentTokenText) +
759
+ std::string("\", Draft: \"") + std::string(draftTokenText) + std::string("")
760
+ ).ThrowAsJavaScriptException();
761
+ return info.Env().Undefined();
762
+ }
763
+ }
764
+
765
+ return info.Env().Undefined();
766
+ }
767
+
595
768
  Napi::Value AddonContext::SetLora(const Napi::CallbackInfo& info) {
596
769
  AddonModelLora* lora = Napi::ObjectWrap<AddonModelLora>::Unwrap(info[0].As<Napi::Object>());
597
770
  float scale = info[1].As<Napi::Number>().FloatValue();
598
771
 
599
- llama_lora_adapter_set(ctx, lora->lora_adapter, scale);
772
+ llama_set_adapter_lora(ctx, lora->lora_adapter, scale);
600
773
 
601
774
  return info.Env().Undefined();
602
775
  }
@@ -622,6 +795,7 @@ void AddonContext::init(Napi::Object exports) {
622
795
  InstanceMethod("getThreads", &AddonContext::GetThreads),
623
796
  InstanceMethod("setThreads", &AddonContext::SetThreads),
624
797
  InstanceMethod("printTimings", &AddonContext::PrintTimings),
798
+ InstanceMethod("ensureDraftContextIsCompatibleForSpeculative", &AddonContext::EnsureDraftContextIsCompatibleForSpeculative),
625
799
  InstanceMethod("setLora", &AddonContext::SetLora),
626
800
  InstanceMethod("dispose", &AddonContext::Dispose),
627
801
  }
@@ -45,6 +45,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
45
45
  Napi::Value SetThreads(const Napi::CallbackInfo& info);
46
46
 
47
47
  Napi::Value PrintTimings(const Napi::CallbackInfo& info);
48
+ Napi::Value EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info);
48
49
 
49
50
  Napi::Value SetLora(const Napi::CallbackInfo& info);
50
51
 
@@ -46,13 +46,10 @@ Napi::Value AddonGrammar::isTextCompatible(const Napi::CallbackInfo& info) {
46
46
  }
47
47
 
48
48
  const auto cpts = unicode_cpts_from_utf8(testText);
49
- const llama_grammar_rules & rules = llama_grammar_get_rules(parsed_grammar);
50
49
  llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(parsed_grammar);
51
50
 
52
51
  for (const auto & cpt : cpts) {
53
- const llama_grammar_stacks stacks_prev = llama_grammar_get_stacks(parsed_grammar);
54
-
55
- llama_grammar_accept(rules, stacks_prev, cpt, stacks_cur);
52
+ llama_grammar_accept(parsed_grammar, cpt);
56
53
 
57
54
  if (stacks_cur.empty()) {
58
55
  // no stacks means that the grammar failed to match at this point
@@ -6,13 +6,24 @@
6
6
  #include "AddonGrammar.h"
7
7
 
8
8
  AddonGrammarEvaluationState::AddonGrammarEvaluationState(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammarEvaluationState>(info) {
9
- model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
10
- model->Ref();
9
+ if (info.Length() == 1) {
10
+ AddonGrammarEvaluationState* existingState = Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
11
+ model = existingState->model;
12
+ model->Ref();
11
13
 
12
- grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[1].As<Napi::Object>());
13
- grammarDef->Ref();
14
+ grammarDef = existingState->grammarDef;
15
+ grammarDef->Ref();
14
16
 
15
- sampler = llama_sampler_init_grammar(model->model, grammarDef->grammarCode.c_str(), grammarDef->rootRuleName.c_str());
17
+ sampler = llama_sampler_clone(existingState->sampler);
18
+ } else {
19
+ model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
20
+ model->Ref();
21
+
22
+ grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[1].As<Napi::Object>());
23
+ grammarDef->Ref();
24
+
25
+ sampler = llama_sampler_init_grammar(model->vocab, grammarDef->grammarCode.c_str(), grammarDef->rootRuleName.c_str());
26
+ }
16
27
  }
17
28
  AddonGrammarEvaluationState::~AddonGrammarEvaluationState() {
18
29
  llama_sampler_free(sampler);
@@ -8,12 +8,12 @@
8
8
  #include "AddonModelData.h"
9
9
  #include "AddonModelLora.h"
10
10
 
11
- static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
11
+ static Napi::Value getNapiToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
12
12
  if (token < 0 || token == LLAMA_TOKEN_NULL) {
13
13
  return Napi::Number::From(info.Env(), -1);
14
14
  }
15
15
 
16
- auto tokenAttributes = llama_token_get_attr(model, token);
16
+ auto tokenAttributes = llama_vocab_get_attr(vocab, token);
17
17
 
18
18
  if (tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED || tokenAttributes & LLAMA_TOKEN_ATTR_UNKNOWN) {
19
19
  return Napi::Number::From(info.Env(), -1);
@@ -22,12 +22,12 @@ static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* mod
22
22
  return Napi::Number::From(info.Env(), token);
23
23
  }
24
24
 
25
- static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
25
+ static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
26
26
  if (token < 0) {
27
27
  return Napi::Number::From(info.Env(), -1);
28
28
  }
29
29
 
30
- auto tokenAttributes = llama_token_get_attr(model, token);
30
+ auto tokenAttributes = llama_vocab_get_attr(vocab, token);
31
31
 
32
32
  if (!(tokenAttributes & LLAMA_TOKEN_ATTR_CONTROL) && !(tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED)) {
33
33
  return Napi::Number::From(info.Env(), -1);
@@ -92,13 +92,14 @@ class AddonModelLoadModelWorker : public Napi::AsyncWorker {
92
92
 
93
93
  void Execute() {
94
94
  try {
95
- model->model = llama_load_model_from_file(model->modelPath.c_str(), model->model_params);
95
+ model->model = llama_model_load_from_file(model->modelPath.c_str(), model->model_params);
96
+ model->vocab = llama_model_get_vocab(model->model);
96
97
 
97
98
  model->modelLoaded = model->model != nullptr && model->model != NULL;
98
99
  } catch (const std::exception& e) {
99
100
  SetError(e.what());
100
101
  } catch(...) {
101
- SetError("Unknown error when calling \"llama_load_model_from_file\"");
102
+ SetError("Unknown error when calling \"llama_model_load_from_file\"");
102
103
  }
103
104
  }
104
105
  void OnOK() {
@@ -141,14 +142,14 @@ class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
141
142
 
142
143
  void Execute() {
143
144
  try {
144
- llama_free_model(model->model);
145
+ llama_model_free(model->model);
145
146
  model->modelLoaded = false;
146
147
 
147
148
  model->dispose();
148
149
  } catch (const std::exception& e) {
149
150
  SetError(e.what());
150
151
  } catch(...) {
151
- SetError("Unknown error when calling \"llama_free_model\"");
152
+ SetError("Unknown error when calling \"llama_model_free\"");
152
153
  }
153
154
  }
154
155
  void OnOK() {
@@ -190,7 +191,7 @@ class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
190
191
 
191
192
  void Execute() {
192
193
  try {
193
- const auto loraAdapter = llama_lora_adapter_init(modelLora->model->model, modelLora->loraFilePath.c_str());
194
+ const auto loraAdapter = llama_adapter_lora_init(modelLora->model->model, modelLora->loraFilePath.c_str());
194
195
 
195
196
  if (loraAdapter == nullptr) {
196
197
  SetError(
@@ -213,7 +214,7 @@ class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
213
214
  } catch (const std::exception& e) {
214
215
  SetError(e.what());
215
216
  } catch(...) {
216
- SetError("Unknown error when calling \"llama_lora_adapter_init\"");
217
+ SetError("Unknown error when calling \"llama_adapter_lora_init\"");
217
218
  }
218
219
  }
219
220
  void OnOK() {
@@ -359,7 +360,7 @@ void AddonModel::dispose() {
359
360
  disposed = true;
360
361
  if (modelLoaded) {
361
362
  modelLoaded = false;
362
- llama_free_model(model);
363
+ llama_model_free(model);
363
364
 
364
365
  adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
365
366
  loadedModelSize = 0;
@@ -426,7 +427,7 @@ Napi::Value AddonModel::Tokenize(const Napi::CallbackInfo& info) {
426
427
  std::string text = info[0].As<Napi::String>().Utf8Value();
427
428
  bool specialTokens = info[1].As<Napi::Boolean>().Value();
428
429
 
429
- std::vector<llama_token> tokens = common_tokenize(model, text, false, specialTokens);
430
+ std::vector<llama_token> tokens = common_tokenize(vocab, text, false, specialTokens);
430
431
 
431
432
  Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
432
433
  for (size_t i = 0; i < tokens.size(); ++i) {
@@ -449,10 +450,10 @@ Napi::Value AddonModel::Detokenize(const Napi::CallbackInfo& info) {
449
450
  std::string result;
450
451
  result.resize(std::max(result.capacity(), tokens.ElementLength()));
451
452
 
452
- int n_chars = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
453
+ int n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
453
454
  if (n_chars < 0) {
454
455
  result.resize(-n_chars);
455
- n_chars = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
456
+ n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
456
457
  GGML_ASSERT(n_chars <= result.size()); // whitespace trimming is performed after per-token detokenization
457
458
  }
458
459
 
@@ -467,7 +468,7 @@ Napi::Value AddonModel::GetTrainContextSize(const Napi::CallbackInfo& info) {
467
468
  return info.Env().Undefined();
468
469
  }
469
470
 
470
- return Napi::Number::From(info.Env(), llama_n_ctx_train(model));
471
+ return Napi::Number::From(info.Env(), llama_model_n_ctx_train(model));
471
472
  }
472
473
 
473
474
  Napi::Value AddonModel::GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
@@ -476,7 +477,7 @@ Napi::Value AddonModel::GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
476
477
  return info.Env().Undefined();
477
478
  }
478
479
 
479
- return Napi::Number::From(info.Env(), llama_n_embd(model));
480
+ return Napi::Number::From(info.Env(), llama_model_n_embd(model));
480
481
  }
481
482
 
482
483
  Napi::Value AddonModel::GetTotalSize(const Napi::CallbackInfo& info) {
@@ -515,7 +516,7 @@ Napi::Value AddonModel::TokenBos(const Napi::CallbackInfo& info) {
515
516
  return info.Env().Undefined();
516
517
  }
517
518
 
518
- return getNapiControlToken(info, model, llama_token_bos(model));
519
+ return getNapiControlToken(info, vocab, llama_vocab_bos(vocab));
519
520
  }
520
521
  Napi::Value AddonModel::TokenEos(const Napi::CallbackInfo& info) {
521
522
  if (disposed) {
@@ -523,7 +524,7 @@ Napi::Value AddonModel::TokenEos(const Napi::CallbackInfo& info) {
523
524
  return info.Env().Undefined();
524
525
  }
525
526
 
526
- return getNapiControlToken(info, model, llama_token_eos(model));
527
+ return getNapiControlToken(info, vocab, llama_vocab_eos(vocab));
527
528
  }
528
529
  Napi::Value AddonModel::TokenNl(const Napi::CallbackInfo& info) {
529
530
  if (disposed) {
@@ -531,7 +532,7 @@ Napi::Value AddonModel::TokenNl(const Napi::CallbackInfo& info) {
531
532
  return info.Env().Undefined();
532
533
  }
533
534
 
534
- return getNapiToken(info, model, llama_token_nl(model));
535
+ return getNapiToken(info, vocab, llama_vocab_nl(vocab));
535
536
  }
536
537
  Napi::Value AddonModel::PrefixToken(const Napi::CallbackInfo& info) {
537
538
  if (disposed) {
@@ -539,7 +540,7 @@ Napi::Value AddonModel::PrefixToken(const Napi::CallbackInfo& info) {
539
540
  return info.Env().Undefined();
540
541
  }
541
542
 
542
- return getNapiToken(info, model, llama_token_fim_pre(model));
543
+ return getNapiToken(info, vocab, llama_vocab_fim_pre(vocab));
543
544
  }
544
545
  Napi::Value AddonModel::MiddleToken(const Napi::CallbackInfo& info) {
545
546
  if (disposed) {
@@ -547,7 +548,7 @@ Napi::Value AddonModel::MiddleToken(const Napi::CallbackInfo& info) {
547
548
  return info.Env().Undefined();
548
549
  }
549
550
 
550
- return getNapiToken(info, model, llama_token_fim_mid(model));
551
+ return getNapiToken(info, vocab, llama_vocab_fim_mid(vocab));
551
552
  }
552
553
  Napi::Value AddonModel::SuffixToken(const Napi::CallbackInfo& info) {
553
554
  if (disposed) {
@@ -555,7 +556,7 @@ Napi::Value AddonModel::SuffixToken(const Napi::CallbackInfo& info) {
555
556
  return info.Env().Undefined();
556
557
  }
557
558
 
558
- return getNapiToken(info, model, llama_token_fim_suf(model));
559
+ return getNapiToken(info, vocab, llama_vocab_fim_suf(vocab));
559
560
  }
560
561
  Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
561
562
  if (disposed) {
@@ -563,15 +564,7 @@ Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
563
564
  return info.Env().Undefined();
564
565
  }
565
566
 
566
- return getNapiToken(info, model, llama_token_eot(model));
567
- }
568
- Napi::Value AddonModel::ClsToken(const Napi::CallbackInfo& info) {
569
- if (disposed) {
570
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
571
- return info.Env().Undefined();
572
- }
573
-
574
- return getNapiToken(info, model, llama_token_cls(model));
567
+ return getNapiToken(info, vocab, llama_vocab_eot(vocab));
575
568
  }
576
569
  Napi::Value AddonModel::SepToken(const Napi::CallbackInfo& info) {
577
570
  if (disposed) {
@@ -579,7 +572,7 @@ Napi::Value AddonModel::SepToken(const Napi::CallbackInfo& info) {
579
572
  return info.Env().Undefined();
580
573
  }
581
574
 
582
- return getNapiToken(info, model, llama_token_sep(model));
575
+ return getNapiToken(info, vocab, llama_vocab_sep(vocab));
583
576
  }
584
577
  Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
585
578
  if (disposed) {
@@ -590,7 +583,7 @@ Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
590
583
  int token = info[0].As<Napi::Number>().Int32Value();
591
584
  std::stringstream ss;
592
585
 
593
- const char* str = llama_token_get_text(model, token);
586
+ const char* str = llama_vocab_get_text(vocab, token);
594
587
  if (str == nullptr) {
595
588
  return info.Env().Undefined();
596
589
  }
@@ -611,7 +604,7 @@ Napi::Value AddonModel::GetTokenAttributes(const Napi::CallbackInfo& info) {
611
604
  }
612
605
 
613
606
  int token = info[0].As<Napi::Number>().Int32Value();
614
- auto tokenAttributes = llama_token_get_attr(model, token);
607
+ auto tokenAttributes = llama_vocab_get_attr(vocab, token);
615
608
 
616
609
  return Napi::Number::From(info.Env(), int32_t(tokenAttributes));
617
610
  }
@@ -627,7 +620,7 @@ Napi::Value AddonModel::IsEogToken(const Napi::CallbackInfo& info) {
627
620
 
628
621
  int token = info[0].As<Napi::Number>().Int32Value();
629
622
 
630
- return Napi::Boolean::New(info.Env(), llama_token_is_eog(model, token));
623
+ return Napi::Boolean::New(info.Env(), llama_vocab_is_eog(vocab, token));
631
624
  }
632
625
  Napi::Value AddonModel::GetVocabularyType(const Napi::CallbackInfo& info) {
633
626
  if (disposed) {
@@ -635,17 +628,17 @@ Napi::Value AddonModel::GetVocabularyType(const Napi::CallbackInfo& info) {
635
628
  return info.Env().Undefined();
636
629
  }
637
630
 
638
- auto vocabularyType = llama_vocab_type(model);
631
+ auto vocabularyType = llama_vocab_type(vocab);
639
632
 
640
633
  return Napi::Number::From(info.Env(), int32_t(vocabularyType));
641
634
  }
642
635
  Napi::Value AddonModel::ShouldPrependBosToken(const Napi::CallbackInfo& info) {
643
- const bool addBos = llama_add_bos_token(model);
636
+ const bool addBos = llama_vocab_get_add_bos(vocab);
644
637
 
645
638
  return Napi::Boolean::New(info.Env(), addBos);
646
639
  }
647
640
  Napi::Value AddonModel::ShouldAppendEosToken(const Napi::CallbackInfo& info) {
648
- const bool addEos = llama_add_eos_token(model);
641
+ const bool addEos = llama_vocab_get_add_eos(vocab);
649
642
 
650
643
  return Napi::Boolean::New(info.Env(), addEos);
651
644
  }
@@ -678,7 +671,6 @@ void AddonModel::init(Napi::Object exports) {
678
671
  InstanceMethod("middleToken", &AddonModel::MiddleToken),
679
672
  InstanceMethod("suffixToken", &AddonModel::SuffixToken),
680
673
  InstanceMethod("eotToken", &AddonModel::EotToken),
681
- InstanceMethod("clsToken", &AddonModel::ClsToken),
682
674
  InstanceMethod("sepToken", &AddonModel::SepToken),
683
675
  InstanceMethod("getTokenString", &AddonModel::GetTokenString),
684
676
  InstanceMethod("getTokenAttributes", &AddonModel::GetTokenAttributes),
@@ -9,6 +9,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
9
9
  llama_model_params model_params;
10
10
  std::vector<llama_model_kv_override> kv_overrides;
11
11
  llama_model* model;
12
+ const llama_vocab* vocab;
12
13
  uint64_t loadedModelSize = 0;
13
14
  Napi::Reference<Napi::Object> addonExportsRef;
14
15
  bool hasAddonExportsRef = false;
@@ -49,7 +50,6 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
49
50
  Napi::Value MiddleToken(const Napi::CallbackInfo& info);
50
51
  Napi::Value SuffixToken(const Napi::CallbackInfo& info);
51
52
  Napi::Value EotToken(const Napi::CallbackInfo& info);
52
- Napi::Value ClsToken(const Napi::CallbackInfo& info);
53
53
  Napi::Value SepToken(const Napi::CallbackInfo& info);
54
54
  Napi::Value GetTokenString(const Napi::CallbackInfo& info);
55
55