node-llama-cpp 3.3.2 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -2
- package/dist/bindings/AddonTypes.d.ts +12 -4
- package/dist/bindings/Llama.d.ts +9 -0
- package/dist/bindings/Llama.js +52 -28
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/getLlama.d.ts +2 -1
- package/dist/bindings/getLlama.js +19 -9
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/utils/asyncSome.js +2 -0
- package/dist/bindings/utils/asyncSome.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.d.ts +1 -1
- package/dist/bindings/utils/compileLLamaCpp.js +115 -34
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +1 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js +4 -4
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -1
- package/dist/bindings/utils/detectBuildTools.d.ts +14 -0
- package/dist/bindings/utils/detectBuildTools.js +149 -0
- package/dist/bindings/utils/detectBuildTools.js.map +1 -0
- package/dist/bindings/utils/resolveActualBindingBinaryPath.d.ts +1 -0
- package/dist/bindings/utils/resolveActualBindingBinaryPath.js +18 -0
- package/dist/bindings/utils/resolveActualBindingBinaryPath.js.map +1 -0
- package/dist/bindings/utils/testBindingBinary.d.ts +1 -1
- package/dist/bindings/utils/testBindingBinary.js +58 -5
- package/dist/bindings/utils/testBindingBinary.js.map +1 -1
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/AlpacaChatWrapper.js +4 -0
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/FalconChatWrapper.js +4 -0
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/GeneralChatWrapper.js +4 -0
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +2 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js +8 -27
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +4 -0
- package/dist/cli/commands/ChatCommand.js +158 -13
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +4 -0
- package/dist/cli/commands/CompleteCommand.js +143 -10
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/DebugCommand.js +5 -5
- package/dist/cli/commands/DebugCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +4 -0
- package/dist/cli/commands/InfillCommand.js +142 -10
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/OnPostInstallCommand.js +12 -2
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +1 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +14 -7
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +13 -3
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +20 -10
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +2 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +234 -77
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
- package/dist/cli/recommendedModels.js +11 -1
- package/dist/cli/recommendedModels.js.map +1 -1
- package/dist/cli/utils/ConsoleTable.d.ts +1 -0
- package/dist/cli/utils/ConsoleTable.js +5 -1
- package/dist/cli/utils/ConsoleTable.js.map +1 -1
- package/dist/cli/utils/interactivelyAskForModel.d.ts +2 -1
- package/dist/cli/utils/interactivelyAskForModel.js +16 -13
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -1
- package/dist/cli/utils/isRunningUnderRosetta.d.ts +1 -0
- package/dist/cli/utils/isRunningUnderRosetta.js +20 -0
- package/dist/cli/utils/isRunningUnderRosetta.js.map +1 -0
- package/dist/cli/utils/printCommonInfoLines.d.ts +4 -2
- package/dist/cli/utils/printCommonInfoLines.js +67 -5
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -1
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +3 -1
- package/dist/cli/utils/resolveCommandGgufPath.js +6 -5
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -1
- package/dist/cli/utils/toBytes.d.ts +1 -0
- package/dist/cli/utils/toBytes.js +5 -0
- package/dist/cli/utils/toBytes.js.map +1 -0
- package/dist/config.d.ts +3 -0
- package/dist/config.js +3 -0
- package/dist/config.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +12 -3
- package/dist/evaluator/LlamaChat/LlamaChat.js +21 -7
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +6 -2
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +3 -0
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.d.ts +3 -0
- package/dist/evaluator/LlamaCompletion.js +5 -0
- package/dist/evaluator/LlamaCompletion.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +81 -38
- package/dist/evaluator/LlamaContext/LlamaContext.js +678 -132
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/TokenPredictor.d.ts +55 -0
- package/dist/evaluator/LlamaContext/TokenPredictor.js +20 -0
- package/dist/evaluator/LlamaContext/TokenPredictor.js.map +1 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.d.ts +56 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js +266 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js.map +1 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/InputLookupTokenPredictor.d.ts +58 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/InputLookupTokenPredictor.js +138 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/InputLookupTokenPredictor.js.map +1 -0
- package/dist/evaluator/LlamaContext/types.d.ts +198 -5
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +3 -0
- package/dist/evaluator/LlamaEmbeddingContext.js +3 -0
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.d.ts +7 -1
- package/dist/evaluator/LlamaGrammar.js +6 -0
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +4 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js +16 -8
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
- package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +5 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.js +7 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +19 -11
- package/dist/evaluator/LlamaModel/LlamaModel.js +23 -29
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
- package/dist/evaluator/LlamaRankingContext.d.ts +76 -0
- package/dist/evaluator/LlamaRankingContext.js +158 -0
- package/dist/evaluator/LlamaRankingContext.js.map +1 -0
- package/dist/evaluator/TokenBias.d.ts +3 -0
- package/dist/evaluator/TokenBias.js +3 -0
- package/dist/evaluator/TokenBias.js.map +1 -1
- package/dist/evaluator/utils/chunkDocument.d.ts +86 -0
- package/dist/evaluator/utils/chunkDocument.js +212 -0
- package/dist/evaluator/utils/chunkDocument.js.map +1 -0
- package/dist/gguf/insights/GgufInsights.d.ts +3 -1
- package/dist/gguf/insights/GgufInsights.js +114 -8
- package/dist/gguf/insights/GgufInsights.js.map +1 -1
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +6 -3
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +11 -7
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -1
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +2 -1
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +13 -7
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -1
- package/dist/gguf/parser/GgufV2Parser.js +29 -8
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -1
- package/dist/gguf/parser/parseGguf.js +11 -11
- package/dist/gguf/parser/parseGguf.js.map +1 -1
- package/dist/gguf/readGgufFileInfo.js +8 -3
- package/dist/gguf/readGgufFileInfo.js.map +1 -1
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +1 -0
- package/dist/gguf/types/GgufMetadataTypes.d.ts +9 -9
- package/dist/gguf/types/GgufMetadataTypes.js +1 -1
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +13 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -1
- package/dist/index.d.ts +7 -2
- package/dist/index.js +6 -1
- package/dist/index.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/utils/LlamaText.d.ts +4 -1
- package/dist/utils/LlamaText.js +4 -1
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/cmake.js +23 -0
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/pushAll.d.ts +1 -1
- package/dist/utils/pushAll.js.map +1 -1
- package/dist/utils/tokenizerUtils.js +1 -1
- package/dist/utils/utilTypes.d.ts +5 -0
- package/llama/CMakeLists.txt +25 -8
- package/llama/addon/AddonContext.cpp +196 -22
- package/llama/addon/AddonContext.h +1 -0
- package/llama/addon/AddonGrammar.cpp +1 -4
- package/llama/addon/AddonGrammarEvaluationState.cpp +16 -5
- package/llama/addon/AddonModel.cpp +31 -39
- package/llama/addon/AddonModel.h +1 -1
- package/llama/addon/AddonModelLora.cpp +2 -2
- package/llama/addon/AddonModelLora.h +1 -1
- package/llama/addon/AddonSampler.cpp +7 -12
- package/llama/addon/addon.cpp +26 -7
- package/llama/addon/globals/getGpuInfo.cpp +30 -5
- package/llama/addon/globals/getGpuInfo.h +6 -1
- package/llama/addon/globals/getMemoryInfo.cpp +63 -0
- package/llama/addon/globals/getMemoryInfo.h +4 -0
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/cmake/win32.ensureNinjaPath.cmake +68 -0
- package/llama/cmake/win32.ensureNodeLib.cmake +34 -0
- package/llama/cmake/win32.llvmApplyGnuModeAdaptations.cmake +12 -0
- package/llama/cmake/win32.llvmEnsureCmakeAr.cmake +37 -0
- package/llama/cmake/win32.llvmUseGnuModeCompilers.cmake +87 -0
- package/llama/cmake/win32.programFilesPaths.cmake +35 -0
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/vulkan-gpu-info.cpp +29 -2
- package/llama/gpuInfo/vulkan-gpu-info.h +1 -0
- package/llama/llama.cpp.info.json +1 -1
- package/llama/profiles/llvm.win32.host-arm64.target-arm64.cmake +14 -0
- package/llama/profiles/llvm.win32.host-x64.target-arm64.cmake +14 -0
- package/llama/profiles/llvm.win32.host-x64.target-x64.cmake +14 -0
- package/llama/toolchains/llvm.win32.host-x64.target-x64.cmake +20 -0
- package/llama/toolchains/win32.host-arm64.target-arm64.cmake +21 -0
- package/llama/toolchains/win32.host-x64.target-arm64.cmake +14 -34
- package/package.json +47 -44
- package/templates/README.md +1 -1
- package/templates/packed/electron-typescript-react.json +1 -1
- package/templates/packed/node-typescript.json +1 -1
package/llama/addon/AddonContext.cpp
CHANGED

```diff
@@ -1,5 +1,6 @@
 #include <thread>
 #include <algorithm>
+#include <cmath>
 #include "common/common.h"
 #include "llama-grammar.h"
 #include "llama.h"
@@ -104,13 +105,13 @@ class AddonContextLoadContextWorker : public Napi::AsyncWorker {
 
     void Execute() {
         try {
-            context->ctx =
+            context->ctx = llama_init_from_model(context->model->model, context->context_params);
 
             context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
         } catch (const std::exception& e) {
             SetError(e.what());
         } catch(...) {
-            SetError("Unknown error when calling \"
+            SetError("Unknown error when calling \"llama_init_from_model\"");
         }
     }
     void OnOK() {
@@ -190,6 +191,14 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
     public:
         AddonContext* ctx;
         AddonSampler* sampler;
+        bool arrayResult = false;
+        bool returnProbabilities = false;
+        bool returnConfidence = false;
+        float tokenConfidence = -1;
+        bool has_probabilities = false;
+        size_t probabilities_size;
+        llama_token * probabilities_tokens;
+        float * probabilities_probs;
         int32_t batchLogitIndex;
         llama_token result;
         bool no_output = false;
@@ -202,11 +211,19 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
 
         batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
         sampler = Napi::ObjectWrap<AddonSampler>::Unwrap(info[1].As<Napi::Object>());
+        arrayResult = info.Length() > 2 && info[2].IsBoolean();
+        returnProbabilities = arrayResult ? info[2].As<Napi::Boolean>().Value() : false;
+        returnConfidence = arrayResult && info.Length() > 3 && info[3].IsBoolean() ? info[3].As<Napi::Boolean>().Value() : false;
         sampler->Ref();
     }
     ~AddonContextSampleTokenWorker() {
         ctx->Unref();
         sampler->Unref();
+
+        if (has_probabilities) {
+            delete[] probabilities_tokens;
+            delete[] probabilities_probs;
+        }
     }
 
     Napi::Promise GetPromise() {
@@ -235,11 +252,11 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
         sampler->rebuildChainIfNeeded();
 
         const auto * logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
-        const int n_vocab =
+        const int n_vocab = llama_vocab_n_tokens(ctx->model->vocab);
 
         auto & candidates = sampler->tokenCandidates;
         for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-            candidates[token_id] = llama_token_data{token_id, logits[token_id], 0.0f}
+            candidates[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
         }
 
         llama_token_data_array cur_p = {
```
```diff
@@ -257,18 +274,111 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
         }
 
         auto new_token_id = cur_p.data[cur_p.selected].id;
+
+        if (returnProbabilities || returnConfidence) {
+            if (!cur_p.sorted) {
+                std::sort(cur_p.data, cur_p.data + cur_p.size, [](const llama_token_data & a, const llama_token_data & b) {
+                    return a.logit > b.logit;
+                });
+                cur_p.sorted = true;
+
+                for (size_t i = 0; i < cur_p.size; i++) {
+                    if (cur_p.data[i].id == new_token_id) {
+                        cur_p.selected = i;
+                        break;
+                    }
+                }
+            }
+        }
+
+        if (returnProbabilities) {
+            probabilities_size = cur_p.size;
+            probabilities_tokens = new llama_token[probabilities_size];
+            probabilities_probs = new float[probabilities_size];
+            float maxLogit = cur_p.size > 0 ? cur_p.data[0].logit : -INFINITY;
+
+            for (size_t i = 0; i < cur_p.size; i++) {
+                auto logit = cur_p.data[i].logit;
+
+                probabilities_tokens[i] = cur_p.data[i].id;
+                probabilities_probs[i] = logit;
+
+                if (logit > maxLogit) {
+                    maxLogit = logit;
+                }
+            }
+
+            if (probabilities_size > 0 && maxLogit != -INFINITY) {
+                float sum = 0.0f;
+                for (size_t i = 0; i < probabilities_size; i++) {
+                    float prob = expf(probabilities_probs[i] - maxLogit);
+                    probabilities_probs[i] = prob;
+                    sum += prob;
+                }
+
+                for (size_t i = 0; i < probabilities_size; i++) {
+                    probabilities_probs[i] /= sum;
+                }
+            }
+
+            has_probabilities = true;
+        }
+
+        if (returnConfidence) {
+            if (has_probabilities && cur_p.selected < probabilities_size) {
+                tokenConfidence = probabilities_probs[cur_p.selected];
+            } else {
+                float maxLogit = cur_p.data[0].logit;
+                float sum = 0.0f;
+                for (size_t i = 0; i < cur_p.size; i++) {
+                    auto logit = cur_p.data[i].logit;
+
+                    if (logit > maxLogit) {
+                        maxLogit = logit;
+                    }
+                }
+
+                for (size_t i = 0; i < cur_p.size; i++) {
+                    sum += expf(cur_p.data[i].logit - maxLogit);
+                }
+
+                tokenConfidence = expf(cur_p.data[cur_p.selected].logit - maxLogit) / sum;
+            }
+        }
+
         sampler->acceptToken(new_token_id);
         result = new_token_id;
     }
     void OnOK() {
+        Napi::Number resultToken;
         if (no_output) {
-
-
+            resultToken = Napi::Number::New(Env(), -1);
+        } else {
+            resultToken = Napi::Number::New(Env(), static_cast<uint32_t>(result));
+        }
+
+        if (!arrayResult) {
+            deferred.Resolve(resultToken);
             return;
         }
 
-        Napi::
-
+        Napi::Array resultArray = Napi::Array::New(Env(), 2);
+        resultArray.Set(Napi::Number::New(Env(), 0), resultToken);
+
+        if (has_probabilities) {
+            Napi::Array probabilities = Napi::Array::New(Env(), probabilities_size * 2);
+            for (size_t i = 0; i < probabilities_size; i++) {
+                probabilities.Set(i * 2, Napi::Number::New(Env(), probabilities_tokens[i]));
+                probabilities.Set(i * 2 + 1, Napi::Number::New(Env(), probabilities_probs[i]));
+            }
+            resultArray.Set(1, probabilities);
+        }
+
+        if (returnConfidence && tokenConfidence != -1) {
+            resultArray.Set(2, Napi::Number::New(Env(), tokenConfidence));
+        }
+
+        deferred.Resolve(resultArray);
     }
     void OnError(const Napi::Error& err) {
         deferred.Reject(err.Value());
```
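The sampling worker above computes per-token probabilities with the standard max-subtraction softmax (hence the new `<cmath>` include at the top of the file), so `expf` never sees a large positive argument, and resolves the result to JavaScript as a flat `[token, probability, token, probability, ...]` array. A minimal standalone sketch of that normalization, in plain C++ with no addon types:

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Standalone sketch of the normalization used above: softmax computed as
// expf(logit - maxLogit) / sum, so every expf argument is <= 0 and cannot overflow.
std::vector<float> softmax(const std::vector<float>& logits) {
    float maxLogit = -INFINITY;
    for (float logit : logits)
        if (logit > maxLogit) maxLogit = logit;

    std::vector<float> probs(logits.size());
    float sum = 0.0f;
    for (size_t i = 0; i < logits.size(); i++) {
        probs[i] = expf(logits[i] - maxLogit);
        sum += probs[i];
    }
    for (float& p : probs)
        p /= sum; // normalize to a probability distribution
    return probs;
}

int main() {
    // A naive expf(1000.0f) would overflow to infinity; the shifted form is
    // exact up to rounding because the max cancels out in the ratio.
    for (float p : softmax({1000.0f, 999.0f, 998.0f}))
        std::printf("%f\n", p); // ~0.665, ~0.245, ~0.090
}
```

The `tokenConfidence` value placed at index 2 of the result array is simply this softmax probability of the token that was actually sampled.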
```diff
@@ -305,6 +415,10 @@ AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<Ad
         context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
     }
 
+    if (options.Has("ranking") && options.Get("ranking").As<Napi::Boolean>().Value()) {
+        context_params.pooling_type = LLAMA_POOLING_TYPE_RANK;
+    }
+
     if (options.Has("flashAttention")) {
         context_params.flash_attn = options.Get("flashAttention").As<Napi::Boolean>().Value();
     }
```
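The new `ranking` option backs the `LlamaRankingContext` added in this release (see the file list above) by switching the context to llama.cpp's rank pooling. A hedged sketch of the underlying llama.cpp calls; the model path is a placeholder, and pairing `embeddings` with rank pooling reflects how reranker models are commonly run rather than anything this hunk shows:

```cpp
#include "llama.h"

// Minimal sketch: create a context with rank pooling, as the "ranking" option
// above does. "reranker.gguf" is a placeholder path.
int main() {
    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file("reranker.gguf", mparams);
    if (model == nullptr)
        return 1;

    llama_context_params cparams = llama_context_default_params();
    cparams.embeddings = true;                      // assumption: reranking runs in embedding mode
    cparams.pooling_type = LLAMA_POOLING_TYPE_RANK; // pool outputs into a single relevance score

    llama_context * ctx = llama_init_from_model(model, cparams);
    // ... evaluate a query/document pair, then read the pooled score
    //     for the sequence via llama_get_embeddings_seq(ctx, 0) ...
    llama_free(ctx);
    llama_model_free(model);
    return 0;
}
```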
```diff
@@ -411,7 +525,7 @@ Napi::Value AddonContext::InitBatch(const Napi::CallbackInfo& info) {
     has_batch = true;
     batch_n_tokens = n_tokens;
 
-    uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens,
+    uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_model_n_embd(model->model), context_params.n_batch);
     if (newBatchMemorySize > batchMemorySize) {
         adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
         batchMemorySize = newBatchMemorySize;
@@ -441,24 +555,25 @@ Napi::Value AddonContext::AddToBatch(const Napi::CallbackInfo& info) {
     int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
     int32_t firstTokenContextIndex = info[1].As<Napi::Number>().Int32Value();
     Napi::Uint32Array tokens = info[2].As<Napi::Uint32Array>();
-
+    Napi::Uint32Array tokenLogitIndexes = info[3].As<Napi::Uint32Array>();
 
     auto tokensLength = tokens.ElementLength();
+    auto tokenLogitIndexesLength = tokenLogitIndexes.ElementLength();
     GGML_ASSERT(batch.n_tokens + tokensLength <= batch_n_tokens);
 
-
-        common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, false);
-    }
-
-    if (generateLogitAtTheEnd) {
-        batch.logits[batch.n_tokens - 1] = true;
+    Napi::Uint32Array resLogitIndexes = Napi::Uint32Array::New(info.Env(), tokenLogitIndexesLength);
 
-
-
-
+    for (size_t i = 0, l = 0; i < tokensLength; i++) {
+        if (l < tokenLogitIndexesLength && l < tokenLogitIndexesLength && tokenLogitIndexes[l] == i) {
+            common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, true);
+            resLogitIndexes[l] = batch.n_tokens - 1;
+            l++;
+        } else {
+            common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, false);
+        }
     }
 
-    return
+    return resLogitIndexes;
 }
 Napi::Value AddonContext::DisposeSequence(const Napi::CallbackInfo& info) {
     if (disposed) {
```
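`AddToBatch` now takes a sorted array of token indexes that should produce logits (replacing the previous end-of-batch-only flag) and returns the batch position assigned to each, so callers can later fetch logits with `llama_get_logits_ith`. A standalone sketch of that index mapping, with plain vectors in place of the N-API typed arrays and a sequential counter standing in for `batch.n_tokens`:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Sketch of the mapping in the reworked AddToBatch: logitIndexes is a sorted
// list of positions within `tokens` that need logits; the result records the
// batch position assigned to each such token.
std::vector<uint32_t> mapLogitIndexes(const std::vector<uint32_t>& tokens,
                                      const std::vector<uint32_t>& logitIndexes,
                                      uint32_t batchStart) {
    std::vector<uint32_t> batchPositions(logitIndexes.size());
    for (size_t i = 0, l = 0; i < tokens.size(); i++) {
        const uint32_t batchPos = batchStart + (uint32_t)i; // stands in for batch.n_tokens - 1
        if (l < logitIndexes.size() && logitIndexes[l] == i) {
            batchPositions[l] = batchPos; // this token was added with logits enabled
            l++;
        }
        // all other tokens are added with logits disabled
    }
    return batchPositions;
}

int main() {
    // tokens at positions 2 and 4 request logits; the batch already holds 10 tokens
    for (uint32_t pos : mapLogitIndexes({11, 22, 33, 44, 55}, {2, 4}, 10))
        std::printf("%u\n", pos); // prints 12 and 14
}
```

Requesting logits only at the positions the caller will actually sample from keeps logit computation and memory proportional to what is needed, rather than always materializing one logit row per batch.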
```diff
@@ -530,7 +645,7 @@ Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    const int n_embd =
+    const int n_embd = llama_model_n_embd(model->model);
     const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
     const auto* embeddings = pooling_type == LLAMA_POOLING_TYPE_NONE ? NULL : llama_get_embeddings_seq(ctx, 0);
     if (embeddings == NULL) {
@@ -592,11 +707,69 @@ Napi::Value AddonContext::PrintTimings(const Napi::CallbackInfo& info) {
     return info.Env().Undefined();
 }
 
+Napi::Value AddonContext::EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info) {
+    constexpr auto vocabSizeMaxDifference = 128; // SPEC_VOCAB_MAX_SIZE_DIFFERENCE
+    constexpr auto vocabCheckStartTokenId = 5; // SPEC_VOCAB_CHECK_START_TOKEN_ID
+
+    const AddonContext * draftContext = Napi::ObjectWrap<AddonContext>::Unwrap(info[0].As<Napi::Object>());
+    const auto currentCtx = ctx;
+    const auto draftCtx = draftContext->ctx;
+    const auto currentModel = model->model;
+    const auto draftModel = draftContext->model->model;
+    const auto currentVocab = model->vocab;
+    const auto draftVocab = draftContext->model->vocab;
+
+    if (llama_vocab_type(currentVocab) != llama_vocab_type(draftVocab)) {
+        Napi::Error::New(info.Env(), "Speculative draft model vocabulary type must match the target model vocabulary type").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    if (llama_vocab_get_add_bos(currentVocab) != llama_vocab_get_add_bos(draftVocab) ||
+        llama_vocab_get_add_eos(currentVocab) != llama_vocab_get_add_eos(draftVocab) ||
+        llama_vocab_bos(currentVocab) != llama_vocab_bos(draftVocab) ||
+        llama_vocab_eos(currentVocab) != llama_vocab_eos(draftVocab)
+    ) {
+        Napi::Error::New(info.Env(), "Speculative draft model special tokens must match the target model special tokens").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    const int currentModelVocabSize = llama_vocab_n_tokens(currentVocab);
+    const int draftModelVocabSize = llama_vocab_n_tokens(draftVocab);
+
+    const int vocabDiff = std::abs(currentModelVocabSize - draftModelVocabSize);
+
+    if (vocabDiff > vocabSizeMaxDifference) {
+        Napi::Error::New(
+            info.Env(),
+            std::string("Speculative draft model vocabulary must closely match the target model vocabulary size (vocabulary size difference: ") +
+            std::to_string(vocabDiff) + std::string(", max allowed: ") + std::to_string(vocabSizeMaxDifference) + std::string(")")
+        ).ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    const int minVocabSize = std::min(currentModelVocabSize, draftModelVocabSize);
+    for (int i = vocabCheckStartTokenId; i < minVocabSize; ++i) {
+        const char * currentTokenText = llama_vocab_get_text(currentVocab, i);
+        const char * draftTokenText = llama_vocab_get_text(draftVocab, i);
+        if (std::strcmp(currentTokenText, draftTokenText) != 0) {
+            Napi::Error::New(
+                info.Env(),
+                std::string("Speculative draft model vocabulary must match the target model vocabulary, but token ") +
+                std::to_string(i) + std::string(" content differs. Target: \"") + std::string(currentTokenText) +
+                std::string("\", Draft: \"") + std::string(draftTokenText) + std::string("")
+            ).ThrowAsJavaScriptException();
+            return info.Env().Undefined();
+        }
+    }
+
+    return info.Env().Undefined();
+}
+
 Napi::Value AddonContext::SetLora(const Napi::CallbackInfo& info) {
     AddonModelLora* lora = Napi::ObjectWrap<AddonModelLora>::Unwrap(info[0].As<Napi::Object>());
     float scale = info[1].As<Napi::Number>().FloatValue();
 
-
+    llama_set_adapter_lora(ctx, lora->lora_adapter, scale);
 
     return info.Env().Undefined();
 }
```
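This compatibility check guards the speculative-decoding support added in this release (see `DraftSequenceTokenPredictor` in the file list): token IDs produced by the draft model are only meaningful to the target model if both vocabularies agree, so the method compares vocabulary type, BOS/EOS behavior, size (within `SPEC_VOCAB_MAX_SIZE_DIFFERENCE`), and per-token text. A toy sketch of the verification step those guarantees enable; this is illustrative only, not the library's implementation:

```cpp
#include <cstdio>
#include <vector>

using Token = int;

// Toy sketch of speculative verification: the target model re-checks the
// draft's proposed tokens left to right and keeps the longest agreeing prefix.
// Comparing raw token IDs like this is only sound when both models share a
// vocabulary, which is what EnsureDraftContextIsCompatibleForSpeculative verifies.
size_t acceptedPrefix(const std::vector<Token>& draftTokens,
                      const std::vector<Token>& targetTokens) {
    size_t accepted = 0;
    while (accepted < draftTokens.size() && accepted < targetTokens.size() &&
           draftTokens[accepted] == targetTokens[accepted]) {
        accepted++; // draft token confirmed by the target model
    }
    return accepted;
}

int main() {
    // the draft proposed four tokens; the target agrees with the first two
    std::printf("%zu\n", acceptedPrefix({5, 9, 3, 7}, {5, 9, 8, 7})); // prints 2
}
```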
```diff
@@ -622,6 +795,7 @@ void AddonContext::init(Napi::Object exports) {
     InstanceMethod("getThreads", &AddonContext::GetThreads),
     InstanceMethod("setThreads", &AddonContext::SetThreads),
     InstanceMethod("printTimings", &AddonContext::PrintTimings),
+    InstanceMethod("ensureDraftContextIsCompatibleForSpeculative", &AddonContext::EnsureDraftContextIsCompatibleForSpeculative),
     InstanceMethod("setLora", &AddonContext::SetLora),
     InstanceMethod("dispose", &AddonContext::Dispose),
 }
```
package/llama/addon/AddonContext.h
CHANGED

```diff
@@ -45,6 +45,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
     Napi::Value SetThreads(const Napi::CallbackInfo& info);
 
     Napi::Value PrintTimings(const Napi::CallbackInfo& info);
+    Napi::Value EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info);
 
     Napi::Value SetLora(const Napi::CallbackInfo& info);
 
```
package/llama/addon/AddonGrammar.cpp
CHANGED

```diff
@@ -46,13 +46,10 @@ Napi::Value AddonGrammar::isTextCompatible(const Napi::CallbackInfo& info) {
     }
 
     const auto cpts = unicode_cpts_from_utf8(testText);
-    const llama_grammar_rules & rules = llama_grammar_get_rules(parsed_grammar);
     llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(parsed_grammar);
 
     for (const auto & cpt : cpts) {
-
-
-        llama_grammar_accept(rules, stacks_prev, cpt, stacks_cur);
+        llama_grammar_accept(parsed_grammar, cpt);
 
         if (stacks_cur.empty()) {
             // no stacks means that the grammar failed to match at this point
```
package/llama/addon/AddonGrammarEvaluationState.cpp
CHANGED

```diff
@@ -6,13 +6,24 @@
 #include "AddonGrammar.h"
 
 AddonGrammarEvaluationState::AddonGrammarEvaluationState(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammarEvaluationState>(info) {
-
-
+    if (info.Length() == 1) {
+        AddonGrammarEvaluationState* existingState = Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
+        model = existingState->model;
+        model->Ref();
 
-
-
+        grammarDef = existingState->grammarDef;
+        grammarDef->Ref();
 
-
+        sampler = llama_sampler_clone(existingState->sampler);
+    } else {
+        model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
+        model->Ref();
+
+        grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[1].As<Napi::Object>());
+        grammarDef->Ref();
+
+        sampler = llama_sampler_init_grammar(model->vocab, grammarDef->grammarCode.c_str(), grammarDef->rootRuleName.c_str());
+    }
 }
 AddonGrammarEvaluationState::~AddonGrammarEvaluationState() {
     llama_sampler_free(sampler);
```
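The constructor now accepts either `(model, grammar)` to build a fresh grammar sampler or a single existing state to duplicate, and the duplication path uses `llama_sampler_clone`, which makes it cheap to fork a grammar evaluation state mid-generation and let the copies diverge. A small sketch of those clone semantics against the llama.cpp sampler API; the grammar text and root rule name are placeholders:

```cpp
#include "llama.h"

// Sketch of the fork semantics used above. The vocab comes from a loaded model;
// the GBNF grammar and root rule name here are placeholders.
void forkGrammarState(const llama_vocab * vocab) {
    llama_sampler * original =
        llama_sampler_init_grammar(vocab, "root ::= \"a\"+", "root");

    // llama_sampler_clone produces an independent copy of the sampler state;
    // advancing one state with llama_sampler_accept leaves the other untouched.
    llama_sampler * fork = llama_sampler_clone(original);

    llama_sampler_free(fork);
    llama_sampler_free(original);
}
```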
package/llama/addon/AddonModel.cpp
CHANGED

```diff
@@ -8,12 +8,12 @@
 #include "AddonModelData.h"
 #include "AddonModelLora.h"
 
-static Napi::Value getNapiToken(const Napi::CallbackInfo& info,
+static Napi::Value getNapiToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
     if (token < 0 || token == LLAMA_TOKEN_NULL) {
         return Napi::Number::From(info.Env(), -1);
     }
 
-    auto tokenAttributes =
+    auto tokenAttributes = llama_vocab_get_attr(vocab, token);
 
     if (tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED || tokenAttributes & LLAMA_TOKEN_ATTR_UNKNOWN) {
         return Napi::Number::From(info.Env(), -1);
@@ -22,12 +22,12 @@ static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* mod
     return Napi::Number::From(info.Env(), token);
 }
 
-static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info,
+static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, const llama_vocab* vocab, llama_token token) {
     if (token < 0) {
         return Napi::Number::From(info.Env(), -1);
     }
 
-    auto tokenAttributes =
+    auto tokenAttributes = llama_vocab_get_attr(vocab, token);
 
     if (!(tokenAttributes & LLAMA_TOKEN_ATTR_CONTROL) && !(tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED)) {
         return Napi::Number::From(info.Env(), -1);
```
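These signature changes track llama.cpp's migration of tokenizer queries from `llama_model *` to a dedicated `const llama_vocab *` handle; the rest of this file follows the same pattern, fetching the vocab once at load time and passing it to every token query. A brief sketch of the new-style calls, assuming an already-loaded model:

```cpp
#include <cstdio>
#include "llama.h"

// Sketch of the vocab-handle pattern this file migrates to: token queries take
// a const llama_vocab* obtained once from the model, instead of the model itself.
void printVocabInfo(const llama_model * model) {
    const llama_vocab * vocab = llama_model_get_vocab(model);

    std::printf("bos token: %d\n", llama_vocab_bos(vocab));
    std::printf("eos token: %d\n", llama_vocab_eos(vocab));
    std::printf("vocab size: %d\n", llama_vocab_n_tokens(vocab));
    std::printf("prepend bos: %s\n", llama_vocab_get_add_bos(vocab) ? "yes" : "no");
}
```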
```diff
@@ -92,13 +92,14 @@ class AddonModelLoadModelWorker : public Napi::AsyncWorker {
 
     void Execute() {
         try {
-            model->model =
+            model->model = llama_model_load_from_file(model->modelPath.c_str(), model->model_params);
+            model->vocab = llama_model_get_vocab(model->model);
 
             model->modelLoaded = model->model != nullptr && model->model != NULL;
         } catch (const std::exception& e) {
             SetError(e.what());
         } catch(...) {
-            SetError("Unknown error when calling \"
+            SetError("Unknown error when calling \"llama_model_load_from_file\"");
         }
     }
     void OnOK() {
@@ -141,14 +142,14 @@ class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
 
     void Execute() {
         try {
-
+            llama_model_free(model->model);
             model->modelLoaded = false;
 
             model->dispose();
         } catch (const std::exception& e) {
             SetError(e.what());
         } catch(...) {
-            SetError("Unknown error when calling \"
+            SetError("Unknown error when calling \"llama_model_free\"");
         }
     }
     void OnOK() {
@@ -190,7 +191,7 @@ class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
 
     void Execute() {
         try {
-            const auto loraAdapter =
+            const auto loraAdapter = llama_adapter_lora_init(modelLora->model->model, modelLora->loraFilePath.c_str());
 
             if (loraAdapter == nullptr) {
                 SetError(
@@ -213,7 +214,7 @@ class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
         } catch (const std::exception& e) {
             SetError(e.what());
         } catch(...) {
-            SetError("Unknown error when calling \"
+            SetError("Unknown error when calling \"llama_adapter_lora_init\"");
         }
     }
     void OnOK() {
@@ -359,7 +360,7 @@ void AddonModel::dispose() {
     disposed = true;
     if (modelLoaded) {
         modelLoaded = false;
-
+        llama_model_free(model);
 
         adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
         loadedModelSize = 0;
@@ -426,7 +427,7 @@ Napi::Value AddonModel::Tokenize(const Napi::CallbackInfo& info) {
     std::string text = info[0].As<Napi::String>().Utf8Value();
     bool specialTokens = info[1].As<Napi::Boolean>().Value();
 
-    std::vector<llama_token> tokens = common_tokenize(
+    std::vector<llama_token> tokens = common_tokenize(vocab, text, false, specialTokens);
 
     Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
     for (size_t i = 0; i < tokens.size(); ++i) {
@@ -449,10 +450,10 @@ Napi::Value AddonModel::Detokenize(const Napi::CallbackInfo& info) {
     std::string result;
     result.resize(std::max(result.capacity(), tokens.ElementLength()));
 
-    int n_chars = llama_detokenize(
+    int n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
     if (n_chars < 0) {
         result.resize(-n_chars);
-        n_chars = llama_detokenize(
+        n_chars = llama_detokenize(vocab, (llama_token*)tokens.Data(), tokens.ElementLength(), &result[0], result.size(), false, decodeSpecialTokens);
         GGML_ASSERT(n_chars <= result.size()); // whitespace trimming is performed after per-token detokenization
     }
 
```
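The `llama_detokenize` calls follow llama.cpp's buffer-sizing convention: a negative return value is the negated required length, so the code above resizes and retries exactly once. A standalone sketch of the same two-pass pattern; `writeString` is a hypothetical stand-in, not a llama.cpp API:

```cpp
#include <cstdio>
#include <cstring>
#include <string>

// Hypothetical stand-in for a llama.cpp-style writer: returns the number of
// bytes written, or the negated required size when the buffer is too small.
static int writeString(const char * src, char * buf, int bufSize) {
    const int needed = (int)std::strlen(src);
    if (needed > bufSize)
        return -needed;
    std::memcpy(buf, src, needed);
    return needed;
}

// The two-pass pattern used by Detokenize above: try once, and on a negative
// result resize the buffer to the reported size and retry.
std::string detokenizeLike(const char * src) {
    std::string result;
    result.resize(8); // deliberately small first guess
    int n = writeString(src, &result[0], (int)result.size());
    if (n < 0) {
        result.resize(-n);
        n = writeString(src, &result[0], (int)result.size());
    }
    result.resize(n);
    return result;
}

int main() {
    std::puts(detokenizeLike("a string longer than the first guess").c_str());
}
```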
```diff
@@ -467,7 +468,7 @@ Napi::Value AddonModel::GetTrainContextSize(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    return Napi::Number::From(info.Env(),
+    return Napi::Number::From(info.Env(), llama_model_n_ctx_train(model));
 }
 
 Napi::Value AddonModel::GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
@@ -476,7 +477,7 @@ Napi::Value AddonModel::GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    return Napi::Number::From(info.Env(),
+    return Napi::Number::From(info.Env(), llama_model_n_embd(model));
 }
 
 Napi::Value AddonModel::GetTotalSize(const Napi::CallbackInfo& info) {
@@ -515,7 +516,7 @@ Napi::Value AddonModel::TokenBos(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    return getNapiControlToken(info,
+    return getNapiControlToken(info, vocab, llama_vocab_bos(vocab));
 }
 Napi::Value AddonModel::TokenEos(const Napi::CallbackInfo& info) {
     if (disposed) {
@@ -523,7 +524,7 @@ Napi::Value AddonModel::TokenEos(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    return getNapiControlToken(info,
+    return getNapiControlToken(info, vocab, llama_vocab_eos(vocab));
 }
 Napi::Value AddonModel::TokenNl(const Napi::CallbackInfo& info) {
     if (disposed) {
@@ -531,7 +532,7 @@ Napi::Value AddonModel::TokenNl(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    return getNapiToken(info,
+    return getNapiToken(info, vocab, llama_vocab_nl(vocab));
 }
 Napi::Value AddonModel::PrefixToken(const Napi::CallbackInfo& info) {
     if (disposed) {
@@ -539,7 +540,7 @@ Napi::Value AddonModel::PrefixToken(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    return getNapiToken(info,
+    return getNapiToken(info, vocab, llama_vocab_fim_pre(vocab));
 }
 Napi::Value AddonModel::MiddleToken(const Napi::CallbackInfo& info) {
     if (disposed) {
@@ -547,7 +548,7 @@ Napi::Value AddonModel::MiddleToken(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    return getNapiToken(info,
+    return getNapiToken(info, vocab, llama_vocab_fim_mid(vocab));
 }
 Napi::Value AddonModel::SuffixToken(const Napi::CallbackInfo& info) {
     if (disposed) {
@@ -555,7 +556,7 @@ Napi::Value AddonModel::SuffixToken(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    return getNapiToken(info,
+    return getNapiToken(info, vocab, llama_vocab_fim_suf(vocab));
 }
 Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
     if (disposed) {
@@ -563,15 +564,7 @@ Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    return getNapiToken(info,
-}
-Napi::Value AddonModel::ClsToken(const Napi::CallbackInfo& info) {
-    if (disposed) {
-        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
-        return info.Env().Undefined();
-    }
-
-    return getNapiToken(info, model, llama_token_cls(model));
+    return getNapiToken(info, vocab, llama_vocab_eot(vocab));
 }
 Napi::Value AddonModel::SepToken(const Napi::CallbackInfo& info) {
     if (disposed) {
@@ -579,7 +572,7 @@ Napi::Value AddonModel::SepToken(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    return getNapiToken(info,
+    return getNapiToken(info, vocab, llama_vocab_sep(vocab));
 }
 Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
     if (disposed) {
@@ -590,7 +583,7 @@ Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
     int token = info[0].As<Napi::Number>().Int32Value();
     std::stringstream ss;
 
-    const char* str =
+    const char* str = llama_vocab_get_text(vocab, token);
     if (str == nullptr) {
         return info.Env().Undefined();
     }
@@ -611,7 +604,7 @@ Napi::Value AddonModel::GetTokenAttributes(const Napi::CallbackInfo& info) {
     }
 
     int token = info[0].As<Napi::Number>().Int32Value();
-    auto tokenAttributes =
+    auto tokenAttributes = llama_vocab_get_attr(vocab, token);
 
     return Napi::Number::From(info.Env(), int32_t(tokenAttributes));
 }
@@ -627,7 +620,7 @@ Napi::Value AddonModel::IsEogToken(const Napi::CallbackInfo& info) {
 
     int token = info[0].As<Napi::Number>().Int32Value();
 
-    return Napi::Boolean::New(info.Env(),
+    return Napi::Boolean::New(info.Env(), llama_vocab_is_eog(vocab, token));
 }
 Napi::Value AddonModel::GetVocabularyType(const Napi::CallbackInfo& info) {
     if (disposed) {
@@ -635,17 +628,17 @@ Napi::Value AddonModel::GetVocabularyType(const Napi::CallbackInfo& info) {
         return info.Env().Undefined();
     }
 
-    auto vocabularyType = llama_vocab_type(
+    auto vocabularyType = llama_vocab_type(vocab);
 
     return Napi::Number::From(info.Env(), int32_t(vocabularyType));
 }
 Napi::Value AddonModel::ShouldPrependBosToken(const Napi::CallbackInfo& info) {
-    const bool addBos =
+    const bool addBos = llama_vocab_get_add_bos(vocab);
 
     return Napi::Boolean::New(info.Env(), addBos);
 }
 Napi::Value AddonModel::ShouldAppendEosToken(const Napi::CallbackInfo& info) {
-    const bool addEos =
+    const bool addEos = llama_vocab_get_add_eos(vocab);
 
     return Napi::Boolean::New(info.Env(), addEos);
 }
@@ -678,7 +671,6 @@ void AddonModel::init(Napi::Object exports) {
     InstanceMethod("middleToken", &AddonModel::MiddleToken),
     InstanceMethod("suffixToken", &AddonModel::SuffixToken),
     InstanceMethod("eotToken", &AddonModel::EotToken),
-    InstanceMethod("clsToken", &AddonModel::ClsToken),
    InstanceMethod("sepToken", &AddonModel::SepToken),
     InstanceMethod("getTokenString", &AddonModel::GetTokenString),
     InstanceMethod("getTokenAttributes", &AddonModel::GetTokenAttributes),
```
package/llama/addon/AddonModel.h
CHANGED

```diff
@@ -9,6 +9,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
     llama_model_params model_params;
     std::vector<llama_model_kv_override> kv_overrides;
     llama_model* model;
+    const llama_vocab* vocab;
     uint64_t loadedModelSize = 0;
     Napi::Reference<Napi::Object> addonExportsRef;
     bool hasAddonExportsRef = false;
@@ -49,7 +50,6 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
     Napi::Value MiddleToken(const Napi::CallbackInfo& info);
     Napi::Value SuffixToken(const Napi::CallbackInfo& info);
    Napi::Value EotToken(const Napi::CallbackInfo& info);
-    Napi::Value ClsToken(const Napi::CallbackInfo& info);
     Napi::Value SepToken(const Napi::CallbackInfo& info);
     Napi::Value GetTokenString(const Napi::CallbackInfo& info);
```