node-llama-cpp 3.3.2 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/dist/bindings/AddonTypes.d.ts +12 -4
- package/dist/bindings/Llama.d.ts +9 -0
- package/dist/bindings/Llama.js +52 -28
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/getLlama.d.ts +2 -1
- package/dist/bindings/getLlama.js +19 -9
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/utils/asyncSome.js +2 -0
- package/dist/bindings/utils/asyncSome.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.d.ts +1 -1
- package/dist/bindings/utils/compileLLamaCpp.js +108 -34
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +1 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js +4 -4
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -1
- package/dist/bindings/utils/detectBuildTools.d.ts +14 -0
- package/dist/bindings/utils/detectBuildTools.js +149 -0
- package/dist/bindings/utils/detectBuildTools.js.map +1 -0
- package/dist/bindings/utils/resolveActualBindingBinaryPath.d.ts +1 -0
- package/dist/bindings/utils/resolveActualBindingBinaryPath.js +18 -0
- package/dist/bindings/utils/resolveActualBindingBinaryPath.js.map +1 -0
- package/dist/bindings/utils/testBindingBinary.d.ts +1 -1
- package/dist/bindings/utils/testBindingBinary.js +58 -5
- package/dist/bindings/utils/testBindingBinary.js.map +1 -1
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/AlpacaChatWrapper.js +4 -0
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/FalconChatWrapper.js +4 -0
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/GeneralChatWrapper.js +4 -0
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +2 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js +8 -27
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +4 -0
- package/dist/cli/commands/ChatCommand.js +155 -11
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +4 -0
- package/dist/cli/commands/CompleteCommand.js +143 -10
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/DebugCommand.js +5 -5
- package/dist/cli/commands/DebugCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +4 -0
- package/dist/cli/commands/InfillCommand.js +142 -10
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/OnPostInstallCommand.js +12 -2
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +1 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +14 -7
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +13 -3
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +20 -10
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +2 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +234 -77
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
- package/dist/cli/utils/ConsoleTable.d.ts +1 -0
- package/dist/cli/utils/ConsoleTable.js +5 -1
- package/dist/cli/utils/ConsoleTable.js.map +1 -1
- package/dist/cli/utils/interactivelyAskForModel.d.ts +2 -1
- package/dist/cli/utils/interactivelyAskForModel.js +16 -13
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -1
- package/dist/cli/utils/isRunningUnderRosetta.d.ts +1 -0
- package/dist/cli/utils/isRunningUnderRosetta.js +20 -0
- package/dist/cli/utils/isRunningUnderRosetta.js.map +1 -0
- package/dist/cli/utils/printCommonInfoLines.d.ts +4 -2
- package/dist/cli/utils/printCommonInfoLines.js +67 -5
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -1
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +3 -1
- package/dist/cli/utils/resolveCommandGgufPath.js +6 -5
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -1
- package/dist/cli/utils/toBytes.d.ts +1 -0
- package/dist/cli/utils/toBytes.js +5 -0
- package/dist/cli/utils/toBytes.js.map +1 -0
- package/dist/config.d.ts +3 -0
- package/dist/config.js +3 -0
- package/dist/config.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +12 -3
- package/dist/evaluator/LlamaChat/LlamaChat.js +21 -7
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +6 -2
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +3 -0
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.d.ts +3 -0
- package/dist/evaluator/LlamaCompletion.js +5 -0
- package/dist/evaluator/LlamaCompletion.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +81 -38
- package/dist/evaluator/LlamaContext/LlamaContext.js +678 -132
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/TokenPredictor.d.ts +55 -0
- package/dist/evaluator/LlamaContext/TokenPredictor.js +20 -0
- package/dist/evaluator/LlamaContext/TokenPredictor.js.map +1 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.d.ts +56 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js +266 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js.map +1 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/InputLookupTokenPredictor.d.ts +58 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/InputLookupTokenPredictor.js +138 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/InputLookupTokenPredictor.js.map +1 -0
- package/dist/evaluator/LlamaContext/types.d.ts +198 -5
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +3 -0
- package/dist/evaluator/LlamaEmbeddingContext.js +3 -0
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.d.ts +7 -1
- package/dist/evaluator/LlamaGrammar.js +6 -0
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +4 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js +16 -8
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
- package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +5 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.js +7 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +19 -11
- package/dist/evaluator/LlamaModel/LlamaModel.js +23 -29
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
- package/dist/evaluator/LlamaRankingContext.d.ts +76 -0
- package/dist/evaluator/LlamaRankingContext.js +159 -0
- package/dist/evaluator/LlamaRankingContext.js.map +1 -0
- package/dist/evaluator/TokenBias.d.ts +3 -0
- package/dist/evaluator/TokenBias.js +3 -0
- package/dist/evaluator/TokenBias.js.map +1 -1
- package/dist/evaluator/utils/chunkDocument.d.ts +86 -0
- package/dist/evaluator/utils/chunkDocument.js +212 -0
- package/dist/evaluator/utils/chunkDocument.js.map +1 -0
- package/dist/gguf/insights/GgufInsights.d.ts +3 -1
- package/dist/gguf/insights/GgufInsights.js +114 -8
- package/dist/gguf/insights/GgufInsights.js.map +1 -1
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +6 -3
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +11 -7
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -1
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +2 -1
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +13 -7
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -1
- package/dist/gguf/parser/GgufV2Parser.js +29 -8
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -1
- package/dist/gguf/parser/parseGguf.js +11 -11
- package/dist/gguf/parser/parseGguf.js.map +1 -1
- package/dist/gguf/readGgufFileInfo.js +8 -3
- package/dist/gguf/readGgufFileInfo.js.map +1 -1
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +1 -0
- package/dist/gguf/types/GgufMetadataTypes.d.ts +9 -9
- package/dist/gguf/types/GgufMetadataTypes.js +1 -1
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +13 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -1
- package/dist/index.d.ts +7 -2
- package/dist/index.js +6 -1
- package/dist/index.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/utils/LlamaText.d.ts +4 -1
- package/dist/utils/LlamaText.js +4 -1
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/cmake.js +23 -0
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/pushAll.d.ts +1 -1
- package/dist/utils/pushAll.js.map +1 -1
- package/dist/utils/tokenizerUtils.js +1 -1
- package/dist/utils/utilTypes.d.ts +5 -0
- package/llama/CMakeLists.txt +25 -8
- package/llama/addon/AddonContext.cpp +188 -16
- package/llama/addon/AddonContext.h +1 -0
- package/llama/addon/AddonGrammar.cpp +1 -4
- package/llama/addon/AddonGrammarEvaluationState.cpp +16 -5
- package/llama/addon/AddonModel.cpp +11 -15
- package/llama/addon/AddonModel.h +0 -1
- package/llama/addon/AddonSampler.cpp +1 -6
- package/llama/addon/addon.cpp +26 -7
- package/llama/addon/globals/getGpuInfo.cpp +30 -5
- package/llama/addon/globals/getGpuInfo.h +6 -1
- package/llama/addon/globals/getMemoryInfo.cpp +63 -0
- package/llama/addon/globals/getMemoryInfo.h +4 -0
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/cmake/win32.ensureNinjaPath.cmake +68 -0
- package/llama/cmake/win32.ensureNodeLib.cmake +34 -0
- package/llama/cmake/win32.llvmApplyGnuModeAdaptations.cmake +12 -0
- package/llama/cmake/win32.llvmEnsureCmakeAr.cmake +37 -0
- package/llama/cmake/win32.llvmUseGnuModeCompilers.cmake +87 -0
- package/llama/cmake/win32.programFilesPaths.cmake +31 -0
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/vulkan-gpu-info.cpp +29 -2
- package/llama/gpuInfo/vulkan-gpu-info.h +1 -0
- package/llama/llama.cpp.info.json +1 -1
- package/llama/profiles/llvm.win32.host-arm64.target-arm64.cmake +14 -0
- package/llama/profiles/llvm.win32.host-x64.target-arm64.cmake +14 -0
- package/llama/profiles/llvm.win32.host-x64.target-x64.cmake +14 -0
- package/llama/toolchains/llvm.win32.host-x64.target-x64.cmake +20 -0
- package/llama/toolchains/win32.host-arm64.target-arm64.cmake +21 -0
- package/llama/toolchains/win32.host-x64.target-arm64.cmake +14 -34
- package/package.json +43 -43
- package/templates/packed/electron-typescript-react.json +1 -1
- package/templates/packed/node-typescript.json +1 -1
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#include <thread>
|
|
2
2
|
#include <algorithm>
|
|
3
|
+
#include <cmath>
|
|
3
4
|
#include "common/common.h"
|
|
4
5
|
#include "llama-grammar.h"
|
|
5
6
|
#include "llama.h"
|
|
@@ -190,6 +191,14 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
|
190
191
|
public:
|
|
191
192
|
AddonContext* ctx;
|
|
192
193
|
AddonSampler* sampler;
|
|
194
|
+
bool arrayResult = false;
|
|
195
|
+
bool returnProbabilities = false;
|
|
196
|
+
bool returnConfidence = false;
|
|
197
|
+
float tokenConfidence = -1;
|
|
198
|
+
bool has_probabilities = false;
|
|
199
|
+
size_t probabilities_size;
|
|
200
|
+
llama_token * probabilities_tokens;
|
|
201
|
+
float * probabilities_probs;
|
|
193
202
|
int32_t batchLogitIndex;
|
|
194
203
|
llama_token result;
|
|
195
204
|
bool no_output = false;
|
|
@@ -202,11 +211,19 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
|
202
211
|
|
|
203
212
|
batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
|
|
204
213
|
sampler = Napi::ObjectWrap<AddonSampler>::Unwrap(info[1].As<Napi::Object>());
|
|
214
|
+
arrayResult = info.Length() > 2 && info[2].IsBoolean();
|
|
215
|
+
returnProbabilities = arrayResult ? info[2].As<Napi::Boolean>().Value() : false;
|
|
216
|
+
returnConfidence = arrayResult && info.Length() > 3 && info[3].IsBoolean() ? info[3].As<Napi::Boolean>().Value() : false;
|
|
205
217
|
sampler->Ref();
|
|
206
218
|
}
|
|
207
219
|
~AddonContextSampleTokenWorker() {
|
|
208
220
|
ctx->Unref();
|
|
209
221
|
sampler->Unref();
|
|
222
|
+
|
|
223
|
+
if (has_probabilities) {
|
|
224
|
+
delete[] probabilities_tokens;
|
|
225
|
+
delete[] probabilities_probs;
|
|
226
|
+
}
|
|
210
227
|
}
|
|
211
228
|
|
|
212
229
|
Napi::Promise GetPromise() {
|
|
@@ -239,7 +256,7 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
|
239
256
|
|
|
240
257
|
auto & candidates = sampler->tokenCandidates;
|
|
241
258
|
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
|
242
|
-
candidates[token_id] = llama_token_data{token_id, logits[token_id], 0.0f}
|
|
259
|
+
candidates[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
|
|
243
260
|
}
|
|
244
261
|
|
|
245
262
|
llama_token_data_array cur_p = {
|
|
@@ -257,18 +274,111 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
|
257
274
|
}
|
|
258
275
|
|
|
259
276
|
auto new_token_id = cur_p.data[cur_p.selected].id;
|
|
277
|
+
|
|
278
|
+
if (returnProbabilities || returnConfidence) {
|
|
279
|
+
if (!cur_p.sorted) {
|
|
280
|
+
std::sort(cur_p.data, cur_p.data + cur_p.size, [](const llama_token_data & a, const llama_token_data & b) {
|
|
281
|
+
return a.logit > b.logit;
|
|
282
|
+
});
|
|
283
|
+
cur_p.sorted = true;
|
|
284
|
+
|
|
285
|
+
for (size_t i = 0; i < cur_p.size; i++) {
|
|
286
|
+
if (cur_p.data[i].id == new_token_id) {
|
|
287
|
+
cur_p.selected = i;
|
|
288
|
+
break;
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
if (returnProbabilities) {
|
|
295
|
+
probabilities_size = cur_p.size;
|
|
296
|
+
probabilities_tokens = new llama_token[probabilities_size];
|
|
297
|
+
probabilities_probs = new float[probabilities_size];
|
|
298
|
+
float maxLogit = cur_p.size > 0 ? cur_p.data[0].logit : -INFINITY;
|
|
299
|
+
|
|
300
|
+
for (size_t i = 0; i < cur_p.size; i++) {
|
|
301
|
+
auto logit = cur_p.data[i].logit;
|
|
302
|
+
|
|
303
|
+
probabilities_tokens[i] = cur_p.data[i].id;
|
|
304
|
+
probabilities_probs[i] = logit;
|
|
305
|
+
|
|
306
|
+
if (logit > maxLogit) {
|
|
307
|
+
maxLogit = logit;
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
if (probabilities_size > 0 && maxLogit != -INFINITY) {
|
|
312
|
+
float sum = 0.0f;
|
|
313
|
+
for (size_t i = 0; i < probabilities_size; i++) {
|
|
314
|
+
float prob = expf(probabilities_probs[i] - maxLogit);
|
|
315
|
+
probabilities_probs[i] = prob;
|
|
316
|
+
sum += prob;
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
for (size_t i = 0; i < probabilities_size; i++) {
|
|
320
|
+
probabilities_probs[i] /= sum;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
has_probabilities = true;
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
if (returnConfidence) {
|
|
328
|
+
if (has_probabilities && cur_p.selected < probabilities_size) {
|
|
329
|
+
tokenConfidence = probabilities_probs[cur_p.selected];
|
|
330
|
+
} else {
|
|
331
|
+
float maxLogit = cur_p.data[0].logit;
|
|
332
|
+
float sum = 0.0f;
|
|
333
|
+
for (size_t i = 0; i < cur_p.size; i++) {
|
|
334
|
+
auto logit = cur_p.data[i].logit;
|
|
335
|
+
|
|
336
|
+
if (logit > maxLogit) {
|
|
337
|
+
maxLogit = logit;
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
for (size_t i = 0; i < cur_p.size; i++) {
|
|
342
|
+
sum += expf(cur_p.data[i].logit - maxLogit);
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
tokenConfidence = expf(cur_p.data[cur_p.selected].logit - maxLogit) / sum;
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
|
|
260
349
|
sampler->acceptToken(new_token_id);
|
|
261
350
|
result = new_token_id;
|
|
262
351
|
}
|
|
263
352
|
void OnOK() {
|
|
353
|
+
Napi::Number resultToken;
|
|
264
354
|
if (no_output) {
|
|
265
|
-
|
|
266
|
-
|
|
355
|
+
resultToken = Napi::Number::New(Env(), -1);
|
|
356
|
+
} else {
|
|
357
|
+
resultToken = Napi::Number::New(Env(), static_cast<uint32_t>(result));
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
if (!arrayResult) {
|
|
361
|
+
deferred.Resolve(resultToken);
|
|
267
362
|
return;
|
|
268
363
|
}
|
|
269
364
|
|
|
270
|
-
Napi::
|
|
271
|
-
|
|
365
|
+
Napi::Array resultArray = Napi::Array::New(Env(), 2);
|
|
366
|
+
resultArray.Set(Napi::Number::New(Env(), 0), resultToken);
|
|
367
|
+
|
|
368
|
+
if (has_probabilities) {
|
|
369
|
+
Napi::Array probabilities = Napi::Array::New(Env(), probabilities_size * 2);
|
|
370
|
+
for (size_t i = 0; i < probabilities_size; i++) {
|
|
371
|
+
probabilities.Set(i * 2, Napi::Number::New(Env(), probabilities_tokens[i]));
|
|
372
|
+
probabilities.Set(i * 2 + 1, Napi::Number::New(Env(), probabilities_probs[i]));
|
|
373
|
+
}
|
|
374
|
+
resultArray.Set(1, probabilities);
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
if (returnConfidence && tokenConfidence != -1) {
|
|
378
|
+
resultArray.Set(2, Napi::Number::New(Env(), tokenConfidence));
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
deferred.Resolve(resultArray);
|
|
272
382
|
}
|
|
273
383
|
void OnError(const Napi::Error& err) {
|
|
274
384
|
deferred.Reject(err.Value());
|
|
@@ -305,6 +415,10 @@ AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<Ad
|
|
|
305
415
|
context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
|
|
306
416
|
}
|
|
307
417
|
|
|
418
|
+
if (options.Has("ranking") && options.Get("ranking").As<Napi::Boolean>().Value()) {
|
|
419
|
+
context_params.pooling_type = LLAMA_POOLING_TYPE_RANK;
|
|
420
|
+
}
|
|
421
|
+
|
|
308
422
|
if (options.Has("flashAttention")) {
|
|
309
423
|
context_params.flash_attn = options.Get("flashAttention").As<Napi::Boolean>().Value();
|
|
310
424
|
}
|
|
@@ -441,24 +555,25 @@ Napi::Value AddonContext::AddToBatch(const Napi::CallbackInfo& info) {
|
|
|
441
555
|
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
|
|
442
556
|
int32_t firstTokenContextIndex = info[1].As<Napi::Number>().Int32Value();
|
|
443
557
|
Napi::Uint32Array tokens = info[2].As<Napi::Uint32Array>();
|
|
444
|
-
|
|
558
|
+
Napi::Uint32Array tokenLogitIndexes = info[3].As<Napi::Uint32Array>();
|
|
445
559
|
|
|
446
560
|
auto tokensLength = tokens.ElementLength();
|
|
561
|
+
auto tokenLogitIndexesLength = tokenLogitIndexes.ElementLength();
|
|
447
562
|
GGML_ASSERT(batch.n_tokens + tokensLength <= batch_n_tokens);
|
|
448
563
|
|
|
449
|
-
|
|
450
|
-
common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, false);
|
|
451
|
-
}
|
|
452
|
-
|
|
453
|
-
if (generateLogitAtTheEnd) {
|
|
454
|
-
batch.logits[batch.n_tokens - 1] = true;
|
|
564
|
+
Napi::Uint32Array resLogitIndexes = Napi::Uint32Array::New(info.Env(), tokenLogitIndexesLength);
|
|
455
565
|
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
566
|
+
for (size_t i = 0, l = 0; i < tokensLength; i++) {
|
|
567
|
+
if (l < tokenLogitIndexesLength && l < tokenLogitIndexesLength && tokenLogitIndexes[l] == i) {
|
|
568
|
+
common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, true);
|
|
569
|
+
resLogitIndexes[l] = batch.n_tokens - 1;
|
|
570
|
+
l++;
|
|
571
|
+
} else {
|
|
572
|
+
common_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, false);
|
|
573
|
+
}
|
|
459
574
|
}
|
|
460
575
|
|
|
461
|
-
return
|
|
576
|
+
return resLogitIndexes;
|
|
462
577
|
}
|
|
463
578
|
Napi::Value AddonContext::DisposeSequence(const Napi::CallbackInfo& info) {
|
|
464
579
|
if (disposed) {
|
|
@@ -592,6 +707,62 @@ Napi::Value AddonContext::PrintTimings(const Napi::CallbackInfo& info) {
|
|
|
592
707
|
return info.Env().Undefined();
|
|
593
708
|
}
|
|
594
709
|
|
|
710
|
+
Napi::Value AddonContext::EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info) {
|
|
711
|
+
constexpr auto vocabSizeMaxDifference = 128; // SPEC_VOCAB_MAX_SIZE_DIFFERENCE
|
|
712
|
+
constexpr auto vocabCheckStartTokenId = 5; // SPEC_VOCAB_CHECK_START_TOKEN_ID
|
|
713
|
+
|
|
714
|
+
const AddonContext * draftContext = Napi::ObjectWrap<AddonContext>::Unwrap(info[0].As<Napi::Object>());
|
|
715
|
+
const auto currentCtx = ctx;
|
|
716
|
+
const auto draftCtx = draftContext->ctx;
|
|
717
|
+
const auto currentModel = model->model;
|
|
718
|
+
const auto draftModel = draftContext->model->model;
|
|
719
|
+
|
|
720
|
+
if (llama_vocab_type(currentModel) != llama_vocab_type(draftModel)) {
|
|
721
|
+
Napi::Error::New(info.Env(), "Speculative draft model vocabulary type must match the target model vocabulary type").ThrowAsJavaScriptException();
|
|
722
|
+
return info.Env().Undefined();
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
if (llama_add_bos_token(currentModel) != llama_add_bos_token(draftModel) ||
|
|
726
|
+
llama_add_eos_token(currentModel) != llama_add_eos_token(draftModel) ||
|
|
727
|
+
llama_token_bos(currentModel) != llama_token_bos(draftModel) ||
|
|
728
|
+
llama_token_eos(currentModel) != llama_token_eos(draftModel)
|
|
729
|
+
) {
|
|
730
|
+
Napi::Error::New(info.Env(), "Speculative draft model special tokens must match the target model special tokens").ThrowAsJavaScriptException();
|
|
731
|
+
return info.Env().Undefined();
|
|
732
|
+
}
|
|
733
|
+
|
|
734
|
+
const int currentModelVocabSize = llama_n_vocab(currentModel);
|
|
735
|
+
const int draftModelVocabSize = llama_n_vocab(draftModel);
|
|
736
|
+
|
|
737
|
+
const int vocabDiff = std::abs(currentModelVocabSize - draftModelVocabSize);
|
|
738
|
+
|
|
739
|
+
if (vocabDiff > vocabSizeMaxDifference) {
|
|
740
|
+
Napi::Error::New(
|
|
741
|
+
info.Env(),
|
|
742
|
+
std::string("Speculative draft model vocabulary must closely match the target model vocabulary size (vocabulary size difference: ") +
|
|
743
|
+
std::to_string(vocabDiff) + std::string(", max allowed: ") + std::to_string(vocabSizeMaxDifference) + std::string(")")
|
|
744
|
+
).ThrowAsJavaScriptException();
|
|
745
|
+
return info.Env().Undefined();
|
|
746
|
+
}
|
|
747
|
+
|
|
748
|
+
const int minVocabSize = std::min(currentModelVocabSize, draftModelVocabSize);
|
|
749
|
+
for (int i = vocabCheckStartTokenId; i < minVocabSize; ++i) {
|
|
750
|
+
const char * currentTokenText = llama_token_get_text(currentModel, i);
|
|
751
|
+
const char * draftTokenText = llama_token_get_text(draftModel, i);
|
|
752
|
+
if (std::strcmp(currentTokenText, draftTokenText) != 0) {
|
|
753
|
+
Napi::Error::New(
|
|
754
|
+
info.Env(),
|
|
755
|
+
std::string("Speculative draft model vocabulary must match the target model vocabulary, but token ") +
|
|
756
|
+
std::to_string(i) + std::string(" content differs. Target: \"") + std::string(currentTokenText) +
|
|
757
|
+
std::string("\", Draft: \"") + std::string(draftTokenText) + std::string("")
|
|
758
|
+
).ThrowAsJavaScriptException();
|
|
759
|
+
return info.Env().Undefined();
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
return info.Env().Undefined();
|
|
764
|
+
}
|
|
765
|
+
|
|
595
766
|
Napi::Value AddonContext::SetLora(const Napi::CallbackInfo& info) {
|
|
596
767
|
AddonModelLora* lora = Napi::ObjectWrap<AddonModelLora>::Unwrap(info[0].As<Napi::Object>());
|
|
597
768
|
float scale = info[1].As<Napi::Number>().FloatValue();
|
|
@@ -622,6 +793,7 @@ void AddonContext::init(Napi::Object exports) {
|
|
|
622
793
|
InstanceMethod("getThreads", &AddonContext::GetThreads),
|
|
623
794
|
InstanceMethod("setThreads", &AddonContext::SetThreads),
|
|
624
795
|
InstanceMethod("printTimings", &AddonContext::PrintTimings),
|
|
796
|
+
InstanceMethod("ensureDraftContextIsCompatibleForSpeculative", &AddonContext::EnsureDraftContextIsCompatibleForSpeculative),
|
|
625
797
|
InstanceMethod("setLora", &AddonContext::SetLora),
|
|
626
798
|
InstanceMethod("dispose", &AddonContext::Dispose),
|
|
627
799
|
}
|
|
@@ -45,6 +45,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
45
45
|
Napi::Value SetThreads(const Napi::CallbackInfo& info);
|
|
46
46
|
|
|
47
47
|
Napi::Value PrintTimings(const Napi::CallbackInfo& info);
|
|
48
|
+
Napi::Value EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info);
|
|
48
49
|
|
|
49
50
|
Napi::Value SetLora(const Napi::CallbackInfo& info);
|
|
50
51
|
|
|
@@ -46,13 +46,10 @@ Napi::Value AddonGrammar::isTextCompatible(const Napi::CallbackInfo& info) {
|
|
|
46
46
|
}
|
|
47
47
|
|
|
48
48
|
const auto cpts = unicode_cpts_from_utf8(testText);
|
|
49
|
-
const llama_grammar_rules & rules = llama_grammar_get_rules(parsed_grammar);
|
|
50
49
|
llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(parsed_grammar);
|
|
51
50
|
|
|
52
51
|
for (const auto & cpt : cpts) {
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
llama_grammar_accept(rules, stacks_prev, cpt, stacks_cur);
|
|
52
|
+
llama_grammar_accept(parsed_grammar, cpt);
|
|
56
53
|
|
|
57
54
|
if (stacks_cur.empty()) {
|
|
58
55
|
// no stacks means that the grammar failed to match at this point
|
|
@@ -6,13 +6,24 @@
|
|
|
6
6
|
#include "AddonGrammar.h"
|
|
7
7
|
|
|
8
8
|
AddonGrammarEvaluationState::AddonGrammarEvaluationState(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammarEvaluationState>(info) {
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
if (info.Length() == 1) {
|
|
10
|
+
AddonGrammarEvaluationState* existingState = Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
|
|
11
|
+
model = existingState->model;
|
|
12
|
+
model->Ref();
|
|
11
13
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
+
grammarDef = existingState->grammarDef;
|
|
15
|
+
grammarDef->Ref();
|
|
14
16
|
|
|
15
|
-
|
|
17
|
+
sampler = llama_sampler_clone(existingState->sampler);
|
|
18
|
+
} else {
|
|
19
|
+
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
|
|
20
|
+
model->Ref();
|
|
21
|
+
|
|
22
|
+
grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[1].As<Napi::Object>());
|
|
23
|
+
grammarDef->Ref();
|
|
24
|
+
|
|
25
|
+
sampler = llama_sampler_init_grammar(model->model, grammarDef->grammarCode.c_str(), grammarDef->rootRuleName.c_str());
|
|
26
|
+
}
|
|
16
27
|
}
|
|
17
28
|
AddonGrammarEvaluationState::~AddonGrammarEvaluationState() {
|
|
18
29
|
llama_sampler_free(sampler);
|
|
@@ -92,13 +92,13 @@ class AddonModelLoadModelWorker : public Napi::AsyncWorker {
|
|
|
92
92
|
|
|
93
93
|
void Execute() {
|
|
94
94
|
try {
|
|
95
|
-
model->model =
|
|
95
|
+
model->model = llama_model_load_from_file(model->modelPath.c_str(), model->model_params);
|
|
96
96
|
|
|
97
97
|
model->modelLoaded = model->model != nullptr && model->model != NULL;
|
|
98
98
|
} catch (const std::exception& e) {
|
|
99
99
|
SetError(e.what());
|
|
100
100
|
} catch(...) {
|
|
101
|
-
SetError("Unknown error when calling \"
|
|
101
|
+
SetError("Unknown error when calling \"llama_model_load_from_file\"");
|
|
102
102
|
}
|
|
103
103
|
}
|
|
104
104
|
void OnOK() {
|
|
@@ -141,14 +141,14 @@ class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
|
|
|
141
141
|
|
|
142
142
|
void Execute() {
|
|
143
143
|
try {
|
|
144
|
-
|
|
144
|
+
llama_model_free(model->model);
|
|
145
145
|
model->modelLoaded = false;
|
|
146
146
|
|
|
147
147
|
model->dispose();
|
|
148
148
|
} catch (const std::exception& e) {
|
|
149
149
|
SetError(e.what());
|
|
150
150
|
} catch(...) {
|
|
151
|
-
SetError("Unknown error when calling \"
|
|
151
|
+
SetError("Unknown error when calling \"llama_model_free\"");
|
|
152
152
|
}
|
|
153
153
|
}
|
|
154
154
|
void OnOK() {
|
|
@@ -359,7 +359,7 @@ void AddonModel::dispose() {
|
|
|
359
359
|
disposed = true;
|
|
360
360
|
if (modelLoaded) {
|
|
361
361
|
modelLoaded = false;
|
|
362
|
-
|
|
362
|
+
llama_model_free(model);
|
|
363
363
|
|
|
364
364
|
adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
|
|
365
365
|
loadedModelSize = 0;
|
|
@@ -515,7 +515,12 @@ Napi::Value AddonModel::TokenBos(const Napi::CallbackInfo& info) {
|
|
|
515
515
|
return info.Env().Undefined();
|
|
516
516
|
}
|
|
517
517
|
|
|
518
|
-
|
|
518
|
+
auto token = llama_token_bos(model);
|
|
519
|
+
if (token == LLAMA_TOKEN_NULL) {
|
|
520
|
+
token = llama_token_cls(model);
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
return getNapiControlToken(info, model, token);
|
|
519
524
|
}
|
|
520
525
|
Napi::Value AddonModel::TokenEos(const Napi::CallbackInfo& info) {
|
|
521
526
|
if (disposed) {
|
|
@@ -565,14 +570,6 @@ Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
|
|
|
565
570
|
|
|
566
571
|
return getNapiToken(info, model, llama_token_eot(model));
|
|
567
572
|
}
|
|
568
|
-
Napi::Value AddonModel::ClsToken(const Napi::CallbackInfo& info) {
|
|
569
|
-
if (disposed) {
|
|
570
|
-
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
571
|
-
return info.Env().Undefined();
|
|
572
|
-
}
|
|
573
|
-
|
|
574
|
-
return getNapiToken(info, model, llama_token_cls(model));
|
|
575
|
-
}
|
|
576
573
|
Napi::Value AddonModel::SepToken(const Napi::CallbackInfo& info) {
|
|
577
574
|
if (disposed) {
|
|
578
575
|
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
@@ -678,7 +675,6 @@ void AddonModel::init(Napi::Object exports) {
|
|
|
678
675
|
InstanceMethod("middleToken", &AddonModel::MiddleToken),
|
|
679
676
|
InstanceMethod("suffixToken", &AddonModel::SuffixToken),
|
|
680
677
|
InstanceMethod("eotToken", &AddonModel::EotToken),
|
|
681
|
-
InstanceMethod("clsToken", &AddonModel::ClsToken),
|
|
682
678
|
InstanceMethod("sepToken", &AddonModel::SepToken),
|
|
683
679
|
InstanceMethod("getTokenString", &AddonModel::GetTokenString),
|
|
684
680
|
InstanceMethod("getTokenAttributes", &AddonModel::GetTokenAttributes),
|
package/llama/addon/AddonModel.h
CHANGED
|
@@ -49,7 +49,6 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
49
49
|
Napi::Value MiddleToken(const Napi::CallbackInfo& info);
|
|
50
50
|
Napi::Value SuffixToken(const Napi::CallbackInfo& info);
|
|
51
51
|
Napi::Value EotToken(const Napi::CallbackInfo& info);
|
|
52
|
-
Napi::Value ClsToken(const Napi::CallbackInfo& info);
|
|
53
52
|
Napi::Value SepToken(const Napi::CallbackInfo& info);
|
|
54
53
|
Napi::Value GetTokenString(const Napi::CallbackInfo& info);
|
|
55
54
|
|
|
@@ -350,15 +350,10 @@ Napi::Value AddonSampler::ApplyConfig(const Napi::CallbackInfo& info) {
|
|
|
350
350
|
|
|
351
351
|
if (shouldCreateSampler) {
|
|
352
352
|
repeatPenaltySampler = llama_sampler_init_penalties(
|
|
353
|
-
llama_n_vocab(model->model),
|
|
354
|
-
llama_token_eos(model->model),
|
|
355
|
-
llama_token_nl(model->model),
|
|
356
353
|
repeatPenaltyMaxTokens,
|
|
357
354
|
repeatPenalty,
|
|
358
355
|
repeatPenaltyFrequencyPenalty,
|
|
359
|
-
repeatPenaltyPresencePenalty
|
|
360
|
-
true,
|
|
361
|
-
false
|
|
356
|
+
repeatPenaltyPresencePenalty
|
|
362
357
|
);
|
|
363
358
|
repeatPenalty_lastTokens = RingBuffer<llama_token>(repeatPenaltyMaxTokens);
|
|
364
359
|
|
package/llama/addon/addon.cpp
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
#include "globals/addonProgress.h"
|
|
10
10
|
#include "globals/getGpuInfo.h"
|
|
11
11
|
#include "globals/getSwapInfo.h"
|
|
12
|
+
#include "globals/getMemoryInfo.h"
|
|
12
13
|
|
|
13
14
|
bool backendInitialized = false;
|
|
14
15
|
bool backendDisposed = false;
|
|
@@ -25,6 +26,21 @@ Napi::Value addonGetSupportsMmap(const Napi::CallbackInfo& info) {
|
|
|
25
26
|
return Napi::Boolean::New(info.Env(), llama_supports_mmap());
|
|
26
27
|
}
|
|
27
28
|
|
|
29
|
+
// Reports to JS whether mmap can be used together with the device returned by getGpuDevice().
//
// Returns a JS boolean:
//   - `false` when getGpuDevice() yields no device (nullptr)
//   - otherwise `true` only when llama_supports_mmap() is true and the device
//     properties report `caps.buffer_from_host_ptr`
Napi::Value addonGetGpuSupportsMmap(const Napi::CallbackInfo& info) {
    const bool llamaSupportsMmap = llama_supports_mmap();
    const auto gpuDevice = getGpuDevice().first;

    if (gpuDevice == nullptr) {
        return Napi::Boolean::New(info.Env(), false);
    }

    ggml_backend_dev_props props;
    ggml_backend_dev_get_props(gpuDevice, &props);

    // Reuse the value queried above instead of calling llama_supports_mmap() a second time
    const bool gpuSupportsMmap = llamaSupportsMmap && props.caps.buffer_from_host_ptr;
    return Napi::Boolean::New(info.Env(), gpuSupportsMmap);
}
|
|
43
|
+
|
|
28
44
|
// Exposes llama_supports_mlock() to JS as a boolean.
Napi::Value addonGetSupportsMlock(const Napi::CallbackInfo& info) {
    const bool mlockSupported = llama_supports_mlock();
    return Napi::Boolean::New(info.Env(), mlockSupported);
}
|
|
@@ -152,16 +168,16 @@ class AddonBackendUnloadWorker : public Napi::AsyncWorker {
|
|
|
152
168
|
};
|
|
153
169
|
|
|
154
170
|
Napi::Value addonLoadBackends(const Napi::CallbackInfo& info) {
|
|
155
|
-
const
|
|
156
|
-
?
|
|
157
|
-
: info[0].
|
|
158
|
-
? info[0].As<Napi::
|
|
159
|
-
:
|
|
171
|
+
const std::string forceLoadLibrariesSearchPath = info.Length() == 0
|
|
172
|
+
? ""
|
|
173
|
+
: info[0].IsString()
|
|
174
|
+
? info[0].As<Napi::String>().Utf8Value()
|
|
175
|
+
: "";
|
|
160
176
|
|
|
161
177
|
ggml_backend_reg_count();
|
|
162
178
|
|
|
163
|
-
if (
|
|
164
|
-
|
|
179
|
+
if (forceLoadLibrariesSearchPath.length() > 0) {
|
|
180
|
+
ggml_backend_load_all_from_path(forceLoadLibrariesSearchPath.c_str());
|
|
165
181
|
}
|
|
166
182
|
|
|
167
183
|
return info.Env().Undefined();
|
|
@@ -210,6 +226,7 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
|
|
|
210
226
|
Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
|
|
211
227
|
Napi::PropertyDescriptor::Function("getSupportsGpuOffloading", addonGetSupportsGpuOffloading),
|
|
212
228
|
Napi::PropertyDescriptor::Function("getSupportsMmap", addonGetSupportsMmap),
|
|
229
|
+
Napi::PropertyDescriptor::Function("getGpuSupportsMmap", addonGetGpuSupportsMmap),
|
|
213
230
|
Napi::PropertyDescriptor::Function("getSupportsMlock", addonGetSupportsMlock),
|
|
214
231
|
Napi::PropertyDescriptor::Function("getMathCores", addonGetMathCores),
|
|
215
232
|
Napi::PropertyDescriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
|
|
@@ -220,7 +237,9 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
|
|
|
220
237
|
Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
|
|
221
238
|
Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
|
|
222
239
|
Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
|
|
240
|
+
Napi::PropertyDescriptor::Function("ensureGpuDeviceIsSupported", ensureGpuDeviceIsSupported),
|
|
223
241
|
Napi::PropertyDescriptor::Function("getSwapInfo", getSwapInfo),
|
|
242
|
+
Napi::PropertyDescriptor::Function("getMemoryInfo", getMemoryInfo),
|
|
224
243
|
Napi::PropertyDescriptor::Function("loadBackends", addonLoadBackends),
|
|
225
244
|
Napi::PropertyDescriptor::Function("init", addonInit),
|
|
226
245
|
Napi::PropertyDescriptor::Function("dispose", addonDispose),
|
|
@@ -89,17 +89,17 @@ Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
|
|
|
89
89
|
return result;
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
-
|
|
92
|
+
std::pair<ggml_backend_dev_t, std::string> getGpuDevice() {
|
|
93
93
|
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
|
|
94
94
|
ggml_backend_dev_t device = ggml_backend_dev_get(i);
|
|
95
95
|
const auto deviceName = std::string(ggml_backend_dev_name(device));
|
|
96
96
|
|
|
97
97
|
if (deviceName == "Metal") {
|
|
98
|
-
return
|
|
98
|
+
return std::pair<ggml_backend_dev_t, std::string>(device, "metal");
|
|
99
99
|
} else if (std::string(deviceName).find("Vulkan") == 0) {
|
|
100
|
-
return
|
|
100
|
+
return std::pair<ggml_backend_dev_t, std::string>(device, "vulkan");
|
|
101
101
|
} else if (std::string(deviceName).find("CUDA") == 0 || std::string(deviceName).find("ROCm") == 0 || std::string(deviceName).find("MUSA") == 0) {
|
|
102
|
-
return
|
|
102
|
+
return std::pair<ggml_backend_dev_t, std::string>(device, "cuda");
|
|
103
103
|
}
|
|
104
104
|
}
|
|
105
105
|
|
|
@@ -108,9 +108,34 @@ Napi::Value getGpuType(const Napi::CallbackInfo& info) {
|
|
|
108
108
|
const auto deviceName = std::string(ggml_backend_dev_name(device));
|
|
109
109
|
|
|
110
110
|
if (deviceName == "CPU") {
|
|
111
|
-
return
|
|
111
|
+
return std::pair<ggml_backend_dev_t, std::string>(device, "cpu");
|
|
112
112
|
}
|
|
113
113
|
}
|
|
114
114
|
|
|
115
|
+
return std::pair<ggml_backend_dev_t, std::string>(nullptr, "");
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
// Reports the type of the device selected by getGpuDevice() to JS.
//
// Returns:
//   - `false` when the reported type is "cpu"
//   - the type string when a device was found and its type is non-empty
//   - `undefined` otherwise
Napi::Value getGpuType(const Napi::CallbackInfo& info) {
    const std::pair<ggml_backend_dev_t, std::string> detected = getGpuDevice();
    const std::string& typeName = detected.second;

    if (typeName == "cpu") {
        return Napi::Boolean::New(info.Env(), false);
    }

    const bool hasUsableDevice = detected.first != nullptr && !typeName.empty();
    if (!hasUsableDevice) {
        return info.Env().Undefined();
    }

    return Napi::String::New(info.Env(), typeName);
}
|
|
131
|
+
|
|
132
|
+
// Verifies that the GPU environment is usable and always returns `undefined`.
//
// Only does work when compiled with GPU_INFO_USE_VULKAN: if
// checkIsVulkanEnvSupported(logVulkanWarning) returns false, a JS error with the
// message "Vulkan device is not supported" is thrown.
Napi::Value ensureGpuDeviceIsSupported(const Napi::CallbackInfo& info) {
    Napi::Env env = info.Env();

#ifdef GPU_INFO_USE_VULKAN
    const bool vulkanEnvSupported = checkIsVulkanEnvSupported(logVulkanWarning);
    if (!vulkanEnvSupported) {
        Napi::Error::New(env, "Vulkan device is not supported").ThrowAsJavaScriptException();
    }
#endif

    return env.Undefined();
}
|
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
#pragma once
|
|
2
|
+
#include <utility>
|
|
3
|
+
#include <string>
|
|
2
4
|
#include "napi.h"
|
|
5
|
+
#include "llama.h"
|
|
3
6
|
|
|
4
7
|
Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info);
|
|
5
8
|
Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info);
|
|
6
|
-
|
|
9
|
+
std::pair<ggml_backend_dev_t, std::string> getGpuDevice();
|
|
10
|
+
Napi::Value getGpuType(const Napi::CallbackInfo& info);
|
|
11
|
+
Napi::Value ensureGpuDeviceIsSupported(const Napi::CallbackInfo& info);
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#include "getMemoryInfo.h"
|
|
2
|
+
#include "addonLog.h"
|
|
3
|
+
|
|
4
|
+
#ifdef __APPLE__
|
|
5
|
+
#include <iostream>
|
|
6
|
+
#include <mach/mach.h>
|
|
7
|
+
#include <sys/sysctl.h>
|
|
8
|
+
#elif __linux__
|
|
9
|
+
#include <fstream>
|
|
10
|
+
#include <sstream>
|
|
11
|
+
#include <string>
|
|
12
|
+
#elif _WIN32
|
|
13
|
+
#include <iostream>
|
|
14
|
+
#include <windows.h>
|
|
15
|
+
#include <psapi.h>
|
|
16
|
+
#endif
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
// Reports the memory usage of the current process to JS.
//
// Returns an object with a single `total` property, in bytes, read from:
//   - macOS:   `virtual_size` of task_info(MACH_TASK_BASIC_INFO)
//   - Linux:   the `VmSize` line of /proc/self/status (reported there in kB)
//   - Windows: `PrivateUsage` of GetProcessMemoryInfo()
// When the platform query fails, an error is logged and `total` stays 0.
// On any other platform no query is made and `total` is 0.
Napi::Value getMemoryInfo(const Napi::CallbackInfo& info) {
    uint64_t totalMemoryUsage = 0;

#ifdef __APPLE__
    struct mach_task_basic_info taskInfo;
    mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
    if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, reinterpret_cast<task_info_t>(&taskInfo), &infoCount) == KERN_SUCCESS) {
        totalMemoryUsage = taskInfo.virtual_size;
    } else {
        addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, "Failed to get memory usage info", nullptr);
    }
#elif __linux__
    std::ifstream procStatus("/proc/self/status");
    std::string line;
    bool foundMemoryUsage = false;
    while (std::getline(procStatus, line)) {
        if (line.rfind("VmSize:", 0) == 0) { // Virtual memory size of the process
            std::istringstream iss(line);
            std::string key, unit;
            // Parse as 64-bit so the kB -> bytes multiplication cannot wrap where size_t is 32-bit
            uint64_t value = 0;
            if (iss >> key >> value >> unit) {
                totalMemoryUsage = value * 1024; // Convert from kB to bytes
                foundMemoryUsage = true;
            }
            break;
        }
    }

    if (!foundMemoryUsage) {
        addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, "Failed to get memory usage info", nullptr);
    }
#elif _WIN32
    PROCESS_MEMORY_COUNTERS_EX memCounters;

    if (GetProcessMemoryInfo(GetCurrentProcess(), reinterpret_cast<PROCESS_MEMORY_COUNTERS*>(&memCounters), sizeof(memCounters))) {
        totalMemoryUsage = memCounters.PrivateUsage;
    } else {
        addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, "Failed to get memory usage info", nullptr);
    }
#endif

    Napi::Object obj = Napi::Object::New(info.Env());
    obj.Set("total", Napi::Number::New(info.Env(), static_cast<double>(totalMemoryUsage)));
    return obj;
}
|