node-llama-cpp 3.0.0-beta.10 → 3.0.0-beta.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/dist/bindings/AddonTypes.d.ts +3 -0
- package/dist/bindings/Llama.d.ts +1 -0
- package/dist/bindings/Llama.js +7 -1
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/getLlama.d.ts +24 -1
- package/dist/bindings/getLlama.js +10 -4
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +1 -0
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.js +2 -0
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +2 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +26 -0
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +43 -0
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +2 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
- package/dist/cli/cli.js +4 -0
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/BuildCommand.d.ts +2 -1
- package/dist/cli/commands/BuildCommand.js +11 -9
- package/dist/cli/commands/BuildCommand.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +2 -2
- package/dist/cli/commands/ChatCommand.js +3 -39
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +25 -0
- package/dist/cli/commands/CompleteCommand.js +278 -0
- package/dist/cli/commands/CompleteCommand.js.map +1 -0
- package/dist/cli/commands/DebugCommand.js +16 -13
- package/dist/cli/commands/DebugCommand.js.map +1 -1
- package/dist/cli/commands/DownloadCommand.d.ts +2 -1
- package/dist/cli/commands/DownloadCommand.js +11 -9
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +27 -0
- package/dist/cli/commands/InfillCommand.js +316 -0
- package/dist/cli/commands/InfillCommand.js.map +1 -0
- package/dist/cli/utils/logEnabledComputeLayers.d.ts +8 -0
- package/dist/cli/utils/logEnabledComputeLayers.js +11 -0
- package/dist/cli/utils/logEnabledComputeLayers.js.map +1 -0
- package/dist/config.d.ts +1 -0
- package/dist/config.js +5 -2
- package/dist/config.js.map +1 -1
- package/dist/consts.d.ts +1 -0
- package/dist/consts.js +2 -0
- package/dist/consts.js.map +1 -0
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +2 -33
- package/dist/evaluator/LlamaChat/LlamaChat.js +7 -28
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.d.ts +148 -0
- package/dist/evaluator/LlamaCompletion.js +402 -0
- package/dist/evaluator/LlamaCompletion.js.map +1 -0
- package/dist/evaluator/LlamaContext/LlamaContext.js +6 -2
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaModel.d.ts +10 -1
- package/dist/evaluator/LlamaModel.js +33 -3
- package/dist/evaluator/LlamaModel.js.map +1 -1
- package/dist/index.d.ts +6 -4
- package/dist/index.js +4 -2
- package/dist/index.js.map +1 -1
- package/dist/types.d.ts +31 -0
- package/dist/utils/UnsupportedError.d.ts +2 -0
- package/dist/utils/UnsupportedError.js +7 -0
- package/dist/utils/UnsupportedError.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
- package/dist/utils/getBuildDefaults.d.ts +1 -0
- package/dist/utils/getBuildDefaults.js +3 -2
- package/dist/utils/getBuildDefaults.js.map +1 -1
- package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
- package/llama/CMakeLists.txt +20 -0
- package/llama/addon.cpp +97 -12
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +5 -5
- package/llama/gpuInfo/cuda-gpu-info.h +2 -2
- package/llama/gpuInfo/vulkan-gpu-info.cpp +65 -0
- package/llama/gpuInfo/vulkan-gpu-info.h +7 -0
- package/llama/llama.cpp.info.json +1 -1
- package/llamaBins/linux-arm64/.buildMetadata.json +1 -1
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/.buildMetadata.json +1 -1
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/.buildMetadata.json +1 -1
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/.buildMetadata.json +1 -1
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/linux-x64-vulkan/.buildMetadata.json +1 -0
- package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/.buildMetadata.json +1 -1
- package/llamaBins/mac-arm64-metal/ggml-metal.metal +1035 -132
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/.buildMetadata.json +1 -1
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64/.buildMetadata.json +1 -1
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.lib +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/.buildMetadata.json +1 -1
- package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/win-x64-vulkan/.buildMetadata.json +1 -0
- package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
- package/package.json +3 -2
- package/dist/AbortError.d.ts +0 -2
- package/dist/AbortError.js +0 -7
- package/dist/AbortError.js.map +0 -1
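Taken together, the file list shows two headline additions: a Vulkan compute layer (new `vulkan-gpu-info` sources plus `linux-x64-vulkan` and `win-x64-vulkan` prebuilt binaries) and a new `LlamaCompletion` evaluator backing the new `complete` and `infill` CLI commands. A rough sketch of how the new evaluator is likely wired up — the option and method names below are assumptions inferred from the file names above, not confirmed by this diff:

// Hypothetical usage of the new completion evaluator; option and method
// names are inferred from the file list, not confirmed by this diff.
import {getLlama, LlamaCompletion} from "node-llama-cpp";

const llama = await getLlama({gpu: "vulkan"}); // new Vulkan compute layer (assumed option)
const model = await llama.loadModel({modelPath: "./model.gguf"});
const context = await model.createContext();

const completion = new LlamaCompletion({contextSequence: context.getSequence()});
const text = await completion.generateCompletion("The quick brown fox", {maxTokens: 32});
console.log(text);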
package/llama/addon.cpp
CHANGED
@@ -12,6 +12,9 @@
 #ifdef GPU_INFO_USE_CUBLAS
 #    include "gpuInfo/cuda-gpu-info.h"
 #endif
+#ifdef GPU_INFO_USE_VULKAN
+#    include "gpuInfo/vulkan-gpu-info.h"
+#endif
 #ifdef GPU_INFO_USE_METAL
 #    include "gpuInfo/metal-gpu-info.h"
 #endif
@@ -35,6 +38,7 @@ using AddonThreadSafeLogCallbackFunction =
 AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
 bool addonJsLoggerCallbackSet = false;
 int addonLoggerLogLevel = 5;
+bool backendInitialized = false;
 
 std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
     std::vector<char> result(8, 0);
@@ -51,10 +55,15 @@ std::string addon_model_token_to_piece(const struct llama_model* model, llama_to
 }
 
 #ifdef GPU_INFO_USE_CUBLAS
-void
+void logCudaError(const char* message) {
     addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
 }
 #endif
+#ifdef GPU_INFO_USE_VULKAN
+void logVulkanWarning(const char* message) {
+    addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr);
+}
+#endif
 
 Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
     uint64_t total = 0;
@@ -63,7 +72,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
 #ifdef GPU_INFO_USE_CUBLAS
     size_t cudaDeviceTotal = 0;
     size_t cudaDeviceUsed = 0;
-    bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed,
+    bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
 
     if (cudeGetInfoSuccess) {
         total += cudaDeviceTotal;
@@ -71,6 +80,17 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
     }
 #endif
 
+#ifdef GPU_INFO_USE_VULKAN
+    uint64_t vulkanDeviceTotal = 0;
+    uint64_t vulkanDeviceUsed = 0;
+    const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, logVulkanWarning);
+
+    if (vulkanDeviceSupportsMemoryBudgetExtension) {
+        total += vulkanDeviceTotal;
+        used += vulkanDeviceUsed;
+    }
+#endif
+
 #ifdef GPU_INFO_USE_METAL
     uint64_t metalDeviceTotal = 0;
     uint64_t metalDeviceUsed = 0;
@@ -87,6 +107,26 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
     return result;
 }
 
+static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
+    auto tokenType = llama_token_get_type(model, token);
+
+    if (tokenType == LLAMA_TOKEN_TYPE_UNDEFINED || tokenType == LLAMA_TOKEN_TYPE_UNKNOWN) {
+        return Napi::Number::From(info.Env(), -1);
+    }
+
+    return Napi::Number::From(info.Env(), token);
+}
+
+static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
+    auto tokenType = llama_token_get_type(model, token);
+
+    if (tokenType != LLAMA_TOKEN_TYPE_CONTROL) {
+        return Napi::Number::From(info.Env(), -1);
+    }
+
+    return Napi::Number::From(info.Env(), token);
+}
+
 class AddonModel : public Napi::ObjectWrap<AddonModel> {
     public:
     llama_model_params model_params;
@@ -119,7 +159,6 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
             }
         }
 
-        llama_backend_init(false);
         model = llama_load_model_from_file(modelPath.c_str(), model_params);
 
         if (model == NULL) {
@@ -203,6 +242,15 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
         return Napi::Number::From(info.Env(), llama_n_ctx_train(model));
    }
 
+    Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
+        if (disposed) {
+            Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
+            return info.Env().Undefined();
+        }
+
+        return Napi::Number::From(info.Env(), llama_n_embd(model));
+    }
+
     Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
         if (disposed) {
             Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
@@ -239,7 +287,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
             return info.Env().Undefined();
         }
 
-        return
+        return getNapiControlToken(info, model, llama_token_bos(model));
     }
     Napi::Value TokenEos(const Napi::CallbackInfo& info) {
         if (disposed) {
@@ -247,7 +295,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
             return info.Env().Undefined();
         }
 
-        return
+        return getNapiControlToken(info, model, llama_token_eos(model));
     }
     Napi::Value TokenNl(const Napi::CallbackInfo& info) {
         if (disposed) {
@@ -255,7 +303,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
             return info.Env().Undefined();
         }
 
-        return
+        return getNapiToken(info, model, llama_token_nl(model));
     }
     Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
         if (disposed) {
@@ -263,7 +311,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
             return info.Env().Undefined();
         }
 
-        return
+        return getNapiControlToken(info, model, llama_token_prefix(model));
     }
     Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
         if (disposed) {
@@ -271,7 +319,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
            return info.Env().Undefined();
         }
 
-        return
+        return getNapiControlToken(info, model, llama_token_middle(model));
     }
     Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
         if (disposed) {
@@ -279,7 +327,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
             return info.Env().Undefined();
         }
 
-        return
+        return getNapiControlToken(info, model, llama_token_suffix(model));
     }
     Napi::Value EotToken(const Napi::CallbackInfo& info) {
         if (disposed) {
@@ -287,7 +335,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
             return info.Env().Undefined();
         }
 
-        return
+        return getNapiControlToken(info, model, llama_token_eot(model));
     }
     Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
         if (disposed) {
@@ -308,6 +356,29 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
         return Napi::String::New(info.Env(), ss.str());
     }
 
+    Napi::Value GetTokenType(const Napi::CallbackInfo& info) {
+        if (disposed) {
+            Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
+            return info.Env().Undefined();
+        }
+
+        if (info[0].IsNumber() == false) {
+            return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_TYPE_UNDEFINED));
+        }
+
+        int token = info[0].As<Napi::Number>().Int32Value();
+        auto tokenType = llama_token_get_type(model, token);
+
+        return Napi::Number::From(info.Env(), int32_t(tokenType));
+    }
+    Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info) {
+        const int addBos = llama_add_bos_token(model);
+
+        bool shouldPrependBos = addBos != -1 ? bool(addBos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
+
+        return Napi::Boolean::New(info.Env(), shouldPrependBos);
+    }
+
     static void init(Napi::Object exports) {
         exports.Set(
             "AddonModel",
@@ -318,6 +389,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
                 InstanceMethod("tokenize", &AddonModel::Tokenize),
                 InstanceMethod("detokenize", &AddonModel::Detokenize),
                 InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
+                InstanceMethod("getEmbeddingVectorSize", &AddonModel::GetEmbeddingVectorSize),
                 InstanceMethod("getTotalSize", &AddonModel::GetTotalSize),
                 InstanceMethod("getTotalParameters", &AddonModel::GetTotalParameters),
                 InstanceMethod("getModelDescription", &AddonModel::GetModelDescription),
@@ -329,6 +401,8 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
                 InstanceMethod("suffixToken", &AddonModel::SuffixToken),
                 InstanceMethod("eotToken", &AddonModel::EotToken),
                 InstanceMethod("getTokenString", &AddonModel::GetTokenString),
+                InstanceMethod("getTokenType", &AddonModel::GetTokenType),
+                InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
                 InstanceMethod("dispose", &AddonModel::Dispose),
             }
         )
@@ -896,7 +970,7 @@ void addonCallJsLogCallback(
             called = false;
         }
     }
-
+
     if (!called && data != nullptr) {
         if (data->logLevelNumber == 2) {
             fputs(data->stringStream->str().c_str(), stderr);
@@ -992,8 +1066,17 @@ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
     return info.Env().Undefined();
 }
 
+static void addonFreeLlamaBackend(Napi::Env env, int* data) {
+    if (backendInitialized) {
+        llama_backend_free();
+        backendInitialized = false;
+    }
+}
+
 Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
-    llama_backend_init(
+    llama_backend_init();
+    backendInitialized = true;
+
     exports.DefineProperties({
         Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
         Napi::PropertyDescriptor::Function("setLogger", setLogger),
@@ -1007,6 +1090,8 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
 
     llama_log_set(addonLlamaCppLogCallback, nullptr);
 
+    exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));
+
     return exports;
 }
 
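The new `AddonModel` instance methods registered above (`getEmbeddingVectorSize`, `getTokenType`, `shouldPrependBosToken`) are plain N-API bindings, so they are callable on any loaded build of `llama-addon.node`. A minimal smoke-test sketch — the binary path and the `AddonModel` constructor arguments are assumptions here, not something this diff shows:

// Hypothetical direct use of the addon methods added above; the binary path
// and the AddonModel constructor signature are assumed, not confirmed.
import {createRequire} from "module";

const require = createRequire(import.meta.url);
const addon = require("./llamaBins/linux-x64/llama-addon.node");

const model = new addon.AddonModel("./model.gguf", {});

console.log("embedding vector size:", model.getEmbeddingVectorSize());
console.log("type of token 0:", model.getTokenType(0)); // LLAMA_TOKEN_TYPE_* as an int32
console.log("prepend BOS:", model.shouldPrependBosToken()); // llama_add_bos_token, with an SPM fallback

// Note: the token getters now route through getNapiToken/getNapiControlToken,
// so undefined/unknown (or non-control) special tokens come back as -1.
model.dispose();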
package/llama/gitRelease.bundle
CHANGED
Binary file
package/llama/gpuInfo/cuda-gpu-info.cu
CHANGED
@@ -15,9 +15,9 @@
 #endif
 
 
-typedef void (*
+typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message);
 
-bool gpuInfoSetCudaDevice(const int device,
+bool gpuInfoSetCudaDevice(const int device, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
     int current_device;
     auto getDeviceResult = cudaGetDevice(&current_device);
 
@@ -40,7 +40,7 @@ bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCa
     return true;
 }
 
-bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used,
+bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
     gpuInfoSetCudaDevice(device, errorLogCallback);
 
     size_t freeMem;
@@ -58,7 +58,7 @@ bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfo
     return true;
 }
 
-int gpuInfoGetCudaDeviceCount(
+int gpuInfoGetCudaDeviceCount(gpuInfoCudaErrorLogCallback_t errorLogCallback) {
     int deviceCount;
     auto getDeviceCountResult = cudaGetDeviceCount(&deviceCount);
 
@@ -70,7 +70,7 @@ int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
     return deviceCount;
 }
 
-bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used,
+bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
     int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback);
 
     if (deviceCount < 0) {
package/llama/gpuInfo/cuda-gpu-info.h
CHANGED
@@ -2,6 +2,6 @@
 
 #include <stddef.h>
 
-typedef void (*
+typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message);
 
-bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used,
+bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback);
package/llama/gpuInfo/vulkan-gpu-info.cpp
ADDED
@@ -0,0 +1,65 @@
+#include <stddef.h>
+
+#include <vulkan/vulkan.hpp>
+
+typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
+
+bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
+    vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2);
+    vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {});
+    vk::Instance instance = vk::createInstance(createInfo);
+
+    auto physicalDevices = instance.enumeratePhysicalDevices();
+
+    size_t usedMem = 0;
+    size_t totalMem = 0;
+
+    for (size_t i = 0; i < physicalDevices.size(); i++) {
+        vk::PhysicalDevice physicalDevice = physicalDevices[i];
+        vk::PhysicalDeviceMemoryProperties memProps = physicalDevice.getMemoryProperties();
+        vk::PhysicalDeviceProperties deviceProps = physicalDevice.getProperties();
+
+        if (deviceProps.deviceType == vk::PhysicalDeviceType::eCpu) {
+            // ignore CPU devices, as we don't want to count RAM from the CPU as VRAM
+            continue;
+        }
+
+        std::vector<vk::ExtensionProperties> extensionProperties = physicalDevice.enumerateDeviceExtensionProperties();
+        bool memoryBudgetExtensionSupported =
+            std::any_of(
+                extensionProperties.begin(),
+                extensionProperties.end(),
+                [](const vk::ExtensionProperties& ext) { return std::string(ext.extensionName.data()) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME;}
+            );
+
+        if (memoryBudgetExtensionSupported) {
+            vk::PhysicalDeviceMemoryBudgetPropertiesEXT memoryBudgetProperties;
+            vk::PhysicalDeviceMemoryProperties2 memProps2 = {};
+            memProps2.pNext = &memoryBudgetProperties;
+
+            physicalDevice.getMemoryProperties2(&memProps2);
+
+            for (uint32_t i = 0; i < memProps.memoryHeapCount; ++i) {
+                if (memProps.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
+                    totalMem += memProps.memoryHeaps[i].size;
+                    usedMem += memoryBudgetProperties.heapUsage[i];
+                    break;
+                }
+            }
+        } else {
+            // VK_EXT_memory_budget extension is not supported, so we cannot determine used memory
+            warningLogCallback(
+                (
+                    "Vulkan VK_EXT_memory_budget extension not supported for device \"" +
+                    std::string(deviceProps.deviceName.data()) + "\", so VRAM info cannot be determained for it"
+                )
+                    .c_str()
+            );
+            return false;
+        }
+    }
+
+    *total = totalMem;
+    *used = usedMem;
+    return true;
+}
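These per-backend numbers feed the addon's `getGpuVramInfo` binding, which the updated `dist/bindings/Llama.js` presumably consumes. A hedged sketch of reading it — the export and the `total`/`used` field names are assumptions based on the accumulator names in `getGpuVramInfo` above, not shown in this diff:

// Hypothetical reading of the aggregated VRAM numbers; the export and the
// `total`/`used` field names are assumed from getGpuVramInfo's accumulators.
import {createRequire} from "module";

const require = createRequire(import.meta.url);
const addon = require("./llamaBins/linux-x64-vulkan/llama-addon.node");

const vramInfo = addon.getGpuVramInfo();
console.log(`VRAM used: ${vramInfo.used} / ${vramInfo.total} bytes`);
// If any Vulkan device lacks VK_EXT_memory_budget, the helper above returns
// false and the Vulkan backend contributes nothing to these totals.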
package/llamaBins/linux-arm64/.buildMetadata.json
CHANGED
@@ -1 +1 @@
-{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"arm64","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"
+{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"arm64","computeLayers":{"metal":false,"cuda":false,"vulkan":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2254"}}}
package/llamaBins/linux-arm64/llama-addon.node
CHANGED
Binary file
package/llamaBins/linux-armv7l/.buildMetadata.json
CHANGED
@@ -1 +1 @@
-{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"armv7l","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"
+{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"armv7l","computeLayers":{"metal":false,"cuda":false,"vulkan":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2254"}}}
package/llamaBins/linux-armv7l/llama-addon.node
CHANGED
Binary file
package/llamaBins/linux-x64/.buildMetadata.json
CHANGED
@@ -1 +1 @@
-{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"
+{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":false,"vulkan":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2254"}}}
package/llamaBins/linux-x64/llama-addon.node
CHANGED
Binary file
package/llamaBins/linux-x64-cuda/.buildMetadata.json
CHANGED
@@ -1 +1 @@
-{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":true},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"
+{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":true,"vulkan":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2254"}}}
package/llamaBins/linux-x64-cuda/llama-addon.node
CHANGED
Binary file
package/llamaBins/linux-x64-vulkan/.buildMetadata.json
ADDED
@@ -0,0 +1 @@
+{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":false,"vulkan":true},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2254"}}}
package/llamaBins/linux-x64-vulkan/llama-addon.node
ADDED
Binary file
package/llamaBins/mac-arm64-metal/.buildMetadata.json
CHANGED
@@ -1 +1 @@
-{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","arch":"arm64","computeLayers":{"metal":true,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"
+{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","arch":"arm64","computeLayers":{"metal":true,"cuda":false,"vulkan":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2254"}}}
|