node-llama-cpp 3.0.0-beta.10 → 3.0.0-beta.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/README.md +4 -4
  2. package/dist/bindings/AddonTypes.d.ts +3 -0
  3. package/dist/bindings/Llama.d.ts +1 -0
  4. package/dist/bindings/Llama.js +7 -1
  5. package/dist/bindings/Llama.js.map +1 -1
  6. package/dist/bindings/getLlama.d.ts +24 -1
  7. package/dist/bindings/getLlama.js +10 -4
  8. package/dist/bindings/getLlama.js.map +1 -1
  9. package/dist/bindings/types.d.ts +1 -0
  10. package/dist/bindings/types.js.map +1 -1
  11. package/dist/bindings/utils/compileLLamaCpp.js +2 -0
  12. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  13. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +2 -0
  14. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
  15. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +26 -0
  16. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +43 -0
  17. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +1 -0
  18. package/dist/bindings/utils/resolveCustomCmakeOptions.js +2 -0
  19. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
  20. package/dist/cli/cli.js +4 -0
  21. package/dist/cli/cli.js.map +1 -1
  22. package/dist/cli/commands/BuildCommand.d.ts +2 -1
  23. package/dist/cli/commands/BuildCommand.js +11 -9
  24. package/dist/cli/commands/BuildCommand.js.map +1 -1
  25. package/dist/cli/commands/ChatCommand.d.ts +2 -2
  26. package/dist/cli/commands/ChatCommand.js +3 -39
  27. package/dist/cli/commands/ChatCommand.js.map +1 -1
  28. package/dist/cli/commands/CompleteCommand.d.ts +25 -0
  29. package/dist/cli/commands/CompleteCommand.js +278 -0
  30. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  31. package/dist/cli/commands/DebugCommand.js +16 -13
  32. package/dist/cli/commands/DebugCommand.js.map +1 -1
  33. package/dist/cli/commands/DownloadCommand.d.ts +2 -1
  34. package/dist/cli/commands/DownloadCommand.js +11 -9
  35. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  36. package/dist/cli/commands/InfillCommand.d.ts +27 -0
  37. package/dist/cli/commands/InfillCommand.js +316 -0
  38. package/dist/cli/commands/InfillCommand.js.map +1 -0
  39. package/dist/cli/utils/logEnabledComputeLayers.d.ts +8 -0
  40. package/dist/cli/utils/logEnabledComputeLayers.js +11 -0
  41. package/dist/cli/utils/logEnabledComputeLayers.js.map +1 -0
  42. package/dist/config.d.ts +1 -0
  43. package/dist/config.js +5 -2
  44. package/dist/config.js.map +1 -1
  45. package/dist/consts.d.ts +1 -0
  46. package/dist/consts.js +2 -0
  47. package/dist/consts.js.map +1 -0
  48. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +2 -33
  49. package/dist/evaluator/LlamaChat/LlamaChat.js +7 -28
  50. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  51. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +1 -1
  52. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  53. package/dist/evaluator/LlamaCompletion.d.ts +148 -0
  54. package/dist/evaluator/LlamaCompletion.js +402 -0
  55. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  56. package/dist/evaluator/LlamaContext/LlamaContext.js +6 -2
  57. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  58. package/dist/evaluator/LlamaModel.d.ts +10 -1
  59. package/dist/evaluator/LlamaModel.js +33 -3
  60. package/dist/evaluator/LlamaModel.js.map +1 -1
  61. package/dist/index.d.ts +6 -4
  62. package/dist/index.js +4 -2
  63. package/dist/index.js.map +1 -1
  64. package/dist/types.d.ts +31 -0
  65. package/dist/utils/UnsupportedError.d.ts +2 -0
  66. package/dist/utils/UnsupportedError.js +7 -0
  67. package/dist/utils/UnsupportedError.js.map +1 -0
  68. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  69. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  70. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  71. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  72. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  73. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  74. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  75. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  76. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  77. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  78. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  79. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  80. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  81. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  82. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  83. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  84. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +1 -1
  85. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  86. package/dist/utils/getBuildDefaults.d.ts +1 -0
  87. package/dist/utils/getBuildDefaults.js +3 -2
  88. package/dist/utils/getBuildDefaults.js.map +1 -1
  89. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  90. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  91. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  92. package/llama/CMakeLists.txt +20 -0
  93. package/llama/addon.cpp +97 -12
  94. package/llama/binariesGithubRelease.json +1 -1
  95. package/llama/gitRelease.bundle +0 -0
  96. package/llama/gpuInfo/cuda-gpu-info.cu +5 -5
  97. package/llama/gpuInfo/cuda-gpu-info.h +2 -2
  98. package/llama/gpuInfo/vulkan-gpu-info.cpp +65 -0
  99. package/llama/gpuInfo/vulkan-gpu-info.h +7 -0
  100. package/llama/llama.cpp.info.json +1 -1
  101. package/llamaBins/linux-arm64/.buildMetadata.json +1 -1
  102. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  103. package/llamaBins/linux-armv7l/.buildMetadata.json +1 -1
  104. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  105. package/llamaBins/linux-x64/.buildMetadata.json +1 -1
  106. package/llamaBins/linux-x64/llama-addon.node +0 -0
  107. package/llamaBins/linux-x64-cuda/.buildMetadata.json +1 -1
  108. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  109. package/llamaBins/linux-x64-vulkan/.buildMetadata.json +1 -0
  110. package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
  111. package/llamaBins/mac-arm64-metal/.buildMetadata.json +1 -1
  112. package/llamaBins/mac-arm64-metal/ggml-metal.metal +1035 -132
  113. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  114. package/llamaBins/mac-x64/.buildMetadata.json +1 -1
  115. package/llamaBins/mac-x64/llama-addon.node +0 -0
  116. package/llamaBins/win-x64/.buildMetadata.json +1 -1
  117. package/llamaBins/win-x64/llama-addon.exp +0 -0
  118. package/llamaBins/win-x64/llama-addon.lib +0 -0
  119. package/llamaBins/win-x64/llama-addon.node +0 -0
  120. package/llamaBins/win-x64-cuda/.buildMetadata.json +1 -1
  121. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  122. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  123. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  124. package/llamaBins/win-x64-vulkan/.buildMetadata.json +1 -0
  125. package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
  126. package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
  127. package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
  128. package/package.json +3 -2
  129. package/dist/AbortError.d.ts +0 -2
  130. package/dist/AbortError.js +0 -7
  131. package/dist/AbortError.js.map +0 -1
package/llama/addon.cpp CHANGED
@@ -12,6 +12,9 @@
12
12
  #ifdef GPU_INFO_USE_CUBLAS
13
13
  # include "gpuInfo/cuda-gpu-info.h"
14
14
  #endif
15
+ #ifdef GPU_INFO_USE_VULKAN
16
+ # include "gpuInfo/vulkan-gpu-info.h"
17
+ #endif
15
18
  #ifdef GPU_INFO_USE_METAL
16
19
  # include "gpuInfo/metal-gpu-info.h"
17
20
  #endif
@@ -35,6 +38,7 @@ using AddonThreadSafeLogCallbackFunction =
35
38
  AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
36
39
  bool addonJsLoggerCallbackSet = false;
37
40
  int addonLoggerLogLevel = 5;
41
+ bool backendInitialized = false;
38
42
 
39
43
  std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
40
44
  std::vector<char> result(8, 0);
@@ -51,10 +55,15 @@ std::string addon_model_token_to_piece(const struct llama_model* model, llama_to
51
55
  }
52
56
 
53
57
  #ifdef GPU_INFO_USE_CUBLAS
54
- void lodCudaError(const char* message) {
58
+ void logCudaError(const char* message) {
55
59
  addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
56
60
  }
57
61
  #endif
62
+ #ifdef GPU_INFO_USE_VULKAN
63
+ void logVulkanWarning(const char* message) {
64
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr);
65
+ }
66
+ #endif
58
67
 
59
68
  Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
60
69
  uint64_t total = 0;
@@ -63,7 +72,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
63
72
  #ifdef GPU_INFO_USE_CUBLAS
64
73
  size_t cudaDeviceTotal = 0;
65
74
  size_t cudaDeviceUsed = 0;
66
- bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, lodCudaError);
75
+ bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
67
76
 
68
77
  if (cudeGetInfoSuccess) {
69
78
  total += cudaDeviceTotal;
@@ -71,6 +80,17 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
71
80
  }
72
81
  #endif
73
82
 
83
+ #ifdef GPU_INFO_USE_VULKAN
84
+ uint64_t vulkanDeviceTotal = 0;
85
+ uint64_t vulkanDeviceUsed = 0;
86
+ const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, logVulkanWarning);
87
+
88
+ if (vulkanDeviceSupportsMemoryBudgetExtension) {
89
+ total += vulkanDeviceTotal;
90
+ used += vulkanDeviceUsed;
91
+ }
92
+ #endif
93
+
74
94
  #ifdef GPU_INFO_USE_METAL
75
95
  uint64_t metalDeviceTotal = 0;
76
96
  uint64_t metalDeviceUsed = 0;
@@ -87,6 +107,26 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
87
107
  return result;
88
108
  }
89
109
 
110
+ static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
111
+ auto tokenType = llama_token_get_type(model, token);
112
+
113
+ if (tokenType == LLAMA_TOKEN_TYPE_UNDEFINED || tokenType == LLAMA_TOKEN_TYPE_UNKNOWN) {
114
+ return Napi::Number::From(info.Env(), -1);
115
+ }
116
+
117
+ return Napi::Number::From(info.Env(), token);
118
+ }
119
+
120
+ static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
121
+ auto tokenType = llama_token_get_type(model, token);
122
+
123
+ if (tokenType != LLAMA_TOKEN_TYPE_CONTROL) {
124
+ return Napi::Number::From(info.Env(), -1);
125
+ }
126
+
127
+ return Napi::Number::From(info.Env(), token);
128
+ }
129
+
90
130
  class AddonModel : public Napi::ObjectWrap<AddonModel> {
91
131
  public:
92
132
  llama_model_params model_params;
@@ -119,7 +159,6 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
119
159
  }
120
160
  }
121
161
 
122
- llama_backend_init(false);
123
162
  model = llama_load_model_from_file(modelPath.c_str(), model_params);
124
163
 
125
164
  if (model == NULL) {
@@ -203,6 +242,15 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
203
242
  return Napi::Number::From(info.Env(), llama_n_ctx_train(model));
204
243
  }
205
244
 
245
+ Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
246
+ if (disposed) {
247
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
248
+ return info.Env().Undefined();
249
+ }
250
+
251
+ return Napi::Number::From(info.Env(), llama_n_embd(model));
252
+ }
253
+
206
254
  Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
207
255
  if (disposed) {
208
256
  Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
@@ -239,7 +287,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
239
287
  return info.Env().Undefined();
240
288
  }
241
289
 
242
- return Napi::Number::From(info.Env(), llama_token_bos(model));
290
+ return getNapiControlToken(info, model, llama_token_bos(model));
243
291
  }
244
292
  Napi::Value TokenEos(const Napi::CallbackInfo& info) {
245
293
  if (disposed) {
@@ -247,7 +295,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
247
295
  return info.Env().Undefined();
248
296
  }
249
297
 
250
- return Napi::Number::From(info.Env(), llama_token_eos(model));
298
+ return getNapiControlToken(info, model, llama_token_eos(model));
251
299
  }
252
300
  Napi::Value TokenNl(const Napi::CallbackInfo& info) {
253
301
  if (disposed) {
@@ -255,7 +303,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
255
303
  return info.Env().Undefined();
256
304
  }
257
305
 
258
- return Napi::Number::From(info.Env(), llama_token_nl(model));
306
+ return getNapiToken(info, model, llama_token_nl(model));
259
307
  }
260
308
  Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
261
309
  if (disposed) {
@@ -263,7 +311,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
263
311
  return info.Env().Undefined();
264
312
  }
265
313
 
266
- return Napi::Number::From(info.Env(), llama_token_prefix(model));
314
+ return getNapiControlToken(info, model, llama_token_prefix(model));
267
315
  }
268
316
  Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
269
317
  if (disposed) {
@@ -271,7 +319,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
271
319
  return info.Env().Undefined();
272
320
  }
273
321
 
274
- return Napi::Number::From(info.Env(), llama_token_middle(model));
322
+ return getNapiControlToken(info, model, llama_token_middle(model));
275
323
  }
276
324
  Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
277
325
  if (disposed) {
@@ -279,7 +327,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
279
327
  return info.Env().Undefined();
280
328
  }
281
329
 
282
- return Napi::Number::From(info.Env(), llama_token_suffix(model));
330
+ return getNapiControlToken(info, model, llama_token_suffix(model));
283
331
  }
284
332
  Napi::Value EotToken(const Napi::CallbackInfo& info) {
285
333
  if (disposed) {
@@ -287,7 +335,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
287
335
  return info.Env().Undefined();
288
336
  }
289
337
 
290
- return Napi::Number::From(info.Env(), llama_token_eot(model));
338
+ return getNapiControlToken(info, model, llama_token_eot(model));
291
339
  }
292
340
  Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
293
341
  if (disposed) {
@@ -308,6 +356,29 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
308
356
  return Napi::String::New(info.Env(), ss.str());
309
357
  }
310
358
 
359
+ Napi::Value GetTokenType(const Napi::CallbackInfo& info) {
360
+ if (disposed) {
361
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
362
+ return info.Env().Undefined();
363
+ }
364
+
365
+ if (info[0].IsNumber() == false) {
366
+ return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_TYPE_UNDEFINED));
367
+ }
368
+
369
+ int token = info[0].As<Napi::Number>().Int32Value();
370
+ auto tokenType = llama_token_get_type(model, token);
371
+
372
+ return Napi::Number::From(info.Env(), int32_t(tokenType));
373
+ }
374
+ Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info) {
375
+ const int addBos = llama_add_bos_token(model);
376
+
377
+ bool shouldPrependBos = addBos != -1 ? bool(addBos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
378
+
379
+ return Napi::Boolean::New(info.Env(), shouldPrependBos);
380
+ }
381
+
311
382
  static void init(Napi::Object exports) {
312
383
  exports.Set(
313
384
  "AddonModel",
@@ -318,6 +389,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
318
389
  InstanceMethod("tokenize", &AddonModel::Tokenize),
319
390
  InstanceMethod("detokenize", &AddonModel::Detokenize),
320
391
  InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
392
+ InstanceMethod("getEmbeddingVectorSize", &AddonModel::GetEmbeddingVectorSize),
321
393
  InstanceMethod("getTotalSize", &AddonModel::GetTotalSize),
322
394
  InstanceMethod("getTotalParameters", &AddonModel::GetTotalParameters),
323
395
  InstanceMethod("getModelDescription", &AddonModel::GetModelDescription),
@@ -329,6 +401,8 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
329
401
  InstanceMethod("suffixToken", &AddonModel::SuffixToken),
330
402
  InstanceMethod("eotToken", &AddonModel::EotToken),
331
403
  InstanceMethod("getTokenString", &AddonModel::GetTokenString),
404
+ InstanceMethod("getTokenType", &AddonModel::GetTokenType),
405
+ InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
332
406
  InstanceMethod("dispose", &AddonModel::Dispose),
333
407
  }
334
408
  )
@@ -896,7 +970,7 @@ void addonCallJsLogCallback(
896
970
  called = false;
897
971
  }
898
972
  }
899
-
973
+
900
974
  if (!called && data != nullptr) {
901
975
  if (data->logLevelNumber == 2) {
902
976
  fputs(data->stringStream->str().c_str(), stderr);
@@ -992,8 +1066,17 @@ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
992
1066
  return info.Env().Undefined();
993
1067
  }
994
1068
 
1069
+ static void addonFreeLlamaBackend(Napi::Env env, int* data) {
1070
+ if (backendInitialized) {
1071
+ llama_backend_free();
1072
+ backendInitialized = false;
1073
+ }
1074
+ }
1075
+
995
1076
  Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
996
- llama_backend_init(false);
1077
+ llama_backend_init();
1078
+ backendInitialized = true;
1079
+
997
1080
  exports.DefineProperties({
998
1081
  Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
999
1082
  Napi::PropertyDescriptor::Function("setLogger", setLogger),
@@ -1007,6 +1090,8 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
1007
1090
 
1008
1091
  llama_log_set(addonLlamaCppLogCallback, nullptr);
1009
1092
 
1093
+ exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));
1094
+
1010
1095
  return exports;
1011
1096
  }
1012
1097
 
@@ -1,3 +1,3 @@
1
1
  {
2
- "release": "b2127"
2
+ "release": "b2254"
3
3
  }
Binary file
@@ -15,9 +15,9 @@
15
15
  #endif
16
16
 
17
17
 
18
- typedef void (*gpuInfoErrorLogCallback_t)(const char* message);
18
+ typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message);
19
19
 
20
- bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCallback) {
20
+ bool gpuInfoSetCudaDevice(const int device, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
21
21
  int current_device;
22
22
  auto getDeviceResult = cudaGetDevice(&current_device);
23
23
 
@@ -40,7 +40,7 @@ bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCa
40
40
  return true;
41
41
  }
42
42
 
43
- bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
43
+ bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
44
44
  gpuInfoSetCudaDevice(device, errorLogCallback);
45
45
 
46
46
  size_t freeMem;
@@ -58,7 +58,7 @@ bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfo
58
58
  return true;
59
59
  }
60
60
 
61
- int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
61
+ int gpuInfoGetCudaDeviceCount(gpuInfoCudaErrorLogCallback_t errorLogCallback) {
62
62
  int deviceCount;
63
63
  auto getDeviceCountResult = cudaGetDeviceCount(&deviceCount);
64
64
 
@@ -70,7 +70,7 @@ int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
70
70
  return deviceCount;
71
71
  }
72
72
 
73
- bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
73
+ bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
74
74
  int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback);
75
75
 
76
76
  if (deviceCount < 0) {
@@ -2,6 +2,6 @@
2
2
 
3
3
  #include <stddef.h>
4
4
 
5
- typedef void (*gpuInfoErrorLogCallback_t)(const char* message);
5
+ typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message);
6
6
 
7
- bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback);
7
+ bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback);
@@ -0,0 +1,65 @@
1
+ #include <stddef.h>
2
+
3
+ #include <vulkan/vulkan.hpp>
4
+
5
+ typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
6
+
7
+ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
8
+ vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2);
9
+ vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {});
10
+ vk::Instance instance = vk::createInstance(createInfo);
11
+
12
+ auto physicalDevices = instance.enumeratePhysicalDevices();
13
+
14
+ size_t usedMem = 0;
15
+ size_t totalMem = 0;
16
+
17
+ for (size_t i = 0; i < physicalDevices.size(); i++) {
18
+ vk::PhysicalDevice physicalDevice = physicalDevices[i];
19
+ vk::PhysicalDeviceMemoryProperties memProps = physicalDevice.getMemoryProperties();
20
+ vk::PhysicalDeviceProperties deviceProps = physicalDevice.getProperties();
21
+
22
+ if (deviceProps.deviceType == vk::PhysicalDeviceType::eCpu) {
23
+ // ignore CPU devices, as we don't want to count RAM from the CPU as VRAM
24
+ continue;
25
+ }
26
+
27
+ std::vector<vk::ExtensionProperties> extensionProperties = physicalDevice.enumerateDeviceExtensionProperties();
28
+ bool memoryBudgetExtensionSupported =
29
+ std::any_of(
30
+ extensionProperties.begin(),
31
+ extensionProperties.end(),
32
+ [](const vk::ExtensionProperties& ext) { return std::string(ext.extensionName.data()) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME;}
33
+ );
34
+
35
+ if (memoryBudgetExtensionSupported) {
36
+ vk::PhysicalDeviceMemoryBudgetPropertiesEXT memoryBudgetProperties;
37
+ vk::PhysicalDeviceMemoryProperties2 memProps2 = {};
38
+ memProps2.pNext = &memoryBudgetProperties;
39
+
40
+ physicalDevice.getMemoryProperties2(&memProps2);
41
+
42
+ for (uint32_t i = 0; i < memProps.memoryHeapCount; ++i) {
43
+ if (memProps.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
44
+ totalMem += memProps.memoryHeaps[i].size;
45
+ usedMem += memoryBudgetProperties.heapUsage[i];
46
+ break;
47
+ }
48
+ }
49
+ } else {
50
+ // VK_EXT_memory_budget extension is not supported, so we cannot determine used memory
51
+ warningLogCallback(
52
+ (
53
+ "Vulkan VK_EXT_memory_budget extension not supported for device \"" +
54
+ std::string(deviceProps.deviceName.data()) + "\", so VRAM info cannot be determained for it"
55
+ )
56
+ .c_str()
57
+ );
58
+ return false;
59
+ }
60
+ }
61
+
62
+ *total = totalMem;
63
+ *used = usedMem;
64
+ return true;
65
+ }
@@ -0,0 +1,7 @@
1
+ #pragma once
2
+
3
+ #include <stddef.h>
4
+
5
+ typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
6
+
7
+ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback);
@@ -1,4 +1,4 @@
1
1
  {
2
- "tag": "b2127",
2
+ "tag": "b2254",
3
3
  "llamaCppGithubRepo": "ggerganov/llama.cpp"
4
4
  }
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"arm64","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2127"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"arm64","computeLayers":{"metal":false,"cuda":false,"vulkan":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2254"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"armv7l","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2127"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"armv7l","computeLayers":{"metal":false,"cuda":false,"vulkan":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2254"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2127"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":false,"vulkan":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2254"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":true},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2127"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":true,"vulkan":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2254"}}}
@@ -0,0 +1 @@
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":false,"vulkan":true},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2254"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","arch":"arm64","computeLayers":{"metal":true,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2127"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","arch":"arm64","computeLayers":{"metal":true,"cuda":false,"vulkan":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2254"}}}