npm - node-llama-cpp - Versions diffs - 3.2.0 → 3.3.0 - Mend

node-llama-cpp 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (144) hide show

package/llama/addon/addon.cpp CHANGED Viewed

@@ -151,6 +151,22 @@ class AddonBackendUnloadWorker : public Napi::AsyncWorker {
         }
 };
+Napi::Value addonLoadBackends(const Napi::CallbackInfo& info) {
+    const bool forceLoadLibraries = info.Length() == 0
+        ? false
+        : info[0].IsBoolean()
+            ? info[0].As<Napi::Boolean>().Value()
+            : false;
+    ggml_backend_reg_count();
+    if (forceLoadLibraries) {
+        ggml_backend_load_all();
+    }
+    return info.Env().Undefined();
+}
 Napi::Value addonInit(const Napi::CallbackInfo& info) {
     if (backendInitialized) {
         Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
@@ -205,6 +221,7 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
         Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
         Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
         Napi::PropertyDescriptor::Function("getSwapInfo", getSwapInfo),
+        Napi::PropertyDescriptor::Function("loadBackends", addonLoadBackends),
         Napi::PropertyDescriptor::Function("init", addonInit),
         Napi::PropertyDescriptor::Function("dispose", addonDispose),
     });

package/llama/addon/globals/getGpuInfo.cpp CHANGED Viewed

@@ -1,22 +1,15 @@
 #include "getGpuInfo.h"
 #include "addonLog.h"
-#ifdef GPU_INFO_USE_CUDA
-#  include "../../gpuInfo/cuda-gpu-info.h"
+#ifdef __APPLE__
+    #include <TargetConditionals.h>
 #endif
 #ifdef GPU_INFO_USE_VULKAN
 #  include "../../gpuInfo/vulkan-gpu-info.h"
 #endif
-#ifdef GPU_INFO_USE_METAL
-#  include "../../gpuInfo/metal-gpu-info.h"
-#endif
-#ifdef GPU_INFO_USE_CUDA
-void logCudaError(const char* message) {
-    addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
-}
-#endif
 #ifdef GPU_INFO_USE_VULKAN
 void logVulkanWarning(const char* message) {
     addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr);
@@ -24,20 +17,31 @@ void logVulkanWarning(const char* message) {
 #endif
 Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
+    ggml_backend_dev_t device = NULL;
+    size_t deviceTotal = 0;
+    size_t deviceFree = 0;
     uint64_t total = 0;
     uint64_t used = 0;
     uint64_t unifiedVramSize = 0;
-#ifdef GPU_INFO_USE_CUDA
-    size_t cudaDeviceTotal = 0;
-    size_t cudaDeviceUsed = 0;
-    bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
+    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
+        device = ggml_backend_dev_get(i);
+        if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) {
+            deviceTotal = 0;
+            deviceFree = 0;
+            ggml_backend_dev_memory(device, &deviceFree, &deviceTotal);
-    if (cudeGetInfoSuccess) {
-        total += cudaDeviceTotal;
-        used += cudaDeviceUsed;
-    }
+            total += deviceTotal;
+            used += deviceTotal - deviceFree;
+#if defined(__arm64__) || defined(__aarch64__)
+            if (std::string(ggml_backend_dev_name(device)) == "Metal") {
+                unifiedVramSize += deviceTotal;
+            }
 #endif
+        }
+    }
 #ifdef GPU_INFO_USE_VULKAN
     uint64_t vulkanDeviceTotal = 0;
@@ -46,23 +50,15 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
     const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, &vulkanDeviceUnifiedVramSize, logVulkanWarning);
     if (vulkanDeviceSupportsMemoryBudgetExtension) {
-        total += vulkanDeviceTotal;
-        used += vulkanDeviceUsed;
+        if (vulkanDeviceUnifiedVramSize > total) {
+            // this means that we counted memory from devices that aren't used by llama.cpp
+            vulkanDeviceUnifiedVramSize = 0;
+        }
         unifiedVramSize += vulkanDeviceUnifiedVramSize;
     }
 #endif
-#ifdef GPU_INFO_USE_METAL
-    uint64_t metalDeviceTotal = 0;
-    uint64_t metalDeviceUsed = 0;
-    uint64_t metalDeviceUnifiedVramSize = 0;
-    getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed, &metalDeviceUnifiedVramSize);
-    total += metalDeviceTotal;
-    used += metalDeviceUsed;
-    unifiedVramSize += metalDeviceUnifiedVramSize;
-#endif
     Napi::Object result = Napi::Object::New(info.Env());
     result.Set("total", Napi::Number::From(info.Env(), total));
     result.Set("used", Napi::Number::From(info.Env(), used));
@@ -74,17 +70,13 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
 Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
     std::vector<std::string> deviceNames;
-#ifdef GPU_INFO_USE_CUDA
-    gpuInfoGetCudaDeviceNames(&deviceNames, logCudaError);
-#endif
+    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
+        ggml_backend_dev_t device = ggml_backend_dev_get(i);
+        if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) {
-#ifdef GPU_INFO_USE_VULKAN
-    gpuInfoGetVulkanDeviceNames(&deviceNames, logVulkanWarning);
-#endif
-#ifdef GPU_INFO_USE_METAL
-    getMetalGpuDeviceNames(&deviceNames);
-#endif
+            deviceNames.push_back(std::string(ggml_backend_dev_description(device)));
+        }
+    }
     Napi::Object result = Napi::Object::New(info.Env());
@@ -98,17 +90,27 @@ Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
 }
 Napi::Value getGpuType(const Napi::CallbackInfo& info) {
-#ifdef GPU_INFO_USE_CUDA
-    return Napi::String::New(info.Env(), "cuda");
-#endif
-#ifdef GPU_INFO_USE_VULKAN
-    return Napi::String::New(info.Env(), "vulkan");
-#endif
+    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
+        ggml_backend_dev_t device = ggml_backend_dev_get(i);
+        const auto deviceName = std::string(ggml_backend_dev_name(device));
+        if (deviceName == "Metal") {
+            return Napi::String::New(info.Env(), "metal");
+        } else if (std::string(deviceName).find("Vulkan") == 0) {
+            return Napi::String::New(info.Env(), "vulkan");
+        } else if (std::string(deviceName).find("CUDA") == 0 || std::string(deviceName).find("ROCm") == 0 || std::string(deviceName).find("MUSA") == 0) {
+            return Napi::String::New(info.Env(), "cuda");
+        }
+    }
-#ifdef GPU_INFO_USE_METAL
-    return Napi::String::New(info.Env(), "metal");
-#endif
+    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
+        ggml_backend_dev_t device = ggml_backend_dev_get(i);
+        const auto deviceName = std::string(ggml_backend_dev_name(device));
+        if (deviceName == "CPU") {
+            return Napi::Boolean::New(info.Env(), false);
+        }
+    }
     return info.Env().Undefined();
-}
+}

package/llama/binariesGithubRelease.json CHANGED Viewed

@@ -1,3 +1,3 @@
 {
-    "release": "b3995"
+    "release": "b4234"
 }

package/llama/gitRelease.bundle CHANGED Viewed

Binary file

package/llama/gpuInfo/vulkan-gpu-info.cpp CHANGED Viewed

@@ -80,11 +80,3 @@ static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedM
 bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, size_t* unifiedMemorySize, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
     return enumerateVulkanDevices(total, used, unifiedMemorySize, false, nullptr, warningLogCallback);
 }
-bool gpuInfoGetVulkanDeviceNames(std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
-    size_t vulkanDeviceTotal = 0;
-    size_t vulkanDeviceUsed = 0;
-    size_t unifiedMemorySize = 0;
-    return enumerateVulkanDevices(&vulkanDeviceTotal, &vulkanDeviceUsed, &unifiedMemorySize, true, deviceNames, warningLogCallback);
-}

package/llama/gpuInfo/vulkan-gpu-info.h CHANGED Viewed

@@ -6,4 +6,3 @@
 typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
 bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, size_t* unifiedMemorySize, gpuInfoVulkanWarningLogCallback_t warningLogCallback);
-bool gpuInfoGetVulkanDeviceNames(std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback);

package/llama/grammars/README.md CHANGED Viewed

@@ -124,7 +124,7 @@ You can use GBNF grammars:
 - In [llama-cli](../examples/main), passed as the `--json` / `-j` flag
 - To convert to a grammar ahead of time:
     - in CLI, with [examples/json_schema_to_grammar.py](../examples/json_schema_to_grammar.py)
-    - in JavaScript with [json-schema-to-grammar.mjs](../examples/server/public/json-schema-to-grammar.mjs) (this is used by the [server](../examples/server)'s Web UI)
+    - in JavaScript with [json-schema-to-grammar.mjs](../examples/server/public_legacy/json-schema-to-grammar.mjs) (this is used by the [server](../examples/server)'s Web UI)
 Take a look at [tests](../tests/test-json-schema-to-grammar.cpp) to see which features are likely supported (you'll also find usage examples in https://github.com/ggerganov/llama.cpp/pull/5978, https://github.com/ggerganov/llama.cpp/pull/6659 & https://github.com/ggerganov/llama.cpp/pull/6555).

package/llama/llama.cpp.info.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-    "tag": "b3995",
+    "tag": "b4234",
     "llamaCppGithubRepo": "ggerganov/llama.cpp"
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "node-llama-cpp",
-  "version": "3.2.0",
+  "version": "3.3.0",
   "description": "Run AI models locally on your machine with node.js bindings for llama.cpp. Enforce a JSON schema on the model output on the generation level",
   "main": "./dist/index.js",
   "type": "module",
@@ -112,7 +112,10 @@
     "batching",
     "gpu"
   ],
-  "author": "Gilad S.",
+  "author": {
+    "name": "Gilad S.",
+    "url": "https://github.com/giladgd"
+  },
   "license": "MIT",
   "preferUnplugged": true,
   "bugs": {
@@ -124,52 +127,53 @@
   },
   "homepage": "https://node-llama-cpp.withcat.ai",
   "devDependencies": {
-    "@commitlint/cli": "^19.5.0",
-    "@commitlint/config-conventional": "^19.5.0",
-    "@eslint/compat": "^1.2.2",
+    "@commitlint/cli": "^19.6.0",
+    "@commitlint/config-conventional": "^19.6.0",
+    "@eslint/compat": "^1.2.3",
     "@fontsource/inter": "^5.1.0",
-    "@nolebase/vitepress-plugin-git-changelog": "^2.6.1",
-    "@nolebase/vitepress-plugin-og-image": "^2.6.1",
+    "@nolebase/vitepress-plugin-git-changelog": "^2.11.1",
+    "@nolebase/vitepress-plugin-og-image": "^2.11.1",
     "@resvg/resvg-js": "^2.6.2",
     "@semantic-release/exec": "^6.0.3",
-    "@semantic-release/github": "11.0.0",
+    "@semantic-release/github": "11.0.1",
     "@semantic-release/npm": "12.0.1",
-    "@shikijs/vitepress-twoslash": "^1.22.2",
-    "@stylistic/eslint-plugin": "^2.9.0",
+    "@shikijs/vitepress-twoslash": "^1.24.0",
+    "@stylistic/eslint-plugin": "^2.11.0",
     "@types/async-retry": "^1.4.9",
     "@types/bytes": "^3.1.4",
     "@types/cross-spawn": "^6.0.2",
     "@types/fs-extra": "^11.0.4",
-    "@types/node": "^22.8.4",
+    "@types/node": "^22.10.1",
     "@types/proper-lockfile": "^4.1.4",
     "@types/semver": "^7.5.8",
     "@types/validate-npm-package-name": "^4.0.2",
     "@types/which": "^3.0.4",
     "@types/yargs": "^17.0.33",
-    "@vitest/coverage-v8": "^2.1.4",
-    "@vitest/ui": "^2.1.4",
-    "eslint": "^9.13.0",
+    "@vitest/coverage-v8": "^2.1.6",
+    "@vitest/ui": "^2.1.6",
+    "electron": "^33.2.1",
+    "eslint": "^9.16.0",
     "eslint-import-resolver-typescript": "^3.6.3",
     "eslint-plugin-import": "^2.31.0",
-    "eslint-plugin-jsdoc": "^50.4.3",
-    "eslint-plugin-n": "^17.12.0",
+    "eslint-plugin-jsdoc": "^50.6.0",
+    "eslint-plugin-n": "^17.14.0",
     "feed": "^4.2.2",
-    "husky": "^9.1.6",
+    "husky": "^9.1.7",
     "rehype": "^13.0.1",
     "rimraf": "^6.0.1",
     "semantic-release": "^24.2.0",
     "sharp": "^0.33.5",
-    "tslib": "^2.8.0",
-    "typedoc": "^0.26.10",
-    "typedoc-plugin-markdown": "^4.2.9",
-    "typedoc-plugin-mdn-links": "^3.3.5",
-    "typedoc-vitepress-theme": "^1.0.2",
-    "typescript": "^5.6.3",
-    "typescript-eslint": "^8.12.2",
-    "vite-node": "^2.1.4",
-    "vitepress": "^1.4.2",
-    "vitest": "^2.1.4",
-    "zx": "^8.1.9"
+    "tslib": "^2.8.1",
+    "typedoc": "^0.27.2",
+    "typedoc-plugin-markdown": "^4.3.1",
+    "typedoc-plugin-mdn-links": "^4.0.3",
+    "typedoc-vitepress-theme": "^1.1.0",
+    "typescript": "^5.7.2",
+    "typescript-eslint": "^8.16.0",
+    "vite-node": "^2.1.6",
+    "vitepress": "^1.5.0",
+    "vitest": "^2.1.6",
+    "zx": "^8.2.4"
   },
   "dependencies": {
     "@huggingface/jinja": "^0.3.2",
@@ -179,20 +183,20 @@
     "chmodrp": "^1.0.2",
     "cmake-js": "^7.3.0",
     "cross-env": "^7.0.3",
-    "cross-spawn": "^7.0.3",
+    "cross-spawn": "^7.0.6",
     "env-var": "^7.5.0",
     "filenamify": "^6.0.0",
     "fs-extra": "^11.2.0",
     "ignore": "^5.3.2",
-    "ipull": "^3.9.0",
+    "ipull": "^3.9.2",
     "is-unicode-supported": "^2.1.0",
     "lifecycle-utils": "^1.7.0",
     "log-symbols": "^7.0.0",
-    "nanoid": "^5.0.8",
-    "node-addon-api": "^8.2.1",
+    "nanoid": "^5.0.9",
+    "node-addon-api": "^8.3.0",
     "octokit": "^4.0.2",
-    "ora": "^8.1.0",
-    "pretty-ms": "^9.1.0",
+    "ora": "^8.1.1",
+    "pretty-ms": "^9.2.0",
     "proper-lockfile": "^4.1.2",
     "semver": "^7.6.3",
     "simple-git": "^3.27.0",
@@ -212,16 +216,16 @@
     }
   },
   "optionalDependencies": {
-    "@node-llama-cpp/linux-arm64": "3.2.0",
-    "@node-llama-cpp/linux-armv7l": "3.2.0",
-    "@node-llama-cpp/linux-x64": "3.2.0",
-    "@node-llama-cpp/linux-x64-cuda": "3.2.0",
-    "@node-llama-cpp/linux-x64-vulkan": "3.2.0",
-    "@node-llama-cpp/mac-arm64-metal": "3.2.0",
-    "@node-llama-cpp/mac-x64": "3.2.0",
-    "@node-llama-cpp/win-arm64": "3.2.0",
-    "@node-llama-cpp/win-x64": "3.2.0",
-    "@node-llama-cpp/win-x64-cuda": "3.2.0",
-    "@node-llama-cpp/win-x64-vulkan": "3.2.0"
+    "@node-llama-cpp/linux-arm64": "3.3.0",
+    "@node-llama-cpp/linux-armv7l": "3.3.0",
+    "@node-llama-cpp/linux-x64": "3.3.0",
+    "@node-llama-cpp/linux-x64-cuda": "3.3.0",
+    "@node-llama-cpp/linux-x64-vulkan": "3.3.0",
+    "@node-llama-cpp/mac-arm64-metal": "3.3.0",
+    "@node-llama-cpp/mac-x64": "3.3.0",
+    "@node-llama-cpp/win-arm64": "3.3.0",
+    "@node-llama-cpp/win-x64": "3.3.0",
+    "@node-llama-cpp/win-x64-cuda": "3.3.0",
+    "@node-llama-cpp/win-x64-vulkan": "3.3.0"
   }
 }

package/templates/README.md ADDED Viewed

@@ -0,0 +1,6 @@
+Use the following command to scaffold a project from a template:
+```bash
+npm create node-llama-cpp@latest
+```
+> Don't use the templates in this repository directly. They are built to be used with `npm create` command ([learn more](https://node-llama-cpp.withcat.ai/guide/))