@fugood/llama.node 1.3.8 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/lib/binding.js +25 -18
  2. package/lib/binding.ts +19 -1
  3. package/lib/index.js +3 -3
  4. package/lib/index.ts +1 -1
  5. package/package.json +17 -17
  6. package/scripts/llama.cpp.patch +53 -4
  7. package/src/LlamaCompletionWorker.cpp +2 -2
  8. package/src/LlamaContext.cpp +6 -1
  9. package/src/llama.cpp/common/arg.cpp +1 -1
  10. package/src/llama.cpp/common/chat-parser.cpp +968 -0
  11. package/src/llama.cpp/common/chat.cpp +0 -952
  12. package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -2
  13. package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
  14. package/src/llama.cpp/ggml/include/ggml-rpc.h +1 -1
  15. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -4
  16. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +336 -3
  17. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +11 -8
  18. package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +22 -0
  19. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -1
  20. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +234 -1
  21. package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +6 -0
  22. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  23. package/src/llama.cpp/src/llama-arch.cpp +48 -3
  24. package/src/llama.cpp/src/llama-arch.h +2 -0
  25. package/src/llama.cpp/src/llama-context.cpp +6 -2
  26. package/src/llama.cpp/src/llama-hparams.h +1 -1
  27. package/src/llama.cpp/src/llama-model.cpp +102 -5
  28. package/src/llama.cpp/src/llama-model.h +4 -0
  29. package/src/llama.cpp/src/llama-quant.cpp +13 -5
  30. package/src/llama.cpp/src/models/lfm2.cpp +5 -3
  31. package/src/llama.cpp/src/models/models.h +51 -1
  32. package/src/llama.cpp/src/models/qwen3next.cpp +1042 -0
package/lib/binding.js CHANGED
@@ -15,23 +15,13 @@ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (
  }) : function(o, v) {
  o["default"] = v;
  });
- var __importStar = (this && this.__importStar) || (function () {
- var ownKeys = function(o) {
- ownKeys = Object.getOwnPropertyNames || function (o) {
- var ar = [];
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
- return ar;
- };
- return ownKeys(o);
- };
- return function (mod) {
- if (mod && mod.__esModule) return mod;
- var result = {};
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
- __setModuleDefault(result, mod);
- return result;
- };
- })();
+ var __importStar = (this && this.__importStar) || function (mod) {
+ if (mod && mod.__esModule) return mod;
+ var result = {};
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
+ __setModuleDefault(result, mod);
+ return result;
+ };
  var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
  function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
  return new (P || (P = Promise))(function (resolve, reject) {
@@ -41,8 +31,12 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
  step((generator = generator.apply(thisArg, _arguments || [])).next());
  });
  };
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+ return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.isLibVariantAvailable = exports.loadModule = void 0;
+ const path_1 = __importDefault(require("path"));
  const getPlatformPackageName = (variant) => {
  const platform = process.platform;
  const arch = process.arch;
@@ -58,7 +52,20 @@ const loadPlatformPackage = (packageName) => __awaiter(void 0, void 0, void 0, f
  }
  });
  const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
- let module = yield loadPlatformPackage(getPlatformPackageName(variant));
+ const packageName = getPlatformPackageName(variant);
+ // Set ADSP_LIBRARY_PATH for load HTP libs
+ if (variant === 'snapdragon') {
+ const adspLibraryPath = process.env.ADSP_LIBRARY_PATH;
+ if (!adspLibraryPath) {
+ try {
+ process.env.ADSP_LIBRARY_PATH = path_1.default.dirname(require.resolve(packageName));
+ }
+ catch (_a) {
+ /* no-op */
+ }
+ }
+ }
+ let module = yield loadPlatformPackage(packageName);
  if (module) {
  return module;
  }
package/lib/binding.ts CHANGED
@@ -1,3 +1,5 @@
+ import path from 'path'
+
  export type MessagePart = {
  type: string
  text?: string
@@ -600,7 +602,23 @@ const loadPlatformPackage = async (
  }

  export const loadModule = async (variant?: LibVariant): Promise<Module> => {
- let module = await loadPlatformPackage(getPlatformPackageName(variant))
+ const packageName = getPlatformPackageName(variant)
+
+ // Set ADSP_LIBRARY_PATH for load HTP libs
+ if (variant === 'snapdragon') {
+ const adspLibraryPath = process.env.ADSP_LIBRARY_PATH
+ if (!adspLibraryPath) {
+ try {
+ process.env.ADSP_LIBRARY_PATH = path.dirname(
+ require.resolve(packageName),
+ )
+ } catch {
+ /* no-op */
+ }
+ }
+ }
+
+ let module = await loadPlatformPackage(packageName)
  if (module) {
  return module
  }
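As this hunk and the compiled binding.js change above show, loading the snapdragon variant in 1.4.x now sets ADSP_LIBRARY_PATH to the directory of the resolved platform package (e.g. @fugood/node-llama-linux-arm64-snapdragon) when the variable is not already defined, so the Hexagon HTP libraries shipped with the prebuilt binary can be located at runtime. A minimal usage sketch, assuming loadModule is imported from the package's lib/binding module and that an explicit ADSP_LIBRARY_PATH in the environment still takes precedence:

```ts
import { loadModule } from '@fugood/llama.node/lib/binding'

async function main() {
  // Optional explicit override; if unset, 1.4.x derives the path from
  // require.resolve() of the snapdragon platform package.
  // process.env.ADSP_LIBRARY_PATH = '/opt/htp-libs'

  const mod = await loadModule('snapdragon')
  console.log('ADSP_LIBRARY_PATH =', process.env.ADSP_LIBRARY_PATH)
  return mod
}

main()
```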
package/lib/index.js CHANGED
@@ -87,9 +87,9 @@ class LlamaContextWrapper {
  return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
  }
  getFormattedChat(messages, template, params) {
- var _a;
+ var _a, _b;
  const { messages: chat, has_media, media_paths } = (0, utils_1.formatMediaChat)(messages);
- const useJinja = this.isJinjaSupported() && (params === null || params === void 0 ? void 0 : params.jinja);
+ const useJinja = this.isJinjaSupported() && ((_a = params === null || params === void 0 ? void 0 : params.jinja) !== null && _a !== void 0 ? _a : true);
  let tmpl;
  if (template)
  tmpl = template; // Force replace if provided
@@ -99,7 +99,7 @@ class LlamaContextWrapper {
  tools: params === null || params === void 0 ? void 0 : params.tools,
  parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
  tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
- enable_thinking: (_a = params === null || params === void 0 ? void 0 : params.enable_thinking) !== null && _a !== void 0 ? _a : true,
+ enable_thinking: (_b = params === null || params === void 0 ? void 0 : params.enable_thinking) !== null && _b !== void 0 ? _b : true,
  add_generation_prompt: params === null || params === void 0 ? void 0 : params.add_generation_prompt,
  now: params === null || params === void 0 ? void 0 : params.now,
  chat_template_kwargs: (params === null || params === void 0 ? void 0 : params.chat_template_kwargs)
package/lib/index.ts CHANGED
@@ -124,7 +124,7 @@ class LlamaContextWrapper {
  ): FormattedChatResult {
  const { messages: chat, has_media, media_paths } = formatMediaChat(messages)

- const useJinja = this.isJinjaSupported() && params?.jinja
+ const useJinja = this.isJinjaSupported() && (params?.jinja ?? true)
  let tmpl
  if (template) tmpl = template // Force replace if provided

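The practical effect of this one-line change is that getFormattedChat now applies Jinja chat templating by default whenever the model's template supports it; before 1.4.x, Jinja had to be requested explicitly via params.jinja. A hedged sketch of the behavioral difference, where ctx stands for a LlamaContextWrapper instance obtained from this package (the declared type below is a simplified assumption, not the package's full signature):

```ts
// Simplified, assumed shape of the wrapper, for illustration only.
declare const ctx: {
  getFormattedChat(
    messages: { role: string; content: string }[],
    template?: string,
    params?: { jinja?: boolean },
  ): unknown
}

const messages = [{ role: 'user', content: 'Hello' }]

// 1.3.x behaviour: Jinja templating only when explicitly requested.
ctx.getFormattedChat(messages, undefined, { jinja: true })

// 1.4.x behaviour: Jinja is the default whenever the model template
// supports it; opt out explicitly to keep the legacy llama-chat path.
ctx.getFormattedChat(messages)
ctx.getFormattedChat(messages, undefined, { jinja: false })
```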
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@fugood/llama.node",
  "access": "public",
- "version": "1.3.8",
+ "version": "1.4.1",
  "description": "An another Node binding of llama.cpp",
  "main": "lib/index.js",
  "scripts": {
@@ -72,20 +72,20 @@
  "CMakeLists.txt"
  ],
  "optionalDependencies": {
- "@fugood/node-llama-linux-x64": "1.3.8",
- "@fugood/node-llama-linux-x64-vulkan": "1.3.8",
- "@fugood/node-llama-linux-x64-cuda": "1.3.8",
- "@fugood/node-llama-linux-arm64-snapdragon": "1.3.8",
- "@fugood/node-llama-linux-arm64": "1.3.8",
- "@fugood/node-llama-linux-arm64-vulkan": "1.3.8",
- "@fugood/node-llama-linux-arm64-cuda": "1.3.8",
- "@fugood/node-llama-win32-x64": "1.3.8",
- "@fugood/node-llama-win32-x64-vulkan": "1.3.8",
- "@fugood/node-llama-win32-x64-cuda": "1.3.8",
- "@fugood/node-llama-win32-arm64": "1.3.8",
- "@fugood/node-llama-win32-arm64-vulkan": "1.3.8",
- "@fugood/node-llama-darwin-x64": "1.3.8",
- "@fugood/node-llama-darwin-arm64": "1.3.8"
+ "@fugood/node-llama-darwin-arm64": "1.4.1",
+ "@fugood/node-llama-darwin-x64": "1.4.1",
+ "@fugood/node-llama-linux-arm64": "1.4.1",
+ "@fugood/node-llama-linux-arm64-cuda": "1.4.1",
+ "@fugood/node-llama-linux-arm64-snapdragon": "1.4.1",
+ "@fugood/node-llama-linux-arm64-vulkan": "1.4.1",
+ "@fugood/node-llama-linux-x64": "1.4.1",
+ "@fugood/node-llama-linux-x64-cuda": "1.4.1",
+ "@fugood/node-llama-linux-x64-vulkan": "1.4.1",
+ "@fugood/node-llama-win32-arm64": "1.4.1",
+ "@fugood/node-llama-win32-arm64-vulkan": "1.4.1",
+ "@fugood/node-llama-win32-x64": "1.4.1",
+ "@fugood/node-llama-win32-x64-cuda": "1.4.1",
+ "@fugood/node-llama-win32-x64-vulkan": "1.4.1"
  },
  "devDependencies": {
  "@babel/preset-env": "^7.24.4",
@@ -100,7 +100,7 @@
  "jest": "^29.7.0",
  "node-addon-api": "^8.0.0",
  "node-wav": "^0.0.2",
- "release-it": "^17.7.0",
+ "release-it": "^19.0.6",
  "rimraf": "^6.0.1",
  "typescript": "^5.4.5",
  "wait-for-expect": "^3.0.2"
@@ -130,4 +130,4 @@
  "singleQuote": true,
  "printWidth": 80
  }
- }
+ }
package/scripts/llama.cpp.patch CHANGED
@@ -21,7 +21,7 @@ index bb168e835..cfc0e2c2e 100644
 
  #
  diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
- index 6fa05a604..87dfa7a8b 100644
+ index b4a0f985e..2383d2ea9 100644
  --- a/src/llama.cpp/common/chat.cpp
  +++ b/src/llama.cpp/common/chat.cpp
  @@ -6,9 +6,6 @@
@@ -51,7 +51,7 @@ index 6fa05a604..87dfa7a8b 100644
  struct templates_params {
  json messages;
  json tools;
- @@ -817,7 +804,7 @@ static std::string apply(
+ @@ -709,7 +696,7 @@ static std::string apply(
  tmpl_inputs.extra_context.merge_patch(*additional_context);
  }
  // TODO: add flag to control date/time, if only for testing purposes.
@@ -122,10 +122,59 @@ index 7e53a57b7..a328d4db4 100644
  check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
  if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
  diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
- index 72a82a891..7869ad323 100644
+ index 72a82a891..1b681f4dd 100644
  --- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
  +++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
- @@ -3417,6 +3417,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+ @@ -3216,11 +3216,26 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
+ GGML_UNUSED(dev);
+ }
+
+ +
+ +// ~2GB per session for now
+ +#define GGML_HEXAGON_SESSION_MEMORY_DEFAULT (2ULL * 1024 * 1024 * 1024)
+ +// Max to 3.5GB
+ +#define GGML_HEXAGON_SESSION_MEMORY_MAX (3ULL * 1024 * 1024 * 1024 + 512ULL * 1024 * 1024)
+ +
+ static void ggml_backend_hexagon_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
+ - // ~2GB per session for now
+ - *free = 2ULL * 1024 * 1024 * 1024;
+ - *total = *free;
+ + const char * str_mem = getenv("GGML_HEXAGON_SESSION_MEMORY");
+ + if (str_mem) {
+ + *free = std::stoull(str_mem);
+ + if (*free < GGML_HEXAGON_SESSION_MEMORY_DEFAULT) {
+ + *free = GGML_HEXAGON_SESSION_MEMORY_DEFAULT;
+ + } else if (*free > GGML_HEXAGON_SESSION_MEMORY_MAX) {
+ + *free = GGML_HEXAGON_SESSION_MEMORY_MAX;
+ + }
+ + } else {
+ + *free = GGML_HEXAGON_SESSION_MEMORY_DEFAULT;
+ + }
+
+ + *total = *free;
+ GGML_UNUSED(dev);
+ }
+
+ @@ -3401,10 +3416,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+ }
+ }
+
+ +#if defined(__ANDROID__)
+ if(opt_arch < 75) {
+ opt_ndev = 1;
+ - GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75.\n");
+ + GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75 for Android.\n");
+ + }
+ +#else
+ + if(opt_arch < 73) {
+ + opt_ndev = 1;
+ + GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v73 for Linux and Windows.\n");
+ }
+ +#endif
+
+ GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);
+
+ @@ -3417,6 +3439,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
  } catch (std::exception const &exc) {
  GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
  devices[i].context = nullptr;
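The ggml-hexagon portion of the patch replaces the hard-coded ~2 GB per-session memory report with a GGML_HEXAGON_SESSION_MEMORY environment variable, clamped between the 2 GB default and a 3.5 GB ceiling. A hedged sketch of driving it from Node, under the assumption that the variable only needs to be present in the process environment before the Hexagon backend queries device memory:

```ts
// Request 3 GiB per Hexagon session. Values below 2 GiB or above 3.5 GiB
// are clamped by the patched backend, which parses the value with
// std::stoull, so a plain decimal byte count is expected.
process.env.GGML_HEXAGON_SESSION_MEMORY = String(3n * 1024n * 1024n * 1024n)
```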
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -10,14 +10,14 @@ Napi::Array TokenProbsToArray(Napi::Env env, llama_context* ctx, const std::vect
  const auto &prob = probs[i];
  Napi::Object token_obj = Napi::Object::New(env);

- std::string token_str = common_token_to_piece(ctx, prob.tok);
+ std::string token_str = rnllama::tokens_to_output_formatted_string(ctx, prob.tok);
  token_obj.Set("content", Napi::String::New(env, token_str));

  Napi::Array token_probs = Napi::Array::New(env);
  for (size_t j = 0; j < prob.probs.size(); j++) {
  const auto &p = prob.probs[j];
  Napi::Object prob_obj = Napi::Object::New(env);
- std::string tok_str = common_token_to_piece(ctx, p.tok);
+ std::string tok_str = rnllama::tokens_to_output_formatted_string(ctx, p.tok);
  prob_obj.Set("tok_str", Napi::String::New(env, tok_str));
  prob_obj.Set("prob", Napi::Number::New(env, p.prob));
  token_probs.Set(j, prob_obj);
package/src/LlamaContext.cpp CHANGED
@@ -321,15 +321,20 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)

  // Parse devices array
  if (options.Has("devices") && options.Get("devices").IsArray()) {
+ std::vector<ggml_backend_dev_t> devs;
  auto devices_array = options.Get("devices").As<Napi::Array>();
  for (size_t i = 0; i < devices_array.Length(); i++) {
  auto device_name = devices_array.Get(i).ToString().Utf8Value();
  auto * dev = ggml_backend_dev_by_name(device_name.c_str());
  if (dev) {
- params.devices.push_back(dev);
+ devs.push_back(dev);
  }
  // Skip invalid device names silently
  }
+ if (!devs.empty()) {
+ params.devices = devs;
+ params.devices.push_back(nullptr); // nullptr terminator required by llama.cpp
+ }
  }

  std::vector<common_adapter_lora_info> lora;
package/src/llama.cpp/common/arg.cpp CHANGED
@@ -980,7 +980,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  [](common_params & params) {
  params.kv_unified = true;
  }
- ).set_env("LLAMA_ARG_KV_SPLIT"));
+ ).set_env("LLAMA_ARG_KV_UNIFIED"));
  add_opt(common_arg(
  {"--no-context-shift"},
  string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),