@fugood/llama.node 1.4.7 → 1.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/lib/binding.ts +8 -0
  2. package/package.json +15 -15
  3. package/scripts/llama.cpp.patch +22 -23
  4. package/src/LlamaContext.cpp +2 -2
  5. package/src/llama.cpp/common/CMakeLists.txt +2 -0
  6. package/src/llama.cpp/common/arg.cpp +364 -193
  7. package/src/llama.cpp/common/arg.h +43 -2
  8. package/src/llama.cpp/common/chat-peg-parser.cpp +16 -2
  9. package/src/llama.cpp/common/chat.cpp +140 -0
  10. package/src/llama.cpp/common/common.cpp +130 -67
  11. package/src/llama.cpp/common/common.h +40 -16
  12. package/src/llama.cpp/common/console.cpp +98 -18
  13. package/src/llama.cpp/common/console.h +30 -8
  14. package/src/llama.cpp/common/download.cpp +69 -25
  15. package/src/llama.cpp/common/json-schema-to-grammar.cpp +132 -3
  16. package/src/llama.cpp/common/json-schema-to-grammar.h +20 -0
  17. package/src/llama.cpp/common/log.cpp +5 -0
  18. package/src/llama.cpp/common/log.h +1 -0
  19. package/src/llama.cpp/common/peg-parser.cpp +1 -1
  20. package/src/llama.cpp/common/preset.cpp +206 -0
  21. package/src/llama.cpp/common/preset.h +32 -0
  22. package/src/llama.cpp/common/sampling.cpp +91 -92
  23. package/src/llama.cpp/common/sampling.h +11 -6
  24. package/src/llama.cpp/common/speculative.cpp +1 -1
  25. package/src/llama.cpp/ggml/CMakeLists.txt +4 -0
  26. package/src/llama.cpp/ggml/include/ggml-alloc.h +9 -0
  27. package/src/llama.cpp/ggml/include/ggml-backend.h +1 -0
  28. package/src/llama.cpp/ggml/include/ggml-cpu.h +1 -0
  29. package/src/llama.cpp/ggml/include/ggml.h +7 -8
  30. package/src/llama.cpp/ggml/src/CMakeLists.txt +3 -0
  31. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2 -0
  32. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +60 -39
  33. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
  34. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +2 -1
  35. package/src/llama.cpp/include/llama.h +18 -1
  36. package/src/llama.cpp/src/llama-arch.cpp +1890 -2248
  37. package/src/llama.cpp/src/llama-arch.h +9 -2
  38. package/src/llama.cpp/src/llama-batch.cpp +12 -2
  39. package/src/llama.cpp/src/llama-batch.h +4 -2
  40. package/src/llama.cpp/src/llama-context.cpp +93 -23
  41. package/src/llama.cpp/src/llama-context.h +8 -2
  42. package/src/llama.cpp/src/llama-graph.cpp +84 -16
  43. package/src/llama.cpp/src/llama-graph.h +17 -4
  44. package/src/llama.cpp/src/llama-hparams.cpp +6 -0
  45. package/src/llama.cpp/src/llama-hparams.h +5 -1
  46. package/src/llama.cpp/src/llama-impl.cpp +4 -0
  47. package/src/llama.cpp/src/llama-kv-cache.cpp +90 -42
  48. package/src/llama.cpp/src/llama-kv-cache.h +19 -2
  49. package/src/llama.cpp/src/llama-memory-hybrid.cpp +1 -1
  50. package/src/llama.cpp/src/llama-model-loader.cpp +2 -0
  51. package/src/llama.cpp/src/llama-model-loader.h +2 -0
  52. package/src/llama.cpp/src/llama-model.cpp +103 -44
  53. package/src/llama.cpp/src/llama-model.h +1 -0
  54. package/src/llama.cpp/src/llama-quant.cpp +1 -1
  55. package/src/llama.cpp/src/llama-vocab.cpp +2 -1
  56. package/src/llama.cpp/src/llama.cpp +675 -1
  57. package/src/llama.cpp/src/models/deepseek2.cpp +9 -5
  58. package/src/llama.cpp/src/models/glm4-moe.cpp +28 -11
  59. package/src/llama.cpp/src/models/glm4.cpp +27 -4
  60. package/src/llama.cpp/src/models/models.h +5 -5
  61. package/src/llama.cpp/src/models/nemotron-h.cpp +35 -6
  62. package/src/llama.cpp/src/models/qwen2.cpp +12 -3
  63. package/src/llama.cpp/src/models/qwen3next.cpp +81 -266
package/lib/binding.ts CHANGED
@@ -198,6 +198,14 @@ export type LlamaParallelCompletionOptions = LlamaCompletionOptions & {
   */
  save_state_path?: string

+ /**
+  * Number of tokens to load when loading state.
+  * If not specified or <= 0, all tokens from the state file will be loaded.
+  * Use this to limit how much of a saved state is restored.
+  * Example: `512` to load only the first 512 tokens from the state file
+  */
+ load_state_size?: number
+
  /**
   * Number of tokens to save when saving session state.
   * If not specified or <= 0, all tokens will be saved.
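A minimal usage sketch of the new option. The `save_state_path` and `load_state_size` names come from the diff above; the `loadModel`/`completion` calls and the `load_state_path` counterpart option are assumptions for illustration, not verified against the 1.4.8 API:

  // Hypothetical sketch — entry points assumed, only the option names are from the diff.
  import { loadModel } from '@fugood/llama.node'

  const ctx = await loadModel({ model: './model.gguf' })

  // First run: evaluate the shared prompt prefix and persist its state to disk.
  await ctx.completion({
    prompt: 'Long shared prefix ...',
    save_state_path: './state.bin',
  })

  // Later run: restore only the first 512 tokens of that saved state
  // instead of the whole file (load_state_size is new in 1.4.8).
  await ctx.completion({
    prompt: 'Long shared prefix ... plus a new question',
    load_state_path: './state.bin', // assumed counterpart to save_state_path
    load_state_size: 512,
  })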
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@fugood/llama.node",
  "access": "public",
- "version": "1.4.7",
+ "version": "1.4.8",
  "description": "An another Node binding of llama.cpp",
  "main": "lib/index.js",
  "scripts": {
@@ -72,20 +72,20 @@
  "CMakeLists.txt"
  ],
  "optionalDependencies": {
- "@fugood/node-llama-darwin-arm64": "1.4.7",
- "@fugood/node-llama-darwin-x64": "1.4.7",
- "@fugood/node-llama-linux-arm64": "1.4.7",
- "@fugood/node-llama-linux-arm64-cuda": "1.4.7",
- "@fugood/node-llama-linux-arm64-snapdragon": "1.4.7",
- "@fugood/node-llama-linux-arm64-vulkan": "1.4.7",
- "@fugood/node-llama-linux-x64": "1.4.7",
- "@fugood/node-llama-linux-x64-cuda": "1.4.7",
- "@fugood/node-llama-linux-x64-vulkan": "1.4.7",
- "@fugood/node-llama-win32-arm64": "1.4.7",
- "@fugood/node-llama-win32-arm64-vulkan": "1.4.7",
- "@fugood/node-llama-win32-x64": "1.4.7",
- "@fugood/node-llama-win32-x64-cuda": "1.4.7",
- "@fugood/node-llama-win32-x64-vulkan": "1.4.7"
+ "@fugood/node-llama-darwin-arm64": "1.4.8",
+ "@fugood/node-llama-darwin-x64": "1.4.8",
+ "@fugood/node-llama-linux-arm64": "1.4.8",
+ "@fugood/node-llama-linux-arm64-cuda": "1.4.8",
+ "@fugood/node-llama-linux-arm64-snapdragon": "1.4.8",
+ "@fugood/node-llama-linux-arm64-vulkan": "1.4.8",
+ "@fugood/node-llama-linux-x64": "1.4.8",
+ "@fugood/node-llama-linux-x64-cuda": "1.4.8",
+ "@fugood/node-llama-linux-x64-vulkan": "1.4.8",
+ "@fugood/node-llama-win32-arm64": "1.4.8",
+ "@fugood/node-llama-win32-arm64-vulkan": "1.4.8",
+ "@fugood/node-llama-win32-x64": "1.4.8",
+ "@fugood/node-llama-win32-x64-cuda": "1.4.8",
+ "@fugood/node-llama-win32-x64-vulkan": "1.4.8"
  },
  "devDependencies": {
  "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch CHANGED
@@ -1,8 +1,8 @@
  diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
- index 377b26846..1873b5206 100644
+ index 0182767c2..f8c4a4f63 100644
  --- a/src/llama.cpp/common/CMakeLists.txt
  +++ b/src/llama.cpp/common/CMakeLists.txt
- @@ -149,9 +149,16 @@ if (LLAMA_LLGUIDANCE)
+ @@ -151,9 +151,16 @@ if (LLAMA_LLGUIDANCE)
  set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
  endif ()

@@ -21,21 +21,20 @@ index 377b26846..1873b5206 100644

  #
  diff --git a/src/llama.cpp/common/chat-peg-parser.cpp b/src/llama.cpp/common/chat-peg-parser.cpp
- index 74a7b6a46..7b7a1bd50 100644
+ index 1bcba9cd8..b7cd68734 100644
  --- a/src/llama.cpp/common/chat-peg-parser.cpp
  +++ b/src/llama.cpp/common/chat-peg-parser.cpp
- @@ -1,9 +1,5 @@
- #include "chat-peg-parser.h"
+ @@ -2,7 +2,7 @@
+
+ #include <nlohmann/json.hpp>

- -#include <nlohmann/json.hpp>
- -
  -using json = nlohmann::json;
- -
- static std::string_view trim_trailing_space(std::string_view sv) {
- while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
- sv.remove_suffix(1);
+ +using json = nlohmann::ordered_json;
+
+ static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
+ int count = 0;
  diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
- index c371edaa5..ec032e351 100644
+ index 0a426f447..ab02be247 100644
  --- a/src/llama.cpp/common/chat.cpp
  +++ b/src/llama.cpp/common/chat.cpp
  @@ -7,9 +7,6 @@
@@ -65,7 +64,7 @@ index c371edaa5..ec032e351 100644
  struct templates_params {
  json messages;
  json tools;
- @@ -732,7 +719,7 @@ static std::string apply(
+ @@ -751,7 +738,7 @@ static std::string apply(
  tmpl_inputs.extra_context.merge_patch(*additional_context);
  }
  // TODO: add flag to control date/time, if only for testing purposes.
@@ -99,10 +98,10 @@ index 6085510a4..263076ce2 100644
  struct common_chat_tool_call {
  std::string name;
  diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
- index 0497f90a2..29b36f3fe 100644
+ index 5a8cf5248..8010a990e 100644
  --- a/src/llama.cpp/common/common.cpp
  +++ b/src/llama.cpp/common/common.cpp
- @@ -1280,6 +1280,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
+ @@ -1343,6 +1343,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
  mparams.n_gpu_layers = params.n_gpu_layers;
  }

@@ -111,16 +110,16 @@ index 0497f90a2..29b36f3fe 100644
  mparams.split_mode = params.split_mode;
  mparams.tensor_split = params.tensor_split;
  diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
- index d28e48991..562203d02 100644
+ index d70744840..dea8c4546 100644
  --- a/src/llama.cpp/common/common.h
  +++ b/src/llama.cpp/common/common.h
- @@ -302,6 +302,7 @@ struct lr_opt {
+ @@ -307,6 +307,7 @@ struct lr_opt {
  struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);

  struct common_params {
  + bool vocab_only = false;
- int32_t n_predict = -1; // new tokens to predict
- int32_t n_ctx = 4096; // context size
+ int32_t n_predict = -1; // max. number of new tokens to predict, -1 == no limit
+ int32_t n_ctx = 0; // context size, 0 == context the model was trained with
  int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
  diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
  index fc31089f3..aa9befe4c 100644
@@ -136,10 +135,10 @@ index fc31089f3..aa9befe4c 100644
  check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
  if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
  diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
- index 72a82a891..1b681f4dd 100644
+ index 514f086f6..792abaa58 100644
  --- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
  +++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
- @@ -3216,11 +3216,26 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
+ @@ -3213,11 +3213,26 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
  GGML_UNUSED(dev);
  }

@@ -169,7 +168,7 @@ index 72a82a891..1b681f4dd 100644
  GGML_UNUSED(dev);
  }

- @@ -3401,10 +3416,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+ @@ -3398,10 +3413,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
  }
  }

@@ -188,7 +187,7 @@

  GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);

- @@ -3417,6 +3439,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+ @@ -3414,6 +3436,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
  } catch (std::exception const &exc) {
  GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
  devices[i].context = nullptr;
package/src/LlamaContext.cpp CHANGED
@@ -416,8 +416,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
  _rn_ctx->attachThreadpoolsIfAvailable();

  // Collect used devices from the loaded model
- if (_rn_ctx->llama_init.model) {
- const auto &model_devices = _rn_ctx->llama_init.model->devices;
+ if (_rn_ctx->llama_init->model()) {
+ const auto &model_devices = _rn_ctx->llama_init->model()->devices;
  for (auto dev : model_devices) {
  const char *dev_name = ggml_backend_dev_name(dev);
  if (dev_name != nullptr) {
package/src/llama.cpp/common/CMakeLists.txt CHANGED
@@ -73,6 +73,8 @@ add_library(${TARGET} STATIC
  ngram-cache.h
  peg-parser.cpp
  peg-parser.h
+ preset.cpp
+ preset.h
  regex-partial.cpp
  regex-partial.h
  sampling.cpp