@fugood/llama.node 1.3.7 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/lib/binding.js +18 -1
  2. package/lib/binding.ts +19 -1
  3. package/lib/index.js +3 -3
  4. package/lib/index.ts +1 -1
  5. package/package.json +15 -15
  6. package/scripts/llama.cpp.patch +7 -7
  7. package/src/LlamaCompletionWorker.cpp +2 -2
  8. package/src/llama.cpp/common/arg.cpp +27 -2
  9. package/src/llama.cpp/common/chat-parser.cpp +968 -0
  10. package/src/llama.cpp/common/chat.cpp +0 -952
  11. package/src/llama.cpp/common/common.cpp +55 -0
  12. package/src/llama.cpp/common/common.h +18 -0
  13. package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -2
  14. package/src/llama.cpp/ggml/CMakeLists.txt +6 -4
  15. package/src/llama.cpp/ggml/include/ggml-rpc.h +1 -1
  16. package/src/llama.cpp/ggml/include/ggml.h +12 -4
  17. package/src/llama.cpp/ggml/src/CMakeLists.txt +26 -4
  18. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +29 -15
  19. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +721 -0
  20. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  21. package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +22 -2
  22. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +9 -0
  23. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +71 -4
  24. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
  25. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +243 -4
  26. package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +6 -0
  27. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +84 -85
  28. package/src/llama.cpp/include/llama.h +18 -0
  29. package/src/llama.cpp/src/CMakeLists.txt +2 -0
  30. package/src/llama.cpp/src/llama-arch.cpp +95 -16
  31. package/src/llama.cpp/src/llama-arch.h +15 -0
  32. package/src/llama.cpp/src/llama-context.cpp +7 -3
  33. package/src/llama.cpp/src/llama-graph.cpp +3 -3
  34. package/src/llama.cpp/src/llama-hparams.h +1 -1
  35. package/src/llama.cpp/src/llama-model.cpp +141 -6
  36. package/src/llama.cpp/src/llama-model.h +4 -0
  37. package/src/llama.cpp/src/llama-quant.cpp +13 -5
  38. package/src/llama.cpp/src/models/lfm2.cpp +5 -3
  39. package/src/llama.cpp/src/models/models.h +55 -1
  40. package/src/llama.cpp/src/models/qwen3next.cpp +1042 -0
  41. package/src/llama.cpp/src/models/rnd1.cpp +126 -0

package/src/llama.cpp/common/common.cpp
@@ -8,6 +8,7 @@
 #include "common.h"
 #include "log.h"
 #include "llama.h"
+#include "sampling.h"
 
 #include <algorithm>
 #include <cinttypes>
@@ -949,6 +950,58 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
 // Model utils
 //
 
+static inline void common_init_sampler_from_model(
+        const llama_model * model,
+        common_params_sampling & sparams) {
+
+    const uint64_t config = sparams.user_sampling_config;
+
+    auto get_int32 = [&](const char * key, int32_t & dst, uint64_t user_config) {
+        if (config & user_config) return;
+
+        char buf[64] = {0};
+        if (llama_model_meta_val_str(model, key, buf, sizeof(buf)) > 0) {
+            char * end = nullptr;
+            int32_t v = strtol(buf, &end, 10);
+            if (end && end != buf) dst = v;
+        }
+    };
+
+    auto get_float = [&](const char * key, float & dst, uint64_t user_config) {
+        if (config & user_config) return;
+
+        char buf[128] = {0};
+        if (llama_model_meta_val_str(model, key, buf, sizeof(buf)) > 0) {
+            char * end = nullptr;
+            float v = strtof(buf, &end);
+            if (end && end != buf) dst = v;
+        }
+    };
+
+    // Sampling sequence
+    if (!(config & common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_SAMPLERS)) {
+        char buf[512] = {0};
+        if (llama_model_meta_val_str(model, llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_SEQUENCE), buf, sizeof(buf)) > 0) {
+            const std::vector<std::string> sampler_names = string_split<std::string>(std::string(buf), ';');
+            if (!sampler_names.empty()) {
+                sparams.samplers = common_sampler_types_from_names(sampler_names, true);
+            }
+        }
+    }
+
+    get_int32(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_TOP_K), sparams.top_k, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_K);
+    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_TOP_P), sparams.top_p, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_P);
+    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIN_P), sparams.min_p, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIN_P);
+    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_XTC_PROBABILITY), sparams.xtc_probability, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_PROBABILITY);
+    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_XTC_THRESHOLD), sparams.xtc_threshold, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_THRESHOLD);
+    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_TEMP), sparams.temp, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TEMP);
+    get_int32(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_PENALTY_LAST_N), sparams.penalty_last_n, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_LAST_N);
+    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_PENALTY_REPEAT), sparams.penalty_repeat, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_REPEAT);
+    get_int32(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT), sparams.mirostat, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT);
+    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT_TAU), sparams.mirostat_tau, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_TAU);
+    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT_ETA), sparams.mirostat_eta, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_ETA);
+}
+
 struct common_init_result common_init_from_params(common_params & params) {
     common_init_result iparams;
     auto mparams = common_model_params_to_llama(params);
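The helper above leans on two llama.h additions in this release (file 28 in the list): the LLAMA_MODEL_META_KEY_SAMPLING_* keys and llama_model_meta_key_str(), which maps a key to its GGUF metadata name. A minimal sketch of reading one of these keys directly, mirroring the `> 0` convention used in the hunk; the exact llama.h declarations are not shown in this diff, so treat the key-to-string signature as an assumption:

    // Sketch: query a model-suggested sampling value straight from GGUF metadata.
    // Assumes llama_model_meta_key_str() returns the GGUF key string, as its use
    // in common_init_sampler_from_model() above suggests.
    #include "llama.h"
    #include <cstdio>

    static void print_suggested_temp(const llama_model * model) {
        char buf[128] = {0};
        const char * key = llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_TEMP);
        if (llama_model_meta_val_str(model, key, buf, sizeof(buf)) > 0) {
            printf("model-suggested temperature: %s\n", buf);
        }
    }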
@@ -960,6 +1013,8 @@ struct common_init_result common_init_from_params(common_params & params) {
         return iparams;
     }
 
+    common_init_sampler_from_model(model, params.sampling);
+
     const llama_vocab * vocab = llama_model_get_vocab(model);
 
     auto cparams = common_context_params_to_llama(params);
package/src/llama.cpp/common/common.h
@@ -140,6 +140,22 @@ struct common_grammar_trigger {
     llama_token token = LLAMA_TOKEN_NULL;
 };
 
+enum common_params_sampling_config : uint64_t {
+    COMMON_PARAMS_SAMPLING_CONFIG_SAMPLERS        = 1 << 0,
+    COMMON_PARAMS_SAMPLING_CONFIG_TOP_K           = 1 << 1,
+    COMMON_PARAMS_SAMPLING_CONFIG_TOP_P           = 1 << 2,
+    COMMON_PARAMS_SAMPLING_CONFIG_MIN_P           = 1 << 3,
+    COMMON_PARAMS_SAMPLING_CONFIG_XTC_PROBABILITY = 1 << 4,
+    COMMON_PARAMS_SAMPLING_CONFIG_XTC_THRESHOLD   = 1 << 5,
+    COMMON_PARAMS_SAMPLING_CONFIG_TEMP            = 1 << 6,
+    COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_LAST_N  = 1 << 7,
+    COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_REPEAT  = 1 << 8,
+    COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT        = 1 << 9,
+    COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_TAU    = 1 << 10,
+    COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_ETA    = 1 << 11,
+};
+
+
 // sampling parameters
 struct common_params_sampling {
     uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
@@ -172,6 +188,8 @@ struct common_params_sampling {
     bool no_perf = false; // disable performance metrics
     bool timing_per_token = false;
 
+    uint64_t user_sampling_config = 0; // bitfield to track user-specified samplers
+
     std::vector<std::string> dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY
 
 
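Taken together with the common.cpp hunk above, the intent is that user_sampling_config records which sampling fields the caller set explicitly, so common_init_sampler_from_model() only fills in the fields the user left untouched. A minimal sketch of the expected call pattern; the real CLI wiring lives in common/arg.cpp (shown only partially in this diff), so the function below is illustrative, not the package's actual code:

    // Illustrative only: mark --top-k and --temp as user-specified so that
    // common_init_sampler_from_model() keeps them and only fills the remaining
    // fields from the model's GGUF metadata (if present).
    #include "common.h"

    static void apply_cli_sampling_overrides(common_params & params) {
        params.sampling.top_k = 40;
        params.sampling.user_sampling_config |= COMMON_PARAMS_SAMPLING_CONFIG_TOP_K;

        params.sampling.temp = 0.2f;
        params.sampling.user_sampling_config |= COMMON_PARAMS_SAMPLING_CONFIG_TEMP;

        // Fields whose bit is not set (e.g. top_p, min_p) may still be
        // overridden by model metadata inside common_init_from_params().
    }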
package/src/llama.cpp/common/json-schema-to-grammar.cpp
@@ -268,10 +268,10 @@ static bool is_reserved_name(const std::string & name) {
 }
 
 std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
-std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"]");
+std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]");
 std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
 std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
-    {'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"}
+    {'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"}, {'\\', "\\\\"}
 };
 
 std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
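The two-line change above adds '\\' to the set of characters escaped when a JSON-schema string constant is turned into a GBNF string literal, alongside the existing \r, \n and \" cases. A standalone sketch of that escaping step, assuming a simple character-by-character walk rather than the regex the real helper uses inside json-schema-to-grammar.cpp:

    // Standalone sketch of grammar-literal escaping with the new '\\' entry.
    #include <iostream>
    #include <string>
    #include <unordered_map>

    static std::string escape_grammar_literal(const std::string & s) {
        static const std::unordered_map<char, std::string> esc = {
            {'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'\\', "\\\\"},
        };
        std::string out;
        for (char c : s) {
            auto it = esc.find(c);
            out += (it != esc.end()) ? it->second : std::string(1, c);
        }
        return out;
    }

    int main() {
        // A schema const like C:\temp now becomes the GBNF literal "C:\\temp"
        std::cout << '"' << escape_grammar_literal("C:\\temp\n") << '"' << std::endl;
    }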
package/src/llama.cpp/ggml/CMakeLists.txt
@@ -25,16 +25,17 @@ if(GIT_EXE)
     )
 endif()
 
-# Build the version string with optional dirty flag
 set(GGML_VERSION "${GGML_VERSION_BASE}")
-if(GGML_GIT_DIRTY AND NOT GGML_GIT_DIRTY EQUAL 0)
-    set(GGML_VERSION "${GGML_VERSION}-dirty")
-endif()
 
 if(NOT GGML_BUILD_COMMIT)
     set(GGML_BUILD_COMMIT "unknown")
 endif()
 
+# Build the commit string with optional dirty flag
+if(DEFINED GGML_GIT_DIRTY AND GGML_GIT_DIRTY EQUAL 1)
+    set(GGML_BUILD_COMMIT "${GGML_BUILD_COMMIT}-dirty")
+endif()
+
 include(CheckIncludeFileCXX)
 
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -182,6 +183,7 @@ endif()
 # ggml core
 set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism")
 option(GGML_CPU "ggml: enable CPU backend" ON)
+option(GGML_SCHED_NO_REALLOC "ggml: disallow reallocations in ggml-alloc (for debugging)" OFF)
 
 # 3rd party libs / backends
 option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON)
package/src/llama.cpp/ggml/include/ggml-rpc.h
@@ -8,7 +8,7 @@ extern "C" {
 #endif
 
 #define RPC_PROTO_MAJOR_VERSION 3
-#define RPC_PROTO_MINOR_VERSION 0
+#define RPC_PROTO_MINOR_VERSION 5
 #define RPC_PROTO_PATCH_VERSION 0
 #define GGML_RPC_MAX_SERVERS 16
 
package/src/llama.cpp/ggml/include/ggml.h
@@ -530,6 +530,7 @@ extern "C" {
         GGML_OP_ARANGE,
         GGML_OP_TIMESTEP_EMBEDDING,
         GGML_OP_ARGSORT,
+        GGML_OP_TOP_K,
         GGML_OP_LEAKY_RELU,
         GGML_OP_TRI,
         GGML_OP_FILL,
@@ -2258,18 +2259,25 @@ extern "C" {
             struct ggml_tensor  * a,
             enum ggml_sort_order  order);
 
-    GGML_API struct ggml_tensor * ggml_arange(
+    // similar to ggml_top_k but implemented as `argsort` + `view`
+    GGML_API struct ggml_tensor * ggml_argsort_top_k(
             struct ggml_context * ctx,
-            float                 start,
-            float                 stop,
-            float                 step);
+            struct ggml_tensor  * a,
+            int                   k);
 
     // top k elements per row
+    // note: the resulting top k indices are in no particular order
     GGML_API struct ggml_tensor * ggml_top_k(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             int                   k);
 
+    GGML_API struct ggml_tensor * ggml_arange(
+            struct ggml_context * ctx,
+            float                 start,
+            float                 stop,
+            float                 step);
+
 #define GGML_KQ_MASK_PAD 64
 
     // q: [n_embd_k, n_batch, n_head, ne3 ]
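These header changes pair the new GGML_OP_TOP_K op with a reshuffled API: ggml_top_k() now returns the top-k indices per row without guaranteeing their order, while the new ggml_argsort_top_k() keeps the old argsort-plus-view behaviour (ggml_arange() merely moves below them). A minimal graph-construction sketch under those assumptions; tensor creation, data upload and the compute call are elided:

    // Sketch: select the k most likely entries per row with the two top-k helpers.
    // Assumes a plain CPU ggml context; error handling omitted.
    #include "ggml.h"

    static struct ggml_tensor * build_top_k_example(struct ggml_context * ctx,
                                                    struct ggml_tensor  * logits, // F32, one row per sequence
                                                    int                   k) {
        // dedicated op: indices of the k largest values per row,
        // returned in no particular order (per the note above)
        struct ggml_tensor * idx_unordered = ggml_top_k(ctx, logits, k);

        // previous behaviour kept as a separate helper: full argsort + view,
        // so the surviving indices are presumably ordered by value
        struct ggml_tensor * idx_sorted = ggml_argsort_top_k(ctx, logits, k);

        (void) idx_sorted;
        return idx_unordered;
    }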
package/src/llama.cpp/ggml/src/CMakeLists.txt
@@ -221,6 +221,10 @@ if (GGML_BACKEND_DL)
     target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
 endif()
 
+if (GGML_SCHED_NO_REALLOC)
+    target_compile_definitions(ggml-base PUBLIC GGML_SCHED_NO_REALLOC)
+endif()
+
 add_library(ggml
             ggml-backend-reg.cpp)
 add_library(ggml::ggml ALIAS ggml)
@@ -270,10 +274,13 @@ function(ggml_add_backend_library backend)
     endif()
 
     # Set versioning properties for all backend libraries
-    set_target_properties(${backend} PROPERTIES
-        VERSION ${GGML_VERSION}
-        SOVERSION ${GGML_VERSION_MAJOR}
-    )
+    # Building a MODULE library with a version is not supported on macOS (https://gitlab.kitware.com/cmake/cmake/-/issues/20782)
+    if (NOT (APPLE AND GGML_BACKEND_DL))
+        set_target_properties(${backend} PROPERTIES
+            VERSION ${GGML_VERSION}
+            SOVERSION ${GGML_VERSION_MAJOR}
+        )
+    endif()
 
     if(NOT GGML_AVAILABLE_BACKENDS)
         set(GGML_AVAILABLE_BACKENDS "${backend}"
@@ -328,6 +335,14 @@ function(ggml_add_cpu_backend_variant tag_name)
             set(GGML_INTERNAL_${feat} OFF)
         endforeach()
 
+        foreach (feat ${ARGN})
+            set(GGML_INTERNAL_${feat} ON)
+        endforeach()
+    elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
+        foreach (feat RVV)
+            set(GGML_INTERNAL_${feat} OFF)
+        endforeach()
+
         foreach (feat ${ARGN})
             set(GGML_INTERNAL_${feat} ON)
         endforeach()
@@ -402,6 +417,13 @@ if (GGML_CPU_ALL_VARIANTS)
         else()
             message(FATAL_ERROR "Unsupported s390x target OS: ${CMAKE_SYSTEM_NAME}")
         endif()
+    elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
+        if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+            ggml_add_cpu_backend_variant(riscv64_0)
+            ggml_add_cpu_backend_variant(riscv64_v RVV)
+        else()
+            message(FATAL_ERROR "Unsupported RISC-V target OS: ${CMAKE_SYSTEM_NAME}")
+        endif()
     else()
         message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
     endif()
package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
@@ -224,7 +224,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
 
         include(CheckCXXSourceCompiles)
         set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
-        set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS}")
+        string(REPLACE ";" " " ARCH_FLAGS_STR "${ARCH_FLAGS}")
+        set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS_STR}")
         foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC SME)
             set(ARM_FEATURE "HAVE_${feature}")
             check_cxx_source_compiles(
@@ -452,22 +453,35 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
             ggml-cpu/spacemit/ime_kernels.h
         )
         endif()
-        set(MARCH_STR "rv64gc")
-        if (GGML_RV_ZFH)
-            string(APPEND MARCH_STR "_zfh")
-        endif()
-        if (GGML_XTHEADVECTOR)
-            string(APPEND MARCH_STR "_xtheadvector")
-        elseif (GGML_RVV)
-            string(APPEND MARCH_STR "_v")
-            if (GGML_RV_ZVFH)
-                string(APPEND MARCH_STR "_zvfh")
+        if(NOT GGML_CPU_ALL_VARIANTS)
+            set(MARCH_STR "rv64gc")
+            if (GGML_RV_ZFH)
+                string(APPEND MARCH_STR "_zfh")
             endif()
+            if (GGML_XTHEADVECTOR)
+                string(APPEND MARCH_STR "_xtheadvector")
+            elseif (GGML_RVV)
+                string(APPEND MARCH_STR "_v")
+                if (GGML_RV_ZVFH)
+                    string(APPEND MARCH_STR "_zvfh")
+                endif()
+            endif()
+            if (GGML_RV_ZICBOP)
+                string(APPEND MARCH_STR "_zicbop")
+            endif()
+            list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
+        else()
+            # Begin with the lowest baseline
+            set(ARCH_DEFINITIONS "")
+
+            if (GGML_INTERNAL_RVV)
+                message(STATUS "RVV enabled")
+                list(APPEND ARCH_DEFINITIONS GGML_USE_RVV)
+                list(APPEND ARCH_FLAGS -march=rv64gc_v -mabi=lp64d)
+            endif()
+
+            ggml_add_cpu_backend_features(${GGML_CPU_NAME} riscv ${ARCH_DEFINITIONS})
         endif()
-        if (GGML_RV_ZICBOP)
-            string(APPEND MARCH_STR "_zicbop")
-        endif()
-        list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
     elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
         message(STATUS "s390x detected")
         list(APPEND GGML_CPU_SOURCES