@fugood/llama.node 1.1.4 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +8 -0
- package/package.json +14 -14
- package/src/LlamaContext.cpp +3 -0
- package/src/llama.cpp/common/arg.cpp +60 -7
- package/src/llama.cpp/common/chat.cpp +6 -6
- package/src/llama.cpp/common/common.cpp +1 -0
- package/src/llama.cpp/common/common.h +14 -5
- package/src/llama.cpp/common/speculative.cpp +135 -54
- package/src/llama.cpp/common/speculative.h +8 -1
- package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +3196 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +263 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +11 -0
- package/src/llama.cpp/include/llama.h +8 -4
- package/src/llama.cpp/src/llama-arch.cpp +40 -0
- package/src/llama.cpp/src/llama-arch.h +2 -0
- package/src/llama.cpp/src/llama-batch.cpp +1 -1
- package/src/llama.cpp/src/llama-chat.cpp +20 -1
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +11 -2
- package/src/llama.cpp/src/llama-context.h +4 -1
- package/src/llama.cpp/src/llama-graph.cpp +57 -139
- package/src/llama.cpp/src/llama-graph.h +31 -32
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +2 -2
- package/src/llama.cpp/src/llama-kv-cache-unified.h +1 -1
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +2 -1
- package/src/llama.cpp/src/llama-memory-hybrid.h +1 -0
- package/src/llama.cpp/src/llama-model.cpp +400 -21
- package/src/llama.cpp/src/llama-quant.cpp +3 -3
- package/src/llama.cpp/src/llama-vocab.cpp +7 -1
- package/src/llama.cpp/src/llama-vocab.h +1 -0
--- a/package/src/llama.cpp/common/speculative.h
+++ b/package/src/llama.cpp/common/speculative.h
@@ -12,7 +12,10 @@ struct common_speculative_params {
     float p_min = 0.75f; // min probability required to accept a token in the draft
 };
 
-struct common_speculative * common_speculative_init(struct llama_context * ctx_dft);
+struct common_speculative * common_speculative_init(
+        struct llama_context * ctx_tgt,
+        struct llama_context * ctx_dft
+);
 
 void common_speculative_free(struct common_speculative * spec);
 
@@ -20,6 +23,10 @@ bool common_speculative_are_compatible(
         const struct llama_context * ctx_tgt,
         const struct llama_context * ctx_dft);
 
+void common_speculative_add_replacement_tgt_dft(
+        struct common_speculative * spec,
+        const char *source, const char *dest);
+
 // sample up to n_draft tokens and add them to the batch using the draft model
 llama_tokens common_speculative_gen_draft(
         struct common_speculative * spec,
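This is a breaking header change for consumers of the vendored common library: common_speculative_init now takes the target context alongside the draft context, and the new common_speculative_add_replacement_tgt_dft hook registers string replacements so a draft model with a different vocabulary can still be used. Below is a minimal caller-side sketch against the 1.1.5 headers; setup_spec, the replacement string pair, and the include path are illustrative assumptions, and both contexts are assumed to have been created elsewhere.

    #include "speculative.h"  // path depends on how the vendored common/ dir is included

    // Sketch only: ctx_tgt and ctx_dft are llama_context handles created
    // elsewhere (one for the target model, one for the draft model).
    common_speculative * setup_spec(llama_context * ctx_tgt, llama_context * ctx_dft) {
        // 1.1.5 signature: the target context is passed alongside the draft one.
        common_speculative * spec = common_speculative_init(ctx_tgt, ctx_dft);

        // Optional: map target-vocab strings to draft-vocab strings when the
        // two models tokenize differently. This pair is a made-up example.
        common_speculative_add_replacement_tgt_dft(spec, "<|assistant|>", "<|bot|>");

        return spec;
    }

Code built against 1.1.4, where the initializer took only the draft context, needs the extra argument once these headers are picked up; the replacement call is optional and can be skipped when the two vocabularies match.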
--- a/package/src/llama.cpp/ggml/CMakeLists.txt
+++ b/package/src/llama.cpp/ggml/CMakeLists.txt
@@ -174,6 +174,7 @@ option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental,
 option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
 option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
 option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF)
+option(GGML_HIP_MMQ_MFMA "ggml: enable MFMA MMA for CDNA in MMQ" ON)
 option(GGML_MUSA_GRAPHS "ggml: use MUSA graph, experimental, unstable" OFF)
 option(GGML_MUSA_MUDNN_COPY "ggml: enable muDNN for accelerated copy" OFF)
 option(GGML_VULKAN "ggml: use Vulkan" OFF)