@fugood/llama.node 1.1.4 → 1.1.5

Files changed (32)
  1. package/lib/binding.ts +8 -0
  2. package/package.json +14 -14
  3. package/src/LlamaContext.cpp +3 -0
  4. package/src/llama.cpp/common/arg.cpp +60 -7
  5. package/src/llama.cpp/common/chat.cpp +6 -6
  6. package/src/llama.cpp/common/common.cpp +1 -0
  7. package/src/llama.cpp/common/common.h +14 -5
  8. package/src/llama.cpp/common/speculative.cpp +135 -54
  9. package/src/llama.cpp/common/speculative.h +8 -1
  10. package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
  11. package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +3196 -0
  12. package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +14 -0
  13. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +263 -0
  14. package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +11 -0
  15. package/src/llama.cpp/include/llama.h +8 -4
  16. package/src/llama.cpp/src/llama-arch.cpp +40 -0
  17. package/src/llama.cpp/src/llama-arch.h +2 -0
  18. package/src/llama.cpp/src/llama-batch.cpp +1 -1
  19. package/src/llama.cpp/src/llama-chat.cpp +20 -1
  20. package/src/llama.cpp/src/llama-chat.h +1 -0
  21. package/src/llama.cpp/src/llama-context.cpp +11 -2
  22. package/src/llama.cpp/src/llama-context.h +4 -1
  23. package/src/llama.cpp/src/llama-graph.cpp +57 -139
  24. package/src/llama.cpp/src/llama-graph.h +31 -32
  25. package/src/llama.cpp/src/llama-kv-cache-unified.cpp +2 -2
  26. package/src/llama.cpp/src/llama-kv-cache-unified.h +1 -1
  27. package/src/llama.cpp/src/llama-memory-hybrid.cpp +2 -1
  28. package/src/llama.cpp/src/llama-memory-hybrid.h +1 -0
  29. package/src/llama.cpp/src/llama-model.cpp +400 -21
  30. package/src/llama.cpp/src/llama-quant.cpp +3 -3
  31. package/src/llama.cpp/src/llama-vocab.cpp +7 -1
  32. package/src/llama.cpp/src/llama-vocab.h +1 -0
package/src/llama.cpp/common/speculative.h

@@ -12,7 +12,10 @@ struct common_speculative_params {
     float p_min = 0.75f; // min probability required to accept a token in the draft
 };
 
-struct common_speculative * common_speculative_init(struct llama_context * ctx_dft);
+struct common_speculative * common_speculative_init(
+        struct llama_context * ctx_tgt,
+        struct llama_context * ctx_dft
+);
 
 void common_speculative_free(struct common_speculative * spec);
 
@@ -20,6 +23,10 @@ bool common_speculative_are_compatible(
         const struct llama_context * ctx_tgt,
         const struct llama_context * ctx_dft);
 
+void common_speculative_add_replacement_tgt_dft(
+        struct common_speculative * spec,
+        const char *source, const char *dest);
+
 // sample up to n_draft tokens and add them to the batch using the draft model
 llama_tokens common_speculative_gen_draft(
        struct common_speculative * spec,
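
With this header change, common_speculative_init() now takes the target context in addition to the draft context, and callers may register target-to-draft text replacements before drafting. The following is a minimal, illustrative sketch of how a caller might adapt; it is not taken from this package, and ctx_tgt, ctx_dft, and the replacement strings are assumptions made for the example:

    // Illustrative sketch only (not from the diff). Assumes ctx_tgt and
    // ctx_dft are valid llama_context pointers for the target and draft models.
    #include "speculative.h"

    common_speculative * setup_spec(llama_context * ctx_tgt, llama_context * ctx_dft) {
        // init now also requires the target context (new in this version)
        common_speculative * spec = common_speculative_init(ctx_tgt, ctx_dft);

        // optionally register a target -> draft text replacement
        // (the strings here are made-up example values)
        common_speculative_add_replacement_tgt_dft(spec, "<|assistant|>", "assistant");

        return spec; // release later with common_speculative_free(spec)
    }

Per the hunk above, drafting itself is still driven through common_speculative_gen_draft(), which samples up to n_draft tokens from the draft model.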
package/src/llama.cpp/ggml/CMakeLists.txt

@@ -174,6 +174,7 @@ option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental,
 option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
 option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
 option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF)
+option(GGML_HIP_MMQ_MFMA "ggml: enable MFMA MMA for CDNA in MMQ" ON)
 option(GGML_MUSA_GRAPHS "ggml: use MUSA graph, experimental, unstable" OFF)
 option(GGML_MUSA_MUDNN_COPY "ggml: enable muDNN for accelerated copy" OFF)
 option(GGML_VULKAN "ggml: use Vulkan" OFF)
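
The new GGML_HIP_MMQ_MFMA option defaults to ON, so MFMA-based MMQ kernels on CDNA GPUs are enabled unless explicitly disabled. As an illustrative configure command for a HIP build of the vendored llama.cpp (GGML_HIP is the usual upstream HIP switch and is shown here only as an example), the option could be turned off with:

    cmake -B build -DGGML_HIP=ON -DGGML_HIP_MMQ_MFMA=OFF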