@fugood/llama.node 1.4.11 → 1.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/package.json +15 -15
  2. package/scripts/llama.cpp.patch +31 -31
  3. package/src/llama.cpp/common/arg.cpp +128 -59
  4. package/src/llama.cpp/common/arg.h +1 -0
  5. package/src/llama.cpp/common/chat-parser.cpp +11 -0
  6. package/src/llama.cpp/common/chat.cpp +36 -7
  7. package/src/llama.cpp/common/chat.h +1 -0
  8. package/src/llama.cpp/common/common.cpp +42 -23
  9. package/src/llama.cpp/common/common.h +11 -1
  10. package/src/llama.cpp/common/llguidance.cpp +10 -6
  11. package/src/llama.cpp/common/regex-partial.cpp +13 -13
  12. package/src/llama.cpp/common/sampling.cpp +58 -14
  13. package/src/llama.cpp/common/sampling.h +3 -1
  14. package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
  15. package/src/llama.cpp/ggml/include/ggml-backend.h +1 -1
  16. package/src/llama.cpp/ggml/src/CMakeLists.txt +23 -9
  17. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +12 -2
  18. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +1 -1
  19. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +86 -25
  20. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +15 -8
  21. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +768 -0
  22. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +0 -4
  23. package/src/llama.cpp/include/llama.h +100 -12
  24. package/src/llama.cpp/src/CMakeLists.txt +4 -0
  25. package/src/llama.cpp/src/llama-adapter.cpp +12 -3
  26. package/src/llama.cpp/src/llama-adapter.h +7 -1
  27. package/src/llama.cpp/src/llama-arch.cpp +78 -0
  28. package/src/llama.cpp/src/llama-arch.h +8 -0
  29. package/src/llama.cpp/src/llama-chat.cpp +11 -0
  30. package/src/llama.cpp/src/llama-chat.h +1 -0
  31. package/src/llama.cpp/src/llama-context.cpp +637 -49
  32. package/src/llama.cpp/src/llama-context.h +43 -1
  33. package/src/llama.cpp/src/llama-grammar.cpp +40 -13
  34. package/src/llama.cpp/src/llama-grammar.h +2 -0
  35. package/src/llama.cpp/src/llama-graph.cpp +173 -5
  36. package/src/llama.cpp/src/llama-graph.h +71 -6
  37. package/src/llama.cpp/src/llama-hparams.cpp +4 -0
  38. package/src/llama.cpp/src/llama-hparams.h +12 -5
  39. package/src/llama.cpp/src/llama-kv-cache.h +1 -1
  40. package/src/llama.cpp/src/llama-mmap.cpp +11 -4
  41. package/src/llama.cpp/src/llama-model-loader.cpp +23 -0
  42. package/src/llama.cpp/src/llama-model-loader.h +2 -0
  43. package/src/llama.cpp/src/llama-model-saver.cpp +3 -0
  44. package/src/llama.cpp/src/llama-model.cpp +337 -26
  45. package/src/llama.cpp/src/llama-model.h +13 -2
  46. package/src/llama.cpp/src/llama-sampling.cpp +1259 -186
  47. package/src/llama.cpp/src/llama-sampling.h +19 -7
  48. package/src/llama.cpp/src/llama-vocab.cpp +101 -33
  49. package/src/llama.cpp/src/llama-vocab.h +2 -0
  50. package/src/llama.cpp/src/llama.cpp +87 -64
  51. package/src/llama.cpp/src/models/afmoe.cpp +9 -5
  52. package/src/llama.cpp/src/models/bert.cpp +4 -2
  53. package/src/llama.cpp/src/models/cogvlm.cpp +5 -3
  54. package/src/llama.cpp/src/models/cohere2-iswa.cpp +3 -0
  55. package/src/llama.cpp/src/models/deepseek2.cpp +1 -1
  56. package/src/llama.cpp/src/models/gemma-embedding.cpp +2 -6
  57. package/src/llama.cpp/src/models/gemma2-iswa.cpp +5 -2
  58. package/src/llama.cpp/src/models/gemma3.cpp +3 -4
  59. package/src/llama.cpp/src/models/gemma3n-iswa.cpp +4 -7
  60. package/src/llama.cpp/src/models/llama-iswa.cpp +6 -2
  61. package/src/llama.cpp/src/models/llama.cpp +19 -6
  62. package/src/llama.cpp/src/models/maincoder.cpp +117 -0
  63. package/src/llama.cpp/src/models/mimo2-iswa.cpp +123 -0
  64. package/src/llama.cpp/src/models/models.h +18 -0
  65. package/src/llama.cpp/src/models/modern-bert.cpp +116 -0
  66. package/src/llama.cpp/src/models/openai-moe-iswa.cpp +5 -2
  67. package/src/llama.cpp/src/models/plamo3.cpp +128 -0
  68. package/src/llama.cpp/src/models/smallthinker.cpp +11 -5
  69. package/src/llama.cpp/src/unicode.cpp +23 -14
@@ -131,6 +131,8 @@ struct llama_model_loader {
131
131
  template<typename T>
132
132
  bool get_key_or_arr(enum llm_kv kid, T & result, uint32_t n, bool required = true);
133
133
 
134
+ bool get_key_or_arr(enum llm_kv kid, uint32_t & result, bool required = true);
135
+
134
136
  std::string get_arch_name() const;
135
137
 
136
138
  enum llm_arch get_arch() const;
@@ -146,6 +146,9 @@ void llama_model_saver::add_kv_from_model() {
146
146
  add_kv(LLM_KV_VOCAB_SIZE, vocab.n_tokens());
147
147
  add_kv(LLM_KV_CONTEXT_LENGTH, hparams.n_ctx_train);
148
148
  add_kv(LLM_KV_EMBEDDING_LENGTH, hparams.n_embd);
149
+ if (hparams.n_embd_out > 0) {
150
+ add_kv(LLM_KV_EMBEDDING_LENGTH_OUT, hparams.n_embd_out);
151
+ }
149
152
  add_kv(LLM_KV_BLOCK_COUNT, hparams.n_layer);
150
153
  add_kv(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
151
154
  add_kv(LLM_KV_FEED_FORWARD_LENGTH, hparams.n_ff_arr, true);