@fugood/llama.node 1.4.11 → 1.4.13

Files changed (69)
  1. package/package.json +15 -15
  2. package/scripts/llama.cpp.patch +31 -31
  3. package/src/llama.cpp/common/arg.cpp +128 -59
  4. package/src/llama.cpp/common/arg.h +1 -0
  5. package/src/llama.cpp/common/chat-parser.cpp +11 -0
  6. package/src/llama.cpp/common/chat.cpp +36 -7
  7. package/src/llama.cpp/common/chat.h +1 -0
  8. package/src/llama.cpp/common/common.cpp +42 -23
  9. package/src/llama.cpp/common/common.h +11 -1
  10. package/src/llama.cpp/common/llguidance.cpp +10 -6
  11. package/src/llama.cpp/common/regex-partial.cpp +13 -13
  12. package/src/llama.cpp/common/sampling.cpp +58 -14
  13. package/src/llama.cpp/common/sampling.h +3 -1
  14. package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
  15. package/src/llama.cpp/ggml/include/ggml-backend.h +1 -1
  16. package/src/llama.cpp/ggml/src/CMakeLists.txt +23 -9
  17. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +12 -2
  18. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +1 -1
  19. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +86 -25
  20. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +15 -8
  21. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +768 -0
  22. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +0 -4
  23. package/src/llama.cpp/include/llama.h +100 -12
  24. package/src/llama.cpp/src/CMakeLists.txt +4 -0
  25. package/src/llama.cpp/src/llama-adapter.cpp +12 -3
  26. package/src/llama.cpp/src/llama-adapter.h +7 -1
  27. package/src/llama.cpp/src/llama-arch.cpp +78 -0
  28. package/src/llama.cpp/src/llama-arch.h +8 -0
  29. package/src/llama.cpp/src/llama-chat.cpp +11 -0
  30. package/src/llama.cpp/src/llama-chat.h +1 -0
  31. package/src/llama.cpp/src/llama-context.cpp +637 -49
  32. package/src/llama.cpp/src/llama-context.h +43 -1
  33. package/src/llama.cpp/src/llama-grammar.cpp +40 -13
  34. package/src/llama.cpp/src/llama-grammar.h +2 -0
  35. package/src/llama.cpp/src/llama-graph.cpp +173 -5
  36. package/src/llama.cpp/src/llama-graph.h +71 -6
  37. package/src/llama.cpp/src/llama-hparams.cpp +4 -0
  38. package/src/llama.cpp/src/llama-hparams.h +12 -5
  39. package/src/llama.cpp/src/llama-kv-cache.h +1 -1
  40. package/src/llama.cpp/src/llama-mmap.cpp +11 -4
  41. package/src/llama.cpp/src/llama-model-loader.cpp +23 -0
  42. package/src/llama.cpp/src/llama-model-loader.h +2 -0
  43. package/src/llama.cpp/src/llama-model-saver.cpp +3 -0
  44. package/src/llama.cpp/src/llama-model.cpp +337 -26
  45. package/src/llama.cpp/src/llama-model.h +13 -2
  46. package/src/llama.cpp/src/llama-sampling.cpp +1259 -186
  47. package/src/llama.cpp/src/llama-sampling.h +19 -7
  48. package/src/llama.cpp/src/llama-vocab.cpp +101 -33
  49. package/src/llama.cpp/src/llama-vocab.h +2 -0
  50. package/src/llama.cpp/src/llama.cpp +87 -64
  51. package/src/llama.cpp/src/models/afmoe.cpp +9 -5
  52. package/src/llama.cpp/src/models/bert.cpp +4 -2
  53. package/src/llama.cpp/src/models/cogvlm.cpp +5 -3
  54. package/src/llama.cpp/src/models/cohere2-iswa.cpp +3 -0
  55. package/src/llama.cpp/src/models/deepseek2.cpp +1 -1
  56. package/src/llama.cpp/src/models/gemma-embedding.cpp +2 -6
  57. package/src/llama.cpp/src/models/gemma2-iswa.cpp +5 -2
  58. package/src/llama.cpp/src/models/gemma3.cpp +3 -4
  59. package/src/llama.cpp/src/models/gemma3n-iswa.cpp +4 -7
  60. package/src/llama.cpp/src/models/llama-iswa.cpp +6 -2
  61. package/src/llama.cpp/src/models/llama.cpp +19 -6
  62. package/src/llama.cpp/src/models/maincoder.cpp +117 -0
  63. package/src/llama.cpp/src/models/mimo2-iswa.cpp +123 -0
  64. package/src/llama.cpp/src/models/models.h +18 -0
  65. package/src/llama.cpp/src/models/modern-bert.cpp +116 -0
  66. package/src/llama.cpp/src/models/openai-moe-iswa.cpp +5 -2
  67. package/src/llama.cpp/src/models/plamo3.cpp +128 -0
  68. package/src/llama.cpp/src/models/smallthinker.cpp +11 -5
  69. package/src/llama.cpp/src/unicode.cpp +23 -14
package/src/llama.cpp/src/llama-model.h
@@ -24,12 +24,14 @@ enum llm_type {
  LLM_TYPE_17M,
  LLM_TYPE_22M,
  LLM_TYPE_33M,
+ LLM_TYPE_47M,
  LLM_TYPE_60M,
  LLM_TYPE_70M,
  LLM_TYPE_80M,
  LLM_TYPE_109M,
  LLM_TYPE_137M,
  LLM_TYPE_140M,
+ LLM_TYPE_149M,
  LLM_TYPE_160M,
  LLM_TYPE_190M,
  LLM_TYPE_220M,
@@ -39,6 +41,7 @@ enum llm_type {
  LLM_TYPE_335M,
  LLM_TYPE_350M,
  LLM_TYPE_360M,
+ LLM_TYPE_395M,
  LLM_TYPE_410M,
  LLM_TYPE_450M,
  LLM_TYPE_475M,
@@ -116,10 +119,12 @@ enum llm_type {
  LLM_TYPE_31B_A3_5B,
  LLM_TYPE_80B_A3B, // Qwen3 Next
  LLM_TYPE_100B_A6B,
+ LLM_TYPE_102B_A12B, // Solar-Open
  LLM_TYPE_106B_A12B, // GLM-4.5-Air
  LLM_TYPE_230B_A10B, // Minimax M2
  LLM_TYPE_235B_A22B,
  LLM_TYPE_300B_A47B, // Ernie MoE big
+ LLM_TYPE_310B_A15B, // MiMo-V2-Flash
  LLM_TYPE_355B_A32B, // GLM-4.5
  LLM_TYPE_E2B,
  LLM_TYPE_E4B,
@@ -462,8 +467,6 @@ struct llama_model {
  struct ggml_tensor * dense_2_out_layers = nullptr;
  struct ggml_tensor * dense_3_out_layers = nullptr;

- llama_model_params params;
-
  // gguf metadata
  std::unordered_map<std::string, std::string> gguf_kv;

@@ -473,6 +476,9 @@ struct llama_model {
  // for quantize-stats only
  std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;

+ // for keeping track of extra nodes used by lora adapters
+ uint32_t n_lora_nodes = 0;
+
  int64_t t_load_us = 0;
  int64_t t_start_us = 0;

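Worth noting on the `n_lora_nodes` addition: keeping a per-model count of adapter-contributed nodes lets the compute-graph allocation grow when LoRA adapters are attached, instead of being sized for the base model alone. A minimal sketch of the idea — the budget formula, the constants, and the `model_graph_budget` type below are illustrative assumptions, not the actual llama.cpp heuristic:

```cpp
#include <algorithm>
#include <cstdint>

// Hypothetical illustration: each attached LoRA adapter contributes extra
// graph nodes (roughly a mul-mat pair plus an add per adapted weight),
// accumulated into n_lora_nodes when the adapter is loaded.
struct model_graph_budget {
    uint32_t n_tensors;     // tensors in the base model
    uint32_t n_lora_nodes;  // extra nodes contributed by LoRA adapters

    // Upper bound on graph nodes: a base heuristic scaled by tensor count,
    // plus whatever the adapters add on top. The 8x factor and 1024 floor
    // are placeholder values for this sketch.
    uint32_t max_nodes() const {
        return std::max<uint32_t>(1024u, 8u * n_tensors) + n_lora_nodes;
    }
};
```

Putting the counter on the model rather than on a context matches where adapters are loaded, so every context built from the model inherits the enlarged budget.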
@@ -494,6 +500,9 @@ struct llama_model {
  size_t n_tensors() const;
  size_t n_devices() const;

+ uint32_t n_gpu_layers() const;
+ llama_split_mode split_mode() const;
+
  std::map<ggml_backend_buffer_type_t, size_t> memory_breakdown() const;

  // total number of parameters in the model
@@ -522,6 +531,8 @@ struct llama_model {
  ggml_cgraph * build_graph(const llm_graph_params & params) const;

 private:
+ llama_model_params params;
+
  struct impl;
  std::unique_ptr<impl> pimpl;
 };
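Taken together, the last two hunks move `params` from the public section of `llama_model` into the private section, and the new const accessors `n_gpu_layers()` and `split_mode()` become the supported way for the rest of the library to read those settings. A minimal sketch of what a migrated call site inside the llama.cpp source tree could look like — `offloads_layers` and `splits_by_row` are hypothetical helpers, not functions from this diff:

```cpp
// Sketch of an internal helper; assumes the internal headers from
// this diff are visible on the include path.
#include "llama.h"
#include "llama-model.h"

// Before this release, callers could read model.params.n_gpu_layers
// directly. With params now private, the accessors are used instead.
static bool offloads_layers(const llama_model & model) {
    return model.n_gpu_layers() > 0;
}

static bool splits_by_row(const llama_model & model) {
    return model.split_mode() == LLAMA_SPLIT_MODE_ROW;
}
```

Encapsulating `params` behind accessors means external code no longer depends on the layout of `llama_model_params`, so fields can be added or reorganized there without touching every call site.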