@fugood/llama.node 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. package/package.json +14 -14
  2. package/scripts/llama.cpp.patch +8 -8
  3. package/src/llama.cpp/common/CMakeLists.txt +2 -0
  4. package/src/llama.cpp/common/arg.cpp +44 -999
  5. package/src/llama.cpp/common/arg.h +2 -2
  6. package/src/llama.cpp/common/chat.cpp +17 -2
  7. package/src/llama.cpp/common/common.cpp +33 -0
  8. package/src/llama.cpp/common/common.h +15 -1
  9. package/src/llama.cpp/common/download.cpp +1054 -0
  10. package/src/llama.cpp/common/download.h +55 -0
  11. package/src/llama.cpp/ggml/CMakeLists.txt +1 -1
  12. package/src/llama.cpp/ggml/include/ggml.h +2 -0
  13. package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
  14. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +29 -11
  15. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
  16. package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
  17. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
  18. package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  19. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -1
  20. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +21 -21
  21. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +172 -75
  22. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +0 -4
  23. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +82 -21
  24. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +25 -25
  25. package/src/llama.cpp/include/llama.h +7 -3
  26. package/src/llama.cpp/src/CMakeLists.txt +95 -0
  27. package/src/llama.cpp/src/llama-arch.cpp +108 -0
  28. package/src/llama.cpp/src/llama-arch.h +11 -0
  29. package/src/llama.cpp/src/llama-batch.cpp +63 -31
  30. package/src/llama.cpp/src/llama-batch.h +12 -1
  31. package/src/llama.cpp/src/llama-chat.cpp +32 -0
  32. package/src/llama.cpp/src/llama-chat.h +1 -0
  33. package/src/llama.cpp/src/llama-context.cpp +36 -13
  34. package/src/llama.cpp/src/llama-context.h +5 -5
  35. package/src/llama.cpp/src/llama-cparams.h +1 -0
  36. package/src/llama.cpp/src/llama-graph.cpp +3 -3
  37. package/src/llama.cpp/src/llama-hparams.cpp +11 -1
  38. package/src/llama.cpp/src/llama-hparams.h +6 -0
  39. package/src/llama.cpp/src/llama-kv-cache-iswa.cpp +3 -1
  40. package/src/llama.cpp/src/llama-kv-cache.cpp +33 -1
  41. package/src/llama.cpp/src/llama-kv-cells.h +44 -2
  42. package/src/llama.cpp/src/llama-memory-recurrent.cpp +4 -3
  43. package/src/llama.cpp/src/llama-model.cpp +320 -13171
  44. package/src/llama.cpp/src/llama-model.h +8 -0
  45. package/src/llama.cpp/src/llama-quant.cpp +1 -1
  46. package/src/llama.cpp/src/llama-vocab.cpp +5 -0
  47. package/src/llama.cpp/src/llama-vocab.h +1 -0
  48. package/src/llama.cpp/src/models/apertus.cpp +125 -0
  49. package/src/llama.cpp/src/models/arcee.cpp +135 -0
  50. package/src/llama.cpp/src/models/arctic.cpp +138 -0
  51. package/src/llama.cpp/src/models/arwkv7.cpp +86 -0
  52. package/src/llama.cpp/src/models/baichuan.cpp +122 -0
  53. package/src/llama.cpp/src/models/bailingmoe.cpp +144 -0
  54. package/src/llama.cpp/src/models/bailingmoe2.cpp +135 -0
  55. package/src/llama.cpp/src/models/bert.cpp +176 -0
  56. package/src/llama.cpp/src/models/bitnet.cpp +160 -0
  57. package/src/llama.cpp/src/models/bloom.cpp +101 -0
  58. package/src/llama.cpp/src/models/chameleon.cpp +178 -0
  59. package/src/llama.cpp/src/models/chatglm.cpp +132 -0
  60. package/src/llama.cpp/src/models/codeshell.cpp +111 -0
  61. package/src/llama.cpp/src/models/cogvlm.cpp +100 -0
  62. package/src/llama.cpp/src/models/cohere2-iswa.cpp +131 -0
  63. package/src/llama.cpp/src/models/command-r.cpp +122 -0
  64. package/src/llama.cpp/src/models/dbrx.cpp +123 -0
  65. package/src/llama.cpp/src/models/deci.cpp +135 -0
  66. package/src/llama.cpp/src/models/deepseek.cpp +144 -0
  67. package/src/llama.cpp/src/models/deepseek2.cpp +236 -0
  68. package/src/llama.cpp/src/models/dots1.cpp +134 -0
  69. package/src/llama.cpp/src/models/dream.cpp +105 -0
  70. package/src/llama.cpp/src/models/ernie4-5-moe.cpp +150 -0
  71. package/src/llama.cpp/src/models/ernie4-5.cpp +110 -0
  72. package/src/llama.cpp/src/models/exaone.cpp +114 -0
  73. package/src/llama.cpp/src/models/exaone4.cpp +123 -0
  74. package/src/llama.cpp/src/models/falcon-h1.cpp +113 -0
  75. package/src/llama.cpp/src/models/falcon.cpp +120 -0
  76. package/src/llama.cpp/src/models/gemma-embedding.cpp +120 -0
  77. package/src/llama.cpp/src/models/gemma.cpp +112 -0
  78. package/src/llama.cpp/src/models/gemma2-iswa.cpp +125 -0
  79. package/src/llama.cpp/src/models/gemma3-iswa.cpp +131 -0
  80. package/src/llama.cpp/src/models/gemma3n-iswa.cpp +377 -0
  81. package/src/llama.cpp/src/models/glm4-moe.cpp +153 -0
  82. package/src/llama.cpp/src/models/glm4.cpp +127 -0
  83. package/src/llama.cpp/src/models/gpt2.cpp +105 -0
  84. package/src/llama.cpp/src/models/gptneox.cpp +144 -0
  85. package/src/llama.cpp/src/models/granite-hybrid.cpp +196 -0
  86. package/src/llama.cpp/src/models/granite.cpp +211 -0
  87. package/src/llama.cpp/src/models/graph-context-mamba.cpp +283 -0
  88. package/src/llama.cpp/src/models/grok.cpp +159 -0
  89. package/src/llama.cpp/src/models/grovemoe.cpp +141 -0
  90. package/src/llama.cpp/src/models/hunyuan-dense.cpp +132 -0
  91. package/src/llama.cpp/src/models/hunyuan-moe.cpp +154 -0
  92. package/src/llama.cpp/src/models/internlm2.cpp +120 -0
  93. package/src/llama.cpp/src/models/jais.cpp +86 -0
  94. package/src/llama.cpp/src/models/jamba.cpp +106 -0
  95. package/src/llama.cpp/src/models/lfm2.cpp +173 -0
  96. package/src/llama.cpp/src/models/llada-moe.cpp +122 -0
  97. package/src/llama.cpp/src/models/llada.cpp +99 -0
  98. package/src/llama.cpp/src/models/llama-iswa.cpp +174 -0
  99. package/src/llama.cpp/src/models/llama.cpp +155 -0
  100. package/src/llama.cpp/src/models/mamba.cpp +55 -0
  101. package/src/llama.cpp/src/models/minicpm3.cpp +199 -0
  102. package/src/llama.cpp/src/models/minimax-m2.cpp +124 -0
  103. package/src/llama.cpp/src/models/models.h +481 -0
  104. package/src/llama.cpp/src/models/mpt.cpp +126 -0
  105. package/src/llama.cpp/src/models/nemotron-h.cpp +121 -0
  106. package/src/llama.cpp/src/models/nemotron.cpp +122 -0
  107. package/src/llama.cpp/src/models/neo-bert.cpp +104 -0
  108. package/src/llama.cpp/src/models/olmo.cpp +121 -0
  109. package/src/llama.cpp/src/models/olmo2.cpp +150 -0
  110. package/src/llama.cpp/src/models/olmoe.cpp +124 -0
  111. package/src/llama.cpp/src/models/openai-moe-iswa.cpp +124 -0
  112. package/src/llama.cpp/src/models/openelm.cpp +124 -0
  113. package/src/llama.cpp/src/models/orion.cpp +123 -0
  114. package/src/llama.cpp/src/models/pangu-embedded.cpp +121 -0
  115. package/src/llama.cpp/src/models/phi2.cpp +121 -0
  116. package/src/llama.cpp/src/models/phi3.cpp +152 -0
  117. package/src/llama.cpp/src/models/plamo.cpp +110 -0
  118. package/src/llama.cpp/src/models/plamo2.cpp +316 -0
  119. package/src/llama.cpp/src/models/plm.cpp +168 -0
  120. package/src/llama.cpp/src/models/qwen.cpp +108 -0
  121. package/src/llama.cpp/src/models/qwen2.cpp +117 -0
  122. package/src/llama.cpp/src/models/qwen2moe.cpp +151 -0
  123. package/src/llama.cpp/src/models/qwen2vl.cpp +117 -0
  124. package/src/llama.cpp/src/models/qwen3.cpp +117 -0
  125. package/src/llama.cpp/src/models/qwen3moe.cpp +124 -0
  126. package/src/llama.cpp/src/models/qwen3vl-moe.cpp +149 -0
  127. package/src/llama.cpp/src/models/qwen3vl.cpp +141 -0
  128. package/src/llama.cpp/src/models/refact.cpp +94 -0
  129. package/src/llama.cpp/src/models/rwkv6-base.cpp +162 -0
  130. package/src/llama.cpp/src/models/rwkv6.cpp +94 -0
  131. package/src/llama.cpp/src/models/rwkv6qwen2.cpp +86 -0
  132. package/src/llama.cpp/src/models/rwkv7-base.cpp +135 -0
  133. package/src/llama.cpp/src/models/rwkv7.cpp +90 -0
  134. package/src/llama.cpp/src/models/seed-oss.cpp +124 -0
  135. package/src/llama.cpp/src/models/smallthinker.cpp +120 -0
  136. package/src/llama.cpp/src/models/smollm3.cpp +128 -0
  137. package/src/llama.cpp/src/models/stablelm.cpp +146 -0
  138. package/src/llama.cpp/src/models/starcoder.cpp +100 -0
  139. package/src/llama.cpp/src/models/starcoder2.cpp +121 -0
  140. package/src/llama.cpp/src/models/t5-dec.cpp +166 -0
  141. package/src/llama.cpp/src/models/t5-enc.cpp +96 -0
  142. package/src/llama.cpp/src/models/wavtokenizer-dec.cpp +149 -0
  143. package/src/llama.cpp/src/models/xverse.cpp +108 -0
package/src/llama.cpp/common/arg.h
@@ -59,8 +59,8 @@ struct common_arg {
     common_arg & set_sparam();
     bool in_example(enum llama_example ex);
     bool is_exclude(enum llama_example ex);
-    bool get_value_from_env(std::string & output);
-    bool has_value_from_env();
+    bool get_value_from_env(std::string & output) const;
+    bool has_value_from_env() const;
     std::string to_string();
 };
 
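Both accessors are now const-qualified, so read-only code can call them through a const common_arg &. A minimal sketch of the benefit (the helper below is hypothetical, not part of the diff; assumes arg.h, which pulls in <string>, is included):

    // Hypothetical inspection helper: compiles only now that both
    // accessors are const member functions.
    std::string describe_env_override(const common_arg & arg) {
        std::string value;
        if (arg.has_value_from_env() && arg.get_value_from_env(value)) {
            return "overridden from environment: " + value;
        }
        return "no environment override";
    }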
 
package/src/llama.cpp/common/chat.cpp
@@ -300,7 +300,6 @@ json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msg
         }
         if (!msg.reasoning_content.empty()) {
             jmsg["reasoning_content"] = msg.reasoning_content;
-            jmsg["thinking"] = msg.reasoning_content; // gpt-oss
         }
         if (!msg.tool_name.empty()) {
             jmsg["name"] = msg.tool_name;
@@ -1797,7 +1796,23 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
 
 static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
     common_chat_params data;
-    auto prompt = apply(tmpl, inputs);
+
+    // Copy reasoning to the "thinking" field as expected by the gpt-oss template
+    auto adjusted_messages = json::array();
+    for (const auto & msg : inputs.messages) {
+        auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
+        auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
+
+        if (has_reasoning_content && has_tool_calls) {
+            auto adjusted_message = msg;
+            adjusted_message["thinking"] = msg.at("reasoning_content");
+            adjusted_messages.push_back(adjusted_message);
+        } else {
+            adjusted_messages.push_back(msg);
+        }
+    }
+
+    auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
 
     // Check if we need to replace the return token with end token during
     // inference and without generation prompt. For more details see:
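
Taken together with the previous hunk, the "thinking" duplication moves out of the generic OpenAI-compatible serializer and into the gpt-oss-specific initializer, and it now fires only for assistant turns that carry both reasoning_content and tool_calls. A self-contained sketch of the transformation (the example messages are illustrative; assumes nlohmann/json, the JSON library llama.cpp's common code already uses):

    #include <nlohmann/json.hpp>
    #include <iostream>

    using json = nlohmann::json;

    int main() {
        // An assistant turn with both reasoning and tool calls gains a
        // duplicate "thinking" field; other messages pass through untouched.
        json messages = json::array({
            { {"role", "user"}, {"content", "What's the weather in Paris?"} },
            { {"role", "assistant"},
              {"reasoning_content", "I should call the weather tool."},
              {"tool_calls", json::array({ { {"name", "get_weather"} } })} },
        });

        json adjusted = json::array();
        for (const auto & msg : messages) {
            const bool has_reasoning = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
            const bool has_tools     = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
            if (has_reasoning && has_tools) {
                json copy = msg;
                copy["thinking"] = msg.at("reasoning_content");
                adjusted.push_back(copy);
            } else {
                adjusted.push_back(msg);
            }
        }

        std::cout << adjusted.dump(2) << std::endl; // second message now carries "thinking"
    }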
package/src/llama.cpp/common/common.cpp
@@ -908,6 +908,39 @@ std::string fs_get_cache_file(const std::string & filename) {
     return cache_directory + filename;
 }
 
+std::vector<common_file_info> fs_list_files(const std::string & path) {
+    std::vector<common_file_info> files;
+    if (path.empty()) return files;
+
+    std::filesystem::path dir(path);
+    if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
+        return files;
+    }
+
+    for (const auto & entry : std::filesystem::directory_iterator(dir)) {
+        try {
+            // Only include regular files (skip directories)
+            const auto & p = entry.path();
+            if (std::filesystem::is_regular_file(p)) {
+                common_file_info info;
+                info.path = p.string();
+                info.name = p.filename().string();
+                try {
+                    info.size = static_cast<size_t>(std::filesystem::file_size(p));
+                } catch (const std::filesystem::filesystem_error &) {
+                    info.size = 0;
+                }
+                files.push_back(std::move(info));
+            }
+        } catch (const std::filesystem::filesystem_error &) {
+            // skip entries we cannot inspect
+            continue;
+        }
+    }
+
+    return files;
+}
+
 
 //
 // Model utils
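
A minimal usage sketch for the new helper (the printing function is hypothetical; assumes common.h is included). Note the behavior encoded above: the listing is non-recursive, directories are skipped, and a file whose size cannot be read is reported with size 0:

    #include <cstdio>
    #include <string>

    // Hypothetical caller: print each regular file in a directory with its size.
    static void print_dir_listing(const std::string & dir) {
        for (const auto & info : fs_list_files(dir)) {
            std::printf("%-40s %zu bytes\n", info.name.c_str(), info.size);
        }
    }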
package/src/llama.cpp/common/common.h
@@ -407,6 +407,8 @@ struct common_params {
     bool mmproj_use_gpu = true;     // use GPU for multimodal model
     bool no_mmproj = false;         // explicitly disable multimodal model
     std::vector<std::string> image; // path to image file(s)
+    int image_min_tokens = -1;
+    int image_max_tokens = -1;
 
     // finetune
     struct lr_opt lr;
@@ -459,7 +461,8 @@ struct common_params {
     float slot_prompt_similarity = 0.1f;
 
     // batched-bench params
-    bool is_pp_shared = false;
+    bool is_pp_shared   = false;
+    bool is_tg_separate = false;
 
     std::vector<int32_t> n_pp;
     std::vector<int32_t> n_tg;
@@ -506,6 +509,10 @@ struct common_params {
     // return false from callback to abort model loading or true to continue
     llama_progress_callback load_progress_callback = NULL;
     void * load_progress_callback_user_data = NULL;
+
+    bool has_speculative() const {
+        return !speculative.model.path.empty() || !speculative.model.hf_repo.empty();
+    }
 };
 
 // call once at the start of a program if it uses libcommon
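
A hedged call-site sketch (the function below is hypothetical, not from this diff): has_speculative() is true when a draft model was configured either as a local path or as a Hugging Face repo, so callers no longer need to test both fields themselves.

    // Hypothetical caller: branch decoding setup on the new helper.
    static void init_decoding(const common_params & params) {
        if (params.has_speculative()) {
            // a draft model was configured (local path or HF repo):
            // set up the speculative-decoding pipeline here
        } else {
            // no draft model: plain single-model decoding
        }
    }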
@@ -606,6 +613,13 @@ bool fs_create_directory_with_parents(const std::string & path);
 std::string fs_get_cache_directory();
 std::string fs_get_cache_file(const std::string & filename);
 
+struct common_file_info {
+    std::string path;
+    std::string name;
+    size_t size = 0; // in bytes
+};
+std::vector<common_file_info> fs_list_files(const std::string & path);
+
 //
 // Model utils
 //
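
The struct is the return type of fs_list_files(), whose implementation appears in the common.cpp hunk above. One plausible use (the helper and the GGUF suffix check are illustrative, not from this diff) is filtering a directory listing by extension:

    #include <string>
    #include <vector>

    // Hypothetical helper: keep only entries whose name ends in ".gguf".
    static std::vector<common_file_info> list_gguf_files(const std::string & dir) {
        std::vector<common_file_info> out;
        for (auto & info : fs_list_files(dir)) {
            static const std::string ext = ".gguf";
            if (info.name.size() >= ext.size() &&
                info.name.compare(info.name.size() - ext.size(), ext.size(), ext) == 0) {
                out.push_back(std::move(info));
            }
        }
        return out;
    }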