@fugood/llama.node 1.3.0-rc.6 → 1.3.1

Files changed (147)
  1. package/CMakeLists.txt +12 -2
  2. package/package.json +14 -14
  3. package/scripts/llama.cpp.patch +8 -9
  4. package/src/llama.cpp/common/CMakeLists.txt +2 -0
  5. package/src/llama.cpp/common/arg.cpp +39 -1001
  6. package/src/llama.cpp/common/arg.h +2 -2
  7. package/src/llama.cpp/common/chat.cpp +216 -2
  8. package/src/llama.cpp/common/chat.h +1 -0
  9. package/src/llama.cpp/common/common.cpp +33 -0
  10. package/src/llama.cpp/common/common.h +13 -0
  11. package/src/llama.cpp/common/download.cpp +1054 -0
  12. package/src/llama.cpp/common/download.h +55 -0
  13. package/src/llama.cpp/common/json-schema-to-grammar.cpp +19 -3
  14. package/src/llama.cpp/ggml/CMakeLists.txt +3 -1
  15. package/src/llama.cpp/ggml/include/ggml-hexagon.h +19 -0
  16. package/src/llama.cpp/ggml/include/ggml.h +2 -0
  17. package/src/llama.cpp/ggml/src/CMakeLists.txt +7 -3
  18. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +10 -3
  19. package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
  20. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
  21. package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  22. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -1
  23. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +0 -5
  24. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +172 -35
  25. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +82 -21
  26. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +25 -25
  27. package/src/llama.cpp/include/llama.h +7 -3
  28. package/src/llama.cpp/src/CMakeLists.txt +95 -0
  29. package/src/llama.cpp/src/llama-arch.cpp +108 -0
  30. package/src/llama.cpp/src/llama-arch.h +11 -0
  31. package/src/llama.cpp/src/llama-batch.cpp +63 -31
  32. package/src/llama.cpp/src/llama-batch.h +12 -1
  33. package/src/llama.cpp/src/llama-chat.cpp +32 -0
  34. package/src/llama.cpp/src/llama-chat.h +1 -0
  35. package/src/llama.cpp/src/llama-context.cpp +44 -16
  36. package/src/llama.cpp/src/llama-context.h +5 -5
  37. package/src/llama.cpp/src/llama-cparams.h +1 -0
  38. package/src/llama.cpp/src/llama-graph.cpp +12 -7
  39. package/src/llama.cpp/src/llama-hparams.cpp +11 -1
  40. package/src/llama.cpp/src/llama-hparams.h +6 -0
  41. package/src/llama.cpp/src/llama-kv-cache-iswa.cpp +3 -1
  42. package/src/llama.cpp/src/llama-kv-cache.cpp +56 -21
  43. package/src/llama.cpp/src/llama-kv-cache.h +2 -4
  44. package/src/llama.cpp/src/llama-kv-cells.h +44 -2
  45. package/src/llama.cpp/src/llama-memory-recurrent.cpp +18 -14
  46. package/src/llama.cpp/src/llama-memory-recurrent.h +2 -2
  47. package/src/llama.cpp/src/llama-model.cpp +350 -13194
  48. package/src/llama.cpp/src/llama-model.h +9 -2
  49. package/src/llama.cpp/src/llama-quant.cpp +1 -1
  50. package/src/llama.cpp/src/llama-vocab.cpp +5 -0
  51. package/src/llama.cpp/src/llama-vocab.h +1 -0
  52. package/src/llama.cpp/src/models/apertus.cpp +125 -0
  53. package/src/llama.cpp/src/models/arcee.cpp +135 -0
  54. package/src/llama.cpp/src/models/arctic.cpp +138 -0
  55. package/src/llama.cpp/src/models/arwkv7.cpp +86 -0
  56. package/src/llama.cpp/src/models/baichuan.cpp +122 -0
  57. package/src/llama.cpp/src/models/bailingmoe.cpp +144 -0
  58. package/src/llama.cpp/src/models/bailingmoe2.cpp +135 -0
  59. package/src/llama.cpp/src/models/bert.cpp +176 -0
  60. package/src/llama.cpp/src/models/bitnet.cpp +160 -0
  61. package/src/llama.cpp/src/models/bloom.cpp +101 -0
  62. package/src/llama.cpp/src/models/chameleon.cpp +178 -0
  63. package/src/llama.cpp/src/models/chatglm.cpp +132 -0
  64. package/src/llama.cpp/src/models/codeshell.cpp +111 -0
  65. package/src/llama.cpp/src/models/cogvlm.cpp +100 -0
  66. package/src/llama.cpp/src/models/cohere2-iswa.cpp +131 -0
  67. package/src/llama.cpp/src/models/command-r.cpp +122 -0
  68. package/src/llama.cpp/src/models/dbrx.cpp +123 -0
  69. package/src/llama.cpp/src/models/deci.cpp +135 -0
  70. package/src/llama.cpp/src/models/deepseek.cpp +144 -0
  71. package/src/llama.cpp/src/models/deepseek2.cpp +236 -0
  72. package/src/llama.cpp/src/models/dots1.cpp +134 -0
  73. package/src/llama.cpp/src/models/dream.cpp +105 -0
  74. package/src/llama.cpp/src/models/ernie4-5-moe.cpp +150 -0
  75. package/src/llama.cpp/src/models/ernie4-5.cpp +111 -0
  76. package/src/llama.cpp/src/models/exaone.cpp +114 -0
  77. package/src/llama.cpp/src/models/exaone4.cpp +123 -0
  78. package/src/llama.cpp/src/models/falcon-h1.cpp +113 -0
  79. package/src/llama.cpp/src/models/falcon.cpp +120 -0
  80. package/src/llama.cpp/src/models/gemma-embedding.cpp +120 -0
  81. package/src/llama.cpp/src/models/gemma.cpp +112 -0
  82. package/src/llama.cpp/src/models/gemma2-iswa.cpp +125 -0
  83. package/src/llama.cpp/src/models/gemma3-iswa.cpp +131 -0
  84. package/src/llama.cpp/src/models/gemma3n-iswa.cpp +377 -0
  85. package/src/llama.cpp/src/models/glm4-moe.cpp +153 -0
  86. package/src/llama.cpp/src/models/glm4.cpp +127 -0
  87. package/src/llama.cpp/src/models/gpt2.cpp +105 -0
  88. package/src/llama.cpp/src/models/gptneox.cpp +144 -0
  89. package/src/llama.cpp/src/models/granite-hybrid.cpp +196 -0
  90. package/src/llama.cpp/src/models/granite.cpp +211 -0
  91. package/src/llama.cpp/src/models/graph-context-mamba.cpp +283 -0
  92. package/src/llama.cpp/src/models/grok.cpp +159 -0
  93. package/src/llama.cpp/src/models/grovemoe.cpp +141 -0
  94. package/src/llama.cpp/src/models/hunyuan-dense.cpp +132 -0
  95. package/src/llama.cpp/src/models/hunyuan-moe.cpp +154 -0
  96. package/src/llama.cpp/src/models/internlm2.cpp +120 -0
  97. package/src/llama.cpp/src/models/jais.cpp +86 -0
  98. package/src/llama.cpp/src/models/jamba.cpp +106 -0
  99. package/src/llama.cpp/src/models/lfm2.cpp +173 -0
  100. package/src/llama.cpp/src/models/llada-moe.cpp +122 -0
  101. package/src/llama.cpp/src/models/llada.cpp +99 -0
  102. package/src/llama.cpp/src/models/llama-iswa.cpp +174 -0
  103. package/src/llama.cpp/src/models/llama.cpp +155 -0
  104. package/src/llama.cpp/src/models/mamba.cpp +55 -0
  105. package/src/llama.cpp/src/models/minicpm3.cpp +199 -0
  106. package/src/llama.cpp/src/models/minimax-m2.cpp +124 -0
  107. package/src/llama.cpp/src/models/models.h +481 -0
  108. package/src/llama.cpp/src/models/mpt.cpp +126 -0
  109. package/src/llama.cpp/src/models/nemotron-h.cpp +121 -0
  110. package/src/llama.cpp/src/models/nemotron.cpp +122 -0
  111. package/src/llama.cpp/src/models/neo-bert.cpp +104 -0
  112. package/src/llama.cpp/src/models/olmo.cpp +121 -0
  113. package/src/llama.cpp/src/models/olmo2.cpp +150 -0
  114. package/src/llama.cpp/src/models/olmoe.cpp +124 -0
  115. package/src/llama.cpp/src/models/openai-moe-iswa.cpp +123 -0
  116. package/src/llama.cpp/src/models/openelm.cpp +124 -0
  117. package/src/llama.cpp/src/models/orion.cpp +123 -0
  118. package/src/llama.cpp/src/models/pangu-embedded.cpp +121 -0
  119. package/src/llama.cpp/src/models/phi2.cpp +121 -0
  120. package/src/llama.cpp/src/models/phi3.cpp +152 -0
  121. package/src/llama.cpp/src/models/plamo.cpp +110 -0
  122. package/src/llama.cpp/src/models/plamo2.cpp +316 -0
  123. package/src/llama.cpp/src/models/plm.cpp +168 -0
  124. package/src/llama.cpp/src/models/qwen.cpp +108 -0
  125. package/src/llama.cpp/src/models/qwen2.cpp +117 -0
  126. package/src/llama.cpp/src/models/qwen2moe.cpp +151 -0
  127. package/src/llama.cpp/src/models/qwen2vl.cpp +117 -0
  128. package/src/llama.cpp/src/models/qwen3.cpp +117 -0
  129. package/src/llama.cpp/src/models/qwen3moe.cpp +124 -0
  130. package/src/llama.cpp/src/models/qwen3vl-moe.cpp +149 -0
  131. package/src/llama.cpp/src/models/qwen3vl.cpp +141 -0
  132. package/src/llama.cpp/src/models/refact.cpp +94 -0
  133. package/src/llama.cpp/src/models/rwkv6-base.cpp +162 -0
  134. package/src/llama.cpp/src/models/rwkv6.cpp +94 -0
  135. package/src/llama.cpp/src/models/rwkv6qwen2.cpp +86 -0
  136. package/src/llama.cpp/src/models/rwkv7-base.cpp +135 -0
  137. package/src/llama.cpp/src/models/rwkv7.cpp +90 -0
  138. package/src/llama.cpp/src/models/seed-oss.cpp +124 -0
  139. package/src/llama.cpp/src/models/smallthinker.cpp +120 -0
  140. package/src/llama.cpp/src/models/smollm3.cpp +128 -0
  141. package/src/llama.cpp/src/models/stablelm.cpp +146 -0
  142. package/src/llama.cpp/src/models/starcoder.cpp +100 -0
  143. package/src/llama.cpp/src/models/starcoder2.cpp +121 -0
  144. package/src/llama.cpp/src/models/t5-dec.cpp +166 -0
  145. package/src/llama.cpp/src/models/t5-enc.cpp +96 -0
  146. package/src/llama.cpp/src/models/wavtokenizer-dec.cpp +149 -0
  147. package/src/llama.cpp/src/models/xverse.cpp +108 -0
package/CMakeLists.txt CHANGED
@@ -64,13 +64,23 @@ endif()
 # Improve speed
 if(CMAKE_BUILD_TYPE STREQUAL "Release")
   if (MSVC)
-    if (NOT GGML_VULKAN)
+    # Enable parallel compilation for all MSVC builds
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /MP")
+
+    if (NOT GGML_VULKAN AND NOT GGML_CUDA)
+      # Full optimization with LTCG for default builds
       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Ob2 /Oi /Ot /Oy /GL")
       set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /Ob2 /Oi /Ot /Oy /GL")
       set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} /LTCG")
-    else()
+    elseif(GGML_VULKAN)
+      # Reduced optimization for Vulkan builds
       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O1 /Ob1 /bigobj")
       set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O1 /Ob1 /bigobj")
+    else()
+      # Faster linking for CUDA builds (no LTCG)
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Ob2 /Oi")
+      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /Ob2 /Oi")
     endif()
   else()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -funroll-loops -flto=auto")
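The net effect on MSVC release builds: /MP everywhere, /O2 plus whole-program optimization (/GL, linked with /LTCG) for default builds, /O1 for Vulkan builds, and /O2 without /GL for CUDA builds so the link step skips the slow LTCG pass. As a reading aid, here is a minimal TypeScript sketch of that flag matrix; the variant names and helper are purely illustrative, not part of the package's build scripts:

// Illustrative only: the MSVC release-flag matrix selected by the
// CMake change above, expressed as a pure function.
type MsvcVariant = 'default' | 'vulkan' | 'cuda';

function msvcReleaseFlags(variant: MsvcVariant): string[] {
  // /MP (parallel compilation) now applies to every MSVC build
  const flags = ['/MP'];
  if (variant === 'vulkan') {
    // reduced optimization; /bigobj lifts the per-object section limit
    flags.push('/O1', '/Ob1', '/bigobj');
  } else if (variant === 'cuda') {
    // full optimization without /GL, so linking skips LTCG
    flags.push('/O2', '/Ob2', '/Oi');
  } else {
    // default build: aggressive optimization plus /GL, linked with /LTCG
    flags.push('/O2', '/Ob2', '/Oi', '/Ot', '/Oy', '/GL');
  }
  return flags;
}

console.log(msvcReleaseFlags('cuda')); // [ '/MP', '/O2', '/Ob2', '/Oi' ]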
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.3.0-rc.6",
+  "version": "1.3.1",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,19 +72,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.3.0-rc.6",
-    "@fugood/node-llama-linux-x64-vulkan": "1.3.0-rc.6",
-    "@fugood/node-llama-linux-x64-cuda": "1.3.0-rc.6",
-    "@fugood/node-llama-linux-arm64": "1.3.0-rc.6",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.3.0-rc.6",
-    "@fugood/node-llama-linux-arm64-cuda": "1.3.0-rc.6",
-    "@fugood/node-llama-win32-x64": "1.3.0-rc.6",
-    "@fugood/node-llama-win32-x64-vulkan": "1.3.0-rc.6",
-    "@fugood/node-llama-win32-x64-cuda": "1.3.0-rc.6",
-    "@fugood/node-llama-win32-arm64": "1.3.0-rc.6",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.3.0-rc.6",
-    "@fugood/node-llama-darwin-x64": "1.3.0-rc.6",
-    "@fugood/node-llama-darwin-arm64": "1.3.0-rc.6"
+    "@fugood/node-llama-linux-x64": "1.3.1",
+    "@fugood/node-llama-linux-x64-vulkan": "1.3.1",
+    "@fugood/node-llama-linux-x64-cuda": "1.3.1",
+    "@fugood/node-llama-linux-arm64": "1.3.1",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.3.1",
+    "@fugood/node-llama-linux-arm64-cuda": "1.3.1",
+    "@fugood/node-llama-win32-x64": "1.3.1",
+    "@fugood/node-llama-win32-x64-vulkan": "1.3.1",
+    "@fugood/node-llama-win32-x64-cuda": "1.3.1",
+    "@fugood/node-llama-win32-arm64": "1.3.1",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.3.1",
+    "@fugood/node-llama-darwin-x64": "1.3.1",
+    "@fugood/node-llama-darwin-arm64": "1.3.1"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch CHANGED
@@ -21,7 +21,7 @@ index fe290bf8f..d377e29b9 100644
 
  #
  diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
- index 8587140e1..7931a31a1 100644
+ index 938872e82..6364f173f 100644
  --- a/src/llama.cpp/common/chat.cpp
  +++ b/src/llama.cpp/common/chat.cpp
  @@ -6,9 +6,6 @@
@@ -31,10 +31,10 @@ index 8587140e1..7931a31a1 100644
  -#include <minja/chat-template.hpp>
  -#include <minja/minja.hpp>
  -
+ #include <algorithm>
  #include <cstdio>
- #include <exception>
- #include <iostream>
- @@ -123,16 +120,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
+ #include <cctype>
+ @@ -126,16 +123,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
  return diffs;
  }
 
@@ -51,18 +51,17 @@ index 8587140e1..7931a31a1 100644
  struct templates_params {
  json messages;
  json tools;
- @@ -807,8 +794,7 @@ static std::string apply(
- if (additional_context) {
+ @@ -811,7 +798,7 @@ static std::string apply(
  tmpl_inputs.extra_context.merge_patch(*additional_context);
  }
- - // TODO: add flag to control date/time, if only for testing purposes.
+ // TODO: add flag to control date/time, if only for testing purposes.
  - // tmpl_inputs.now = std::chrono::system_clock::now();
  + tmpl_inputs.now = inputs.now;
 
  minja::chat_template_options tmpl_opts;
  // To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens
  diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
- index f7b36ec71..c07429f08 100644
+ index 50efb0d4e..f471a84c7 100644
  --- a/src/llama.cpp/common/chat.h
  +++ b/src/llama.cpp/common/chat.h
  @@ -9,7 +9,18 @@
@@ -110,7 +109,7 @@ index a8cb630ea..0919ec5d3 100644
  int32_t n_ctx = 4096; // context size
  int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
  diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
- index 34323afa0..1a6924db0 100644
+ index 23ec8bb08..33c93cba7 100644
  --- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
  +++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
  @@ -106,7 +106,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
package/src/llama.cpp/common/CMakeLists.txt CHANGED
@@ -56,6 +56,8 @@ add_library(${TARGET} STATIC
     common.h
     console.cpp
     console.h
+    download.cpp
+    download.h
     http.h
     json-partial.cpp
     json-partial.h