@fugood/llama.node 1.4.7 → 1.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/lib/binding.ts +8 -0
  2. package/package.json +15 -15
  3. package/scripts/llama.cpp.patch +22 -23
  4. package/src/LlamaContext.cpp +2 -2
  5. package/src/llama.cpp/common/CMakeLists.txt +2 -0
  6. package/src/llama.cpp/common/arg.cpp +364 -193
  7. package/src/llama.cpp/common/arg.h +43 -2
  8. package/src/llama.cpp/common/chat-peg-parser.cpp +16 -2
  9. package/src/llama.cpp/common/chat.cpp +140 -0
  10. package/src/llama.cpp/common/common.cpp +130 -67
  11. package/src/llama.cpp/common/common.h +40 -16
  12. package/src/llama.cpp/common/console.cpp +98 -18
  13. package/src/llama.cpp/common/console.h +30 -8
  14. package/src/llama.cpp/common/download.cpp +69 -25
  15. package/src/llama.cpp/common/json-schema-to-grammar.cpp +132 -3
  16. package/src/llama.cpp/common/json-schema-to-grammar.h +20 -0
  17. package/src/llama.cpp/common/log.cpp +5 -0
  18. package/src/llama.cpp/common/log.h +1 -0
  19. package/src/llama.cpp/common/peg-parser.cpp +1 -1
  20. package/src/llama.cpp/common/preset.cpp +206 -0
  21. package/src/llama.cpp/common/preset.h +32 -0
  22. package/src/llama.cpp/common/sampling.cpp +91 -92
  23. package/src/llama.cpp/common/sampling.h +11 -6
  24. package/src/llama.cpp/common/speculative.cpp +1 -1
  25. package/src/llama.cpp/ggml/CMakeLists.txt +4 -0
  26. package/src/llama.cpp/ggml/include/ggml-alloc.h +9 -0
  27. package/src/llama.cpp/ggml/include/ggml-backend.h +1 -0
  28. package/src/llama.cpp/ggml/include/ggml-cpu.h +1 -0
  29. package/src/llama.cpp/ggml/include/ggml.h +7 -8
  30. package/src/llama.cpp/ggml/src/CMakeLists.txt +3 -0
  31. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2 -0
  32. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +60 -39
  33. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
  34. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +2 -1
  35. package/src/llama.cpp/include/llama.h +18 -1
  36. package/src/llama.cpp/src/llama-arch.cpp +1890 -2248
  37. package/src/llama.cpp/src/llama-arch.h +9 -2
  38. package/src/llama.cpp/src/llama-batch.cpp +12 -2
  39. package/src/llama.cpp/src/llama-batch.h +4 -2
  40. package/src/llama.cpp/src/llama-context.cpp +93 -23
  41. package/src/llama.cpp/src/llama-context.h +8 -2
  42. package/src/llama.cpp/src/llama-graph.cpp +84 -16
  43. package/src/llama.cpp/src/llama-graph.h +17 -4
  44. package/src/llama.cpp/src/llama-hparams.cpp +6 -0
  45. package/src/llama.cpp/src/llama-hparams.h +5 -1
  46. package/src/llama.cpp/src/llama-impl.cpp +4 -0
  47. package/src/llama.cpp/src/llama-kv-cache.cpp +90 -42
  48. package/src/llama.cpp/src/llama-kv-cache.h +19 -2
  49. package/src/llama.cpp/src/llama-memory-hybrid.cpp +1 -1
  50. package/src/llama.cpp/src/llama-model-loader.cpp +2 -0
  51. package/src/llama.cpp/src/llama-model-loader.h +2 -0
  52. package/src/llama.cpp/src/llama-model.cpp +103 -44
  53. package/src/llama.cpp/src/llama-model.h +1 -0
  54. package/src/llama.cpp/src/llama-quant.cpp +1 -1
  55. package/src/llama.cpp/src/llama-vocab.cpp +2 -1
  56. package/src/llama.cpp/src/llama.cpp +675 -1
  57. package/src/llama.cpp/src/models/deepseek2.cpp +9 -5
  58. package/src/llama.cpp/src/models/glm4-moe.cpp +28 -11
  59. package/src/llama.cpp/src/models/glm4.cpp +27 -4
  60. package/src/llama.cpp/src/models/models.h +5 -5
  61. package/src/llama.cpp/src/models/nemotron-h.cpp +35 -6
  62. package/src/llama.cpp/src/models/qwen2.cpp +12 -3
  63. package/src/llama.cpp/src/models/qwen3next.cpp +81 -266
package/lib/binding.ts CHANGED
@@ -198,6 +198,14 @@ export type LlamaParallelCompletionOptions = LlamaCompletionOptions & {
   */
  save_state_path?: string

+ /**
+  * Number of tokens to load when loading state.
+  * If not specified or <= 0, all tokens from the state file will be loaded.
+  * Use this to limit how much of a saved state is restored.
+  * Example: `512` to load only the first 512 tokens from the state file
+  */
+ load_state_size?: number
+
  /**
   * Number of tokens to save when saving session state.
   * If not specified or <= 0, all tokens will be saved.
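A minimal usage sketch of the new option. The `save_state_path` and `load_state_size` names come from the diff above; the `loadModel`/`completion` calls and the `load_state_path` counterpart option are assumptions for illustration, not verified against the 1.4.8 API:

  // Hypothetical sketch — entry points assumed, only the option names are from the diff.
  import { loadModel } from '@fugood/llama.node'

  const ctx = await loadModel({ model: './model.gguf' })

  // First run: evaluate the shared prompt prefix and persist its state to disk.
  await ctx.completion({
    prompt: 'Long shared prefix ...',
    save_state_path: './state.bin',
  })

  // Later run: restore only the first 512 tokens of that saved state
  // instead of the whole file (load_state_size is new in 1.4.8).
  await ctx.completion({
    prompt: 'Long shared prefix ... plus a new question',
    load_state_path: './state.bin', // assumed counterpart to save_state_path
    load_state_size: 512,
  })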
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@fugood/llama.node",
  "access": "public",
- "version": "1.4.7",
+ "version": "1.4.8",
  "description": "An another Node binding of llama.cpp",
  "main": "lib/index.js",
  "scripts": {
@@ -72,20 +72,20 @@
  "CMakeLists.txt"
  ],
  "optionalDependencies": {
- "@fugood/node-llama-darwin-arm64": "1.4.7",
- "@fugood/node-llama-darwin-x64": "1.4.7",
- "@fugood/node-llama-linux-arm64": "1.4.7",
- "@fugood/node-llama-linux-arm64-cuda": "1.4.7",
- "@fugood/node-llama-linux-arm64-snapdragon": "1.4.7",
- "@fugood/node-llama-linux-arm64-vulkan": "1.4.7",
- "@fugood/node-llama-linux-x64": "1.4.7",
- "@fugood/node-llama-linux-x64-cuda": "1.4.7",
- "@fugood/node-llama-linux-x64-vulkan": "1.4.7",
- "@fugood/node-llama-win32-arm64": "1.4.7",
- "@fugood/node-llama-win32-arm64-vulkan": "1.4.7",
- "@fugood/node-llama-win32-x64": "1.4.7",
- "@fugood/node-llama-win32-x64-cuda": "1.4.7",
- "@fugood/node-llama-win32-x64-vulkan": "1.4.7"
+ "@fugood/node-llama-darwin-arm64": "1.4.8",
+ "@fugood/node-llama-darwin-x64": "1.4.8",
+ "@fugood/node-llama-linux-arm64": "1.4.8",
+ "@fugood/node-llama-linux-arm64-cuda": "1.4.8",
+ "@fugood/node-llama-linux-arm64-snapdragon": "1.4.8",
+ "@fugood/node-llama-linux-arm64-vulkan": "1.4.8",
+ "@fugood/node-llama-linux-x64": "1.4.8",
+ "@fugood/node-llama-linux-x64-cuda": "1.4.8",
+ "@fugood/node-llama-linux-x64-vulkan": "1.4.8",
+ "@fugood/node-llama-win32-arm64": "1.4.8",
+ "@fugood/node-llama-win32-arm64-vulkan": "1.4.8",
+ "@fugood/node-llama-win32-x64": "1.4.8",
+ "@fugood/node-llama-win32-x64-cuda": "1.4.8",
+ "@fugood/node-llama-win32-x64-vulkan": "1.4.8"
  },
  "devDependencies": {
  "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch CHANGED
@@ -1,8 +1,8 @@
  diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
- index 377b26846..1873b5206 100644
+ index 0182767c2..f8c4a4f63 100644
  --- a/src/llama.cpp/common/CMakeLists.txt
  +++ b/src/llama.cpp/common/CMakeLists.txt
- @@ -149,9 +149,16 @@ if (LLAMA_LLGUIDANCE)
+ @@ -151,9 +151,16 @@ if (LLAMA_LLGUIDANCE)
  set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
  endif ()

@@ -21,21 +21,20 @@ index 377b26846..1873b5206 100644

  #
  diff --git a/src/llama.cpp/common/chat-peg-parser.cpp b/src/llama.cpp/common/chat-peg-parser.cpp
- index 74a7b6a46..7b7a1bd50 100644
+ index 1bcba9cd8..b7cd68734 100644
  --- a/src/llama.cpp/common/chat-peg-parser.cpp
  +++ b/src/llama.cpp/common/chat-peg-parser.cpp
- @@ -1,9 +1,5 @@
- #include "chat-peg-parser.h"
+ @@ -2,7 +2,7 @@
+
+ #include <nlohmann/json.hpp>

- -#include <nlohmann/json.hpp>
- -
  -using json = nlohmann::json;
- -
- static std::string_view trim_trailing_space(std::string_view sv) {
- while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
- sv.remove_suffix(1);
+ +using json = nlohmann::ordered_json;
+
+ static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
+ int count = 0;
  diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
- index c371edaa5..ec032e351 100644
+ index 0a426f447..ab02be247 100644
  --- a/src/llama.cpp/common/chat.cpp
  +++ b/src/llama.cpp/common/chat.cpp
  @@ -7,9 +7,6 @@
@@ -65,7 +64,7 @@ index c371edaa5..ec032e351 100644
  struct templates_params {
  json messages;
  json tools;
- @@ -732,7 +719,7 @@ static std::string apply(
+ @@ -751,7 +738,7 @@ static std::string apply(
  tmpl_inputs.extra_context.merge_patch(*additional_context);
  }
  // TODO: add flag to control date/time, if only for testing purposes.
@@ -99,10 +98,10 @@ index 6085510a4..263076ce2 100644
  struct common_chat_tool_call {
  std::string name;
  diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
- index 0497f90a2..29b36f3fe 100644
+ index 5a8cf5248..8010a990e 100644
  --- a/src/llama.cpp/common/common.cpp
  +++ b/src/llama.cpp/common/common.cpp
- @@ -1280,6 +1280,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
+ @@ -1343,6 +1343,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
  mparams.n_gpu_layers = params.n_gpu_layers;
  }

@@ -111,16 +110,16 @@ index 0497f90a2..29b36f3fe 100644
  mparams.split_mode = params.split_mode;
  mparams.tensor_split = params.tensor_split;
  diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
- index d28e48991..562203d02 100644
+ index d70744840..dea8c4546 100644
  --- a/src/llama.cpp/common/common.h
  +++ b/src/llama.cpp/common/common.h
- @@ -302,6 +302,7 @@ struct lr_opt {
+ @@ -307,6 +307,7 @@ struct lr_opt {
  struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);

  struct common_params {
  + bool vocab_only = false;
- int32_t n_predict = -1; // new tokens to predict
- int32_t n_ctx = 4096; // context size
+ int32_t n_predict = -1; // max. number of new tokens to predict, -1 == no limit
+ int32_t n_ctx = 0; // context size, 0 == context the model was trained with
  int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
  diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
  index fc31089f3..aa9befe4c 100644
@@ -136,10 +135,10 @@ index fc31089f3..aa9befe4c 100644
  check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
  if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
  diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
- index 72a82a891..1b681f4dd 100644
+ index 514f086f6..792abaa58 100644
  --- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
  +++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
- @@ -3216,11 +3216,26 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
+ @@ -3213,11 +3213,26 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
  GGML_UNUSED(dev);
  }

@@ -169,7 +168,7 @@ index 72a82a891..1b681f4dd 100644
  GGML_UNUSED(dev);
  }

- @@ -3401,10 +3416,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+ @@ -3398,10 +3413,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
  }
  }

@@ -188,7 +187,7 @@

  GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);

- @@ -3417,6 +3439,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+ @@ -3414,6 +3436,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
  } catch (std::exception const &exc) {
  GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
  devices[i].context = nullptr;
package/src/LlamaContext.cpp CHANGED
@@ -416,8 +416,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
  _rn_ctx->attachThreadpoolsIfAvailable();

  // Collect used devices from the loaded model
- if (_rn_ctx->llama_init.model) {
- const auto &model_devices = _rn_ctx->llama_init.model->devices;
+ if (_rn_ctx->llama_init->model()) {
+ const auto &model_devices = _rn_ctx->llama_init->model()->devices;
  for (auto dev : model_devices) {
  const char *dev_name = ggml_backend_dev_name(dev);
  if (dev_name != nullptr) {
package/src/llama.cpp/common/CMakeLists.txt CHANGED
@@ -73,6 +73,8 @@ add_library(${TARGET} STATIC
  ngram-cache.h
  peg-parser.cpp
  peg-parser.h
+ preset.cpp
+ preset.h
  regex-partial.cpp
  regex-partial.h
  sampling.cpp