@fugood/llama.node 1.1.6 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/lib/binding.ts +4 -0
  2. package/lib/index.js +6 -1
  3. package/lib/index.ts +6 -0
  4. package/lib/version.js +5 -0
  5. package/lib/version.ts +2 -0
  6. package/package.json +14 -14
  7. package/scripts/llama.cpp.patch +9 -9
  8. package/src/LlamaCompletionWorker.cpp +73 -20
  9. package/src/LlamaCompletionWorker.h +8 -0
  10. package/src/llama.cpp/CMakeLists.txt +2 -0
  11. package/src/llama.cpp/common/arg.cpp +124 -40
  12. package/src/llama.cpp/common/chat-parser.cpp +9 -1
  13. package/src/llama.cpp/common/chat.cpp +312 -9
  14. package/src/llama.cpp/common/chat.h +4 -1
  15. package/src/llama.cpp/common/common.cpp +54 -0
  16. package/src/llama.cpp/common/common.h +41 -7
  17. package/src/llama.cpp/ggml/CMakeLists.txt +2 -0
  18. package/src/llama.cpp/ggml/include/ggml-opt.h +25 -6
  19. package/src/llama.cpp/ggml/include/ggml-zdnn.h +16 -0
  20. package/src/llama.cpp/ggml/include/ggml.h +28 -2
  21. package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -0
  22. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +1 -1
  23. package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +1136 -1077
  24. package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +14 -0
  25. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +6 -0
  26. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +21 -24
  27. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +16 -7
  28. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +63 -2
  29. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -1
  30. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +200 -51
  31. package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +11 -0
  32. package/src/llama.cpp/ggml/src/ggml-cpu/traits.cpp +2 -2
  33. package/src/llama.cpp/ggml/src/ggml-cpu/traits.h +1 -1
  34. package/src/llama.cpp/include/llama.h +25 -0
  35. package/src/llama.cpp/src/llama-batch.cpp +1 -1
  36. package/src/llama.cpp/src/llama-chat.cpp +2 -4
  37. package/src/llama.cpp/src/llama-context.cpp +29 -17
  38. package/src/llama.cpp/src/llama-context.h +6 -5
  39. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +12 -6
  40. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +2 -2
  41. package/src/llama.cpp/src/llama-kv-cache-unified.cpp +89 -69
  42. package/src/llama.cpp/src/llama-kv-cache-unified.h +2 -2
  43. package/src/llama.cpp/src/llama-memory-hybrid.cpp +6 -2
  44. package/src/llama.cpp/src/llama-memory-hybrid.h +2 -2
  45. package/src/llama.cpp/src/llama-memory-recurrent.cpp +6 -2
  46. package/src/llama.cpp/src/llama-memory-recurrent.h +2 -2
  47. package/src/llama.cpp/src/llama-memory.h +2 -2
  48. package/src/llama.cpp/src/llama-model.cpp +1 -0
  49. package/src/llama.cpp/src/llama-model.h +1 -0
  50. package/src/llama.cpp/src/llama-quant.cpp +1 -1
  51. package/src/llama.cpp/src/llama-vocab.cpp +2 -1
package/src/llama.cpp/ggml/include/ggml-opt.h

@@ -74,16 +74,26 @@ extern "C" {
      GGML_OPT_BUILD_TYPE_OPT = 30,
  };
 
+ enum ggml_opt_optimizer_type {
+     GGML_OPT_OPTIMIZER_TYPE_ADAMW,
+     GGML_OPT_OPTIMIZER_TYPE_SGD,
+
+     GGML_OPT_OPTIMIZER_TYPE_COUNT
+ };
+
  // parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
  struct ggml_opt_optimizer_params {
-     // AdamW optimizer parameters
      struct {
          float alpha; // learning rate
-         float beta1;
-         float beta2;
+         float beta1; // first AdamW momentum
+         float beta2; // second AdamW momentum
          float eps;   // epsilon for numerical stability
-         float wd;    // weight decay for AdamW, use 0.0f to disable
+         float wd;    // weight decay - 0.0f to disable
      } adamw;
+     struct {
+         float alpha; // learning rate
+         float wd;    // weight decay
+     } sgd;
  };
 
  // callback to calculate optimizer parameters prior to a backward pass
@@ -112,8 +122,11 @@ extern "C" {
 
      int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done
 
-     ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
-     void * get_opt_pars_ud; // userdata for calculating optimizer parameters
+     ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
+     void * get_opt_pars_ud;                     // userdata for calculating optimizer parameters
+
+     // only GGML_OPT_OPTIMIZER_TYPE_ADAMW needs m, v momenta per parameter tensor
+     enum ggml_opt_optimizer_type optimizer;
  };
 
  // get parameters for an optimization context with defaults set where possible
@@ -142,6 +155,10 @@ extern "C" {
  // get the gradient accumulator for a node from the forward graph
  GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node);
 
+ GGML_API enum ggml_opt_optimizer_type ggml_opt_context_optimizer_type(ggml_opt_context_t); //TODO consistent naming scheme
+
+ GGML_API const char * ggml_opt_optimizer_name(enum ggml_opt_optimizer_type);
+
  // ====== Optimization Result ======
 
  GGML_API ggml_opt_result_t ggml_opt_result_init(void);
@@ -226,12 +243,14 @@ extern "C" {
          struct ggml_tensor * outputs,               // output tensor, must have shape [ne_label, ndata_batch] if labels are used
          ggml_opt_dataset_t dataset,                 // dataset with data and optionally also labels
          enum ggml_opt_loss_type loss_type,          // loss to minimize
+         enum ggml_opt_optimizer_type optimizer,     // sgd or adamw
          ggml_opt_get_optimizer_params get_opt_pars, // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
          int64_t nepoch,                             // how many times the dataset should be iterated over
          int64_t nbatch_logical,                     // datapoints optimizer step, must be a multiple of ndata_batch in inputs/outputs
          float val_split,                            // fraction of the dataset to use for validation, must be in [0.0f, 1.0f)
          bool silent);                               // whether or not info prints to stderr should be suppressed
 
+
  #ifdef __cplusplus
  }
  #endif
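Taken together, the ggml-opt.h changes mean the existing get_opt_pars callback is now expected to provide values for both optimizers, while the new optimizer field of ggml_opt_params (and the matching ggml_opt_fit argument) decides which block is actually read. Below is a minimal sketch of such a callback, not part of the diff and with placeholder values; the callback shape (returning the params struct, taking a userdata pointer) follows the ggml_opt_get_optimizer_params typedef in ggml-opt.h.

    #include "ggml-opt.h"

    // Hedged sketch: fill both the adamw block and the new sgd block.
    // Only the block matching the selected ggml_opt_optimizer_type is read.
    static struct ggml_opt_optimizer_params my_opt_pars(void * userdata) {
        (void) userdata; // e.g. a pointer to the current epoch

        struct ggml_opt_optimizer_params p;

        p.adamw.alpha = 1e-3f;  // learning rate
        p.adamw.beta1 = 0.9f;   // first AdamW momentum
        p.adamw.beta2 = 0.999f; // second AdamW momentum
        p.adamw.eps   = 1e-8f;  // numerical stability
        p.adamw.wd    = 0.0f;   // weight decay disabled

        p.sgd.alpha = 1e-2f;    // learning rate
        p.sgd.wd    = 0.0f;     // weight decay disabled

        return p;
    }

Per the comment added to ggml_opt_params above, only GGML_OPT_OPTIMIZER_TYPE_ADAMW needs the per-parameter m/v momenta, so selecting GGML_OPT_OPTIMIZER_TYPE_SGD should also reduce optimizer memory.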
package/src/llama.cpp/ggml/include/ggml-zdnn.h (new file)

@@ -0,0 +1,16 @@
+ #pragma once
+
+ #include "ggml.h"
+ #include "ggml-backend.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ GGML_BACKEND_API ggml_backend_t ggml_backend_zdnn_init(void);
+
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_zdnn_reg(void);
+
+ #ifdef __cplusplus
+ }
+ #endif
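The new header exposes the IBM zDNN backend through the same two entry points other ggml backends use. A minimal probe, assuming nothing beyond the declarations above plus the standard ggml_backend_free() teardown (ggml-backend.h is already pulled in by the header):

    #include <stdio.h>
    #include "ggml-zdnn.h"

    // Hedged sketch: initialize the zDNN backend and release it again.
    int main(void) {
        ggml_backend_t backend = ggml_backend_zdnn_init();
        if (backend == NULL) {
            fprintf(stderr, "zDNN backend not available on this machine\n");
            return 1;
        }
        // ... build and evaluate graphs on `backend` here ...
        ggml_backend_free(backend);
        return 0;
    }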
package/src/llama.cpp/ggml/include/ggml.h

@@ -241,6 +241,8 @@
  #define GGML_ROPE_TYPE_MROPE 8
  #define GGML_ROPE_TYPE_VISION 24
 
+ #define GGML_MROPE_SECTIONS 4
+
  #define GGML_UNUSED(x) (void)(x)
 
  #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@@ -540,6 +542,7 @@ extern "C" {
      GGML_OP_CROSS_ENTROPY_LOSS,
      GGML_OP_CROSS_ENTROPY_LOSS_BACK,
      GGML_OP_OPT_STEP_ADAMW,
+     GGML_OP_OPT_STEP_SGD,
 
      GGML_OP_GLU,
 
@@ -1660,7 +1663,7 @@ extern "C" {
          struct ggml_tensor * b,
          struct ggml_tensor * c,
          int n_dims,
-         int sections[4],
+         int sections[GGML_MROPE_SECTIONS],
          int mode,
          int n_ctx_orig,
          float freq_base,
@@ -1686,6 +1689,22 @@ extern "C" {
          float beta_fast,
          float beta_slow);
 
+ GGML_API struct ggml_tensor * ggml_rope_multi_inplace(
+         struct ggml_context * ctx,
+         struct ggml_tensor * a,
+         struct ggml_tensor * b,
+         struct ggml_tensor * c,
+         int n_dims,
+         int sections[GGML_MROPE_SECTIONS],
+         int mode,
+         int n_ctx_orig,
+         float freq_base,
+         float freq_scale,
+         float ext_factor,
+         float attn_factor,
+         float beta_fast,
+         float beta_slow);
+
  GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom(
          struct ggml_context * ctx,
          struct ggml_tensor * a,
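For the new in-place variant, a hedged usage sketch follows. The call matches the signature declared above; the section split, head dimension, positions layout, and the NULL frequency-factor tensor are illustrative assumptions, not taken from the diff.

    #include "ggml.h"

    // Hedged sketch: apply multi-section (M-RoPE) rotary embedding in place.
    static struct ggml_tensor * apply_mrope_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * cur,   // activations to rotate, e.g. [n_embd_head, n_head, n_tokens]
            struct ggml_tensor  * pos) { // I32 position tensor laid out for mrope
        int sections[GGML_MROPE_SECTIONS] = {16, 24, 24, 0}; // placeholder per-section split

        return ggml_rope_multi_inplace(
            ctx, cur, pos,
            /* c (freq factors) */ NULL,
            /* n_dims           */ 128,
            sections,
            /* mode             */ GGML_ROPE_TYPE_MROPE,
            /* n_ctx_orig       */ 0,
            /* freq_base        */ 10000.0f,
            /* freq_scale       */ 1.0f,
            /* ext_factor       */ 0.0f,
            /* attn_factor      */ 1.0f,
            /* beta_fast        */ 32.0f,
            /* beta_slow        */ 1.0f);
    }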
@@ -2293,7 +2312,14 @@ extern "C" {
          struct ggml_tensor * grad,
          struct ggml_tensor * m,
          struct ggml_tensor * v,
-         struct ggml_tensor * adamw_params); // parameters such a the learning rate
+         struct ggml_tensor * adamw_params); // parameters such as the learning rate
+
+ // stochastic gradient descent step (with weight decay)
+ GGML_API struct ggml_tensor * ggml_opt_step_sgd(
+         struct ggml_context * ctx,
+         struct ggml_tensor * a,
+         struct ggml_tensor * grad,
+         struct ggml_tensor * sgd_params); // alpha, weight decay
 
  //
  // automatic differentiation
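And for the matching graph-level SGD step, a hedged sketch. The call itself uses the signature declared above; treating sgd_params as a 2-element F32 tensor holding {alpha, wd} is an assumption based only on the trailing comment.

    #include "ggml.h"

    // Hedged sketch: add an SGD optimizer step for one trainable tensor to a graph.
    static struct ggml_tensor * build_sgd_step(
            struct ggml_context * ctx,
            struct ggml_tensor  * weights, // trainable parameter tensor
            struct ggml_tensor  * grad) {  // its gradient from the backward pass
        // assumed layout: {alpha, wd}, filled in at runtime (e.g. from get_opt_pars)
        struct ggml_tensor * sgd_params = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2);

        return ggml_opt_step_sgd(ctx, weights, grad, sgd_params);
    }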
package/src/llama.cpp/ggml/src/CMakeLists.txt

@@ -382,6 +382,7 @@ ggml_add_backend(RPC)
  ggml_add_backend(SYCL)
  ggml_add_backend(Vulkan)
  ggml_add_backend(WebGPU)
+ ggml_add_backend(zDNN)
  ggml_add_backend(OpenCL)
 
  foreach (target ggml-base ggml)
package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt

@@ -460,7 +460,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
      # NOTE: Only available from GCC 15.1.0 onwards. Any z17 machine with compile issues must first verify their GCC version.
      # binutils must also be updated to the latest for the -march=z17 flag to work. Otherwise, use -march=arch15.
      message(STATUS "z17 target")
-     list(APPEND ARCH_FLAGS -march=z17)
+     list(APPEND ARCH_FLAGS -march=arch15)
  else()
      message(STATUS "Unknown target")
      message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")