@fugood/llama.node 1.1.6 → 1.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +4 -0
- package/lib/index.js +6 -1
- package/lib/index.ts +6 -0
- package/lib/version.js +5 -0
- package/lib/version.ts +2 -0
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +9 -9
- package/src/LlamaCompletionWorker.cpp +73 -20
- package/src/LlamaCompletionWorker.h +8 -0
- package/src/llama.cpp/CMakeLists.txt +2 -0
- package/src/llama.cpp/common/arg.cpp +124 -40
- package/src/llama.cpp/common/chat-parser.cpp +9 -1
- package/src/llama.cpp/common/chat.cpp +312 -9
- package/src/llama.cpp/common/chat.h +4 -1
- package/src/llama.cpp/common/common.cpp +54 -0
- package/src/llama.cpp/common/common.h +41 -7
- package/src/llama.cpp/ggml/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +25 -6
- package/src/llama.cpp/ggml/include/ggml-zdnn.h +16 -0
- package/src/llama.cpp/ggml/include/ggml.h +28 -2
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +1136 -1077
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +6 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +21 -24
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +16 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +63 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +200 -51
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +11 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/traits.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/traits.h +1 -1
- package/src/llama.cpp/include/llama.h +25 -0
- package/src/llama.cpp/src/llama-batch.cpp +1 -1
- package/src/llama.cpp/src/llama-chat.cpp +2 -4
- package/src/llama.cpp/src/llama-context.cpp +29 -17
- package/src/llama.cpp/src/llama-context.h +6 -5
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +12 -6
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +2 -2
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +89 -69
- package/src/llama.cpp/src/llama-kv-cache-unified.h +2 -2
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +6 -2
- package/src/llama.cpp/src/llama-memory-hybrid.h +2 -2
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +6 -2
- package/src/llama.cpp/src/llama-memory-recurrent.h +2 -2
- package/src/llama.cpp/src/llama-memory.h +2 -2
- package/src/llama.cpp/src/llama-model.cpp +1 -0
- package/src/llama.cpp/src/llama-model.h +1 -0
- package/src/llama.cpp/src/llama-quant.cpp +1 -1
- package/src/llama.cpp/src/llama-vocab.cpp +2 -1
package/src/llama.cpp/ggml/include/ggml-opt.h

@@ -74,16 +74,26 @@ extern "C" {
         GGML_OPT_BUILD_TYPE_OPT = 30,
     };
 
+    enum ggml_opt_optimizer_type {
+        GGML_OPT_OPTIMIZER_TYPE_ADAMW,
+        GGML_OPT_OPTIMIZER_TYPE_SGD,
+
+        GGML_OPT_OPTIMIZER_TYPE_COUNT
+    };
+
     // parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
     struct ggml_opt_optimizer_params {
-        // AdamW optimizer parameters
         struct {
             float alpha; // learning rate
-            float beta1;
-            float beta2;
+            float beta1; // first AdamW momentum
+            float beta2; // second AdamW momentum
             float eps;   // epsilon for numerical stability
-            float wd;    // weight decay
+            float wd;    // weight decay - 0.0f to disable
         } adamw;
+        struct {
+            float alpha; // learning rate
+            float wd;    // weight decay
+        } sgd;
     };
 
     // callback to calculate optimizer parameters prior to a backward pass

@@ -112,8 +122,11 @@ extern "C" {
 
         int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done
 
-        ggml_opt_get_optimizer_params get_opt_pars;
-        void * get_opt_pars_ud;
+        ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
+        void * get_opt_pars_ud;                     // userdata for calculating optimizer parameters
+
+        // only GGML_OPT_OPTIMIZER_TYPE_ADAMW needs m, v momenta per parameter tensor
+        enum ggml_opt_optimizer_type optimizer;
     };
 
     // get parameters for an optimization context with defaults set where possible

@@ -142,6 +155,10 @@ extern "C" {
     // get the gradient accumulator for a node from the forward graph
     GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node);
 
+    GGML_API enum ggml_opt_optimizer_type ggml_opt_context_optimizer_type(ggml_opt_context_t); //TODO consistent naming scheme
+
+    GGML_API const char * ggml_opt_optimizer_name(enum ggml_opt_optimizer_type);
+
     // ====== Optimization Result ======
 
     GGML_API ggml_opt_result_t ggml_opt_result_init(void);

@@ -226,12 +243,14 @@ extern "C" {
             struct ggml_tensor            * outputs,        // output tensor, must have shape [ne_label, ndata_batch] if labels are used
             ggml_opt_dataset_t              dataset,        // dataset with data and optionally also labels
             enum ggml_opt_loss_type         loss_type,      // loss to minimize
+            enum ggml_opt_optimizer_type    optimizer,      // sgd or adamw
             ggml_opt_get_optimizer_params   get_opt_pars,   // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
             int64_t                         nepoch,         // how many times the dataset should be iterated over
             int64_t                         nbatch_logical, // datapoints optimizer step, must be a multiple of ndata_batch in inputs/outputs
             float                           val_split,      // fraction of the dataset to use for validation, must be in [0.0f, 1.0f)
             bool                            silent);        // whether or not info prints to stderr should be suppressed
 
+
 #ifdef __cplusplus
 }
 #endif
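The new `sgd` block sits alongside the existing `adamw` block in `ggml_opt_optimizer_params`, and the optimizer is selected via the new `optimizer` field while hyperparameters still flow through the `get_opt_pars` callback. Below is a minimal sketch of such a callback; it assumes `ggml_opt_get_default_optimizer_params` as the baseline, and the learning rate and weight decay values are illustrative placeholders, not defaults shipped in this package.

    #include "ggml-opt.h"

    // hedged sketch: a get_opt_pars callback that overrides the new SGD fields;
    // alpha/wd values are placeholders, not taken from this diff
    static struct ggml_opt_optimizer_params my_sgd_pars(void * userdata) {
        struct ggml_opt_optimizer_params p = ggml_opt_get_default_optimizer_params(userdata);
        p.sgd.alpha = 1e-3f; // learning rate
        p.sgd.wd    = 0.0f;  // weight decay - 0.0f to disable
        return p;
    }

A caller would then pass `my_sgd_pars` as `get_opt_pars` together with `GGML_OPT_OPTIMIZER_TYPE_SGD` in the `optimizer` field.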
package/src/llama.cpp/ggml/include/ggml-zdnn.h

@@ -0,0 +1,16 @@
+#pragma once
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+GGML_BACKEND_API ggml_backend_t ggml_backend_zdnn_init(void);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_zdnn_reg(void);
+
+#ifdef __cplusplus
+}
+#endif
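The new `ggml-zdnn.h` header exposes an entry point for the IBM zDNN backend. A minimal usage sketch follows; it assumes a ggml build with the zDNN backend enabled on s390x, and nothing beyond the two declarations above is taken from this package.

    #include <stdio.h>
    #include "ggml-backend.h"
    #include "ggml-zdnn.h"

    int main(void) {
        // try to bring up the zDNN backend declared in the new header
        ggml_backend_t backend = ggml_backend_zdnn_init();
        if (backend == NULL) {
            fprintf(stderr, "zDNN backend not available\n");
            return 1;
        }
        printf("initialized backend: %s\n", ggml_backend_name(backend));
        ggml_backend_free(backend);
        return 0;
    }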
package/src/llama.cpp/ggml/include/ggml.h

@@ -241,6 +241,8 @@
 #define GGML_ROPE_TYPE_MROPE  8
 #define GGML_ROPE_TYPE_VISION 24
 
+#define GGML_MROPE_SECTIONS   4
+
 #define GGML_UNUSED(x) (void)(x)
 
 #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))

@@ -540,6 +542,7 @@ extern "C" {
         GGML_OP_CROSS_ENTROPY_LOSS,
         GGML_OP_CROSS_ENTROPY_LOSS_BACK,
         GGML_OP_OPT_STEP_ADAMW,
+        GGML_OP_OPT_STEP_SGD,
 
         GGML_OP_GLU,
 
@@ -1660,7 +1663,7 @@ extern "C" {
             struct ggml_tensor * b,
             struct ggml_tensor * c,
             int                  n_dims,
-            int                  sections[4],
+            int                  sections[GGML_MROPE_SECTIONS],
             int                  mode,
             int                  n_ctx_orig,
             float                freq_base,

@@ -1686,6 +1689,22 @@ extern "C" {
             float                beta_fast,
             float                beta_slow);
 
+    GGML_API struct ggml_tensor * ggml_rope_multi_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            struct ggml_tensor  * c,
+            int                   n_dims,
+            int                   sections[GGML_MROPE_SECTIONS],
+            int                   mode,
+            int                   n_ctx_orig,
+            float                 freq_base,
+            float                 freq_scale,
+            float                 ext_factor,
+            float                 attn_factor,
+            float                 beta_fast,
+            float                 beta_slow);
+
     GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
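`ggml_rope_multi_inplace` mirrors the existing `ggml_rope_multi` signature, with the section count now spelled via `GGML_MROPE_SECTIONS`. A hedged sketch of a wrapper is shown below; the section split and frequency settings are illustrative assumptions, not values from this diff.

    #include <stddef.h>
    #include "ggml.h"

    // hedged sketch: apply multimodal RoPE (M-RoPE) in place to a query tensor.
    // the {time, height, width, extra} section split is an illustrative assumption.
    static struct ggml_tensor * apply_mrope_inplace(
            struct ggml_context * ctx,
            struct ggml_tensor  * q,    // tensor to rotate
            struct ggml_tensor  * pos,  // I32 position ids
            int                   n_dims,
            int                   n_ctx_orig) {
        int sections[GGML_MROPE_SECTIONS] = { 16, 24, 24, 0 };
        return ggml_rope_multi_inplace(ctx, q, pos, NULL, n_dims, sections,
                                       GGML_ROPE_TYPE_MROPE, n_ctx_orig,
                                       10000.0f, 1.0f,  // freq_base, freq_scale
                                       0.0f, 1.0f,      // ext_factor, attn_factor
                                       32.0f, 1.0f);    // beta_fast, beta_slow
    }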
@@ -2293,7 +2312,14 @@ extern "C" {
             struct ggml_tensor * grad,
             struct ggml_tensor * m,
             struct ggml_tensor * v,
-            struct ggml_tensor * adamw_params); // parameters such
+            struct ggml_tensor * adamw_params); // parameters such as the learning rate
+
+    // stochastic gradient descent step (with weight decay)
+    GGML_API struct ggml_tensor * ggml_opt_step_sgd(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * grad,
+            struct ggml_tensor  * sgd_params); // alpha, weight decay
 
     //
     // automatic differentiation
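`ggml_opt_step_sgd` follows the same pattern as `ggml_opt_step_adamw` but needs no momentum tensors. A hedged sketch of wiring it into a compute graph follows; it assumes `sgd_params` is a 2-element F32 tensor carrying `{ alpha, wd }`, as the comment above suggests.

    #include "ggml.h"

    // hedged sketch: append an in-graph SGD update for one parameter tensor.
    // assumes sgd_params is a 2-element F32 tensor holding { alpha, wd }.
    static void add_sgd_step(
            struct ggml_context * ctx,
            struct ggml_cgraph  * gf,
            struct ggml_tensor  * param,      // parameter tensor
            struct ggml_tensor  * grad,       // its gradient accumulator
            struct ggml_tensor  * sgd_params) {
        struct ggml_tensor * step = ggml_opt_step_sgd(ctx, param, grad, sgd_params);
        ggml_build_forward_expand(gf, step);
    }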
package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt

@@ -460,7 +460,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
             # NOTE: Only available from GCC 15.1.0 onwards. Any z17 machine with compile issues must first verify their GCC version.
             # binutils must also be updated to the latest for the -march=z17 flag to work. Otherwise, use -march=arch15.
             message(STATUS "z17 target")
-            list(APPEND ARCH_FLAGS -march=z17)
+            list(APPEND ARCH_FLAGS -march=arch15)
         else()
             message(STATUS "Unknown target")
             message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")