@fugood/llama.node 1.1.5 → 1.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +4 -0
- package/lib/index.js +6 -1
- package/lib/index.ts +6 -0
- package/lib/version.js +5 -0
- package/lib/version.ts +2 -0
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +19 -15
- package/src/LlamaCompletionWorker.cpp +73 -18
- package/src/LlamaCompletionWorker.h +8 -0
- package/src/llama.cpp/CMakeLists.txt +2 -0
- package/src/llama.cpp/common/arg.cpp +147 -46
- package/src/llama.cpp/common/chat-parser.cpp +9 -1
- package/src/llama.cpp/common/chat.cpp +350 -3
- package/src/llama.cpp/common/chat.h +11 -3
- package/src/llama.cpp/common/common.cpp +54 -0
- package/src/llama.cpp/common/common.h +44 -9
- package/src/llama.cpp/ggml/CMakeLists.txt +5 -2
- package/src/llama.cpp/ggml/include/ggml-opt.h +25 -6
- package/src/llama.cpp/ggml/include/ggml-zdnn.h +16 -0
- package/src/llama.cpp/ggml/include/ggml.h +65 -3
- package/src/llama.cpp/ggml/src/CMakeLists.txt +13 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +61 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +96 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +1136 -1077
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +20 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +21 -24
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +16 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +270 -11
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +3 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/quants.c +35 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/quants.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +200 -51
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +11 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/traits.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/traits.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +19 -4
- package/src/llama.cpp/include/llama.h +26 -0
- package/src/llama.cpp/src/llama-arch.cpp +65 -0
- package/src/llama.cpp/src/llama-arch.h +10 -0
- package/src/llama.cpp/src/llama-batch.cpp +1 -1
- package/src/llama.cpp/src/llama-chat.cpp +15 -4
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +37 -25
- package/src/llama.cpp/src/llama-context.h +6 -5
- package/src/llama.cpp/src/llama-graph.cpp +118 -9
- package/src/llama.cpp/src/llama-graph.h +38 -0
- package/src/llama.cpp/src/llama-hparams.h +5 -3
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +12 -6
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +2 -2
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +93 -69
- package/src/llama.cpp/src/llama-kv-cache-unified.h +2 -2
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +6 -2
- package/src/llama.cpp/src/llama-memory-hybrid.h +2 -2
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +6 -2
- package/src/llama.cpp/src/llama-memory-recurrent.h +2 -2
- package/src/llama.cpp/src/llama-memory.h +2 -2
- package/src/llama.cpp/src/llama-model-loader.cpp +1 -0
- package/src/llama.cpp/src/llama-model-loader.h +3 -2
- package/src/llama.cpp/src/llama-model.cpp +500 -4
- package/src/llama.cpp/src/llama-model.h +25 -4
- package/src/llama.cpp/src/llama-quant.cpp +37 -1
- package/src/llama.cpp/src/llama-vocab.cpp +43 -0
package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h

@@ -13,6 +13,7 @@
 #define ggml_vec_dot_q5_0_q8_0_generic ggml_vec_dot_q5_0_q8_0
 #define ggml_vec_dot_q5_1_q8_1_generic ggml_vec_dot_q5_1_q8_1
 #define ggml_vec_dot_q8_0_q8_0_generic ggml_vec_dot_q8_0_q8_0
+#define ggml_vec_dot_mxfp4_q8_0_generic ggml_vec_dot_mxfp4_q8_0
 #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
 #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
 #define ggml_vec_dot_q2_K_q8_K_generic ggml_vec_dot_q2_K_q8_K
@@ -39,18 +40,22 @@
 #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
 #define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
 #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
+#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
 #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
 #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
 #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
 #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
 #define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
 #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
+#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
 #elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
 // repack.cpp
 #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
 #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
+#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
 #define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
 #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
+#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
 #define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
 #elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
 // repack.cpp
@@ -68,6 +73,7 @@
 #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
 #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
 #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
+#define ggml_vec_dot_mxfp4_q8_0_generic ggml_vec_dot_mxfp4_q8_0
 // repack.cpp
 #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
 #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
@@ -78,18 +84,21 @@
 #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
 #define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
 #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
+#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
 #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
 #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
 #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
 #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
 #define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
 #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
+#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
 #elif defined(__loongarch64)
 // quants.c
 #define quantize_row_q8_K_generic quantize_row_q8_K
 #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
 #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
 #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
+#define ggml_vec_dot_mxfp4_q8_0_generic ggml_vec_dot_mxfp4_q8_0
 // repack.cpp
 #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
 #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
@@ -100,12 +109,14 @@
 #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
 #define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
 #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
+#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
 #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
 #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
 #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
 #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
 #define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
 #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
+#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
 #elif defined(__riscv)
 // quants.c
 #define quantize_row_q8_K_generic quantize_row_q8_K
@@ -120,6 +131,7 @@
 #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
 #define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0
 #define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K
+#define ggml_vec_dot_mxfp4_q8_0_generic ggml_vec_dot_mxfp4_q8_0
 // repack.cpp
 #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
 #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
@@ -129,11 +141,13 @@
 #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
 #define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
 #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
+#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
 #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
 #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
 #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
 #define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
 #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
+#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
 #elif defined(__s390x__)
 // quants.c
 #define quantize_row_q8_K_generic quantize_row_q8_K
@@ -149,6 +163,7 @@
 #define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
 #define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
 #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
+#define ggml_vec_dot_mxfp4_q8_0_generic ggml_vec_dot_mxfp4_q8_0
 // repack.cpp
 #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
 #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
@@ -159,12 +174,14 @@
 #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
 #define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
 #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
+#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
 #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
 #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
 #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
 #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
 #define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
 #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
+#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
 #elif defined(__wasm__)
 // quants.c
 #define ggml_vec_dot_q4_1_q8_1_generic ggml_vec_dot_q4_1_q8_1
@@ -179,6 +196,7 @@
 #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
 #define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0
 #define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K
+#define ggml_vec_dot_mxfp4_q8_0_generic ggml_vec_dot_mxfp4_q8_0
 // repack.cpp
 #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
 #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
@@ -189,10 +207,12 @@
 #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
 #define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
 #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
+#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
 #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
 #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
 #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
 #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
 #define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
 #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
+#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
 #endif
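Every one of the twenty additions above follows the same pattern: arch-fallback.h aliases a `_generic` kernel to the public symbol on architectures that have no hand-tuned implementation, so dispatch always links against the expected name. The new lines extend that fallback to the MXFP4 vec-dot kernel and the IQ4_NL 8x8 gemv/gemm kernels introduced in this release. A toy sketch of the mechanism (the `my_vec_dot` names and the `HAVE_TUNED_KERNEL` gate are made up; the real header keys purely on architecture macros):

#include <cstdio>

// On platforms lacking a tuned kernel, alias the generic symbol to the
// public name, exactly as arch-fallback.h does for ggml_vec_dot_mxfp4_q8_0.
#if !defined(HAVE_TUNED_KERNEL)
#define my_vec_dot_generic my_vec_dot
#endif

// The portable reference implementation; after the #define above it is
// compiled under the name my_vec_dot, which callers link against.
static float my_vec_dot_generic(const float * x, const float * y, int n) {
    float s = 0.0f;
    for (int i = 0; i < n; ++i) {
        s += x[i] * y[i];
    }
    return s;
}

int main() {
    const float a[] = {1, 2, 3}, b[] = {4, 5, 6};
    std::printf("%g\n", my_vec_dot(a, b, 3)); // resolves to the generic kernel
}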
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c

@@ -253,6 +253,12 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
         .vec_dot_type = GGML_TYPE_Q8_1,
         .nrows = 1,
     },
+    [GGML_TYPE_MXFP4] = {
+        .from_float = quantize_row_mxfp4,
+        .vec_dot = ggml_vec_dot_mxfp4_q8_0,
+        .vec_dot_type = GGML_TYPE_Q8_0,
+        .nrows = 1,
+    },
     [GGML_TYPE_Q2_K] = {
         .from_float = quantize_row_q2_K,
         .vec_dot = ggml_vec_dot_q2_K_q8_K,
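The new `GGML_TYPE_MXFP4` entry registers the microscaling FP4 format with the CPU backend's type-traits table: `from_float` quantizes float data into the format, `vec_dot` is the kernel that arch-fallback.h aliases above, and `vec_dot_type = GGML_TYPE_Q8_0` declares which quantized format the second operand must be in before the dot product runs. A sketch of how such a row might be consumed (a hypothetical driver, not ggml's scheduler; the struct mirrors only the fields visible in this hunk):

#include <cstddef>
#include <cstdint>

// Hypothetical consumer of a traits row: vec_dot_type names the quantized
// format the activation must be converted to before vec_dot is called.
struct cpu_traits {
    void (*from_float)(const float * x, void * y, int64_t k); // e.g. quantize_row_mxfp4
    void (*vec_dot)(int n, float * s, const void * x, const void * y);
    int  vec_dot_type; // e.g. GGML_TYPE_Q8_0 for the new MXFP4 entry
    int  nrows;        // output rows produced per vec_dot call
};

// One mat-vec pass: dot every weight row against the activation vector,
// which the caller has already quantized via from_float.
static void matvec(const cpu_traits & t, const void * w, size_t row_size,
                   int64_t n_rows, int n_cols, const void * q_act, float * out) {
    for (int64_t r = 0; r < n_rows; ++r) {
        t.vec_dot(n_cols, &out[r], (const char *) w + r * row_size, q_act);
    }
}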
@@ -1670,6 +1676,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
             {
                 ggml_compute_forward_add(params, tensor);
             } break;
+        case GGML_OP_ADD_ID:
+            {
+                ggml_compute_forward_add_id(params, tensor);
+            } break;
         case GGML_OP_ADD1:
             {
                 ggml_compute_forward_add1(params, tensor);
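`GGML_OP_ADD_ID` is one of the new operators in this llama.cpp drop (see the `+65 -3` delta in ggml.h). Judging by its name and its placement next to `GGML_OP_ADD`, it adds a bias row selected per input row by an id tensor, the shape mixture-of-experts bias terms need; treat that reading as an assumption, since the diff shows only the dispatch. A scalar sketch of the assumed semantics:

#include <cstdint>

// Assumed semantics of ADD_ID (illustrative, not ggml's implementation):
// each row r of src0 gets the bias row picked by ids[r] added to it.
static void add_id_ref(const float * src0, const float * bias, const int32_t * ids,
                       float * dst, int64_t n_rows, int64_t n_cols) {
    for (int64_t r = 0; r < n_rows; ++r) {
        const float * b = bias + (int64_t) ids[r] * n_cols; // selected bias row
        for (int64_t c = 0; c < n_cols; ++c) {
            dst[r * n_cols + c] = src0[r * n_cols + c] + b[c];
        }
    }
}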
@@ -1924,7 +1934,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
             } break;
         case GGML_OP_FLASH_ATTN_EXT:
             {
-                ggml_compute_forward_flash_attn_ext(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], tensor);
+                ggml_compute_forward_flash_attn_ext(params, tensor);
             } break;
         case GGML_OP_FLASH_ATTN_BACK:
             {
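This one-line change pairs with the `+3 -8` delta in ops.h: `ggml_compute_forward_flash_attn_ext` now receives just the compute params and the destination tensor, pulling q/k/v/mask from `dst->src[]` itself instead of taking them as explicit arguments. The prototype implied by the new call site (only the arity is confirmed by the diff; the parameter names are guesses):

// Prototype implied by the call ggml_compute_forward_flash_attn_ext(params, tensor);
// parameter names are assumptions, only the two-argument shape is shown above.
void ggml_compute_forward_flash_attn_ext(const struct ggml_compute_params * params,
                                         struct ggml_tensor * dst);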
@@ -2012,6 +2022,11 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
                 ggml_compute_forward_opt_step_adamw(params, tensor);
             }
             break;
+        case GGML_OP_OPT_STEP_SGD:
+            {
+                ggml_compute_forward_opt_step_sgd(params, tensor);
+            }
+            break;
         case GGML_OP_NONE:
             {
                 // nop
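`GGML_OP_OPT_STEP_SGD` lands alongside the existing AdamW step, matching the `+25 -6` delta in ggml-opt.h. For reference, a plain SGD update with weight decay folded in; how ggml actually passes the hyper-parameters to `ggml_compute_forward_opt_step_sgd` is not visible here, so the `lr`/`wd` arguments are assumptions:

#include <cstdint>

// Reference SGD step (illustrative): w <- w - lr * (g + wd * w).
// Whether ggml applies weight decay this way, and how lr/wd reach the op,
// is assumed rather than shown by this diff.
static void opt_step_sgd_ref(float * w, const float * g, int64_t n, float lr, float wd) {
    for (int64_t i = 0; i < n; ++i) {
        w[i] -= lr * (g[i] + wd * w[i]);
    }
}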
@@ -2111,6 +2126,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
         case GGML_OP_DUP:
         case GGML_OP_CONT:
         case GGML_OP_ADD:
+        case GGML_OP_ADD_ID:
         case GGML_OP_ADD1:
         case GGML_OP_ACC:
             {
@@ -2172,6 +2188,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             case GGML_GLU_OP_REGLU:
             case GGML_GLU_OP_GEGLU:
             case GGML_GLU_OP_SWIGLU:
+            case GGML_GLU_OP_SWIGLU_OAI:
             case GGML_GLU_OP_GEGLU_ERF:
             case GGML_GLU_OP_GEGLU_QUICK:
                 {
@@ -2313,6 +2330,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
         case GGML_OP_CROSS_ENTROPY_LOSS:
         case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
         case GGML_OP_OPT_STEP_ADAMW:
+        case GGML_OP_OPT_STEP_SGD:
             {
                 n_tasks = n_threads;
             } break;
@@ -2673,6 +2691,7 @@ struct ggml_cplan ggml_graph_plan(
                     }
                 } break;
             case GGML_OP_ADD:
+            case GGML_OP_ADD_ID:
             case GGML_OP_ADD1:
                 {
                     if (ggml_is_quantized(node->src[0]->type)) {
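The four bookkeeping hunks above register the new operators with the scheduler: `GGML_OP_ADD_ID` and `GGML_GLU_OP_SWIGLU_OAI` are threaded like their `ADD`/`SWIGLU` siblings, `GGML_OP_OPT_STEP_SGD` gets the full `n_threads` treatment like the AdamW step, and `ggml_graph_plan` reserves the same quantization work buffer for `ADD_ID` as for `ADD` when the first source is quantized.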
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp

@@ -35,7 +35,7 @@
 
 // ggml-backend interface
 
-std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type() {
+std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_types() {
     static std::vector<ggml_backend_buffer_type_t> bufts = []() {
         std::vector<ggml_backend_buffer_type_t> bufts;
 
@@ -57,8 +57,6 @@ std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type() {
         }
 #endif
 
-        bufts.push_back(NULL);
-
         return bufts;
     }();
 
@@ -66,14 +64,20 @@ std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type() {
 }
 
 static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device) {
-    return ggml_backend_cpu_get_extra_buffers_type().data();
+    static std::vector<ggml_backend_buffer_type_t> extra_bufts = [] {
+        std::vector<ggml_backend_buffer_type_t> bufts = ggml_backend_cpu_get_extra_buffer_types();
+        bufts.push_back(nullptr);
+        return bufts;
+    }();
+
+    return extra_bufts.data();
 
     GGML_UNUSED(device);
 }
 
 static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
-    for (auto * extra : ggml_backend_cpu_get_extra_buffers_type()) {
-        if (extra && extra == buft) {
+    for (auto * extra : ggml_backend_cpu_get_extra_buffer_types()) {
+        if (extra == buft) {
             return true;
         }
     }
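The refactor renames `ggml_backend_cpu_get_extra_buffers_type` to `ggml_backend_cpu_get_extra_buffer_types` and moves the NULL terminator out of the shared vector: the sentinel is appended only in the device-local copy that has to expose a NULL-terminated C array, so range-based loops over the shared list no longer need a null guard. The pattern in miniature (toy `buft_t` stand-in for `ggml_backend_buffer_type_t`):

#include <vector>

// Toy version of the sentinel-at-the-edge pattern from this hunk: the shared
// list stays clean; only the C-array view appends the NULL terminator.
using buft_t = const char *; // stand-in for ggml_backend_buffer_type_t

static std::vector<buft_t> & get_types() {
    static std::vector<buft_t> types = {"amx", "repack"}; // no sentinel here
    return types;
}

static buft_t * get_types_c_array() {
    static std::vector<buft_t> with_sentinel = [] {
        std::vector<buft_t> v = get_types();
        v.push_back(nullptr); // terminator lives only in this copy
        return v;
    }();
    return with_sentinel.data();
}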
@@ -210,10 +214,10 @@ ggml_backend_t ggml_backend_cpu_init(void) {
     ctx->abort_callback_data = NULL;
 
     ggml_backend_t cpu_backend = new ggml_backend {
-        /* .guid      = */ ggml_backend_cpu_guid(),
-        /* .interface = */ ggml_backend_cpu_i,
-        /* .device    = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
-        /* .context   = */ ctx,
+        /* .guid    = */ ggml_backend_cpu_guid(),
+        /* .iface   = */ ggml_backend_cpu_i,
+        /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
+        /* .context = */ ctx,
     };
 
     if (cpu_backend == NULL) {
@@ -397,20 +401,13 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
         return true;
     }
 
-    // extra_buffer_op?
-    for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
-        if (extra) {
-            auto buf_extra = (ggml::cpu::extra_buffer_type *) extra->context;
-            if (buf_extra && buf_extra->supports_op(dev, op)) {
-                return true;
-            }
-        }
-    }
-
-    // the other case need host buffer.
-    for (int i = 0; i < GGML_MAX_SRC; i++) {
-        if (op->src[i] && op->src[i]->buffer && !ggml_backend_buft_is_host(op->src[i]->buffer->buft)) {
-            return false;
+    // check extra buffer types
+    // note: only the first sources are checked for extra buffer types to reduce overhead, increase if necessary
+    for (int i = 0; i < 4; i++) {
+        if (op->src[i] && op->src[i]->buffer &&
+            ggml_backend_cpu_is_extra_buffer_type(op->src[i]->buffer->buft)) {
+            auto * buf_extra = (ggml::cpu::extra_buffer_type *) op->src[i]->buffer->buft->context;
+            return buf_extra->supports_op(dev, op);
         }
     }
 
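`supports_op` now delegates to the extra buffer type that owns an operand's buffer instead of polling every registered extra type, and the old reject-non-host-buffer loop is gone; only `src[0..3]` are inspected, an overhead trade-off the new comment calls out. The delegation shape, reduced to toy interfaces (mirroring, not reproducing, `ggml::cpu::extra_buffer_type`):

// Delegation in miniature: a buffer type carries its own capability check and
// the device forwards to it, the same shape as the new loop over op->src[0..3].
struct op_t; // opaque stand-in for ggml_tensor

struct extra_buffer_type {
    virtual ~extra_buffer_type() = default;
    virtual bool supports_op(const op_t * op) const = 0;
};

static bool device_supports_op(const op_t * op, extra_buffer_type * owner) {
    // ask only the type that owns the operand's buffer, not every extra type
    return owner != nullptr && owner->supports_op(op);
}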
package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp

@@ -259,7 +259,10 @@ class tensor_traits : public ggml::cpu::tensor_traits {
         const int64_t m_start = 0;
 
         const int64_t n_step = static_cast<int64_t>(kernel->get_n_step());
-        const int64_t num_threads = KAI_MIN(n / n_step, nth);
+        int64_t num_threads = KAI_MIN(n / n_step, nth);
+        if (num_threads <= 0) {
+            num_threads = 1;
+        }
 
         if (ith < num_threads) {
             const int64_t num_n_per_thread0 = round_down(n / num_threads, n_step);
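This guard fixes a division by zero for small matrices: when `n < n_step`, the integer division `n / n_step` is 0, so `KAI_MIN(n / n_step, nth)` produced `num_threads == 0` and the subsequent `round_down(n / num_threads, n_step)` divided by zero. A standalone restatement of the fix:

#include <algorithm>
#include <cstdint>

// Minimal reproduction of the bug this hunk fixes: with n < n_step the old
// expression yields num_threads == 0 and the per-thread split divides by zero.
static int64_t safe_num_threads(int64_t n, int64_t n_step, int64_t nth) {
    int64_t num_threads = std::min(n / n_step, nth); // 0 when n < n_step
    if (num_threads <= 0) {
        num_threads = 1; // the guard added in this version
    }
    return num_threads;
}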
@@ -309,7 +312,8 @@ class tensor_traits : public ggml::cpu::tensor_traits {
         GGML_ASSERT(kernel);
 
         const int ith = params->ith;
-        const int nth = params->nth;
+        const int nth_raw = params->nth;
+        const int nth = nth_raw > 0 ? nth_raw : 1;
 
         const size_t k = ne00;
         const size_t m = ne11;
@@ -327,9 +331,12 @@ class tensor_traits : public ggml::cpu::tensor_traits {
         const size_t num_n_per_thread = kai_roundup(kai_roundup(n, nth) / nth, n_step);
         const size_t n_start = ith * num_n_per_thread;
 
-        size_t n_to_process = num_n_per_thread;
-        if ((n_start + n_to_process) > n) {
-            n_to_process = n - n_start;
+        size_t n_to_process = 0;
+        if (n_start < n) {
+            n_to_process = num_n_per_thread;
+            if ((n_start + n_to_process) > n) {
+                n_to_process = n - n_start;
+            }
         }
 
         // Calculate number of columns to be processed per thread
@@ -361,8 +368,10 @@ class tensor_traits : public ggml::cpu::tensor_traits {
         const void* lhs_ptr = (const void*)((const char *)lhs_packed + lhs_packed_offset);
         float *dst_ptr = reinterpret_cast<float *>(static_cast<uint8_t *>(dst->data) + dst_offset);
 
-        variant_call<void>(kernel->run_kernel, m, n_to_process, k, QK4_0, lhs_ptr, rhs_ptr, dst_ptr, dst_stride,
-                           sizeof(float), -FLT_MAX, FLT_MAX);
+        if (n_to_process > 0) {
+            variant_call<void>(kernel->run_kernel, m, n_to_process, k, QK4_0, lhs_ptr, rhs_ptr, dst_ptr, dst_stride,
+                               sizeof(float), -FLT_MAX, FLT_MAX);
+        }
 
         return true;
     }
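The remaining kleidiai hunks continue the same hardening theme: `nth` from the compute params is clamped to at least 1, `n_to_process` becomes 0 when a thread's `n_start` already lies at or past `n` (previously `n - n_start` could wrap around as an unsigned value), and the kernel is invoked only when there is actual work. The guarded tail-partition logic, restated standalone:

#include <cstddef>

// Guarded per-thread column range, restating the new kleidiai logic: threads
// whose start is past n get zero work and must skip the kernel call entirely,
// since n - n_start would otherwise wrap around as size_t.
static size_t columns_for_thread(size_t ith, size_t num_n_per_thread, size_t n) {
    const size_t n_start = ith * num_n_per_thread;
    size_t n_to_process = 0;
    if (n_start < n) {
        n_to_process = num_n_per_thread;
        if (n_start + n_to_process > n) {
            n_to_process = n - n_start; // ragged last chunk
        }
    }
    return n_to_process; // caller runs the kernel only when this is > 0
}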