whisper.rn 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/ggml.c +5349 -5349
- package/cpp/ggml.h +810 -810
- package/cpp/whisper.cpp +518 -518
- package/cpp/whisper.h +2 -2
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +3 -0
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNWhisper.js +3 -0
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +3 -0
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +1 -3
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNWhisper.ts +2 -3
- package/src/index.ts +2 -1
- package/whisper-rn.podspec +1 -1
package/cpp/ggml.h
CHANGED
|
@@ -32,22 +32,22 @@
|
|
|
32
32
|
// For example, here we define the function: f(x) = a*x^2 + b
|
|
33
33
|
//
|
|
34
34
|
// {
|
|
35
|
-
// struct
|
|
35
|
+
// struct wsp_ggml_init_params params = {
|
|
36
36
|
// .mem_size = 16*1024*1024,
|
|
37
37
|
// .mem_buffer = NULL,
|
|
38
38
|
// };
|
|
39
39
|
//
|
|
40
40
|
// // memory allocation happens here
|
|
41
|
-
// struct
|
|
41
|
+
// struct wsp_ggml_context * ctx = wsp_ggml_init(params);
|
|
42
42
|
//
|
|
43
|
-
// struct
|
|
43
|
+
// struct wsp_ggml_tensor * x = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 1);
|
|
44
44
|
//
|
|
45
|
-
//
|
|
45
|
+
// wsp_ggml_set_param(ctx, x); // x is an input variable
|
|
46
46
|
//
|
|
47
|
-
// struct
|
|
48
|
-
// struct
|
|
49
|
-
// struct
|
|
50
|
-
// struct
|
|
47
|
+
// struct wsp_ggml_tensor * a = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 1);
|
|
48
|
+
// struct wsp_ggml_tensor * b = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 1);
|
|
49
|
+
// struct wsp_ggml_tensor * x2 = wsp_ggml_mul(ctx, x, x);
|
|
50
|
+
// struct wsp_ggml_tensor * f = wsp_ggml_add(ctx, wsp_ggml_mul(ctx, a, x2), b);
|
|
51
51
|
//
|
|
52
52
|
// ...
|
|
53
53
|
// }
|
|
@@ -58,33 +58,33 @@
|
|
|
58
58
|
// {
|
|
59
59
|
// ...
|
|
60
60
|
//
|
|
61
|
-
// struct
|
|
61
|
+
// struct wsp_ggml_cgraph gf = wsp_ggml_build_forward(f);
|
|
62
62
|
//
|
|
63
63
|
// // set the input variable and parameter values
|
|
64
|
-
//
|
|
65
|
-
//
|
|
66
|
-
//
|
|
64
|
+
// wsp_ggml_set_f32(x, 2.0f);
|
|
65
|
+
// wsp_ggml_set_f32(a, 3.0f);
|
|
66
|
+
// wsp_ggml_set_f32(b, 4.0f);
|
|
67
67
|
//
|
|
68
|
-
//
|
|
68
|
+
// wsp_ggml_graph_compute(ctx0, &gf);
|
|
69
69
|
//
|
|
70
|
-
// printf("f = %f\n",
|
|
70
|
+
// printf("f = %f\n", wsp_ggml_get_f32_1d(f, 0));
|
|
71
71
|
//
|
|
72
72
|
// ...
|
|
73
73
|
// }
|
|
74
74
|
//
|
|
75
|
-
// The actual computation is performed in the
|
|
75
|
+
// The actual computation is performed in the wsp_ggml_graph_compute() function.
|
|
76
76
|
//
|
|
77
|
-
// The
|
|
78
|
-
//
|
|
77
|
+
// The wsp_ggml_new_tensor_...() functions create new tensors. They are allocated in the memory buffer provided to the
|
|
78
|
+
// wsp_ggml_init() function. You have to be careful not to exceed the memory buffer size. Therefore, you have to know
|
|
79
79
|
// in advance how much memory you need for your computation. Alternatively, you can allocate a large enough memory
|
|
80
|
-
// and after defining the computation graph, call the
|
|
80
|
+
// and after defining the computation graph, call the wsp_ggml_used_mem() function to find out how much memory was
|
|
81
81
|
// actually needed.
|
|
82
82
|
//
|
|
83
|
-
// The
|
|
83
|
+
// The wsp_ggml_set_param() function marks a tensor as an input variable. This is used by the automatic
|
|
84
84
|
// differentiation and optimization algorithms.
|
|
85
85
|
//
|
|
86
86
|
// The described approach allows to define the function graph once and then compute its forward or backward graphs
|
|
87
|
-
// multiple times. All computations will use the same memory buffer allocated in the
|
|
87
|
+
// multiple times. All computations will use the same memory buffer allocated in the wsp_ggml_init() function. This way
|
|
88
88
|
// the user can avoid the memory allocation overhead at runtime.
|
|
89
89
|
//
|
|
90
90
|
// The library supports multi-dimensional tensors - up to 4 dimensions. The FP16 and FP32 data types are first class
|
|
@@ -95,9 +95,9 @@
|
|
|
95
95
|
// clear that the library needs to support more complex operations. The way to support these operations is not clear
|
|
96
96
|
// yet, but a few examples are demonstrated in the following operations:
|
|
97
97
|
//
|
|
98
|
-
// -
|
|
99
|
-
// -
|
|
100
|
-
// -
|
|
98
|
+
// - wsp_ggml_permute()
|
|
99
|
+
// - wsp_ggml_conv_1d_1s()
|
|
100
|
+
// - wsp_ggml_conv_1d_2s()
|
|
101
101
|
//
|
|
102
102
|
// For each tensor operator, the library implements a forward and backward computation function. The forward function
|
|
103
103
|
// computes the output tensor value given the input tensor values. The backward function computes the adjoint of the
|
|
@@ -108,20 +108,20 @@
|
|
|
108
108
|
// https://www.youtube.com/watch?v=wG_nF1awSSY
|
|
109
109
|
//
|
|
110
110
|
//
|
|
111
|
-
// ## Tensor data (struct
|
|
111
|
+
// ## Tensor data (struct wsp_ggml_tensor)
|
|
112
112
|
//
|
|
113
|
-
// The tensors are stored in memory via the
|
|
113
|
+
// The tensors are stored in memory via the wsp_ggml_tensor struct. The structure provides information about the size of
|
|
114
114
|
// the tensor, the data type, and the memory buffer where the tensor data is stored. Additionally, it contains
|
|
115
115
|
// pointers to the "source" tensors - i.e. the tensors that were used to compute the current tensor. For example:
|
|
116
116
|
//
|
|
117
117
|
// {
|
|
118
|
-
// struct
|
|
118
|
+
// struct wsp_ggml_tensor * c = wsp_ggml_add(ctx, a, b);
|
|
119
119
|
//
|
|
120
120
|
// assert(c->src[0] == a);
|
|
121
121
|
// assert(c->src[1] == b);
|
|
122
122
|
// }
|
|
123
123
|
//
|
|
124
|
-
// The multi-dimensional tensors are stored in row-major order. The
|
|
124
|
+
// The multi-dimensional tensors are stored in row-major order. The wsp_ggml_tensor struct contains fields for the
|
|
125
125
|
// number of elements in each dimension ("ne") as well as the number of bytes ("nb", a.k.a. stride). This allows
|
|
126
126
|
// to store tensors that are not contiguous in memory, which is useful for operations such as transposition and
|
|
127
127
|
// permutation. All tensor operations have to take the stride into account and not assume that the tensor is
|
|
@@ -130,7 +130,7 @@
|
|
|
130
130
|
// The data of the tensor is accessed via the "data" pointer. For example:
|
|
131
131
|
//
|
|
132
132
|
// {
|
|
133
|
-
// struct
|
|
133
|
+
// struct wsp_ggml_tensor * a = wsp_ggml_new_tensor_2d(ctx, WSP_GGML_TYPE_F32, 2, 3);
|
|
134
134
|
//
|
|
135
135
|
// // a[1, 2] = 1.0f;
|
|
136
136
|
// *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
|
|
@@ -141,9 +141,9 @@
|
|
|
141
141
|
// ...
|
|
142
142
|
// }
|
|
143
143
|
//
|
|
144
|
-
// Alternatively, there are helper functions, such as
|
|
144
|
+
// Alternatively, there are helper functions, such as wsp_ggml_get_f32_1d() and wsp_ggml_set_f32_1d() that can be used.
|
|
145
145
|
//
|
|
146
|
-
// ## The matrix multiplication operator (
|
|
146
|
+
// ## The matrix multiplication operator (wsp_ggml_mul_mat)
|
|
147
147
|
//
|
|
148
148
|
// TODO
|
|
149
149
|
//
|
|
@@ -169,44 +169,44 @@
|
|
|
169
169
|
//
|
|
170
170
|
//
|
|
171
171
|
|
|
172
|
-
#ifdef
|
|
172
|
+
#ifdef WSP_GGML_SHARED
|
|
173
173
|
# if defined(_WIN32) && !defined(__MINGW32__)
|
|
174
|
-
# ifdef
|
|
175
|
-
# define
|
|
174
|
+
# ifdef WSP_GGML_BUILD
|
|
175
|
+
# define WSP_GGML_API __declspec(dllexport)
|
|
176
176
|
# else
|
|
177
|
-
# define
|
|
177
|
+
# define WSP_GGML_API __declspec(dllimport)
|
|
178
178
|
# endif
|
|
179
179
|
# else
|
|
180
|
-
# define
|
|
180
|
+
# define WSP_GGML_API __attribute__ ((visibility ("default")))
|
|
181
181
|
# endif
|
|
182
182
|
#else
|
|
183
|
-
# define
|
|
183
|
+
# define WSP_GGML_API
|
|
184
184
|
#endif
|
|
185
185
|
|
|
186
186
|
#include <stdint.h>
|
|
187
187
|
#include <stddef.h>
|
|
188
188
|
#include <stdbool.h>
|
|
189
189
|
|
|
190
|
-
#define
|
|
191
|
-
#define
|
|
190
|
+
#define WSP_GGML_FILE_MAGIC 0x67676d6c // "ggml"
|
|
191
|
+
#define WSP_GGML_FILE_VERSION 1
|
|
192
192
|
|
|
193
|
-
#define
|
|
194
|
-
#define
|
|
193
|
+
#define WSP_GGML_QNT_VERSION 2 // bump this on quantization format changes
|
|
194
|
+
#define WSP_GGML_QNT_VERSION_FACTOR 1000 // do not change this
|
|
195
195
|
|
|
196
|
-
#define
|
|
197
|
-
#define
|
|
198
|
-
#define
|
|
199
|
-
#define
|
|
200
|
-
#define
|
|
201
|
-
#define
|
|
202
|
-
#define
|
|
196
|
+
#define WSP_GGML_MAX_DIMS 4
|
|
197
|
+
#define WSP_GGML_MAX_NODES 4096
|
|
198
|
+
#define WSP_GGML_MAX_PARAMS 256
|
|
199
|
+
#define WSP_GGML_MAX_CONTEXTS 64
|
|
200
|
+
#define WSP_GGML_MAX_OPT 4
|
|
201
|
+
#define WSP_GGML_MAX_NAME 48
|
|
202
|
+
#define WSP_GGML_DEFAULT_N_THREADS 4
|
|
203
203
|
|
|
204
|
-
#define
|
|
204
|
+
#define WSP_GGML_UNUSED(x) (void)(x)
|
|
205
205
|
|
|
206
|
-
#define
|
|
206
|
+
#define WSP_GGML_ASSERT(x) \
|
|
207
207
|
do { \
|
|
208
208
|
if (!(x)) { \
|
|
209
|
-
fprintf(stderr, "
|
|
209
|
+
fprintf(stderr, "WSP_GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
|
|
210
210
|
abort(); \
|
|
211
211
|
} \
|
|
212
212
|
} while (0)
|
|
@@ -216,24 +216,24 @@
|
|
|
216
216
|
//
|
|
217
217
|
// example:
|
|
218
218
|
//
|
|
219
|
-
//
|
|
220
|
-
//
|
|
219
|
+
// WSP_GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
|
|
220
|
+
// WSP_GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
|
|
221
221
|
//
|
|
222
|
-
#define
|
|
222
|
+
#define WSP_GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
|
|
223
223
|
const type prefix##0 = (pointer)->array[0]; \
|
|
224
|
-
|
|
225
|
-
#define
|
|
226
|
-
|
|
224
|
+
WSP_GGML_UNUSED(prefix##0);
|
|
225
|
+
#define WSP_GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
|
|
226
|
+
WSP_GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \
|
|
227
227
|
const type prefix##1 = (pointer)->array[1]; \
|
|
228
|
-
|
|
229
|
-
#define
|
|
230
|
-
|
|
228
|
+
WSP_GGML_UNUSED(prefix##1);
|
|
229
|
+
#define WSP_GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
|
|
230
|
+
WSP_GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \
|
|
231
231
|
const type prefix##2 = (pointer)->array[2]; \
|
|
232
|
-
|
|
233
|
-
#define
|
|
234
|
-
|
|
232
|
+
WSP_GGML_UNUSED(prefix##2);
|
|
233
|
+
#define WSP_GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
|
|
234
|
+
WSP_GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \
|
|
235
235
|
const type prefix##3 = (pointer)->array[3]; \
|
|
236
|
-
|
|
236
|
+
WSP_GGML_UNUSED(prefix##3);
|
|
237
237
|
|
|
238
238
|
#ifdef __cplusplus
|
|
239
239
|
extern "C" {
|
|
@@ -241,182 +241,182 @@ extern "C" {
|
|
|
241
241
|
|
|
242
242
|
#ifdef __ARM_NEON
|
|
243
243
|
// we use the built-in 16-bit float type
|
|
244
|
-
typedef __fp16
|
|
244
|
+
typedef __fp16 wsp_ggml_fp16_t;
|
|
245
245
|
#else
|
|
246
|
-
typedef uint16_t
|
|
246
|
+
typedef uint16_t wsp_ggml_fp16_t;
|
|
247
247
|
#endif
|
|
248
248
|
|
|
249
249
|
// convert FP16 <-> FP32
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
struct
|
|
257
|
-
struct
|
|
258
|
-
|
|
259
|
-
enum
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
//
|
|
265
|
-
//
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
250
|
+
WSP_GGML_API float wsp_ggml_fp16_to_fp32(wsp_ggml_fp16_t x);
|
|
251
|
+
WSP_GGML_API wsp_ggml_fp16_t wsp_ggml_fp32_to_fp16(float x);
|
|
252
|
+
|
|
253
|
+
WSP_GGML_API void wsp_ggml_fp16_to_fp32_row(const wsp_ggml_fp16_t * x, float * y, size_t n);
|
|
254
|
+
WSP_GGML_API void wsp_ggml_fp32_to_fp16_row(const float * x, wsp_ggml_fp16_t * y, size_t n);
|
|
255
|
+
|
|
256
|
+
struct wsp_ggml_object;
|
|
257
|
+
struct wsp_ggml_context;
|
|
258
|
+
|
|
259
|
+
enum wsp_ggml_type {
|
|
260
|
+
WSP_GGML_TYPE_F32 = 0,
|
|
261
|
+
WSP_GGML_TYPE_F16 = 1,
|
|
262
|
+
WSP_GGML_TYPE_Q4_0 = 2,
|
|
263
|
+
WSP_GGML_TYPE_Q4_1 = 3,
|
|
264
|
+
// WSP_GGML_TYPE_Q4_2 = 4, support has been removed
|
|
265
|
+
// WSP_GGML_TYPE_Q4_3 (5) support has been removed
|
|
266
|
+
WSP_GGML_TYPE_Q5_0 = 6,
|
|
267
|
+
WSP_GGML_TYPE_Q5_1 = 7,
|
|
268
|
+
WSP_GGML_TYPE_Q8_0 = 8,
|
|
269
|
+
WSP_GGML_TYPE_Q8_1 = 9,
|
|
270
270
|
// k-quantizations
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
271
|
+
WSP_GGML_TYPE_Q2_K = 10,
|
|
272
|
+
WSP_GGML_TYPE_Q3_K = 11,
|
|
273
|
+
WSP_GGML_TYPE_Q4_K = 12,
|
|
274
|
+
WSP_GGML_TYPE_Q5_K = 13,
|
|
275
|
+
WSP_GGML_TYPE_Q6_K = 14,
|
|
276
|
+
WSP_GGML_TYPE_Q8_K = 15,
|
|
277
|
+
WSP_GGML_TYPE_I8,
|
|
278
|
+
WSP_GGML_TYPE_I16,
|
|
279
|
+
WSP_GGML_TYPE_I32,
|
|
280
|
+
WSP_GGML_TYPE_COUNT,
|
|
281
281
|
};
|
|
282
282
|
|
|
283
|
-
enum
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
283
|
+
enum wsp_ggml_backend {
|
|
284
|
+
WSP_GGML_BACKEND_CPU = 0,
|
|
285
|
+
WSP_GGML_BACKEND_GPU = 10,
|
|
286
|
+
WSP_GGML_BACKEND_GPU_SPLIT = 20,
|
|
287
287
|
};
|
|
288
288
|
|
|
289
289
|
// model file types
|
|
290
|
-
enum
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
290
|
+
enum wsp_ggml_ftype {
|
|
291
|
+
WSP_GGML_FTYPE_UNKNOWN = -1,
|
|
292
|
+
WSP_GGML_FTYPE_ALL_F32 = 0,
|
|
293
|
+
WSP_GGML_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
|
|
294
|
+
WSP_GGML_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
|
|
295
|
+
WSP_GGML_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
|
|
296
|
+
WSP_GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
|
|
297
|
+
WSP_GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
|
|
298
|
+
WSP_GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
|
|
299
|
+
WSP_GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
|
|
300
|
+
WSP_GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
|
|
301
|
+
WSP_GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors
|
|
302
|
+
WSP_GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors
|
|
303
|
+
WSP_GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors
|
|
304
|
+
WSP_GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
|
|
305
305
|
};
|
|
306
306
|
|
|
307
307
|
// available tensor operations:
|
|
308
|
-
enum
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
308
|
+
enum wsp_ggml_op {
|
|
309
|
+
WSP_GGML_OP_NONE = 0,
|
|
310
|
+
|
|
311
|
+
WSP_GGML_OP_DUP,
|
|
312
|
+
WSP_GGML_OP_ADD,
|
|
313
|
+
WSP_GGML_OP_ADD1,
|
|
314
|
+
WSP_GGML_OP_ACC,
|
|
315
|
+
WSP_GGML_OP_SUB,
|
|
316
|
+
WSP_GGML_OP_MUL,
|
|
317
|
+
WSP_GGML_OP_DIV,
|
|
318
|
+
WSP_GGML_OP_SQR,
|
|
319
|
+
WSP_GGML_OP_SQRT,
|
|
320
|
+
WSP_GGML_OP_LOG,
|
|
321
|
+
WSP_GGML_OP_SUM,
|
|
322
|
+
WSP_GGML_OP_SUM_ROWS,
|
|
323
|
+
WSP_GGML_OP_MEAN,
|
|
324
|
+
WSP_GGML_OP_ARGMAX,
|
|
325
|
+
WSP_GGML_OP_REPEAT,
|
|
326
|
+
WSP_GGML_OP_REPEAT_BACK,
|
|
327
|
+
WSP_GGML_OP_ABS,
|
|
328
|
+
WSP_GGML_OP_SGN,
|
|
329
|
+
WSP_GGML_OP_NEG,
|
|
330
|
+
WSP_GGML_OP_STEP,
|
|
331
|
+
WSP_GGML_OP_TANH,
|
|
332
|
+
WSP_GGML_OP_ELU,
|
|
333
|
+
WSP_GGML_OP_RELU,
|
|
334
|
+
WSP_GGML_OP_GELU,
|
|
335
|
+
WSP_GGML_OP_GELU_QUICK,
|
|
336
|
+
WSP_GGML_OP_SILU,
|
|
337
|
+
WSP_GGML_OP_SILU_BACK,
|
|
338
|
+
WSP_GGML_OP_NORM, // normalize
|
|
339
|
+
WSP_GGML_OP_RMS_NORM,
|
|
340
|
+
WSP_GGML_OP_RMS_NORM_BACK,
|
|
341
|
+
|
|
342
|
+
WSP_GGML_OP_MUL_MAT,
|
|
343
|
+
WSP_GGML_OP_OUT_PROD,
|
|
344
|
+
|
|
345
|
+
WSP_GGML_OP_SCALE,
|
|
346
|
+
WSP_GGML_OP_SET,
|
|
347
|
+
WSP_GGML_OP_CPY,
|
|
348
|
+
WSP_GGML_OP_CONT,
|
|
349
|
+
WSP_GGML_OP_RESHAPE,
|
|
350
|
+
WSP_GGML_OP_VIEW,
|
|
351
|
+
WSP_GGML_OP_PERMUTE,
|
|
352
|
+
WSP_GGML_OP_TRANSPOSE,
|
|
353
|
+
WSP_GGML_OP_GET_ROWS,
|
|
354
|
+
WSP_GGML_OP_GET_ROWS_BACK,
|
|
355
|
+
WSP_GGML_OP_DIAG,
|
|
356
|
+
WSP_GGML_OP_DIAG_MASK_INF,
|
|
357
|
+
WSP_GGML_OP_DIAG_MASK_ZERO,
|
|
358
|
+
WSP_GGML_OP_SOFT_MAX,
|
|
359
|
+
WSP_GGML_OP_SOFT_MAX_BACK,
|
|
360
|
+
WSP_GGML_OP_ROPE,
|
|
361
|
+
WSP_GGML_OP_ROPE_BACK,
|
|
362
|
+
WSP_GGML_OP_ALIBI,
|
|
363
|
+
WSP_GGML_OP_CLAMP,
|
|
364
|
+
WSP_GGML_OP_CONV_1D,
|
|
365
|
+
WSP_GGML_OP_CONV_2D,
|
|
366
|
+
|
|
367
|
+
WSP_GGML_OP_FLASH_ATTN,
|
|
368
|
+
WSP_GGML_OP_FLASH_FF,
|
|
369
|
+
WSP_GGML_OP_FLASH_ATTN_BACK,
|
|
370
|
+
WSP_GGML_OP_WIN_PART,
|
|
371
|
+
WSP_GGML_OP_WIN_UNPART,
|
|
372
|
+
|
|
373
|
+
WSP_GGML_OP_MAP_UNARY,
|
|
374
|
+
WSP_GGML_OP_MAP_BINARY,
|
|
375
|
+
|
|
376
|
+
WSP_GGML_OP_MAP_CUSTOM1,
|
|
377
|
+
WSP_GGML_OP_MAP_CUSTOM2,
|
|
378
|
+
WSP_GGML_OP_MAP_CUSTOM3,
|
|
379
|
+
|
|
380
|
+
WSP_GGML_OP_CROSS_ENTROPY_LOSS,
|
|
381
|
+
WSP_GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
|
382
|
+
|
|
383
|
+
WSP_GGML_OP_COUNT,
|
|
384
384
|
};
|
|
385
385
|
|
|
386
386
|
|
|
387
387
|
// ggml object
|
|
388
|
-
struct
|
|
388
|
+
struct wsp_ggml_object {
|
|
389
389
|
size_t offs;
|
|
390
390
|
size_t size;
|
|
391
391
|
|
|
392
|
-
struct
|
|
392
|
+
struct wsp_ggml_object * next;
|
|
393
393
|
|
|
394
394
|
char padding[8];
|
|
395
395
|
};
|
|
396
396
|
|
|
397
|
-
static const size_t
|
|
397
|
+
static const size_t WSP_GGML_OBJECT_SIZE = sizeof(struct wsp_ggml_object);
|
|
398
398
|
|
|
399
399
|
// n-dimensional tensor
|
|
400
|
-
struct
|
|
401
|
-
enum
|
|
402
|
-
enum
|
|
400
|
+
struct wsp_ggml_tensor {
|
|
401
|
+
enum wsp_ggml_type type;
|
|
402
|
+
enum wsp_ggml_backend backend;
|
|
403
403
|
|
|
404
404
|
int n_dims;
|
|
405
|
-
int64_t ne[
|
|
406
|
-
size_t nb[
|
|
405
|
+
int64_t ne[WSP_GGML_MAX_DIMS]; // number of elements
|
|
406
|
+
size_t nb[WSP_GGML_MAX_DIMS]; // stride in bytes:
|
|
407
407
|
// nb[0] = sizeof(type)
|
|
408
408
|
// nb[1] = nb[0] * ne[0] + padding
|
|
409
409
|
// nb[i] = nb[i-1] * ne[i-1]
|
|
410
410
|
|
|
411
411
|
// compute data
|
|
412
|
-
enum
|
|
412
|
+
enum wsp_ggml_op op;
|
|
413
413
|
|
|
414
414
|
bool is_param;
|
|
415
415
|
|
|
416
|
-
struct
|
|
417
|
-
struct
|
|
418
|
-
struct
|
|
419
|
-
struct
|
|
416
|
+
struct wsp_ggml_tensor * grad;
|
|
417
|
+
struct wsp_ggml_tensor * src0;
|
|
418
|
+
struct wsp_ggml_tensor * src1;
|
|
419
|
+
struct wsp_ggml_tensor * opt[WSP_GGML_MAX_OPT];
|
|
420
420
|
|
|
421
421
|
// thread scheduling
|
|
422
422
|
int n_tasks;
|
|
@@ -428,27 +428,27 @@ extern "C" {
|
|
|
428
428
|
|
|
429
429
|
void * data;
|
|
430
430
|
|
|
431
|
-
char name[
|
|
431
|
+
char name[WSP_GGML_MAX_NAME];
|
|
432
432
|
|
|
433
433
|
void * extra; // extra things e.g. for ggml-cuda.cu
|
|
434
434
|
|
|
435
435
|
char padding[4];
|
|
436
436
|
};
|
|
437
437
|
|
|
438
|
-
static const size_t
|
|
438
|
+
static const size_t WSP_GGML_TENSOR_SIZE = sizeof(struct wsp_ggml_tensor);
|
|
439
439
|
|
|
440
440
|
// computation graph
|
|
441
|
-
struct
|
|
441
|
+
struct wsp_ggml_cgraph {
|
|
442
442
|
int n_nodes;
|
|
443
443
|
int n_leafs;
|
|
444
444
|
int n_threads;
|
|
445
445
|
|
|
446
446
|
size_t work_size;
|
|
447
|
-
struct
|
|
447
|
+
struct wsp_ggml_tensor * work;
|
|
448
448
|
|
|
449
|
-
struct
|
|
450
|
-
struct
|
|
451
|
-
struct
|
|
449
|
+
struct wsp_ggml_tensor * nodes[WSP_GGML_MAX_NODES];
|
|
450
|
+
struct wsp_ggml_tensor * grads[WSP_GGML_MAX_NODES];
|
|
451
|
+
struct wsp_ggml_tensor * leafs[WSP_GGML_MAX_NODES];
|
|
452
452
|
|
|
453
453
|
// performance
|
|
454
454
|
int perf_runs;
|
|
@@ -457,13 +457,13 @@ extern "C" {
|
|
|
457
457
|
};
|
|
458
458
|
|
|
459
459
|
// scratch buffer
|
|
460
|
-
struct
|
|
460
|
+
struct wsp_ggml_scratch {
|
|
461
461
|
size_t offs;
|
|
462
462
|
size_t size;
|
|
463
463
|
void * data;
|
|
464
464
|
};
|
|
465
465
|
|
|
466
|
-
struct
|
|
466
|
+
struct wsp_ggml_init_params {
|
|
467
467
|
// memory pool
|
|
468
468
|
size_t mem_size; // bytes
|
|
469
469
|
void * mem_buffer; // if NULL, memory will be allocated internally
|
|
@@ -475,14 +475,14 @@ extern "C" {
|
|
|
475
475
|
|
|
476
476
|
// NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
|
|
477
477
|
// This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
|
|
478
|
-
enum
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
478
|
+
enum wsp_ggml_task_type {
|
|
479
|
+
WSP_GGML_TASK_INIT = 0,
|
|
480
|
+
WSP_GGML_TASK_COMPUTE,
|
|
481
|
+
WSP_GGML_TASK_FINALIZE,
|
|
482
482
|
};
|
|
483
483
|
|
|
484
|
-
struct
|
|
485
|
-
enum
|
|
484
|
+
struct wsp_ggml_compute_params {
|
|
485
|
+
enum wsp_ggml_task_type type;
|
|
486
486
|
|
|
487
487
|
// ith = thread index, nth = number of threads
|
|
488
488
|
int ith, nth;
|
|
@@ -494,506 +494,506 @@ extern "C" {
|
|
|
494
494
|
|
|
495
495
|
// misc
|
|
496
496
|
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
497
|
+
WSP_GGML_API void wsp_ggml_time_init(void); // call this once at the beginning of the program
|
|
498
|
+
WSP_GGML_API int64_t wsp_ggml_time_ms(void);
|
|
499
|
+
WSP_GGML_API int64_t wsp_ggml_time_us(void);
|
|
500
|
+
WSP_GGML_API int64_t wsp_ggml_cycles(void);
|
|
501
|
+
WSP_GGML_API int64_t wsp_ggml_cycles_per_ms(void);
|
|
502
502
|
|
|
503
|
-
|
|
504
|
-
|
|
503
|
+
WSP_GGML_API void wsp_ggml_numa_init(void); // call once for better performance on NUMA systems
|
|
504
|
+
WSP_GGML_API bool wsp_ggml_is_numa(void); // true if init detected that system has >1 NUMA node
|
|
505
505
|
|
|
506
|
-
|
|
507
|
-
|
|
506
|
+
WSP_GGML_API void wsp_ggml_print_object (const struct wsp_ggml_object * obj);
|
|
507
|
+
WSP_GGML_API void wsp_ggml_print_objects(const struct wsp_ggml_context * ctx);
|
|
508
508
|
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
509
|
+
WSP_GGML_API int64_t wsp_ggml_nelements (const struct wsp_ggml_tensor * tensor);
|
|
510
|
+
WSP_GGML_API int64_t wsp_ggml_nrows (const struct wsp_ggml_tensor * tensor);
|
|
511
|
+
WSP_GGML_API size_t wsp_ggml_nbytes (const struct wsp_ggml_tensor * tensor);
|
|
512
|
+
WSP_GGML_API size_t wsp_ggml_nbytes_split(const struct wsp_ggml_tensor * tensor, int nrows_split);
|
|
513
513
|
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
514
|
+
WSP_GGML_API int wsp_ggml_blck_size (enum wsp_ggml_type type);
|
|
515
|
+
WSP_GGML_API size_t wsp_ggml_type_size (enum wsp_ggml_type type); // size in bytes for all elements in a block
|
|
516
|
+
WSP_GGML_API float wsp_ggml_type_sizef(enum wsp_ggml_type type); // wsp_ggml_type_size()/wsp_ggml_blck_size() as float
|
|
517
517
|
|
|
518
|
-
|
|
519
|
-
|
|
518
|
+
WSP_GGML_API const char * wsp_ggml_type_name(enum wsp_ggml_type type);
|
|
519
|
+
WSP_GGML_API const char * wsp_ggml_op_name (enum wsp_ggml_op op);
|
|
520
520
|
|
|
521
|
-
|
|
521
|
+
WSP_GGML_API size_t wsp_ggml_element_size(const struct wsp_ggml_tensor * tensor);
|
|
522
522
|
|
|
523
|
-
|
|
523
|
+
WSP_GGML_API bool wsp_ggml_is_quantized(enum wsp_ggml_type type);
|
|
524
524
|
|
|
525
525
|
// TODO: temporary until model loading of ggml examples is refactored
|
|
526
|
-
|
|
526
|
+
WSP_GGML_API enum wsp_ggml_type wsp_ggml_ftype_to_wsp_ggml_type(enum wsp_ggml_ftype ftype);
|
|
527
527
|
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
528
|
+
WSP_GGML_API bool wsp_ggml_is_transposed(const struct wsp_ggml_tensor * tensor);
|
|
529
|
+
WSP_GGML_API bool wsp_ggml_is_contiguous(const struct wsp_ggml_tensor * tensor);
|
|
530
|
+
WSP_GGML_API bool wsp_ggml_is_permuted (const struct wsp_ggml_tensor * tensor);
|
|
531
531
|
|
|
532
532
|
// use this to compute the memory overhead of a tensor
|
|
533
|
-
|
|
533
|
+
WSP_GGML_API size_t wsp_ggml_tensor_overhead(void);
|
|
534
534
|
|
|
535
535
|
// main
|
|
536
536
|
|
|
537
|
-
|
|
538
|
-
|
|
537
|
+
WSP_GGML_API struct wsp_ggml_context * wsp_ggml_init(struct wsp_ggml_init_params params);
|
|
538
|
+
WSP_GGML_API void wsp_ggml_free(struct wsp_ggml_context * ctx);
|
|
539
539
|
|
|
540
|
-
|
|
540
|
+
WSP_GGML_API size_t wsp_ggml_used_mem(const struct wsp_ggml_context * ctx);
|
|
541
541
|
|
|
542
|
-
|
|
543
|
-
|
|
542
|
+
WSP_GGML_API size_t wsp_ggml_set_scratch (struct wsp_ggml_context * ctx, struct wsp_ggml_scratch scratch);
|
|
543
|
+
WSP_GGML_API void wsp_ggml_set_no_alloc(struct wsp_ggml_context * ctx, bool no_alloc);
|
|
544
544
|
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
545
|
+
WSP_GGML_API void * wsp_ggml_get_mem_buffer (const struct wsp_ggml_context * ctx);
|
|
546
|
+
WSP_GGML_API size_t wsp_ggml_get_mem_size (const struct wsp_ggml_context * ctx);
|
|
547
|
+
WSP_GGML_API size_t wsp_ggml_get_max_tensor_size(const struct wsp_ggml_context * ctx);
|
|
548
548
|
|
|
549
|
-
|
|
550
|
-
struct
|
|
551
|
-
enum
|
|
549
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_tensor(
|
|
550
|
+
struct wsp_ggml_context * ctx,
|
|
551
|
+
enum wsp_ggml_type type,
|
|
552
552
|
int n_dims,
|
|
553
553
|
const int64_t *ne);
|
|
554
554
|
|
|
555
|
-
|
|
556
|
-
struct
|
|
557
|
-
enum
|
|
555
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_tensor_1d(
|
|
556
|
+
struct wsp_ggml_context * ctx,
|
|
557
|
+
enum wsp_ggml_type type,
|
|
558
558
|
int64_t ne0);
|
|
559
559
|
|
|
560
|
-
|
|
561
|
-
struct
|
|
562
|
-
enum
|
|
560
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_tensor_2d(
|
|
561
|
+
struct wsp_ggml_context * ctx,
|
|
562
|
+
enum wsp_ggml_type type,
|
|
563
563
|
int64_t ne0,
|
|
564
564
|
int64_t ne1);
|
|
565
565
|
|
|
566
|
-
|
|
567
|
-
struct
|
|
568
|
-
enum
|
|
566
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_tensor_3d(
|
|
567
|
+
struct wsp_ggml_context * ctx,
|
|
568
|
+
enum wsp_ggml_type type,
|
|
569
569
|
int64_t ne0,
|
|
570
570
|
int64_t ne1,
|
|
571
571
|
int64_t ne2);
|
|
572
572
|
|
|
573
|
-
|
|
574
|
-
struct
|
|
575
|
-
enum
|
|
573
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_tensor_4d(
|
|
574
|
+
struct wsp_ggml_context * ctx,
|
|
575
|
+
enum wsp_ggml_type type,
|
|
576
576
|
int64_t ne0,
|
|
577
577
|
int64_t ne1,
|
|
578
578
|
int64_t ne2,
|
|
579
579
|
int64_t ne3);
|
|
580
580
|
|
|
581
|
-
|
|
582
|
-
|
|
581
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_i32(struct wsp_ggml_context * ctx, int32_t value);
|
|
582
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_f32(struct wsp_ggml_context * ctx, float value);
|
|
583
583
|
|
|
584
|
-
|
|
585
|
-
|
|
584
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_dup_tensor (struct wsp_ggml_context * ctx, const struct wsp_ggml_tensor * src);
|
|
585
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_tensor(struct wsp_ggml_context * ctx, const struct wsp_ggml_tensor * src);
|
|
586
586
|
|
|
587
|
-
|
|
587
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_tensor(struct wsp_ggml_context * ctx, const char * name);
|
|
588
588
|
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
589
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_zero(struct wsp_ggml_tensor * tensor);
|
|
590
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_i32 (struct wsp_ggml_tensor * tensor, int32_t value);
|
|
591
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_f32 (struct wsp_ggml_tensor * tensor, float value);
|
|
592
592
|
|
|
593
|
-
|
|
594
|
-
|
|
593
|
+
WSP_GGML_API int32_t wsp_ggml_get_i32_1d(const struct wsp_ggml_tensor * tensor, int i);
|
|
594
|
+
WSP_GGML_API void wsp_ggml_set_i32_1d(const struct wsp_ggml_tensor * tensor, int i, int32_t value);
|
|
595
595
|
|
|
596
|
-
|
|
597
|
-
|
|
596
|
+
WSP_GGML_API float wsp_ggml_get_f32_1d(const struct wsp_ggml_tensor * tensor, int i);
|
|
597
|
+
WSP_GGML_API void wsp_ggml_set_f32_1d(const struct wsp_ggml_tensor * tensor, int i, float value);
|
|
598
598
|
|
|
599
|
-
|
|
600
|
-
|
|
599
|
+
WSP_GGML_API void * wsp_ggml_get_data (const struct wsp_ggml_tensor * tensor);
|
|
600
|
+
WSP_GGML_API float * wsp_ggml_get_data_f32(const struct wsp_ggml_tensor * tensor);
|
|
601
601
|
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
602
|
+
WSP_GGML_API const char * wsp_ggml_get_name(const struct wsp_ggml_tensor * tensor);
|
|
603
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_name(struct wsp_ggml_tensor * tensor, const char * name);
|
|
604
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_format_name(struct wsp_ggml_tensor * tensor, const char * fmt, ...);
|
|
605
605
|
|
|
606
606
|
//
|
|
607
607
|
// operations on tensors with backpropagation
|
|
608
608
|
//
|
|
609
609
|
|
|
610
|
-
|
|
611
|
-
struct
|
|
612
|
-
struct
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
struct
|
|
616
|
-
struct
|
|
617
|
-
struct
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
struct
|
|
621
|
-
struct
|
|
622
|
-
struct
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
struct
|
|
626
|
-
struct
|
|
627
|
-
struct
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
struct
|
|
631
|
-
struct
|
|
632
|
-
struct
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
struct
|
|
636
|
-
struct
|
|
637
|
-
struct
|
|
610
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_dup(
|
|
611
|
+
struct wsp_ggml_context * ctx,
|
|
612
|
+
struct wsp_ggml_tensor * a);
|
|
613
|
+
|
|
614
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add(
|
|
615
|
+
struct wsp_ggml_context * ctx,
|
|
616
|
+
struct wsp_ggml_tensor * a,
|
|
617
|
+
struct wsp_ggml_tensor * b);
|
|
618
|
+
|
|
619
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add_inplace(
|
|
620
|
+
struct wsp_ggml_context * ctx,
|
|
621
|
+
struct wsp_ggml_tensor * a,
|
|
622
|
+
struct wsp_ggml_tensor * b);
|
|
623
|
+
|
|
624
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add1(
|
|
625
|
+
struct wsp_ggml_context * ctx,
|
|
626
|
+
struct wsp_ggml_tensor * a,
|
|
627
|
+
struct wsp_ggml_tensor * b);
|
|
628
|
+
|
|
629
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add1_inplace(
|
|
630
|
+
struct wsp_ggml_context * ctx,
|
|
631
|
+
struct wsp_ggml_tensor * a,
|
|
632
|
+
struct wsp_ggml_tensor * b);
|
|
633
|
+
|
|
634
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_acc(
|
|
635
|
+
struct wsp_ggml_context * ctx,
|
|
636
|
+
struct wsp_ggml_tensor * a,
|
|
637
|
+
struct wsp_ggml_tensor * b,
|
|
638
638
|
size_t nb1,
|
|
639
639
|
size_t nb2,
|
|
640
640
|
size_t nb3,
|
|
641
641
|
size_t offset);
|
|
642
642
|
|
|
643
|
-
|
|
644
|
-
struct
|
|
645
|
-
struct
|
|
646
|
-
struct
|
|
643
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_acc_inplace(
|
|
644
|
+
struct wsp_ggml_context * ctx,
|
|
645
|
+
struct wsp_ggml_tensor * a,
|
|
646
|
+
struct wsp_ggml_tensor * b,
|
|
647
647
|
size_t nb1,
|
|
648
648
|
size_t nb2,
|
|
649
649
|
size_t nb3,
|
|
650
650
|
size_t offset);
|
|
651
651
|
|
|
652
|
-
|
|
653
|
-
struct
|
|
654
|
-
struct
|
|
655
|
-
struct
|
|
652
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sub(
|
|
653
|
+
struct wsp_ggml_context * ctx,
|
|
654
|
+
struct wsp_ggml_tensor * a,
|
|
655
|
+
struct wsp_ggml_tensor * b);
|
|
656
656
|
|
|
657
|
-
|
|
658
|
-
struct
|
|
659
|
-
struct
|
|
660
|
-
struct
|
|
657
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sub_inplace(
|
|
658
|
+
struct wsp_ggml_context * ctx,
|
|
659
|
+
struct wsp_ggml_tensor * a,
|
|
660
|
+
struct wsp_ggml_tensor * b);
|
|
661
661
|
|
|
662
|
-
|
|
663
|
-
struct
|
|
664
|
-
struct
|
|
665
|
-
struct
|
|
662
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mul(
|
|
663
|
+
struct wsp_ggml_context * ctx,
|
|
664
|
+
struct wsp_ggml_tensor * a,
|
|
665
|
+
struct wsp_ggml_tensor * b);
|
|
666
666
|
|
|
667
|
-
|
|
668
|
-
struct
|
|
669
|
-
struct
|
|
670
|
-
struct
|
|
667
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mul_inplace(
|
|
668
|
+
struct wsp_ggml_context * ctx,
|
|
669
|
+
struct wsp_ggml_tensor * a,
|
|
670
|
+
struct wsp_ggml_tensor * b);
|
|
671
671
|
|
|
672
|
-
|
|
673
|
-
struct
|
|
674
|
-
struct
|
|
675
|
-
struct
|
|
672
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_div(
|
|
673
|
+
struct wsp_ggml_context * ctx,
|
|
674
|
+
struct wsp_ggml_tensor * a,
|
|
675
|
+
struct wsp_ggml_tensor * b);
|
|
676
676
|
|
|
677
|
-
|
|
678
|
-
struct
|
|
679
|
-
struct
|
|
680
|
-
struct
|
|
677
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_div_inplace(
|
|
678
|
+
struct wsp_ggml_context * ctx,
|
|
679
|
+
struct wsp_ggml_tensor * a,
|
|
680
|
+
struct wsp_ggml_tensor * b);
|
|
681
681
|
|
|
682
|
-
|
|
683
|
-
struct
|
|
684
|
-
struct
|
|
682
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sqr(
|
|
683
|
+
struct wsp_ggml_context * ctx,
|
|
684
|
+
struct wsp_ggml_tensor * a);
|
|
685
685
|
|
|
686
|
-
|
|
687
|
-
struct
|
|
688
|
-
struct
|
|
686
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sqr_inplace(
|
|
687
|
+
struct wsp_ggml_context * ctx,
|
|
688
|
+
struct wsp_ggml_tensor * a);
|
|
689
689
|
|
|
690
|
-
|
|
691
|
-
struct
|
|
692
|
-
struct
|
|
690
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sqrt(
|
|
691
|
+
struct wsp_ggml_context * ctx,
|
|
692
|
+
struct wsp_ggml_tensor * a);
|
|
693
693
|
|
|
694
|
-
|
|
695
|
-
struct
|
|
696
|
-
struct
|
|
694
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sqrt_inplace(
|
|
695
|
+
struct wsp_ggml_context * ctx,
|
|
696
|
+
struct wsp_ggml_tensor * a);
|
|
697
697
|
|
|
698
|
-
|
|
699
|
-
struct
|
|
700
|
-
struct
|
|
698
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_log(
|
|
699
|
+
struct wsp_ggml_context * ctx,
|
|
700
|
+
struct wsp_ggml_tensor * a);
|
|
701
701
|
|
|
702
|
-
|
|
703
|
-
struct
|
|
704
|
-
struct
|
|
702
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_log_inplace(
|
|
703
|
+
struct wsp_ggml_context * ctx,
|
|
704
|
+
struct wsp_ggml_tensor * a);
|
|
705
705
|
|
|
706
706
|
// return scalar
|
|
707
|
-
|
|
708
|
-
struct
|
|
709
|
-
struct
|
|
707
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sum(
|
|
708
|
+
struct wsp_ggml_context * ctx,
|
|
709
|
+
struct wsp_ggml_tensor * a);
|
|
710
710
|
|
|
711
711
|
// sums along rows, with input shape [a,b,c,d] return shape [1,b,c,d]
|
|
712
|
-
|
|
713
|
-
struct
|
|
714
|
-
struct
|
|
712
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sum_rows(
|
|
713
|
+
struct wsp_ggml_context * ctx,
|
|
714
|
+
struct wsp_ggml_tensor * a);
|
|
715
715
|
|
|
716
716
|
// mean along rows
|
|
717
|
-
|
|
718
|
-
struct
|
|
719
|
-
struct
|
|
717
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mean(
|
|
718
|
+
struct wsp_ggml_context * ctx,
|
|
719
|
+
struct wsp_ggml_tensor * a);
|
|
720
720
|
|
|
721
721
|
// argmax along rows
|
|
722
|
-
|
|
723
|
-
struct
|
|
724
|
-
struct
|
|
722
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_argmax(
|
|
723
|
+
struct wsp_ggml_context * ctx,
|
|
724
|
+
struct wsp_ggml_tensor * a);
|
|
725
725
|
|
|
726
726
|
// if a is the same shape as b, and a is not parameter, return a
|
|
727
727
|
// otherwise, return a new tensor: repeat(a) to fit in b
|
|
728
|
-
|
|
729
|
-
struct
|
|
730
|
-
struct
|
|
731
|
-
struct
|
|
728
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_repeat(
|
|
729
|
+
struct wsp_ggml_context * ctx,
|
|
730
|
+
struct wsp_ggml_tensor * a,
|
|
731
|
+
struct wsp_ggml_tensor * b);
|
|
732
732
|
|
|
733
|
-
|
|
734
|
-
struct
|
|
735
|
-
struct
|
|
736
|
-
struct
|
|
733
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_repeat_back(
|
|
734
|
+
struct wsp_ggml_context * ctx,
|
|
735
|
+
struct wsp_ggml_tensor * a,
|
|
736
|
+
struct wsp_ggml_tensor * b);
|
|
737
737
|
|
|
738
|
-
|
|
739
|
-
struct
|
|
740
|
-
struct
|
|
738
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_abs(
|
|
739
|
+
struct wsp_ggml_context * ctx,
|
|
740
|
+
struct wsp_ggml_tensor * a);
|
|
741
741
|
|
|
742
|
-
|
|
743
|
-
struct
|
|
744
|
-
struct
|
|
742
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_abs_inplace(
|
|
743
|
+
struct wsp_ggml_context * ctx,
|
|
744
|
+
struct wsp_ggml_tensor * a);
|
|
745
745
|
|
|
746
|
-
|
|
747
|
-
struct
|
|
748
|
-
struct
|
|
746
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sgn(
|
|
747
|
+
struct wsp_ggml_context * ctx,
|
|
748
|
+
struct wsp_ggml_tensor * a);
|
|
749
749
|
|
|
750
|
-
|
|
751
|
-
struct
|
|
752
|
-
struct
|
|
750
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sgn_inplace(
|
|
751
|
+
struct wsp_ggml_context * ctx,
|
|
752
|
+
struct wsp_ggml_tensor * a);
|
|
753
753
|
|
|
754
|
-
|
|
755
|
-
struct
|
|
756
|
-
struct
|
|
754
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_neg(
|
|
755
|
+
struct wsp_ggml_context * ctx,
|
|
756
|
+
struct wsp_ggml_tensor * a);
|
|
757
757
|
|
|
758
|
-
|
|
759
|
-
struct
|
|
760
|
-
struct
|
|
758
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_neg_inplace(
|
|
759
|
+
struct wsp_ggml_context * ctx,
|
|
760
|
+
struct wsp_ggml_tensor * a);
|
|
761
761
|
|
|
762
|
-
|
|
763
|
-
struct
|
|
764
|
-
struct
|
|
762
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_step(
|
|
763
|
+
struct wsp_ggml_context * ctx,
|
|
764
|
+
struct wsp_ggml_tensor * a);
|
|
765
765
|
|
|
766
|
-
|
|
767
|
-
struct
|
|
768
|
-
struct
|
|
766
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_step_inplace(
|
|
767
|
+
struct wsp_ggml_context * ctx,
|
|
768
|
+
struct wsp_ggml_tensor * a);
|
|
769
769
|
|
|
770
|
-
|
|
771
|
-
struct
|
|
772
|
-
struct
|
|
770
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_tanh(
|
|
771
|
+
struct wsp_ggml_context * ctx,
|
|
772
|
+
struct wsp_ggml_tensor * a);
|
|
773
773
|
|
|
774
|
-
|
|
775
|
-
struct
|
|
776
|
-
struct
|
|
774
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_tanh_inplace(
|
|
775
|
+
struct wsp_ggml_context * ctx,
|
|
776
|
+
struct wsp_ggml_tensor * a);
|
|
777
777
|
|
|
778
|
-
|
|
779
|
-
struct
|
|
780
|
-
struct
|
|
778
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_elu(
|
|
779
|
+
struct wsp_ggml_context * ctx,
|
|
780
|
+
struct wsp_ggml_tensor * a);
|
|
781
781
|
|
|
782
|
-
|
|
783
|
-
struct
|
|
784
|
-
struct
|
|
782
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_elu_inplace(
|
|
783
|
+
struct wsp_ggml_context * ctx,
|
|
784
|
+
struct wsp_ggml_tensor * a);
|
|
785
785
|
|
|
786
|
-
|
|
787
|
-
struct
|
|
788
|
-
struct
|
|
786
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_relu(
|
|
787
|
+
struct wsp_ggml_context * ctx,
|
|
788
|
+
struct wsp_ggml_tensor * a);
|
|
789
789
|
|
|
790
|
-
|
|
791
|
-
struct
|
|
792
|
-
struct
|
|
790
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_relu_inplace(
|
|
791
|
+
struct wsp_ggml_context * ctx,
|
|
792
|
+
struct wsp_ggml_tensor * a);
|
|
793
793
|
|
|
794
794
|
// TODO: double-check this computation is correct
|
|
795
|
-
|
|
796
|
-
struct
|
|
797
|
-
struct
|
|
795
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_gelu(
|
|
796
|
+
struct wsp_ggml_context * ctx,
|
|
797
|
+
struct wsp_ggml_tensor * a);
|
|
798
798
|
|
|
799
|
-
|
|
800
|
-
struct
|
|
801
|
-
struct
|
|
799
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_gelu_inplace(
|
|
800
|
+
struct wsp_ggml_context * ctx,
|
|
801
|
+
struct wsp_ggml_tensor * a);
|
|
802
802
|
|
|
803
|
-
|
|
804
|
-
struct
|
|
805
|
-
struct
|
|
803
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_gelu_quick(
|
|
804
|
+
struct wsp_ggml_context * ctx,
|
|
805
|
+
struct wsp_ggml_tensor * a);
|
|
806
806
|
|
|
807
|
-
|
|
808
|
-
struct
|
|
809
|
-
struct
|
|
807
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_gelu_quick_inplace(
|
|
808
|
+
struct wsp_ggml_context * ctx,
|
|
809
|
+
struct wsp_ggml_tensor * a);
|
|
810
810
|
|
|
811
|
-
|
|
812
|
-
struct
|
|
813
|
-
struct
|
|
811
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_silu(
|
|
812
|
+
struct wsp_ggml_context * ctx,
|
|
813
|
+
struct wsp_ggml_tensor * a);
|
|
814
814
|
|
|
815
|
-
|
|
816
|
-
struct
|
|
817
|
-
struct
|
|
815
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_silu_inplace(
|
|
816
|
+
struct wsp_ggml_context * ctx,
|
|
817
|
+
struct wsp_ggml_tensor * a);
|
|
818
818
|
|
|
819
819
|
// a - x
|
|
820
820
|
// b - dy
|
|
821
|
-
|
|
822
|
-
struct
|
|
823
|
-
struct
|
|
824
|
-
struct
|
|
821
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_silu_back(
|
|
822
|
+
struct wsp_ggml_context * ctx,
|
|
823
|
+
struct wsp_ggml_tensor * a,
|
|
824
|
+
struct wsp_ggml_tensor * b);
|
|
825
825
|
|
|
826
826
|
// normalize along rows
|
|
827
827
|
// TODO: eps is hardcoded to 1e-5 for now
|
|
828
|
-
|
|
829
|
-
struct
|
|
830
|
-
struct
|
|
828
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_norm(
|
|
829
|
+
struct wsp_ggml_context * ctx,
|
|
830
|
+
struct wsp_ggml_tensor * a);
|
|
831
831
|
|
|
832
|
-
|
|
833
|
-
struct
|
|
834
|
-
struct
|
|
832
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_norm_inplace(
|
|
833
|
+
struct wsp_ggml_context * ctx,
|
|
834
|
+
struct wsp_ggml_tensor * a);
|
|
835
835
|
|
|
836
|
-
|
|
837
|
-
struct
|
|
838
|
-
struct
|
|
836
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rms_norm(
|
|
837
|
+
struct wsp_ggml_context * ctx,
|
|
838
|
+
struct wsp_ggml_tensor * a);
|
|
839
839
|
|
|
840
|
-
|
|
841
|
-
struct
|
|
842
|
-
struct
|
|
840
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rms_norm_inplace(
|
|
841
|
+
struct wsp_ggml_context * ctx,
|
|
842
|
+
struct wsp_ggml_tensor * a);
|
|
843
843
|
|
|
844
844
|
// a - x
|
|
845
845
|
// b - dy
|
|
846
|
-
|
|
847
|
-
struct
|
|
848
|
-
struct
|
|
849
|
-
struct
|
|
846
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rms_norm_back(
|
|
847
|
+
struct wsp_ggml_context * ctx,
|
|
848
|
+
struct wsp_ggml_tensor * a,
|
|
849
|
+
struct wsp_ggml_tensor * b);
|
|
850
850
|
|
|
851
851
|
// A: n columns, m rows
|
|
852
852
|
// B: n columns, p rows (i.e. we transpose it internally)
|
|
853
853
|
// result is m columns, p rows
|
|
854
|
-
|
|
855
|
-
struct
|
|
856
|
-
struct
|
|
857
|
-
struct
|
|
854
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mul_mat(
|
|
855
|
+
struct wsp_ggml_context * ctx,
|
|
856
|
+
struct wsp_ggml_tensor * a,
|
|
857
|
+
struct wsp_ggml_tensor * b);
|
|
858
858
|
|
|
859
859
|
// A: m columns, n rows,
|
|
860
860
|
// B: p columns, n rows,
|
|
861
861
|
// result is m columns, p rows
|
|
862
|
-
|
|
863
|
-
struct
|
|
864
|
-
struct
|
|
865
|
-
struct
|
|
862
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_out_prod(
|
|
863
|
+
struct wsp_ggml_context * ctx,
|
|
864
|
+
struct wsp_ggml_tensor * a,
|
|
865
|
+
struct wsp_ggml_tensor * b);
|
|
866
866
|
|
|
867
867
|
//
|
|
868
868
|
// operations on tensors without backpropagation
|
|
869
869
|
//
|
|
870
870
|
|
|
871
|
-
|
|
872
|
-
struct
|
|
873
|
-
struct
|
|
874
|
-
struct
|
|
871
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_scale(
|
|
872
|
+
struct wsp_ggml_context * ctx,
|
|
873
|
+
struct wsp_ggml_tensor * a,
|
|
874
|
+
struct wsp_ggml_tensor * b);
|
|
875
875
|
|
|
876
876
|
// in-place, returns view(a)
|
|
877
|
-
|
|
878
|
-
struct
|
|
879
|
-
struct
|
|
880
|
-
struct
|
|
877
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_scale_inplace(
|
|
878
|
+
struct wsp_ggml_context * ctx,
|
|
879
|
+
struct wsp_ggml_tensor * a,
|
|
880
|
+
struct wsp_ggml_tensor * b);
|
|
881
881
|
|
|
882
882
|
// b -> view(a,offset,nb1,nb2,3), return modified a
|
|
883
|
-
|
|
884
|
-
struct
|
|
885
|
-
struct
|
|
886
|
-
struct
|
|
883
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set(
|
|
884
|
+
struct wsp_ggml_context * ctx,
|
|
885
|
+
struct wsp_ggml_tensor * a,
|
|
886
|
+
struct wsp_ggml_tensor * b,
|
|
887
887
|
size_t nb1,
|
|
888
888
|
size_t nb2,
|
|
889
889
|
size_t nb3,
|
|
890
890
|
size_t offset);
|
|
891
891
|
|
|
892
892
|
// b -> view(a,offset,nb1,nb2,3), return view(a)
|
|
893
|
-
|
|
894
|
-
struct
|
|
895
|
-
struct
|
|
896
|
-
struct
|
|
893
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_inplace(
|
|
894
|
+
struct wsp_ggml_context * ctx,
|
|
895
|
+
struct wsp_ggml_tensor * a,
|
|
896
|
+
struct wsp_ggml_tensor * b,
|
|
897
897
|
size_t nb1,
|
|
898
898
|
size_t nb2,
|
|
899
899
|
size_t nb3,
|
|
900
900
|
size_t offset);
|
|
901
901
|
|
|
902
|
-
|
|
903
|
-
struct
|
|
904
|
-
struct
|
|
905
|
-
struct
|
|
902
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_1d(
|
|
903
|
+
struct wsp_ggml_context * ctx,
|
|
904
|
+
struct wsp_ggml_tensor * a,
|
|
905
|
+
struct wsp_ggml_tensor * b,
|
|
906
906
|
size_t offset);
|
|
907
907
|
|
|
908
|
-
|
|
909
|
-
struct
|
|
910
|
-
struct
|
|
911
|
-
struct
|
|
908
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_1d_inplace(
|
|
909
|
+
struct wsp_ggml_context * ctx,
|
|
910
|
+
struct wsp_ggml_tensor * a,
|
|
911
|
+
struct wsp_ggml_tensor * b,
|
|
912
912
|
size_t offset);
|
|
913
913
|
|
|
914
914
|
// b -> view(a,offset,nb1,nb2,3), return modified a
|
|
915
|
-
|
|
916
|
-
struct
|
|
917
|
-
struct
|
|
918
|
-
struct
|
|
915
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_2d(
|
|
916
|
+
struct wsp_ggml_context * ctx,
|
|
917
|
+
struct wsp_ggml_tensor * a,
|
|
918
|
+
struct wsp_ggml_tensor * b,
|
|
919
919
|
size_t nb1,
|
|
920
920
|
size_t offset);
|
|
921
921
|
|
|
922
922
|
// b -> view(a,offset,nb1,nb2,3), return view(a)
|
|
923
|
-
|
|
924
|
-
struct
|
|
925
|
-
struct
|
|
926
|
-
struct
|
|
923
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_2d_inplace(
|
|
924
|
+
struct wsp_ggml_context * ctx,
|
|
925
|
+
struct wsp_ggml_tensor * a,
|
|
926
|
+
struct wsp_ggml_tensor * b,
|
|
927
927
|
size_t nb1,
|
|
928
928
|
size_t offset);
|
|
929
929
|
|
|
930
930
|
|
|
931
931
|
// a -> b, return view(b)
|
|
932
|
-
|
|
933
|
-
struct
|
|
934
|
-
struct
|
|
935
|
-
struct
|
|
932
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cpy(
|
|
933
|
+
struct wsp_ggml_context * ctx,
|
|
934
|
+
struct wsp_ggml_tensor * a,
|
|
935
|
+
struct wsp_ggml_tensor * b);
|
|
936
936
|
|
|
937
937
|
// make contiguous
|
|
938
|
-
|
|
939
|
-
struct
|
|
940
|
-
struct
|
|
938
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cont(
|
|
939
|
+
struct wsp_ggml_context * ctx,
|
|
940
|
+
struct wsp_ggml_tensor * a);
|
|
941
941
|
|
|
942
942
|
// return view(a), b specifies the new shape
|
|
943
943
|
// TODO: when we start computing gradient, make a copy instead of view
|
|
944
|
-
|
|
945
|
-
struct
|
|
946
|
-
struct
|
|
947
|
-
struct
|
|
944
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reshape(
|
|
945
|
+
struct wsp_ggml_context * ctx,
|
|
946
|
+
struct wsp_ggml_tensor * a,
|
|
947
|
+
struct wsp_ggml_tensor * b);
|
|
948
948
|
|
|
949
949
|
// return view(a)
|
|
950
950
|
// TODO: when we start computing gradient, make a copy instead of view
|
|
951
|
-
|
|
952
|
-
struct
|
|
953
|
-
struct
|
|
951
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reshape_1d(
|
|
952
|
+
struct wsp_ggml_context * ctx,
|
|
953
|
+
struct wsp_ggml_tensor * a,
|
|
954
954
|
int64_t ne0);
|
|
955
955
|
|
|
956
|
-
|
|
957
|
-
struct
|
|
958
|
-
struct
|
|
956
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reshape_2d(
|
|
957
|
+
struct wsp_ggml_context * ctx,
|
|
958
|
+
struct wsp_ggml_tensor * a,
|
|
959
959
|
int64_t ne0,
|
|
960
960
|
int64_t ne1);
|
|
961
961
|
|
|
962
962
|
// return view(a)
|
|
963
963
|
// TODO: when we start computing gradient, make a copy instead of view
|
|
964
|
-
|
|
965
|
-
struct
|
|
966
|
-
struct
|
|
964
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reshape_3d(
|
|
965
|
+
struct wsp_ggml_context * ctx,
|
|
966
|
+
struct wsp_ggml_tensor * a,
|
|
967
967
|
int64_t ne0,
|
|
968
968
|
int64_t ne1,
|
|
969
969
|
int64_t ne2);
|
|
970
970
|
|
|
971
|
-
|
|
972
|
-
struct
|
|
973
|
-
struct
|
|
971
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reshape_4d(
|
|
972
|
+
struct wsp_ggml_context * ctx,
|
|
973
|
+
struct wsp_ggml_tensor * a,
|
|
974
974
|
int64_t ne0,
|
|
975
975
|
int64_t ne1,
|
|
976
976
|
int64_t ne2,
|
|
977
977
|
int64_t ne3);
|
|
978
978
|
|
|
979
979
|
// offset in bytes
|
|
980
|
-
|
|
981
|
-
struct
|
|
982
|
-
struct
|
|
980
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_1d(
|
|
981
|
+
struct wsp_ggml_context * ctx,
|
|
982
|
+
struct wsp_ggml_tensor * a,
|
|
983
983
|
int64_t ne0,
|
|
984
984
|
size_t offset);
|
|
985
985
|
|
|
986
|
-
|
|
987
|
-
struct
|
|
988
|
-
struct
|
|
986
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_2d(
|
|
987
|
+
struct wsp_ggml_context * ctx,
|
|
988
|
+
struct wsp_ggml_tensor * a,
|
|
989
989
|
int64_t ne0,
|
|
990
990
|
int64_t ne1,
|
|
991
991
|
size_t nb1, // row stride in bytes
|
|
992
992
|
size_t offset);
|
|
993
993
|
|
|
994
|
-
|
|
995
|
-
struct
|
|
996
|
-
struct
|
|
994
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_3d(
|
|
995
|
+
struct wsp_ggml_context * ctx,
|
|
996
|
+
struct wsp_ggml_tensor * a,
|
|
997
997
|
int64_t ne0,
|
|
998
998
|
int64_t ne1,
|
|
999
999
|
int64_t ne2,
|
|
@@ -1001,9 +1001,9 @@ extern "C" {
|
|
|
1001
1001
|
size_t nb2, // slice stride in bytes
|
|
1002
1002
|
size_t offset);
|
|
1003
1003
|
|
|
1004
|
-
|
|
1005
|
-
struct
|
|
1006
|
-
struct
|
|
1004
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_4d(
|
|
1005
|
+
struct wsp_ggml_context * ctx,
|
|
1006
|
+
struct wsp_ggml_tensor * a,
|
|
1007
1007
|
int64_t ne0,
|
|
1008
1008
|
int64_t ne1,
|
|
1009
1009
|
int64_t ne2,
|
|
@@ -1013,95 +1013,95 @@ extern "C" {
|
|
|
1013
1013
|
size_t nb3,
|
|
1014
1014
|
size_t offset);
|
|
1015
1015
|
|
|
1016
|
-
|
|
1017
|
-
struct
|
|
1018
|
-
struct
|
|
1016
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_permute(
|
|
1017
|
+
struct wsp_ggml_context * ctx,
|
|
1018
|
+
struct wsp_ggml_tensor * a,
|
|
1019
1019
|
int axis0,
|
|
1020
1020
|
int axis1,
|
|
1021
1021
|
int axis2,
|
|
1022
1022
|
int axis3);
|
|
1023
1023
|
|
|
1024
|
-
// alias for
|
|
1025
|
-
|
|
1026
|
-
struct
|
|
1027
|
-
struct
|
|
1024
|
+
// alias for wsp_ggml_permute(ctx, a, 1, 0, 2, 3)
|
|
1025
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_transpose(
|
|
1026
|
+
struct wsp_ggml_context * ctx,
|
|
1027
|
+
struct wsp_ggml_tensor * a);
|
|
1028
1028
|
|
|
1029
|
-
|
|
1030
|
-
struct
|
|
1031
|
-
struct
|
|
1032
|
-
struct
|
|
1029
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_rows(
|
|
1030
|
+
struct wsp_ggml_context * ctx,
|
|
1031
|
+
struct wsp_ggml_tensor * a,
|
|
1032
|
+
struct wsp_ggml_tensor * b);
|
|
1033
1033
|
|
|
1034
|
-
|
|
1035
|
-
struct
|
|
1036
|
-
struct
|
|
1037
|
-
struct
|
|
1038
|
-
struct
|
|
1034
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_rows_back(
|
|
1035
|
+
struct wsp_ggml_context * ctx,
|
|
1036
|
+
struct wsp_ggml_tensor * a,
|
|
1037
|
+
struct wsp_ggml_tensor * b,
|
|
1038
|
+
struct wsp_ggml_tensor * c);
|
|
1039
1039
|
|
|
1040
|
-
|
|
1041
|
-
struct
|
|
1042
|
-
struct
|
|
1040
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag(
|
|
1041
|
+
struct wsp_ggml_context * ctx,
|
|
1042
|
+
struct wsp_ggml_tensor * a);
|
|
1043
1043
|
|
|
1044
1044
|
// set elements above the diagonal to -INF
|
|
1045
|
-
|
|
1046
|
-
struct
|
|
1047
|
-
struct
|
|
1045
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag_mask_inf(
|
|
1046
|
+
struct wsp_ggml_context * ctx,
|
|
1047
|
+
struct wsp_ggml_tensor * a,
|
|
1048
1048
|
int n_past);
|
|
1049
1049
|
|
|
1050
1050
|
// in-place, returns view(a)
|
|
1051
|
-
|
|
1052
|
-
struct
|
|
1053
|
-
struct
|
|
1051
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag_mask_inf_inplace(
|
|
1052
|
+
struct wsp_ggml_context * ctx,
|
|
1053
|
+
struct wsp_ggml_tensor * a,
|
|
1054
1054
|
int n_past);
|
|
1055
1055
|
|
|
1056
1056
|
// set elements above the diagonal to 0
|
|
1057
|
-
|
|
1058
|
-
struct
|
|
1059
|
-
struct
|
|
1057
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag_mask_zero(
|
|
1058
|
+
struct wsp_ggml_context * ctx,
|
|
1059
|
+
struct wsp_ggml_tensor * a,
|
|
1060
1060
|
int n_past);
|
|
1061
1061
|
|
|
1062
1062
|
// in-place, returns view(a)
|
|
1063
|
-
|
|
1064
|
-
struct
|
|
1065
|
-
struct
|
|
1063
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag_mask_zero_inplace(
|
|
1064
|
+
struct wsp_ggml_context * ctx,
|
|
1065
|
+
struct wsp_ggml_tensor * a,
|
|
1066
1066
|
int n_past);
|
|
1067
1067
|
|
|
1068
|
-
|
|
1069
|
-
struct
|
|
1070
|
-
struct
|
|
1068
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max(
|
|
1069
|
+
struct wsp_ggml_context * ctx,
|
|
1070
|
+
struct wsp_ggml_tensor * a);
|
|
1071
1071
|
|
|
1072
1072
|
// in-place, returns view(a)
|
|
1073
|
-
|
|
1074
|
-
struct
|
|
1075
|
-
struct
|
|
1073
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_inplace(
|
|
1074
|
+
struct wsp_ggml_context * ctx,
|
|
1075
|
+
struct wsp_ggml_tensor * a);
|
|
1076
1076
|
|
|
1077
|
-
|
|
1078
|
-
struct
|
|
1079
|
-
struct
|
|
1080
|
-
struct
|
|
1077
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_back(
|
|
1078
|
+
struct wsp_ggml_context * ctx,
|
|
1079
|
+
struct wsp_ggml_tensor * a,
|
|
1080
|
+
struct wsp_ggml_tensor * b);
|
|
1081
1081
|
|
|
1082
1082
|
// in-place, returns view(a)
|
|
1083
|
-
|
|
1084
|
-
struct
|
|
1085
|
-
struct
|
|
1086
|
-
struct
|
|
1083
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_back_inplace(
|
|
1084
|
+
struct wsp_ggml_context * ctx,
|
|
1085
|
+
struct wsp_ggml_tensor * a,
|
|
1086
|
+
struct wsp_ggml_tensor * b);
|
|
1087
1087
|
|
|
1088
1088
|
// rotary position embedding
|
|
1089
1089
|
// if mode & 1 == 1, skip n_past elements
|
|
1090
1090
|
// if mode & 2 == 1, GPT-NeoX style
|
|
1091
1091
|
// if mode & 4 == 1, ChatGLM style
|
|
1092
1092
|
// TODO: avoid creating a new tensor every time
|
|
1093
|
-
|
|
1094
|
-
struct
|
|
1095
|
-
struct
|
|
1093
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope(
|
|
1094
|
+
struct wsp_ggml_context * ctx,
|
|
1095
|
+
struct wsp_ggml_tensor * a,
|
|
1096
1096
|
int n_past,
|
|
1097
1097
|
int n_dims,
|
|
1098
1098
|
int mode,
|
|
1099
1099
|
int n_ctx);
|
|
1100
1100
|
|
|
1101
1101
|
// in-place, returns view(a)
|
|
1102
|
-
|
|
1103
|
-
struct
|
|
1104
|
-
struct
|
|
1102
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_inplace(
|
|
1103
|
+
struct wsp_ggml_context * ctx,
|
|
1104
|
+
struct wsp_ggml_tensor * a,
|
|
1105
1105
|
int n_past,
|
|
1106
1106
|
int n_dims,
|
|
1107
1107
|
int mode,
|
|
@@ -1109,42 +1109,42 @@ extern "C" {
|
|
|
1109
1109
|
|
|
1110
1110
|
// rotary position embedding backward, i.e compute dx from dy
|
|
1111
1111
|
// a - dy
|
|
1112
|
-
|
|
1113
|
-
struct
|
|
1114
|
-
struct
|
|
1112
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_back(
|
|
1113
|
+
struct wsp_ggml_context * ctx,
|
|
1114
|
+
struct wsp_ggml_tensor * a,
|
|
1115
1115
|
int n_past,
|
|
1116
1116
|
int n_dims,
|
|
1117
1117
|
int mode);
|
|
1118
1118
|
|
|
1119
1119
|
// alibi position embedding
|
|
1120
1120
|
// in-place, returns view(a)
|
|
1121
|
-
struct
|
|
1122
|
-
struct
|
|
1123
|
-
struct
|
|
1121
|
+
struct wsp_ggml_tensor * wsp_ggml_alibi(
|
|
1122
|
+
struct wsp_ggml_context * ctx,
|
|
1123
|
+
struct wsp_ggml_tensor * a,
|
|
1124
1124
|
int n_past,
|
|
1125
1125
|
int n_head,
|
|
1126
1126
|
float bias_max);
|
|
1127
1127
|
|
|
1128
1128
|
// clamp
|
|
1129
1129
|
// in-place, returns view(a)
|
|
1130
|
-
struct
|
|
1131
|
-
struct
|
|
1132
|
-
struct
|
|
1130
|
+
struct wsp_ggml_tensor * wsp_ggml_clamp(
|
|
1131
|
+
struct wsp_ggml_context * ctx,
|
|
1132
|
+
struct wsp_ggml_tensor * a,
|
|
1133
1133
|
float min,
|
|
1134
1134
|
float max);
|
|
1135
1135
|
|
|
1136
|
-
|
|
1137
|
-
struct
|
|
1138
|
-
struct
|
|
1139
|
-
struct
|
|
1136
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_1d(
|
|
1137
|
+
struct wsp_ggml_context * ctx,
|
|
1138
|
+
struct wsp_ggml_tensor * a,
|
|
1139
|
+
struct wsp_ggml_tensor * b,
|
|
1140
1140
|
int s0, // stride
|
|
1141
1141
|
int p0, // padding
|
|
1142
1142
|
int d0); // dilation
|
|
1143
1143
|
|
|
1144
|
-
|
|
1145
|
-
struct
|
|
1146
|
-
struct
|
|
1147
|
-
struct
|
|
1144
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_2d(
|
|
1145
|
+
struct wsp_ggml_context * ctx,
|
|
1146
|
+
struct wsp_ggml_tensor * a,
|
|
1147
|
+
struct wsp_ggml_tensor * b,
|
|
1148
1148
|
int s0,
|
|
1149
1149
|
int s1,
|
|
1150
1150
|
int p0,
|
|
@@ -1153,36 +1153,36 @@ extern "C" {
|
|
|
1153
1153
|
int d1);
|
|
1154
1154
|
|
|
1155
1155
|
// conv_1d with padding = half
|
|
1156
|
-
// alias for
|
|
1157
|
-
|
|
1158
|
-
struct
|
|
1159
|
-
struct
|
|
1160
|
-
struct
|
|
1156
|
+
// alias for wsp_ggml_conv_1d(a, b, s, a->ne[0]/2, d)
|
|
1157
|
+
WSP_GGML_API struct wsp_ggml_tensor* wsp_ggml_conv_1d_ph(
|
|
1158
|
+
struct wsp_ggml_context * ctx,
|
|
1159
|
+
struct wsp_ggml_tensor * a,
|
|
1160
|
+
struct wsp_ggml_tensor * b,
|
|
1161
1161
|
int s,
|
|
1162
1162
|
int d);
|
|
1163
1163
|
|
|
1164
|
-
|
|
1165
|
-
struct
|
|
1166
|
-
struct
|
|
1167
|
-
struct
|
|
1168
|
-
struct
|
|
1164
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_flash_attn(
|
|
1165
|
+
struct wsp_ggml_context * ctx,
|
|
1166
|
+
struct wsp_ggml_tensor * q,
|
|
1167
|
+
struct wsp_ggml_tensor * k,
|
|
1168
|
+
struct wsp_ggml_tensor * v,
|
|
1169
1169
|
bool masked);
|
|
1170
1170
|
|
|
1171
|
-
|
|
1172
|
-
struct
|
|
1173
|
-
struct
|
|
1174
|
-
struct
|
|
1175
|
-
struct
|
|
1176
|
-
struct
|
|
1171
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_flash_attn_back(
|
|
1172
|
+
struct wsp_ggml_context * ctx,
|
|
1173
|
+
struct wsp_ggml_tensor * q,
|
|
1174
|
+
struct wsp_ggml_tensor * k,
|
|
1175
|
+
struct wsp_ggml_tensor * v,
|
|
1176
|
+
struct wsp_ggml_tensor * d,
|
|
1177
1177
|
bool masked);
|
|
1178
1178
|
|
|
1179
|
-
|
|
1180
|
-
struct
|
|
1181
|
-
struct
|
|
1182
|
-
struct
|
|
1183
|
-
struct
|
|
1184
|
-
struct
|
|
1185
|
-
struct
|
|
1179
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_flash_ff(
|
|
1180
|
+
struct wsp_ggml_context * ctx,
|
|
1181
|
+
struct wsp_ggml_tensor * a,
|
|
1182
|
+
struct wsp_ggml_tensor * b0,
|
|
1183
|
+
struct wsp_ggml_tensor * b1,
|
|
1184
|
+
struct wsp_ggml_tensor * c0,
|
|
1185
|
+
struct wsp_ggml_tensor * c1);
|
|
1186
1186
|
|
|
1187
1187
|
// partition into non-overlapping windows with padding if needed
|
|
1188
1188
|
// example:
|
|
@@ -1190,167 +1190,167 @@ extern "C" {
|
|
|
1190
1190
|
// w: 14
|
|
1191
1191
|
// res: 768 14 14 25
|
|
1192
1192
|
// used in sam
|
|
1193
|
-
|
|
1194
|
-
struct
|
|
1195
|
-
struct
|
|
1193
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_win_part(
|
|
1194
|
+
struct wsp_ggml_context * ctx,
|
|
1195
|
+
struct wsp_ggml_tensor * a,
|
|
1196
1196
|
int w);
|
|
1197
1197
|
|
|
1198
|
-
// reverse of
|
|
1198
|
+
// reverse of wsp_ggml_win_part
|
|
1199
1199
|
// used in sam
|
|
1200
|
-
|
|
1201
|
-
struct
|
|
1202
|
-
struct
|
|
1200
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_win_unpart(
|
|
1201
|
+
struct wsp_ggml_context * ctx,
|
|
1202
|
+
struct wsp_ggml_tensor * a,
|
|
1203
1203
|
int w0,
|
|
1204
1204
|
int h0,
|
|
1205
1205
|
int w);
|
|
1206
1206
|
|
|
1207
1207
|
// custom operators
|
|
1208
1208
|
|
|
1209
|
-
typedef void (*
|
|
1210
|
-
typedef void (*
|
|
1211
|
-
|
|
1212
|
-
typedef void (*
|
|
1213
|
-
typedef void (*
|
|
1214
|
-
typedef void (*
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
struct
|
|
1218
|
-
struct
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
struct
|
|
1223
|
-
struct
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
struct
|
|
1228
|
-
struct
|
|
1229
|
-
struct
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
struct
|
|
1234
|
-
struct
|
|
1235
|
-
struct
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
struct
|
|
1240
|
-
struct
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
struct
|
|
1245
|
-
struct
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
struct
|
|
1250
|
-
struct
|
|
1251
|
-
struct
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
struct
|
|
1256
|
-
struct
|
|
1257
|
-
struct
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
struct
|
|
1262
|
-
struct
|
|
1263
|
-
struct
|
|
1264
|
-
struct
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
struct
|
|
1269
|
-
struct
|
|
1270
|
-
struct
|
|
1271
|
-
struct
|
|
1272
|
-
|
|
1209
|
+
typedef void (*wsp_ggml_unary_op_f32_t) (const int, float *, const float *);
|
|
1210
|
+
typedef void (*wsp_ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
|
1211
|
+
|
|
1212
|
+
typedef void (*wsp_ggml_custom1_op_f32_t)(struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *);
|
|
1213
|
+
typedef void (*wsp_ggml_custom2_op_f32_t)(struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *);
|
|
1214
|
+
typedef void (*wsp_ggml_custom3_op_f32_t)(struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *);
|
|
1215
|
+
|
|
1216
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_unary_f32(
|
|
1217
|
+
struct wsp_ggml_context * ctx,
|
|
1218
|
+
struct wsp_ggml_tensor * a,
|
|
1219
|
+
wsp_ggml_unary_op_f32_t fun);
|
|
1220
|
+
|
|
1221
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_unary_inplace_f32(
|
|
1222
|
+
struct wsp_ggml_context * ctx,
|
|
1223
|
+
struct wsp_ggml_tensor * a,
|
|
1224
|
+
wsp_ggml_unary_op_f32_t fun);
|
|
1225
|
+
|
|
1226
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_binary_f32(
|
|
1227
|
+
struct wsp_ggml_context * ctx,
|
|
1228
|
+
struct wsp_ggml_tensor * a,
|
|
1229
|
+
struct wsp_ggml_tensor * b,
|
|
1230
|
+
wsp_ggml_binary_op_f32_t fun);
|
|
1231
|
+
|
|
1232
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_binary_inplace_f32(
|
|
1233
|
+
struct wsp_ggml_context * ctx,
|
|
1234
|
+
struct wsp_ggml_tensor * a,
|
|
1235
|
+
struct wsp_ggml_tensor * b,
|
|
1236
|
+
wsp_ggml_binary_op_f32_t fun);
|
|
1237
|
+
|
|
1238
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom1_f32(
|
|
1239
|
+
struct wsp_ggml_context * ctx,
|
|
1240
|
+
struct wsp_ggml_tensor * a,
|
|
1241
|
+
wsp_ggml_custom1_op_f32_t fun);
|
|
1242
|
+
|
|
1243
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom1_inplace_f32(
|
|
1244
|
+
struct wsp_ggml_context * ctx,
|
|
1245
|
+
struct wsp_ggml_tensor * a,
|
|
1246
|
+
wsp_ggml_custom1_op_f32_t fun);
|
|
1247
|
+
|
|
1248
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom2_f32(
|
|
1249
|
+
struct wsp_ggml_context * ctx,
|
|
1250
|
+
struct wsp_ggml_tensor * a,
|
|
1251
|
+
struct wsp_ggml_tensor * b,
|
|
1252
|
+
wsp_ggml_custom2_op_f32_t fun);
|
|
1253
|
+
|
|
1254
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom2_inplace_f32(
|
|
1255
|
+
struct wsp_ggml_context * ctx,
|
|
1256
|
+
struct wsp_ggml_tensor * a,
|
|
1257
|
+
struct wsp_ggml_tensor * b,
|
|
1258
|
+
wsp_ggml_custom2_op_f32_t fun);
|
|
1259
|
+
|
|
1260
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom3_f32(
|
|
1261
|
+
struct wsp_ggml_context * ctx,
|
|
1262
|
+
struct wsp_ggml_tensor * a,
|
|
1263
|
+
struct wsp_ggml_tensor * b,
|
|
1264
|
+
struct wsp_ggml_tensor * c,
|
|
1265
|
+
wsp_ggml_custom3_op_f32_t fun);
|
|
1266
|
+
|
|
1267
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom3_inplace_f32(
|
|
1268
|
+
struct wsp_ggml_context * ctx,
|
|
1269
|
+
struct wsp_ggml_tensor * a,
|
|
1270
|
+
struct wsp_ggml_tensor * b,
|
|
1271
|
+
struct wsp_ggml_tensor * c,
|
|
1272
|
+
wsp_ggml_custom3_op_f32_t fun);
|
|
1273
1273
|
|
|
1274
1274
|
// loss function
|
|
1275
1275
|
|
|
1276
|
-
|
|
1277
|
-
struct
|
|
1278
|
-
struct
|
|
1279
|
-
struct
|
|
1276
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cross_entropy_loss(
|
|
1277
|
+
struct wsp_ggml_context * ctx,
|
|
1278
|
+
struct wsp_ggml_tensor * a,
|
|
1279
|
+
struct wsp_ggml_tensor * b);
|
|
1280
1280
|
|
|
1281
|
-
|
|
1282
|
-
struct
|
|
1283
|
-
struct
|
|
1284
|
-
struct
|
|
1285
|
-
struct
|
|
1281
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cross_entropy_loss_back(
|
|
1282
|
+
struct wsp_ggml_context * ctx,
|
|
1283
|
+
struct wsp_ggml_tensor * a,
|
|
1284
|
+
struct wsp_ggml_tensor * b,
|
|
1285
|
+
struct wsp_ggml_tensor * c);
|
|
1286
1286
|
|
|
1287
1287
|
//
|
|
1288
1288
|
// automatic differentiation
|
|
1289
1289
|
//
|
|
1290
1290
|
|
|
1291
|
-
|
|
1292
|
-
struct
|
|
1293
|
-
struct
|
|
1291
|
+
WSP_GGML_API void wsp_ggml_set_param(
|
|
1292
|
+
struct wsp_ggml_context * ctx,
|
|
1293
|
+
struct wsp_ggml_tensor * tensor);
|
|
1294
1294
|
|
|
1295
|
-
|
|
1295
|
+
WSP_GGML_API void wsp_ggml_build_forward_expand(struct wsp_ggml_cgraph * cgraph, struct wsp_ggml_tensor * tensor);
|
|
1296
1296
|
|
|
1297
|
-
|
|
1298
|
-
|
|
1297
|
+
WSP_GGML_API struct wsp_ggml_cgraph wsp_ggml_build_forward (struct wsp_ggml_tensor * tensor);
|
|
1298
|
+
WSP_GGML_API struct wsp_ggml_cgraph wsp_ggml_build_backward(struct wsp_ggml_context * ctx, struct wsp_ggml_cgraph * gf, bool keep);
|
|
1299
1299
|
|
|
1300
|
-
|
|
1301
|
-
|
|
1300
|
+
WSP_GGML_API void wsp_ggml_graph_compute(struct wsp_ggml_context * ctx, struct wsp_ggml_cgraph * cgraph);
|
|
1301
|
+
WSP_GGML_API void wsp_ggml_graph_reset (struct wsp_ggml_cgraph * cgraph);
|
|
1302
1302
|
|
|
1303
|
-
|
|
1303
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_graph_get_tensor(struct wsp_ggml_cgraph * cgraph, const char * name);
|
|
1304
1304
|
|
|
1305
|
-
|
|
1306
|
-
|
|
1305
|
+
WSP_GGML_API void wsp_ggml_graph_export(const struct wsp_ggml_cgraph * cgraph, const char * fname);
|
|
1306
|
+
WSP_GGML_API struct wsp_ggml_cgraph wsp_ggml_graph_import(const char * fname, struct wsp_ggml_context ** ctx_data, struct wsp_ggml_context ** ctx_eval);
|
|
1307
1307
|
|
|
1308
1308
|
// print info and performance information for the graph
|
|
1309
|
-
|
|
1309
|
+
WSP_GGML_API void wsp_ggml_graph_print(const struct wsp_ggml_cgraph * cgraph);
|
|
1310
1310
|
|
|
1311
1311
|
// dump the graph into a file using the dot format
|
|
1312
|
-
|
|
1312
|
+
WSP_GGML_API void wsp_ggml_graph_dump_dot(const struct wsp_ggml_cgraph * gb, const struct wsp_ggml_cgraph * gf, const char * filename);
|
|
1313
1313
|
|
|
1314
1314
|
//
|
|
1315
1315
|
// optimization
|
|
1316
1316
|
//
|
|
1317
1317
|
|
|
1318
1318
|
// optimization methods
|
|
1319
|
-
enum
|
|
1320
|
-
|
|
1321
|
-
|
|
1319
|
+
// Selects which optimizer is used. Stored in the `type` field of
// struct wsp_ggml_opt_params and accepted by wsp_ggml_opt_default_params().
// No explicit values are given, so the enumerators are numbered 0, 1 in order.
enum wsp_ggml_opt_type {
|
|
1320
|
+
// implicit value 0
WSP_GGML_OPT_ADAM,
|
|
1321
|
+
// implicit value 1
WSP_GGML_OPT_LBFGS,
|
|
1322
1322
|
};
|
|
1323
1323
|
|
|
1324
1324
|
// linesearch methods
|
|
1325
|
-
enum
|
|
1326
|
-
|
|
1325
|
+
// Line search variants. The selected value is stored in the `linesearch`
// field of the `lbfgs` parameter group of struct wsp_ggml_opt_params.
enum wsp_ggml_linesearch {
|
|
1326
|
+
// Alias, not a distinct method: has the same numeric value (1) as
// WSP_GGML_LINESEARCH_BACKTRACKING_WOLFE below.
WSP_GGML_LINESEARCH_DEFAULT = 1,
|
|
1327
1327
|
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1328
|
+
WSP_GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0,
|
|
1329
|
+
// Same value as WSP_GGML_LINESEARCH_DEFAULT.
WSP_GGML_LINESEARCH_BACKTRACKING_WOLFE = 1,
|
|
1330
|
+
WSP_GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
|
|
1331
1331
|
};
|
|
1332
1332
|
|
|
1333
1333
|
// optimization return values
|
|
1334
|
-
enum
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1334
|
+
// Result codes returned by wsp_ggml_opt(), wsp_ggml_opt_resume() and
// wsp_ggml_opt_resume_g().
// The enumerators form two numeric ranges: the WSP_GGML_OPT_* codes count up
// from 0, and the WSP_GGML_LINESEARCH_* codes count up from -128.
enum wsp_ggml_opt_result {
|
|
1335
|
+
WSP_GGML_OPT_OK = 0,
|
|
1336
|
+
// implicit value 1
WSP_GGML_OPT_DID_NOT_CONVERGE,
|
|
1337
|
+
// implicit value 2
WSP_GGML_OPT_NO_CONTEXT,
|
|
1338
|
+
// implicit value 3
WSP_GGML_OPT_INVALID_WOLFE,
|
|
1339
|
+
// implicit value 4
WSP_GGML_OPT_FAIL,
|
|
1340
|
+
|
|
1341
|
+
// The explicit initializer restarts numbering: every enumerator from here
// on is negative.
WSP_GGML_LINESEARCH_FAIL = -128,
|
|
1342
|
+
// implicit value -127
WSP_GGML_LINESEARCH_MINIMUM_STEP,
|
|
1343
|
+
// implicit value -126
WSP_GGML_LINESEARCH_MAXIMUM_STEP,
|
|
1344
|
+
// implicit value -125
WSP_GGML_LINESEARCH_MAXIMUM_ITERATIONS,
|
|
1345
|
+
// implicit value -124
WSP_GGML_LINESEARCH_INVALID_PARAMETERS,
|
|
1346
1346
|
};
|
|
1347
1347
|
|
|
1348
1348
|
// optimization parameters
|
|
1349
1349
|
//
|
|
1350
|
-
// see ggml.c (
|
|
1350
|
+
// see ggml.c (wsp_ggml_opt_default_params) for default values
|
|
1351
1351
|
//
|
|
1352
|
-
struct
|
|
1353
|
-
enum
|
|
1352
|
+
struct wsp_ggml_opt_params {
|
|
1353
|
+
enum wsp_ggml_opt_type type;
|
|
1354
1354
|
|
|
1355
1355
|
int n_threads;
|
|
1356
1356
|
|
|
@@ -1400,13 +1400,13 @@ extern "C" {
|
|
|
1400
1400
|
float min_step;
|
|
1401
1401
|
float max_step;
|
|
1402
1402
|
|
|
1403
|
-
enum
|
|
1403
|
+
enum wsp_ggml_linesearch linesearch;
|
|
1404
1404
|
} lbfgs;
|
|
1405
1405
|
};
|
|
1406
1406
|
|
|
1407
|
-
struct
|
|
1408
|
-
struct
|
|
1409
|
-
struct
|
|
1407
|
+
struct wsp_ggml_opt_context {
|
|
1408
|
+
struct wsp_ggml_context * ctx;
|
|
1409
|
+
struct wsp_ggml_opt_params params;
|
|
1410
1410
|
|
|
1411
1411
|
int iter;
|
|
1412
1412
|
int64_t nx; // number of parameter elements
|
|
@@ -1414,30 +1414,30 @@ extern "C" {
|
|
|
1414
1414
|
bool just_initialized;
|
|
1415
1415
|
|
|
1416
1416
|
struct {
|
|
1417
|
-
struct
|
|
1418
|
-
struct
|
|
1419
|
-
struct
|
|
1420
|
-
struct
|
|
1421
|
-
struct
|
|
1422
|
-
struct
|
|
1423
|
-
struct
|
|
1424
|
-
struct
|
|
1417
|
+
struct wsp_ggml_tensor * x; // view of the parameters
|
|
1418
|
+
struct wsp_ggml_tensor * g1; // gradient
|
|
1419
|
+
struct wsp_ggml_tensor * g2; // gradient squared
|
|
1420
|
+
struct wsp_ggml_tensor * m; // first moment
|
|
1421
|
+
struct wsp_ggml_tensor * v; // second moment
|
|
1422
|
+
struct wsp_ggml_tensor * mh; // first moment hat
|
|
1423
|
+
struct wsp_ggml_tensor * vh; // second moment hat
|
|
1424
|
+
struct wsp_ggml_tensor * pf; // past function values
|
|
1425
1425
|
float fx_best;
|
|
1426
1426
|
float fx_prev;
|
|
1427
1427
|
int n_no_improvement;
|
|
1428
1428
|
} adam;
|
|
1429
1429
|
|
|
1430
1430
|
struct {
|
|
1431
|
-
struct
|
|
1432
|
-
struct
|
|
1433
|
-
struct
|
|
1434
|
-
struct
|
|
1435
|
-
struct
|
|
1436
|
-
struct
|
|
1437
|
-
struct
|
|
1438
|
-
struct
|
|
1439
|
-
struct
|
|
1440
|
-
struct
|
|
1431
|
+
struct wsp_ggml_tensor * x; // current parameters
|
|
1432
|
+
struct wsp_ggml_tensor * xp; // previous parameters
|
|
1433
|
+
struct wsp_ggml_tensor * g; // current gradient
|
|
1434
|
+
struct wsp_ggml_tensor * gp; // previous gradient
|
|
1435
|
+
struct wsp_ggml_tensor * d; // search direction
|
|
1436
|
+
struct wsp_ggml_tensor * pf; // past function values
|
|
1437
|
+
struct wsp_ggml_tensor * lmal; // the L-BFGS memory alpha
|
|
1438
|
+
struct wsp_ggml_tensor * lmys; // the L-BFGS memory ys
|
|
1439
|
+
struct wsp_ggml_tensor * lms; // the L-BFGS memory s
|
|
1440
|
+
struct wsp_ggml_tensor * lmy; // the L-BFGS memory y
|
|
1441
1441
|
float fx_best;
|
|
1442
1442
|
float step;
|
|
1443
1443
|
int j;
|
|
@@ -1447,68 +1447,68 @@ extern "C" {
|
|
|
1447
1447
|
} lbfgs;
|
|
1448
1448
|
};
|
|
1449
1449
|
|
|
1450
|
-
|
|
1450
|
+
WSP_GGML_API struct wsp_ggml_opt_params wsp_ggml_opt_default_params(enum wsp_ggml_opt_type type);
|
|
1451
1451
|
|
|
1452
1452
|
// optimize the function defined by the tensor f
|
|
1453
|
-
|
|
1454
|
-
struct
|
|
1455
|
-
struct
|
|
1456
|
-
struct
|
|
1453
|
+
WSP_GGML_API enum wsp_ggml_opt_result wsp_ggml_opt(
|
|
1454
|
+
struct wsp_ggml_context * ctx,
|
|
1455
|
+
struct wsp_ggml_opt_params params,
|
|
1456
|
+
struct wsp_ggml_tensor * f);
|
|
1457
1457
|
|
|
1458
1458
|
// initialize optimizer context
|
|
1459
|
-
|
|
1460
|
-
struct
|
|
1461
|
-
struct
|
|
1462
|
-
struct
|
|
1459
|
+
WSP_GGML_API void wsp_ggml_opt_init(
|
|
1460
|
+
struct wsp_ggml_context * ctx,
|
|
1461
|
+
struct wsp_ggml_opt_context * opt,
|
|
1462
|
+
struct wsp_ggml_opt_params params,
|
|
1463
1463
|
int64_t nx);
|
|
1464
1464
|
|
|
1465
1465
|
// continue optimizing the function defined by the tensor f
|
|
1466
|
-
|
|
1467
|
-
struct
|
|
1468
|
-
struct
|
|
1469
|
-
struct
|
|
1466
|
+
WSP_GGML_API enum wsp_ggml_opt_result wsp_ggml_opt_resume(
|
|
1467
|
+
struct wsp_ggml_context * ctx,
|
|
1468
|
+
struct wsp_ggml_opt_context * opt,
|
|
1469
|
+
struct wsp_ggml_tensor * f);
|
|
1470
1470
|
|
|
1471
1471
|
// continue optimizing the function defined by the tensor f, using the caller-supplied graphs gf and gb
|
|
1472
|
-
|
|
1473
|
-
struct
|
|
1474
|
-
struct
|
|
1475
|
-
struct
|
|
1476
|
-
struct
|
|
1477
|
-
struct
|
|
1472
|
+
WSP_GGML_API enum wsp_ggml_opt_result wsp_ggml_opt_resume_g(
|
|
1473
|
+
struct wsp_ggml_context * ctx,
|
|
1474
|
+
struct wsp_ggml_opt_context * opt,
|
|
1475
|
+
struct wsp_ggml_tensor * f,
|
|
1476
|
+
struct wsp_ggml_cgraph * gf,
|
|
1477
|
+
struct wsp_ggml_cgraph * gb);
|
|
1478
1478
|
|
|
1479
1479
|
//
|
|
1480
1480
|
// quantization
|
|
1481
1481
|
//
|
|
1482
1482
|
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1483
|
+
WSP_GGML_API size_t wsp_ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
|
|
1484
|
+
WSP_GGML_API size_t wsp_ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
|
|
1485
|
+
WSP_GGML_API size_t wsp_ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
|
|
1486
|
+
WSP_GGML_API size_t wsp_ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
|
|
1487
|
+
WSP_GGML_API size_t wsp_ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
|
|
1488
1488
|
|
|
1489
|
-
|
|
1489
|
+
WSP_GGML_API size_t wsp_ggml_quantize_chunk(enum wsp_ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
|
|
1490
1490
|
|
|
1491
1491
|
//
|
|
1492
1492
|
// system info
|
|
1493
1493
|
//
|
|
1494
1494
|
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1495
|
+
WSP_GGML_API int wsp_ggml_cpu_has_avx (void);
|
|
1496
|
+
WSP_GGML_API int wsp_ggml_cpu_has_avx2 (void);
|
|
1497
|
+
WSP_GGML_API int wsp_ggml_cpu_has_avx512 (void);
|
|
1498
|
+
WSP_GGML_API int wsp_ggml_cpu_has_avx512_vbmi(void);
|
|
1499
|
+
WSP_GGML_API int wsp_ggml_cpu_has_avx512_vnni(void);
|
|
1500
|
+
WSP_GGML_API int wsp_ggml_cpu_has_fma (void);
|
|
1501
|
+
WSP_GGML_API int wsp_ggml_cpu_has_neon (void);
|
|
1502
|
+
WSP_GGML_API int wsp_ggml_cpu_has_arm_fma (void);
|
|
1503
|
+
WSP_GGML_API int wsp_ggml_cpu_has_f16c (void);
|
|
1504
|
+
WSP_GGML_API int wsp_ggml_cpu_has_fp16_va (void);
|
|
1505
|
+
WSP_GGML_API int wsp_ggml_cpu_has_wasm_simd (void);
|
|
1506
|
+
WSP_GGML_API int wsp_ggml_cpu_has_blas (void);
|
|
1507
|
+
WSP_GGML_API int wsp_ggml_cpu_has_cublas (void);
|
|
1508
|
+
WSP_GGML_API int wsp_ggml_cpu_has_clblast (void);
|
|
1509
|
+
WSP_GGML_API int wsp_ggml_cpu_has_gpublas (void);
|
|
1510
|
+
WSP_GGML_API int wsp_ggml_cpu_has_sse3 (void);
|
|
1511
|
+
WSP_GGML_API int wsp_ggml_cpu_has_vsx (void);
|
|
1512
1512
|
|
|
1513
1513
|
//
|
|
1514
1514
|
// Internal types and functions exposed for tests and benchmarks
|
|
@@ -1516,13 +1516,13 @@ extern "C" {
|
|
|
1516
1516
|
|
|
1517
1517
|
#ifdef __cplusplus
|
|
1518
1518
|
// restrict is not a standard keyword in C++, so the macro expands to nothing there
|
|
1519
|
-
#define
|
|
1519
|
+
#define WSP_GGML_RESTRICT
|
|
1520
1520
|
#else
|
|
1521
|
-
#define
|
|
1521
|
+
#define WSP_GGML_RESTRICT restrict
|
|
1522
1522
|
#endif
|
|
1523
|
-
typedef void (*dequantize_row_q_t)(const void *
|
|
1524
|
-
typedef void (*quantize_row_q_t) (const float *
|
|
1525
|
-
typedef void (*vec_dot_q_t) (const int n, float *
|
|
1523
|
+
typedef void (*dequantize_row_q_t)(const void * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int k);
|
|
1524
|
+
typedef void (*quantize_row_q_t) (const float * WSP_GGML_RESTRICT x, void * WSP_GGML_RESTRICT y, int k);
|
|
1525
|
+
typedef void (*vec_dot_q_t) (const int n, float * WSP_GGML_RESTRICT s, const void * WSP_GGML_RESTRICT x, const void * WSP_GGML_RESTRICT y);
|
|
1526
1526
|
|
|
1527
1527
|
typedef struct {
|
|
1528
1528
|
dequantize_row_q_t dequantize_row_q;
|
|
@@ -1530,10 +1530,10 @@ extern "C" {
|
|
|
1530
1530
|
quantize_row_q_t quantize_row_q_reference;
|
|
1531
1531
|
quantize_row_q_t quantize_row_q_dot;
|
|
1532
1532
|
vec_dot_q_t vec_dot_q;
|
|
1533
|
-
enum
|
|
1533
|
+
enum wsp_ggml_type vec_dot_type;
|
|
1534
1534
|
} quantize_fns_t;
|
|
1535
1535
|
|
|
1536
|
-
quantize_fns_t
|
|
1536
|
+
quantize_fns_t wsp_ggml_internal_get_quantize_fn(size_t i);
|
|
1537
1537
|
|
|
1538
1538
|
#ifdef __cplusplus
|
|
1539
1539
|
}
|