whisper.rn 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cpp/ggml.h CHANGED
@@ -32,22 +32,22 @@
32
32
  // For example, here we define the function: f(x) = a*x^2 + b
33
33
  //
34
34
  // {
35
- // struct ggml_init_params params = {
35
+ // struct wsp_ggml_init_params params = {
36
36
  // .mem_size = 16*1024*1024,
37
37
  // .mem_buffer = NULL,
38
38
  // };
39
39
  //
40
40
  // // memory allocation happens here
41
- // struct ggml_context * ctx = ggml_init(params);
41
+ // struct wsp_ggml_context * ctx = wsp_ggml_init(params);
42
42
  //
43
- // struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
43
+ // struct wsp_ggml_tensor * x = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 1);
44
44
  //
45
- // ggml_set_param(ctx, x); // x is an input variable
45
+ // wsp_ggml_set_param(ctx, x); // x is an input variable
46
46
  //
47
- // struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
48
- // struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
49
- // struct ggml_tensor * x2 = ggml_mul(ctx, x, x);
50
- // struct ggml_tensor * f = ggml_add(ctx, ggml_mul(ctx, a, x2), b);
47
+ // struct wsp_ggml_tensor * a = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 1);
48
+ // struct wsp_ggml_tensor * b = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 1);
49
+ // struct wsp_ggml_tensor * x2 = wsp_ggml_mul(ctx, x, x);
50
+ // struct wsp_ggml_tensor * f = wsp_ggml_add(ctx, wsp_ggml_mul(ctx, a, x2), b);
51
51
  //
52
52
  // ...
53
53
  // }
@@ -58,33 +58,33 @@
58
58
  // {
59
59
  // ...
60
60
  //
61
- // struct ggml_cgraph gf = ggml_build_forward(f);
61
+ // struct wsp_ggml_cgraph gf = wsp_ggml_build_forward(f);
62
62
  //
63
63
  // // set the input variable and parameter values
64
- // ggml_set_f32(x, 2.0f);
65
- // ggml_set_f32(a, 3.0f);
66
- // ggml_set_f32(b, 4.0f);
64
+ // wsp_ggml_set_f32(x, 2.0f);
65
+ // wsp_ggml_set_f32(a, 3.0f);
66
+ // wsp_ggml_set_f32(b, 4.0f);
67
67
  //
68
- // ggml_graph_compute(ctx0, &gf);
68
+ // wsp_ggml_graph_compute(ctx0, &gf);
69
69
  //
70
- // printf("f = %f\n", ggml_get_f32_1d(f, 0));
70
+ // printf("f = %f\n", wsp_ggml_get_f32_1d(f, 0));
71
71
  //
72
72
  // ...
73
73
  // }
74
74
  //
75
- // The actual computation is performed in the ggml_graph_compute() function.
75
+ // The actual computation is performed in the wsp_ggml_graph_compute() function.
76
76
  //
77
- // The ggml_new_tensor_...() functions create new tensors. They are allocated in the memory buffer provided to the
78
- // ggml_init() function. You have to be careful not to exceed the memory buffer size. Therefore, you have to know
77
+ // The wsp_ggml_new_tensor_...() functions create new tensors. They are allocated in the memory buffer provided to the
78
+ // wsp_ggml_init() function. You have to be careful not to exceed the memory buffer size. Therefore, you have to know
79
79
  // in advance how much memory you need for your computation. Alternatively, you can allocate a large enough memory
80
- // and after defining the computation graph, call the ggml_used_mem() function to find out how much memory was
80
+ // and after defining the computation graph, call the wsp_ggml_used_mem() function to find out how much memory was
81
81
  // actually needed.
82
82
  //
83
- // The ggml_set_param() function marks a tensor as an input variable. This is used by the automatic
83
+ // The wsp_ggml_set_param() function marks a tensor as an input variable. This is used by the automatic
84
84
  // differentiation and optimization algorithms.
85
85
  //
86
86
  // The described approach allows to define the function graph once and then compute its forward or backward graphs
87
- // multiple times. All computations will use the same memory buffer allocated in the ggml_init() function. This way
87
+ // multiple times. All computations will use the same memory buffer allocated in the wsp_ggml_init() function. This way
88
88
  // the user can avoid the memory allocation overhead at runtime.
89
89
  //
90
90
  // The library supports multi-dimensional tensors - up to 4 dimensions. The FP16 and FP32 data types are first class
@@ -95,9 +95,9 @@
95
95
  // clear that the library needs to support more complex operations. The way to support these operations is not clear
96
96
  // yet, but a few examples are demonstrated in the following operations:
97
97
  //
98
- // - ggml_permute()
99
- // - ggml_conv_1d_1s()
100
- // - ggml_conv_1d_2s()
98
+ // - wsp_ggml_permute()
99
+ // - wsp_ggml_conv_1d_1s()
100
+ // - wsp_ggml_conv_1d_2s()
101
101
  //
102
102
  // For each tensor operator, the library implements a forward and backward computation function. The forward function
103
103
  // computes the output tensor value given the input tensor values. The backward function computes the adjoint of the
@@ -108,20 +108,20 @@
108
108
  // https://www.youtube.com/watch?v=wG_nF1awSSY
109
109
  //
110
110
  //
111
- // ## Tensor data (struct ggml_tensor)
111
+ // ## Tensor data (struct wsp_ggml_tensor)
112
112
  //
113
- // The tensors are stored in memory via the ggml_tensor struct. The structure provides information about the size of
113
+ // The tensors are stored in memory via the wsp_ggml_tensor struct. The structure provides information about the size of
114
114
  // the tensor, the data type, and the memory buffer where the tensor data is stored. Additionally, it contains
115
115
  // pointers to the "source" tensors - i.e. the tensors that were used to compute the current tensor. For example:
116
116
  //
117
117
  // {
118
- // struct ggml_tensor * c = ggml_add(ctx, a, b);
118
+ // struct wsp_ggml_tensor * c = wsp_ggml_add(ctx, a, b);
119
119
  //
120
120
  // assert(c->src[0] == a);
121
121
  // assert(c->src[1] == b);
122
122
  // }
123
123
  //
124
- // The multi-dimensional tensors are stored in row-major order. The ggml_tensor struct contains fields for the
124
+ // The multi-dimensional tensors are stored in row-major order. The wsp_ggml_tensor struct contains fields for the
125
125
  // number of elements in each dimension ("ne") as well as the number of bytes ("nb", a.k.a. stride). This allows
126
126
  // to store tensors that are not contiguous in memory, which is useful for operations such as transposition and
127
127
  // permutation. All tensor operations have to take the stride into account and not assume that the tensor is
@@ -130,7 +130,7 @@
130
130
  // The data of the tensor is accessed via the "data" pointer. For example:
131
131
  //
132
132
  // {
133
- // struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
133
+ // struct wsp_ggml_tensor * a = wsp_ggml_new_tensor_2d(ctx, WSP_GGML_TYPE_F32, 2, 3);
134
134
  //
135
135
  // // a[1, 2] = 1.0f;
136
136
  // *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
@@ -141,9 +141,9 @@
141
141
  // ...
142
142
  // }
143
143
  //
144
- // Alternatively, there are helper functions, such as ggml_get_f32_1d() and ggml_set_f32_1d() that can be used.
144
+ // Alternatively, there are helper functions, such as wsp_ggml_get_f32_1d() and wsp_ggml_set_f32_1d() that can be used.
145
145
  //
146
- // ## The matrix multiplication operator (ggml_mul_mat)
146
+ // ## The matrix multiplication operator (wsp_ggml_mul_mat)
147
147
  //
148
148
  // TODO
149
149
  //
@@ -169,44 +169,44 @@
169
169
  //
170
170
  //
171
171
 
172
- #ifdef GGML_SHARED
172
+ #ifdef WSP_GGML_SHARED
173
173
  # if defined(_WIN32) && !defined(__MINGW32__)
174
- # ifdef GGML_BUILD
175
- # define GGML_API __declspec(dllexport)
174
+ # ifdef WSP_GGML_BUILD
175
+ # define WSP_GGML_API __declspec(dllexport)
176
176
  # else
177
- # define GGML_API __declspec(dllimport)
177
+ # define WSP_GGML_API __declspec(dllimport)
178
178
  # endif
179
179
  # else
180
- # define GGML_API __attribute__ ((visibility ("default")))
180
+ # define WSP_GGML_API __attribute__ ((visibility ("default")))
181
181
  # endif
182
182
  #else
183
- # define GGML_API
183
+ # define WSP_GGML_API
184
184
  #endif
185
185
 
186
186
  #include <stdint.h>
187
187
  #include <stddef.h>
188
188
  #include <stdbool.h>
189
189
 
190
- #define GGML_FILE_MAGIC 0x67676d6c // "ggml"
191
- #define GGML_FILE_VERSION 1
190
+ #define WSP_GGML_FILE_MAGIC 0x67676d6c // "ggml"
191
+ #define WSP_GGML_FILE_VERSION 1
192
192
 
193
- #define GGML_QNT_VERSION 2 // bump this on quantization format changes
194
- #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
193
+ #define WSP_GGML_QNT_VERSION 2 // bump this on quantization format changes
194
+ #define WSP_GGML_QNT_VERSION_FACTOR 1000 // do not change this
195
195
 
196
- #define GGML_MAX_DIMS 4
197
- #define GGML_MAX_NODES 4096
198
- #define GGML_MAX_PARAMS 256
199
- #define GGML_MAX_CONTEXTS 64
200
- #define GGML_MAX_OPT 4
201
- #define GGML_MAX_NAME 48
202
- #define GGML_DEFAULT_N_THREADS 4
196
+ #define WSP_GGML_MAX_DIMS 4
197
+ #define WSP_GGML_MAX_NODES 4096
198
+ #define WSP_GGML_MAX_PARAMS 256
199
+ #define WSP_GGML_MAX_CONTEXTS 64
200
+ #define WSP_GGML_MAX_OPT 4
201
+ #define WSP_GGML_MAX_NAME 48
202
+ #define WSP_GGML_DEFAULT_N_THREADS 4
203
203
 
204
- #define GGML_UNUSED(x) (void)(x)
204
+ #define WSP_GGML_UNUSED(x) (void)(x)
205
205
 
206
- #define GGML_ASSERT(x) \
206
+ #define WSP_GGML_ASSERT(x) \
207
207
  do { \
208
208
  if (!(x)) { \
209
- fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
209
+ fprintf(stderr, "WSP_GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
210
210
  abort(); \
211
211
  } \
212
212
  } while (0)
@@ -216,24 +216,24 @@
216
216
  //
217
217
  // example:
218
218
  //
219
- // GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
220
- // GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
219
+ // WSP_GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
220
+ // WSP_GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
221
221
  //
222
- #define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
222
+ #define WSP_GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
223
223
  const type prefix##0 = (pointer)->array[0]; \
224
- GGML_UNUSED(prefix##0);
225
- #define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
226
- GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \
224
+ WSP_GGML_UNUSED(prefix##0);
225
+ #define WSP_GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
226
+ WSP_GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \
227
227
  const type prefix##1 = (pointer)->array[1]; \
228
- GGML_UNUSED(prefix##1);
229
- #define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
230
- GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \
228
+ WSP_GGML_UNUSED(prefix##1);
229
+ #define WSP_GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
230
+ WSP_GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \
231
231
  const type prefix##2 = (pointer)->array[2]; \
232
- GGML_UNUSED(prefix##2);
233
- #define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
234
- GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \
232
+ WSP_GGML_UNUSED(prefix##2);
233
+ #define WSP_GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
234
+ WSP_GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \
235
235
  const type prefix##3 = (pointer)->array[3]; \
236
- GGML_UNUSED(prefix##3);
236
+ WSP_GGML_UNUSED(prefix##3);
237
237
 
238
238
  #ifdef __cplusplus
239
239
  extern "C" {
@@ -241,182 +241,182 @@ extern "C" {
241
241
 
242
242
  #ifdef __ARM_NEON
243
243
  // we use the built-in 16-bit float type
244
- typedef __fp16 ggml_fp16_t;
244
+ typedef __fp16 wsp_ggml_fp16_t;
245
245
  #else
246
- typedef uint16_t ggml_fp16_t;
246
+ typedef uint16_t wsp_ggml_fp16_t;
247
247
  #endif
248
248
 
249
249
  // convert FP16 <-> FP32
250
- GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x);
251
- GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
252
-
253
- GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, size_t n);
254
- GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n);
255
-
256
- struct ggml_object;
257
- struct ggml_context;
258
-
259
- enum ggml_type {
260
- GGML_TYPE_F32 = 0,
261
- GGML_TYPE_F16 = 1,
262
- GGML_TYPE_Q4_0 = 2,
263
- GGML_TYPE_Q4_1 = 3,
264
- // GGML_TYPE_Q4_2 = 4, support has been removed
265
- // GGML_TYPE_Q4_3 (5) support has been removed
266
- GGML_TYPE_Q5_0 = 6,
267
- GGML_TYPE_Q5_1 = 7,
268
- GGML_TYPE_Q8_0 = 8,
269
- GGML_TYPE_Q8_1 = 9,
250
+ WSP_GGML_API float wsp_ggml_fp16_to_fp32(wsp_ggml_fp16_t x);
251
+ WSP_GGML_API wsp_ggml_fp16_t wsp_ggml_fp32_to_fp16(float x);
252
+
253
+ WSP_GGML_API void wsp_ggml_fp16_to_fp32_row(const wsp_ggml_fp16_t * x, float * y, size_t n);
254
+ WSP_GGML_API void wsp_ggml_fp32_to_fp16_row(const float * x, wsp_ggml_fp16_t * y, size_t n);
255
+
256
+ struct wsp_ggml_object;
257
+ struct wsp_ggml_context;
258
+
259
+ enum wsp_ggml_type {
260
+ WSP_GGML_TYPE_F32 = 0,
261
+ WSP_GGML_TYPE_F16 = 1,
262
+ WSP_GGML_TYPE_Q4_0 = 2,
263
+ WSP_GGML_TYPE_Q4_1 = 3,
264
+ // WSP_GGML_TYPE_Q4_2 = 4, support has been removed
265
+ // WSP_GGML_TYPE_Q4_3 (5) support has been removed
266
+ WSP_GGML_TYPE_Q5_0 = 6,
267
+ WSP_GGML_TYPE_Q5_1 = 7,
268
+ WSP_GGML_TYPE_Q8_0 = 8,
269
+ WSP_GGML_TYPE_Q8_1 = 9,
270
270
  // k-quantizations
271
- GGML_TYPE_Q2_K = 10,
272
- GGML_TYPE_Q3_K = 11,
273
- GGML_TYPE_Q4_K = 12,
274
- GGML_TYPE_Q5_K = 13,
275
- GGML_TYPE_Q6_K = 14,
276
- GGML_TYPE_Q8_K = 15,
277
- GGML_TYPE_I8,
278
- GGML_TYPE_I16,
279
- GGML_TYPE_I32,
280
- GGML_TYPE_COUNT,
271
+ WSP_GGML_TYPE_Q2_K = 10,
272
+ WSP_GGML_TYPE_Q3_K = 11,
273
+ WSP_GGML_TYPE_Q4_K = 12,
274
+ WSP_GGML_TYPE_Q5_K = 13,
275
+ WSP_GGML_TYPE_Q6_K = 14,
276
+ WSP_GGML_TYPE_Q8_K = 15,
277
+ WSP_GGML_TYPE_I8,
278
+ WSP_GGML_TYPE_I16,
279
+ WSP_GGML_TYPE_I32,
280
+ WSP_GGML_TYPE_COUNT,
281
281
  };
282
282
 
283
- enum ggml_backend {
284
- GGML_BACKEND_CPU = 0,
285
- GGML_BACKEND_GPU = 10,
286
- GGML_BACKEND_GPU_SPLIT = 20,
283
+ enum wsp_ggml_backend {
284
+ WSP_GGML_BACKEND_CPU = 0,
285
+ WSP_GGML_BACKEND_GPU = 10,
286
+ WSP_GGML_BACKEND_GPU_SPLIT = 20,
287
287
  };
288
288
 
289
289
  // model file types
290
- enum ggml_ftype {
291
- GGML_FTYPE_UNKNOWN = -1,
292
- GGML_FTYPE_ALL_F32 = 0,
293
- GGML_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
294
- GGML_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
295
- GGML_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
296
- GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
297
- GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
298
- GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
299
- GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
300
- GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
301
- GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors
302
- GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors
303
- GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors
304
- GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
290
+ enum wsp_ggml_ftype {
291
+ WSP_GGML_FTYPE_UNKNOWN = -1,
292
+ WSP_GGML_FTYPE_ALL_F32 = 0,
293
+ WSP_GGML_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
294
+ WSP_GGML_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
295
+ WSP_GGML_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
296
+ WSP_GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
297
+ WSP_GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
298
+ WSP_GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
299
+ WSP_GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
300
+ WSP_GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
301
+ WSP_GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors
302
+ WSP_GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors
303
+ WSP_GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors
304
+ WSP_GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
305
305
  };
306
306
 
307
307
  // available tensor operations:
308
- enum ggml_op {
309
- GGML_OP_NONE = 0,
310
-
311
- GGML_OP_DUP,
312
- GGML_OP_ADD,
313
- GGML_OP_ADD1,
314
- GGML_OP_ACC,
315
- GGML_OP_SUB,
316
- GGML_OP_MUL,
317
- GGML_OP_DIV,
318
- GGML_OP_SQR,
319
- GGML_OP_SQRT,
320
- GGML_OP_LOG,
321
- GGML_OP_SUM,
322
- GGML_OP_SUM_ROWS,
323
- GGML_OP_MEAN,
324
- GGML_OP_ARGMAX,
325
- GGML_OP_REPEAT,
326
- GGML_OP_REPEAT_BACK,
327
- GGML_OP_ABS,
328
- GGML_OP_SGN,
329
- GGML_OP_NEG,
330
- GGML_OP_STEP,
331
- GGML_OP_TANH,
332
- GGML_OP_ELU,
333
- GGML_OP_RELU,
334
- GGML_OP_GELU,
335
- GGML_OP_GELU_QUICK,
336
- GGML_OP_SILU,
337
- GGML_OP_SILU_BACK,
338
- GGML_OP_NORM, // normalize
339
- GGML_OP_RMS_NORM,
340
- GGML_OP_RMS_NORM_BACK,
341
-
342
- GGML_OP_MUL_MAT,
343
- GGML_OP_OUT_PROD,
344
-
345
- GGML_OP_SCALE,
346
- GGML_OP_SET,
347
- GGML_OP_CPY,
348
- GGML_OP_CONT,
349
- GGML_OP_RESHAPE,
350
- GGML_OP_VIEW,
351
- GGML_OP_PERMUTE,
352
- GGML_OP_TRANSPOSE,
353
- GGML_OP_GET_ROWS,
354
- GGML_OP_GET_ROWS_BACK,
355
- GGML_OP_DIAG,
356
- GGML_OP_DIAG_MASK_INF,
357
- GGML_OP_DIAG_MASK_ZERO,
358
- GGML_OP_SOFT_MAX,
359
- GGML_OP_SOFT_MAX_BACK,
360
- GGML_OP_ROPE,
361
- GGML_OP_ROPE_BACK,
362
- GGML_OP_ALIBI,
363
- GGML_OP_CLAMP,
364
- GGML_OP_CONV_1D,
365
- GGML_OP_CONV_2D,
366
-
367
- GGML_OP_FLASH_ATTN,
368
- GGML_OP_FLASH_FF,
369
- GGML_OP_FLASH_ATTN_BACK,
370
- GGML_OP_WIN_PART,
371
- GGML_OP_WIN_UNPART,
372
-
373
- GGML_OP_MAP_UNARY,
374
- GGML_OP_MAP_BINARY,
375
-
376
- GGML_OP_MAP_CUSTOM1,
377
- GGML_OP_MAP_CUSTOM2,
378
- GGML_OP_MAP_CUSTOM3,
379
-
380
- GGML_OP_CROSS_ENTROPY_LOSS,
381
- GGML_OP_CROSS_ENTROPY_LOSS_BACK,
382
-
383
- GGML_OP_COUNT,
308
+ enum wsp_ggml_op {
309
+ WSP_GGML_OP_NONE = 0,
310
+
311
+ WSP_GGML_OP_DUP,
312
+ WSP_GGML_OP_ADD,
313
+ WSP_GGML_OP_ADD1,
314
+ WSP_GGML_OP_ACC,
315
+ WSP_GGML_OP_SUB,
316
+ WSP_GGML_OP_MUL,
317
+ WSP_GGML_OP_DIV,
318
+ WSP_GGML_OP_SQR,
319
+ WSP_GGML_OP_SQRT,
320
+ WSP_GGML_OP_LOG,
321
+ WSP_GGML_OP_SUM,
322
+ WSP_GGML_OP_SUM_ROWS,
323
+ WSP_GGML_OP_MEAN,
324
+ WSP_GGML_OP_ARGMAX,
325
+ WSP_GGML_OP_REPEAT,
326
+ WSP_GGML_OP_REPEAT_BACK,
327
+ WSP_GGML_OP_ABS,
328
+ WSP_GGML_OP_SGN,
329
+ WSP_GGML_OP_NEG,
330
+ WSP_GGML_OP_STEP,
331
+ WSP_GGML_OP_TANH,
332
+ WSP_GGML_OP_ELU,
333
+ WSP_GGML_OP_RELU,
334
+ WSP_GGML_OP_GELU,
335
+ WSP_GGML_OP_GELU_QUICK,
336
+ WSP_GGML_OP_SILU,
337
+ WSP_GGML_OP_SILU_BACK,
338
+ WSP_GGML_OP_NORM, // normalize
339
+ WSP_GGML_OP_RMS_NORM,
340
+ WSP_GGML_OP_RMS_NORM_BACK,
341
+
342
+ WSP_GGML_OP_MUL_MAT,
343
+ WSP_GGML_OP_OUT_PROD,
344
+
345
+ WSP_GGML_OP_SCALE,
346
+ WSP_GGML_OP_SET,
347
+ WSP_GGML_OP_CPY,
348
+ WSP_GGML_OP_CONT,
349
+ WSP_GGML_OP_RESHAPE,
350
+ WSP_GGML_OP_VIEW,
351
+ WSP_GGML_OP_PERMUTE,
352
+ WSP_GGML_OP_TRANSPOSE,
353
+ WSP_GGML_OP_GET_ROWS,
354
+ WSP_GGML_OP_GET_ROWS_BACK,
355
+ WSP_GGML_OP_DIAG,
356
+ WSP_GGML_OP_DIAG_MASK_INF,
357
+ WSP_GGML_OP_DIAG_MASK_ZERO,
358
+ WSP_GGML_OP_SOFT_MAX,
359
+ WSP_GGML_OP_SOFT_MAX_BACK,
360
+ WSP_GGML_OP_ROPE,
361
+ WSP_GGML_OP_ROPE_BACK,
362
+ WSP_GGML_OP_ALIBI,
363
+ WSP_GGML_OP_CLAMP,
364
+ WSP_GGML_OP_CONV_1D,
365
+ WSP_GGML_OP_CONV_2D,
366
+
367
+ WSP_GGML_OP_FLASH_ATTN,
368
+ WSP_GGML_OP_FLASH_FF,
369
+ WSP_GGML_OP_FLASH_ATTN_BACK,
370
+ WSP_GGML_OP_WIN_PART,
371
+ WSP_GGML_OP_WIN_UNPART,
372
+
373
+ WSP_GGML_OP_MAP_UNARY,
374
+ WSP_GGML_OP_MAP_BINARY,
375
+
376
+ WSP_GGML_OP_MAP_CUSTOM1,
377
+ WSP_GGML_OP_MAP_CUSTOM2,
378
+ WSP_GGML_OP_MAP_CUSTOM3,
379
+
380
+ WSP_GGML_OP_CROSS_ENTROPY_LOSS,
381
+ WSP_GGML_OP_CROSS_ENTROPY_LOSS_BACK,
382
+
383
+ WSP_GGML_OP_COUNT,
384
384
  };
385
385
 
386
386
 
387
387
  // ggml object
388
- struct ggml_object {
388
+ struct wsp_ggml_object {
389
389
  size_t offs;
390
390
  size_t size;
391
391
 
392
- struct ggml_object * next;
392
+ struct wsp_ggml_object * next;
393
393
 
394
394
  char padding[8];
395
395
  };
396
396
 
397
- static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
397
+ static const size_t WSP_GGML_OBJECT_SIZE = sizeof(struct wsp_ggml_object);
398
398
 
399
399
  // n-dimensional tensor
400
- struct ggml_tensor {
401
- enum ggml_type type;
402
- enum ggml_backend backend;
400
+ struct wsp_ggml_tensor {
401
+ enum wsp_ggml_type type;
402
+ enum wsp_ggml_backend backend;
403
403
 
404
404
  int n_dims;
405
- int64_t ne[GGML_MAX_DIMS]; // number of elements
406
- size_t nb[GGML_MAX_DIMS]; // stride in bytes:
405
+ int64_t ne[WSP_GGML_MAX_DIMS]; // number of elements
406
+ size_t nb[WSP_GGML_MAX_DIMS]; // stride in bytes:
407
407
  // nb[0] = sizeof(type)
408
408
  // nb[1] = nb[0] * ne[0] + padding
409
409
  // nb[i] = nb[i-1] * ne[i-1]
410
410
 
411
411
  // compute data
412
- enum ggml_op op;
412
+ enum wsp_ggml_op op;
413
413
 
414
414
  bool is_param;
415
415
 
416
- struct ggml_tensor * grad;
417
- struct ggml_tensor * src0;
418
- struct ggml_tensor * src1;
419
- struct ggml_tensor * opt[GGML_MAX_OPT];
416
+ struct wsp_ggml_tensor * grad;
417
+ struct wsp_ggml_tensor * src0;
418
+ struct wsp_ggml_tensor * src1;
419
+ struct wsp_ggml_tensor * opt[WSP_GGML_MAX_OPT];
420
420
 
421
421
  // thread scheduling
422
422
  int n_tasks;
@@ -428,27 +428,27 @@ extern "C" {
428
428
 
429
429
  void * data;
430
430
 
431
- char name[GGML_MAX_NAME];
431
+ char name[WSP_GGML_MAX_NAME];
432
432
 
433
433
  void * extra; // extra things e.g. for ggml-cuda.cu
434
434
 
435
435
  char padding[4];
436
436
  };
437
437
 
438
- static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
438
+ static const size_t WSP_GGML_TENSOR_SIZE = sizeof(struct wsp_ggml_tensor);
439
439
 
440
440
  // computation graph
441
- struct ggml_cgraph {
441
+ struct wsp_ggml_cgraph {
442
442
  int n_nodes;
443
443
  int n_leafs;
444
444
  int n_threads;
445
445
 
446
446
  size_t work_size;
447
- struct ggml_tensor * work;
447
+ struct wsp_ggml_tensor * work;
448
448
 
449
- struct ggml_tensor * nodes[GGML_MAX_NODES];
450
- struct ggml_tensor * grads[GGML_MAX_NODES];
451
- struct ggml_tensor * leafs[GGML_MAX_NODES];
449
+ struct wsp_ggml_tensor * nodes[WSP_GGML_MAX_NODES];
450
+ struct wsp_ggml_tensor * grads[WSP_GGML_MAX_NODES];
451
+ struct wsp_ggml_tensor * leafs[WSP_GGML_MAX_NODES];
452
452
 
453
453
  // performance
454
454
  int perf_runs;
@@ -457,13 +457,13 @@ extern "C" {
457
457
  };
458
458
 
459
459
  // scratch buffer
460
- struct ggml_scratch {
460
+ struct wsp_ggml_scratch {
461
461
  size_t offs;
462
462
  size_t size;
463
463
  void * data;
464
464
  };
465
465
 
466
- struct ggml_init_params {
466
+ struct wsp_ggml_init_params {
467
467
  // memory pool
468
468
  size_t mem_size; // bytes
469
469
  void * mem_buffer; // if NULL, memory will be allocated internally
@@ -475,14 +475,14 @@ extern "C" {
475
475
 
476
476
  // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
477
477
  // This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
478
- enum ggml_task_type {
479
- GGML_TASK_INIT = 0,
480
- GGML_TASK_COMPUTE,
481
- GGML_TASK_FINALIZE,
478
+ enum wsp_ggml_task_type {
479
+ WSP_GGML_TASK_INIT = 0,
480
+ WSP_GGML_TASK_COMPUTE,
481
+ WSP_GGML_TASK_FINALIZE,
482
482
  };
483
483
 
484
- struct ggml_compute_params {
485
- enum ggml_task_type type;
484
+ struct wsp_ggml_compute_params {
485
+ enum wsp_ggml_task_type type;
486
486
 
487
487
  // ith = thread index, nth = number of threads
488
488
  int ith, nth;
@@ -494,506 +494,506 @@ extern "C" {
494
494
 
495
495
  // misc
496
496
 
497
- GGML_API void ggml_time_init(void); // call this once at the beginning of the program
498
- GGML_API int64_t ggml_time_ms(void);
499
- GGML_API int64_t ggml_time_us(void);
500
- GGML_API int64_t ggml_cycles(void);
501
- GGML_API int64_t ggml_cycles_per_ms(void);
497
+ WSP_GGML_API void wsp_ggml_time_init(void); // call this once at the beginning of the program
498
+ WSP_GGML_API int64_t wsp_ggml_time_ms(void);
499
+ WSP_GGML_API int64_t wsp_ggml_time_us(void);
500
+ WSP_GGML_API int64_t wsp_ggml_cycles(void);
501
+ WSP_GGML_API int64_t wsp_ggml_cycles_per_ms(void);
502
502
 
503
- GGML_API void ggml_numa_init(void); // call once for better performance on NUMA systems
504
- GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
503
+ WSP_GGML_API void wsp_ggml_numa_init(void); // call once for better performance on NUMA systems
504
+ WSP_GGML_API bool wsp_ggml_is_numa(void); // true if init detected that system has >1 NUMA node
505
505
 
506
- GGML_API void ggml_print_object (const struct ggml_object * obj);
507
- GGML_API void ggml_print_objects(const struct ggml_context * ctx);
506
+ WSP_GGML_API void wsp_ggml_print_object (const struct wsp_ggml_object * obj);
507
+ WSP_GGML_API void wsp_ggml_print_objects(const struct wsp_ggml_context * ctx);
508
508
 
509
- GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
510
- GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
511
- GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
512
- GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
509
+ WSP_GGML_API int64_t wsp_ggml_nelements (const struct wsp_ggml_tensor * tensor);
510
+ WSP_GGML_API int64_t wsp_ggml_nrows (const struct wsp_ggml_tensor * tensor);
511
+ WSP_GGML_API size_t wsp_ggml_nbytes (const struct wsp_ggml_tensor * tensor);
512
+ WSP_GGML_API size_t wsp_ggml_nbytes_split(const struct wsp_ggml_tensor * tensor, int nrows_split);
513
513
 
514
- GGML_API int ggml_blck_size (enum ggml_type type);
515
- GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
516
- GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
514
+ WSP_GGML_API int wsp_ggml_blck_size (enum wsp_ggml_type type);
515
+ WSP_GGML_API size_t wsp_ggml_type_size (enum wsp_ggml_type type); // size in bytes for all elements in a block
516
+ WSP_GGML_API float wsp_ggml_type_sizef(enum wsp_ggml_type type); // wsp_ggml_type_size()/wsp_ggml_blck_size() as float
517
517
 
518
- GGML_API const char * ggml_type_name(enum ggml_type type);
519
- GGML_API const char * ggml_op_name (enum ggml_op op);
518
+ WSP_GGML_API const char * wsp_ggml_type_name(enum wsp_ggml_type type);
519
+ WSP_GGML_API const char * wsp_ggml_op_name (enum wsp_ggml_op op);
520
520
 
521
- GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
521
+ WSP_GGML_API size_t wsp_ggml_element_size(const struct wsp_ggml_tensor * tensor);
522
522
 
523
- GGML_API bool ggml_is_quantized(enum ggml_type type);
523
+ WSP_GGML_API bool wsp_ggml_is_quantized(enum wsp_ggml_type type);
524
524
 
525
525
  // TODO: temporary until model loading of ggml examples is refactored
526
- GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
526
+ WSP_GGML_API enum wsp_ggml_type wsp_ggml_ftype_to_wsp_ggml_type(enum wsp_ggml_ftype ftype);
527
527
 
528
- GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
529
- GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
530
- GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
528
+ WSP_GGML_API bool wsp_ggml_is_transposed(const struct wsp_ggml_tensor * tensor);
529
+ WSP_GGML_API bool wsp_ggml_is_contiguous(const struct wsp_ggml_tensor * tensor);
530
+ WSP_GGML_API bool wsp_ggml_is_permuted (const struct wsp_ggml_tensor * tensor);
531
531
 
532
532
  // use this to compute the memory overhead of a tensor
533
- GGML_API size_t ggml_tensor_overhead(void);
533
+ WSP_GGML_API size_t wsp_ggml_tensor_overhead(void);
534
534
 
535
535
  // main
536
536
 
537
- GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
538
- GGML_API void ggml_free(struct ggml_context * ctx);
537
+ WSP_GGML_API struct wsp_ggml_context * wsp_ggml_init(struct wsp_ggml_init_params params);
538
+ WSP_GGML_API void wsp_ggml_free(struct wsp_ggml_context * ctx);
539
539
 
540
- GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
540
+ WSP_GGML_API size_t wsp_ggml_used_mem(const struct wsp_ggml_context * ctx);
541
541
 
542
- GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
543
- GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
542
+ WSP_GGML_API size_t wsp_ggml_set_scratch (struct wsp_ggml_context * ctx, struct wsp_ggml_scratch scratch);
543
+ WSP_GGML_API void wsp_ggml_set_no_alloc(struct wsp_ggml_context * ctx, bool no_alloc);
544
544
 
545
- GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx);
546
- GGML_API size_t ggml_get_mem_size (const struct ggml_context * ctx);
547
- GGML_API size_t ggml_get_max_tensor_size(const struct ggml_context * ctx);
545
+ WSP_GGML_API void * wsp_ggml_get_mem_buffer (const struct wsp_ggml_context * ctx);
546
+ WSP_GGML_API size_t wsp_ggml_get_mem_size (const struct wsp_ggml_context * ctx);
547
+ WSP_GGML_API size_t wsp_ggml_get_max_tensor_size(const struct wsp_ggml_context * ctx);
548
548
 
549
- GGML_API struct ggml_tensor * ggml_new_tensor(
550
- struct ggml_context * ctx,
551
- enum ggml_type type,
549
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_tensor(
550
+ struct wsp_ggml_context * ctx,
551
+ enum wsp_ggml_type type,
552
552
  int n_dims,
553
553
  const int64_t *ne);
554
554
 
555
- GGML_API struct ggml_tensor * ggml_new_tensor_1d(
556
- struct ggml_context * ctx,
557
- enum ggml_type type,
555
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_tensor_1d(
556
+ struct wsp_ggml_context * ctx,
557
+ enum wsp_ggml_type type,
558
558
  int64_t ne0);
559
559
 
560
- GGML_API struct ggml_tensor * ggml_new_tensor_2d(
561
- struct ggml_context * ctx,
562
- enum ggml_type type,
560
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_tensor_2d(
561
+ struct wsp_ggml_context * ctx,
562
+ enum wsp_ggml_type type,
563
563
  int64_t ne0,
564
564
  int64_t ne1);
565
565
 
566
- GGML_API struct ggml_tensor * ggml_new_tensor_3d(
567
- struct ggml_context * ctx,
568
- enum ggml_type type,
566
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_tensor_3d(
567
+ struct wsp_ggml_context * ctx,
568
+ enum wsp_ggml_type type,
569
569
  int64_t ne0,
570
570
  int64_t ne1,
571
571
  int64_t ne2);
572
572
 
573
- GGML_API struct ggml_tensor * ggml_new_tensor_4d(
574
- struct ggml_context * ctx,
575
- enum ggml_type type,
573
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_tensor_4d(
574
+ struct wsp_ggml_context * ctx,
575
+ enum wsp_ggml_type type,
576
576
  int64_t ne0,
577
577
  int64_t ne1,
578
578
  int64_t ne2,
579
579
  int64_t ne3);
580
580
 
581
- GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
582
- GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
581
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_i32(struct wsp_ggml_context * ctx, int32_t value);
582
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_new_f32(struct wsp_ggml_context * ctx, float value);
583
583
 
584
- GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
585
- GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
584
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_dup_tensor (struct wsp_ggml_context * ctx, const struct wsp_ggml_tensor * src);
585
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_tensor(struct wsp_ggml_context * ctx, const struct wsp_ggml_tensor * src);
586
586
 
587
- GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
587
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_tensor(struct wsp_ggml_context * ctx, const char * name);
588
588
 
589
- GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
590
- GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
591
- GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
589
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_zero(struct wsp_ggml_tensor * tensor);
590
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_i32 (struct wsp_ggml_tensor * tensor, int32_t value);
591
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_f32 (struct wsp_ggml_tensor * tensor, float value);
592
592
 
593
- GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
594
- GGML_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
593
+ WSP_GGML_API int32_t wsp_ggml_get_i32_1d(const struct wsp_ggml_tensor * tensor, int i);
594
+ WSP_GGML_API void wsp_ggml_set_i32_1d(const struct wsp_ggml_tensor * tensor, int i, int32_t value);
595
595
 
596
- GGML_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
597
- GGML_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
596
+ WSP_GGML_API float wsp_ggml_get_f32_1d(const struct wsp_ggml_tensor * tensor, int i);
597
+ WSP_GGML_API void wsp_ggml_set_f32_1d(const struct wsp_ggml_tensor * tensor, int i, float value);
598
598
 
599
- GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
600
- GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
599
+ WSP_GGML_API void * wsp_ggml_get_data (const struct wsp_ggml_tensor * tensor);
600
+ WSP_GGML_API float * wsp_ggml_get_data_f32(const struct wsp_ggml_tensor * tensor);
601
601
 
602
- GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
603
- GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
604
- GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
602
+ WSP_GGML_API const char * wsp_ggml_get_name(const struct wsp_ggml_tensor * tensor);
603
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_name(struct wsp_ggml_tensor * tensor, const char * name);
604
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_format_name(struct wsp_ggml_tensor * tensor, const char * fmt, ...);
605
605
 
606
606
  //
607
607
  // operations on tensors with backpropagation
608
608
  //
609
609
 
610
- GGML_API struct ggml_tensor * ggml_dup(
611
- struct ggml_context * ctx,
612
- struct ggml_tensor * a);
613
-
614
- GGML_API struct ggml_tensor * ggml_add(
615
- struct ggml_context * ctx,
616
- struct ggml_tensor * a,
617
- struct ggml_tensor * b);
618
-
619
- GGML_API struct ggml_tensor * ggml_add_inplace(
620
- struct ggml_context * ctx,
621
- struct ggml_tensor * a,
622
- struct ggml_tensor * b);
623
-
624
- GGML_API struct ggml_tensor * ggml_add1(
625
- struct ggml_context * ctx,
626
- struct ggml_tensor * a,
627
- struct ggml_tensor * b);
628
-
629
- GGML_API struct ggml_tensor * ggml_add1_inplace(
630
- struct ggml_context * ctx,
631
- struct ggml_tensor * a,
632
- struct ggml_tensor * b);
633
-
634
- GGML_API struct ggml_tensor * ggml_acc(
635
- struct ggml_context * ctx,
636
- struct ggml_tensor * a,
637
- struct ggml_tensor * b,
610
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_dup(
611
+ struct wsp_ggml_context * ctx,
612
+ struct wsp_ggml_tensor * a);
613
+
614
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add(
615
+ struct wsp_ggml_context * ctx,
616
+ struct wsp_ggml_tensor * a,
617
+ struct wsp_ggml_tensor * b);
618
+
619
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add_inplace(
620
+ struct wsp_ggml_context * ctx,
621
+ struct wsp_ggml_tensor * a,
622
+ struct wsp_ggml_tensor * b);
623
+
624
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add1(
625
+ struct wsp_ggml_context * ctx,
626
+ struct wsp_ggml_tensor * a,
627
+ struct wsp_ggml_tensor * b);
628
+
629
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add1_inplace(
630
+ struct wsp_ggml_context * ctx,
631
+ struct wsp_ggml_tensor * a,
632
+ struct wsp_ggml_tensor * b);
633
+
634
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_acc(
635
+ struct wsp_ggml_context * ctx,
636
+ struct wsp_ggml_tensor * a,
637
+ struct wsp_ggml_tensor * b,
638
638
  size_t nb1,
639
639
  size_t nb2,
640
640
  size_t nb3,
641
641
  size_t offset);
642
642
 
643
- GGML_API struct ggml_tensor * ggml_acc_inplace(
644
- struct ggml_context * ctx,
645
- struct ggml_tensor * a,
646
- struct ggml_tensor * b,
643
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_acc_inplace(
644
+ struct wsp_ggml_context * ctx,
645
+ struct wsp_ggml_tensor * a,
646
+ struct wsp_ggml_tensor * b,
647
647
  size_t nb1,
648
648
  size_t nb2,
649
649
  size_t nb3,
650
650
  size_t offset);
651
651
 
652
- GGML_API struct ggml_tensor * ggml_sub(
653
- struct ggml_context * ctx,
654
- struct ggml_tensor * a,
655
- struct ggml_tensor * b);
652
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sub(
653
+ struct wsp_ggml_context * ctx,
654
+ struct wsp_ggml_tensor * a,
655
+ struct wsp_ggml_tensor * b);
656
656
 
657
- GGML_API struct ggml_tensor * ggml_sub_inplace(
658
- struct ggml_context * ctx,
659
- struct ggml_tensor * a,
660
- struct ggml_tensor * b);
657
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sub_inplace(
658
+ struct wsp_ggml_context * ctx,
659
+ struct wsp_ggml_tensor * a,
660
+ struct wsp_ggml_tensor * b);
661
661
 
662
- GGML_API struct ggml_tensor * ggml_mul(
663
- struct ggml_context * ctx,
664
- struct ggml_tensor * a,
665
- struct ggml_tensor * b);
662
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mul(
663
+ struct wsp_ggml_context * ctx,
664
+ struct wsp_ggml_tensor * a,
665
+ struct wsp_ggml_tensor * b);
666
666
 
667
- GGML_API struct ggml_tensor * ggml_mul_inplace(
668
- struct ggml_context * ctx,
669
- struct ggml_tensor * a,
670
- struct ggml_tensor * b);
667
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mul_inplace(
668
+ struct wsp_ggml_context * ctx,
669
+ struct wsp_ggml_tensor * a,
670
+ struct wsp_ggml_tensor * b);
671
671
 
672
- GGML_API struct ggml_tensor * ggml_div(
673
- struct ggml_context * ctx,
674
- struct ggml_tensor * a,
675
- struct ggml_tensor * b);
672
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_div(
673
+ struct wsp_ggml_context * ctx,
674
+ struct wsp_ggml_tensor * a,
675
+ struct wsp_ggml_tensor * b);
676
676
 
677
- GGML_API struct ggml_tensor * ggml_div_inplace(
678
- struct ggml_context * ctx,
679
- struct ggml_tensor * a,
680
- struct ggml_tensor * b);
677
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_div_inplace(
678
+ struct wsp_ggml_context * ctx,
679
+ struct wsp_ggml_tensor * a,
680
+ struct wsp_ggml_tensor * b);
681
681
 
682
- GGML_API struct ggml_tensor * ggml_sqr(
683
- struct ggml_context * ctx,
684
- struct ggml_tensor * a);
682
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sqr(
683
+ struct wsp_ggml_context * ctx,
684
+ struct wsp_ggml_tensor * a);
685
685
 
686
- GGML_API struct ggml_tensor * ggml_sqr_inplace(
687
- struct ggml_context * ctx,
688
- struct ggml_tensor * a);
686
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sqr_inplace(
687
+ struct wsp_ggml_context * ctx,
688
+ struct wsp_ggml_tensor * a);
689
689
 
690
- GGML_API struct ggml_tensor * ggml_sqrt(
691
- struct ggml_context * ctx,
692
- struct ggml_tensor * a);
690
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sqrt(
691
+ struct wsp_ggml_context * ctx,
692
+ struct wsp_ggml_tensor * a);
693
693
 
694
- GGML_API struct ggml_tensor * ggml_sqrt_inplace(
695
- struct ggml_context * ctx,
696
- struct ggml_tensor * a);
694
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sqrt_inplace(
695
+ struct wsp_ggml_context * ctx,
696
+ struct wsp_ggml_tensor * a);
697
697
 
698
- GGML_API struct ggml_tensor * ggml_log(
699
- struct ggml_context * ctx,
700
- struct ggml_tensor * a);
698
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_log(
699
+ struct wsp_ggml_context * ctx,
700
+ struct wsp_ggml_tensor * a);
701
701
 
702
- GGML_API struct ggml_tensor * ggml_log_inplace(
703
- struct ggml_context * ctx,
704
- struct ggml_tensor * a);
702
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_log_inplace(
703
+ struct wsp_ggml_context * ctx,
704
+ struct wsp_ggml_tensor * a);
705
705
 
706
706
  // return scalar
707
- GGML_API struct ggml_tensor * ggml_sum(
708
- struct ggml_context * ctx,
709
- struct ggml_tensor * a);
707
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sum(
708
+ struct wsp_ggml_context * ctx,
709
+ struct wsp_ggml_tensor * a);
710
710
 
711
711
  // sums along rows, with input shape [a,b,c,d] return shape [1,b,c,d]
712
- GGML_API struct ggml_tensor * ggml_sum_rows(
713
- struct ggml_context * ctx,
714
- struct ggml_tensor * a);
712
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sum_rows(
713
+ struct wsp_ggml_context * ctx,
714
+ struct wsp_ggml_tensor * a);
715
715
 
716
716
  // mean along rows
717
- GGML_API struct ggml_tensor * ggml_mean(
718
- struct ggml_context * ctx,
719
- struct ggml_tensor * a);
717
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mean(
718
+ struct wsp_ggml_context * ctx,
719
+ struct wsp_ggml_tensor * a);
720
720
 
721
721
  // argmax along rows
722
- GGML_API struct ggml_tensor * ggml_argmax(
723
- struct ggml_context * ctx,
724
- struct ggml_tensor * a);
722
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_argmax(
723
+ struct wsp_ggml_context * ctx,
724
+ struct wsp_ggml_tensor * a);
725
725
 
726
726
  // if a is the same shape as b, and a is not parameter, return a
727
727
  // otherwise, return a new tensor: repeat(a) to fit in b
728
- GGML_API struct ggml_tensor * ggml_repeat(
729
- struct ggml_context * ctx,
730
- struct ggml_tensor * a,
731
- struct ggml_tensor * b);
728
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_repeat(
729
+ struct wsp_ggml_context * ctx,
730
+ struct wsp_ggml_tensor * a,
731
+ struct wsp_ggml_tensor * b);
732
732
 
733
- GGML_API struct ggml_tensor * ggml_repeat_back(
734
- struct ggml_context * ctx,
735
- struct ggml_tensor * a,
736
- struct ggml_tensor * b);
733
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_repeat_back(
734
+ struct wsp_ggml_context * ctx,
735
+ struct wsp_ggml_tensor * a,
736
+ struct wsp_ggml_tensor * b);
737
737
 
738
- GGML_API struct ggml_tensor * ggml_abs(
739
- struct ggml_context * ctx,
740
- struct ggml_tensor * a);
738
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_abs(
739
+ struct wsp_ggml_context * ctx,
740
+ struct wsp_ggml_tensor * a);
741
741
 
742
- GGML_API struct ggml_tensor * ggml_abs_inplace(
743
- struct ggml_context * ctx,
744
- struct ggml_tensor * a);
742
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_abs_inplace(
743
+ struct wsp_ggml_context * ctx,
744
+ struct wsp_ggml_tensor * a);
745
745
 
746
- GGML_API struct ggml_tensor * ggml_sgn(
747
- struct ggml_context * ctx,
748
- struct ggml_tensor * a);
746
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sgn(
747
+ struct wsp_ggml_context * ctx,
748
+ struct wsp_ggml_tensor * a);
749
749
 
750
- GGML_API struct ggml_tensor * ggml_sgn_inplace(
751
- struct ggml_context * ctx,
752
- struct ggml_tensor * a);
750
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sgn_inplace(
751
+ struct wsp_ggml_context * ctx,
752
+ struct wsp_ggml_tensor * a);
753
753
 
754
- GGML_API struct ggml_tensor * ggml_neg(
755
- struct ggml_context * ctx,
756
- struct ggml_tensor * a);
754
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_neg(
755
+ struct wsp_ggml_context * ctx,
756
+ struct wsp_ggml_tensor * a);
757
757
 
758
- GGML_API struct ggml_tensor * ggml_neg_inplace(
759
- struct ggml_context * ctx,
760
- struct ggml_tensor * a);
758
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_neg_inplace(
759
+ struct wsp_ggml_context * ctx,
760
+ struct wsp_ggml_tensor * a);
761
761
 
762
- GGML_API struct ggml_tensor * ggml_step(
763
- struct ggml_context * ctx,
764
- struct ggml_tensor * a);
762
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_step(
763
+ struct wsp_ggml_context * ctx,
764
+ struct wsp_ggml_tensor * a);
765
765
 
766
- GGML_API struct ggml_tensor * ggml_step_inplace(
767
- struct ggml_context * ctx,
768
- struct ggml_tensor * a);
766
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_step_inplace(
767
+ struct wsp_ggml_context * ctx,
768
+ struct wsp_ggml_tensor * a);
769
769
 
770
- GGML_API struct ggml_tensor * ggml_tanh(
771
- struct ggml_context * ctx,
772
- struct ggml_tensor * a);
770
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_tanh(
771
+ struct wsp_ggml_context * ctx,
772
+ struct wsp_ggml_tensor * a);
773
773
 
774
- GGML_API struct ggml_tensor * ggml_tanh_inplace(
775
- struct ggml_context * ctx,
776
- struct ggml_tensor * a);
774
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_tanh_inplace(
775
+ struct wsp_ggml_context * ctx,
776
+ struct wsp_ggml_tensor * a);
777
777
 
778
- GGML_API struct ggml_tensor * ggml_elu(
779
- struct ggml_context * ctx,
780
- struct ggml_tensor * a);
778
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_elu(
779
+ struct wsp_ggml_context * ctx,
780
+ struct wsp_ggml_tensor * a);
781
781
 
782
- GGML_API struct ggml_tensor * ggml_elu_inplace(
783
- struct ggml_context * ctx,
784
- struct ggml_tensor * a);
782
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_elu_inplace(
783
+ struct wsp_ggml_context * ctx,
784
+ struct wsp_ggml_tensor * a);
785
785
 
786
- GGML_API struct ggml_tensor * ggml_relu(
787
- struct ggml_context * ctx,
788
- struct ggml_tensor * a);
786
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_relu(
787
+ struct wsp_ggml_context * ctx,
788
+ struct wsp_ggml_tensor * a);
789
789
 
790
- GGML_API struct ggml_tensor * ggml_relu_inplace(
791
- struct ggml_context * ctx,
792
- struct ggml_tensor * a);
790
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_relu_inplace(
791
+ struct wsp_ggml_context * ctx,
792
+ struct wsp_ggml_tensor * a);
793
793
 
794
794
  // TODO: double-check this computation is correct
795
- GGML_API struct ggml_tensor * ggml_gelu(
796
- struct ggml_context * ctx,
797
- struct ggml_tensor * a);
795
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_gelu(
796
+ struct wsp_ggml_context * ctx,
797
+ struct wsp_ggml_tensor * a);
798
798
 
799
- GGML_API struct ggml_tensor * ggml_gelu_inplace(
800
- struct ggml_context * ctx,
801
- struct ggml_tensor * a);
799
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_gelu_inplace(
800
+ struct wsp_ggml_context * ctx,
801
+ struct wsp_ggml_tensor * a);
802
802
 
803
- GGML_API struct ggml_tensor * ggml_gelu_quick(
804
- struct ggml_context * ctx,
805
- struct ggml_tensor * a);
803
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_gelu_quick(
804
+ struct wsp_ggml_context * ctx,
805
+ struct wsp_ggml_tensor * a);
806
806
 
807
- GGML_API struct ggml_tensor * ggml_gelu_quick_inplace(
808
- struct ggml_context * ctx,
809
- struct ggml_tensor * a);
807
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_gelu_quick_inplace(
808
+ struct wsp_ggml_context * ctx,
809
+ struct wsp_ggml_tensor * a);
810
810
 
811
- GGML_API struct ggml_tensor * ggml_silu(
812
- struct ggml_context * ctx,
813
- struct ggml_tensor * a);
811
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_silu(
812
+ struct wsp_ggml_context * ctx,
813
+ struct wsp_ggml_tensor * a);
814
814
 
815
- GGML_API struct ggml_tensor * ggml_silu_inplace(
816
- struct ggml_context * ctx,
817
- struct ggml_tensor * a);
815
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_silu_inplace(
816
+ struct wsp_ggml_context * ctx,
817
+ struct wsp_ggml_tensor * a);
818
818
 
819
819
  // a - x
820
820
  // b - dy
821
- GGML_API struct ggml_tensor * ggml_silu_back(
822
- struct ggml_context * ctx,
823
- struct ggml_tensor * a,
824
- struct ggml_tensor * b);
821
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_silu_back(
822
+ struct wsp_ggml_context * ctx,
823
+ struct wsp_ggml_tensor * a,
824
+ struct wsp_ggml_tensor * b);
825
825
 
826
826
  // normalize along rows
827
827
  // TODO: eps is hardcoded to 1e-5 for now
828
- GGML_API struct ggml_tensor * ggml_norm(
829
- struct ggml_context * ctx,
830
- struct ggml_tensor * a);
828
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_norm(
829
+ struct wsp_ggml_context * ctx,
830
+ struct wsp_ggml_tensor * a);
831
831
 
832
- GGML_API struct ggml_tensor * ggml_norm_inplace(
833
- struct ggml_context * ctx,
834
- struct ggml_tensor * a);
832
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_norm_inplace(
833
+ struct wsp_ggml_context * ctx,
834
+ struct wsp_ggml_tensor * a);
835
835
 
836
- GGML_API struct ggml_tensor * ggml_rms_norm(
837
- struct ggml_context * ctx,
838
- struct ggml_tensor * a);
836
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rms_norm(
837
+ struct wsp_ggml_context * ctx,
838
+ struct wsp_ggml_tensor * a);
839
839
 
840
- GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
841
- struct ggml_context * ctx,
842
- struct ggml_tensor * a);
840
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rms_norm_inplace(
841
+ struct wsp_ggml_context * ctx,
842
+ struct wsp_ggml_tensor * a);
843
843
 
844
844
  // a - x
845
845
  // b - dy
846
- GGML_API struct ggml_tensor * ggml_rms_norm_back(
847
- struct ggml_context * ctx,
848
- struct ggml_tensor * a,
849
- struct ggml_tensor * b);
846
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rms_norm_back(
847
+ struct wsp_ggml_context * ctx,
848
+ struct wsp_ggml_tensor * a,
849
+ struct wsp_ggml_tensor * b);
850
850
 
851
851
  // A: n columns, m rows
852
852
  // B: n columns, p rows (i.e. we transpose it internally)
853
853
  // result is m columns, p rows
854
- GGML_API struct ggml_tensor * ggml_mul_mat(
855
- struct ggml_context * ctx,
856
- struct ggml_tensor * a,
857
- struct ggml_tensor * b);
854
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mul_mat(
855
+ struct wsp_ggml_context * ctx,
856
+ struct wsp_ggml_tensor * a,
857
+ struct wsp_ggml_tensor * b);
858
858
 
859
859
  // A: m columns, n rows,
860
860
  // B: p columns, n rows,
861
861
  // result is m columns, p rows
862
- GGML_API struct ggml_tensor * ggml_out_prod(
863
- struct ggml_context * ctx,
864
- struct ggml_tensor * a,
865
- struct ggml_tensor * b);
862
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_out_prod(
863
+ struct wsp_ggml_context * ctx,
864
+ struct wsp_ggml_tensor * a,
865
+ struct wsp_ggml_tensor * b);
866
866
 
867
867
  //
868
868
  // operations on tensors without backpropagation
869
869
  //
870
870
 
871
- GGML_API struct ggml_tensor * ggml_scale(
872
- struct ggml_context * ctx,
873
- struct ggml_tensor * a,
874
- struct ggml_tensor * b);
871
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_scale(
872
+ struct wsp_ggml_context * ctx,
873
+ struct wsp_ggml_tensor * a,
874
+ struct wsp_ggml_tensor * b);
875
875
 
876
876
  // in-place, returns view(a)
877
- GGML_API struct ggml_tensor * ggml_scale_inplace(
878
- struct ggml_context * ctx,
879
- struct ggml_tensor * a,
880
- struct ggml_tensor * b);
877
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_scale_inplace(
878
+ struct wsp_ggml_context * ctx,
879
+ struct wsp_ggml_tensor * a,
880
+ struct wsp_ggml_tensor * b);
881
881
 
882
882
  // b -> view(a,offset,nb1,nb2,3), return modified a
883
- GGML_API struct ggml_tensor * ggml_set(
884
- struct ggml_context * ctx,
885
- struct ggml_tensor * a,
886
- struct ggml_tensor * b,
883
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set(
884
+ struct wsp_ggml_context * ctx,
885
+ struct wsp_ggml_tensor * a,
886
+ struct wsp_ggml_tensor * b,
887
887
  size_t nb1,
888
888
  size_t nb2,
889
889
  size_t nb3,
890
890
  size_t offset);
891
891
 
892
892
  // b -> view(a,offset,nb1,nb2,3), return view(a)
893
- GGML_API struct ggml_tensor * ggml_set_inplace(
894
- struct ggml_context * ctx,
895
- struct ggml_tensor * a,
896
- struct ggml_tensor * b,
893
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_inplace(
894
+ struct wsp_ggml_context * ctx,
895
+ struct wsp_ggml_tensor * a,
896
+ struct wsp_ggml_tensor * b,
897
897
  size_t nb1,
898
898
  size_t nb2,
899
899
  size_t nb3,
900
900
  size_t offset);
901
901
 
902
- GGML_API struct ggml_tensor * ggml_set_1d(
903
- struct ggml_context * ctx,
904
- struct ggml_tensor * a,
905
- struct ggml_tensor * b,
902
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_1d(
903
+ struct wsp_ggml_context * ctx,
904
+ struct wsp_ggml_tensor * a,
905
+ struct wsp_ggml_tensor * b,
906
906
  size_t offset);
907
907
 
908
- GGML_API struct ggml_tensor * ggml_set_1d_inplace(
909
- struct ggml_context * ctx,
910
- struct ggml_tensor * a,
911
- struct ggml_tensor * b,
908
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_1d_inplace(
909
+ struct wsp_ggml_context * ctx,
910
+ struct wsp_ggml_tensor * a,
911
+ struct wsp_ggml_tensor * b,
912
912
  size_t offset);
913
913
 
914
914
  // b -> view(a,offset,nb1,nb2,3), return modified a
915
- GGML_API struct ggml_tensor * ggml_set_2d(
916
- struct ggml_context * ctx,
917
- struct ggml_tensor * a,
918
- struct ggml_tensor * b,
915
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_2d(
916
+ struct wsp_ggml_context * ctx,
917
+ struct wsp_ggml_tensor * a,
918
+ struct wsp_ggml_tensor * b,
919
919
  size_t nb1,
920
920
  size_t offset);
921
921
 
922
922
  // b -> view(a,offset,nb1,nb2,3), return view(a)
923
- GGML_API struct ggml_tensor * ggml_set_2d_inplace(
924
- struct ggml_context * ctx,
925
- struct ggml_tensor * a,
926
- struct ggml_tensor * b,
923
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_2d_inplace(
924
+ struct wsp_ggml_context * ctx,
925
+ struct wsp_ggml_tensor * a,
926
+ struct wsp_ggml_tensor * b,
927
927
  size_t nb1,
928
928
  size_t offset);
929
929
 
930
930
 
931
931
  // a -> b, return view(b)
932
- GGML_API struct ggml_tensor * ggml_cpy(
933
- struct ggml_context * ctx,
934
- struct ggml_tensor * a,
935
- struct ggml_tensor * b);
932
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cpy(
933
+ struct wsp_ggml_context * ctx,
934
+ struct wsp_ggml_tensor * a,
935
+ struct wsp_ggml_tensor * b);
936
936
 
937
937
  // make contiguous
938
- GGML_API struct ggml_tensor * ggml_cont(
939
- struct ggml_context * ctx,
940
- struct ggml_tensor * a);
938
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cont(
939
+ struct wsp_ggml_context * ctx,
940
+ struct wsp_ggml_tensor * a);
941
941
 
942
942
  // return view(a), b specifies the new shape
943
943
  // TODO: when we start computing gradient, make a copy instead of view
944
- GGML_API struct ggml_tensor * ggml_reshape(
945
- struct ggml_context * ctx,
946
- struct ggml_tensor * a,
947
- struct ggml_tensor * b);
944
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reshape(
945
+ struct wsp_ggml_context * ctx,
946
+ struct wsp_ggml_tensor * a,
947
+ struct wsp_ggml_tensor * b);
948
948
 
949
949
  // return view(a)
950
950
  // TODO: when we start computing gradient, make a copy instead of view
951
- GGML_API struct ggml_tensor * ggml_reshape_1d(
952
- struct ggml_context * ctx,
953
- struct ggml_tensor * a,
951
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reshape_1d(
952
+ struct wsp_ggml_context * ctx,
953
+ struct wsp_ggml_tensor * a,
954
954
  int64_t ne0);
955
955
 
956
- GGML_API struct ggml_tensor * ggml_reshape_2d(
957
- struct ggml_context * ctx,
958
- struct ggml_tensor * a,
956
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reshape_2d(
957
+ struct wsp_ggml_context * ctx,
958
+ struct wsp_ggml_tensor * a,
959
959
  int64_t ne0,
960
960
  int64_t ne1);
961
961
 
962
962
  // return view(a)
963
963
  // TODO: when we start computing gradient, make a copy instead of view
964
- GGML_API struct ggml_tensor * ggml_reshape_3d(
965
- struct ggml_context * ctx,
966
- struct ggml_tensor * a,
964
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reshape_3d(
965
+ struct wsp_ggml_context * ctx,
966
+ struct wsp_ggml_tensor * a,
967
967
  int64_t ne0,
968
968
  int64_t ne1,
969
969
  int64_t ne2);
970
970
 
971
- GGML_API struct ggml_tensor * ggml_reshape_4d(
972
- struct ggml_context * ctx,
973
- struct ggml_tensor * a,
971
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reshape_4d(
972
+ struct wsp_ggml_context * ctx,
973
+ struct wsp_ggml_tensor * a,
974
974
  int64_t ne0,
975
975
  int64_t ne1,
976
976
  int64_t ne2,
977
977
  int64_t ne3);
978
978
 
979
979
  // offset in bytes
980
- GGML_API struct ggml_tensor * ggml_view_1d(
981
- struct ggml_context * ctx,
982
- struct ggml_tensor * a,
980
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_1d(
981
+ struct wsp_ggml_context * ctx,
982
+ struct wsp_ggml_tensor * a,
983
983
  int64_t ne0,
984
984
  size_t offset);
985
985
 
986
- GGML_API struct ggml_tensor * ggml_view_2d(
987
- struct ggml_context * ctx,
988
- struct ggml_tensor * a,
986
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_2d(
987
+ struct wsp_ggml_context * ctx,
988
+ struct wsp_ggml_tensor * a,
989
989
  int64_t ne0,
990
990
  int64_t ne1,
991
991
  size_t nb1, // row stride in bytes
992
992
  size_t offset);
993
993
 
994
- GGML_API struct ggml_tensor * ggml_view_3d(
995
- struct ggml_context * ctx,
996
- struct ggml_tensor * a,
994
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_3d(
995
+ struct wsp_ggml_context * ctx,
996
+ struct wsp_ggml_tensor * a,
997
997
  int64_t ne0,
998
998
  int64_t ne1,
999
999
  int64_t ne2,
@@ -1001,9 +1001,9 @@ extern "C" {
1001
1001
  size_t nb2, // slice stride in bytes
1002
1002
  size_t offset);
1003
1003
 
1004
- GGML_API struct ggml_tensor * ggml_view_4d(
1005
- struct ggml_context * ctx,
1006
- struct ggml_tensor * a,
1004
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_4d(
1005
+ struct wsp_ggml_context * ctx,
1006
+ struct wsp_ggml_tensor * a,
1007
1007
  int64_t ne0,
1008
1008
  int64_t ne1,
1009
1009
  int64_t ne2,
@@ -1013,95 +1013,95 @@ extern "C" {
1013
1013
  size_t nb3,
1014
1014
  size_t offset);
1015
1015
 
1016
- GGML_API struct ggml_tensor * ggml_permute(
1017
- struct ggml_context * ctx,
1018
- struct ggml_tensor * a,
1016
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_permute(
1017
+ struct wsp_ggml_context * ctx,
1018
+ struct wsp_ggml_tensor * a,
1019
1019
  int axis0,
1020
1020
  int axis1,
1021
1021
  int axis2,
1022
1022
  int axis3);
1023
1023
 
1024
- // alias for ggml_permute(ctx, a, 1, 0, 2, 3)
1025
- GGML_API struct ggml_tensor * ggml_transpose(
1026
- struct ggml_context * ctx,
1027
- struct ggml_tensor * a);
1024
+ // alias for wsp_ggml_permute(ctx, a, 1, 0, 2, 3)
1025
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_transpose(
1026
+ struct wsp_ggml_context * ctx,
1027
+ struct wsp_ggml_tensor * a);
1028
1028
 
1029
- GGML_API struct ggml_tensor * ggml_get_rows(
1030
- struct ggml_context * ctx,
1031
- struct ggml_tensor * a,
1032
- struct ggml_tensor * b);
1029
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_rows(
1030
+ struct wsp_ggml_context * ctx,
1031
+ struct wsp_ggml_tensor * a,
1032
+ struct wsp_ggml_tensor * b);
1033
1033
 
1034
- GGML_API struct ggml_tensor * ggml_get_rows_back(
1035
- struct ggml_context * ctx,
1036
- struct ggml_tensor * a,
1037
- struct ggml_tensor * b,
1038
- struct ggml_tensor * c);
1034
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_rows_back(
1035
+ struct wsp_ggml_context * ctx,
1036
+ struct wsp_ggml_tensor * a,
1037
+ struct wsp_ggml_tensor * b,
1038
+ struct wsp_ggml_tensor * c);
1039
1039
 
1040
- GGML_API struct ggml_tensor * ggml_diag(
1041
- struct ggml_context * ctx,
1042
- struct ggml_tensor * a);
1040
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag(
1041
+ struct wsp_ggml_context * ctx,
1042
+ struct wsp_ggml_tensor * a);
1043
1043
 
1044
1044
  // set elements above the diagonal to -INF
1045
- GGML_API struct ggml_tensor * ggml_diag_mask_inf(
1046
- struct ggml_context * ctx,
1047
- struct ggml_tensor * a,
1045
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag_mask_inf(
1046
+ struct wsp_ggml_context * ctx,
1047
+ struct wsp_ggml_tensor * a,
1048
1048
  int n_past);
1049
1049
 
1050
1050
  // in-place, returns view(a)
1051
- GGML_API struct ggml_tensor * ggml_diag_mask_inf_inplace(
1052
- struct ggml_context * ctx,
1053
- struct ggml_tensor * a,
1051
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag_mask_inf_inplace(
1052
+ struct wsp_ggml_context * ctx,
1053
+ struct wsp_ggml_tensor * a,
1054
1054
  int n_past);
1055
1055
 
1056
1056
  // set elements above the diagonal to 0
1057
- GGML_API struct ggml_tensor * ggml_diag_mask_zero(
1058
- struct ggml_context * ctx,
1059
- struct ggml_tensor * a,
1057
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag_mask_zero(
1058
+ struct wsp_ggml_context * ctx,
1059
+ struct wsp_ggml_tensor * a,
1060
1060
  int n_past);
1061
1061
 
1062
1062
  // in-place, returns view(a)
1063
- GGML_API struct ggml_tensor * ggml_diag_mask_zero_inplace(
1064
- struct ggml_context * ctx,
1065
- struct ggml_tensor * a,
1063
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag_mask_zero_inplace(
1064
+ struct wsp_ggml_context * ctx,
1065
+ struct wsp_ggml_tensor * a,
1066
1066
  int n_past);
1067
1067
 
1068
- GGML_API struct ggml_tensor * ggml_soft_max(
1069
- struct ggml_context * ctx,
1070
- struct ggml_tensor * a);
1068
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max(
1069
+ struct wsp_ggml_context * ctx,
1070
+ struct wsp_ggml_tensor * a);
1071
1071
 
1072
1072
  // in-place, returns view(a)
1073
- GGML_API struct ggml_tensor * ggml_soft_max_inplace(
1074
- struct ggml_context * ctx,
1075
- struct ggml_tensor * a);
1073
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_inplace(
1074
+ struct wsp_ggml_context * ctx,
1075
+ struct wsp_ggml_tensor * a);
1076
1076
 
1077
- GGML_API struct ggml_tensor * ggml_soft_max_back(
1078
- struct ggml_context * ctx,
1079
- struct ggml_tensor * a,
1080
- struct ggml_tensor * b);
1077
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_back(
1078
+ struct wsp_ggml_context * ctx,
1079
+ struct wsp_ggml_tensor * a,
1080
+ struct wsp_ggml_tensor * b);
1081
1081
 
1082
1082
  // in-place, returns view(a)
1083
- GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
1084
- struct ggml_context * ctx,
1085
- struct ggml_tensor * a,
1086
- struct ggml_tensor * b);
1083
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_back_inplace(
1084
+ struct wsp_ggml_context * ctx,
1085
+ struct wsp_ggml_tensor * a,
1086
+ struct wsp_ggml_tensor * b);
1087
1087
 
1088
1088
  // rotary position embedding
1089
1089
  // if (mode & 1) != 0, skip n_past elements
1090
1090
  // if (mode & 2) != 0, GPT-NeoX style
1091
1091
  // if (mode & 4) != 0, ChatGLM style
1092
1092
  // TODO: avoid creating a new tensor every time
1093
- GGML_API struct ggml_tensor * ggml_rope(
1094
- struct ggml_context * ctx,
1095
- struct ggml_tensor * a,
1093
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope(
1094
+ struct wsp_ggml_context * ctx,
1095
+ struct wsp_ggml_tensor * a,
1096
1096
  int n_past,
1097
1097
  int n_dims,
1098
1098
  int mode,
1099
1099
  int n_ctx);
1100
1100
 
1101
1101
  // in-place, returns view(a)
1102
- GGML_API struct ggml_tensor * ggml_rope_inplace(
1103
- struct ggml_context * ctx,
1104
- struct ggml_tensor * a,
1102
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_inplace(
1103
+ struct wsp_ggml_context * ctx,
1104
+ struct wsp_ggml_tensor * a,
1105
1105
  int n_past,
1106
1106
  int n_dims,
1107
1107
  int mode,
@@ -1109,42 +1109,42 @@ extern "C" {
1109
1109
 
1110
1110
  // rotary position embedding backward, i.e., compute dx from dy
1111
1111
  // a - dy
1112
- GGML_API struct ggml_tensor * ggml_rope_back(
1113
- struct ggml_context * ctx,
1114
- struct ggml_tensor * a,
1112
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_back(
1113
+ struct wsp_ggml_context * ctx,
1114
+ struct wsp_ggml_tensor * a,
1115
1115
  int n_past,
1116
1116
  int n_dims,
1117
1117
  int mode);
1118
1118
 
1119
1119
  // alibi position embedding
1120
1120
  // in-place, returns view(a)
1121
- struct ggml_tensor * ggml_alibi(
1122
- struct ggml_context * ctx,
1123
- struct ggml_tensor * a,
1121
+ struct wsp_ggml_tensor * wsp_ggml_alibi(
1122
+ struct wsp_ggml_context * ctx,
1123
+ struct wsp_ggml_tensor * a,
1124
1124
  int n_past,
1125
1125
  int n_head,
1126
1126
  float bias_max);
1127
1127
 
1128
1128
  // clamp
1129
1129
  // in-place, returns view(a)
1130
- struct ggml_tensor * ggml_clamp(
1131
- struct ggml_context * ctx,
1132
- struct ggml_tensor * a,
1130
+ struct wsp_ggml_tensor * wsp_ggml_clamp(
1131
+ struct wsp_ggml_context * ctx,
1132
+ struct wsp_ggml_tensor * a,
1133
1133
  float min,
1134
1134
  float max);
1135
1135
 
1136
- GGML_API struct ggml_tensor * ggml_conv_1d(
1137
- struct ggml_context * ctx,
1138
- struct ggml_tensor * a,
1139
- struct ggml_tensor * b,
1136
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_1d(
1137
+ struct wsp_ggml_context * ctx,
1138
+ struct wsp_ggml_tensor * a,
1139
+ struct wsp_ggml_tensor * b,
1140
1140
  int s0, // stride
1141
1141
  int p0, // padding
1142
1142
  int d0); // dilation
1143
1143
 
1144
- GGML_API struct ggml_tensor * ggml_conv_2d(
1145
- struct ggml_context * ctx,
1146
- struct ggml_tensor * a,
1147
- struct ggml_tensor * b,
1144
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_2d(
1145
+ struct wsp_ggml_context * ctx,
1146
+ struct wsp_ggml_tensor * a,
1147
+ struct wsp_ggml_tensor * b,
1148
1148
  int s0,
1149
1149
  int s1,
1150
1150
  int p0,
@@ -1153,36 +1153,36 @@ extern "C" {
1153
1153
  int d1);
1154
1154
 
1155
1155
  // conv_1d with padding = half
1156
- // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
1157
- GGML_API struct ggml_tensor* ggml_conv_1d_ph(
1158
- struct ggml_context * ctx,
1159
- struct ggml_tensor * a,
1160
- struct ggml_tensor * b,
1156
+ // alias for wsp_ggml_conv_1d(a, b, s, a->ne[0]/2, d)
1157
+ WSP_GGML_API struct wsp_ggml_tensor* wsp_ggml_conv_1d_ph(
1158
+ struct wsp_ggml_context * ctx,
1159
+ struct wsp_ggml_tensor * a,
1160
+ struct wsp_ggml_tensor * b,
1161
1161
  int s,
1162
1162
  int d);
1163
1163
 
1164
- GGML_API struct ggml_tensor * ggml_flash_attn(
1165
- struct ggml_context * ctx,
1166
- struct ggml_tensor * q,
1167
- struct ggml_tensor * k,
1168
- struct ggml_tensor * v,
1164
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_flash_attn(
1165
+ struct wsp_ggml_context * ctx,
1166
+ struct wsp_ggml_tensor * q,
1167
+ struct wsp_ggml_tensor * k,
1168
+ struct wsp_ggml_tensor * v,
1169
1169
  bool masked);
1170
1170
 
1171
- GGML_API struct ggml_tensor * ggml_flash_attn_back(
1172
- struct ggml_context * ctx,
1173
- struct ggml_tensor * q,
1174
- struct ggml_tensor * k,
1175
- struct ggml_tensor * v,
1176
- struct ggml_tensor * d,
1171
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_flash_attn_back(
1172
+ struct wsp_ggml_context * ctx,
1173
+ struct wsp_ggml_tensor * q,
1174
+ struct wsp_ggml_tensor * k,
1175
+ struct wsp_ggml_tensor * v,
1176
+ struct wsp_ggml_tensor * d,
1177
1177
  bool masked);
1178
1178
 
1179
- GGML_API struct ggml_tensor * ggml_flash_ff(
1180
- struct ggml_context * ctx,
1181
- struct ggml_tensor * a,
1182
- struct ggml_tensor * b0,
1183
- struct ggml_tensor * b1,
1184
- struct ggml_tensor * c0,
1185
- struct ggml_tensor * c1);
1179
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_flash_ff(
1180
+ struct wsp_ggml_context * ctx,
1181
+ struct wsp_ggml_tensor * a,
1182
+ struct wsp_ggml_tensor * b0,
1183
+ struct wsp_ggml_tensor * b1,
1184
+ struct wsp_ggml_tensor * c0,
1185
+ struct wsp_ggml_tensor * c1);
1186
1186
 
1187
1187
  // partition into non-overlapping windows with padding if needed
1188
1188
  // example:
@@ -1190,167 +1190,167 @@ extern "C" {
1190
1190
  // w: 14
1191
1191
  // res: 768 14 14 25
1192
1192
  // used in sam
1193
- GGML_API struct ggml_tensor * ggml_win_part(
1194
- struct ggml_context * ctx,
1195
- struct ggml_tensor * a,
1193
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_win_part(
1194
+ struct wsp_ggml_context * ctx,
1195
+ struct wsp_ggml_tensor * a,
1196
1196
  int w);
1197
1197
 
1198
- // reverse of ggml_win_part
1198
+ // reverse of wsp_ggml_win_part
1199
1199
  // used in sam
1200
- GGML_API struct ggml_tensor * ggml_win_unpart(
1201
- struct ggml_context * ctx,
1202
- struct ggml_tensor * a,
1200
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_win_unpart(
1201
+ struct wsp_ggml_context * ctx,
1202
+ struct wsp_ggml_tensor * a,
1203
1203
  int w0,
1204
1204
  int h0,
1205
1205
  int w);
1206
1206
 
1207
1207
  // custom operators
1208
1208
 
1209
- typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
1210
- typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
1211
-
1212
- typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
1213
- typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
1214
- typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
1215
-
1216
- GGML_API struct ggml_tensor * ggml_map_unary_f32(
1217
- struct ggml_context * ctx,
1218
- struct ggml_tensor * a,
1219
- ggml_unary_op_f32_t fun);
1220
-
1221
- GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
1222
- struct ggml_context * ctx,
1223
- struct ggml_tensor * a,
1224
- ggml_unary_op_f32_t fun);
1225
-
1226
- GGML_API struct ggml_tensor * ggml_map_binary_f32(
1227
- struct ggml_context * ctx,
1228
- struct ggml_tensor * a,
1229
- struct ggml_tensor * b,
1230
- ggml_binary_op_f32_t fun);
1231
-
1232
- GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
1233
- struct ggml_context * ctx,
1234
- struct ggml_tensor * a,
1235
- struct ggml_tensor * b,
1236
- ggml_binary_op_f32_t fun);
1237
-
1238
- GGML_API struct ggml_tensor * ggml_map_custom1_f32(
1239
- struct ggml_context * ctx,
1240
- struct ggml_tensor * a,
1241
- ggml_custom1_op_f32_t fun);
1242
-
1243
- GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
1244
- struct ggml_context * ctx,
1245
- struct ggml_tensor * a,
1246
- ggml_custom1_op_f32_t fun);
1247
-
1248
- GGML_API struct ggml_tensor * ggml_map_custom2_f32(
1249
- struct ggml_context * ctx,
1250
- struct ggml_tensor * a,
1251
- struct ggml_tensor * b,
1252
- ggml_custom2_op_f32_t fun);
1253
-
1254
- GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
1255
- struct ggml_context * ctx,
1256
- struct ggml_tensor * a,
1257
- struct ggml_tensor * b,
1258
- ggml_custom2_op_f32_t fun);
1259
-
1260
- GGML_API struct ggml_tensor * ggml_map_custom3_f32(
1261
- struct ggml_context * ctx,
1262
- struct ggml_tensor * a,
1263
- struct ggml_tensor * b,
1264
- struct ggml_tensor * c,
1265
- ggml_custom3_op_f32_t fun);
1266
-
1267
- GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
1268
- struct ggml_context * ctx,
1269
- struct ggml_tensor * a,
1270
- struct ggml_tensor * b,
1271
- struct ggml_tensor * c,
1272
- ggml_custom3_op_f32_t fun);
1209
+ typedef void (*wsp_ggml_unary_op_f32_t) (const int, float *, const float *);
1210
+ typedef void (*wsp_ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
1211
+
1212
+ typedef void (*wsp_ggml_custom1_op_f32_t)(struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *);
1213
+ typedef void (*wsp_ggml_custom2_op_f32_t)(struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *);
1214
+ typedef void (*wsp_ggml_custom3_op_f32_t)(struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *, const struct wsp_ggml_tensor *);
1215
+
1216
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_unary_f32(
1217
+ struct wsp_ggml_context * ctx,
1218
+ struct wsp_ggml_tensor * a,
1219
+ wsp_ggml_unary_op_f32_t fun);
1220
+
1221
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_unary_inplace_f32(
1222
+ struct wsp_ggml_context * ctx,
1223
+ struct wsp_ggml_tensor * a,
1224
+ wsp_ggml_unary_op_f32_t fun);
1225
+
1226
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_binary_f32(
1227
+ struct wsp_ggml_context * ctx,
1228
+ struct wsp_ggml_tensor * a,
1229
+ struct wsp_ggml_tensor * b,
1230
+ wsp_ggml_binary_op_f32_t fun);
1231
+
1232
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_binary_inplace_f32(
1233
+ struct wsp_ggml_context * ctx,
1234
+ struct wsp_ggml_tensor * a,
1235
+ struct wsp_ggml_tensor * b,
1236
+ wsp_ggml_binary_op_f32_t fun);
1237
+
1238
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom1_f32(
1239
+ struct wsp_ggml_context * ctx,
1240
+ struct wsp_ggml_tensor * a,
1241
+ wsp_ggml_custom1_op_f32_t fun);
1242
+
1243
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom1_inplace_f32(
1244
+ struct wsp_ggml_context * ctx,
1245
+ struct wsp_ggml_tensor * a,
1246
+ wsp_ggml_custom1_op_f32_t fun);
1247
+
1248
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom2_f32(
1249
+ struct wsp_ggml_context * ctx,
1250
+ struct wsp_ggml_tensor * a,
1251
+ struct wsp_ggml_tensor * b,
1252
+ wsp_ggml_custom2_op_f32_t fun);
1253
+
1254
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom2_inplace_f32(
1255
+ struct wsp_ggml_context * ctx,
1256
+ struct wsp_ggml_tensor * a,
1257
+ struct wsp_ggml_tensor * b,
1258
+ wsp_ggml_custom2_op_f32_t fun);
1259
+
1260
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom3_f32(
1261
+ struct wsp_ggml_context * ctx,
1262
+ struct wsp_ggml_tensor * a,
1263
+ struct wsp_ggml_tensor * b,
1264
+ struct wsp_ggml_tensor * c,
1265
+ wsp_ggml_custom3_op_f32_t fun);
1266
+
1267
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_map_custom3_inplace_f32(
1268
+ struct wsp_ggml_context * ctx,
1269
+ struct wsp_ggml_tensor * a,
1270
+ struct wsp_ggml_tensor * b,
1271
+ struct wsp_ggml_tensor * c,
1272
+ wsp_ggml_custom3_op_f32_t fun);
1273
1273
 
1274
1274
  // loss function
1275
1275
 
1276
- GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
1277
- struct ggml_context * ctx,
1278
- struct ggml_tensor * a,
1279
- struct ggml_tensor * b);
1276
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cross_entropy_loss(
1277
+ struct wsp_ggml_context * ctx,
1278
+ struct wsp_ggml_tensor * a,
1279
+ struct wsp_ggml_tensor * b);
1280
1280
 
1281
- GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
1282
- struct ggml_context * ctx,
1283
- struct ggml_tensor * a,
1284
- struct ggml_tensor * b,
1285
- struct ggml_tensor * c);
1281
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cross_entropy_loss_back(
1282
+ struct wsp_ggml_context * ctx,
1283
+ struct wsp_ggml_tensor * a,
1284
+ struct wsp_ggml_tensor * b,
1285
+ struct wsp_ggml_tensor * c);
1286
1286
 
1287
1287
  //
1288
1288
  // automatic differentiation
1289
1289
  //
1290
1290
 
1291
- GGML_API void ggml_set_param(
1292
- struct ggml_context * ctx,
1293
- struct ggml_tensor * tensor);
1291
+ WSP_GGML_API void wsp_ggml_set_param(
1292
+ struct wsp_ggml_context * ctx,
1293
+ struct wsp_ggml_tensor * tensor);
1294
1294
 
1295
- GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
1295
+ WSP_GGML_API void wsp_ggml_build_forward_expand(struct wsp_ggml_cgraph * cgraph, struct wsp_ggml_tensor * tensor);
1296
1296
 
1297
- GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
1298
- GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
1297
+ WSP_GGML_API struct wsp_ggml_cgraph wsp_ggml_build_forward (struct wsp_ggml_tensor * tensor);
1298
+ WSP_GGML_API struct wsp_ggml_cgraph wsp_ggml_build_backward(struct wsp_ggml_context * ctx, struct wsp_ggml_cgraph * gf, bool keep);
1299
1299
 
1300
- GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
1301
- GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph);
1300
+ WSP_GGML_API void wsp_ggml_graph_compute(struct wsp_ggml_context * ctx, struct wsp_ggml_cgraph * cgraph);
1301
+ WSP_GGML_API void wsp_ggml_graph_reset (struct wsp_ggml_cgraph * cgraph);
1302
1302
 
1303
- GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
1303
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_graph_get_tensor(struct wsp_ggml_cgraph * cgraph, const char * name);
1304
1304
 
1305
- GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
1306
- GGML_API struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
1305
+ WSP_GGML_API void wsp_ggml_graph_export(const struct wsp_ggml_cgraph * cgraph, const char * fname);
1306
+ WSP_GGML_API struct wsp_ggml_cgraph wsp_ggml_graph_import(const char * fname, struct wsp_ggml_context ** ctx_data, struct wsp_ggml_context ** ctx_eval);
1307
1307
 
1308
1308
  // print info and performance information for the graph
1309
- GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
1309
+ WSP_GGML_API void wsp_ggml_graph_print(const struct wsp_ggml_cgraph * cgraph);
1310
1310
 
1311
1311
  // dump the graph into a file using the dot format
1312
- GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
1312
+ WSP_GGML_API void wsp_ggml_graph_dump_dot(const struct wsp_ggml_cgraph * gb, const struct wsp_ggml_cgraph * gf, const char * filename);
1313
1313
 
1314
1314
  //
1315
1315
  // optimization
1316
1316
  //
1317
1317
 
1318
1318
  // optimization methods
1319
- enum ggml_opt_type {
1320
- GGML_OPT_ADAM,
1321
- GGML_OPT_LBFGS,
1319
+ enum wsp_ggml_opt_type {
1320
+ WSP_GGML_OPT_ADAM,
1321
+ WSP_GGML_OPT_LBFGS,
1322
1322
  };
1323
1323
 
1324
1324
  // linesearch methods
1325
- enum ggml_linesearch {
1326
- GGML_LINESEARCH_DEFAULT = 1,
1325
+ enum wsp_ggml_linesearch {
1326
+ WSP_GGML_LINESEARCH_DEFAULT = 1,
1327
1327
 
1328
- GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0,
1329
- GGML_LINESEARCH_BACKTRACKING_WOLFE = 1,
1330
- GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
1328
+ WSP_GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0,
1329
+ WSP_GGML_LINESEARCH_BACKTRACKING_WOLFE = 1,
1330
+ WSP_GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
1331
1331
  };
1332
1332
 
1333
1333
  // optimization return values
1334
- enum ggml_opt_result {
1335
- GGML_OPT_OK = 0,
1336
- GGML_OPT_DID_NOT_CONVERGE,
1337
- GGML_OPT_NO_CONTEXT,
1338
- GGML_OPT_INVALID_WOLFE,
1339
- GGML_OPT_FAIL,
1340
-
1341
- GGML_LINESEARCH_FAIL = -128,
1342
- GGML_LINESEARCH_MINIMUM_STEP,
1343
- GGML_LINESEARCH_MAXIMUM_STEP,
1344
- GGML_LINESEARCH_MAXIMUM_ITERATIONS,
1345
- GGML_LINESEARCH_INVALID_PARAMETERS,
1334
+ enum wsp_ggml_opt_result {
1335
+ WSP_GGML_OPT_OK = 0,
1336
+ WSP_GGML_OPT_DID_NOT_CONVERGE,
1337
+ WSP_GGML_OPT_NO_CONTEXT,
1338
+ WSP_GGML_OPT_INVALID_WOLFE,
1339
+ WSP_GGML_OPT_FAIL,
1340
+
1341
+ WSP_GGML_LINESEARCH_FAIL = -128,
1342
+ WSP_GGML_LINESEARCH_MINIMUM_STEP,
1343
+ WSP_GGML_LINESEARCH_MAXIMUM_STEP,
1344
+ WSP_GGML_LINESEARCH_MAXIMUM_ITERATIONS,
1345
+ WSP_GGML_LINESEARCH_INVALID_PARAMETERS,
1346
1346
  };
1347
1347
 
1348
1348
  // optimization parameters
1349
1349
  //
1350
- // see ggml.c (ggml_opt_default_params) for default values
1350
+ // see ggml.c (wsp_ggml_opt_default_params) for default values
1351
1351
  //
1352
- struct ggml_opt_params {
1353
- enum ggml_opt_type type;
1352
+ struct wsp_ggml_opt_params {
1353
+ enum wsp_ggml_opt_type type;
1354
1354
 
1355
1355
  int n_threads;
1356
1356
 
@@ -1400,13 +1400,13 @@ extern "C" {
1400
1400
  float min_step;
1401
1401
  float max_step;
1402
1402
 
1403
- enum ggml_linesearch linesearch;
1403
+ enum wsp_ggml_linesearch linesearch;
1404
1404
  } lbfgs;
1405
1405
  };
1406
1406
 
1407
- struct ggml_opt_context {
1408
- struct ggml_context * ctx;
1409
- struct ggml_opt_params params;
1407
+ struct wsp_ggml_opt_context {
1408
+ struct wsp_ggml_context * ctx;
1409
+ struct wsp_ggml_opt_params params;
1410
1410
 
1411
1411
  int iter;
1412
1412
  int64_t nx; // number of parameter elements
@@ -1414,30 +1414,30 @@ extern "C" {
1414
1414
  bool just_initialized;
1415
1415
 
1416
1416
  struct {
1417
- struct ggml_tensor * x; // view of the parameters
1418
- struct ggml_tensor * g1; // gradient
1419
- struct ggml_tensor * g2; // gradient squared
1420
- struct ggml_tensor * m; // first moment
1421
- struct ggml_tensor * v; // second moment
1422
- struct ggml_tensor * mh; // first moment hat
1423
- struct ggml_tensor * vh; // second moment hat
1424
- struct ggml_tensor * pf; // past function values
1417
+ struct wsp_ggml_tensor * x; // view of the parameters
1418
+ struct wsp_ggml_tensor * g1; // gradient
1419
+ struct wsp_ggml_tensor * g2; // gradient squared
1420
+ struct wsp_ggml_tensor * m; // first moment
1421
+ struct wsp_ggml_tensor * v; // second moment
1422
+ struct wsp_ggml_tensor * mh; // first moment hat
1423
+ struct wsp_ggml_tensor * vh; // second moment hat
1424
+ struct wsp_ggml_tensor * pf; // past function values
1425
1425
  float fx_best;
1426
1426
  float fx_prev;
1427
1427
  int n_no_improvement;
1428
1428
  } adam;
1429
1429
 
1430
1430
  struct {
1431
- struct ggml_tensor * x; // current parameters
1432
- struct ggml_tensor * xp; // previous parameters
1433
- struct ggml_tensor * g; // current gradient
1434
- struct ggml_tensor * gp; // previous gradient
1435
- struct ggml_tensor * d; // search direction
1436
- struct ggml_tensor * pf; // past function values
1437
- struct ggml_tensor * lmal; // the L-BFGS memory alpha
1438
- struct ggml_tensor * lmys; // the L-BFGS memory ys
1439
- struct ggml_tensor * lms; // the L-BFGS memory s
1440
- struct ggml_tensor * lmy; // the L-BFGS memory y
1431
+ struct wsp_ggml_tensor * x; // current parameters
1432
+ struct wsp_ggml_tensor * xp; // previous parameters
1433
+ struct wsp_ggml_tensor * g; // current gradient
1434
+ struct wsp_ggml_tensor * gp; // previous gradient
1435
+ struct wsp_ggml_tensor * d; // search direction
1436
+ struct wsp_ggml_tensor * pf; // past function values
1437
+ struct wsp_ggml_tensor * lmal; // the L-BFGS memory alpha
1438
+ struct wsp_ggml_tensor * lmys; // the L-BFGS memory ys
1439
+ struct wsp_ggml_tensor * lms; // the L-BFGS memory s
1440
+ struct wsp_ggml_tensor * lmy; // the L-BFGS memory y
1441
1441
  float fx_best;
1442
1442
  float step;
1443
1443
  int j;
@@ -1447,68 +1447,68 @@ extern "C" {
1447
1447
  } lbfgs;
1448
1448
  };
1449
1449
 
1450
- GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
1450
+ WSP_GGML_API struct wsp_ggml_opt_params wsp_ggml_opt_default_params(enum wsp_ggml_opt_type type);
1451
1451
 
1452
1452
  // optimize the function defined by the tensor f
1453
- GGML_API enum ggml_opt_result ggml_opt(
1454
- struct ggml_context * ctx,
1455
- struct ggml_opt_params params,
1456
- struct ggml_tensor * f);
1453
+ WSP_GGML_API enum wsp_ggml_opt_result wsp_ggml_opt(
1454
+ struct wsp_ggml_context * ctx,
1455
+ struct wsp_ggml_opt_params params,
1456
+ struct wsp_ggml_tensor * f);
1457
1457
 
1458
1458
  // initialize optimizer context
1459
- GGML_API void ggml_opt_init(
1460
- struct ggml_context * ctx,
1461
- struct ggml_opt_context * opt,
1462
- struct ggml_opt_params params,
1459
+ WSP_GGML_API void wsp_ggml_opt_init(
1460
+ struct wsp_ggml_context * ctx,
1461
+ struct wsp_ggml_opt_context * opt,
1462
+ struct wsp_ggml_opt_params params,
1463
1463
  int64_t nx);
1464
1464
 
1465
1465
  // continue optimizing the function defined by the tensor f
1466
- GGML_API enum ggml_opt_result ggml_opt_resume(
1467
- struct ggml_context * ctx,
1468
- struct ggml_opt_context * opt,
1469
- struct ggml_tensor * f);
1466
+ WSP_GGML_API enum wsp_ggml_opt_result wsp_ggml_opt_resume(
1467
+ struct wsp_ggml_context * ctx,
1468
+ struct wsp_ggml_opt_context * opt,
1469
+ struct wsp_ggml_tensor * f);
1470
1470
 
1471
1471
  // continue optimizing the function defined by the tensor f
1472
- GGML_API enum ggml_opt_result ggml_opt_resume_g(
1473
- struct ggml_context * ctx,
1474
- struct ggml_opt_context * opt,
1475
- struct ggml_tensor * f,
1476
- struct ggml_cgraph * gf,
1477
- struct ggml_cgraph * gb);
1472
+ WSP_GGML_API enum wsp_ggml_opt_result wsp_ggml_opt_resume_g(
1473
+ struct wsp_ggml_context * ctx,
1474
+ struct wsp_ggml_opt_context * opt,
1475
+ struct wsp_ggml_tensor * f,
1476
+ struct wsp_ggml_cgraph * gf,
1477
+ struct wsp_ggml_cgraph * gb);
1478
1478
 
1479
1479
  //
1480
1480
  // quantization
1481
1481
  //
1482
1482
 
1483
- GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
1484
- GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
1485
- GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
1486
- GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
1487
- GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
1483
+ WSP_GGML_API size_t wsp_ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
1484
+ WSP_GGML_API size_t wsp_ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
1485
+ WSP_GGML_API size_t wsp_ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
1486
+ WSP_GGML_API size_t wsp_ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
1487
+ WSP_GGML_API size_t wsp_ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
1488
1488
 
1489
- GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
1489
+ WSP_GGML_API size_t wsp_ggml_quantize_chunk(enum wsp_ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
1490
1490
 
1491
1491
  //
1492
1492
  // system info
1493
1493
  //
1494
1494
 
1495
- GGML_API int ggml_cpu_has_avx (void);
1496
- GGML_API int ggml_cpu_has_avx2 (void);
1497
- GGML_API int ggml_cpu_has_avx512 (void);
1498
- GGML_API int ggml_cpu_has_avx512_vbmi(void);
1499
- GGML_API int ggml_cpu_has_avx512_vnni(void);
1500
- GGML_API int ggml_cpu_has_fma (void);
1501
- GGML_API int ggml_cpu_has_neon (void);
1502
- GGML_API int ggml_cpu_has_arm_fma (void);
1503
- GGML_API int ggml_cpu_has_f16c (void);
1504
- GGML_API int ggml_cpu_has_fp16_va (void);
1505
- GGML_API int ggml_cpu_has_wasm_simd (void);
1506
- GGML_API int ggml_cpu_has_blas (void);
1507
- GGML_API int ggml_cpu_has_cublas (void);
1508
- GGML_API int ggml_cpu_has_clblast (void);
1509
- GGML_API int ggml_cpu_has_gpublas (void);
1510
- GGML_API int ggml_cpu_has_sse3 (void);
1511
- GGML_API int ggml_cpu_has_vsx (void);
1495
+ WSP_GGML_API int wsp_ggml_cpu_has_avx (void);
1496
+ WSP_GGML_API int wsp_ggml_cpu_has_avx2 (void);
1497
+ WSP_GGML_API int wsp_ggml_cpu_has_avx512 (void);
1498
+ WSP_GGML_API int wsp_ggml_cpu_has_avx512_vbmi(void);
1499
+ WSP_GGML_API int wsp_ggml_cpu_has_avx512_vnni(void);
1500
+ WSP_GGML_API int wsp_ggml_cpu_has_fma (void);
1501
+ WSP_GGML_API int wsp_ggml_cpu_has_neon (void);
1502
+ WSP_GGML_API int wsp_ggml_cpu_has_arm_fma (void);
1503
+ WSP_GGML_API int wsp_ggml_cpu_has_f16c (void);
1504
+ WSP_GGML_API int wsp_ggml_cpu_has_fp16_va (void);
1505
+ WSP_GGML_API int wsp_ggml_cpu_has_wasm_simd (void);
1506
+ WSP_GGML_API int wsp_ggml_cpu_has_blas (void);
1507
+ WSP_GGML_API int wsp_ggml_cpu_has_cublas (void);
1508
+ WSP_GGML_API int wsp_ggml_cpu_has_clblast (void);
1509
+ WSP_GGML_API int wsp_ggml_cpu_has_gpublas (void);
1510
+ WSP_GGML_API int wsp_ggml_cpu_has_sse3 (void);
1511
+ WSP_GGML_API int wsp_ggml_cpu_has_vsx (void);
1512
1512
 
1513
1513
  //
1514
1514
  // Internal types and functions exposed for tests and benchmarks
@@ -1516,13 +1516,13 @@ extern "C" {
1516
1516
 
1517
1517
  #ifdef __cplusplus
1518
1518
  // restrict not standard in C++
1519
- #define GGML_RESTRICT
1519
+ #define WSP_GGML_RESTRICT
1520
1520
  #else
1521
- #define GGML_RESTRICT restrict
1521
+ #define WSP_GGML_RESTRICT restrict
1522
1522
  #endif
1523
- typedef void (*dequantize_row_q_t)(const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
1524
- typedef void (*quantize_row_q_t) (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
1525
- typedef void (*vec_dot_q_t) (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
1523
+ typedef void (*dequantize_row_q_t)(const void * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int k);
1524
+ typedef void (*quantize_row_q_t) (const float * WSP_GGML_RESTRICT x, void * WSP_GGML_RESTRICT y, int k);
1525
+ typedef void (*vec_dot_q_t) (const int n, float * WSP_GGML_RESTRICT s, const void * WSP_GGML_RESTRICT x, const void * WSP_GGML_RESTRICT y);
1526
1526
 
1527
1527
  typedef struct {
1528
1528
  dequantize_row_q_t dequantize_row_q;
@@ -1530,10 +1530,10 @@ extern "C" {
1530
1530
  quantize_row_q_t quantize_row_q_reference;
1531
1531
  quantize_row_q_t quantize_row_q_dot;
1532
1532
  vec_dot_q_t vec_dot_q;
1533
- enum ggml_type vec_dot_type;
1533
+ enum wsp_ggml_type vec_dot_type;
1534
1534
  } quantize_fns_t;
1535
1535
 
1536
- quantize_fns_t ggml_internal_get_quantize_fn(size_t i);
1536
+ quantize_fns_t wsp_ggml_internal_get_quantize_fn(size_t i);
1537
1537
 
1538
1538
  #ifdef __cplusplus
1539
1539
  }