llama_cpp 0.0.6 → 0.0.7
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -1
- data/ext/llama_cpp/extconf.rb +9 -0
- data/ext/llama_cpp/llama_cpp.cpp +26 -0
- data/ext/llama_cpp/src/ggml-cuda.h +32 -0
- data/ext/llama_cpp/src/ggml-opencl.c +216 -0
- data/ext/llama_cpp/src/ggml-opencl.h +24 -0
- data/ext/llama_cpp/src/ggml.c +1436 -624
- data/ext/llama_cpp/src/ggml.h +654 -627
- data/ext/llama_cpp/src/llama.cpp +212 -29
- data/ext/llama_cpp/src/llama.h +17 -13
- data/ext/llama_cpp/src/llama_util.h +15 -2
- data/lib/llama_cpp/client.rb +151 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +16 -8
- data/sig/llama_cpp.rbs +16 -1
- metadata +5 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -169,14 +169,27 @@
 //
 //

-#ifdef  __cplusplus
-extern "C" {
+#ifdef GGML_SHARED
+#    if defined(_WIN32) && !defined(__MINGW32__)
+#        ifdef GGML_BUILD
+#            define GGML_API __declspec(dllexport)
+#        else
+#            define GGML_API __declspec(dllimport)
+#        endif
+#    else
+#        define GGML_API __attribute__ ((visibility ("default")))
+#    endif
+#else
+#    define GGML_API
 #endif

 #include <stdint.h>
 #include <stddef.h>
 #include <stdbool.h>

+#define GGML_FILE_MAGIC   0x67676d6c // "ggml"
+#define GGML_FILE_VERSION 1
+
 #define GGML_MAX_DIMS          4
 #define GGML_MAX_NODES         4096
 #define GGML_MAX_PARAMS        16
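The new GGML_SHARED/GGML_BUILD block above selects what GGML_API expands to: dllexport when building the Windows DLL, dllimport when consuming it, default ELF visibility on other platforms, and nothing for a static build. Below is a minimal sketch of that selection logic from a consumer's point of view; the translation unit and the message strings are illustrative and not part of the gem, only the macro names come from the hunk above.

#include <stdio.h>

/* Mirrors the GGML_SHARED / GGML_BUILD branches from the hunk above. */
#if defined(GGML_SHARED) && defined(GGML_BUILD)
#define GGML_API_KIND "building the shared library: symbols are exported"
#elif defined(GGML_SHARED)
#define GGML_API_KIND "using the shared library: symbols are imported (Windows) or get default visibility"
#else
#define GGML_API_KIND "static build: GGML_API expands to nothing"
#endif

int main(void) {
    printf("%s\n", GGML_API_KIND);
    return 0;
}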
@@ -184,682 +197,696 @@ extern "C" {
 #define GGML_MAX_OPT           4
 #define GGML_DEFAULT_N_THREADS 4

+#ifdef  __cplusplus
+extern "C" {
+#endif
+
 #ifdef __ARM_NEON
-    // we use the built-in 16-bit float type
-    typedef __fp16 ggml_fp16_t;
+    // we use the built-in 16-bit float type
+    typedef __fp16 ggml_fp16_t;
 #else
-    typedef uint16_t ggml_fp16_t;
+    typedef uint16_t ggml_fp16_t;
 #endif

-    [old lines 194-335: the previous, un-exported declarations of the FP16 converters, the ggml_object/ggml_context forward declarations, the ggml_type and ggml_op enums and the core structs, removed here and re-added below with GGML_API and the new quantization types]
+    // convert FP16 <-> FP32
+    GGML_API float       ggml_fp16_to_fp32(ggml_fp16_t x);
+    GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
+
+    struct ggml_object;
+    struct ggml_context;
+
+    enum ggml_type {
+        GGML_TYPE_F32  = 0,
+        GGML_TYPE_F16  = 1,
+        GGML_TYPE_Q4_0 = 2,
+        GGML_TYPE_Q4_1 = 3,
+        GGML_TYPE_Q4_2 = 4,
+        GGML_TYPE_Q4_3 = 5,
+        GGML_TYPE_Q5_0 = 6,
+        GGML_TYPE_Q5_1 = 7,
+        GGML_TYPE_Q8_0 = 8,
+        GGML_TYPE_Q8_1 = 9,
+        GGML_TYPE_I8,
+        GGML_TYPE_I16,
+        GGML_TYPE_I32,
+        GGML_TYPE_COUNT,
+    };
+
+    // available tensor operations:
+    enum ggml_op {
+        GGML_OP_NONE = 0,
+        GGML_OP_DUP, GGML_OP_ADD, GGML_OP_SUB, GGML_OP_MUL, GGML_OP_DIV,
+        GGML_OP_SQR, GGML_OP_SQRT, GGML_OP_SUM, GGML_OP_MEAN, GGML_OP_REPEAT,
+        GGML_OP_ABS, GGML_OP_SGN, GGML_OP_NEG, GGML_OP_STEP, GGML_OP_RELU, GGML_OP_GELU, GGML_OP_SILU,
+        GGML_OP_NORM, // normalize
+        GGML_OP_RMS_NORM,
+        GGML_OP_MUL_MAT,
+        GGML_OP_SCALE, GGML_OP_CPY, GGML_OP_CONT, GGML_OP_RESHAPE, GGML_OP_VIEW, GGML_OP_PERMUTE, GGML_OP_TRANSPOSE,
+        GGML_OP_GET_ROWS, GGML_OP_DIAG_MASK_INF, GGML_OP_SOFT_MAX, GGML_OP_ROPE,
+        GGML_OP_CONV_1D_1S, GGML_OP_CONV_1D_2S,
+        GGML_OP_FLASH_ATTN, GGML_OP_FLASH_FF,
+        GGML_OP_MAP_UNARY, GGML_OP_MAP_BINARY,
+        GGML_OP_COUNT,
+    };
+
+    // ggml object
+    struct ggml_object {
+        size_t offs;
+        size_t size;
+
+        struct ggml_object * next;
+
+        char padding[8];
+    };
+
+    static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
+
+    // n-dimensional tensor
+    struct ggml_tensor {
+        enum ggml_type type;
+
+        int     n_dims;
+        int64_t ne[GGML_MAX_DIMS]; // number of elements
+        size_t  nb[GGML_MAX_DIMS]; // stride in bytes:
+                                   // nb[0] = sizeof(type)
+                                   // nb[1] = nb[0]   * ne[0] + padding
+                                   // nb[i] = nb[i-1] * ne[i-1]
+
+        // compute data
+        enum ggml_op op;
+
+        bool is_param;
+
+        struct ggml_tensor * grad;
+        struct ggml_tensor * src0;
+        struct ggml_tensor * src1;
+        struct ggml_tensor * opt[GGML_MAX_OPT];
+
+        // thread scheduling
+        int n_tasks;
+
+        // performance
+        int     perf_runs;
+        int64_t perf_cycles;
+        int64_t perf_time_us;
+
+        void * data;
+        char padding[8];
+    };
+
+    // computation graph
+    struct ggml_cgraph {
+        int n_nodes;
+        int n_leafs;
+        int n_threads;
+
+        size_t work_size;
+        struct ggml_tensor * work;
+
+        struct ggml_tensor * nodes[GGML_MAX_NODES];
+        struct ggml_tensor * grads[GGML_MAX_NODES];
+        struct ggml_tensor * leafs[GGML_MAX_NODES];
+
+        // performance
+        int     perf_runs;
+        int64_t perf_cycles;
+        int64_t perf_time_us;
+    };
+
+    // scratch buffer
+    struct ggml_scratch {
+        size_t offs;
+        size_t size;
+        void * data;
+    };

|
-
struct ggml_init_params {
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
};
|
356
|
+
struct ggml_init_params {
|
357
|
+
// memory pool
|
358
|
+
size_t mem_size; // bytes
|
359
|
+
void * mem_buffer; // if NULL, memory will be allocated internally
|
360
|
+
bool no_alloc; // don't allocate memory for the tensor data
|
361
|
+
};
|
343
362
|
|
344
|
-
|
345
|
-
int64_t ggml_time_ms(void);
|
346
|
-
int64_t ggml_time_us(void);
|
347
|
-
int64_t ggml_cycles(void);
|
348
|
-
int64_t ggml_cycles_per_ms(void);
|
363
|
+
// misc
|
349
364
|
|
350
|
-
void
|
351
|
-
|
365
|
+
GGML_API void ggml_time_init(void); // call this once at the beginning of the program
|
366
|
+
GGML_API int64_t ggml_time_ms(void);
|
367
|
+
GGML_API int64_t ggml_time_us(void);
|
368
|
+
GGML_API int64_t ggml_cycles(void);
|
369
|
+
GGML_API int64_t ggml_cycles_per_ms(void);
|
352
370
|
|
353
|
-
|
354
|
-
|
371
|
+
GGML_API void ggml_print_object (const struct ggml_object * obj);
|
372
|
+
GGML_API void ggml_print_objects(const struct ggml_context * ctx);
|
355
373
|
|
356
|
-
|
357
|
-
size_t
|
358
|
-
float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
|
374
|
+
GGML_API int64_t ggml_nelements(const struct ggml_tensor * tensor);
|
375
|
+
GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
|
359
376
|
|
360
|
-
|
377
|
+
GGML_API int ggml_blck_size (enum ggml_type type);
|
378
|
+
GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
|
379
|
+
GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
|
361
380
|
|
362
|
-
|
381
|
+
GGML_API const char * ggml_type_name(enum ggml_type type);
|
363
382
|
|
364
|
-
|
383
|
+
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
|
365
384
|
|
366
|
-
|
367
|
-
void ggml_free(struct ggml_context * ctx);
|
385
|
+
GGML_API bool ggml_is_quantized(enum ggml_type type);
|
368
386
|
|
369
|
-
|
387
|
+
// main
|
370
388
|
|
371
|
-
|
389
|
+
GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
|
390
|
+
GGML_API void ggml_free(struct ggml_context * ctx);
|
372
391
|
|
373
|
-
struct
|
374
|
-
struct ggml_context * ctx,
|
375
|
-
enum ggml_type type,
|
376
|
-
int n_dims,
|
377
|
-
const int64_t *ne);
|
378
|
-
|
379
|
-
struct ggml_tensor * ggml_new_tensor_1d(
|
380
|
-
struct ggml_context * ctx,
|
381
|
-
enum ggml_type type,
|
382
|
-
int64_t ne0);
|
383
|
-
|
384
|
-
struct ggml_tensor * ggml_new_tensor_2d(
|
385
|
-
struct ggml_context * ctx,
|
386
|
-
enum ggml_type type,
|
387
|
-
int64_t ne0,
|
388
|
-
int64_t ne1);
|
389
|
-
|
390
|
-
struct ggml_tensor * ggml_new_tensor_3d(
|
391
|
-
struct ggml_context * ctx,
|
392
|
-
enum ggml_type type,
|
393
|
-
int64_t ne0,
|
394
|
-
int64_t ne1,
|
395
|
-
int64_t ne2);
|
396
|
-
|
397
|
-
struct ggml_tensor * ggml_new_tensor_4d(
|
398
|
-
struct ggml_context * ctx,
|
399
|
-
enum ggml_type type,
|
400
|
-
int64_t ne0,
|
401
|
-
int64_t ne1,
|
402
|
-
int64_t ne2,
|
403
|
-
int64_t ne3);
|
404
|
-
|
405
|
-
struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
|
406
|
-
struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
|
407
|
-
|
408
|
-
struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
|
409
|
-
struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
|
410
|
-
|
411
|
-
struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
|
412
|
-
struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
|
413
|
-
struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
|
414
|
-
|
415
|
-
int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
|
416
|
-
void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
|
417
|
-
|
418
|
-
float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
|
419
|
-
void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
|
420
|
-
|
421
|
-
void * ggml_get_data (const struct ggml_tensor * tensor);
|
422
|
-
float * ggml_get_data_f32(const struct ggml_tensor * tensor);
|
423
|
-
|
424
|
-
//
|
425
|
-
// operations on tensors with backpropagation
|
426
|
-
//
|
427
|
-
|
428
|
-
struct ggml_tensor * ggml_dup(
|
429
|
-
struct ggml_context * ctx,
|
430
|
-
struct ggml_tensor * a);
|
431
|
-
|
432
|
-
struct ggml_tensor * ggml_add(
|
433
|
-
struct ggml_context * ctx,
|
434
|
-
struct ggml_tensor * a,
|
435
|
-
struct ggml_tensor * b);
|
392
|
+
GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
|
436
393
|
|
394
|
+
GGML_API size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
|
437
395
|
|
-    [old lines 438-810: the previous, un-exported tensor constructor, tensor operation, automatic differentiation and optimization declarations, removed here and re-added below with the GGML_API qualifier]
+    GGML_API struct ggml_tensor * ggml_new_tensor   (struct ggml_context * ctx, enum ggml_type type, int n_dims, const int64_t *ne);
+    GGML_API struct ggml_tensor * ggml_new_tensor_1d(struct ggml_context * ctx, enum ggml_type type, int64_t ne0);
+    GGML_API struct ggml_tensor * ggml_new_tensor_2d(struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1);
+    GGML_API struct ggml_tensor * ggml_new_tensor_3d(struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2);
+    GGML_API struct ggml_tensor * ggml_new_tensor_4d(struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3);
+
+    GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
+    GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
+
+    GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
+    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
+
+    GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
+    GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
+    GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
+
+    GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
+    GGML_API void    ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
+    GGML_API float   ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
+    GGML_API void    ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
+
+    GGML_API void *  ggml_get_data    (const struct ggml_tensor * tensor);
+    GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
+
+    //
+    // operations on tensors with backpropagation
+    //
+    GGML_API struct ggml_tensor * ggml_dup        (struct ggml_context * ctx, struct ggml_tensor * a);
+    GGML_API struct ggml_tensor * ggml_add        (struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
+    GGML_API struct ggml_tensor * ggml_add_inplace(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
+    GGML_API struct ggml_tensor * ggml_sub        (struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
+    GGML_API struct ggml_tensor * ggml_mul        (struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
+    GGML_API struct ggml_tensor * ggml_div        (struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
+    GGML_API struct ggml_tensor * ggml_sqr        (struct ggml_context * ctx, struct ggml_tensor * a);
+    GGML_API struct ggml_tensor * ggml_sqrt       (struct ggml_context * ctx, struct ggml_tensor * a);
+
+    // return scalar
+    // TODO: compute sum along rows
+    GGML_API struct ggml_tensor * ggml_sum (struct ggml_context * ctx, struct ggml_tensor * a);
+    // mean along rows
+    GGML_API struct ggml_tensor * ggml_mean(struct ggml_context * ctx, struct ggml_tensor * a);
+
+    // if a is the same shape as b, and a is not parameter, return a
+    // otherwise, return a new tensor: repeat(a) to fit in b
+    GGML_API struct ggml_tensor * ggml_repeat(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_abs (struct ggml_context * ctx, struct ggml_tensor * a);
+    GGML_API struct ggml_tensor * ggml_sgn (struct ggml_context * ctx, struct ggml_tensor * a);
+    GGML_API struct ggml_tensor * ggml_neg (struct ggml_context * ctx, struct ggml_tensor * a);
+    GGML_API struct ggml_tensor * ggml_step(struct ggml_context * ctx, struct ggml_tensor * a);
+    GGML_API struct ggml_tensor * ggml_relu(struct ggml_context * ctx, struct ggml_tensor * a);
+
+    // TODO: double-check this computation is correct
+    GGML_API struct ggml_tensor * ggml_gelu(struct ggml_context * ctx, struct ggml_tensor * a);
+    GGML_API struct ggml_tensor * ggml_silu(struct ggml_context * ctx, struct ggml_tensor * a);
+
+    // normalize along rows
+    // TODO: eps is hardcoded to 1e-5 for now
+    GGML_API struct ggml_tensor * ggml_norm    (struct ggml_context * ctx, struct ggml_tensor * a);
+    GGML_API struct ggml_tensor * ggml_rms_norm(struct ggml_context * ctx, struct ggml_tensor * a);
+
+    // A: m rows, n columns
+    // B: p rows, n columns (i.e. we transpose it internally)
+    // result is m columns, p rows
+    GGML_API struct ggml_tensor * ggml_mul_mat(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
+
+    //
+    // operations on tensors without backpropagation
+    //
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_scale(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
+    // a -> b, return view(b)
+    GGML_API struct ggml_tensor * ggml_cpy  (struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
+    // make contiguous
+    GGML_API struct ggml_tensor * ggml_cont (struct ggml_context * ctx, struct ggml_tensor * a);
+
+    // return view(a), b specifies the new shape
+    // TODO: when we start computing gradient, make a copy instead of view
+    GGML_API struct ggml_tensor * ggml_reshape   (struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
+    GGML_API struct ggml_tensor * ggml_reshape_2d(struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1);
+    GGML_API struct ggml_tensor * ggml_reshape_3d(struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2);
+
+    // offset in bytes
+    GGML_API struct ggml_tensor * ggml_view_1d(struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, size_t offset);
+    GGML_API struct ggml_tensor * ggml_view_2d(struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, size_t nb1 /* row stride in bytes */, size_t offset);
+    GGML_API struct ggml_tensor * ggml_view_3d(struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2, size_t nb1 /* row stride in bytes */, size_t nb2 /* slice stride in bytes */, size_t offset);
+
+    GGML_API struct ggml_tensor * ggml_permute(struct ggml_context * ctx, struct ggml_tensor * a, int axis0, int axis1, int axis2, int axis3);
+    // alias for ggml_permute(ctx, a, 1, 0, 2, 3)
+    GGML_API struct ggml_tensor * ggml_transpose(struct ggml_context * ctx, struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_get_rows(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
+
+    // set elements above the diagonal to -INF
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_diag_mask_inf(struct ggml_context * ctx, struct ggml_tensor * a, int n_past);
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_soft_max(struct ggml_context * ctx, struct ggml_tensor * a);
+
+    // rotary position embedding
+    // in-place, returns view(a)
+    // if mode & 1 == 1, skip n_past elements
+    // if mode & 2 == 1, GPT-NeoX style
+    // TODO: avoid creating a new tensor every time
+    GGML_API struct ggml_tensor * ggml_rope(struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode);
+
+    // padding = 1
+    // TODO: we don't support extra parameters for now
+    //       that's why we are hard-coding the stride, padding, and dilation
+    //       not great ..
+    GGML_API struct ggml_tensor * ggml_conv_1d_1s(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
+    GGML_API struct ggml_tensor * ggml_conv_1d_2s(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_flash_attn(struct ggml_context * ctx, struct ggml_tensor * q, struct ggml_tensor * k, struct ggml_tensor * v, bool masked);
+    GGML_API struct ggml_tensor * ggml_flash_ff  (struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b0, struct ggml_tensor * b1, struct ggml_tensor * c0, struct ggml_tensor * c1);
+
+    // Mapping operations
+    GGML_API typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
+    GGML_API typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
+
+    GGML_API struct ggml_tensor * ggml_map_unary_f32 (struct ggml_context * ctx, struct ggml_tensor * a, const ggml_unary_op_f32_t fun);
+    GGML_API struct ggml_tensor * ggml_map_binary_f32(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, const ggml_binary_op_f32_t fun);
+
+    //
+    // automatic differentiation
+    //
+    GGML_API void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor);
+
+    GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+
+    GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
+    GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
+
+    GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
+    GGML_API void ggml_graph_reset  (struct ggml_cgraph * cgraph);
+
+    // print info and performance information for the graph
+    GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
+
+    // dump the graph into a file using the dot format
+    GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);

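Taken together, the context, tensor and graph functions declared above form the whole forward-compute path: create a context over a memory pool, build tensors and operations, then build and run a graph. A minimal sketch, assuming the header above is available as "ggml.h" and the library is linked; the pool size and the 4x2 matrix are illustrative only.

#include <stdio.h>
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        .mem_size   = 16 * 1024 * 1024, // 16 MB pool for tensors and graph work
        .mem_buffer = NULL,             // let ggml allocate the pool
        .no_alloc   = false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // a: 4 rows of 2 elements, b: a 2-element vector
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 4);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2);
    ggml_set_f32(a, 1.0f);
    ggml_set_f32(b, 2.0f);

    struct ggml_tensor * y = ggml_mul_mat(ctx, a, b); // row-wise dot products of a with b

    struct ggml_cgraph gf = ggml_build_forward(y);
    gf.n_threads = 1;
    ggml_graph_compute(ctx, &gf);

    printf("y[0] = %f\n", ggml_get_f32_1d(y, 0)); // 1*2 + 1*2 = 4.0
    ggml_free(ctx);
    return 0;
}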
-    [old lines 802-810: the previous, un-exported ggml_opt declaration and trailing optimization comments]
+    //
+    // optimization
+    //
+
+    // optimization methods
+    enum ggml_opt_type {
+        GGML_OPT_ADAM,
+        GGML_OPT_LBFGS,
+    };
+
+    // linesearch methods
+    enum ggml_linesearch {
+        GGML_LINESEARCH_DEFAULT = 1,
+
+        GGML_LINESEARCH_BACKTRACKING_ARMIJO       = 0,
+        GGML_LINESEARCH_BACKTRACKING_WOLFE        = 1,
+        GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
+    };
+
+    // optimization return values
+    enum ggml_opt_result {
+        GGML_OPT_OK = 0,
+        GGML_OPT_DID_NOT_CONVERGE,
+        GGML_OPT_NO_CONTEXT,
+        GGML_OPT_INVALID_WOLFE,
+        GGML_OPT_FAIL,
+
+        GGML_LINESEARCH_FAIL = -128,
+        GGML_LINESEARCH_MINIMUM_STEP,
+        GGML_LINESEARCH_MAXIMUM_STEP,
+        GGML_LINESEARCH_MAXIMUM_ITERATIONS,
+        GGML_LINESEARCH_INVALID_PARAMETERS,
+    };
+
+    // optimization parameters
+    //
+    //   see ggml.c (ggml_opt_default_params) for default values
+    //
+    struct ggml_opt_params {
+        enum ggml_opt_type type;
+
+        int n_threads;
+
+        // delta-based convergence test
+        //
+        //   if past == 0 - disabled
+        //   if past > 0:
+        //     stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
+        //
+        int past;
+        float delta;
+
+        // maximum number of iterations without improvement
+        //
+        //   if 0 - disabled
+        //   if > 0:
+        //     assume convergence if no cost improvement in this number of iterations
+        //
+        int max_no_improvement;
+
+        bool print_forward_graph;
+        bool print_backward_graph;
+
+        // ADAM parameters
+        struct {
+            int n_iter;
+
+            float alpha; // learning rate
+            float beta1;
+            float beta2;
+            float eps;   // epsilon for numerical stability
+            float eps_f; // epsilon for convergence test
+            float eps_g; // epsilon for convergence test
+        } adam;
+
+        // LBFGS parameters
+        struct {
+            int m; // number of corrections to approximate the inv. Hessian
+            int n_iter;
+            int max_linesearch;
+
+            float eps;      // convergence tolerance
+            float ftol;     // line search tolerance
+            float wolfe;
+            float min_step;
+            float max_step;
+
+            enum ggml_linesearch linesearch;
+        } lbfgs;
+    };
+
+    GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
+
+    // optimize the function defined by the tensor f
+    GGML_API enum ggml_opt_result ggml_opt(
+            struct ggml_context * ctx,
+            struct ggml_opt_params params,
+            struct ggml_tensor * f);

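The optimizer entry point above takes a scalar loss tensor f and adjusts every tensor previously registered with ggml_set_param. A hedged sketch minimizing f(x) = sum(x^2) with the default ADAM settings; the starting value, the tensor size and the pool size are illustrative assumptions, not values prescribed by the library.

#include <stdio.h>
#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = { .mem_size = 16 * 1024 * 1024, .mem_buffer = NULL, .no_alloc = false };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    ggml_set_f32(x, 5.0f);   // start away from the minimum
    ggml_set_param(ctx, x);  // mark x as an optimizable parameter

    struct ggml_tensor * f = ggml_sum(ctx, ggml_sqr(ctx, x)); // scalar loss

    struct ggml_opt_params opt = ggml_opt_default_params(GGML_OPT_ADAM);
    enum ggml_opt_result res = ggml_opt(ctx, opt, f);

    printf("result = %d, x[0] = %f\n", (int) res, ggml_get_f32_1d(x, 0)); // x should move toward 0
    ggml_free(ctx);
    return 0;
}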
-    [old lines 812-821: the previous, un-exported ggml_quantize_q4_* declarations]
+    //
+    // quantization
+    //
+    GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
+    GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
+    GGML_API size_t ggml_quantize_q4_2(const float * src, void * dst, int n, int k, int64_t * hist);
+    GGML_API size_t ggml_quantize_q4_3(const float * src, void * dst, int n, int k, int64_t * hist);
+    GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
+    GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
+    GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
+
+    GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);

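Each ggml_quantize_* routine above converts a flat float buffer into the corresponding block format, tallies the quantized values into a histogram and returns the number of bytes written. A hedged sketch for Q4_0; the element count, the choice of k equal to the row length and the 16-bin histogram size follow common usage in this codebase but are assumptions here.

#include <stdint.h>
#include <stdio.h>
#include "ggml.h"

int main(void) {
    enum { N = 64 };                 // multiple of the Q4_0 block size (32)
    float src[N];
    for (int i = 0; i < N; ++i) src[i] = (float) i / N;

    uint8_t dst[N * sizeof(float)];  // quantized output is smaller than the f32 input
    int64_t hist[16] = {0};          // histogram of 4-bit quant buckets

    size_t bytes = ggml_quantize_q4_0(src, dst, N, N, hist);
    printf("Q4_0: %d floats -> %zu bytes\n", N, bytes);
    return 0;
}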
-    [old lines 823-837: the previous, un-exported system-info declarations]
+    //
+    // system info
+    //
+    GGML_API int ggml_cpu_has_avx        (void);
+    GGML_API int ggml_cpu_has_avx2       (void);
+    GGML_API int ggml_cpu_has_avx512     (void);
+    GGML_API int ggml_cpu_has_avx512_vbmi(void);
+    GGML_API int ggml_cpu_has_avx512_vnni(void);
+    GGML_API int ggml_cpu_has_fma        (void);
+    GGML_API int ggml_cpu_has_neon       (void);
+    GGML_API int ggml_cpu_has_arm_fma    (void);
+    GGML_API int ggml_cpu_has_f16c       (void);
+    GGML_API int ggml_cpu_has_fp16_va    (void);
+    GGML_API int ggml_cpu_has_wasm_simd  (void);
+    GGML_API int ggml_cpu_has_blas       (void);
+    GGML_API int ggml_cpu_has_cublas     (void);
+    GGML_API int ggml_cpu_has_clblast    (void);
+    GGML_API int ggml_cpu_has_gpublas    (void);
+    GGML_API int ggml_cpu_has_sse3       (void);
+    GGML_API int ggml_cpu_has_vsx        (void);

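The ggml_cpu_has_* probes above report which optional instruction sets and back ends the library was compiled with, including the new CLBlast and generic GPU BLAS flags. A small sketch printing a few of them:

#include <stdio.h>
#include "ggml.h"

int main(void) {
    printf("AVX2:    %d\n", ggml_cpu_has_avx2());
    printf("NEON:    %d\n", ggml_cpu_has_neon());
    printf("BLAS:    %d\n", ggml_cpu_has_blas());
    printf("cuBLAS:  %d\n", ggml_cpu_has_cublas());
    printf("CLBlast: %d\n", ggml_cpu_has_clblast());
    return 0;
}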
-    [old lines 840-862: the previous "internal types" comment and the un-exported quantization typedefs, re-added below with the expanded quantize_fns_t]
+    //
+    // Internal types and functions exposed for tests and benchmarks
+    //

 #ifdef  __cplusplus
-// restrict not standard in C++
+    // restrict not standard in C++
 #define GGML_RESTRICT
 #else
 #define GGML_RESTRICT restrict
 #endif
+    typedef void (*dequantize_row_q_t)(const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
+    typedef void (*quantize_row_q_t)  (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
+    typedef void (*vec_dot_q_t)       (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
+
+    typedef struct {
+        dequantize_row_q_t dequantize_row_q;
+        quantize_row_q_t   quantize_row_q;
+        quantize_row_q_t   quantize_row_q_reference;
+        quantize_row_q_t   quantize_row_q_dot;
+        vec_dot_q_t        vec_dot_q;
+        enum ggml_type     vec_dot_type;
+    } quantize_fns_t;
+
+    quantize_fns_t ggml_internal_get_quantize_fn(size_t i);

 #ifdef  __cplusplus
 }