whisper.rn 0.2.5 → 0.3.0-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -4
- package/android/src/main/java/com/rnwhisper/RNWhisperModule.java +7 -2
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +7 -6
- package/android/src/main/jni/whisper/jni.cpp +50 -5
- package/cpp/coreml/whisper-decoder-impl.h +146 -0
- package/cpp/coreml/whisper-decoder-impl.m +201 -0
- package/cpp/coreml/whisper-encoder-impl.h +142 -0
- package/cpp/coreml/whisper-encoder-impl.m +197 -0
- package/cpp/coreml/whisper-encoder.h +22 -0
- package/cpp/coreml/whisper-encoder.mm +63 -0
- package/cpp/ggml.c +6339 -1662
- package/cpp/ggml.h +741 -554
- package/cpp/rn-whisper.cpp +0 -23
- package/cpp/rn-whisper.h +0 -6
- package/cpp/whisper.cpp +928 -625
- package/cpp/whisper.h +26 -2
- package/ios/RNWhisper.mm +19 -1
- package/ios/RNWhisperContext.mm +1 -6
- package/lib/commonjs/index.js +12 -2
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +9 -2
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/index.d.ts +6 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +2 -1
- package/src/index.ts +9 -3
- package/whisper-rn.podspec +8 -2
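
Most of the churn above comes from syncing the bundled ggml/whisper.cpp sources. The most visible API change in `package/cpp/ggml.h` (diffed below) is that exported declarations are now prefixed with a `GGML_API` macro so ggml can be built and consumed as a shared library. The following is a minimal sketch of how that macro behaves — it is not code from the package, and the file name and comments about compiler flags are illustrative assumptions based only on the macro definition visible in the diff:

```c
// consumer.c -- hypothetical consumer of the ggml bundled with whisper.rn.
//
// How GGML_API (defined in the ggml.h diff below) resolves:
//   static build (no flags):               GGML_API expands to nothing
//   -DGGML_SHARED -DGGML_BUILD (building): __declspec(dllexport) on Win32/MSVC,
//                                          __attribute__((visibility("default"))) elsewhere
//   -DGGML_SHARED (consuming the library): __declspec(dllimport) on Win32/MSVC
#include <stdio.h>
#include "ggml.h"

int main(void) {
    ggml_time_init(); // declared in the diff as: GGML_API void ggml_time_init(void);
    printf("t = %lld us\n", (long long) ggml_time_us());
    return 0;
}
```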
package/cpp/ggml.h CHANGED

```diff
@@ -169,579 +169,766 @@
 //
 //
 
-#ifdef
-
+#ifdef GGML_SHARED
+#    if defined(_WIN32) && !defined(__MINGW32__)
+#        ifdef GGML_BUILD
+#            define GGML_API __declspec(dllexport)
+#        else
+#            define GGML_API __declspec(dllimport)
+#        endif
+#    else
+#        define GGML_API __attribute__ ((visibility ("default")))
+#    endif
+#else
+#    define GGML_API
 #endif
 
 #include <stdint.h>
 #include <stddef.h>
 #include <stdbool.h>
 
-#define
-#define
-
-#define
-#define
+#define GGML_FILE_MAGIC   0x67676d6c // "ggml"
+#define GGML_FILE_VERSION 1
+
+#define GGML_MAX_DIMS          4
+#define GGML_MAX_NODES         4096
+#define GGML_MAX_PARAMS        16
+#define GGML_MAX_CONTEXTS      64
+#define GGML_MAX_OPT           4
+#define GGML_DEFAULT_N_THREADS 4
+
+#define GGML_ASSERT(x) \
+    do { \
+        if (!(x)) { \
+            fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
+            abort(); \
+        } \
+    } while (0)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 #ifdef __ARM_NEON
-// we use the built-in 16-bit float type
-typedef __fp16 ggml_fp16_t;
+    // we use the built-in 16-bit float type
+    typedef __fp16 ggml_fp16_t;
 #else
-typedef uint16_t ggml_fp16_t;
+    typedef uint16_t ggml_fp16_t;
 #endif
 
-// convert FP16 <-> FP32
-float ggml_fp16_to_fp32(ggml_fp16_t x);
-ggml_fp16_t ggml_fp32_to_fp16(float x);
-[... 155 removed lines (old 196-350) collapsed in the source diff view ...]
-        struct ggml_context * ctx,
-        enum ggml_type type,
-        int ne0,
-        int ne1);
-
-struct ggml_tensor * ggml_new_tensor_3d(
-        struct ggml_context * ctx,
-        enum ggml_type type,
-        int ne0,
-        int ne1,
-        int ne2);
-
-struct ggml_tensor * ggml_new_tensor_4d(
-        struct ggml_context * ctx,
-        enum ggml_type type,
-        int ne0,
-        int ne1,
-        int ne2,
-        int ne3);
-
-struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
-struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
-
-struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
-struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
-
-struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
-struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
-struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
-
-int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
-void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
-
-float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
-void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
-
-void * ggml_get_data (const struct ggml_tensor * tensor);
-float * ggml_get_data_f32(const struct ggml_tensor * tensor);
-
-//
-// operations on tensors with backpropagation
-//
-
-struct ggml_tensor * ggml_dup(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a);
-
-struct ggml_tensor * ggml_add(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b);
-
-struct ggml_tensor * ggml_sub(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b);
-
-struct ggml_tensor * ggml_mul(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b);
-
-struct ggml_tensor * ggml_div(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b);
-
-struct ggml_tensor * ggml_sqr(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a);
-
-struct ggml_tensor * ggml_sqrt(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a);
-
-// return scalar
-// TODO: compute sum along rows
-struct ggml_tensor * ggml_sum(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a);
-
-// mean along rows
-struct ggml_tensor * ggml_mean(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a);
-
-// if a is the same shape as b, and a is not parameter, return a
-// otherwise, return a new tensor: repeat(a) to fit in b
-struct ggml_tensor * ggml_repeat(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b);
-
-struct ggml_tensor * ggml_abs(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a);
-
-struct ggml_tensor * ggml_sgn(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a);
-
-struct ggml_tensor * ggml_neg(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a);
-
-struct ggml_tensor * ggml_step(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a);
-
-struct ggml_tensor * ggml_relu(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a);
-
-// TODO: double-check this computation is correct
-struct ggml_tensor * ggml_gelu(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a);
-
-// normalize along rows
-// TODO: eps is hardcoded to 1e-5 for now
-struct ggml_tensor * ggml_norm(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a);
-
-// A: m rows, n columns
-// B: p rows, n columns (i.e. we transpose it internally)
-// result is m columns, p rows
-struct ggml_tensor * ggml_mul_mat(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b);
-
-//
-// operations on tensors without backpropagation
-//
-
-// in-place, returns view(a)
-struct ggml_tensor * ggml_scale(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b);
-
-// a -> b, return view(b)
-struct ggml_tensor * ggml_cpy(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b);
-
-// return view(a), b specifies the new shape
-// TODO: when we start computing gradient, make a copy instead of view
-struct ggml_tensor * ggml_reshape(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b);
-
-// return view(a)
-// TODO: when we start computing gradient, make a copy instead of view
-struct ggml_tensor * ggml_reshape_2d(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        int ne0,
-        int ne1);
-
-// return view(a)
-// TODO: when we start computing gradient, make a copy instead of view
-struct ggml_tensor * ggml_reshape_3d(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        int ne0,
-        int ne1,
-        int ne2);
-
-// offset in bytes
-struct ggml_tensor * ggml_view_1d(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        int ne0,
-        size_t offset);
-
-struct ggml_tensor * ggml_view_2d(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        int ne0,
-        int ne1,
-        size_t nb1, // row stride in bytes
-        size_t offset);
-
-struct ggml_tensor * ggml_permute(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        int axis0,
-        int axis1,
-        int axis2,
-        int axis3);
-
-// alias for ggml_permute(ctx, a, 1, 0, 2, 3)
-struct ggml_tensor * ggml_transpose(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a);
-
-struct ggml_tensor * ggml_get_rows(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b);
-
-// set elements above the diagonal to -INF
-// in-place, returns view(a)
-struct ggml_tensor * ggml_diag_mask_inf(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        int n_past);
-
-// in-place, returns view(a)
-struct ggml_tensor * ggml_soft_max(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a);
-
-// rotary position embedding
-// in-place, returns view(a)
-// if mode == 1, skip n_past elements
-// TODO: avoid creating a new tensor every time
-struct ggml_tensor * ggml_rope(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        int n_past,
-        int n_dims,
-        int mode);
-
-// padding = 1
-// TODO: we don't support extra parameters for now
-// that's why we are hard-coding the stride, padding, and dilation
-// not great ..
-struct ggml_tensor * ggml_conv_1d_1s(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b);
-
-struct ggml_tensor * ggml_conv_1d_2s(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b);
-
-struct ggml_tensor * ggml_flash_attn(
-        struct ggml_context * ctx,
-        struct ggml_tensor * q,
-        struct ggml_tensor * k,
-        struct ggml_tensor * v,
-        bool masked);
-
-struct ggml_tensor * ggml_flash_ff(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b0,
-        struct ggml_tensor * b1,
-        struct ggml_tensor * c0,
-        struct ggml_tensor * c1);
-
-//
-// automatic differentiation
-//
-
-void ggml_set_param(
-        struct ggml_context * ctx,
-        struct ggml_tensor * tensor);
-
-void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
-
-struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
-struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
-
-void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
-void ggml_graph_reset (struct ggml_cgraph * cgraph);
-
-// print info and performance information for the graph
-void ggml_graph_print(const struct ggml_cgraph * cgraph);
-
-// dump the graph into a file using the dot format
-void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
-
-//
-// optimization
-//
-
-// optimization methods
-enum ggml_opt_type {
-    GGML_OPT_ADAM,
-    GGML_OPT_LBFGS,
-};
-
-// linesearch methods
-enum ggml_linesearch {
-    GGML_LINESEARCH_DEFAULT = 1,
-
-    GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0,
-    GGML_LINESEARCH_BACKTRACKING_WOLFE = 1,
-    GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
-};
-
-// optimization return values
-enum ggml_opt_result {
-    GGML_OPT_OK = 0,
-    GGML_OPT_DID_NOT_CONVERGE,
-    GGML_OPT_NO_CONTEXT,
-    GGML_OPT_INVALID_WOLFE,
-    GGML_OPT_FAIL,
-
-    GGML_LINESEARCH_FAIL = -128,
-    GGML_LINESEARCH_MINIMUM_STEP,
-    GGML_LINESEARCH_MAXIMUM_STEP,
-    GGML_LINESEARCH_MAXIMUM_ITERATIONS,
-    GGML_LINESEARCH_INVALID_PARAMETERS,
-};
+    // convert FP16 <-> FP32
+    GGML_API float       ggml_fp16_to_fp32(ggml_fp16_t x);
+    GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
+
+    GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, size_t n);
+    GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n);
+
+    struct ggml_object;
+    struct ggml_context;
+
+    enum ggml_type {
+        GGML_TYPE_F32 = 0,
+        GGML_TYPE_F16 = 1,
+        GGML_TYPE_Q4_0 = 2,
+        GGML_TYPE_Q4_1 = 3,
+        GGML_TYPE_Q4_2 = 4,
+        // GGML_TYPE_Q4_3 (5) support has been removed
+        GGML_TYPE_Q5_0 = 6,
+        GGML_TYPE_Q5_1 = 7,
+        GGML_TYPE_Q8_0 = 8,
+        GGML_TYPE_Q8_1 = 9,
+        GGML_TYPE_I8,
+        GGML_TYPE_I16,
+        GGML_TYPE_I32,
+        GGML_TYPE_COUNT,
+    };
+
+    // model file types
+    enum ggml_ftype {
+        GGML_FTYPE_UNKNOWN = -1,
+        GGML_FTYPE_ALL_F32 = 0,
+        GGML_FTYPE_MOSTLY_F16 = 1,  // except 1d tensors
+        GGML_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
+        GGML_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
+        GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
+        GGML_FTYPE_MOSTLY_Q4_2 = 5, // except 1d tensors
+        GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
+        GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
+        GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
+    };
+
+    // available tensor operations:
+    enum ggml_op {
+        GGML_OP_NONE = 0,
+
+        GGML_OP_DUP,
+        GGML_OP_ADD,
+        GGML_OP_SUB,
+        GGML_OP_MUL,
+        GGML_OP_DIV,
+        GGML_OP_SQR,
+        GGML_OP_SQRT,
+        GGML_OP_SUM,
+        GGML_OP_MEAN,
+        GGML_OP_REPEAT,
+        GGML_OP_ABS,
+        GGML_OP_SGN,
+        GGML_OP_NEG,
+        GGML_OP_STEP,
+        GGML_OP_RELU,
+        GGML_OP_GELU,
+        GGML_OP_SILU,
+        GGML_OP_NORM, // normalize
+        GGML_OP_RMS_NORM,
+
+        GGML_OP_MUL_MAT,
+
+        GGML_OP_SCALE,
+        GGML_OP_CPY,
+        GGML_OP_CONT,
+        GGML_OP_RESHAPE,
+        GGML_OP_VIEW,
+        GGML_OP_PERMUTE,
+        GGML_OP_TRANSPOSE,
+        GGML_OP_GET_ROWS,
+        GGML_OP_DIAG_MASK_INF,
+        GGML_OP_SOFT_MAX,
+        GGML_OP_ROPE,
+        GGML_OP_ALIBI,
+        GGML_OP_CONV_1D_1S,
+        GGML_OP_CONV_1D_2S,
+
+        GGML_OP_FLASH_ATTN,
+        GGML_OP_FLASH_FF,
+
+        GGML_OP_MAP_UNARY,
+        GGML_OP_MAP_BINARY,
+
+        GGML_OP_COUNT,
+    };
+
+
+    // ggml object
+    struct ggml_object {
+        size_t offs;
+        size_t size;
+
+        struct ggml_object * next;
+
+        char padding[8];
+    };
+
+    static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
+
+    // n-dimensional tensor
+    struct ggml_tensor {
+        enum ggml_type type;
+
+        int n_dims;
+        int64_t ne[GGML_MAX_DIMS]; // number of elements
+        size_t  nb[GGML_MAX_DIMS]; // stride in bytes:
+                                   // nb[0] = sizeof(type)
+                                   // nb[1] = nb[0] * ne[0] + padding
+                                   // nb[i] = nb[i-1] * ne[i-1]
+
+        // compute data
+        enum ggml_op op;
+
+        bool is_param;
+
+        struct ggml_tensor * grad;
+        struct ggml_tensor * src0;
+        struct ggml_tensor * src1;
+        struct ggml_tensor * opt[GGML_MAX_OPT];
+
+        // thread scheduling
+        int n_tasks;
+
+        // performance
+        int perf_runs;
+        int64_t perf_cycles;
+        int64_t perf_time_us;
+
+        void * data;
+
+        char name[32];
+
+        char padding[8]; // TODO: remove and add padding to name?
+    };
+
+    // computation graph
+    struct ggml_cgraph {
+        int n_nodes;
+        int n_leafs;
+        int n_threads;
+
+        size_t work_size;
+        struct ggml_tensor * work;
+
+        struct ggml_tensor * nodes[GGML_MAX_NODES];
+        struct ggml_tensor * grads[GGML_MAX_NODES];
+        struct ggml_tensor * leafs[GGML_MAX_NODES];
+
+        // performance
+        int perf_runs;
+        int64_t perf_cycles;
+        int64_t perf_time_us;
+    };
 
-//
-
-
-
-
-
+    // scratch buffer
+    struct ggml_scratch {
+        size_t offs;
+        size_t size;
+        void * data;
+    };
+
+    struct ggml_init_params {
+        // memory pool
+        size_t mem_size;   // bytes
+        void * mem_buffer; // if NULL, memory will be allocated internally
+        bool   no_alloc;   // don't allocate memory for the tensor data
+    };
+
+    // misc
+
+    GGML_API void    ggml_time_init(void); // call this once at the beginning of the program
+    GGML_API int64_t ggml_time_ms(void);
+    GGML_API int64_t ggml_time_us(void);
+    GGML_API int64_t ggml_cycles(void);
+    GGML_API int64_t ggml_cycles_per_ms(void);
+
+    GGML_API void ggml_print_object (const struct ggml_object * obj);
+    GGML_API void ggml_print_objects(const struct ggml_context * ctx);
+
+    GGML_API int64_t ggml_nelements(const struct ggml_tensor * tensor);
+    GGML_API size_t  ggml_nbytes   (const struct ggml_tensor * tensor);
+
+    GGML_API int    ggml_blck_size (enum ggml_type type);
+    GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
+    GGML_API float  ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
+
+    GGML_API const char * ggml_type_name(enum ggml_type type);
+
+    GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
+
+    GGML_API bool ggml_is_quantized(enum ggml_type type);
+
+    // TODO: temporary until model loading of ggml examples is refactored
+    GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
+
+    // main
+
+    GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
+    GGML_API void ggml_free(struct ggml_context * ctx);
+
+    GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
+
+    GGML_API size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
+
+    GGML_API struct ggml_tensor * ggml_new_tensor(
+            struct ggml_context * ctx,
+            enum ggml_type type,
+            int n_dims,
+            const int64_t *ne);
+
+    GGML_API struct ggml_tensor * ggml_new_tensor_1d(
+            struct ggml_context * ctx,
+            enum ggml_type type,
+            int64_t ne0);
+
+    GGML_API struct ggml_tensor * ggml_new_tensor_2d(
+            struct ggml_context * ctx,
+            enum ggml_type type,
+            int64_t ne0,
+            int64_t ne1);
+
+    GGML_API struct ggml_tensor * ggml_new_tensor_3d(
+            struct ggml_context * ctx,
+            enum ggml_type type,
+            int64_t ne0,
+            int64_t ne1,
+            int64_t ne2);
+
+    GGML_API struct ggml_tensor * ggml_new_tensor_4d(
+            struct ggml_context * ctx,
+            enum ggml_type type,
+            int64_t ne0,
+            int64_t ne1,
+            int64_t ne2,
+            int64_t ne3);
+
+    GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
+    GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
+
+    GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
+    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
+
+    GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
+    GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
+    GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
+
+    GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
+    GGML_API void    ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
+
+    GGML_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
+    GGML_API void  ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
 
-
+    GGML_API void *  ggml_get_data    (const struct ggml_tensor * tensor);
+    GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
 
-
+    GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
+    GGML_API void ggml_set_name(struct ggml_tensor * tensor, const char * name);
+
+    //
+    // operations on tensors with backpropagation
+    //
+
+    GGML_API struct ggml_tensor * ggml_dup(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_add(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_add_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_sub(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_mul(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_div(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_sqr(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_sqrt(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    // return scalar
+    // TODO: compute sum along rows
+    GGML_API struct ggml_tensor * ggml_sum(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    // mean along rows
+    GGML_API struct ggml_tensor * ggml_mean(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    // if a is the same shape as b, and a is not parameter, return a
+    // otherwise, return a new tensor: repeat(a) to fit in b
+    GGML_API struct ggml_tensor * ggml_repeat(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_abs(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_sgn(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_neg(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_step(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_relu(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    // TODO: double-check this computation is correct
+    GGML_API struct ggml_tensor * ggml_gelu(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_silu(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    // normalize along rows
+    // TODO: eps is hardcoded to 1e-5 for now
+    GGML_API struct ggml_tensor * ggml_norm(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_rms_norm(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    // A: m rows, n columns
+    // B: p rows, n columns (i.e. we transpose it internally)
+    // result is m columns, p rows
+    GGML_API struct ggml_tensor * ggml_mul_mat(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    //
+    // operations on tensors without backpropagation
+    //
+
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_scale(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    // a -> b, return view(b)
+    GGML_API struct ggml_tensor * ggml_cpy(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    // make contiguous
+    GGML_API struct ggml_tensor * ggml_cont(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    // return view(a), b specifies the new shape
+    // TODO: when we start computing gradient, make a copy instead of view
+    GGML_API struct ggml_tensor * ggml_reshape(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    // return view(a)
+    // TODO: when we start computing gradient, make a copy instead of view
+    GGML_API struct ggml_tensor * ggml_reshape_2d(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int64_t ne0,
+            int64_t ne1);
+
+    // return view(a)
+    // TODO: when we start computing gradient, make a copy instead of view
+    GGML_API struct ggml_tensor * ggml_reshape_3d(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int64_t ne0,
+            int64_t ne1,
+            int64_t ne2);
+
+    // offset in bytes
+    GGML_API struct ggml_tensor * ggml_view_1d(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int64_t ne0,
+            size_t offset);
+
+    GGML_API struct ggml_tensor * ggml_view_2d(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int64_t ne0,
+            int64_t ne1,
+            size_t nb1, // row stride in bytes
+            size_t offset);
+
+    GGML_API struct ggml_tensor * ggml_view_3d(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int64_t ne0,
+            int64_t ne1,
+            int64_t ne2,
+            size_t nb1, // row stride in bytes
+            size_t nb2, // slice stride in bytes
+            size_t offset);
+
+    GGML_API struct ggml_tensor * ggml_permute(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int axis0,
+            int axis1,
+            int axis2,
+            int axis3);
+
+    // alias for ggml_permute(ctx, a, 1, 0, 2, 3)
+    GGML_API struct ggml_tensor * ggml_transpose(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_get_rows(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    // set elements above the diagonal to -INF
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_diag_mask_inf(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int n_past);
+
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_soft_max(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    // rotary position embedding
+    // in-place, returns view(a)
+    // if mode & 1 == 1, skip n_past elements
+    // if mode & 2 == 1, GPT-NeoX style
+    // TODO: avoid creating a new tensor every time
+    GGML_API struct ggml_tensor * ggml_rope(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int n_past,
+            int n_dims,
+            int mode);
+
+    // alibi position embedding
+    // in-place, returns view(a)
+    struct ggml_tensor * ggml_alibi(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int n_past,
+            int n_head);
+
+    // padding = 1
+    // TODO: we don't support extra parameters for now
+    // that's why we are hard-coding the stride, padding, and dilation
+    // not great ..
+    GGML_API struct ggml_tensor * ggml_conv_1d_1s(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_conv_1d_2s(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_flash_attn(
+            struct ggml_context * ctx,
+            struct ggml_tensor * q,
+            struct ggml_tensor * k,
+            struct ggml_tensor * v,
+            bool masked);
+
+    GGML_API struct ggml_tensor * ggml_flash_ff(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b0,
+            struct ggml_tensor * b1,
+            struct ggml_tensor * c0,
+            struct ggml_tensor * c1);
+
+    // Mapping operations
+    typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
+    typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
+
+    GGML_API struct ggml_tensor * ggml_map_unary_f32(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            const ggml_unary_op_f32_t fun);
+
+    GGML_API struct ggml_tensor * ggml_map_binary_f32(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            const ggml_binary_op_f32_t fun);
+
+    //
+    // automatic differentiation
+    //
+
+    GGML_API void ggml_set_param(
+            struct ggml_context * ctx,
+            struct ggml_tensor * tensor);
+
+    GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+
+    GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
+    GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
+
+    GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
+    GGML_API void ggml_graph_reset  (struct ggml_cgraph * cgraph);
+
+    // print info and performance information for the graph
+    GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
+
+    // dump the graph into a file using the dot format
+    GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
+
+    //
+    // optimization
 //
-
-//
-
+
+    // optimization methods
+    enum ggml_opt_type {
+        GGML_OPT_ADAM,
+        GGML_OPT_LBFGS,
+    };
+
+    // linesearch methods
+    enum ggml_linesearch {
+        GGML_LINESEARCH_DEFAULT = 1,
+
+        GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0,
+        GGML_LINESEARCH_BACKTRACKING_WOLFE = 1,
+        GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
+    };
+
+    // optimization return values
+    enum ggml_opt_result {
+        GGML_OPT_OK = 0,
+        GGML_OPT_DID_NOT_CONVERGE,
+        GGML_OPT_NO_CONTEXT,
+        GGML_OPT_INVALID_WOLFE,
+        GGML_OPT_FAIL,
+
+        GGML_LINESEARCH_FAIL = -128,
+        GGML_LINESEARCH_MINIMUM_STEP,
+        GGML_LINESEARCH_MAXIMUM_STEP,
+        GGML_LINESEARCH_MAXIMUM_ITERATIONS,
+        GGML_LINESEARCH_INVALID_PARAMETERS,
+    };
+
+    // optimization parameters
 //
-
-
+    // see ggml.c (ggml_opt_default_params) for default values
+    //
+    struct ggml_opt_params {
+        enum ggml_opt_type type;
+
+        int n_threads;
+
+        // delta-based convergence test
+        //
+        // if past == 0 - disabled
+        // if past > 0:
+        //   stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
+        //
+        int past;
+        float delta;
+
+        // maximum number of iterations without improvement
+        //
+        // if 0 - disabled
+        // if > 0:
+        //   assume convergence if no cost improvement in this number of iterations
+        //
+        int max_no_improvement;
+
+        bool print_forward_graph;
+        bool print_backward_graph;
+
+        // ADAM parameters
+        struct {
+            int n_iter;
+
+            float alpha; // learning rate
+            float beta1;
+            float beta2;
+            float eps;   // epsilon for numerical stability
+            float eps_f; // epsilon for convergence test
+            float eps_g; // epsilon for convergence test
+        } adam;
+
+        // LBFGS parameters
+        struct {
+            int m; // number of corrections to approximate the inv. Hessian
+            int n_iter;
+            int max_linesearch;
+
+            float eps;  // convergence tolerance
+            float ftol; // line search tolerance
+            float wolfe;
+            float min_step;
+            float max_step;
+
+            enum ggml_linesearch linesearch;
+        } lbfgs;
+    };
+
+    GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
+
+    // optimize the function defined by the tensor f
+    GGML_API enum ggml_opt_result ggml_opt(
+            struct ggml_context * ctx,
+            struct ggml_opt_params params,
+            struct ggml_tensor * f);
+
+    //
+    // quantization
+    //
+
+    GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
+    GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
+    GGML_API size_t ggml_quantize_q4_2(const float * src, void * dst, int n, int k, int64_t * hist);
+    GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
+    GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
+    GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
+
+    GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
 
-// maximum number of iterations without improvement
 //
-//
-// if > 0:
-// assume convergence if no cost improvement in this number of iterations
+    // system info
 //
-[... 45 removed lines (old 688-732) collapsed in the source diff view ...]
-int ggml_cpu_has_avx(void);
-int ggml_cpu_has_avx2(void);
-int ggml_cpu_has_avx512(void);
-int ggml_cpu_has_fma(void);
-int ggml_cpu_has_neon(void);
-int ggml_cpu_has_arm_fma(void);
-int ggml_cpu_has_f16c(void);
-int ggml_cpu_has_fp16_va(void);
-int ggml_cpu_has_wasm_simd(void);
-int ggml_cpu_has_blas(void);
-int ggml_cpu_has_sse3(void);
-int ggml_cpu_has_vsx(void);
+
+    GGML_API int ggml_cpu_has_avx        (void);
+    GGML_API int ggml_cpu_has_avx2       (void);
+    GGML_API int ggml_cpu_has_avx512     (void);
+    GGML_API int ggml_cpu_has_avx512_vbmi(void);
+    GGML_API int ggml_cpu_has_avx512_vnni(void);
+    GGML_API int ggml_cpu_has_fma        (void);
+    GGML_API int ggml_cpu_has_neon       (void);
+    GGML_API int ggml_cpu_has_arm_fma    (void);
+    GGML_API int ggml_cpu_has_f16c       (void);
+    GGML_API int ggml_cpu_has_fp16_va    (void);
+    GGML_API int ggml_cpu_has_wasm_simd  (void);
+    GGML_API int ggml_cpu_has_blas       (void);
+    GGML_API int ggml_cpu_has_cublas     (void);
+    GGML_API int ggml_cpu_has_clblast    (void);
+    GGML_API int ggml_cpu_has_gpublas    (void);
+    GGML_API int ggml_cpu_has_sse3       (void);
+    GGML_API int ggml_cpu_has_vsx        (void);
+
+    //
+    // Internal types and functions exposed for tests and benchmarks
+    //
+
+#ifdef __cplusplus
+    // restrict not standard in C++
+#define GGML_RESTRICT
+#else
+#define GGML_RESTRICT restrict
+#endif
+    typedef void (*dequantize_row_q_t)(const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
+    typedef void (*quantize_row_q_t)  (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
+    typedef void (*vec_dot_q_t)       (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
+
+    typedef struct {
+        dequantize_row_q_t dequantize_row_q;
+        quantize_row_q_t   quantize_row_q;
+        quantize_row_q_t   quantize_row_q_reference;
+        quantize_row_q_t   quantize_row_q_dot;
+        vec_dot_q_t        vec_dot_q;
+        enum ggml_type     vec_dot_type;
+    } quantize_fns_t;
+
+    quantize_fns_t ggml_internal_get_quantize_fn(size_t i);
 
 #ifdef __cplusplus
 }
```