whisper.rn 0.4.0-rc.7 → 0.4.0-rc.8
- package/cpp/coreml/whisper-encoder.mm +1 -1
- package/cpp/ggml-alloc.c +41 -11
- package/cpp/ggml-alloc.h +3 -1
- package/cpp/ggml-backend-impl.h +38 -34
- package/cpp/ggml-backend.c +630 -269
- package/cpp/ggml-backend.h +58 -30
- package/cpp/ggml-impl.h +3 -0
- package/cpp/ggml-metal-whisper.metal +1253 -341
- package/cpp/ggml-metal.h +6 -54
- package/cpp/ggml-metal.m +2004 -1987
- package/cpp/ggml-quants.c +2230 -421
- package/cpp/ggml-quants.h +39 -1
- package/cpp/ggml.c +735 -265
- package/cpp/ggml.h +94 -43
- package/cpp/whisper.cpp +118 -86
- package/ios/RNWhisperContext.mm +2 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/version.json +1 -1
- package/package.json +1 -1
- package/src/version.json +1 -1
package/cpp/ggml.h
CHANGED
@@ -187,6 +187,16 @@
 #    define WSP_GGML_API
 #endif
 
+#ifdef WSP_GGML_MULTIPLATFORM
+#    if defined(_WIN32)
+#        define WSP_GGML_CALL
+#    else
+#        define WSP_GGML_CALL __attribute__((__ms_abi__))
+#    endif
+#else
+#    define WSP_GGML_CALL
+#endif
+
 // TODO: support for clang
 #ifdef __GNUC__
 #    define WSP_GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
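Note on the new macro: WSP_GGML_CALL pins every annotated function to one calling convention (ms_abi on non-Windows multiplatform builds), so function pointers shared between modules built with different default conventions stay ABI-compatible; the upstream ggml motivation was mixing MSVC-ABI and SysV-ABI builds. A minimal sketch of what a declaration using it means, assuming WSP_GGML_MULTIPLATFORM is defined on a non-Windows target:

    // in the header (see the declarations later in this diff):
    WSP_GGML_API WSP_GGML_CALL size_t wsp_ggml_nbytes(const struct wsp_ggml_tensor * tensor);
    // effectively:
    //   size_t __attribute__((__ms_abi__)) wsp_ggml_nbytes(const struct wsp_ggml_tensor * tensor);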
@@ -218,7 +228,9 @@
 #define WSP_GGML_MAX_PARAMS     2048
 #define WSP_GGML_MAX_CONTEXTS   64
 #define WSP_GGML_MAX_SRC        10
+#ifndef WSP_GGML_MAX_NAME
 #define WSP_GGML_MAX_NAME       64
+#endif
 #define WSP_GGML_MAX_OP_PARAMS  64
 #define WSP_GGML_DEFAULT_N_THREADS 4
 #define WSP_GGML_DEFAULT_GRAPH_SIZE 2048
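The #ifndef guard makes the tensor-name buffer size overridable at build time instead of being fixed at 64. A hedged sketch (the exact flag spelling is illustrative; any mechanism that defines the macro before ggml is compiled works):

    // e.g. in the compiler invocation:
    //   cc -DWSP_GGML_MAX_NAME=128 -c ggml.c
    // longer names are truncated to fit the buffer by wsp_ggml_set_name()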
@@ -255,6 +267,8 @@
 #define WSP_GGML_UNREACHABLE() WSP_GGML_ASSERT(!"statement should not be reached")
 #elif defined(__GNUC__)
 #define WSP_GGML_UNREACHABLE() __builtin_unreachable()
+#elif defined(_MSC_VER)
+#define WSP_GGML_UNREACHABLE() __assume(0)
 #else
 #define WSP_GGML_UNREACHABLE() ((void) 0)
 #endif
@@ -303,7 +317,7 @@ extern "C" {
 
 #if defined(__ARM_NEON) && defined(__CUDACC__)
     typedef half wsp_ggml_fp16_t;
-#elif defined(__ARM_NEON)
+#elif defined(__ARM_NEON) && !defined(_MSC_VER)
     typedef __fp16 wsp_ggml_fp16_t;
 #else
     typedef uint16_t wsp_ggml_fp16_t;
@@ -337,12 +351,20 @@ extern "C" {
         WSP_GGML_TYPE_Q5_K = 13,
         WSP_GGML_TYPE_Q6_K = 14,
         WSP_GGML_TYPE_Q8_K = 15,
+        WSP_GGML_TYPE_IQ2_XXS = 16,
+        WSP_GGML_TYPE_IQ2_XS  = 17,
         WSP_GGML_TYPE_I8,
         WSP_GGML_TYPE_I16,
         WSP_GGML_TYPE_I32,
         WSP_GGML_TYPE_COUNT,
     };
 
+    // precision
+    enum wsp_ggml_prec {
+        WSP_GGML_PREC_DEFAULT,
+        WSP_GGML_PREC_F32,
+    };
+
     enum wsp_ggml_backend_type {
         WSP_GGML_BACKEND_CPU = 0,
         WSP_GGML_BACKEND_GPU = 10,
@@ -365,6 +387,8 @@ extern "C" {
         WSP_GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors
         WSP_GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors
         WSP_GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
+        WSP_GGML_FTYPE_MOSTLY_IQ2_XXS = 15, // except 1d tensors
+        WSP_GGML_FTYPE_MOSTLY_IQ2_XS  = 16, // except 1d tensors
     };
 
     // available tensor operations:
@@ -478,7 +502,8 @@ extern "C" {
     enum wsp_ggml_log_level {
         WSP_GGML_LOG_LEVEL_ERROR = 2,
         WSP_GGML_LOG_LEVEL_WARN = 3,
-        WSP_GGML_LOG_LEVEL_INFO = 4
+        WSP_GGML_LOG_LEVEL_INFO = 4,
+        WSP_GGML_LOG_LEVEL_DEBUG = 5
     };
 
     // ggml object
@@ -502,7 +527,6 @@ extern "C" {
 
         struct wsp_ggml_backend_buffer * buffer;
 
-        int     n_dims;
         int64_t ne[WSP_GGML_MAX_DIMS]; // number of elements
         size_t  nb[WSP_GGML_MAX_DIMS]; // stride in bytes:
                                        // nb[0] = wsp_ggml_type_size(type)
@@ -534,7 +558,7 @@ extern "C" {
 
         void * extra; // extra things e.g. for ggml-cuda.cu
 
-        char padding[12];
+        char padding[8];
     };
 
     static const size_t WSP_GGML_TENSOR_SIZE = sizeof(struct wsp_ggml_tensor);
@@ -635,33 +659,41 @@ extern "C" {
     WSP_GGML_API void wsp_ggml_print_object (const struct wsp_ggml_object * obj);
     WSP_GGML_API void wsp_ggml_print_objects(const struct wsp_ggml_context * ctx);
 
-    WSP_GGML_API int64_t wsp_ggml_nelements   (const struct wsp_ggml_tensor * tensor);
-    WSP_GGML_API int64_t wsp_ggml_nrows       (const struct wsp_ggml_tensor * tensor);
-    WSP_GGML_API size_t  wsp_ggml_nbytes      (const struct wsp_ggml_tensor * tensor);
-    WSP_GGML_API size_t  wsp_ggml_nbytes_pad  (const struct wsp_ggml_tensor * tensor); // same as wsp_ggml_nbytes() but padded to WSP_GGML_MEM_ALIGN
-    WSP_GGML_API size_t  wsp_ggml_nbytes_split(const struct wsp_ggml_tensor * tensor, int nrows_split);
+    WSP_GGML_API WSP_GGML_CALL int64_t wsp_ggml_nelements (const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API WSP_GGML_CALL int64_t wsp_ggml_nrows     (const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API WSP_GGML_CALL size_t  wsp_ggml_nbytes    (const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API size_t wsp_ggml_nbytes_pad (const struct wsp_ggml_tensor * tensor); // same as wsp_ggml_nbytes() but padded to WSP_GGML_MEM_ALIGN
 
-    WSP_GGML_API int     wsp_ggml_blck_size (enum wsp_ggml_type type);
-    WSP_GGML_API size_t  wsp_ggml_type_size (enum wsp_ggml_type type); // size in bytes for all elements in a block
-    WSP_GGML_API float   wsp_ggml_type_sizef(enum wsp_ggml_type type); // wsp_ggml_type_size()/wsp_ggml_blck_size() as float
+    WSP_GGML_API WSP_GGML_CALL int    wsp_ggml_blck_size(enum wsp_ggml_type type);
+    WSP_GGML_API WSP_GGML_CALL size_t wsp_ggml_type_size(enum wsp_ggml_type type);             // size in bytes for all elements in a block
+    WSP_GGML_API WSP_GGML_CALL size_t wsp_ggml_row_size (enum wsp_ggml_type type, int64_t ne); // size in bytes for all elements in a row
 
-    WSP_GGML_API const char * wsp_ggml_type_name(enum wsp_ggml_type type);
-    WSP_GGML_API const char * wsp_ggml_op_name  (enum wsp_ggml_op   op);
-    WSP_GGML_API const char * wsp_ggml_op_symbol(enum wsp_ggml_op   op);
+    WSP_GGML_DEPRECATED(
+    WSP_GGML_API double wsp_ggml_type_sizef(enum wsp_ggml_type type), // wsp_ggml_type_size()/wsp_ggml_blck_size() as float
+    "use wsp_ggml_row_size() instead");
 
-    WSP_GGML_API const char * wsp_ggml_unary_op_name(enum wsp_ggml_unary_op op);
-    WSP_GGML_API const char * wsp_ggml_op_desc(const struct wsp_ggml_tensor * t); // unary or op name
+    WSP_GGML_API WSP_GGML_CALL const char * wsp_ggml_type_name(enum wsp_ggml_type type);
+    WSP_GGML_API WSP_GGML_CALL const char * wsp_ggml_op_name  (enum wsp_ggml_op   op);
+    WSP_GGML_API               const char * wsp_ggml_op_symbol(enum wsp_ggml_op   op);
 
-    WSP_GGML_API size_t  wsp_ggml_element_size(const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API               const char * wsp_ggml_unary_op_name(enum wsp_ggml_unary_op op);
+    WSP_GGML_API WSP_GGML_CALL const char * wsp_ggml_op_desc(const struct wsp_ggml_tensor * t); // unary or op name
 
-    WSP_GGML_API bool    wsp_ggml_is_quantized(enum wsp_ggml_type type);
+    WSP_GGML_API WSP_GGML_CALL size_t wsp_ggml_element_size(const struct wsp_ggml_tensor * tensor);
+
+    WSP_GGML_API WSP_GGML_CALL bool wsp_ggml_is_quantized(enum wsp_ggml_type type);
 
     // TODO: temporary until model loading of ggml examples is refactored
     WSP_GGML_API enum wsp_ggml_type wsp_ggml_ftype_to_wsp_ggml_type(enum wsp_ggml_ftype ftype);
 
-    WSP_GGML_API bool wsp_ggml_is_transposed(const struct wsp_ggml_tensor * tensor);
-    WSP_GGML_API bool wsp_ggml_is_contiguous(const struct wsp_ggml_tensor * tensor);
-    WSP_GGML_API bool wsp_ggml_is_permuted  (const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API WSP_GGML_CALL bool wsp_ggml_is_transposed(const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API WSP_GGML_CALL bool wsp_ggml_is_contiguous(const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API WSP_GGML_CALL bool wsp_ggml_is_permuted  (const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API bool wsp_ggml_is_scalar (const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API bool wsp_ggml_is_vector (const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API bool wsp_ggml_is_matrix (const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API bool wsp_ggml_is_3d     (const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API int  wsp_ggml_n_dims    (const struct wsp_ggml_tensor * tensor); // returns 1 for scalars
 
     WSP_GGML_API bool wsp_ggml_are_same_shape(const struct wsp_ggml_tensor * t0, const struct wsp_ggml_tensor * t1);
 
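Two of the changes above break rc.7-era source: the tensor struct no longer carries an n_dims field (see the struct hunk earlier), and wsp_ggml_type_sizef() is deprecated in favor of the exact, integer-valued wsp_ggml_row_size(). A hedged migration sketch, assuming a valid struct wsp_ggml_tensor * t:

    // before (rc.7 ggml):
    //   int    dims = t->n_dims;
    //   size_t row  = (size_t) (wsp_ggml_type_sizef(t->type) * t->ne[0]);

    // after (rc.8 ggml):
    int    dims = wsp_ggml_n_dims(t);                   // computed from ne[], returns 1 for scalars
    size_t row  = wsp_ggml_row_size(t->type, t->ne[0]); // exact bytes per row, no float rounding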
@@ -722,8 +754,8 @@ extern "C" {
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_view_tensor(struct wsp_ggml_context * ctx, struct wsp_ggml_tensor * src);
 
     // Context tensor enumeration and lookup
-    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_first_tensor(struct wsp_ggml_context * ctx);
-    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_next_tensor (struct wsp_ggml_context * ctx, struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_first_tensor(const struct wsp_ggml_context * ctx);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_next_tensor (const struct wsp_ggml_context * ctx, struct wsp_ggml_tensor * tensor);
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_tensor(struct wsp_ggml_context * ctx, const char * name);
 
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_zero(struct wsp_ggml_tensor * tensor);
@@ -748,7 +780,7 @@ extern "C" {
     WSP_GGML_API void *  wsp_ggml_get_data    (const struct wsp_ggml_tensor * tensor);
     WSP_GGML_API float * wsp_ggml_get_data_f32(const struct wsp_ggml_tensor * tensor);
 
-    WSP_GGML_API enum wsp_ggml_unary_op wsp_ggml_get_unary_op(const struct wsp_ggml_tensor * tensor);
+    WSP_GGML_API WSP_GGML_CALL enum wsp_ggml_unary_op wsp_ggml_get_unary_op(const struct wsp_ggml_tensor * tensor);
 
     WSP_GGML_API const char *             wsp_ggml_get_name(const struct wsp_ggml_tensor * tensor);
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_name(      struct wsp_ggml_tensor * tensor, const char * name);
@@ -1050,6 +1082,12 @@ extern "C" {
             struct wsp_ggml_tensor  * a,
             struct wsp_ggml_tensor  * b);
 
+    // change the precision of a matrix multiplication
+    // set to WSP_GGML_PREC_F32 for higher precision (useful for phi-2)
+    WSP_GGML_API void wsp_ggml_mul_mat_set_prec(
+            struct wsp_ggml_tensor * a,
+            enum   wsp_ggml_prec     prec);
+
     // indirect matrix multiplication
     //  wsp_ggml_mul_mat_id(ctx, as, ids, id, b) ~= wsp_ggml_mul_mat(as[ids[id]], b)
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mul_mat_id(
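A short usage sketch for the new precision hook; per the comment above it matters for models such as phi-2, where default-precision accumulation in the attention matmul loses too much accuracy (tensor names here are illustrative):

    struct wsp_ggml_tensor * kq = wsp_ggml_mul_mat(ctx, k, q);
    // request F32 accumulation for this node; other matmuls keep WSP_GGML_PREC_DEFAULT
    wsp_ggml_mul_mat_set_prec(kq, WSP_GGML_PREC_F32);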
@@ -1075,13 +1113,13 @@ extern "C" {
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_scale(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a,
-            struct wsp_ggml_tensor  * b);
+            float                     s);
 
     // in-place, returns view(a)
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_scale_inplace(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a,
-            struct wsp_ggml_tensor  * b);
+            float                     s);
 
     // b -> view(a,offset,nb1,nb2,3), return modified a
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set(
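wsp_ggml_scale() and wsp_ggml_scale_inplace() now take the factor as a plain float rather than a 1-element F32 tensor, so rc.7-era call sites need a small rewrite; a sketch:

    // before: the scalar had to be wrapped in a tensor
    //   cur = wsp_ggml_scale(ctx, cur, wsp_ggml_new_f32(ctx, 1.0f/sqrtf((float) n_embd)));

    // after: pass it directly
    cur = wsp_ggml_scale(ctx, cur, 1.0f/sqrtf((float) n_embd));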
@@ -1137,22 +1175,16 @@ extern "C" {
             struct wsp_ggml_tensor  * a,
             struct wsp_ggml_tensor  * b);
 
-    // a -> b, in-place, return view(b)
-    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cpy_inplace(
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cast(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a,
-            struct wsp_ggml_tensor  * b);
+            enum   wsp_ggml_type      type);
 
     // make contiguous
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cont(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a);
 
-    // make contiguous, in-place
-    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cont_inplace(
-            struct wsp_ggml_context * ctx,
-            struct wsp_ggml_tensor  * a);
-
     // make contiguous, with new shape
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cont_1d(
             struct wsp_ggml_context * ctx,
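The in-place copy/contiguous wrappers are gone, and type conversion gets a dedicated op: wsp_ggml_cast() produces a tensor of the requested type, replacing the old pattern of wsp_ggml_cpy() into a manually created destination. A minimal sketch:

    // convert a tensor to F32 in one graph op
    struct wsp_ggml_tensor * a_f32 = wsp_ggml_cast(ctx, a, WSP_GGML_TYPE_F32);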
@@ -1391,7 +1423,7 @@ extern "C" {
             float                 beta_slow);
 
     // compute correction dims for YaRN RoPE scaling
-    void wsp_ggml_rope_yarn_corr_dims(
+    WSP_GGML_CALL void wsp_ggml_rope_yarn_corr_dims(
         int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);
 
     // xPos RoPE, in-place, returns view(a)
@@ -1825,8 +1857,8 @@ extern "C" {
 
     // wsp_ggml_graph_plan() has to be called before wsp_ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
-    WSP_GGML_API struct wsp_ggml_cplan wsp_ggml_graph_plan   (struct wsp_ggml_cgraph * cgraph, int n_threads /*= WSP_GGML_DEFAULT_N_THREADS*/);
-    WSP_GGML_API int               wsp_ggml_graph_compute(struct wsp_ggml_cgraph * cgraph, struct wsp_ggml_cplan * cplan);
+    WSP_GGML_API struct wsp_ggml_cplan wsp_ggml_graph_plan   (const struct wsp_ggml_cgraph * cgraph, int n_threads /*= WSP_GGML_DEFAULT_N_THREADS*/);
+    WSP_GGML_API int               wsp_ggml_graph_compute(      struct wsp_ggml_cgraph * cgraph, struct wsp_ggml_cplan * cplan);
 
     // same as wsp_ggml_graph_compute() but the work data is allocated as a part of the context
     // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
@@ -2033,6 +2065,18 @@ extern "C" {
     // quantization
     //
 
+    // - wsp_ggml_wsp_quantize_init can be called multiple times with the same type
+    //   it will only initialize the quantization tables for the first call or after wsp_ggml_wsp_quantize_free
+    //   automatically called by wsp_ggml_wsp_quantize_chunk for convenience
+    //
+    // - wsp_ggml_wsp_quantize_free will free any memory allocated by wsp_ggml_wsp_quantize_init
+    //   call this at the end of the program to avoid memory leaks
+    //
+    // note: these are thread-safe
+    //
+    WSP_GGML_API void wsp_ggml_wsp_quantize_init(enum wsp_ggml_type type);
+    WSP_GGML_API void wsp_ggml_wsp_quantize_free(void);
+
     // TODO: these would probably get removed in favor of the more general wsp_ggml_wsp_quantize_chunk
     WSP_GGML_API size_t wsp_ggml_wsp_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
     WSP_GGML_API size_t wsp_ggml_wsp_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
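A sketch of the intended lifecycle, following the comments above (explicit init is optional, since wsp_ggml_wsp_quantize_chunk() calls it lazily):

    // optional: build the lookup tables up front; repeat calls are no-ops
    wsp_ggml_wsp_quantize_init(WSP_GGML_TYPE_IQ2_XS);

    // ... quantize tensors ...

    // release the tables once, at program shutdown
    wsp_ggml_wsp_quantize_free();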
@@ -2046,7 +2090,12 @@ extern "C" {
     WSP_GGML_API size_t wsp_ggml_wsp_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist);
     WSP_GGML_API size_t wsp_ggml_wsp_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist);
 
-    WSP_GGML_API size_t wsp_ggml_wsp_quantize_chunk(enum wsp_ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
+    // some quantization type cannot be used without an importance matrix
+    WSP_GGML_API bool wsp_ggml_wsp_quantize_requires_imatrix(enum wsp_ggml_type type);
+
+    // calls wsp_ggml_wsp_quantize_init internally (i.e. can allocate memory)
+    WSP_GGML_API size_t wsp_ggml_wsp_quantize_chunk(enum wsp_ggml_type type, const float * src, void * dst,
+            int start, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
 
     //
     // gguf
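The chunk API is now row-oriented and threads an optional importance matrix through to the quantizer; in upstream ggml the IQ2 types added in this release are among those that require one. A hedged sketch for a type that does not need it (src, dst, nrows, and n_per_row are assumed to be set up by the caller):

    int64_t hist[16] = {0};
    if (!wsp_ggml_wsp_quantize_requires_imatrix(WSP_GGML_TYPE_Q4_0)) {
        // quantize nrows rows of n_per_row values each, starting at offset 0
        size_t bytes = wsp_ggml_wsp_quantize_chunk(WSP_GGML_TYPE_Q4_0, src, dst,
                0, nrows, n_per_row, hist, /*imatrix=*/NULL);
        (void) bytes; // number of bytes written to dst
    }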
@@ -2116,10 +2165,11 @@ extern "C" {
     WSP_GGML_API const void * wsp_gguf_get_arr_data(const struct wsp_gguf_context * ctx, int key_id);
     WSP_GGML_API const char * wsp_gguf_get_arr_str (const struct wsp_gguf_context * ctx, int key_id, int i);
 
-    WSP_GGML_API int    wsp_gguf_get_n_tensors    (const struct wsp_gguf_context * ctx);
-    WSP_GGML_API int    wsp_gguf_find_tensor      (const struct wsp_gguf_context * ctx, const char * name);
-    WSP_GGML_API size_t wsp_gguf_get_tensor_offset(const struct wsp_gguf_context * ctx, int i);
-    WSP_GGML_API char * wsp_gguf_get_tensor_name  (const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API int                wsp_gguf_get_n_tensors    (const struct wsp_gguf_context * ctx);
+    WSP_GGML_API int                wsp_gguf_find_tensor      (const struct wsp_gguf_context * ctx, const char * name);
+    WSP_GGML_API size_t             wsp_gguf_get_tensor_offset(const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API char *             wsp_gguf_get_tensor_name  (const struct wsp_gguf_context * ctx, int i);
+    WSP_GGML_API enum wsp_ggml_type wsp_gguf_get_tensor_type  (const struct wsp_gguf_context * ctx, int i);
 
     // overrides existing values or adds a new one
     WSP_GGML_API void wsp_gguf_set_val_u8 (struct wsp_gguf_context * ctx, const char * key, uint8_t val);
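With wsp_gguf_get_tensor_type() added, a GGUF file's tensor table can be inspected without touching tensor data; a minimal sketch, assuming an already-open struct wsp_gguf_context * gctx and <stdio.h>:

    const int n_tensors = wsp_gguf_get_n_tensors(gctx);
    for (int i = 0; i < n_tensors; ++i) {
        printf("%-48s %-8s offset=%zu\n",
            wsp_gguf_get_tensor_name(gctx, i),
            wsp_ggml_type_name(wsp_gguf_get_tensor_type(gctx, i)), // new in rc.8
            wsp_gguf_get_tensor_offset(gctx, i));
    }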
@@ -2175,6 +2225,7 @@ extern "C" {
     //
 
     WSP_GGML_API int wsp_ggml_cpu_has_avx        (void);
+    WSP_GGML_API int wsp_ggml_cpu_has_avx_vnni   (void);
     WSP_GGML_API int wsp_ggml_cpu_has_avx2       (void);
     WSP_GGML_API int wsp_ggml_cpu_has_avx512     (void);
     WSP_GGML_API int wsp_ggml_cpu_has_avx512_vbmi(void);