whisper.rn 0.4.0-rc.10 → 0.4.0-rc.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +9 -3
- package/cpp/amx/amx.cpp +220 -0
- package/cpp/amx/amx.h +8 -0
- package/cpp/amx/common.h +91 -0
- package/cpp/amx/mmq.cpp +2511 -0
- package/cpp/amx/mmq.h +10 -0
- package/cpp/ggml-alloc.c +6 -14
- package/cpp/ggml-backend-impl.h +50 -11
- package/cpp/ggml-backend-reg.cpp +409 -31
- package/cpp/ggml-backend.cpp +9 -3
- package/cpp/ggml-backend.h +18 -0
- package/cpp/ggml-common.h +41 -43
- package/cpp/ggml-cpp.h +1 -0
- package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +941 -254
- package/cpp/ggml-cpu-aarch64.h +2 -24
- package/cpp/ggml-cpu-impl.h +171 -11
- package/cpp/ggml-cpu-quants.c +1812 -389
- package/cpp/ggml-cpu-traits.cpp +36 -0
- package/cpp/ggml-cpu-traits.h +38 -0
- package/cpp/ggml-cpu.c +1432 -610
- package/cpp/ggml-cpu.cpp +131 -141
- package/cpp/ggml-cpu.h +10 -50
- package/cpp/ggml-impl.h +27 -11
- package/cpp/ggml-metal-impl.h +39 -0
- package/cpp/ggml-metal.h +1 -1
- package/cpp/ggml-metal.m +1031 -359
- package/cpp/ggml-opt.cpp +854 -0
- package/cpp/ggml-opt.h +216 -0
- package/cpp/ggml-quants.c +0 -9
- package/cpp/ggml-threading.h +4 -2
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +501 -1537
- package/cpp/ggml.h +144 -171
- package/cpp/gguf.cpp +1329 -0
- package/cpp/gguf.h +202 -0
- package/cpp/whisper.cpp +254 -114
- package/cpp/whisper.h +6 -3
- package/lib/commonjs/version.json +1 -1
- package/lib/module/version.json +1 -1
- package/package.json +2 -1
- package/src/version.json +1 -1
- package/whisper-rn.podspec +2 -2
- package/cpp/README.md +0 -4
- package/cpp/ggml-aarch64.c +0 -129
- package/cpp/ggml-aarch64.h +0 -19
- package/cpp/ggml-backend.cpp.rej +0 -12
package/cpp/ggml-cpu.cpp
CHANGED
|
@@ -2,11 +2,22 @@
|
|
|
2
2
|
#include "ggml-backend-impl.h"
|
|
3
3
|
#include "ggml-cpu.h"
|
|
4
4
|
#include "ggml-cpu-aarch64.h"
|
|
5
|
+
#include "ggml-cpu-traits.h"
|
|
5
6
|
#include "ggml-impl.h"
|
|
7
|
+
#include "amx/amx.h"
|
|
8
|
+
|
|
6
9
|
#include <cctype>
|
|
7
10
|
#include <string>
|
|
8
11
|
#include <vector>
|
|
9
12
|
|
|
13
|
+
#ifdef WSP_GGML_USE_CPU_HBM
|
|
14
|
+
#include "ggml-cpu-hbm.h"
|
|
15
|
+
#endif
|
|
16
|
+
|
|
17
|
+
#ifdef WSP_GGML_USE_CPU_KLEIDIAI
|
|
18
|
+
#include "kleidiai/kleidiai.h"
|
|
19
|
+
#endif
|
|
20
|
+
|
|
10
21
|
#if defined(__APPLE__)
|
|
11
22
|
#include <sys/types.h>
|
|
12
23
|
#include <sys/sysctl.h>
|
|
@@ -22,124 +33,26 @@
|
|
|
22
33
|
|
|
23
34
|
// ggml-backend interface
|
|
24
35
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
// buffer type HBM
|
|
28
|
-
|
|
29
|
-
#include <hbwmalloc.h>
|
|
30
|
-
|
|
31
|
-
static const char * wsp_ggml_backend_cpu_hbm_buffer_type_get_name(wsp_ggml_backend_buffer_type_t buft) {
|
|
32
|
-
return "CPU_HBM";
|
|
33
|
-
|
|
34
|
-
WSP_GGML_UNUSED(buft);
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
static void wsp_ggml_backend_cpu_hbm_buffer_free_buffer(wsp_ggml_backend_buffer_t buffer) {
|
|
38
|
-
hbw_free(buffer->context);
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
static wsp_ggml_backend_buffer_t wsp_ggml_backend_cpu_hbm_buffer_type_alloc_buffer(wsp_ggml_backend_buffer_type_t buft, size_t size) {
|
|
42
|
-
void * ptr;
|
|
43
|
-
int result = hbw_posix_memalign(&ptr, wsp_ggml_backend_cpu_buffer_type_get_alignment(buft), size);
|
|
44
|
-
if (result != 0) {
|
|
45
|
-
WSP_GGML_LOG_ERROR("failed to allocate HBM buffer of size %zu\n", size);
|
|
46
|
-
return NULL;
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
wsp_ggml_backend_buffer_t buffer = wsp_ggml_backend_cpu_buffer_from_ptr(ptr, size);
|
|
50
|
-
buffer->buft = buft;
|
|
51
|
-
buffer->iface.free_buffer = wsp_ggml_backend_cpu_hbm_buffer_free_buffer;
|
|
52
|
-
|
|
53
|
-
return buffer;
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
wsp_ggml_backend_buffer_type_t wsp_ggml_backend_cpu_hbm_buffer_type(void) {
|
|
57
|
-
static struct wsp_ggml_backend_buffer_type wsp_ggml_backend_cpu_buffer_type_hbm = {
|
|
58
|
-
/* .iface = */ {
|
|
59
|
-
/* .get_name = */ wsp_ggml_backend_cpu_hbm_buffer_type_get_name,
|
|
60
|
-
/* .alloc_buffer = */ wsp_ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
|
|
61
|
-
/* .get_alignment = */ wsp_ggml_backend_cpu_buffer_type_get_alignment,
|
|
62
|
-
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
|
63
|
-
/* .get_alloc_size = */ NULL, // defaults to wsp_ggml_nbytes
|
|
64
|
-
/* .is_host = */ wsp_ggml_backend_cpu_buffer_type_is_host,
|
|
65
|
-
},
|
|
66
|
-
/* .context = */ NULL,
|
|
67
|
-
};
|
|
68
|
-
|
|
69
|
-
return &wsp_ggml_backend_cpu_buffer_type_hbm;
|
|
70
|
-
}
|
|
71
|
-
#endif
|
|
72
|
-
|
|
73
|
-
// buffer type AARCH64
|
|
74
|
-
|
|
75
|
-
static void wsp_ggml_backend_cpu_aarch64_buffer_init_tensor(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor) {
|
|
76
|
-
tensor->extra = (void *)wsp_ggml_aarch64_get_optimal_repack_type(tensor); // NOLINT
|
|
77
|
-
|
|
78
|
-
WSP_GGML_UNUSED(buffer);
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
static void wsp_ggml_backend_cpu_aarch64_buffer_set_tensor(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
|
82
|
-
WSP_GGML_ASSERT(offset == 0);
|
|
83
|
-
WSP_GGML_ASSERT(size == wsp_ggml_nbytes(tensor));
|
|
84
|
-
|
|
85
|
-
enum wsp_ggml_type repack_type = (enum wsp_ggml_type)(intptr_t)tensor->extra;
|
|
86
|
-
|
|
87
|
-
wsp_ggml_aarch64_repack_tensor(tensor, repack_type, data, size);
|
|
88
|
-
|
|
89
|
-
WSP_GGML_UNUSED(buffer);
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
static const char * wsp_ggml_backend_cpu_aarch64_buffer_type_get_name(wsp_ggml_backend_buffer_type_t buft) {
|
|
93
|
-
return "CPU_AARCH64";
|
|
94
|
-
|
|
95
|
-
WSP_GGML_UNUSED(buft);
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
static wsp_ggml_backend_buffer_t wsp_ggml_backend_cpu_aarch64_buffer_type_alloc_buffer(wsp_ggml_backend_buffer_type_t buft, size_t size) {
|
|
99
|
-
auto * buffer = wsp_ggml_backend_buft_alloc_buffer(wsp_ggml_backend_cpu_buffer_type(), size);
|
|
100
|
-
|
|
101
|
-
if (buffer == NULL) {
|
|
102
|
-
return NULL;
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
buffer->buft = buft;
|
|
106
|
-
buffer->iface.init_tensor = wsp_ggml_backend_cpu_aarch64_buffer_init_tensor;
|
|
107
|
-
buffer->iface.set_tensor = wsp_ggml_backend_cpu_aarch64_buffer_set_tensor;
|
|
108
|
-
|
|
109
|
-
return buffer;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
wsp_ggml_backend_buffer_type_t wsp_ggml_backend_cpu_aarch64_buffer_type(void) {
|
|
113
|
-
static struct wsp_ggml_backend_buffer_type wsp_ggml_backend_cpu_buffer_type_aarch64 = {
|
|
114
|
-
/* .iface = */ {
|
|
115
|
-
/* .get_name = */ wsp_ggml_backend_cpu_aarch64_buffer_type_get_name,
|
|
116
|
-
/* .alloc_buffer = */ wsp_ggml_backend_cpu_aarch64_buffer_type_alloc_buffer,
|
|
117
|
-
/* .get_alignment = */ wsp_ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
|
118
|
-
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
|
119
|
-
/* .get_alloc_size = */ NULL, // defaults to wsp_ggml_nbytes
|
|
120
|
-
/* .is_host = */ NULL,
|
|
121
|
-
},
|
|
122
|
-
/* .device = */ wsp_ggml_backend_reg_dev_get(wsp_ggml_backend_cpu_reg(), 0),
|
|
123
|
-
/* .context = */ NULL,
|
|
124
|
-
};
|
|
125
|
-
|
|
126
|
-
return &wsp_ggml_backend_cpu_buffer_type_aarch64;
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
bool wsp_ggml_backend_cpu_buft_is_aarch64(wsp_ggml_backend_buffer_type_t buft) {
|
|
130
|
-
return buft == wsp_ggml_backend_cpu_aarch64_buffer_type();
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
static wsp_ggml_backend_buffer_type_t * wsp_ggml_backend_cpu_get_extra_bufts(wsp_ggml_backend_dev_t device) {
|
|
36
|
+
std::vector<wsp_ggml_backend_buffer_type_t>& wsp_ggml_backend_cpu_get_extra_buffers_type() {
|
|
134
37
|
static std::vector<wsp_ggml_backend_buffer_type_t> bufts = []() {
|
|
135
38
|
std::vector<wsp_ggml_backend_buffer_type_t> bufts;
|
|
136
39
|
|
|
137
|
-
#
|
|
138
|
-
|
|
40
|
+
#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
|
|
41
|
+
if (wsp_ggml_backend_amx_buffer_type()) {
|
|
42
|
+
bufts.push_back(wsp_ggml_backend_amx_buffer_type());
|
|
43
|
+
}
|
|
44
|
+
#endif
|
|
45
|
+
|
|
46
|
+
#ifdef WSP_GGML_USE_CPU_KLEIDIAI
|
|
47
|
+
if (wsp_ggml_backend_cpu_kleidiai_buffer_type()) {
|
|
48
|
+
bufts.push_back(wsp_ggml_backend_cpu_kleidiai_buffer_type());
|
|
49
|
+
}
|
|
139
50
|
#endif
|
|
140
51
|
|
|
141
52
|
#ifdef WSP_GGML_USE_CPU_AARCH64
|
|
142
|
-
|
|
53
|
+
if (wsp_ggml_backend_cpu_aarch64_buffer_type()) {
|
|
54
|
+
bufts.push_back(wsp_ggml_backend_cpu_aarch64_buffer_type());
|
|
55
|
+
}
|
|
143
56
|
#endif
|
|
144
57
|
|
|
145
58
|
bufts.push_back(NULL);
|
|
@@ -147,11 +60,22 @@ static wsp_ggml_backend_buffer_type_t * wsp_ggml_backend_cpu_get_extra_bufts(wsp
|
|
|
147
60
|
return bufts;
|
|
148
61
|
}();
|
|
149
62
|
|
|
150
|
-
return bufts
|
|
63
|
+
return bufts;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
static wsp_ggml_backend_buffer_type_t * wsp_ggml_backend_cpu_device_get_extra_buffers_type(wsp_ggml_backend_dev_t device) {
|
|
67
|
+
return wsp_ggml_backend_cpu_get_extra_buffers_type().data();
|
|
151
68
|
|
|
152
69
|
WSP_GGML_UNUSED(device);
|
|
153
70
|
}
|
|
154
71
|
|
|
72
|
+
static bool wsp_ggml_backend_cpu_is_extra_buffer_type(wsp_ggml_backend_buffer_type_t buft) {
|
|
73
|
+
for (auto extra : wsp_ggml_backend_cpu_get_extra_buffers_type()) {
|
|
74
|
+
if (extra && extra == buft) return true;
|
|
75
|
+
}
|
|
76
|
+
return false;
|
|
77
|
+
}
|
|
78
|
+
|
|
155
79
|
// CPU backend - backend (stream)
|
|
156
80
|
|
|
157
81
|
struct wsp_ggml_backend_cpu_context {
|
|
@@ -370,14 +294,14 @@ struct wsp_ggml_backend_cpu_device_context {
|
|
|
370
294
|
&hKey) == ERROR_SUCCESS) {
|
|
371
295
|
DWORD cpu_brand_size = 0;
|
|
372
296
|
if (RegQueryValueExA(hKey,
|
|
373
|
-
|
|
297
|
+
"ProcessorNameString",
|
|
374
298
|
NULL,
|
|
375
299
|
NULL,
|
|
376
300
|
NULL,
|
|
377
301
|
&cpu_brand_size) == ERROR_SUCCESS) {
|
|
378
302
|
description.resize(cpu_brand_size);
|
|
379
303
|
if (RegQueryValueExA(hKey,
|
|
380
|
-
|
|
304
|
+
"ProcessorNameString",
|
|
381
305
|
NULL,
|
|
382
306
|
NULL,
|
|
383
307
|
(LPBYTE)&description[0], // NOLINT
|
|
@@ -456,14 +380,23 @@ static bool wsp_ggml_backend_cpu_device_supports_op(wsp_ggml_backend_dev_t dev,
|
|
|
456
380
|
const struct wsp_ggml_tensor * src0 = op->src[0];
|
|
457
381
|
const struct wsp_ggml_tensor * src1 = op->src[1];
|
|
458
382
|
|
|
459
|
-
if (
|
|
460
|
-
|
|
461
|
-
|
|
383
|
+
if (op->op == WSP_GGML_OP_NONE || op->op == WSP_GGML_OP_RESHAPE || op->op == WSP_GGML_OP_VIEW || op->op == WSP_GGML_OP_PERMUTE || op->op == WSP_GGML_OP_TRANSPOSE) {
|
|
384
|
+
return true;
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
// extra_buffer_op?
|
|
388
|
+
for (auto extra : wsp_ggml_backend_cpu_get_extra_buffers_type()) {
|
|
389
|
+
if (extra) {
|
|
390
|
+
auto buf_extra = (ggml::cpu::extra_buffer_type*) extra->context;
|
|
391
|
+
if (buf_extra && buf_extra->supports_op(dev, op)) {
|
|
392
|
+
return true;
|
|
393
|
+
}
|
|
462
394
|
}
|
|
463
395
|
}
|
|
464
396
|
|
|
465
|
-
|
|
466
|
-
|
|
397
|
+
// the other cases need a host buffer.
|
|
398
|
+
for (int i = 0; i < WSP_GGML_MAX_SRC; i++) {
|
|
399
|
+
if (op->src[i] && op->src[i]->buffer && !wsp_ggml_backend_buft_is_host(op->src[i]->buffer->buft)) {
|
|
467
400
|
return false;
|
|
468
401
|
}
|
|
469
402
|
}
|
|
@@ -471,28 +404,37 @@ static bool wsp_ggml_backend_cpu_device_supports_op(wsp_ggml_backend_dev_t dev,
|
|
|
471
404
|
switch (op->op) {
|
|
472
405
|
case WSP_GGML_OP_CPY:
|
|
473
406
|
return
|
|
407
|
+
op->type != WSP_GGML_TYPE_IQ3_XXS &&
|
|
408
|
+
op->type != WSP_GGML_TYPE_IQ3_S &&
|
|
474
409
|
op->type != WSP_GGML_TYPE_IQ2_XXS &&
|
|
475
410
|
op->type != WSP_GGML_TYPE_IQ2_XS &&
|
|
411
|
+
op->type != WSP_GGML_TYPE_IQ2_S &&
|
|
476
412
|
op->type != WSP_GGML_TYPE_IQ1_S &&
|
|
477
413
|
op->type != WSP_GGML_TYPE_IQ1_M; // missing type_traits.from_float
|
|
478
414
|
case WSP_GGML_OP_MUL_MAT:
|
|
479
415
|
return src1->type == WSP_GGML_TYPE_F32 || src1->type == wsp_ggml_get_type_traits_cpu(src0->type)->vec_dot_type;
|
|
480
|
-
case
|
|
481
|
-
|
|
416
|
+
case WSP_GGML_OP_SOFT_MAX_BACK: {
|
|
417
|
+
if (op->src[0]->type != WSP_GGML_TYPE_F32 || op->src[1]->type != WSP_GGML_TYPE_F32) {
|
|
418
|
+
return false;
|
|
419
|
+
}
|
|
420
|
+
float max_bias = 0.0f;
|
|
421
|
+
|
|
422
|
+
memcpy(&max_bias, (const float *) op->op_params + 1, sizeof(float));
|
|
423
|
+
|
|
424
|
+
return max_bias == 0.0f;
|
|
425
|
+
}
|
|
482
426
|
case WSP_GGML_OP_IM2COL_BACK:
|
|
483
427
|
return src0->type == WSP_GGML_TYPE_F32 && src1->type == WSP_GGML_TYPE_F32;
|
|
484
428
|
case WSP_GGML_OP_OUT_PROD:
|
|
485
|
-
return (src0->type == WSP_GGML_TYPE_F32 || wsp_ggml_is_quantized(src0->type)
|
|
429
|
+
return (src0->type == WSP_GGML_TYPE_F32 || (wsp_ggml_is_quantized(src0->type) && src0->ne[2] == src1->ne[2] && src0->ne[3] == src1->ne[3])) &&
|
|
430
|
+
src1->type == WSP_GGML_TYPE_F32 && op->type == WSP_GGML_TYPE_F32;
|
|
486
431
|
default:
|
|
487
432
|
return true;
|
|
488
433
|
}
|
|
489
|
-
|
|
490
|
-
WSP_GGML_UNUSED(dev);
|
|
491
434
|
}
|
|
492
435
|
|
|
493
436
|
static bool wsp_ggml_backend_cpu_device_supports_buft(wsp_ggml_backend_dev_t dev, wsp_ggml_backend_buffer_type_t buft) {
|
|
494
|
-
return wsp_ggml_backend_buft_is_host(buft) ||
|
|
495
|
-
|
|
437
|
+
return wsp_ggml_backend_buft_is_host(buft) || wsp_ggml_backend_cpu_is_extra_buffer_type(buft);
|
|
496
438
|
WSP_GGML_UNUSED(dev);
|
|
497
439
|
}
|
|
498
440
|
|
|
@@ -541,16 +483,12 @@ static wsp_ggml_backend_dev_t wsp_ggml_backend_cpu_reg_get_device(wsp_ggml_backe
|
|
|
541
483
|
return &wsp_ggml_backend_cpu_device;
|
|
542
484
|
}
|
|
543
485
|
|
|
544
|
-
struct wsp_ggml_backend_feature {
|
|
545
|
-
const char * name;
|
|
546
|
-
const char * value;
|
|
547
|
-
};
|
|
548
|
-
|
|
549
|
-
// Not used yet
|
|
550
486
|
// This is intended to replace the wsp_ggml_cpu_has_* functions when loading the CPU backend dynamically,
|
|
551
|
-
// and additionally to allow other backends to expose their own list of features that applications can query using the same API
|
|
487
|
+
// and additionally to allow other backends to expose their own list of features that applications can query using the same API
|
|
552
488
|
static wsp_ggml_backend_feature * wsp_ggml_backend_cpu_get_features(wsp_ggml_backend_reg_t reg) {
|
|
553
489
|
static std::vector<wsp_ggml_backend_feature> features = []() {
|
|
490
|
+
wsp_ggml_cpu_init();
|
|
491
|
+
|
|
554
492
|
std::vector<wsp_ggml_backend_feature> features;
|
|
555
493
|
if (wsp_ggml_cpu_has_sse3()) {
|
|
556
494
|
features.push_back({ "SSE3", "1" });
|
|
@@ -561,6 +499,9 @@ static wsp_ggml_backend_feature * wsp_ggml_backend_cpu_get_features(wsp_ggml_bac
|
|
|
561
499
|
if (wsp_ggml_cpu_has_avx()) {
|
|
562
500
|
features.push_back({ "AVX", "1" });
|
|
563
501
|
}
|
|
502
|
+
if (wsp_ggml_cpu_has_avx_vnni()) {
|
|
503
|
+
features.push_back({ "AVX_VNNI", "1" });
|
|
504
|
+
}
|
|
564
505
|
if (wsp_ggml_cpu_has_avx2()) {
|
|
565
506
|
features.push_back({ "AVX2", "1" });
|
|
566
507
|
}
|
|
@@ -570,9 +511,6 @@ static wsp_ggml_backend_feature * wsp_ggml_backend_cpu_get_features(wsp_ggml_bac
|
|
|
570
511
|
if (wsp_ggml_cpu_has_fma()) {
|
|
571
512
|
features.push_back({ "FMA", "1" });
|
|
572
513
|
}
|
|
573
|
-
if (wsp_ggml_cpu_has_avx_vnni()) {
|
|
574
|
-
features.push_back({ "AVX_VNNI", "1" });
|
|
575
|
-
}
|
|
576
514
|
if (wsp_ggml_cpu_has_avx512()) {
|
|
577
515
|
features.push_back({ "AVX512", "1" });
|
|
578
516
|
}
|
|
@@ -603,22 +541,46 @@ static wsp_ggml_backend_feature * wsp_ggml_backend_cpu_get_features(wsp_ggml_bac
|
|
|
603
541
|
if (wsp_ggml_cpu_has_sve()) {
|
|
604
542
|
features.push_back({ "SVE", "1" });
|
|
605
543
|
}
|
|
544
|
+
if (wsp_ggml_cpu_has_dotprod()) {
|
|
545
|
+
features.push_back({ "DOTPROD", "1" });
|
|
546
|
+
}
|
|
606
547
|
if (wsp_ggml_cpu_get_sve_cnt() > 0) {
|
|
607
548
|
static std::string sve_cnt = std::to_string(wsp_ggml_cpu_get_sve_cnt());
|
|
608
549
|
features.push_back({ "SVE_CNT", sve_cnt.c_str() });
|
|
609
550
|
}
|
|
551
|
+
if (wsp_ggml_cpu_has_sme()) {
|
|
552
|
+
features.push_back({ "SME", "1" });
|
|
553
|
+
}
|
|
610
554
|
if (wsp_ggml_cpu_has_riscv_v()) {
|
|
611
555
|
features.push_back({ "RISCV_V", "1" });
|
|
612
556
|
}
|
|
613
557
|
if (wsp_ggml_cpu_has_vsx()) {
|
|
614
558
|
features.push_back({ "VSX", "1" });
|
|
615
559
|
}
|
|
560
|
+
if (wsp_ggml_cpu_has_vxe()) {
|
|
561
|
+
features.push_back({ "VXE", "1" });
|
|
562
|
+
}
|
|
616
563
|
if (wsp_ggml_cpu_has_wasm_simd()) {
|
|
617
564
|
features.push_back({ "WASM_SIMD", "1" });
|
|
618
565
|
}
|
|
619
566
|
if (wsp_ggml_cpu_has_llamafile()) {
|
|
620
567
|
features.push_back({ "LLAMAFILE", "1" });
|
|
621
568
|
}
|
|
569
|
+
#ifdef WSP_GGML_USE_ACCELERATE
|
|
570
|
+
features.push_back({ "ACCELERATE", "1" });
|
|
571
|
+
#endif
|
|
572
|
+
#ifdef WSP_GGML_USE_CPU_HBM
|
|
573
|
+
features.push_back({ "CPU_HBM", "1" });
|
|
574
|
+
#endif
|
|
575
|
+
#ifdef WSP_GGML_USE_OPENMP
|
|
576
|
+
features.push_back({ "OPENMP", "1" });
|
|
577
|
+
#endif
|
|
578
|
+
#ifdef WSP_GGML_USE_CPU_KLEIDIAI
|
|
579
|
+
features.push_back({ "KLEIDIAI", "1" });
|
|
580
|
+
#endif
|
|
581
|
+
#ifdef WSP_GGML_USE_CPU_AARCH64
|
|
582
|
+
features.push_back({ "AARCH64_REPACK", "1" });
|
|
583
|
+
#endif
|
|
622
584
|
|
|
623
585
|
features.push_back({ nullptr, nullptr });
|
|
624
586
|
|
|
@@ -632,10 +594,35 @@ static wsp_ggml_backend_feature * wsp_ggml_backend_cpu_get_features(wsp_ggml_bac
|
|
|
632
594
|
|
|
633
595
|
static void * wsp_ggml_backend_cpu_get_proc_address(wsp_ggml_backend_reg_t reg, const char * name) {
|
|
634
596
|
if (strcmp(name, "wsp_ggml_backend_set_n_threads") == 0) {
|
|
635
|
-
|
|
597
|
+
wsp_ggml_backend_set_n_threads_t fct = wsp_ggml_backend_cpu_set_n_threads;
|
|
598
|
+
return (void *)fct;
|
|
636
599
|
}
|
|
637
600
|
if (strcmp(name, "wsp_ggml_backend_dev_get_extra_bufts") == 0) {
|
|
638
|
-
|
|
601
|
+
wsp_ggml_backend_dev_get_extra_bufts_t fct = wsp_ggml_backend_cpu_device_get_extra_buffers_type;
|
|
602
|
+
return (void *)fct;
|
|
603
|
+
}
|
|
604
|
+
if (strcmp(name, "wsp_ggml_backend_get_features") == 0) {
|
|
605
|
+
return (void *)wsp_ggml_backend_cpu_get_features;
|
|
606
|
+
}
|
|
607
|
+
if (strcmp(name, "wsp_ggml_backend_set_abort_callback") == 0) {
|
|
608
|
+
return (void *)wsp_ggml_backend_cpu_set_abort_callback;
|
|
609
|
+
}
|
|
610
|
+
if (strcmp(name, "wsp_ggml_backend_cpu_numa_init") == 0) {
|
|
611
|
+
return (void *)wsp_ggml_numa_init;
|
|
612
|
+
}
|
|
613
|
+
if (strcmp(name, "wsp_ggml_backend_cpu_is_numa") == 0) {
|
|
614
|
+
return (void *)wsp_ggml_is_numa;
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
// threadpool - TODO: move to ggml-base
|
|
618
|
+
if (strcmp(name, "wsp_ggml_threadpool_new") == 0) {
|
|
619
|
+
return (void *)wsp_ggml_threadpool_new;
|
|
620
|
+
}
|
|
621
|
+
if (strcmp(name, "wsp_ggml_threadpool_free") == 0) {
|
|
622
|
+
return (void *)wsp_ggml_threadpool_free;
|
|
623
|
+
}
|
|
624
|
+
if (strcmp(name, "wsp_ggml_backend_cpu_set_threadpool") == 0) {
|
|
625
|
+
return (void *)wsp_ggml_backend_cpu_set_threadpool;
|
|
639
626
|
}
|
|
640
627
|
|
|
641
628
|
return NULL;
|
|
@@ -655,9 +642,12 @@ wsp_ggml_backend_reg_t wsp_ggml_backend_cpu_reg(void) {
|
|
|
655
642
|
wsp_ggml_cpu_init();
|
|
656
643
|
|
|
657
644
|
static struct wsp_ggml_backend_reg wsp_ggml_backend_cpu_reg = {
|
|
658
|
-
/* .
|
|
659
|
-
/* .
|
|
645
|
+
/* .api_version = */ WSP_GGML_BACKEND_API_VERSION,
|
|
646
|
+
/* .iface = */ wsp_ggml_backend_cpu_reg_i,
|
|
647
|
+
/* .context = */ NULL,
|
|
660
648
|
};
|
|
661
649
|
|
|
662
650
|
return &wsp_ggml_backend_cpu_reg;
|
|
663
651
|
}
|
|
652
|
+
|
|
653
|
+
WSP_GGML_BACKEND_DL_IMPL(wsp_ggml_backend_cpu_reg)
|
package/cpp/ggml-cpu.h
CHANGED
|
@@ -7,31 +7,8 @@
|
|
|
7
7
|
extern "C" {
|
|
8
8
|
#endif
|
|
9
9
|
|
|
10
|
-
// Scheduling priorities
|
|
11
|
-
enum wsp_ggml_sched_priority {
|
|
12
|
-
WSP_GGML_SCHED_PRIO_NORMAL,
|
|
13
|
-
WSP_GGML_SCHED_PRIO_MEDIUM,
|
|
14
|
-
WSP_GGML_SCHED_PRIO_HIGH,
|
|
15
|
-
WSP_GGML_SCHED_PRIO_REALTIME
|
|
16
|
-
};
|
|
17
|
-
|
|
18
|
-
// Threadpool params
|
|
19
|
-
// Use wsp_ggml_threadpool_params_default() or wsp_ggml_threadpool_params_init() to populate the defaults
|
|
20
|
-
struct wsp_ggml_threadpool_params {
|
|
21
|
-
bool cpumask[WSP_GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
|
|
22
|
-
int n_threads; // number of threads
|
|
23
|
-
enum wsp_ggml_sched_priority prio; // thread priority
|
|
24
|
-
uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
|
|
25
|
-
bool strict_cpu; // strict cpu placement
|
|
26
|
-
bool paused; // start in paused state
|
|
27
|
-
};
|
|
28
|
-
|
|
29
|
-
struct wsp_ggml_threadpool; // forward declaration, see ggml.c
|
|
30
|
-
|
|
31
|
-
typedef struct wsp_ggml_threadpool * wsp_ggml_threadpool_t;
|
|
32
|
-
|
|
33
10
|
// the compute plan that needs to be prepared for wsp_ggml_graph_compute()
|
|
34
|
-
// since https://github.com/
|
|
11
|
+
// since https://github.com/ggml-org/ggml/issues/287
|
|
35
12
|
struct wsp_ggml_cplan {
|
|
36
13
|
size_t work_size; // size of work buffer, calculated by `wsp_ggml_graph_plan()`
|
|
37
14
|
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `wsp_ggml_graph_compute()`
|
|
@@ -75,14 +52,11 @@ extern "C" {
|
|
|
75
52
|
WSP_GGML_BACKEND_API float wsp_ggml_get_f32_nd(const struct wsp_ggml_tensor * tensor, int i0, int i1, int i2, int i3);
|
|
76
53
|
WSP_GGML_BACKEND_API void wsp_ggml_set_f32_nd(const struct wsp_ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
|
|
77
54
|
|
|
78
|
-
WSP_GGML_BACKEND_API struct
|
|
79
|
-
WSP_GGML_BACKEND_API void
|
|
80
|
-
WSP_GGML_BACKEND_API
|
|
81
|
-
WSP_GGML_BACKEND_API struct wsp_ggml_threadpool *
|
|
82
|
-
WSP_GGML_BACKEND_API void
|
|
83
|
-
WSP_GGML_BACKEND_API int wsp_ggml_threadpool_get_n_threads(struct wsp_ggml_threadpool * threadpool);
|
|
84
|
-
WSP_GGML_BACKEND_API void wsp_ggml_threadpool_pause (struct wsp_ggml_threadpool * threadpool);
|
|
85
|
-
WSP_GGML_BACKEND_API void wsp_ggml_threadpool_resume (struct wsp_ggml_threadpool * threadpool);
|
|
55
|
+
WSP_GGML_BACKEND_API struct wsp_ggml_threadpool * wsp_ggml_threadpool_new (struct wsp_ggml_threadpool_params * params);
|
|
56
|
+
WSP_GGML_BACKEND_API void wsp_ggml_threadpool_free (struct wsp_ggml_threadpool * threadpool);
|
|
57
|
+
WSP_GGML_BACKEND_API int wsp_ggml_threadpool_get_n_threads (struct wsp_ggml_threadpool * threadpool);
|
|
58
|
+
WSP_GGML_BACKEND_API void wsp_ggml_threadpool_pause (struct wsp_ggml_threadpool * threadpool);
|
|
59
|
+
WSP_GGML_BACKEND_API void wsp_ggml_threadpool_resume (struct wsp_ggml_threadpool * threadpool);
|
|
86
60
|
|
|
87
61
|
// wsp_ggml_graph_plan() has to be called before wsp_ggml_graph_compute()
|
|
88
62
|
// when plan.work_size > 0, caller must allocate memory for plan.work_data
|
|
@@ -104,10 +78,10 @@ extern "C" {
|
|
|
104
78
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_sse3 (void);
|
|
105
79
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_ssse3 (void);
|
|
106
80
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_avx (void);
|
|
81
|
+
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_avx_vnni (void);
|
|
107
82
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_avx2 (void);
|
|
108
83
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_f16c (void);
|
|
109
84
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_fma (void);
|
|
110
|
-
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_avx_vnni (void);
|
|
111
85
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_avx512 (void);
|
|
112
86
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_avx512_vbmi(void);
|
|
113
87
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_avx512_vnni(void);
|
|
@@ -117,35 +91,28 @@ extern "C" {
|
|
|
117
91
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_neon (void);
|
|
118
92
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_arm_fma (void);
|
|
119
93
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_fp16_va (void);
|
|
94
|
+
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_dotprod (void);
|
|
120
95
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_matmul_int8(void);
|
|
121
96
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_sve (void);
|
|
122
97
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
|
|
98
|
+
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_sme (void);
|
|
123
99
|
// other
|
|
124
100
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_riscv_v (void);
|
|
125
101
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_vsx (void);
|
|
102
|
+
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_vxe (void);
|
|
126
103
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_wasm_simd (void);
|
|
127
104
|
WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_llamafile (void);
|
|
128
105
|
|
|
129
106
|
// Internal types and functions exposed for tests and benchmarks
|
|
130
107
|
|
|
131
|
-
typedef void (*wsp_ggml_from_float_to_mat_t)
|
|
132
|
-
(const float * WSP_GGML_RESTRICT x, void * WSP_GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
|
|
133
108
|
typedef void (*wsp_ggml_vec_dot_t) (int n, float * WSP_GGML_RESTRICT s, size_t bs, const void * WSP_GGML_RESTRICT x, size_t bx,
|
|
134
109
|
const void * WSP_GGML_RESTRICT y, size_t by, int nrc);
|
|
135
|
-
typedef void (*wsp_ggml_gemv_t) (int n, float * WSP_GGML_RESTRICT s, size_t bs, const void * WSP_GGML_RESTRICT x,
|
|
136
|
-
const void * WSP_GGML_RESTRICT y, int nr, int nc);
|
|
137
|
-
typedef void (*wsp_ggml_gemm_t) (int n, float * WSP_GGML_RESTRICT s, size_t bs, const void * WSP_GGML_RESTRICT x,
|
|
138
|
-
const void * WSP_GGML_RESTRICT y, int nr, int nc);
|
|
139
110
|
|
|
140
111
|
struct wsp_ggml_type_traits_cpu {
|
|
141
112
|
wsp_ggml_from_float_t from_float;
|
|
142
|
-
wsp_ggml_from_float_to_mat_t from_float_to_mat;
|
|
143
113
|
wsp_ggml_vec_dot_t vec_dot;
|
|
144
114
|
enum wsp_ggml_type vec_dot_type;
|
|
145
115
|
int64_t nrows; // number of rows to process simultaneously
|
|
146
|
-
int64_t ncols; // number of columns to process simultaneously
|
|
147
|
-
wsp_ggml_gemv_t gemv;
|
|
148
|
-
wsp_ggml_gemm_t gemm;
|
|
149
116
|
};
|
|
150
117
|
|
|
151
118
|
WSP_GGML_BACKEND_API const struct wsp_ggml_type_traits_cpu * wsp_ggml_get_type_traits_cpu(enum wsp_ggml_type type);
|
|
@@ -165,13 +132,6 @@ extern "C" {
|
|
|
165
132
|
|
|
166
133
|
WSP_GGML_BACKEND_API wsp_ggml_backend_reg_t wsp_ggml_backend_cpu_reg(void);
|
|
167
134
|
|
|
168
|
-
#ifdef WSP_GGML_USE_CPU_HBM
|
|
169
|
-
WSP_GGML_BACKEND_API wsp_ggml_backend_buffer_type_t wsp_ggml_backend_cpu_hbm_buffer_type(void);
|
|
170
|
-
#endif
|
|
171
|
-
|
|
172
|
-
WSP_GGML_BACKEND_API wsp_ggml_backend_buffer_type_t wsp_ggml_backend_cpu_aarch64_buffer_type(void);
|
|
173
|
-
WSP_GGML_BACKEND_API bool wsp_ggml_backend_cpu_buft_is_aarch64(wsp_ggml_backend_buffer_type_t buft);
|
|
174
|
-
|
|
175
135
|
#ifdef __cplusplus
|
|
176
136
|
}
|
|
177
137
|
#endif
|
package/cpp/ggml-impl.h
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
// GGML internal header
|
|
4
4
|
|
|
5
5
|
#include "ggml.h"
|
|
6
|
+
#include "gguf.h"
|
|
7
|
+
|
|
6
8
|
#include <assert.h>
|
|
7
9
|
#include <math.h>
|
|
8
10
|
#include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
|
|
@@ -14,7 +16,7 @@
|
|
|
14
16
|
#include <arm_sve.h>
|
|
15
17
|
#endif // __ARM_FEATURE_SVE
|
|
16
18
|
|
|
17
|
-
#if defined(__ARM_NEON)
|
|
19
|
+
#if defined(__ARM_NEON) && !defined(__CUDACC__) && !defined(__MUSACC__)
|
|
18
20
|
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
|
|
19
21
|
//
|
|
20
22
|
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
|
|
@@ -30,11 +32,13 @@
|
|
|
30
32
|
extern "C" {
|
|
31
33
|
#endif
|
|
32
34
|
|
|
33
|
-
#
|
|
34
|
-
#
|
|
35
|
+
#ifndef MIN
|
|
36
|
+
# define MIN(a, b) ((a) < (b) ? (a) : (b))
|
|
37
|
+
#endif
|
|
35
38
|
|
|
36
|
-
#
|
|
37
|
-
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
|
39
|
+
#ifndef MAX
|
|
40
|
+
# define MAX(a, b) ((a) > (b) ? (a) : (b))
|
|
41
|
+
#endif
|
|
38
42
|
|
|
39
43
|
// required for mmap as gguf only guarantees 32-byte alignment
|
|
40
44
|
#define TENSOR_ALIGNMENT 32
|
|
@@ -72,8 +76,8 @@ static inline int wsp_ggml_up(int n, int m) {
|
|
|
72
76
|
//
|
|
73
77
|
|
|
74
78
|
WSP_GGML_ATTRIBUTE_FORMAT(2, 3)
|
|
75
|
-
void wsp_ggml_log_internal (enum wsp_ggml_log_level level, const char * format, ...);
|
|
76
|
-
void wsp_ggml_log_callback_default(enum wsp_ggml_log_level level, const char * text, void * user_data);
|
|
79
|
+
WSP_GGML_API void wsp_ggml_log_internal (enum wsp_ggml_log_level level, const char * format, ...);
|
|
80
|
+
WSP_GGML_API void wsp_ggml_log_callback_default(enum wsp_ggml_log_level level, const char * text, void * user_data);
|
|
77
81
|
|
|
78
82
|
#define WSP_GGML_LOG(...) wsp_ggml_log_internal(WSP_GGML_LOG_LEVEL_NONE , __VA_ARGS__)
|
|
79
83
|
#define WSP_GGML_LOG_INFO(...) wsp_ggml_log_internal(WSP_GGML_LOG_LEVEL_INFO , __VA_ARGS__)
|
|
@@ -295,24 +299,27 @@ struct wsp_ggml_cgraph {
|
|
|
295
299
|
enum wsp_ggml_cgraph_eval_order order;
|
|
296
300
|
};
|
|
297
301
|
|
|
302
|
+
// returns a slice of cgraph with nodes [i0, i1)
|
|
303
|
+
// the slice does not have leafs or gradients
|
|
304
|
+
// if you need the gradients, get them from the original graph
|
|
298
305
|
struct wsp_ggml_cgraph wsp_ggml_graph_view(struct wsp_ggml_cgraph * cgraph, int i0, int i1);
|
|
299
306
|
|
|
300
307
|
// Memory allocation
|
|
301
308
|
|
|
302
|
-
void * wsp_ggml_aligned_malloc(size_t size);
|
|
303
|
-
void wsp_ggml_aligned_free(void * ptr, size_t size);
|
|
309
|
+
WSP_GGML_API void * wsp_ggml_aligned_malloc(size_t size);
|
|
310
|
+
WSP_GGML_API void wsp_ggml_aligned_free(void * ptr, size_t size);
|
|
304
311
|
|
|
305
312
|
// FP16 to FP32 conversion
|
|
306
313
|
|
|
307
314
|
#if defined(__ARM_NEON)
|
|
308
|
-
#
|
|
315
|
+
#if defined(_MSC_VER) || (defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
|
|
309
316
|
typedef uint16_t wsp_ggml_fp16_internal_t;
|
|
310
317
|
#else
|
|
311
318
|
typedef __fp16 wsp_ggml_fp16_internal_t;
|
|
312
319
|
#endif
|
|
313
320
|
#endif
|
|
314
321
|
|
|
315
|
-
#if defined(__ARM_NEON) && !defined(_MSC_VER)
|
|
322
|
+
#if defined(__ARM_NEON) && !defined(_MSC_VER) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
|
|
316
323
|
#define WSP_GGML_COMPUTE_FP16_TO_FP32(x) wsp_ggml_compute_fp16_to_fp32(x)
|
|
317
324
|
#define WSP_GGML_COMPUTE_FP32_TO_FP16(x) wsp_ggml_compute_fp32_to_fp16(x)
|
|
318
325
|
|
|
@@ -549,3 +556,12 @@ static inline wsp_ggml_bf16_t wsp_ggml_compute_fp32_to_bf16(float s) {
|
|
|
549
556
|
#ifdef __cplusplus
|
|
550
557
|
}
|
|
551
558
|
#endif
|
|
559
|
+
|
|
560
|
+
#ifdef __cplusplus
|
|
561
|
+
#include <vector>
|
|
562
|
+
|
|
563
|
+
// expose GGUF internals for test code
|
|
564
|
+
WSP_GGML_API size_t wsp_gguf_type_size(enum wsp_gguf_type type);
|
|
565
|
+
WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_file_impl(FILE * file, struct wsp_gguf_init_params params);
|
|
566
|
+
WSP_GGML_API void wsp_gguf_write_to_buf(const struct wsp_gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta);
|
|
567
|
+
#endif // __cplusplus
|
package/cpp/ggml-metal-impl.h
CHANGED
|
@@ -102,6 +102,21 @@ typedef struct {
|
|
|
102
102
|
uint64_t nb3;
|
|
103
103
|
} wsp_ggml_metal_kargs_cpy;
|
|
104
104
|
|
|
105
|
+
typedef struct {
|
|
106
|
+
int64_t ne10;
|
|
107
|
+
int64_t ne11;
|
|
108
|
+
int64_t ne12;
|
|
109
|
+
uint64_t nb10;
|
|
110
|
+
uint64_t nb11;
|
|
111
|
+
uint64_t nb12;
|
|
112
|
+
uint64_t nb13;
|
|
113
|
+
uint64_t nb1;
|
|
114
|
+
uint64_t nb2;
|
|
115
|
+
uint64_t nb3;
|
|
116
|
+
uint64_t offs;
|
|
117
|
+
bool inplace;
|
|
118
|
+
} wsp_ggml_metal_kargs_set;
|
|
119
|
+
|
|
105
120
|
typedef struct {
|
|
106
121
|
int32_t ne00;
|
|
107
122
|
int32_t ne01;
|
|
@@ -192,6 +207,30 @@ typedef struct {
|
|
|
192
207
|
int16_t r3;
|
|
193
208
|
} wsp_ggml_metal_kargs_mul_mv;
|
|
194
209
|
|
|
210
|
+
typedef struct {
|
|
211
|
+
int32_t ne00;
|
|
212
|
+
int32_t ne01;
|
|
213
|
+
int32_t ne02;
|
|
214
|
+
uint64_t nb00;
|
|
215
|
+
uint64_t nb01;
|
|
216
|
+
uint64_t nb02;
|
|
217
|
+
uint64_t nb03;
|
|
218
|
+
int32_t ne10;
|
|
219
|
+
int32_t ne11;
|
|
220
|
+
int32_t ne12;
|
|
221
|
+
uint64_t nb10;
|
|
222
|
+
uint64_t nb11;
|
|
223
|
+
uint64_t nb12;
|
|
224
|
+
uint64_t nb13;
|
|
225
|
+
int32_t ne0;
|
|
226
|
+
int32_t ne1;
|
|
227
|
+
int16_t r2;
|
|
228
|
+
int16_t r3;
|
|
229
|
+
int16_t nsg;
|
|
230
|
+
int16_t nxpsg;
|
|
231
|
+
int16_t r1ptg;
|
|
232
|
+
} wsp_ggml_metal_kargs_mul_mv_ext;
|
|
233
|
+
|
|
195
234
|
typedef struct {
|
|
196
235
|
int32_t nei0;
|
|
197
236
|
int32_t nei1;
|